From e47e052bc0869540e003e27dc09f5c546b64e869 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 28 Jul 2016 12:19:37 +0200 Subject: [PATCH 0001/1109] initial empty commit From 2adf33028ed9efd77fa910c915774242f28169ae Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 28 Jul 2016 12:19:38 +0200 Subject: [PATCH 0002/1109] Tokenize.jl generated files. license: MIT authors: Kristoffer Carlsson years: 2016 user: Julia Version 0.5.0-pre+5667 [c105848*] --- JuliaSyntax/.gitignore | 3 +++ JuliaSyntax/.travis.yml | 19 +++++++++++++++++++ JuliaSyntax/LICENSE.md | 22 ++++++++++++++++++++++ JuliaSyntax/README.md | 1 + JuliaSyntax/REQUIRE | 1 + JuliaSyntax/appveyor.yml | 34 ++++++++++++++++++++++++++++++++++ JuliaSyntax/src/Tokenize.jl | 5 +++++ JuliaSyntax/test/runtests.jl | 5 +++++ 8 files changed, 90 insertions(+) create mode 100644 JuliaSyntax/.gitignore create mode 100644 JuliaSyntax/.travis.yml create mode 100644 JuliaSyntax/LICENSE.md create mode 100644 JuliaSyntax/README.md create mode 100644 JuliaSyntax/REQUIRE create mode 100644 JuliaSyntax/appveyor.yml create mode 100644 JuliaSyntax/src/Tokenize.jl create mode 100644 JuliaSyntax/test/runtests.jl diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore new file mode 100644 index 0000000000000..8c960ec808d9e --- /dev/null +++ b/JuliaSyntax/.gitignore @@ -0,0 +1,3 @@ +*.jl.cov +*.jl.*.cov +*.jl.mem diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml new file mode 100644 index 0000000000000..3b361a57ef605 --- /dev/null +++ b/JuliaSyntax/.travis.yml @@ -0,0 +1,19 @@ +# Documentation: http://docs.travis-ci.com/user/languages/julia/ +language: julia +os: + - linux + - osx +julia: + - release + - nightly +notifications: + email: false +# uncomment the following lines to override the default test script +#script: +# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi +# - julia -e 'Pkg.clone(pwd()); Pkg.build("Tokenize"); Pkg.test("Tokenize"; coverage=true)' +after_success: + # 
push coverage results to Coveralls + - julia -e 'cd(Pkg.dir("Tokenize")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' + # push coverage results to Codecov + - julia -e 'cd(Pkg.dir("Tokenize")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md new file mode 100644 index 0000000000000..c55684c31049a --- /dev/null +++ b/JuliaSyntax/LICENSE.md @@ -0,0 +1,22 @@ +The Tokenize.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2016: Kristoffer Carlsson. +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. 
+> diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md new file mode 100644 index 0000000000000..6c557541e2296 --- /dev/null +++ b/JuliaSyntax/README.md @@ -0,0 +1 @@ +# Tokenize diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE new file mode 100644 index 0000000000000..d5d646713dcf8 --- /dev/null +++ b/JuliaSyntax/REQUIRE @@ -0,0 +1 @@ +julia 0.4 diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml new file mode 100644 index 0000000000000..83b657170b2a5 --- /dev/null +++ b/JuliaSyntax/appveyor.yml @@ -0,0 +1,34 @@ +environment: + matrix: + - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" + - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" + - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" + - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" + +branches: + only: + - master + - /release-.*/ + +notifications: + - provider: Email + on_build_success: false + on_build_failure: false + on_build_status_changed: false + +install: +# Download most recent Julia Windows binary + - ps: (new-object net.webclient).DownloadFile( + $("http://s3.amazonaws.com/"+$env:JULIAVERSION), + "C:\projects\julia-binary.exe") +# Run installer silently, output to C:\projects\julia + - C:\projects\julia-binary.exe /S /D=C:\projects\julia + +build_script: +# Need to convert from shallow to complete for Pkg.clone to work + - IF EXIST .git\shallow (git fetch --unshallow) + - C:\projects\julia\bin\julia -e "versioninfo(); + Pkg.clone(pwd(), \"Tokenize\"); Pkg.build(\"Tokenize\")" + +test_script: + - C:\projects\julia\bin\julia -e "Pkg.test(\"Tokenize\")" diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl new file mode 100644 index 0000000000000..99470368629b4 --- /dev/null +++ b/JuliaSyntax/src/Tokenize.jl @@ -0,0 +1,5 @@ +module Tokenize + +# package code goes here + +end # module diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl new file mode 
100644 index 0000000000000..45e445eaf0483 --- /dev/null +++ b/JuliaSyntax/test/runtests.jl @@ -0,0 +1,5 @@ +using Tokenize +using Base.Test + +# write your own tests here +@test 1 == 1 From bd3c96bc038c8a10e62cf8089b092ca001c938e4 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 1 Aug 2016 00:44:20 +0200 Subject: [PATCH 0003/1109] dsada --- JuliaSyntax/src/lexer.jl | 594 +++++++++++++++++++++++++++++++++++ JuliaSyntax/src/token.jl | 187 +++++++++++ JuliaSyntax/src/utilities.jl | 153 +++++++++ 3 files changed, 934 insertions(+) create mode 100644 JuliaSyntax/src/lexer.jl create mode 100644 JuliaSyntax/src/token.jl create mode 100644 JuliaSyntax/src/utilities.jl diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl new file mode 100644 index 0000000000000..162900a55e6d6 --- /dev/null +++ b/JuliaSyntax/src/lexer.jl @@ -0,0 +1,594 @@ +module Lexers + +include("utilities.jl") + +import Base: push! +using Compat +import Compat.String + +import ..Tokens +import ..Tokens: Token, Kind, TokenError + +export tokenize + +using Logging +@Logging.configure(level=WARNING) + +#macro debug(ex) +# return :() +#end + + +ishead(c::Char) = ('A' <= c <= 'z') || c == '$' || c == '-' || c == '_' || c == '.' 
+istail(c::Char) = ishead(c) || isdigit(c) +ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') +iswhitespace(c::Char) = Base.UTF8proc.isspace(c) + +type Lexer{IO_t <: Union{IO, String}} + io::IO_t + + token_start_row::Int + token_start_col::Int + + prevpos::Int64 + token_startpos::Int64 + + current_row::Int + current_col::Int + current_pos::Int64 +end + +Lexer(io::Union{IO, String}) = Lexer(io, 1, 1, -1, 0, 1, 1, 1) +tokenize(x) = Lexer(x) + + + +if VERSION > v"v0.5.0-" + Base.iteratorsize(::Lexer) = Base.SizeUnknown() + Base.iteratoreltype(::Lexer) = Base.HasEltype() +end + +Base.eltype(::Lexer) = Token + + +function Base.start(l::Lexer) + seekstart(l) + l.token_startpos = 0 + l.token_start_row = 1 + l.token_start_col = 1 + + l.current_row = 1 + l.current_col = 1 + l.current_pos = 1 + false +end + +function Base.next(l::Lexer, isdone) + t = next_token(l) + return t, t.kind == Tokens.Eof +end + +Base.done(l::Lexer, isdone) = isdone + + +function Base.show(io::IO, l::Lexer) + #print(io, "Token buffer:") + #print(io, extract_tokenstring(l)) + println(io, "Position: ", position(l)) + println(io, l.current_row, l.current_col) + #print(io, "\n# tokens read: ", length(tokens(l)), " n errors: ", n_errors) + #print(io, "\n", tokens(l)) +end + + +startpos(l::Lexer) = l.token_startpos +startpos!(l::Lexer, i::Int64) = l.token_startpos = i +tokens(l::Lexer) = l.tokens +io(l::Lexer) = l.io +prevpos(l::Lexer) = l.prevpos +prevpos!(l::Lexer, i::Int64) = l.prevpos = i +Base.seekstart{I <: IO}(l::Lexer{I}) = seekstart(l.io) +Base.seekstart{I <: String}(l::Lexer{I}) = seek(l, 1) + +seek2startpos!(l::Lexer) = seek(l, startpos(l)) + +push!(l::Lexer, t::Token) = push!(l.tokens, t) +peekchar{I <: IO}(l::Lexer{I}) = peekchar(l.io) +peekchar{I <: String}(l::Lexer{I}) = eof(l) ? 
EOF_CHAR : l.io[position(l)] + +position{I <: String}(l::Lexer{I}) = l.current_pos +position{I <: IO}(l::Lexer{I}) = Base.position(l.io) +eof{I <: IO}(l::Lexer{I}) = eof(l.io) +eof{I <: String}(l::Lexer{I}) = position(l) > sizeof(l.io) +Base.seek{I <: IO}(l::Lexer{I}, pos) = seek(l.io, pos) +Base.seek{I <: String}(l::Lexer{I}, pos) = l.current_pos = pos +function ignore!{I <: IO}(l::Lexer{I}) + l.token_startpos = position(l) + l.token_start_row = l.current_row + l.token_start_col = l.current_col +end + +function ignore!{I <: String}(l::Lexer{I}) + l.token_startpos = position(l) - 1 + l.token_start_row = l.current_row + l.token_start_col = l.current_col +end + +function prevchar(l::Lexer) + backup!(l) + return readchar(l) +end + + +function readchar{I <: IO}(l::Lexer{I}) + prevpos!(l, position(l)) + c = readchar(l.io) + return c +end + +function readchar{I <: String}(l::Lexer{I}) + prevpos!(l, position(l)) + eof(l) && return EOF_CHAR + c = l.io[position(l)] + l.current_pos = nextind(l.io, position(l)) + return c +end + + +function backup!(l::Lexer) + @assert prevpos(l) != -1 + seek(l, prevpos(l)) + prevpos!(l, -1) +end + +function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) + c = peekchar(l) + if isa(f, Function) + ok = f(c) + elseif isa(f, Char) + ok = c == f + else + ok = c in f + end + ok && readchar(l) + return ok +end + +function accept_batch(l::Lexer, f) + ok = false + while accept(l, f) + ok = true + end + return ok +end + +function emit(l::Lexer, kind::Kind, str::String) + skipfirst, skiplast = 0,0 + tok = Token(kind, (l.token_start_row, l.token_start_col), + (l.current_row, l.current_col), + startpos(l) + skipfirst, position(l) - skiplast - 1, + str) + @debug "emitted token: $tok:" + ignore!(l) + return tok +end + +function emit(l::Lexer, kind::Kind, err::TokenError=Tokens.unknown; skipfirst::Int = 0, skiplast::Int = 0) + skipfirst, skiplast = 0, 0 + str = extract_tokenstring(l) + tok = Token(kind, (l.token_start_row, 
l.token_start_col), + (l.current_row, l.current_col), + startpos(l) + skipfirst, position(l) - skiplast - 1, + str) + @debug "emitted token: $tok:" + ignore!(l) + return tok +end + +function emit_error(l::Lexer, err::TokenError=Tokens.unknown) + return emit(l, Tokens.Error, err) +end + +function extract_tokenstring{T}(l::Lexer{T}) + isstr = T <: String + cs = Char[] + sizehint!(cs, position(l) - startpos(l)) + curr_pos = position(l) + seek2startpos!(l) + if isstr + seek(l, position(l) + 1) + end + while position(l) < curr_pos + c = readchar(l) + l.current_col += 1 + if c == '\n' + l.current_row += 1 + l.current_col = 1 + end + push!(cs, c) + end + str = String(cs) + return str +end + + +# We just consumed a " or a """ +function read_string(l::Lexer, kind::Tokens.Kind) + while true + c = readchar(l) + if c == '\\' && eof(readchar(l)) + return false + end + if c == '"' + if kind == Tokens.t_string + return true + else + if accept(l, "\"") && accept(l, "\"") + return true + end + end + elseif eof(c) + return false + end + end +end + + +function next_token(l::Lexer) + + c = readchar(l) + + @debug "startpos at $(l.token_startpos)" + + if eof(c); return emit(l, Tokens.Eof) + elseif iswhitespace(c); return lex_whitespace(l) + elseif c == '#'; return lex_comment(l) + elseif c == '='; return lex_equal(l) + elseif c == '!'; return lex_exclaim(l) + elseif c == '['; return emit(l, Tokens.lsquare) + elseif c == ']'; return emit(l, Tokens.rsquare) + elseif c == '{'; return emit(l, Tokens.lbrace) + elseif c == ';'; return emit(l, Tokens.semicolon) + elseif c == '}'; return emit(l, Tokens.rbrace) + elseif c == '('; return emit(l, Tokens.lparen) + elseif c == ')'; return emit(l, Tokens.rparen) + elseif c == ','; return emit(l, Tokens.comma) + elseif c == '*'; return emit(l, Tokens.star) + elseif c == '>'; return lex_greater(l) + elseif c == '<'; return lex_less(l) + elseif c == ':'; return lex_colon(l) + elseif c == '|'; return lex_bar(l) + elseif c == '@'; return lex_at(l) + 
elseif c == '&'; return lex_amper(l) + elseif c == '\''; return lex_prime(l) + elseif c == '?'; return emit(l, Tokens.conditional) + elseif c == '"'; return lex_quote(l); + elseif c == '%'; return lex_percent(l); + elseif c == '/'; return lex_forwardslash(l); + elseif c == '.'; return lex_dot(l); + elseif isdigit(c) || c == '-' || c == '+' return lex_digitorsign(l) + elseif is_identifier_start_char(c); return lex_identifier(l) + else emit_error(l) + end + + #= + if eof(c); return emit(l, Tokens.Eof) + elseif c == '$'; return lex_dollar(l); + elseif c == '"'; return lex_quote(l); + elseif c == ';'; return lex_comment(l) + elseif c == '!'; return lex_exclaim(l); + + else emit_error(l) + end + =# +end + + +# Lex whitespace, a whitespace char has been consumed +function lex_whitespace(l::Lexer) + accept_batch(l, iswhitespace) + return emit(l, Tokens.Whitespace) +end + +function lex_comment(l::Lexer) + if readchar(l) != '=' + while true + c = readchar(l) + if c == '\n' || eof(c) + backup!(l) + return emit(l, Tokens.Comment, skipfirst = 1) + end + end + else + c = readchar(l) # consume the '=' + n_start, n_end = 1, 0 + while true + if eof(c) + return emit_error(l, Tokens.EOF_in_multicomment) + end + nc = readchar(l) + if c == '#' && nc == '=' + n_start += 1 + elseif c == '=' && nc == '#' + n_end += 1 + end + if n_start == n_end + return emit(l, Tokens.Comment, skipfirst = 2, skiplast = 2) + end + c = nc + end + end +end + +# Lex a greater char, a '>' has been consumed +function lex_greater(l::Lexer) + if accept(l, '>') # >> + if accept(l, '>') # >>> + if accept(l, '=') # >>>= + return emit(l, Tokens.ass_bitshift_rrr) + elseif accept(l, iswhitespace) + return emit(l, Tokens.bitshift_rrr) + else # >>>?, ? not a = + return emit_error(l) + end + else # >>? + if accept(l, '=') # >>= + return emit(l, Tokens.ass_bitshift_rr) + elseif accept(l, iswhitespace) # '>> ' + return emit(l, Tokens.bitshift_rr) + else # '>>?', ? 
not =, >, ' ' + return emit_error(l) + end + end + elseif accept(l, '=') + return emit(l, Tokens.ass_bitshift_r) + elseif accept(l, iswhitespace) + return emit(l, Tokens.comp_r) # '> ' + else + return emit_error(l) + end +end + +# Lex a less char, a '<' has been consumed +function lex_less(l::Lexer) + if accept(l, '<') # << + if accept(l, '=') # <<= + return emit(l, Tokens.ass_bitshift_ll) + elseif accept(l, iswhitespace) # '<< ' + return emit(l, Tokens.bitshift_ll) + else # '< ' + end +end + + + +# Lex all tokens that start with an = character. +# An '=' char has been consumed +function lex_equal(l::Lexer) + if accept(l, '=') # == + if accept(l, iswhitespace) + return emit(l, Tokens.ass_equal2) + elseif accept(l, '=') # === + if accept(l, iswhitespace) + emit(l, Tokens.ass_equal3) + else # ===?, ? != ' ' + emit_error(l) + end + end + elseif accept(l, '>') # => + emit(l, Tokens.ass_equal_r) + else + emit(l, Tokens.ass_equal) + end +end + +# Lex a colon, a ':' has been consumed +function lex_colon(l::Lexer) + if accept(l, ':') # '::' + emit(l, Tokens.decl) + elseif accept(l, iswhitespace) # ': ' + emit(l, Tokens._colon) + elseif accept_batch(l, is_identifier_char) # :foo32 + emit(l, Tokens.t_symbol, skipfirst = 1) + else + emit_error(l) + end +end + +function lex_exclaim(l::Lexer) + if accept(l, '=') # != + if accept(l, '=') + return emit(l, Tokens.comp_neq2) # !== + else # != + return emit(l, Tokens.comp_neq) + end + else + return emit(l, Tokens.exclaim) + end +end + +function lex_percent(l::Lexer) + if accept(l, '=') + return emit(l, Tokens.ass_perc) + else + return emit(l, Tokens.perc) + end +end + +function lex_bar(l::Lexer) + if accept(l, iswhitespace) + emit(l, Tokens.pipe) # '| ' + elseif accept(l, '=') # |= + return emit(l, Tokens.ass_bar) + elseif accept(l, '>') # |> + return emit(l, Tokens.pipe_r) + elseif accept(l, '|') # || + return emit(l, Tokens.lazy_or) + else + return emit_error(l) + end +end + + +function lex_digitorsign(l::Lexer) + # A digit is an 
int + longest, kind = position(l), Tokens.t_int + + accept(l, '-') + if accept_batch(l, isdigit) && position(l) > longest + longest, kind = position(l), Tokens.t_int + end + + seek2startpos!(l) + + accept(l, "+-") + if accept_batch(l, isdigit) && accept(l, '.') + accept_batch(l, isdigit) + if position(l) > longest + longest, kind = position(l), Tokens.t_float + end + if accept(l, "eE") + accept(l, "+-") + if accept_batch(l, isdigit) && position(l) > longest + longest, kind = position(l), Tokens.t_float + end + end + end + + seek2startpos!(l) + + # 0x[0-9A-Fa-f]+ + if accept(l, '0') && accept(l, 'x') + accept("o") + if accept_batch(ishex) && position(l) > longest + longest, kind = position(l), Tokens.APFloat + end + end + + seek(l, longest) + + if kind == Tokens.Error + return emit_error(l) + else + return emit(l, kind) + end + +end + + +# Lex a prim sign, a ''' has been consumed +function lex_prime(l) + @debug "lexing prime, current char is $(peekchar(l)), pos is $(position(l))" + while true + c = readchar(l) + if eof(c) + return emit_error(l, Tokens.EOF_in_char) + elseif c == '\\' + if eof(readchar(l)) + return emit_error(l, Tokens.EOF_in_char) + end + elseif c == '\'' + return emit(l, Tokens.t_char, skipfirst=1, skiplast=1) + end + end +end + + +# Lex all tokens that start with an @ character. +# An '@' char has been consumed +function lex_at(l::Lexer) + if accept_batch(l, is_identifier_char) + return emit(l, Tokens.macro_call) + else + return emit_error(l) + end +end + + +function lex_amper(l::Lexer) + if accept(l, '&') + return emit(l, Tokens.lazy_and) + elseif accept(l, "=") + return emit(l, Tokens.ass_ampr) + else + return emit(l, Tokens.amper) + end +end + +function lex_identifier(l::Lexer) + + accept_batch(l, is_identifier_char) + + str = extract_tokenstring(l) + kind = get(Tokens.KEYWORDS, str, Tokens.Identifier) + return emit(l, kind, str) +end + +# Parse a token starting with a quote. 
+# A '"' has been consumed +function lex_quote(l::Lexer) + if accept(l, '"') # "" + if accept(l, '"') # """ + if read_string(l, Tokens.t_string_triple) + emit(l, Tokens.t_string_triple) + else + emit_error(l, Tokens.EOF_in_string) + end + else # empty string + return emit(l, Tokens.t_string) + end + else # "?, ? != '"' + if read_string(l, Tokens.t_string) + emit(l, Tokens.t_string) + else + return emit_error(l, Tokens.EOF_in_string) + end + end +end + +# Parse a token starting with a quote. +# A '"' has been consumed +function lex_forwardslash(l::Lexer) + if accept(l, "/") # // + if accept(l, "=") # //= + return emit(l, Tokens.ass_fslash2) + else + return emit(l, Tokens.fslash2) + end + elseif accept(l, "=") # /= + return emit(l, Tokens.ass_fslash) + else + return emit(l, Tokens.fslash) + end +end + + +function lex_dot(l::Lexer) + if accept(l, '.') + if accept(l, '.') + return emit(l, Tokens.dot3) + else + return emit(l, Tokens.dot2) + end + else + return emit(l, Tokens.dot) + end +end + + +function lex_dollar(l::Lexer) + return emit(l, Tokens.dollar) +end + + +end # module \ No newline at end of file diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl new file mode 100644 index 0000000000000..16d0254482a8c --- /dev/null +++ b/JuliaSyntax/src/token.jl @@ -0,0 +1,187 @@ +module Tokens + +using Compat +import Compat.String +import Base.eof + +export Token + +# https://github.com/llvm-mirror/llvm/blob/master/lib/AsmParser/LLToken.h +@enum(Kind, + # Markers + Nothing, + Eof, + Error, + Comment, + Whitespace, + Identifier, + + decl, + _colon, # : + prime, # ' + macro_call, # @ + + dot,# . + dot2, # .. + dot3, # ... + + begin_ops, + + lazy_or, # || + lazy_and, # && + pipe, #? | + amper, # & + conditional, # ? + perc, # % + fslash, # / + fslash2, # // + transpose, # .' 
+ issubtype, # <: + dollar, # $ + + + comp_r, # > + comp_l, # < + comp_neq, # != + comp_neq2, # !== + + pipe_l, # |> + pipe_r, # <| + + + begin_assignments, + ass_equal, # = + ass_equal2, # == + ass_equal3, # === + ass_equal_r, # => + ass_bitshift_r, # >= + ass_bitshift_l, # <= + ass_bitshift_rr, # >>= + ass_bitshift_rrr, # >>>= + ass_bitshift_ll, # <<= + ass_bar, # |= + ass_ampr, # &= + ass_perc, # %= + ass_fslash, # /= + ass_fslash2, # //= + end_assignments, + + begin_bitshifts, + bitshift_ll, # << + bitshift_rr, # >> + bitshift_rrr, # >>> + end_bitshifts, + + end_ops, + + begin_keywords, + + kw_begin, kw_while, kw_if, kw_for, kw_try, kw_return, + kw_break, kw_continue, kw_function, + kw_macro, kw_quote, kw_let, kw_local, kw_global, kw_const, + kw_abstract, kw_typealias, kw_type, kw_bitstype, kw_immutable, + kw_do, kw_module, kw_baremodule, kw_using, kw_import, + kw_export, kw_importall, kw_end, kw_false, kw_true, + + end_keywords, + + comma, # = , + star, # * + lsquare, + rsquare, # [ ] + lbrace, + rbrace, # { } + lparen, + rparen, # ( ) + exclaim, # ! + bar, # | + semicolon, + + begin_types, + + t_int, t_float, t_string, t_string_triple, t_symbol, t_char, + + end_types +) + +@enum(TokenError, + no_err, + EOF_in_multicomment, + EOF_in_string, + EOF_in_char, + unknown +) + +TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( +EOF_in_multicomment => "unterminated multi-line comment #= ... 
=#", +EOF_in_string => "unterminated string literal", +EOF_in_char => "unterminated character literal", +unknown => "unknown" +) + + +iskeyword(k::Kind) = begin_keywords < k < end_keywords +istype(k::Kind) = begin_types < k < end_types +isoperator(k::Kind) = begin_ops < k < end_ops + +const KEYWORDS = Dict{String, Kind}() + +function _add_kws() + offset = length("kw_") + for k in instances(Kind) + if iskeyword(k) + KEYWORDS[string(k)[offset+1:end]] = k + end + end +end +_add_kws() + +const KEYWORD_SYMBOLS = Vector{Symbol}() + + +function _add_kw_symbols() + resize!(KEYWORD_SYMBOLS, Int32(end_keywords) - Int32(begin_keywords) - 1) + for (k, v) in KEYWORDS + KEYWORD_SYMBOLS[Int32(v) - Int32(begin_keywords)] = Symbol(k) + end +end +_add_kw_symbols() +kwsym(k::Kind) = KEYWORD_SYMBOLS[Int32(k) - Int32(begin_keywords)] + +immutable Token + kind::Kind + # Offsets into a string or buffer + startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index + endpos::Tuple{Int, Int} + startbyte::Int64 # The byte where the token start in the buffer + endbyte::Int64 # The byte where the token ended in the buffer + val::Compat.UTF8String # The actual string of the token + token_error::TokenError +end + +function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, + startbyte::Int64, endbyte::Int64, val::String) + Token(kind, startposition, endposition, startbyte, endbyte, val, no_err) +end +Token() = Token(Nothing, (0,0), (0,0), 0, 0, "", unknown) + + +kind(t::Token) = t.kind +startpos(t::Token) = t.startpos +endpos(t::Token) = t.endpos +untokenize(t::Token) = t.val + +function Base.show(io::IO, t::Token) + start_r, start_c = startpos(t) + end_r, end_c = endpos(t) + str = kind(t) == Eof ? 
"" : untokenize(t) + print(io, start_r, ",", start_c, "-", + end_r, ",", end_c, ":", + "\t", kind(t), "\t", str) +end + +Base.print(io::IO, t::Token) = print(io, untokenize(t)) + +eof(t::Token) = t.kind == Eof + +end # module diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl new file mode 100644 index 0000000000000..cb84b3edc5e21 --- /dev/null +++ b/JuliaSyntax/src/utilities.jl @@ -0,0 +1,153 @@ +#= +The code in here has been extracted from the JuliaParser.jl package +with license: + +The JuliaParser.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2014: Jake Bolewski. +> +> Permission is hereby granted, free of charge, to any person obtaining +> a copy of this software and associated documentation files (the +> "Software"), to deal in the Software without restriction, including +> without limitation the rights to use, copy, modify, merge, publish, +> distribute, sublicense, and/or sell copies of the Software, and to +> permit persons to whom the Software is furnished to do so, subject to +> the following conditions: +> +> The above copyright notice and this permission notice shall be +> included in all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+=# +const EOF_CHAR = convert(Char,typemax(UInt32)) + + +function is_cat_id_start(ch::Char, cat::Integer) + c = UInt32(ch) + return (cat == UTF8proc.UTF8PROC_CATEGORY_LU || cat == UTF8proc.UTF8PROC_CATEGORY_LL || + cat == UTF8proc.UTF8PROC_CATEGORY_LT || cat == UTF8proc.UTF8PROC_CATEGORY_LM || + cat == UTF8proc.UTF8PROC_CATEGORY_LO || cat == UTF8proc.UTF8PROC_CATEGORY_NL || + cat == UTF8proc.UTF8PROC_CATEGORY_SC || # allow currency symbols + cat == UTF8proc.UTF8PROC_CATEGORY_SO || # other symbols + + # math symbol (category Sm) whitelist + (c >= 0x2140 && c <= 0x2a1c && + ((c >= 0x2140 && c <= 0x2144) || # ⅀, ⅁, ⅂, ⅃, ⅄ + c == 0x223f || c == 0x22be || c == 0x22bf || # ∿, ⊾, ⊿ + c == 0x22a4 || c == 0x22a5 || # ⊤ ⊥ + (c >= 0x22ee && c <= 0x22f1) || # ⋮, ⋯, ⋰, ⋱ + + (c >= 0x2202 && c <= 0x2233 && + (c == 0x2202 || c == 0x2205 || c == 0x2206 || # ∂, ∅, ∆ + c == 0x2207 || c == 0x220e || c == 0x220f || # ∇, ∎, ∏ + c == 0x2210 || c == 0x2211 || # ∐, ∑ + c == 0x221e || c == 0x221f || # ∞, ∟ + c >= 0x222b)) || # ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ + + (c >= 0x22c0 && c <= 0x22c3) || # N-ary big ops: ⋀, ⋁, ⋂, ⋃ + (c >= 0x25F8 && c <= 0x25ff) || # ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ + + (c >= 0x266f && + (c == 0x266f || c == 0x27d8 || c == 0x27d9 || # ♯, ⟘, ⟙ + (c >= 0x27c0 && c <= 0x27c2) || # ⟀, ⟁, ⟂ + (c >= 0x29b0 && c <= 0x29b4) || # ⦰, ⦱, ⦲, ⦳, ⦴ + (c >= 0x2a00 && c <= 0x2a06) || # ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ + (c >= 0x2a09 && c <= 0x2a16) || # ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, + # ⨓, ⨔, ⨕, ⨖ + c == 0x2a1b || c == 0x2a1c)))) || # ⨛, ⨜ + + (c >= 0x1d6c1 && # variants of \nabla and \partial + (c == 0x1d6c1 || c == 0x1d6db || + c == 0x1d6fb || c == 0x1d715 || + c == 0x1d735 || c == 0x1d74f || + c == 0x1d76f || c == 0x1d789 || + c == 0x1d7a9 || c == 0x1d7c3)) || + + # super- and subscript +-=() + (c >= 0x207a && c <= 0x207e) || + (c >= 0x208a && c <= 0x208e) || + + # angle symbols + (c >= 0x2220 && c <= 0x2222) || # ∠, ∡, ∢ + (c >= 0x299b && c <= 0x29af) || # ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, + # ⦦, ⦧, ⦨, ⦩, 
⦪, ⦫, ⦬, ⦭, ⦮, ⦯ + # Other_ID_Start + c == 0x2118 || c == 0x212E || # ℘, ℮ + (c >= 0x309B && c <= 0x309C)) # katakana-hiragana sound marks +end + +function is_identifier_char(c::Char) + if ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || c == '_' || + (c >= '0' && c <= '9') || c == '!') + return true + elseif (UInt32(c) < 0xA1 || UInt32(c) > 0x10ffff) + return false + end + cat = UTF8proc.category_code(c) + is_cat_id_start(c, cat) && return true + if cat == UTF8proc.UTF8PROC_CATEGORY_MN || cat == UTF8proc.UTF8PROC_CATEGORY_MC || + cat == UTF8proc.UTF8PROC_CATEGORY_ND || cat == UTF8proc.UTF8PROC_CATEGORY_PC || + cat == UTF8proc.UTF8PROC_CATEGORY_SK || cat == UTF8proc.UTF8PROC_CATEGORY_ME || + cat == UTF8proc.UTF8PROC_CATEGORY_NO || + (0x2032 <= UInt32(c) <= 0x2034) || # primes + UInt32(c) == 0x0387 || UInt32(c) == 0x19da || + (0x1369 <= UInt32(c) <= 0x1371) + return true + end + return false +end + +function is_identifier_start_char(c::Char) + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') + return true + elseif (UInt32(c) < 0xA1 || UInt32(c) > 0x10ffff) + return false + end + cat = UTF8proc.category_code(c) + return is_cat_id_start(c, cat) +end + + +function peekchar(io::IOBuffer) + if !io.readable || io.ptr > io.size + return EOF_CHAR + end + ch = convert(UInt8,io.data[io.ptr]) + if ch < 0x80 + return convert(Char,ch) + end + # mimic utf8.next function + trailing = Base.utf8_trailing[ch+1] + c::UInt32 = 0 + for j = 1:trailing + c += ch + c <<= 6 + ch = convert(UInt8,io.data[io.ptr+j]) + end + c += ch + c -= Base.utf8_offset[trailing+1] + return convert(Char,c) +end + +# this implementation is copied from Base +const _CHTMP = Array(Char, 1) + +peekchar(s::IOStream) = begin + if ccall(:ios_peekutf8, Int32, (Ptr{Void}, Ptr{Char}), s, _CHTMP) < 0 + return EOF_CHAR + end + return _CHTMP[1] +end + +eof(io::IO) = Base.eof(io) +eof(c) = is(c, EOF_CHAR) + +readchar(io::IO) = eof(io) ? 
EOF_CHAR : read(io, Char) +takechar(io::IO) = (readchar(io); io) From a829a98538cbd12f0a5599ab7be918e6fa2c244a Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 1 Aug 2016 00:49:16 +0200 Subject: [PATCH 0004/1109] dsad --- JuliaSyntax/README.md | 24 ++++++++++++++++++++++++ JuliaSyntax/REQUIRE | 1 + JuliaSyntax/src/Tokenize.jl | 10 +++++++++- JuliaSyntax/test/runtests.jl | 9 +++++++-- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 6c557541e2296..52e2ab13d6fb6 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1 +1,25 @@ # Tokenize + +`Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia code. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. Whitespace and comments are also returned as tokens making a collection of tokens completely roundtrippable back to the original string. This is useful for making syntax highlighter or formatters for example. + + +### Tokenization + +The function `tokenize` is the main entrypoint for generating `Token`s. +It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. + + +### `Token`s + +Each `Token` is represented by where it starts and ends, what string it contains and what type it is. 
+ +```julia + +startpos(t)::Tuple{Int, Int} # row and column where the token start +endpos(t)::Tuple{Int, Int} # row and column where the token ends +startbyte(T)::Int64 # byte offset where the token start +endbyte(t)::Int64 # byte offset where the token ends +string(t)::String # the string representation of the token + +``` + diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE index d5d646713dcf8..4d57bb70074b7 100644 --- a/JuliaSyntax/REQUIRE +++ b/JuliaSyntax/REQUIRE @@ -1 +1,2 @@ julia 0.4 +Compat \ No newline at end of file diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 99470368629b4..3a408fe9cec3c 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -1,5 +1,13 @@ + __precompile__() + module Tokenize -# package code goes here +include("token.jl") +include("lexer.jl") + +import .Lexers: tokenize +import .Tokens: untokenize + +export tokenize, untokenize end # module diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 45e445eaf0483..8914351a67802 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,5 +1,10 @@ -using Tokenize +using PimpMyREPL.Tokenize using Base.Test + + # write your own tests here -@test 1 == 1 + +tokvec(x) = collect(tokenize(x)) + +v = tokvec("function foo end") From 53d0c400c4bbec869a70ce91c20485706edd6e50 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 1 Aug 2016 19:20:55 +0200 Subject: [PATCH 0005/1109] fdsf --- JuliaSyntax/src/token.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 16d0254482a8c..747058fd7c310 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -21,12 +21,14 @@ export Token prime, # ' macro_call, # @ - dot,# . - dot2, # .. - dot3, # ... + begin_ops, + dot,# . + dot2, # .. + dot3, # ... + lazy_or, # || lazy_and, # && pipe, #? 
| From d7689fa4b16401615bd24da683e7cf840983d350 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 2 Aug 2016 02:48:20 +0200 Subject: [PATCH 0006/1109] das --- JuliaSyntax/src/lexer.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 162900a55e6d6..12fa7671b1384 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -11,12 +11,12 @@ import ..Tokens: Token, Kind, TokenError export tokenize -using Logging -@Logging.configure(level=WARNING) +# using Logging +# @Logging.configure(level=WARNING) -#macro debug(ex) -# return :() -#end +macro debug(ex) + return :() +end ishead(c::Char) = ('A' <= c <= 'z') || c == '$' || c == '-' || c == '_' || c == '.' @@ -591,4 +591,4 @@ function lex_dollar(l::Lexer) end -end # module \ No newline at end of file +end # module From 9a9ebb7f4291826526fa2a658d8cd08528b758fb Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 2 Aug 2016 18:10:21 +0200 Subject: [PATCH 0007/1109] bleh --- JuliaSyntax/README.md | 21 +- JuliaSyntax/benchmark/lex_base.jl | 59 ++ JuliaSyntax/src/Tokenize.jl | 2 +- JuliaSyntax/src/lexer.jl | 339 ++++---- JuliaSyntax/src/token.jl | 183 +---- JuliaSyntax/src/token_kinds.jl | 1192 +++++++++++++++++++++++++++++ JuliaSyntax/src/utilities.jl | 4 +- 7 files changed, 1472 insertions(+), 328 deletions(-) create mode 100644 JuliaSyntax/benchmark/lex_base.jl create mode 100644 JuliaSyntax/src/token_kinds.jl diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 52e2ab13d6fb6..c03255c8a0224 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,25 +1,38 @@ # Tokenize -`Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia code. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. 
Whitespace and comments are also returned as tokens making a collection of tokens completely roundtrippable back to the original string. This is useful for making syntax highlighter or formatters for example. +`Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. +The goals of this package is to be + +* Fast +* Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. +* Non error throwing. Instead of throwing errors a certain error token is returned. + +### API ### Tokenization The function `tokenize` is the main entrypoint for generating `Token`s. It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. +```jl + +e ### `Token`s Each `Token` is represented by where it starts and ends, what string it contains and what type it is. 
-```julia +The types are +```julia startpos(t)::Tuple{Int, Int} # row and column where the token start endpos(t)::Tuple{Int, Int} # row and column where the token ends startbyte(T)::Int64 # byte offset where the token start endbyte(t)::Int64 # byte offset where the token ends -string(t)::String # the string representation of the token - +untokenize(t)::String # the string representation of the token +kind(t)::Token.Kind # A +exactkind(t):: ``` + diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl new file mode 100644 index 0000000000000..13b8eda6a0811 --- /dev/null +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -0,0 +1,59 @@ +import Tokenize + +const BASEPATH = abspath(joinpath(JULIA_HOME, "..", "..")) + +tot_files = 0 +tot_time = 0.0 +tot_tokens = 0 +function testall(srcdir::AbstractString) + global tot_files, tot_time, tot_tokens + dirs, files = [], [] + + for fname in sort(readdir(srcdir)) + path = joinpath(srcdir, fname) + if isdir(path) + push!(dirs, path) + continue + end + _, ext = splitext(fname) + if ext == ".jl" + push!(files, path) + end + end + + if !isempty(files) + for jlpath in files + + fname = splitdir(jlpath)[end] + + buf = IOBuffer() + write(buf, open(readstring, jlpath)) + tot_files += 1 + tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) + tot_tokens += length(tokens) + for token in tokens + if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR + show(token) + error("Error in file $jlpath, for token $token") + end + end + end + end + for dir in dirs + testall(dir) + end +end + + +if isdir(BASEPATH) && isdir(joinpath(BASEPATH, "base")) + testall(joinpath(BASEPATH, "examples")) + testall(joinpath(BASEPATH, "test")) + testall(joinpath(BASEPATH, "base")) +else + warn(""" +Could not find julia base sources in $BASEPATH, +perhaps you are using a Julia not built from source?""") +end + +print("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), + " seconds with a total of ", tot_tokens, " tokens") 
diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 3a408fe9cec3c..14d1a8e11d5e3 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -8,6 +8,6 @@ include("lexer.jl") import .Lexers: tokenize import .Tokens: untokenize -export tokenize, untokenize +export tokenize end # module diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 12fa7671b1384..a01f3ade724db 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -7,7 +7,7 @@ using Compat import Compat.String import ..Tokens -import ..Tokens: Token, Kind, TokenError +import ..Tokens: Token, Kind, TokenError, UNICODE_OPS export tokenize @@ -18,9 +18,6 @@ macro debug(ex) return :() end - -ishead(c::Char) = ('A' <= c <= 'z') || c == '$' || c == '-' || c == '_' || c == '.' -istail(c::Char) = ishead(c) || isdigit(c) ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') iswhitespace(c::Char) = Base.UTF8proc.isspace(c) @@ -38,10 +35,11 @@ type Lexer{IO_t <: Union{IO, String}} current_pos::Int64 end -Lexer(io::Union{IO, String}) = Lexer(io, 1, 1, -1, 0, 1, 1, 1) -tokenize(x) = Lexer(x) +Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1) +# Iterator interface +tokenize(x) = Lexer(x) if VERSION > v"v0.5.0-" Base.iteratorsize(::Lexer) = Base.SizeUnknown() @@ -51,8 +49,8 @@ end Base.eltype(::Lexer) = Token -function Base.start(l::Lexer) - seekstart(l) +function Base.start{T}(l::Lexer{T}) + seekstart(l) l.token_startpos = 0 l.token_start_row = 1 l.token_start_col = 1 @@ -65,19 +63,14 @@ end function Base.next(l::Lexer, isdone) t = next_token(l) - return t, t.kind == Tokens.Eof + return t, t.kind == Tokens.ENDMARKER end Base.done(l::Lexer, isdone) = isdone function Base.show(io::IO, l::Lexer) - #print(io, "Token buffer:") - #print(io, extract_tokenstring(l)) - println(io, "Position: ", position(l)) - println(io, l.current_row, l.current_col) - #print(io, "\n# tokens read: ", length(tokens(l)), " n errors: ", n_errors) - #print(io, "\n", tokens(l)) + 
println(io, "Lexer at position: ", position(l)) end @@ -90,7 +83,8 @@ prevpos!(l::Lexer, i::Int64) = l.prevpos = i Base.seekstart{I <: IO}(l::Lexer{I}) = seekstart(l.io) Base.seekstart{I <: String}(l::Lexer{I}) = seek(l, 1) -seek2startpos!(l::Lexer) = seek(l, startpos(l)) +seek2startpos!{I <: IO}(l::Lexer{I}) = seek(l, startpos(l)) +seek2startpos!{I <: String}(l::Lexer{I}) = seek(l, startpos(l) + 1) push!(l::Lexer, t::Token) = push!(l.tokens, t) peekchar{I <: IO}(l::Lexer{I}) = peekchar(l.io) @@ -163,41 +157,38 @@ function accept_batch(l::Lexer, f) end function emit(l::Lexer, kind::Kind, str::String) - skipfirst, skiplast = 0,0 tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col), - startpos(l) + skipfirst, position(l) - skiplast - 1, + startpos(l), position(l) - 1, str) @debug "emitted token: $tok:" ignore!(l) return tok end -function emit(l::Lexer, kind::Kind, err::TokenError=Tokens.unknown; skipfirst::Int = 0, skiplast::Int = 0) - skipfirst, skiplast = 0, 0 +function emit(l::Lexer, kind::Kind, err::TokenError=Tokens.UNKNOWN) str = extract_tokenstring(l) tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col), - startpos(l) + skipfirst, position(l) - skiplast - 1, + startpos(l), position(l) - 1, str) @debug "emitted token: $tok:" ignore!(l) return tok end -function emit_error(l::Lexer, err::TokenError=Tokens.unknown) - return emit(l, Tokens.Error, err) +function emit_error(l::Lexer, err::TokenError=Tokens.UNKNOWN) + return emit(l, Tokens.ERROR, err) end +# TODO, just use String mby function extract_tokenstring{T}(l::Lexer{T}) isstr = T <: String cs = Char[] sizehint!(cs, position(l) - startpos(l)) curr_pos = position(l) seek2startpos!(l) - if isstr - seek(l, position(l) + 1) - end + while position(l) < curr_pos c = readchar(l) l.current_col += 1 @@ -211,16 +202,16 @@ function extract_tokenstring{T}(l::Lexer{T}) return str end - # We just consumed a " or a """ function read_string(l::Lexer, 
kind::Tokens.Kind) while true c = readchar(l) + show(c) if c == '\\' && eof(readchar(l)) return false end if c == '"' - if kind == Tokens.t_string + if kind == Tokens.STRING return true else if accept(l, "\"") && accept(l, "\"") @@ -228,6 +219,14 @@ function read_string(l::Lexer, kind::Tokens.Kind) end end elseif eof(c) + show(l.io) + + println("....") + + println(position(l)) + println(l.io.size) + println(c) + println("...") return false end end @@ -235,59 +234,52 @@ end function next_token(l::Lexer) - c = readchar(l) - @debug "startpos at $(l.token_startpos)" - - if eof(c); return emit(l, Tokens.Eof) + if eof(c); return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c); return lex_whitespace(l) + elseif c == '['; return emit(l, Tokens.LSQUARE) + elseif c == ']'; return emit(l, Tokens.RSQUARE) + elseif c == '{'; return emit(l, Tokens.LBRACE) + elseif c == ';'; return emit(l, Tokens.SEMICOLON) + elseif c == '}'; return emit(l, Tokens.RBRACE) + elseif c == '('; return emit(l, Tokens.LPAREN) + elseif c == ')'; return emit(l, Tokens.RPAREN) + elseif c == ','; return emit(l, Tokens.COMMA) + elseif c == '*'; return emit(l, Tokens.STAR) + elseif c == '@'; return emit(l, Tokens.AT_SIGN) + elseif c == '?'; return emit(l, Tokens.CONDITIONAL) + elseif c == '$'; return emit(l, Tokens.EX_OR) + elseif c == '~'; return emit(l, Tokens.APPROX) + elseif c == '\\'; return emit(l, Tokens.BACKSLASH) elseif c == '#'; return lex_comment(l) elseif c == '='; return lex_equal(l) elseif c == '!'; return lex_exclaim(l) - elseif c == '['; return emit(l, Tokens.lsquare) - elseif c == ']'; return emit(l, Tokens.rsquare) - elseif c == '{'; return emit(l, Tokens.lbrace) - elseif c == ';'; return emit(l, Tokens.semicolon) - elseif c == '}'; return emit(l, Tokens.rbrace) - elseif c == '('; return emit(l, Tokens.lparen) - elseif c == ')'; return emit(l, Tokens.rparen) - elseif c == ','; return emit(l, Tokens.comma) - elseif c == '*'; return emit(l, Tokens.star) elseif c == '>'; return lex_greater(l) 
elseif c == '<'; return lex_less(l) elseif c == ':'; return lex_colon(l) elseif c == '|'; return lex_bar(l) - elseif c == '@'; return lex_at(l) elseif c == '&'; return lex_amper(l) - elseif c == '\''; return lex_prime(l) - elseif c == '?'; return emit(l, Tokens.conditional) + elseif c == '\'';return lex_prime(l) elseif c == '"'; return lex_quote(l); elseif c == '%'; return lex_percent(l); elseif c == '/'; return lex_forwardslash(l); elseif c == '.'; return lex_dot(l); - elseif isdigit(c) || c == '-' || c == '+' return lex_digitorsign(l) + elseif c == '+'; return lex_plus(l); + elseif c == '-'; return lex_minus(l); + elseif c == '`'; return lex_cmd(l); + elseif isdigit(c); return lex_digit(l) elseif is_identifier_start_char(c); return lex_identifier(l) + elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) else emit_error(l) end - - #= - if eof(c); return emit(l, Tokens.Eof) - elseif c == '$'; return lex_dollar(l); - elseif c == '"'; return lex_quote(l); - elseif c == ';'; return lex_comment(l) - elseif c == '!'; return lex_exclaim(l); - - else emit_error(l) - end - =# end # Lex whitespace, a whitespace char has been consumed function lex_whitespace(l::Lexer) accept_batch(l, iswhitespace) - return emit(l, Tokens.Whitespace) + return emit(l, Tokens.WHITESPACE) end function lex_comment(l::Lexer) @@ -296,7 +288,7 @@ function lex_comment(l::Lexer) c = readchar(l) if c == '\n' || eof(c) backup!(l) - return emit(l, Tokens.Comment, skipfirst = 1) + return emit(l, Tokens.COMMENT) end end else @@ -304,7 +296,7 @@ function lex_comment(l::Lexer) n_start, n_end = 1, 0 while true if eof(c) - return emit_error(l, Tokens.EOF_in_multicomment) + return emit_error(l, Tokens.EOF_MULTICOMMENT) end nc = readchar(l) if c == '#' && nc == '=' @@ -313,7 +305,7 @@ function lex_comment(l::Lexer) n_end += 1 end if n_start == n_end - return emit(l, Tokens.Comment, skipfirst = 2, skiplast = 2) + return emit(l, Tokens.COMMENT) end c = nc end @@ -325,27 +317,21 @@ 
function lex_greater(l::Lexer) if accept(l, '>') # >> if accept(l, '>') # >>> if accept(l, '=') # >>>= - return emit(l, Tokens.ass_bitshift_rrr) - elseif accept(l, iswhitespace) - return emit(l, Tokens.bitshift_rrr) + return emit(l, Tokens.UNSIGNED_BITSHIFT_EQ) else # >>>?, ? not a = - return emit_error(l) + return emit(l, Tokens.UNSIGNED_BITSHIFT) end else # >>? if accept(l, '=') # >>= - return emit(l, Tokens.ass_bitshift_rr) - elseif accept(l, iswhitespace) # '>> ' - return emit(l, Tokens.bitshift_rr) - else # '>>?', ? not =, >, ' ' - return emit_error(l) + return emit(l, Tokens.RBITSHIFT_EQ) + else accept(l, iswhitespace) # '>> ' + return emit(l, Tokens.RBITSHIFT) end end - elseif accept(l, '=') - return emit(l, Tokens.ass_bitshift_r) - elseif accept(l, iswhitespace) - return emit(l, Tokens.comp_r) # '> ' - else - return emit_error(l) + elseif accept(l, '=') # >= + return emit(l, Tokens.GREATER_EQ) + else # '>' + return emit(l, Tokens.GREATER) end end @@ -353,184 +339,171 @@ end function lex_less(l::Lexer) if accept(l, '<') # << if accept(l, '=') # <<= - return emit(l, Tokens.ass_bitshift_ll) - elseif accept(l, iswhitespace) # '<< ' - return emit(l, Tokens.bitshift_ll) + return emit(l, Tokens.LBITSHIFT_EQ) else # '< ' + return emit(l, Tokens.LESS) # '<' end end - - # Lex all tokens that start with an = character. # An '=' char has been consumed function lex_equal(l::Lexer) if accept(l, '=') # == - if accept(l, iswhitespace) - return emit(l, Tokens.ass_equal2) - elseif accept(l, '=') # === - if accept(l, iswhitespace) - emit(l, Tokens.ass_equal3) - else # ===?, ? 
!= ' ' - emit_error(l) - end + if accept(l, '=') # === + emit(l, Tokens.EQEQEQ) + else + emit(l, Tokens.EQEQ) end elseif accept(l, '>') # => - emit(l, Tokens.ass_equal_r) + emit(l, Tokens.PAIR_ARROW) else - emit(l, Tokens.ass_equal) + emit(l, Tokens.EQ) end end # Lex a colon, a ':' has been consumed function lex_colon(l::Lexer) if accept(l, ':') # '::' - emit(l, Tokens.decl) - elseif accept(l, iswhitespace) # ': ' - emit(l, Tokens._colon) - elseif accept_batch(l, is_identifier_char) # :foo32 - emit(l, Tokens.t_symbol, skipfirst = 1) + emit(l, Tokens.DECLARATION) else - emit_error(l) + emit(l, Tokens.COLON) end end function lex_exclaim(l::Lexer) if accept(l, '=') # != - if accept(l, '=') - return emit(l, Tokens.comp_neq2) # !== + if accept(l, '=') # !== + return emit(l, Tokens.NOT_IS) else # != - return emit(l, Tokens.comp_neq) + return emit(l, Tokens.NOT_EQ) end else - return emit(l, Tokens.exclaim) + return emit(l, Tokens.NOT) end end function lex_percent(l::Lexer) if accept(l, '=') - return emit(l, Tokens.ass_perc) + return emit(l, Tokens.REM_EQ) else - return emit(l, Tokens.perc) + return emit(l, Tokens.REM) end end function lex_bar(l::Lexer) - if accept(l, iswhitespace) - emit(l, Tokens.pipe) # '| ' - elseif accept(l, '=') # |= - return emit(l, Tokens.ass_bar) + if accept(l, '=') # |= + return emit(l, Tokens.OR_EQ) elseif accept(l, '>') # |> - return emit(l, Tokens.pipe_r) + return emit(l, Tokens.RPIPE) elseif accept(l, '|') # || - return emit(l, Tokens.lazy_or) + return emit(l, Tokens.LAZY_OR) else - return emit_error(l) + emit(l, Tokens.OR) # '|' end end +function lex_plus(l::Lexer) + accept(l, '+') && emit(l, Tokens.PLUSPLUS) + accept(l, isdigit) && lex_digit(l) + return emit(l, Tokens.PLUS) +end -function lex_digitorsign(l::Lexer) - # A digit is an int - longest, kind = position(l), Tokens.t_int +function lex_minus(l::Lexer) + accept(l, '-') && return emit_error(l) # "--" is an invalid operator + accept(l, isdigit) && return lex_digit(l) + return emit(l, 
Tokens.MINUS) +end - accept(l, '-') - if accept_batch(l, isdigit) && position(l) > longest - longest, kind = position(l), Tokens.t_int - end - seek2startpos!(l) +# A digit has been consumed +function lex_digit(l::Lexer) + backup!(l) + longest, kind = position(l), Tokens.ERROR - accept(l, "+-") - if accept_batch(l, isdigit) && accept(l, '.') + accept_batch(l, isdigit) + + if accept(l, '.') + if peekchar(l) == '.' # 43.. -> [43, ..] + backup!(l) + return emit(l, Tokens.INTEGER) + end accept_batch(l, isdigit) - if position(l) > longest - longest, kind = position(l), Tokens.t_float + if accept(l, '.') # 3213.313.3123 is error + return emit_error(l) + elseif position(l) > longest # 323213.3232 candidate + longest, kind = position(l), Tokens.FLOAT end - if accept(l, "eE") + if accept(l, "eE") # 1313.[0-9]*e accept(l, "+-") if accept_batch(l, isdigit) && position(l) > longest - longest, kind = position(l), Tokens.t_float + longest, kind = position(l), Tokens.FLOAT end end + elseif position(l) > longest + longest, kind = position(l), Tokens.INTEGER end seek2startpos!(l) # 0x[0-9A-Fa-f]+ if accept(l, '0') && accept(l, 'x') - accept("o") - if accept_batch(ishex) && position(l) > longest - longest, kind = position(l), Tokens.APFloat + accept(l, "o") + if accept_batch(l, ishex) && position(l) > longest + longest, kind = position(l), Tokens.INTEGER end end seek(l, longest) - if kind == Tokens.Error - return emit_error(l) - else - return emit(l, kind) - end - + return emit(l, kind) end - # Lex a prim sign, a ''' has been consumed function lex_prime(l) - @debug "lexing prime, current char is $(peekchar(l)), pos is $(position(l))" + return emit(l, Tokens.PRIME) +end +# This does not work because a ' could be a ctranspose function call +# and we need to parse the expression for this to work. 
+#= +function lex_prime(l) while true c = readchar(l) if eof(c) - return emit_error(l, Tokens.EOF_in_char) + return emit_error(l, Tokens.EOF_CHAR) elseif c == '\\' if eof(readchar(l)) - return emit_error(l, Tokens.EOF_in_char) + return emit_error(l, Tokens.EOF_CHAR) end elseif c == '\'' - return emit(l, Tokens.t_char, skipfirst=1, skiplast=1) + return emit(l, Tokens.CHAR) end end end - - -# Lex all tokens that start with an @ character. -# An '@' char has been consumed -function lex_at(l::Lexer) - if accept_batch(l, is_identifier_char) - return emit(l, Tokens.macro_call) - else - return emit_error(l) - end -end - +=# function lex_amper(l::Lexer) if accept(l, '&') - return emit(l, Tokens.lazy_and) + return emit(l, Tokens.LAZY_AND) elseif accept(l, "=") - return emit(l, Tokens.ass_ampr) + return emit(l, Tokens.AND_EQ) else - return emit(l, Tokens.amper) + return emit(l, Tokens.AND) end end function lex_identifier(l::Lexer) - accept_batch(l, is_identifier_char) - str = extract_tokenstring(l) - kind = get(Tokens.KEYWORDS, str, Tokens.Identifier) + kind = get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER) return emit(l, kind, str) end @@ -539,19 +512,19 @@ end function lex_quote(l::Lexer) if accept(l, '"') # "" if accept(l, '"') # """ - if read_string(l, Tokens.t_string_triple) - emit(l, Tokens.t_string_triple) + if read_string(l, Tokens.TRIPLE_STRING) + emit(l, Tokens.TRIPLE_STRING) else - emit_error(l, Tokens.EOF_in_string) + emit_error(l, Tokens.EOF_STRING) end else # empty string - return emit(l, Tokens.t_string) + return emit(l, Tokens.STRING) end else # "?, ? 
!= '"' - if read_string(l, Tokens.t_string) - emit(l, Tokens.t_string) + if read_string(l, Tokens.STRING) + emit(l, Tokens.STRING) else - return emit_error(l, Tokens.EOF_in_string) + return emit_error(l, Tokens.EOF_STRING) end end end @@ -561,34 +534,40 @@ end function lex_forwardslash(l::Lexer) if accept(l, "/") # // if accept(l, "=") # //= - return emit(l, Tokens.ass_fslash2) + return emit(l, Tokens.FWDFWD_SLASH_EQ) else - return emit(l, Tokens.fslash2) + return emit(l, Tokens.FWDFWD_SLASH) end elseif accept(l, "=") # /= - return emit(l, Tokens.ass_fslash) + return emit(l, Tokens.FWD_SLASH_EQ) else - return emit(l, Tokens.fslash) + return emit(l, Tokens.FWD_SLASH) end end - +# TODO .op function lex_dot(l::Lexer) if accept(l, '.') if accept(l, '.') - return emit(l, Tokens.dot3) + return emit(l, Tokens.DDDOT) else - return emit(l, Tokens.dot2) + return emit(l, Tokens.DDOT) end else - return emit(l, Tokens.dot) + return emit(l, Tokens.DOT) end end - -function lex_dollar(l::Lexer) - return emit(l, Tokens.dollar) +# A ` has been consumed, find the next one +function lex_cmd(l::Lexer) + while true + c = readchar(l) + if c == '`' + return emit(l, Tokens.CMD) + elseif eof(c) + return emit_error(l, Tokens.EOF_CMD) + end + end end - end # module diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 747058fd7c310..e7a26f77c72f5 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -6,169 +6,67 @@ import Base.eof export Token -# https://github.com/llvm-mirror/llvm/blob/master/lib/AsmParser/LLToken.h -@enum(Kind, - # Markers - Nothing, - Eof, - Error, - Comment, - Whitespace, - Identifier, - - decl, - _colon, # : - prime, # ' - macro_call, # @ - - - - begin_ops, - - dot,# . - dot2, # .. - dot3, # ... - - lazy_or, # || - lazy_and, # && - pipe, #? | - amper, # & - conditional, # ? - perc, # % - fslash, # / - fslash2, # // - transpose, # .' 
- issubtype, # <: - dollar, # $ - - - comp_r, # > - comp_l, # < - comp_neq, # != - comp_neq2, # !== - - pipe_l, # |> - pipe_r, # <| - - - begin_assignments, - ass_equal, # = - ass_equal2, # == - ass_equal3, # === - ass_equal_r, # => - ass_bitshift_r, # >= - ass_bitshift_l, # <= - ass_bitshift_rr, # >>= - ass_bitshift_rrr, # >>>= - ass_bitshift_ll, # <<= - ass_bar, # |= - ass_ampr, # &= - ass_perc, # %= - ass_fslash, # /= - ass_fslash2, # //= - end_assignments, - - begin_bitshifts, - bitshift_ll, # << - bitshift_rr, # >> - bitshift_rrr, # >>> - end_bitshifts, - - end_ops, - - begin_keywords, - - kw_begin, kw_while, kw_if, kw_for, kw_try, kw_return, - kw_break, kw_continue, kw_function, - kw_macro, kw_quote, kw_let, kw_local, kw_global, kw_const, - kw_abstract, kw_typealias, kw_type, kw_bitstype, kw_immutable, - kw_do, kw_module, kw_baremodule, kw_using, kw_import, - kw_export, kw_importall, kw_end, kw_false, kw_true, - - end_keywords, - - comma, # = , - star, # * - lsquare, - rsquare, # [ ] - lbrace, - rbrace, # { } - lparen, - rparen, # ( ) - exclaim, # ! - bar, # | - semicolon, - - begin_types, - - t_int, t_float, t_string, t_string_triple, t_symbol, t_char, - - end_types -) - -@enum(TokenError, - no_err, - EOF_in_multicomment, - EOF_in_string, - EOF_in_char, - unknown -) - -TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( -EOF_in_multicomment => "unterminated multi-line comment #= ... 
=#", -EOF_in_string => "unterminated string literal", -EOF_in_char => "unterminated character literal", -unknown => "unknown" -) +include("token_kinds.jl") iskeyword(k::Kind) = begin_keywords < k < end_keywords -istype(k::Kind) = begin_types < k < end_types +isliteral(k::Kind) = begin_literal < k < end_literal isoperator(k::Kind) = begin_ops < k < end_ops +# Create string => keyword kind const KEYWORDS = Dict{String, Kind}() function _add_kws() - offset = length("kw_") for k in instances(Kind) if iskeyword(k) - KEYWORDS[string(k)[offset+1:end]] = k + KEYWORDS[lowercase(string(k))] = k end end end _add_kws() -const KEYWORD_SYMBOLS = Vector{Symbol}() - +# TODO: more +@enum(TokenError, + NO_ERR, + EOF_MULTICOMMENT, + EOF_STRING, + EOF_CHAR, + EOF_CMD, + UNKNOWN, +) -function _add_kw_symbols() - resize!(KEYWORD_SYMBOLS, Int32(end_keywords) - Int32(begin_keywords) - 1) - for (k, v) in KEYWORDS - KEYWORD_SYMBOLS[Int32(v) - Int32(begin_keywords)] = Symbol(k) - end -end -_add_kw_symbols() -kwsym(k::Kind) = KEYWORD_SYMBOLS[Int32(k) - Int32(begin_keywords)] +# Error kind => description +TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( +EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", +EOF_STRING => "unterminated string literal", +EOF_CHAR => "unterminated character literal", +EOF_CMD => "unterminated cmd literal", +UNKNOWN => "unknown", +) immutable Token - kind::Kind - # Offsets into a string or buffer - startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index - endpos::Tuple{Int, Int} - startbyte::Int64 # The byte where the token start in the buffer - endbyte::Int64 # The byte where the token ended in the buffer - val::Compat.UTF8String # The actual string of the token - token_error::TokenError + kind::Kind + # Offsets into a string or buffer + startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index + endpos::Tuple{Int, Int} + startbyte::Int64 # The byte where the token start in the buffer + endbyte::Int64 # The byte where the token ended in the buffer + val::Compat.UTF8String # The actual string of the token + token_error::TokenError end function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, - startbyte::Int64, endbyte::Int64, val::String) - Token(kind, startposition, endposition, startbyte, endbyte, val, no_err) + startbyte::Int64, endbyte::Int64, val::String) + Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) end Token() = Token(Nothing, (0,0), (0,0), 0, 0, "", unknown) - -kind(t::Token) = t.kind +function kind(t::Token) + isoperator(t.kind) && return OP + iskeyword(t.kind) && return KEYWORD + return t.kind +end +exactkind(t::Token) = t.kind startpos(t::Token) = t.startpos endpos(t::Token) = t.endpos untokenize(t::Token) = t.val @@ -176,10 +74,11 @@ untokenize(t::Token) = t.val function Base.show(io::IO, t::Token) start_r, start_c = startpos(t) end_r, end_c = endpos(t) - str = kind(t) == Eof ? "" : untokenize(t) - print(io, start_r, ",", start_c, "-", + str = kind(t) == ENDMARKER ? 
"" : untokenize(t) + print(io, start_r, ",", start_c, "-", end_r, ",", end_c, ":", - "\t", kind(t), "\t", str) + " ", kind(t), "\t") + show(io, str) end Base.print(io::IO, t::Token) = print(io, untokenize(t)) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl new file mode 100644 index 0000000000000..00fcaf80a9761 --- /dev/null +++ b/JuliaSyntax/src/token_kinds.jl @@ -0,0 +1,1192 @@ +@enum(Kind, + ENDMARKER, # EOF + ERROR, + COMMENT, # aadsdsa, #= fdsf #= + WHITESPACE, # '\n \t' + IDENTIFIER, # foo, Σxx + AT_SIGN, # @ + COMMA, #, + SEMICOLON, # ; + + begin_keywords, + KEYWORD, # general + BEGIN, WHILE, IF, FOR, TRY, RETURN, BREAK, CONTINUE, + FUNCTION, GLOBAL, MACRO, QUOTE, LET, LOCAL, + CONST, ABSTRACT, TYPE, BITSTYPE, IMMUTABLE, + DO, MODULE, BAREMODULE, USING, IMPORT, EXPORT, IMPORTALL, + END, FALSE, TRUE, + end_keywords, + + begin_literal, + LITERAL, # general + INTEGER, # 4 + FLOAT, # 3.5, 3.7e+3 + STRING, # "foo" + TRIPLE_STRING, # """ foo \n """ + CHAR, # 'a' + CMD, # `cmd ...` + end_literal, + + begin_delimiters, + LSQUARE, # [ + RSQUARE, # [ + LBRACE, # { + RBRACE, # } + LPAREN, # ( + RPAREN, # ) + end_delimiters, + + begin_ops, + OP, # general + STAR, # * + PLUS, # + + MINUS, # - + PLUSPLUS, # ++ + BACKSLASH, # \ + NOT, # ! + APPROX, # ~ + + DECLARATION, # :: + COLON, # : + PRIME, # ' + + DOT,# . + DDOT, # .. + DDDOT, # ... + + LAZY_OR, # || + LAZY_AND, # && + OR, # | + AND, # & + CONDITIONAL, # ? + REM, # % + FWD_SLASH, # / + FWDFWD_SLASH, # // + TRANSPOSE, # .' 
+ ISSUBTYPE, # <: + EX_OR, # $ + + GREATER, # > + LESS, # < + NOT_EQ, # != + NOT_IS, # !== + + LPIPE, # |> + RPIPE, # <| + + begin_bitshifts, + LBITSHIFT, # << + RBITSHIFT, # >> + UNSIGNED_BITSHIFT, # >>> + end_bitshifts, + + begin_assignments, + EQ, # = + EQEQ, # == + EQEQEQ, # === + PAIR_ARROW, # => + GREATER_EQ, # >= + LESS_EQ, # <= + RBITSHIFT_EQ, # >>= + UNSIGNED_BITSHIFT_EQ, # >>>= + LBITSHIFT_EQ, # <<= + OR_EQ, # |= + AND_EQ, # &= + REM_EQ, # %= + FWD_SLASH_EQ, # /= + FWDFWD_SLASH_EQ, # //= + end_assignments, + + begin_unicode_ops, + DIVISION_SIGN, # ÷ + NOT_SIGN, # ¬ + SQUARE_ROOT, # √ + CUBE_ROOT, # ∛ + QUAD_ROOT, # ∜ + LEFTWARDS_ARROW, # ← + RIGHTWARDS_ARROW, # → + LEFT_RIGHT_ARROW, # ↔ + LEFTWARDS_ARROW_WITH_STROKE, # ↚ + RIGHTWARDS_ARROW_WITH_STROKE, # ↛ + RIGHTWARDS_TWO_HEADED_ARROW, # ↠ + RIGHTWARDS_ARROW_WITH_TAIL, # ↣ + RIGHTWARDS_ARROW_FROM_BAR, # ↦ + LEFT_RIGHT_ARROW_WITH_STROKE, # ↮ + LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, # ⇎ + RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, # ⇏ + RIGHTWARDS_DOUBLE_ARROW, # ⇒ + LEFT_RIGHT_DOUBLE_ARROW, # ⇔ + RIGHT_ARROW_WITH_SMALL_CIRCLE, # ⇴ + THREE_RIGHTWARDS_ARROWS, # ⇶ + LEFTWARDS_ARROW_WITH_VERTICAL_STROKE, # ⇷ + RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE, # ⇸ + LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE, # ⇹ + LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⇺ + RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⇻ + LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⇼ + LEFTWARDS_OPEN_HEADED_ARROW, # ⇽ + RIGHTWARDS_OPEN_HEADED_ARROW, # ⇾ + LEFT_RIGHT_OPEN_HEADED_ARROW, # ⇿ + LONG_LEFTWARDS_ARROW, # ⟵ + LONG_RIGHTWARDS_ARROW, # ⟶ + LONG_LEFT_RIGHT_ARROW, # ⟷ + LONG_RIGHTWARDS_DOUBLE_ARROW, # ⟹ + LONG_LEFT_RIGHT_DOUBLE_ARROW, # ⟺ + LONG_LEFTWARDS_ARROW_FROM_BAR, # ⟻ + LONG_RIGHTWARDS_ARROW_FROM_BAR, # ⟼ + LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR, # ⟽ + LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, # ⟾ + LONG_RIGHTWARDS_SQUIGGLE_ARROW, # ⟿ + RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, # ⤀ + 
RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⤁ + LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, # ⤂ + RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, # ⤃ + LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE, # ⤄ + RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR, # ⤅ + LEFTWARDS_DOUBLE_ARROW_FROM_BAR, # ⤆ + RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, # ⤇ + LEFTWARDS_DOUBLE_DASH_ARROW, # ⤌ + RIGHTWARDS_DOUBLE_DASH_ARROW, # ⤍ + LEFTWARDS_TRIPLE_DASH_ARROW, # ⤎ + RIGHTWARDS_TRIPLE_DASH_ARROW, # ⤏ + RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, # ⤐ + RIGHTWARDS_ARROW_WITH_DOTTED_STEM, # ⤑ + RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⤔ + RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⤕ + RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL, # ⤖ + RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⤗ + RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⤘ + LEFTWARDS_ARROW_TO_BLACK_DIAMOND, # ⤝ + RIGHTWARDS_ARROW_TO_BLACK_DIAMOND, # ⤞ + LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, # ⤟ + RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, # ⤠ + SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW, # ⥄ + RIGHTWARDS_ARROW_WITH_PLUS_BELOW, # ⥅ + LEFTWARDS_ARROW_WITH_PLUS_BELOW, # ⥆ + RIGHTWARDS_ARROW_THROUGH_X, # ⥇ + LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE, # ⥈ + LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON, # ⥊ + LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON, # ⥋ + LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON, # ⥎ + LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON, # ⥐ + LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, # ⥒ + RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, # ⥓ + LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, # ⥖ + RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, # ⥗ + LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, # ⥚ + RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, # ⥛ + LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, # ⥞ + RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, # ⥟ + LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, # ⥢ + RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, # ⥤ + 
LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP, # ⥦ + LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, # ⥧ + RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP, # ⥨ + RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, # ⥩ + LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, # ⥪ + LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, # ⥫ + RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, # ⥬ + RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, # ⥭ + RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD, # ⥰ + RULE_DELAYED, # ⧴ + THREE_LEFTWARDS_ARROWS, # ⬱ + LEFT_ARROW_WITH_SMALL_CIRCLE, # ⬰ + LEFT_ARROW_WITH_CIRCLED_PLUS, # ⬲ + LONG_LEFTWARDS_SQUIGGLE_ARROW, # ⬳ + LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, # ⬴ + LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⬵ + LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR, # ⬶ + LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, # ⬷ + LEFTWARDS_ARROW_WITH_DOTTED_STEM, # ⬸ + LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⬹ + LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⬺ + LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL, # ⬻ + LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⬼ + LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⬽ + LEFTWARDS_ARROW_THROUGH_X, # ⬾ + WAVE_ARROW_POINTING_DIRECTLY_LEFT, # ⬿ + EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW, # ⭀ + REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, # ⭁ + LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, # ⭂ + RIGHTWARDS_ARROW_THROUGH_GREATER_THAN, # ⭃ + RIGHTWARDS_ARROW_THROUGH_SUPERSET, # ⭄ + REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW, # ⭇ + RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, # ⭈ + TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, # ⭉ + LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO, # ⭊ + LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, # ⭋ + RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, # ⭌ + HALFWIDTH_LEFTWARDS_ARROW, # ← + HALFWIDTH_RIGHTWARDS_ARROW, # → + GREATER_THAN_OR_EQUAL_TO, # ≥ + 
LESS_THAN_OR_EQUAL_TO, # ≤ + IDENTICAL_TO, # ≡ + NOT_EQUAL_TO, # ≠ + NOT_IDENTICAL_TO, # ≢ + ELEMENT_OF, # ∈ + NOT_AN_ELEMENT_OF, # ∉ + CONTAINS_AS_MEMBER, # ∋ + DOES_NOT_CONTAIN_AS_MEMBER, # ∌ + SUBSET_OF_OR_EQUAL_TO, # ⊆ + NEITHER_A_SUBSET_OF_NOR_EQUAL_TO, # ⊈ + SUBSET_OF, # ⊂ + NOT_A_SUBSET_OF, # ⊄ + SUBSET_OF_WITH_NOT_EQUAL_TO, # ⊊ + PROPORTIONAL_TO, # ∝ + SMALL_ELEMENT_OF, # ∊ + SMALL_CONTAINS_AS_MEMBER, # ∍ + PARALLEL_TO, # ∥ + NOT_PARALLEL_TO, # ∦ + PROPORTION, # ∷ + GEOMETRIC_PROPORTION, # ∺ + HOMOTHETIC, # ∻ + REVERSED_TILDE, # ∽ + INVERTED_LAZY_S, # ∾ + NOT_TILDE, # ≁ + ASYMPTOTICALLY_EQUAL_TO, # ≃ + NOT_ASYMPTOTICALLY_EQUAL_TO, # ≄ + APPROXIMATELY_EQUAL_TO, # ≅ + APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO, # ≆ + NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO, # ≇ + ALMOST_EQUAL_TO, # ≈ + NOT_ALMOST_EQUAL_TO, # ≉ + ALMOST_EQUAL_OR_EQUAL_TO, # ≊ + TRIPLE_TILDE, # ≋ + ALL_EQUAL_TO, # ≌ + EQUIVALENT_TO, # ≍ + GEOMETRICALLY_EQUIVALENT_TO, # ≎ + APPROACHES_THE_LIMIT, # ≐ + GEOMETRICALLY_EQUAL_TO, # ≑ + APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF, # ≒ + IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO, # ≓ + COLON_EQUALS, # ≔ + EQUALS_COLON, # ≕ + RING_IN_EQUAL_TO, # ≖ + RING_EQUAL_TO, # ≗ + CORRESPONDS_TO, # ≘ + ESTIMATES, # ≙ + EQUIANGULAR_TO, # ≚ + STAR_EQUALS, # ≛ + DELTA_EQUAL_TO, # ≜ + EQUAL_TO_BY_DEFINITION, # ≝ + MEASURED_BY, # ≞ + QUESTIONED_EQUAL_TO, # ≟ + STRICTLY_EQUIVALENT_TO, # ≣ + LESS_THAN_OVER_EQUAL_TO, # ≦ + GREATER_THAN_OVER_EQUAL_TO, # ≧ + LESS_THAN_BUT_NOT_EQUAL_TO, # ≨ + GREATER_THAN_BUT_NOT_EQUAL_TO, # ≩ + MUCH_LESS_THAN, # ≪ + MUCH_GREATER_THAN, # ≫ + BETWEEN, # ≬ + NOT_EQUIVALENT_TO, # ≭ + NOT_LESS_THAN, # ≮ + NOT_GREATER_THAN, # ≯ + NEITHER_LESS_THAN_NOR_EQUAL_TO, # ≰ + NEITHER_GREATER_THAN_NOR_EQUAL_TO, # ≱ + LESS_THAN_OR_EQUIVALENT_TO, # ≲ + GREATER_THAN_OR_EQUIVALENT_TO, # ≳ + NEITHER_LESS_THAN_NOR_EQUIVALENT_TO, # ≴ + NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO, # ≵ + LESS_THAN_OR_GREATER_THAN, # ≶ + GREATER_THAN_OR_LESS_THAN, # ≷ + 
NEITHER_LESS_THAN_NOR_GREATER_THAN, # ≸ + NEITHER_GREATER_THAN_NOR_LESS_THAN, # ≹ + PRECEDES, # ≺ + SUCCEEDS, # ≻ + PRECEDES_OR_EQUAL_TO, # ≼ + SUCCEEDS_OR_EQUAL_TO, # ≽ + PRECEDES_OR_EQUIVALENT_TO, # ≾ + SUCCEEDS_OR_EQUIVALENT_TO, # ≿ + DOES_NOT_PRECEDE, # ⊀ + DOES_NOT_SUCCEED, # ⊁ + SUPERSET_OF, # ⊃ + NOT_A_SUPERSET_OF, # ⊅ + SUPERSET_OF_OR_EQUAL_TO, # ⊇ + NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO, # ⊉ + SUPERSET_OF_WITH_NOT_EQUAL_TO, # ⊋ + SQUARE_IMAGE_OF, # ⊏ + SQUARE_ORIGINAL_OF, # ⊐ + SQUARE_IMAGE_OF_OR_EQUAL_TO, # ⊑ + SQUARE_ORIGINAL_OF_OR_EQUAL_TO, # ⊒ + CIRCLED_EQUALS, # ⊜ + FORCES, # ⊩ + DOES_NOT_PROVE, # ⊬ + DOES_NOT_FORCE, # ⊮ + PRECEDES_UNDER_RELATION, # ⊰ + SUCCEEDS_UNDER_RELATION, # ⊱ + NORMAL_SUBGROUP_OF, # ⊲ + CONTAINS_AS_NORMAL_SUBGROUP, # ⊳ + NORMAL_SUBGROUP_OF_OR_EQUAL_TO, # ⊴ + CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO, # ⊵ + ORIGINAL_OF, # ⊶ + IMAGE_OF, # ⊷ + REVERSED_TILDE_EQUALS, # ⋍ + DOUBLE_SUBSET, # ⋐ + DOUBLE_SUPERSET, # ⋑ + EQUAL_AND_PARALLEL_TO, # ⋕ + LESS_THAN_WITH_DOT, # ⋖ + GREATER_THAN_WITH_DOT, # ⋗ + VERY_MUCH_LESS_THAN, # ⋘ + VERY_MUCH_GREATER_THAN, # ⋙ + LESS_THAN_EQUAL_TO_OR_GREATER_THAN, # ⋚ + GREATER_THAN_EQUAL_TO_OR_LESS_THAN, # ⋛ + EQUAL_TO_OR_LESS_THAN, # ⋜ + EQUAL_TO_OR_GREATER_THAN, # ⋝ + EQUAL_TO_OR_PRECEDES, # ⋞ + EQUAL_TO_OR_SUCCEEDS, # ⋟ + DOES_NOT_PRECEDE_OR_EQUAL, # ⋠ + DOES_NOT_SUCCEED_OR_EQUAL, # ⋡ + NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO, # ⋢ + NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO, # ⋣ + SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO, # ⋤ + SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO, # ⋥ + LESS_THAN_BUT_NOT_EQUIVALENT_TO, # ⋦ + GREATER_THAN_BUT_NOT_EQUIVALENT_TO, # ⋧ + PRECEDES_BUT_NOT_EQUIVALENT_TO, # ⋨ + SUCCEEDS_BUT_NOT_EQUIVALENT_TO, # ⋩ + NOT_NORMAL_SUBGROUP_OF, # ⋪ + DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP, # ⋫ + NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO, # ⋬ + DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL, # ⋭ + ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE, # ⋲ + ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋳ + 
SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋴ + ELEMENT_OF_WITH_DOT_ABOVE, # ⋵ + ELEMENT_OF_WITH_OVERBAR, # ⋶ + SMALL_ELEMENT_OF_WITH_OVERBAR, # ⋷ + ELEMENT_OF_WITH_UNDERBAR, # ⋸ + ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES, # ⋹ + CONTAINS_WITH_LONG_HORIZONTAL_STROKE, # ⋺ + CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋻ + SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋼ + CONTAINS_WITH_OVERBAR, # ⋽ + SMALL_CONTAINS_WITH_OVERBAR, # ⋾ + Z_NOTATION_BAG_MEMBERSHIP, # ⋿ + REVERSE_SOLIDUS_PRECEDING_SUBSET, # ⟈ + SUPERSET_PRECEDING_SOLIDUS, # ⟉ + ELEMENT_OF_OPENING_UPWARDS, # ⟒ + CIRCLED_PARALLEL, # ⦷ + CIRCLED_LESS_THAN, # ⧀ + CIRCLED_GREATER_THAN, # ⧁ + INCREASES_AS, # ⧡ + EQUALS_SIGN_AND_SLANTED_PARALLEL, # ⧣ + EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE, # ⧤ + IDENTICAL_TO_AND_SLANTED_PARALLEL, # ⧥ + EQUALS_SIGN_WITH_DOT_BELOW, # ⩦ + IDENTICAL_WITH_DOT_ABOVE, # ⩧ + TILDE_OPERATOR_WITH_DOT_ABOVE, # ⩪ + TILDE_OPERATOR_WITH_RISING_DOTS, # ⩫ + SIMILAR_MINUS_SIMILAR, # ⩬ + CONGRUENT_WITH_DOT_ABOVE, # ⩭ + EQUALS_WITH_ASTERISK, # ⩮ + ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT, # ⩯ + APPROXIMATELY_EQUAL_OR_EQUAL_TO, # ⩰ + EQUALS_SIGN_ABOVE_PLUS_SIGN, # ⩱ + PLUS_SIGN_ABOVE_EQUALS_SIGN, # ⩲ + EQUALS_SIGN_ABOVE_TILDE_OPERATOR, # ⩳ + DOUBLE_COLON_EQUAL, # ⩴ + TWO_CONSECUTIVE_EQUALS_SIGNS, # ⩵ + THREE_CONSECUTIVE_EQUALS_SIGNS, # ⩶ + EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW, # ⩷ + EQUIVALENT_WITH_FOUR_DOTS_ABOVE, # ⩸ + LESS_THAN_WITH_CIRCLE_INSIDE, # ⩹ + GREATER_THAN_WITH_CIRCLE_INSIDE, # ⩺ + LESS_THAN_WITH_QUESTION_MARK_ABOVE, # ⩻ + GREATER_THAN_WITH_QUESTION_MARK_ABOVE, # ⩼ + LESS_THAN_OR_SLANTED_EQUAL_TO, # ⩽ + GREATER_THAN_OR_SLANTED_EQUAL_TO, # ⩾ + LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, # ⩿ + GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, # ⪀ + LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, # ⪁ + GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, # ⪂ + LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT, # ⪃ + 
GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT, # ⪄ + LESS_THAN_OR_APPROXIMATE, # ⪅ + GREATER_THAN_OR_APPROXIMATE, # ⪆ + LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, # ⪇ + GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, # ⪈ + LESS_THAN_AND_NOT_APPROXIMATE, # ⪉ + GREATER_THAN_AND_NOT_APPROXIMATE, # ⪊ + LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN, # ⪋ + GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN, # ⪌ + LESS_THAN_ABOVE_SIMILAR_OR_EQUAL, # ⪍ + GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL, # ⪎ + LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN, # ⪏ + GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN, # ⪐ + LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL, # ⪑ + GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL, # ⪒ + LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL, # ⪓ + GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL, # ⪔ + SLANTED_EQUAL_TO_OR_LESS_THAN, # ⪕ + SLANTED_EQUAL_TO_OR_GREATER_THAN, # ⪖ + SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE, # ⪗ + SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE, # ⪘ + DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN, # ⪙ + DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN, # ⪚ + DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN, # ⪛ + DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN, # ⪜ + SIMILAR_OR_LESS_THAN, # ⪝ + SIMILAR_OR_GREATER_THAN, # ⪞ + SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN, # ⪟ + SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN, # ⪠ + DOUBLE_NESTED_LESS_THAN, # ⪡ + DOUBLE_NESTED_GREATER_THAN, # ⪢ + DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR, # ⪣ + GREATER_THAN_OVERLAPPING_LESS_THAN, # ⪤ + GREATER_THAN_BESIDE_LESS_THAN, # ⪥ + LESS_THAN_CLOSED_BY_CURVE, # ⪦ + GREATER_THAN_CLOSED_BY_CURVE, # ⪧ + LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, # ⪨ + GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, # ⪩ + SMALLER_THAN, # ⪪ + LARGER_THAN, # ⪫ + SMALLER_THAN_OR_EQUAL_TO, # ⪬ + LARGER_THAN_OR_EQUAL_TO, # ⪭ + EQUALS_SIGN_WITH_BUMPY_ABOVE, # ⪮ + PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN, # ⪯ + SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN, # 
⪰ + PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, # ⪱ + SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, # ⪲ + PRECEDES_ABOVE_EQUALS_SIGN, # ⪳ + SUCCEEDS_ABOVE_EQUALS_SIGN, # ⪴ + PRECEDES_ABOVE_NOT_EQUAL_TO, # ⪵ + SUCCEEDS_ABOVE_NOT_EQUAL_TO, # ⪶ + PRECEDES_ABOVE_ALMOST_EQUAL_TO, # ⪷ + SUCCEEDS_ABOVE_ALMOST_EQUAL_TO, # ⪸ + PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO, # ⪹ + SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO, # ⪺ + DOUBLE_PRECEDES, # ⪻ + DOUBLE_SUCCEEDS, # ⪼ + SUBSET_WITH_DOT, # ⪽ + SUPERSET_WITH_DOT, # ⪾ + SUBSET_WITH_PLUS_SIGN_BELOW, # ⪿ + SUPERSET_WITH_PLUS_SIGN_BELOW, # ⫀ + SUBSET_WITH_MULTIPLICATION_SIGN_BELOW, # ⫁ + SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW, # ⫂ + SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, # ⫃ + SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, # ⫄ + SUBSET_OF_ABOVE_EQUALS_SIGN, # ⫅ + SUPERSET_OF_ABOVE_EQUALS_SIGN, # ⫆ + SUBSET_OF_ABOVE_TILDE_OPERATOR, # ⫇ + SUPERSET_OF_ABOVE_TILDE_OPERATOR, # ⫈ + SUBSET_OF_ABOVE_ALMOST_EQUAL_TO, # ⫉ + SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO, # ⫊ + SUBSET_OF_ABOVE_NOT_EQUAL_TO, # ⫋ + SUPERSET_OF_ABOVE_NOT_EQUAL_TO, # ⫌ + SQUARE_LEFT_OPEN_BOX_OPERATOR, # ⫍ + SQUARE_RIGHT_OPEN_BOX_OPERATOR, # ⫎ + CLOSED_SUBSET, # ⫏ + CLOSED_SUPERSET, # ⫐ + CLOSED_SUBSET_OR_EQUAL_TO, # ⫑ + CLOSED_SUPERSET_OR_EQUAL_TO, # ⫒ + SUBSET_ABOVE_SUPERSET, # ⫓ + SUPERSET_ABOVE_SUBSET, # ⫔ + SUBSET_ABOVE_SUBSET, # ⫕ + SUPERSET_ABOVE_SUPERSET, # ⫖ + SUPERSET_BESIDE_SUBSET, # ⫗ + SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET, # ⫘ + ELEMENT_OF_OPENING_DOWNWARDS, # ⫙ + TRIPLE_NESTED_LESS_THAN, # ⫷ + TRIPLE_NESTED_GREATER_THAN, # ⫸ + DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO, # ⫹ + DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, # ⫺ + RIGHT_TACK, # ⊢ + LEFT_TACK, # ⊣ + CIRCLED_PLUS, # ⊕ + CIRCLED_MINUS, # ⊖ + SQUARED_PLUS, # ⊞ + SQUARED_MINUS, # ⊟ + VERTICAL_LINE, # | + UNION, # ∪ + LOGICAL_OR, # ∨ + SQUARE_CUP, # ⊔ + PLUS_MINUS_SIGN, # ± + MINUS_OR_PLUS_SIGN, # ∓ + DOT_PLUS, # ∔ + DOT_MINUS, # ∸ + MINUS_TILDE, # ≂ + DIFFERENCE_BETWEEN, # ≏ + MULTISET_UNION, # ⊎ + XOR, # ⊻ + NOR, # ⊽ + 
CURLY_LOGICAL_OR, # ⋎ + DOUBLE_UNION, # ⋓ + DOUBLE_PLUS, # ⧺ + TRIPLE_PLUS, # ⧻ + TWO_LOGICAL_OR_OPERATOR, # ⨈ + PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE, # ⨢ + PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE, # ⨣ + PLUS_SIGN_WITH_TILDE_ABOVE, # ⨤ + PLUS_SIGN_WITH_DOT_BELOW, # ⨥ + PLUS_SIGN_WITH_TILDE_BELOW, # ⨦ + PLUS_SIGN_WITH_SUBSCRIPT_TWO, # ⨧ + PLUS_SIGN_WITH_BLACK_TRIANGLE, # ⨨ + MINUS_SIGN_WITH_COMMA_ABOVE, # ⨩ + MINUS_SIGN_WITH_DOT_BELOW, # ⨪ + MINUS_SIGN_WITH_FALLING_DOTS, # ⨫ + MINUS_SIGN_WITH_RISING_DOTS, # ⨬ + PLUS_SIGN_IN_LEFT_HALF_CIRCLE, # ⨭ + PLUS_SIGN_IN_RIGHT_HALF_CIRCLE, # ⨮ + PLUS_SIGN_IN_TRIANGLE, # ⨹ + MINUS_SIGN_IN_TRIANGLE, # ⨺ + UNION_WITH_MINUS_SIGN, # ⩁ + UNION_WITH_OVERBAR, # ⩂ + UNION_WITH_LOGICAL_OR, # ⩅ + UNION_BESIDE_AND_JOINED_WITH_UNION, # ⩊ + CLOSED_UNION_WITH_SERIFS, # ⩌ + DOUBLE_SQUARE_UNION, # ⩏ + CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT, # ⩐ + LOGICAL_OR_WITH_DOT_ABOVE, # ⩒ + DOUBLE_LOGICAL_OR, # ⩔ + TWO_INTERSECTING_LOGICAL_OR, # ⩖ + SLOPING_LARGE_OR, # ⩗ + LOGICAL_OR_WITH_MIDDLE_STEM, # ⩛ + LOGICAL_OR_WITH_HORIZONTAL_DASH, # ⩝ + SMALL_VEE_WITH_UNDERBAR, # ⩡ + LOGICAL_OR_WITH_DOUBLE_OVERBAR, # ⩢ + LOGICAL_OR_WITH_DOUBLE_UNDERBAR, # ⩣ + RING_OPERATOR, # ∘ + MULTIPLICATION_SIGN, # × + INTERSECTION, # ∩ + LOGICAL_AND, # ∧ + CIRCLED_TIMES, # ⊗ + CIRCLED_DIVISION_SLASH, # ⊘ + CIRCLED_DOT_OPERATOR, # ⊙ + CIRCLED_RING_OPERATOR, # ⊚ + CIRCLED_ASTERISK_OPERATOR, # ⊛ + SQUARED_TIMES, # ⊠ + SQUARED_DOT_OPERATOR, # ⊡ + SQUARE_CAP, # ⊓ + ASTERISK_OPERATOR, # ∗ + BULLET_OPERATOR, # ∙ + DOES_NOT_DIVIDE, # ∤ + TURNED_AMPERSAND, # ⅋ + WREATH_PRODUCT, # ≀ + NAND, # ⊼ + DIAMOND_OPERATOR, # ⋄ + STAR_OPERATOR, # ⋆ + DIVISION_TIMES, # ⋇ + LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, # ⋉ + RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, # ⋊ + LEFT_SEMIDIRECT_PRODUCT, # ⋋ + RIGHT_SEMIDIRECT_PRODUCT, # ⋌ + CURLY_LOGICAL_AND, # ⋏ + DOUBLE_INTERSECTION, # ⋒ + AND_WITH_DOT, # ⟑ + CIRCLED_REVERSE_SOLIDUS, # ⦸ + CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN, # ⦼ + 
CIRCLED_WHITE_BULLET, # ⦾ + CIRCLED_BULLET, # ⦿ + SOLIDUS_WITH_OVERBAR, # ⧶ + REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE, # ⧷ + TWO_LOGICAL_AND_OPERATOR, # ⨇ + MULTIPLICATION_SIGN_WITH_DOT_ABOVE, # ⨰ + MULTIPLICATION_SIGN_WITH_UNDERBAR, # ⨱ + SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED, # ⨲ + SMASH_PRODUCT, # ⨳ + MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE, # ⨴ + MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE, # ⨵ + CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT, # ⨶ + MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE, # ⨷ + CIRCLED_DIVISION_SIGN, # ⨸ + MULTIPLICATION_SIGN_IN_TRIANGLE, # ⨻ + INTERIOR_PRODUCT, # ⨼ + RIGHTHAND_INTERIOR_PRODUCT, # ⨽ + INTERSECTION_WITH_DOT, # ⩀ + INTERSECTION_WITH_OVERBAR, # ⩃ + INTERSECTION_WITH_LOGICAL_AND, # ⩄ + INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION, # ⩋ + CLOSED_INTERSECTION_WITH_SERIFS, # ⩍ + DOUBLE_SQUARE_INTERSECTION, # ⩎ + LOGICAL_AND_WITH_DOT_ABOVE, # ⩑ + DOUBLE_LOGICAL_AND, # ⩓ + TWO_INTERSECTING_LOGICAL_AND, # ⩕ + SLOPING_LARGE_AND, # ⩘ + LOGICAL_AND_WITH_MIDDLE_STEM, # ⩚ + LOGICAL_AND_WITH_HORIZONTAL_DASH, # ⩜ + LOGICAL_AND_WITH_DOUBLE_OVERBAR, # ⩞ + LOGICAL_AND_WITH_UNDERBAR, # ⩟ + LOGICAL_AND_WITH_DOUBLE_UNDERBAR, # ⩠ + TRANSVERSAL_INTERSECTION, # ⫛ + MULTISET_MULTIPLICATION, # ⊍ + CIRCUMFLEX_ACCENT, # ^ + UPWARDS_ARROW, # ↑ + DOWNWARDS_ARROW, # ↓ + DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW, # ⇵ + UPWARDS_QUADRUPLE_ARROW, # ⟰ + DOWNWARDS_QUADRUPLE_ARROW, # ⟱ + DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE, # ⤈ + UPWARDS_ARROW_WITH_HORIZONTAL_STROKE, # ⤉ + UPWARDS_TRIPLE_ARROW, # ⤊ + DOWNWARDS_TRIPLE_ARROW, # ⤋ + UPWARDS_ARROW_TO_BAR, # ⤒ + DOWNWARDS_ARROW_TO_BAR, # ⤓ + UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE, # ⥉ + UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON, # ⥌ + UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON, # ⥍ + UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON, # ⥏ + UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON, # ⥑ + UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, # ⥔ + DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, # ⥕ + UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, # ⥘ + 
DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, # ⥙ + UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, # ⥜ + DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, # ⥝ + UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, # ⥠ + DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, # ⥡ + UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, # ⥣ + DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, # ⥥ + UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, # ⥮ + DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, # ⥯ + HALFWIDTH_UPWARDS_ARROW, # ↑ + HALFWIDTH_DOWNWARDS_ARROW, # ↓ + end_unicode_ops, + end_ops, +) + + +const UNICODE_OPS = Dict{Char, Kind}( +'÷' => DIVISION_SIGN, +'¬' => NOT_SIGN, +'√' => SQUARE_ROOT, +'∛' => CUBE_ROOT, +'∜' => QUAD_ROOT, +'←' => LEFTWARDS_ARROW, +'→' => RIGHTWARDS_ARROW, +'↔' => LEFT_RIGHT_ARROW, +'↚' => LEFTWARDS_ARROW_WITH_STROKE, +'↛' => RIGHTWARDS_ARROW_WITH_STROKE, +'↠' => RIGHTWARDS_TWO_HEADED_ARROW, +'↣' => RIGHTWARDS_ARROW_WITH_TAIL, +'↦' => RIGHTWARDS_ARROW_FROM_BAR, +'↮' => LEFT_RIGHT_ARROW_WITH_STROKE, +'⇎' => LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, +'⇏' => RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, +'⇒' => RIGHTWARDS_DOUBLE_ARROW, +'⇔' => LEFT_RIGHT_DOUBLE_ARROW, +'⇴' => RIGHT_ARROW_WITH_SMALL_CIRCLE, +'⇶' => THREE_RIGHTWARDS_ARROWS, +'⇷' => LEFTWARDS_ARROW_WITH_VERTICAL_STROKE, +'⇸' => RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE, +'⇹' => LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE, +'⇺' => LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, +'⇻' => RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, +'⇼' => LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE, +'⇽' => LEFTWARDS_OPEN_HEADED_ARROW, +'⇾' => RIGHTWARDS_OPEN_HEADED_ARROW, +'⇿' => LEFT_RIGHT_OPEN_HEADED_ARROW, +'⟵' => LONG_LEFTWARDS_ARROW, +'⟶' => LONG_RIGHTWARDS_ARROW, +'⟷' => LONG_LEFT_RIGHT_ARROW, +'⟹' => LONG_RIGHTWARDS_DOUBLE_ARROW, +'⟺' => LONG_LEFT_RIGHT_DOUBLE_ARROW, +'⟻' => LONG_LEFTWARDS_ARROW_FROM_BAR, +'⟼' => LONG_RIGHTWARDS_ARROW_FROM_BAR, +'⟽' => 
LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR, +'⟾' => LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, +'⟿' => LONG_RIGHTWARDS_SQUIGGLE_ARROW, +'⤀' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, +'⤁' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, +'⤂' => LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, +'⤃' => RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, +'⤄' => LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE, +'⤅' => RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR, +'⤆' => LEFTWARDS_DOUBLE_ARROW_FROM_BAR, +'⤇' => RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, +'⤌' => LEFTWARDS_DOUBLE_DASH_ARROW, +'⤍' => RIGHTWARDS_DOUBLE_DASH_ARROW, +'⤎' => LEFTWARDS_TRIPLE_DASH_ARROW, +'⤏' => RIGHTWARDS_TRIPLE_DASH_ARROW, +'⤐' => RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, +'⤑' => RIGHTWARDS_ARROW_WITH_DOTTED_STEM, +'⤔' => RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, +'⤕' => RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, +'⤖' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL, +'⤗' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, +'⤘' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, +'⤝' => LEFTWARDS_ARROW_TO_BLACK_DIAMOND, +'⤞' => RIGHTWARDS_ARROW_TO_BLACK_DIAMOND, +'⤟' => LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, +'⤠' => RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, +'⥄' => SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW, +'⥅' => RIGHTWARDS_ARROW_WITH_PLUS_BELOW, +'⥆' => LEFTWARDS_ARROW_WITH_PLUS_BELOW, +'⥇' => RIGHTWARDS_ARROW_THROUGH_X, +'⥈' => LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE, +'⥊' => LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON, +'⥋' => LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON, +'⥎' => LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON, +'⥐' => LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON, +'⥒' => LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, +'⥓' => RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, +'⥖' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, +'⥗' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, +'⥚' => LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, +'⥛' => RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, +'⥞' => 
LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, +'⥟' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, +'⥢' => LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, +'⥤' => RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, +'⥦' => LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP, +'⥧' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, +'⥨' => RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP, +'⥩' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, +'⥪' => LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, +'⥫' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, +'⥬' => RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, +'⥭' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, +'⥰' => RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD, +'⧴' => RULE_DELAYED, +'⬱' => THREE_LEFTWARDS_ARROWS, +'⬰' => LEFT_ARROW_WITH_SMALL_CIRCLE, +'⬲' => LEFT_ARROW_WITH_CIRCLED_PLUS, +'⬳' => LONG_LEFTWARDS_SQUIGGLE_ARROW, +'⬴' => LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, +'⬵' => LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, +'⬶' => LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR, +'⬷' => LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, +'⬸' => LEFTWARDS_ARROW_WITH_DOTTED_STEM, +'⬹' => LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, +'⬺' => LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, +'⬻' => LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL, +'⬼' => LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, +'⬽' => LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, +'⬾' => LEFTWARDS_ARROW_THROUGH_X, +'⬿' => WAVE_ARROW_POINTING_DIRECTLY_LEFT, +'⭀' => EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW, +'⭁' => REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, +'⭂' => LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, +'⭃' => RIGHTWARDS_ARROW_THROUGH_GREATER_THAN, +'⭄' => RIGHTWARDS_ARROW_THROUGH_SUPERSET, +'⭇' => REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW, +'⭈' => 
RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, +'⭉' => TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, +'⭊' => LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO, +'⭋' => LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, +'⭌' => RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, +'←' => HALFWIDTH_LEFTWARDS_ARROW, +'→' => HALFWIDTH_RIGHTWARDS_ARROW, +'≥' => GREATER_THAN_OR_EQUAL_TO, +'≤' => LESS_THAN_OR_EQUAL_TO, +'≡' => IDENTICAL_TO, +'≠' => NOT_EQUAL_TO, +'≢' => NOT_IDENTICAL_TO, +'∈' => ELEMENT_OF, +'∉' => NOT_AN_ELEMENT_OF, +'∋' => CONTAINS_AS_MEMBER, +'∌' => DOES_NOT_CONTAIN_AS_MEMBER, +'⊆' => SUBSET_OF_OR_EQUAL_TO, +'⊈' => NEITHER_A_SUBSET_OF_NOR_EQUAL_TO, +'⊂' => SUBSET_OF, +'⊄' => NOT_A_SUBSET_OF, +'⊊' => SUBSET_OF_WITH_NOT_EQUAL_TO, +'∝' => PROPORTIONAL_TO, +'∊' => SMALL_ELEMENT_OF, +'∍' => SMALL_CONTAINS_AS_MEMBER, +'∥' => PARALLEL_TO, +'∦' => NOT_PARALLEL_TO, +'∷' => PROPORTION, +'∺' => GEOMETRIC_PROPORTION, +'∻' => HOMOTHETIC, +'∽' => REVERSED_TILDE, +'∾' => INVERTED_LAZY_S, +'≁' => NOT_TILDE, +'≃' => ASYMPTOTICALLY_EQUAL_TO, +'≄' => NOT_ASYMPTOTICALLY_EQUAL_TO, +'≅' => APPROXIMATELY_EQUAL_TO, +'≆' => APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO, +'≇' => NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO, +'≈' => ALMOST_EQUAL_TO, +'≉' => NOT_ALMOST_EQUAL_TO, +'≊' => ALMOST_EQUAL_OR_EQUAL_TO, +'≋' => TRIPLE_TILDE, +'≌' => ALL_EQUAL_TO, +'≍' => EQUIVALENT_TO, +'≎' => GEOMETRICALLY_EQUIVALENT_TO, +'≐' => APPROACHES_THE_LIMIT, +'≑' => GEOMETRICALLY_EQUAL_TO, +'≒' => APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF, +'≓' => IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO, +'≔' => COLON_EQUALS, +'≕' => EQUALS_COLON, +'≖' => RING_IN_EQUAL_TO, +'≗' => RING_EQUAL_TO, +'≘' => CORRESPONDS_TO, +'≙' => ESTIMATES, +'≚' => EQUIANGULAR_TO, +'≛' => STAR_EQUALS, +'≜' => DELTA_EQUAL_TO, +'≝' => EQUAL_TO_BY_DEFINITION, +'≞' => MEASURED_BY, +'≟' => QUESTIONED_EQUAL_TO, +'≣' => STRICTLY_EQUIVALENT_TO, +'≦' => LESS_THAN_OVER_EQUAL_TO, +'≧' => GREATER_THAN_OVER_EQUAL_TO, +'≨' => LESS_THAN_BUT_NOT_EQUAL_TO, +'≩' => 
GREATER_THAN_BUT_NOT_EQUAL_TO, +'≪' => MUCH_LESS_THAN, +'≫' => MUCH_GREATER_THAN, +'≬' => BETWEEN, +'≭' => NOT_EQUIVALENT_TO, +'≮' => NOT_LESS_THAN, +'≯' => NOT_GREATER_THAN, +'≰' => NEITHER_LESS_THAN_NOR_EQUAL_TO, +'≱' => NEITHER_GREATER_THAN_NOR_EQUAL_TO, +'≲' => LESS_THAN_OR_EQUIVALENT_TO, +'≳' => GREATER_THAN_OR_EQUIVALENT_TO, +'≴' => NEITHER_LESS_THAN_NOR_EQUIVALENT_TO, +'≵' => NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO, +'≶' => LESS_THAN_OR_GREATER_THAN, +'≷' => GREATER_THAN_OR_LESS_THAN, +'≸' => NEITHER_LESS_THAN_NOR_GREATER_THAN, +'≹' => NEITHER_GREATER_THAN_NOR_LESS_THAN, +'≺' => PRECEDES, +'≻' => SUCCEEDS, +'≼' => PRECEDES_OR_EQUAL_TO, +'≽' => SUCCEEDS_OR_EQUAL_TO, +'≾' => PRECEDES_OR_EQUIVALENT_TO, +'≿' => SUCCEEDS_OR_EQUIVALENT_TO, +'⊀' => DOES_NOT_PRECEDE, +'⊁' => DOES_NOT_SUCCEED, +'⊃' => SUPERSET_OF, +'⊅' => NOT_A_SUPERSET_OF, +'⊇' => SUPERSET_OF_OR_EQUAL_TO, +'⊉' => NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO, +'⊋' => SUPERSET_OF_WITH_NOT_EQUAL_TO, +'⊏' => SQUARE_IMAGE_OF, +'⊐' => SQUARE_ORIGINAL_OF, +'⊑' => SQUARE_IMAGE_OF_OR_EQUAL_TO, +'⊒' => SQUARE_ORIGINAL_OF_OR_EQUAL_TO, +'⊜' => CIRCLED_EQUALS, +'⊩' => FORCES, +'⊬' => DOES_NOT_PROVE, +'⊮' => DOES_NOT_FORCE, +'⊰' => PRECEDES_UNDER_RELATION, +'⊱' => SUCCEEDS_UNDER_RELATION, +'⊲' => NORMAL_SUBGROUP_OF, +'⊳' => CONTAINS_AS_NORMAL_SUBGROUP, +'⊴' => NORMAL_SUBGROUP_OF_OR_EQUAL_TO, +'⊵' => CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO, +'⊶' => ORIGINAL_OF, +'⊷' => IMAGE_OF, +'⋍' => REVERSED_TILDE_EQUALS, +'⋐' => DOUBLE_SUBSET, +'⋑' => DOUBLE_SUPERSET, +'⋕' => EQUAL_AND_PARALLEL_TO, +'⋖' => LESS_THAN_WITH_DOT, +'⋗' => GREATER_THAN_WITH_DOT, +'⋘' => VERY_MUCH_LESS_THAN, +'⋙' => VERY_MUCH_GREATER_THAN, +'⋚' => LESS_THAN_EQUAL_TO_OR_GREATER_THAN, +'⋛' => GREATER_THAN_EQUAL_TO_OR_LESS_THAN, +'⋜' => EQUAL_TO_OR_LESS_THAN, +'⋝' => EQUAL_TO_OR_GREATER_THAN, +'⋞' => EQUAL_TO_OR_PRECEDES, +'⋟' => EQUAL_TO_OR_SUCCEEDS, +'⋠' => DOES_NOT_PRECEDE_OR_EQUAL, +'⋡' => DOES_NOT_SUCCEED_OR_EQUAL, +'⋢' => NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO, 
+'⋣' => NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO, +'⋤' => SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO, +'⋥' => SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO, +'⋦' => LESS_THAN_BUT_NOT_EQUIVALENT_TO, +'⋧' => GREATER_THAN_BUT_NOT_EQUIVALENT_TO, +'⋨' => PRECEDES_BUT_NOT_EQUIVALENT_TO, +'⋩' => SUCCEEDS_BUT_NOT_EQUIVALENT_TO, +'⋪' => NOT_NORMAL_SUBGROUP_OF, +'⋫' => DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP, +'⋬' => NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO, +'⋭' => DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL, +'⋲' => ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE, +'⋳' => ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, +'⋴' => SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, +'⋵' => ELEMENT_OF_WITH_DOT_ABOVE, +'⋶' => ELEMENT_OF_WITH_OVERBAR, +'⋷' => SMALL_ELEMENT_OF_WITH_OVERBAR, +'⋸' => ELEMENT_OF_WITH_UNDERBAR, +'⋹' => ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES, +'⋺' => CONTAINS_WITH_LONG_HORIZONTAL_STROKE, +'⋻' => CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, +'⋼' => SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, +'⋽' => CONTAINS_WITH_OVERBAR, +'⋾' => SMALL_CONTAINS_WITH_OVERBAR, +'⋿' => Z_NOTATION_BAG_MEMBERSHIP, +'⟈' => REVERSE_SOLIDUS_PRECEDING_SUBSET, +'⟉' => SUPERSET_PRECEDING_SOLIDUS, +'⟒' => ELEMENT_OF_OPENING_UPWARDS, +'⦷' => CIRCLED_PARALLEL, +'⧀' => CIRCLED_LESS_THAN, +'⧁' => CIRCLED_GREATER_THAN, +'⧡' => INCREASES_AS, +'⧣' => EQUALS_SIGN_AND_SLANTED_PARALLEL, +'⧤' => EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE, +'⧥' => IDENTICAL_TO_AND_SLANTED_PARALLEL, +'⩦' => EQUALS_SIGN_WITH_DOT_BELOW, +'⩧' => IDENTICAL_WITH_DOT_ABOVE, +'⩪' => TILDE_OPERATOR_WITH_DOT_ABOVE, +'⩫' => TILDE_OPERATOR_WITH_RISING_DOTS, +'⩬' => SIMILAR_MINUS_SIMILAR, +'⩭' => CONGRUENT_WITH_DOT_ABOVE, +'⩮' => EQUALS_WITH_ASTERISK, +'⩯' => ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT, +'⩰' => APPROXIMATELY_EQUAL_OR_EQUAL_TO, +'⩱' => EQUALS_SIGN_ABOVE_PLUS_SIGN, +'⩲' => PLUS_SIGN_ABOVE_EQUALS_SIGN, +'⩳' => EQUALS_SIGN_ABOVE_TILDE_OPERATOR, +'⩴' => DOUBLE_COLON_EQUAL, +'⩵' => 
TWO_CONSECUTIVE_EQUALS_SIGNS, +'⩶' => THREE_CONSECUTIVE_EQUALS_SIGNS, +'⩷' => EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW, +'⩸' => EQUIVALENT_WITH_FOUR_DOTS_ABOVE, +'⩹' => LESS_THAN_WITH_CIRCLE_INSIDE, +'⩺' => GREATER_THAN_WITH_CIRCLE_INSIDE, +'⩻' => LESS_THAN_WITH_QUESTION_MARK_ABOVE, +'⩼' => GREATER_THAN_WITH_QUESTION_MARK_ABOVE, +'⩽' => LESS_THAN_OR_SLANTED_EQUAL_TO, +'⩾' => GREATER_THAN_OR_SLANTED_EQUAL_TO, +'⩿' => LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, +'⪀' => GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, +'⪁' => LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, +'⪂' => GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, +'⪃' => LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT, +'⪄' => GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT, +'⪅' => LESS_THAN_OR_APPROXIMATE, +'⪆' => GREATER_THAN_OR_APPROXIMATE, +'⪇' => LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, +'⪈' => GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, +'⪉' => LESS_THAN_AND_NOT_APPROXIMATE, +'⪊' => GREATER_THAN_AND_NOT_APPROXIMATE, +'⪋' => LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN, +'⪌' => GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN, +'⪍' => LESS_THAN_ABOVE_SIMILAR_OR_EQUAL, +'⪎' => GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL, +'⪏' => LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN, +'⪐' => GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN, +'⪑' => LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL, +'⪒' => GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL, +'⪓' => LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL, +'⪔' => GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL, +'⪕' => SLANTED_EQUAL_TO_OR_LESS_THAN, +'⪖' => SLANTED_EQUAL_TO_OR_GREATER_THAN, +'⪗' => SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE, +'⪘' => SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE, +'⪙' => DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN, +'⪚' => DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN, +'⪛' => DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN, +'⪜' => 
DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN, +'⪝' => SIMILAR_OR_LESS_THAN, +'⪞' => SIMILAR_OR_GREATER_THAN, +'⪟' => SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN, +'⪠' => SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN, +'⪡' => DOUBLE_NESTED_LESS_THAN, +'⪢' => DOUBLE_NESTED_GREATER_THAN, +'⪣' => DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR, +'⪤' => GREATER_THAN_OVERLAPPING_LESS_THAN, +'⪥' => GREATER_THAN_BESIDE_LESS_THAN, +'⪦' => LESS_THAN_CLOSED_BY_CURVE, +'⪧' => GREATER_THAN_CLOSED_BY_CURVE, +'⪨' => LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, +'⪩' => GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, +'⪪' => SMALLER_THAN, +'⪫' => LARGER_THAN, +'⪬' => SMALLER_THAN_OR_EQUAL_TO, +'⪭' => LARGER_THAN_OR_EQUAL_TO, +'⪮' => EQUALS_SIGN_WITH_BUMPY_ABOVE, +'⪯' => PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN, +'⪰' => SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN, +'⪱' => PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, +'⪲' => SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, +'⪳' => PRECEDES_ABOVE_EQUALS_SIGN, +'⪴' => SUCCEEDS_ABOVE_EQUALS_SIGN, +'⪵' => PRECEDES_ABOVE_NOT_EQUAL_TO, +'⪶' => SUCCEEDS_ABOVE_NOT_EQUAL_TO, +'⪷' => PRECEDES_ABOVE_ALMOST_EQUAL_TO, +'⪸' => SUCCEEDS_ABOVE_ALMOST_EQUAL_TO, +'⪹' => PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO, +'⪺' => SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO, +'⪻' => DOUBLE_PRECEDES, +'⪼' => DOUBLE_SUCCEEDS, +'⪽' => SUBSET_WITH_DOT, +'⪾' => SUPERSET_WITH_DOT, +'⪿' => SUBSET_WITH_PLUS_SIGN_BELOW, +'⫀' => SUPERSET_WITH_PLUS_SIGN_BELOW, +'⫁' => SUBSET_WITH_MULTIPLICATION_SIGN_BELOW, +'⫂' => SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW, +'⫃' => SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, +'⫄' => SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, +'⫅' => SUBSET_OF_ABOVE_EQUALS_SIGN, +'⫆' => SUPERSET_OF_ABOVE_EQUALS_SIGN, +'⫇' => SUBSET_OF_ABOVE_TILDE_OPERATOR, +'⫈' => SUPERSET_OF_ABOVE_TILDE_OPERATOR, +'⫉' => SUBSET_OF_ABOVE_ALMOST_EQUAL_TO, +'⫊' => SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO, +'⫋' => SUBSET_OF_ABOVE_NOT_EQUAL_TO, +'⫌' => SUPERSET_OF_ABOVE_NOT_EQUAL_TO, +'⫍' => SQUARE_LEFT_OPEN_BOX_OPERATOR, +'⫎' => 
SQUARE_RIGHT_OPEN_BOX_OPERATOR, +'⫏' => CLOSED_SUBSET, +'⫐' => CLOSED_SUPERSET, +'⫑' => CLOSED_SUBSET_OR_EQUAL_TO, +'⫒' => CLOSED_SUPERSET_OR_EQUAL_TO, +'⫓' => SUBSET_ABOVE_SUPERSET, +'⫔' => SUPERSET_ABOVE_SUBSET, +'⫕' => SUBSET_ABOVE_SUBSET, +'⫖' => SUPERSET_ABOVE_SUPERSET, +'⫗' => SUPERSET_BESIDE_SUBSET, +'⫘' => SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET, +'⫙' => ELEMENT_OF_OPENING_DOWNWARDS, +'⫷' => TRIPLE_NESTED_LESS_THAN, +'⫸' => TRIPLE_NESTED_GREATER_THAN, +'⫹' => DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO, +'⫺' => DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, +'⊢' => RIGHT_TACK, +'⊣' => LEFT_TACK, +'⊕' => CIRCLED_PLUS, +'⊖' => CIRCLED_MINUS, +'⊞' => SQUARED_PLUS, +'⊟' => SQUARED_MINUS, +'|' => VERTICAL_LINE, +'∪' => UNION, +'∨' => LOGICAL_OR, +'⊔' => SQUARE_CUP, +'±' => PLUS_MINUS_SIGN, +'∓' => MINUS_OR_PLUS_SIGN, +'∔' => DOT_PLUS, +'∸' => DOT_MINUS, +'≂' => MINUS_TILDE, +'≏' => DIFFERENCE_BETWEEN, +'⊎' => MULTISET_UNION, +'⊻' => XOR, +'⊽' => NOR, +'⋎' => CURLY_LOGICAL_OR, +'⋓' => DOUBLE_UNION, +'⧺' => DOUBLE_PLUS, +'⧻' => TRIPLE_PLUS, +'⨈' => TWO_LOGICAL_OR_OPERATOR, +'⨢' => PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE, +'⨣' => PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE, +'⨤' => PLUS_SIGN_WITH_TILDE_ABOVE, +'⨥' => PLUS_SIGN_WITH_DOT_BELOW, +'⨦' => PLUS_SIGN_WITH_TILDE_BELOW, +'⨧' => PLUS_SIGN_WITH_SUBSCRIPT_TWO, +'⨨' => PLUS_SIGN_WITH_BLACK_TRIANGLE, +'⨩' => MINUS_SIGN_WITH_COMMA_ABOVE, +'⨪' => MINUS_SIGN_WITH_DOT_BELOW, +'⨫' => MINUS_SIGN_WITH_FALLING_DOTS, +'⨬' => MINUS_SIGN_WITH_RISING_DOTS, +'⨭' => PLUS_SIGN_IN_LEFT_HALF_CIRCLE, +'⨮' => PLUS_SIGN_IN_RIGHT_HALF_CIRCLE, +'⨹' => PLUS_SIGN_IN_TRIANGLE, +'⨺' => MINUS_SIGN_IN_TRIANGLE, +'⩁' => UNION_WITH_MINUS_SIGN, +'⩂' => UNION_WITH_OVERBAR, +'⩅' => UNION_WITH_LOGICAL_OR, +'⩊' => UNION_BESIDE_AND_JOINED_WITH_UNION, +'⩌' => CLOSED_UNION_WITH_SERIFS, +'⩏' => DOUBLE_SQUARE_UNION, +'⩐' => CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT, +'⩒' => LOGICAL_OR_WITH_DOT_ABOVE, +'⩔' => DOUBLE_LOGICAL_OR, +'⩖' => 
TWO_INTERSECTING_LOGICAL_OR, +'⩗' => SLOPING_LARGE_OR, +'⩛' => LOGICAL_OR_WITH_MIDDLE_STEM, +'⩝' => LOGICAL_OR_WITH_HORIZONTAL_DASH, +'⩡' => SMALL_VEE_WITH_UNDERBAR, +'⩢' => LOGICAL_OR_WITH_DOUBLE_OVERBAR, +'⩣' => LOGICAL_OR_WITH_DOUBLE_UNDERBAR, +'∘' => RING_OPERATOR, +'×' => MULTIPLICATION_SIGN, +'∩' => INTERSECTION, +'∧' => LOGICAL_AND, +'⊗' => CIRCLED_TIMES, +'⊘' => CIRCLED_DIVISION_SLASH, +'⊙' => CIRCLED_DOT_OPERATOR, +'⊚' => CIRCLED_RING_OPERATOR, +'⊛' => CIRCLED_ASTERISK_OPERATOR, +'⊠' => SQUARED_TIMES, +'⊡' => SQUARED_DOT_OPERATOR, +'⊓' => SQUARE_CAP, +'∗' => ASTERISK_OPERATOR, +'∙' => BULLET_OPERATOR, +'∤' => DOES_NOT_DIVIDE, +'⅋' => TURNED_AMPERSAND, +'≀' => WREATH_PRODUCT, +'⊼' => NAND, +'⋄' => DIAMOND_OPERATOR, +'⋆' => STAR_OPERATOR, +'⋇' => DIVISION_TIMES, +'⋉' => LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, +'⋊' => RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, +'⋋' => LEFT_SEMIDIRECT_PRODUCT, +'⋌' => RIGHT_SEMIDIRECT_PRODUCT, +'⋏' => CURLY_LOGICAL_AND, +'⋒' => DOUBLE_INTERSECTION, +'⟑' => AND_WITH_DOT, +'⦸' => CIRCLED_REVERSE_SOLIDUS, +'⦼' => CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN, +'⦾' => CIRCLED_WHITE_BULLET, +'⦿' => CIRCLED_BULLET, +'⧶' => SOLIDUS_WITH_OVERBAR, +'⧷' => REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE, +'⨇' => TWO_LOGICAL_AND_OPERATOR, +'⨰' => MULTIPLICATION_SIGN_WITH_DOT_ABOVE, +'⨱' => MULTIPLICATION_SIGN_WITH_UNDERBAR, +'⨲' => SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED, +'⨳' => SMASH_PRODUCT, +'⨴' => MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE, +'⨵' => MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE, +'⨶' => CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT, +'⨷' => MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE, +'⨸' => CIRCLED_DIVISION_SIGN, +'⨻' => MULTIPLICATION_SIGN_IN_TRIANGLE, +'⨼' => INTERIOR_PRODUCT, +'⨽' => RIGHTHAND_INTERIOR_PRODUCT, +'⩀' => INTERSECTION_WITH_DOT, +'⩃' => INTERSECTION_WITH_OVERBAR, +'⩄' => INTERSECTION_WITH_LOGICAL_AND, +'⩋' => INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION, +'⩍' => CLOSED_INTERSECTION_WITH_SERIFS, +'⩎' => 
DOUBLE_SQUARE_INTERSECTION, +'⩑' => LOGICAL_AND_WITH_DOT_ABOVE, +'⩓' => DOUBLE_LOGICAL_AND, +'⩕' => TWO_INTERSECTING_LOGICAL_AND, +'⩘' => SLOPING_LARGE_AND, +'⩚' => LOGICAL_AND_WITH_MIDDLE_STEM, +'⩜' => LOGICAL_AND_WITH_HORIZONTAL_DASH, +'⩞' => LOGICAL_AND_WITH_DOUBLE_OVERBAR, +'⩟' => LOGICAL_AND_WITH_UNDERBAR, +'⩠' => LOGICAL_AND_WITH_DOUBLE_UNDERBAR, +'⫛' => TRANSVERSAL_INTERSECTION, +'⊍' => MULTISET_MULTIPLICATION, +'^' => CIRCUMFLEX_ACCENT, +'↑' => UPWARDS_ARROW, +'↓' => DOWNWARDS_ARROW, +'⇵' => DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW, +'⟰' => UPWARDS_QUADRUPLE_ARROW, +'⟱' => DOWNWARDS_QUADRUPLE_ARROW, +'⤈' => DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE, +'⤉' => UPWARDS_ARROW_WITH_HORIZONTAL_STROKE, +'⤊' => UPWARDS_TRIPLE_ARROW, +'⤋' => DOWNWARDS_TRIPLE_ARROW, +'⤒' => UPWARDS_ARROW_TO_BAR, +'⤓' => DOWNWARDS_ARROW_TO_BAR, +'⥉' => UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE, +'⥌' => UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON, +'⥍' => UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON, +'⥏' => UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON, +'⥑' => UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON, +'⥔' => UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, +'⥕' => DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, +'⥘' => UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, +'⥙' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, +'⥜' => UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, +'⥝' => DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, +'⥠' => UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, +'⥡' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, +'⥣' => UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, +'⥥' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, +'⥮' => UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, +'⥯' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, +'↑' => HALFWIDTH_UPWARDS_ARROW, +'↓' => HALFWIDTH_DOWNWARDS_ARROW) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index cb84b3edc5e21..77fee125c0ea0 100644 --- 
a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -25,8 +25,10 @@ The JuliaParser.jl package is licensed under the MIT "Expat" License: > TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE > SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =# -const EOF_CHAR = convert(Char,typemax(UInt32)) +import Base.UTF8proc + +const EOF_CHAR = convert(Char,typemax(UInt32)) function is_cat_id_start(ch::Char, cat::Integer) c = UInt32(ch) From d6d94180833afd0540495db5387319559b247245 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 2 Aug 2016 18:22:30 +0200 Subject: [PATCH 0008/1109] lol --- JuliaSyntax/README.md | 37 ++----------------------------- JuliaSyntax/benchmark/lex_base.jl | 9 ++++---- JuliaSyntax/src/lexer.jl | 17 -------------- 3 files changed, 7 insertions(+), 56 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index c03255c8a0224..aa70a5d0ec9a6 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,38 +1,5 @@ # Tokenize -`Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. - -The goals of this package is to be - -* Fast -* Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. -* Non error throwing. Instead of throwing errors a certain error token is returned. - -### API - -### Tokenization - -The function `tokenize` is the main entrypoint for generating `Token`s. -It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. - -```jl - -e - -### `Token`s - -Each `Token` is represented by where it starts and ends, what string it contains and what type it is. 
- -The types are - -```julia -startpos(t)::Tuple{Int, Int} # row and column where the token start -endpos(t)::Tuple{Int, Int} # row and column where the token ends -startbyte(T)::Int64 # byte offset where the token start -endbyte(t)::Int64 # byte offset where the token ends -untokenize(t)::String # the string representation of the token -kind(t)::Token.Kind # A -exactkind(t):: -``` - +This was supposed to be a package for tokenizing julia code but the approach is likely flawed due to the grammar of Julia being too complicated. +It is currently used as the Tokenizer for a syntax highlighter but should probably be replaced with a regexp tokenizer instead. \ No newline at end of file diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 13b8eda6a0811..97e34fe89818e 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -5,8 +5,9 @@ const BASEPATH = abspath(joinpath(JULIA_HOME, "..", "..")) tot_files = 0 tot_time = 0.0 tot_tokens = 0 +tot_errors = 0 function testall(srcdir::AbstractString) - global tot_files, tot_time, tot_tokens + global tot_files, tot_time, tot_tokens, tot_errors dirs, files = [], [] for fname in sort(readdir(srcdir)) @@ -31,10 +32,10 @@ function testall(srcdir::AbstractString) tot_files += 1 tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) tot_tokens += length(tokens) + for token in tokens if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR - show(token) - error("Error in file $jlpath, for token $token") + tot_errors += 1 end end end @@ -56,4 +57,4 @@ perhaps you are using a Julia not built from source?""") end print("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), - " seconds with a total of ", tot_tokens, " tokens") + " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index a01f3ade724db..0ff0f81affdb4 100644 --- a/JuliaSyntax/src/lexer.jl +++ 
b/JuliaSyntax/src/lexer.jl @@ -206,7 +206,6 @@ end function read_string(l::Lexer, kind::Tokens.Kind) while true c = readchar(l) - show(c) if c == '\\' && eof(readchar(l)) return false end @@ -219,14 +218,6 @@ function read_string(l::Lexer, kind::Tokens.Kind) end end elseif eof(c) - show(l.io) - - println("....") - - println(position(l)) - println(l.io.size) - println(c) - println("...") return false end end @@ -467,13 +458,6 @@ function lex_digit(l::Lexer) return emit(l, kind) end -# Lex a prim sign, a ''' has been consumed -function lex_prime(l) - return emit(l, Tokens.PRIME) -end -# This does not work because a ' could be a ctranspose function call -# and we need to parse the expression for this to work. -#= function lex_prime(l) while true c = readchar(l) @@ -488,7 +472,6 @@ function lex_prime(l) end end end -=# function lex_amper(l::Lexer) if accept(l, '&') From 602ab5813455590429ddaa932b335d294df68fd8 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 2 Aug 2016 18:50:57 +0200 Subject: [PATCH 0009/1109] dsad --- JuliaSyntax/src/token.jl | 2 +- JuliaSyntax/src/token_kinds.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index e7a26f77c72f5..8426435a33a1f 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -59,7 +59,7 @@ function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{In startbyte::Int64, endbyte::Int64, val::String) Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) end -Token() = Token(Nothing, (0,0), (0,0), 0, 0, "", unknown) +Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) function kind(t::Token) isoperator(t.kind) && return OP diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 00fcaf80a9761..dae1017f1f332 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -14,7 +14,7 @@ FUNCTION, GLOBAL, MACRO, QUOTE, LET, LOCAL, CONST, ABSTRACT, 
TYPE, BITSTYPE, IMMUTABLE, DO, MODULE, BAREMODULE, USING, IMPORT, EXPORT, IMPORTALL, - END, FALSE, TRUE, + END, FALSE, TRUE, ELSE, ELSEIF, end_keywords, begin_literal, From 905b455fe7f26cf40f88e7af8b11874b182b6c12 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 12:10:08 +0200 Subject: [PATCH 0010/1109] always parse + and - as operators --- JuliaSyntax/src/lexer.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 0ff0f81affdb4..f44a0a1b512ad 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -404,13 +404,11 @@ end function lex_plus(l::Lexer) accept(l, '+') && emit(l, Tokens.PLUSPLUS) - accept(l, isdigit) && lex_digit(l) return emit(l, Tokens.PLUS) end function lex_minus(l::Lexer) accept(l, '-') && return emit_error(l) # "--" is an invalid operator - accept(l, isdigit) && return lex_digit(l) return emit(l, Tokens.MINUS) end From cf0d94a017bb0177f7e85cdc429d55749be286e3 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 13:53:25 +0200 Subject: [PATCH 0011/1109] update readme --- JuliaSyntax/README.md | 59 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index aa70a5d0ec9a6..373384d99135c 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,5 +1,60 @@ # Tokenize -This was supposed to be a package for tokenizing julia code but the approach is likely flawed due to the grammar of Julia being too complicated. +**Note:** This package does not currently work well with transpose and conjugate transpose operators like in `A' * b` and will try to tokenize this as the beginning of a `CHAR` literal. -It is currently used as the Tokenizer for a syntax highlighter but should probably be replaced with a regexp tokenizer instead. 
\ No newline at end of file +`Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. + +The goals of this package is to be + +* Fast, it currently lexes all of Julia source files in less than a second (1.6 million Tokens) +* Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. +* Non error throwing. Instead of throwing errors a certain error token is returned. + +### API + +#### Tokenization + +The function `tokenize` is the main entrypoint for generating `Token`s. +It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. + +```jl +julia> collect(tokenize("function f(x) end")) +9-element Array{Tokenize.Tokens.Token,1}: + 1,1-1,9: KEYWORD "function" + 1,9-1,10: WHITESPACE " " + 1,10-1,11: IDENTIFIER "f" + 1,11-1,12: LPAREN "(" + 1,12-1,13: IDENTIFIER "x" + 1,13-1,14: RPAREN ")" + 1,14-1,15: WHITESPACE " " + 1,15-1,18: KEYWORD "end" + 1,18-1,18: ENDMARKER "" +``` + +#### `Token`s + +Each `Token` is represented by where it starts and ends, what string it contains and what type it is. + +The API for a `Token` (non exported from the `Tokenize.Tokens` module) is. 
+ +```julia +startpos(t)::Tuple{Int, Int} # row and column where the token start +endpos(t)::Tuple{Int, Int} # row and column where the token ends +startbyte(T)::Int64 # byte offset where the token start +endbyte(t)::Int64 # byte offset where the token ends +untokenize(t)::String # the string representation of the token +kind(t)::Token.Kind # The type of the token +exactkind(t):: The exact type of the token +``` + +The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators while `exactkind` returns a unique type for all different operators, ex; + +```jl +julia> tok = collect(tokenize("⇒"))[1]; + +julia> Tokenize.Tokens.kind(tok) +OP::Tokenize.Tokens.Kind = 60 + +julia> Tokenize.Tokens.exactkind(tok) +RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 129 +``` \ No newline at end of file From 13eb3ab6eb741a196ab4b13b41a2690ba5da6b04 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 13:56:55 +0200 Subject: [PATCH 0012/1109] Update README.md --- JuliaSyntax/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 373384d99135c..4b7d870428d57 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -44,7 +44,7 @@ startbyte(T)::Int64 # byte offset where the token start endbyte(t)::Int64 # byte offset where the token ends untokenize(t)::String # the string representation of the token kind(t)::Token.Kind # The type of the token -exactkind(t):: The exact type of the token +exactkind(t)::Token.Kind The exact type of the token ``` The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators while `exactkind` returns a unique type for all different operators, ex; @@ -57,4 +57,4 @@ OP::Tokenize.Tokens.Kind = 60 julia> Tokenize.Tokens.exactkind(tok) RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 129 -``` \ No newline at end of file +``` From 2dd76ea0c577dad68583e755fbd91b1bce2e9925 Mon Sep 17 00:00:00 2001 From: 
Kristoffer Carlsson Date: Wed, 3 Aug 2016 13:57:29 +0200 Subject: [PATCH 0013/1109] Update README.md --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 4b7d870428d57..72862f8dff334 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -44,7 +44,7 @@ startbyte(T)::Int64 # byte offset where the token start endbyte(t)::Int64 # byte offset where the token ends untokenize(t)::String # the string representation of the token kind(t)::Token.Kind # The type of the token -exactkind(t)::Token.Kind The exact type of the token +exactkind(t)::Token.Kind # The exact type of the token ``` The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators while `exactkind` returns a unique type for all different operators, ex; From 65f75e13a99198273331b9d2363a1e7115bab298 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 16:01:48 +0200 Subject: [PATCH 0014/1109] Update README.md --- JuliaSyntax/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 72862f8dff334..bf2b64c6aaa6c 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -58,3 +58,5 @@ OP::Tokenize.Tokens.Kind = 60 julia> Tokenize.Tokens.exactkind(tok) RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 129 ``` + +All the different `Token.Kind` can be seen in the [`token_kinds.jl` file](https://github.com/KristofferC/Tokenize.jl/blob/master/src/token_kinds.jl) From d6980dadd3fced9ac790f0c9a2c034c95296aa8b Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 17:21:44 +0200 Subject: [PATCH 0015/1109] fix off by one in token column number --- JuliaSyntax/src/lexer.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index f44a0a1b512ad..e8cb6b02e6ab3 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl 
@@ -158,7 +158,7 @@ end function emit(l::Lexer, kind::Kind, str::String) tok = Token(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col), + (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, str) @debug "emitted token: $tok:" @@ -169,7 +169,7 @@ end function emit(l::Lexer, kind::Kind, err::TokenError=Tokens.UNKNOWN) str = extract_tokenstring(l) tok = Token(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col), + (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, str) @debug "emitted token: $tok:" From d49fd708753bf85f7e36f443ea424363b3d44f4d Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 17:23:30 +0200 Subject: [PATCH 0016/1109] Update README.md --- JuliaSyntax/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index bf2b64c6aaa6c..d979d54fc1d53 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -20,15 +20,15 @@ It takes a string or a buffer and creates an iterator that will sequentially ret ```jl julia> collect(tokenize("function f(x) end")) 9-element Array{Tokenize.Tokens.Token,1}: - 1,1-1,9: KEYWORD "function" - 1,9-1,10: WHITESPACE " " - 1,10-1,11: IDENTIFIER "f" - 1,11-1,12: LPAREN "(" - 1,12-1,13: IDENTIFIER "x" - 1,13-1,14: RPAREN ")" - 1,14-1,15: WHITESPACE " " - 1,15-1,18: KEYWORD "end" - 1,18-1,18: ENDMARKER "" + 1,1-1,8: KEYWORD "function" + 1,9-1,9: WHITESPACE " " + 1,10-1,10: IDENTIFIER "f" + 1,11-1,11: LPAREN "(" + 1,12-1,12: IDENTIFIER "x" + 1,13-1,13: RPAREN ")" + 1,14-1,14: WHITESPACE " " + 1,15-1,17: KEYWORD "end" + 1,18-1,17: ENDMARKER "" ``` #### `Token`s From b638a365448fcc7b68267a0ee7575e15314512fe Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 17:24:25 +0200 Subject: [PATCH 0017/1109] Update README.md --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/JuliaSyntax/README.md b/JuliaSyntax/README.md index d979d54fc1d53..f6f52f8a437b9 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -47,7 +47,7 @@ kind(t)::Token.Kind # The type of the token exactkind(t)::Token.Kind # The exact type of the token ``` -The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators while `exactkind` returns a unique type for all different operators, ex; +The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators and `KEYWORD` for all keywords while `exactkind` returns a unique kind for all different operators and keywords, ex; ```jl julia> tok = collect(tokenize("⇒"))[1]; From 3d3bab20c6e08ce6c1365c6509cb05d2a0c6f5e7 Mon Sep 17 00:00:00 2001 From: Eric Davies Date: Wed, 3 Aug 2016 12:12:22 -0500 Subject: [PATCH 0018/1109] Add typealias keyword --- JuliaSyntax/src/token_kinds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index dae1017f1f332..770b45933a0ad 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -12,7 +12,7 @@ KEYWORD, # general BEGIN, WHILE, IF, FOR, TRY, RETURN, BREAK, CONTINUE, FUNCTION, GLOBAL, MACRO, QUOTE, LET, LOCAL, - CONST, ABSTRACT, TYPE, BITSTYPE, IMMUTABLE, + CONST, ABSTRACT, TYPE, BITSTYPE, IMMUTABLE, TYPEALIAS, DO, MODULE, BAREMODULE, USING, IMPORT, EXPORT, IMPORTALL, END, FALSE, TRUE, ELSE, ELSEIF, end_keywords, From 5ad0dbcfcf8a9fbca6bd12457abdd363eae31941 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 19:47:22 +0200 Subject: [PATCH 0019/1109] Update README.md --- JuliaSyntax/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index f6f52f8a437b9..79d9c8a40993b 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,7 +1,5 @@ # Tokenize -**Note:** This package does not currently work well with transpose 
and conjugate transpose operators like in `A' * b` and will try to tokenize this as the beginning of a `CHAR` literal. - `Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. The goals of this package is to be @@ -10,6 +8,8 @@ The goals of this package is to be * Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. * Non error throwing. Instead of throwing errors a certain error token is returned. +**Note:** This package does not currently work well with transpose and conjugate transpose operators like in `A' * b` and will try to tokenize this as the beginning of a `CHAR` literal. + ### API #### Tokenization From bbd750fc160e92875b05c44320aae6f1deb75eb5 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 19:54:49 +0200 Subject: [PATCH 0020/1109] Update README.md --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 79d9c8a40993b..9f25e1a265109 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -15,7 +15,7 @@ The goals of this package is to be #### Tokenization The function `tokenize` is the main entrypoint for generating `Token`s. -It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. +It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. The argument to `tokenize` can either be a `String`, `IOBuffer` or an `IOStream`. 
```jl julia> collect(tokenize("function f(x) end")) From 4e87f27eaf8faa569919d71b879991b441aa8105 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Aug 2016 22:08:11 +0200 Subject: [PATCH 0021/1109] fix missing return --- JuliaSyntax/src/lexer.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index e8cb6b02e6ab3..c263da2abcba2 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -403,7 +403,7 @@ function lex_bar(l::Lexer) end function lex_plus(l::Lexer) - accept(l, '+') && emit(l, Tokens.PLUSPLUS) + accept(l, '+') && return emit(l, Tokens.PLUSPLUS) return emit(l, Tokens.PLUS) end From e92697373279fd70da10802edc3a93ff0eca3d4b Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 10 Aug 2016 17:14:08 +0200 Subject: [PATCH 0022/1109] precompile a few methods --- JuliaSyntax/benchmark/lex_base.jl | 4 ++++ JuliaSyntax/src/Tokenize.jl | 3 +++ 2 files changed, 7 insertions(+) diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 97e34fe89818e..f10ec7e2e9f73 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -33,6 +33,10 @@ function testall(srcdir::AbstractString) tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) tot_tokens += length(tokens) + seek(buf, 0) + str = takebuf_string(buf) + collect(Tokenize.tokenize(str)) + for token in tokens if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR tot_errors += 1 diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 14d1a8e11d5e3..19965eeb1ac88 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,4 +10,7 @@ import .Tokens: untokenize export tokenize +include("precompile.jl") +_precompile_() + end # module From 1840c90d519cd55d8cbf1cf290a58fb0e5c12bf2 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 10 Aug 2016 21:33:46 +0200 Subject: [PATCH 0023/1109] Update 
Tokenize.jl --- JuliaSyntax/src/Tokenize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 19965eeb1ac88..8874efaca9d28 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -11,6 +11,6 @@ import .Tokens: untokenize export tokenize include("precompile.jl") -_precompile_() +# _precompile_() end # module From 88b20174494df0993e0686a1758f89d82988d698 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 10 Aug 2016 21:34:20 +0200 Subject: [PATCH 0024/1109] Update Tokenize.jl --- JuliaSyntax/src/Tokenize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 8874efaca9d28..239b5e5c09f6d 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,7 +10,7 @@ import .Tokens: untokenize export tokenize -include("precompile.jl") +# include("precompile.jl") # _precompile_() end # module From 94885e4d5059ef903ba795951ff4645ea1d5df70 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 23 Aug 2016 11:36:09 +0200 Subject: [PATCH 0025/1109] few updates --- JuliaSyntax/src/Tokenize.jl | 4 +- JuliaSyntax/src/lexer.jl | 2 +- JuliaSyntax/src/precompile.jl | 75 +++++++++++++++++++++++++++++++++++ JuliaSyntax/test/runtests.jl | 8 +--- 4 files changed, 79 insertions(+), 10 deletions(-) create mode 100644 JuliaSyntax/src/precompile.jl diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 239b5e5c09f6d..19965eeb1ac88 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,7 +10,7 @@ import .Tokens: untokenize export tokenize -# include("precompile.jl") -# _precompile_() +include("precompile.jl") +_precompile_() end # module diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index c263da2abcba2..390237b86fc65 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -21,7 +21,7 @@ end 
ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') iswhitespace(c::Char) = Base.UTF8proc.isspace(c) -type Lexer{IO_t <: Union{IO, String}} +type Lexer{IO_t <: Union{IO, AbstractString}} io::IO_t token_start_row::Int diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl new file mode 100644 index 0000000000000..db988e3a06f8b --- /dev/null +++ b/JuliaSyntax/src/precompile.jl @@ -0,0 +1,75 @@ +function _precompile_() + ccall(:jl_generating_output, Cint, ()) == 1 || return nothing + precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) + precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.is_identifier_char, (Char,)) + precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.peekchar, (Base.AbstractIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.ishex, (Char,)) + precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, String,)) + precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String}, String,)) + precompile(Tokenize.Lexers.lex_comment, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.peekchar, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{String}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Function,)) + precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Base.UTF8proc.isdigit),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( 
Base.UTF8proc.isdigit),)) + precompile(Tokenize.Lexers.iswhitespace, (Char,)) + precompile(Tokenize.Lexers.ignore!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.ishex),)) + precompile(Tokenize.Lexers.ignore!, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.is_identifier_char),)) + precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{String}, Function,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.ishex),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) + precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 8914351a67802..cc93eda7b5847 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,10 +1,4 @@ using PimpMyREPL.Tokenize using Base.Test - - -# write your own tests here - -tokvec(x) = collect(tokenize(x)) - -v = tokvec("function foo end") +include("../benchmark/lex_base.jl") From f58af5e6699e18bd0004f4ee66656b68e01560d4 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 23 Aug 2016 11:38:40 +0200 Subject: [PATCH 0026/1109] only precomp on 0.5 --- JuliaSyntax/src/Tokenize.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 19965eeb1ac88..639c8ec3d7104 100644 --- 
a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,7 +10,9 @@ import .Tokens: untokenize export tokenize -include("precompile.jl") -_precompile_() +if VERSION > v"0.5-" + include("precompile.jl") + _precompile_() +end end # module From 4d5b78aa87fbe601b9881a0052a9964dcde7bac1 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 14:02:52 +0200 Subject: [PATCH 0027/1109] lex underscore in numbers --- JuliaSyntax/src/lexer.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 390237b86fc65..dd3d4f281b1b4 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -420,6 +420,16 @@ function lex_digit(l::Lexer) accept_batch(l, isdigit) + # Accept "_" in digits + while true + if !accept(l, '_') + break + end + if !accept_batch(l, isdigit) + return emit_error(l) + end + end + if accept(l, '.') if peekchar(l) == '.' # 43.. -> [43, ..] backup!(l) From 2a48a3628dcba1564e61e8e60ffa9a610a0b573c Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 9 Sep 2016 14:11:15 +0200 Subject: [PATCH 0028/1109] add finally keyword (JuliaLang/JuliaSyntax.jl#4) --- JuliaSyntax/src/token_kinds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 770b45933a0ad..09c9b2501e034 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -14,7 +14,7 @@ FUNCTION, GLOBAL, MACRO, QUOTE, LET, LOCAL, CONST, ABSTRACT, TYPE, BITSTYPE, IMMUTABLE, TYPEALIAS, DO, MODULE, BAREMODULE, USING, IMPORT, EXPORT, IMPORTALL, - END, FALSE, TRUE, ELSE, ELSEIF, + END, FALSE, TRUE, ELSE, ELSEIF, FINALLY, end_keywords, begin_literal, From 5ff7776f61091a468266d6b26af9f9862aa84492 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 18 Oct 2016 07:05:30 -0500 Subject: [PATCH 0029/1109] Replace is with === --- JuliaSyntax/src/utilities.jl | 2 +- JuliaSyntax/test/runtests.jl | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 77fee125c0ea0..c56d4dff40c85 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -149,7 +149,7 @@ peekchar(s::IOStream) = begin end eof(io::IO) = Base.eof(io) -eof(c) = is(c, EOF_CHAR) +eof(c) = c === EOF_CHAR readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) takechar(io::IO) = (readchar(io); io) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index cc93eda7b5847..1415c8bea7f80 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,4 +1,4 @@ -using PimpMyREPL.Tokenize +using OhMyREPL.Tokenize using Base.Test include("../benchmark/lex_base.jl") From 6426ba63ca4c800ce146b8eb76f0f37648b00958 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Sun, 11 Dec 2016 18:03:13 +0100 Subject: [PATCH 0030/1109] Various fixes (JuliaLang/JuliaSyntax.jl#7) * fix transpose lexing * fix digit parsing for juxtaposition e.g. 
`2_foo`, where `_foo` is the identifier * add `\dot` to unicode ops though I guess this list is probably autogenerated and therefore it doesn't make much sense to do this manually --- JuliaSyntax/src/lexer.jl | 29 +++++++++++++++++++---------- JuliaSyntax/src/token_kinds.jl | 4 +++- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index dd3d4f281b1b4..30eb5abc2bfdd 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -33,9 +33,11 @@ type Lexer{IO_t <: Union{IO, AbstractString}} current_row::Int current_col::Int current_pos::Int64 + + last_token::Tokens.Kind end -Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1) +Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1, Tokens.ERROR) # Iterator interface @@ -162,6 +164,7 @@ function emit(l::Lexer, kind::Kind, str::String) startpos(l), position(l) - 1, str) @debug "emitted token: $tok:" + l.last_token = kind ignore!(l) return tok end @@ -173,6 +176,7 @@ function emit(l::Lexer, kind::Kind, err::TokenError=Tokens.UNKNOWN) startpos(l), position(l) - 1, str) @debug "emitted token: $tok:" + l.last_token = kind ignore!(l) return tok end @@ -426,7 +430,8 @@ function lex_digit(l::Lexer) break end if !accept_batch(l, isdigit) - return emit_error(l) + backup!(l) + return emit(l, Tokens.INTEGER) end end @@ -467,16 +472,20 @@ function lex_digit(l::Lexer) end function lex_prime(l) - while true - c = readchar(l) - if eof(c) - return emit_error(l, Tokens.EOF_CHAR) - elseif c == '\\' - if eof(readchar(l)) + if l.last_token ∈ [Tokens.IDENTIFIER, Tokens.DOT, Tokens.RPAREN, Tokens.RSQUARE] + return emit(l, Tokens.PRIME) + else + while true + c = readchar(l) + if eof(c) return emit_error(l, Tokens.EOF_CHAR) + elseif c == '\\' + if eof(readchar(l)) + return emit_error(l, Tokens.EOF_CHAR) + end + elseif c == '\'' + return emit(l, Tokens.CHAR) end - elseif c == '\'' - return emit(l, Tokens.CHAR) end end end diff --git a/JuliaSyntax/src/token_kinds.jl 
b/JuliaSyntax/src/token_kinds.jl index 09c9b2501e034..2f508d2ffc91f 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -641,6 +641,7 @@ DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, # ⥯ HALFWIDTH_UPWARDS_ARROW, # ↑ HALFWIDTH_DOWNWARDS_ARROW, # ↓ + UNICODE_DOT, # ⋅ end_unicode_ops, end_ops, ) @@ -1189,4 +1190,5 @@ const UNICODE_OPS = Dict{Char, Kind}( '⥮' => UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, '⥯' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, '↑' => HALFWIDTH_UPWARDS_ARROW, -'↓' => HALFWIDTH_DOWNWARDS_ARROW) +'↓' => HALFWIDTH_DOWNWARDS_ARROW, +'⋅' => UNICODE_DOT) From 06b33de00699ff816bd54bb8f5a0355d9bc2601f Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sun, 11 Dec 2016 18:06:15 +0100 Subject: [PATCH 0031/1109] Update README.md --- JuliaSyntax/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 9f25e1a265109..c99d615d3e8cb 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -8,8 +8,6 @@ The goals of this package is to be * Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. * Non error throwing. Instead of throwing errors a certain error token is returned. -**Note:** This package does not currently work well with transpose and conjugate transpose operators like in `A' * b` and will try to tokenize this as the beginning of a `CHAR` literal. 
- ### API #### Tokenization From 1c657f2eec9d3e2fa54bfea4b33bfe82965e6825 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 13 Dec 2016 09:28:07 +0100 Subject: [PATCH 0032/1109] add tests (JuliaLang/JuliaSyntax.jl#8) * add tests * fix some deprecations --- JuliaSyntax/README.md | 2 + JuliaSyntax/REQUIRE | 2 +- JuliaSyntax/benchmark/lex_base.jl | 2 +- JuliaSyntax/test/runtests.jl | 63 ++++++++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index c99d615d3e8cb..3a10905e6b05e 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,5 +1,7 @@ # Tokenize +[![Build Status](https://travis-ci.org/KristofferC/Tokenize.jl.svg?branch=master)](https://travis-ci.org/KristofferC/Tokenize.jl) + `Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. 
The goals of this package is to be diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE index 4d57bb70074b7..73391b7c297c7 100644 --- a/JuliaSyntax/REQUIRE +++ b/JuliaSyntax/REQUIRE @@ -1,2 +1,2 @@ julia 0.4 -Compat \ No newline at end of file +Compat 0.9.5 \ No newline at end of file diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index f10ec7e2e9f73..891c28071037c 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -34,7 +34,7 @@ function testall(srcdir::AbstractString) tot_tokens += length(tokens) seek(buf, 0) - str = takebuf_string(buf) + str = String(take!(buf)) collect(Tokenize.tokenize(str)) for token in tokens diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 1415c8bea7f80..b2c69acb3cf01 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,4 +1,63 @@ -using OhMyREPL.Tokenize using Base.Test -include("../benchmark/lex_base.jl") +import Tokenize + +const PKGPATH = joinpath(dirname(@__FILE__), "..") + +tot_files = 0 +tot_time = 0.0 +tot_tokens = 0 +tot_errors = 0 +function testall(srcdir::AbstractString) + global tot_files, tot_time, tot_tokens, tot_errors + dirs, files = [], [] + + for fname in sort(readdir(srcdir)) + path = joinpath(srcdir, fname) + if isdir(path) + push!(dirs, path) + continue + end + _, ext = splitext(fname) + if ext == ".jl" + push!(files, path) + end + end + + if !isempty(files) + for jlpath in files + + fname = splitdir(jlpath)[end] + + buf = IOBuffer() + write(buf, open(readstring, jlpath)) + tot_files += 1 + tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) + tot_tokens += length(tokens) + + seek(buf, 0) + str = String(take!(buf)) + collect(Tokenize.tokenize(str)) + + for token in tokens + if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR + tot_errors += 1 + end + end + end + end + for dir in dirs + testall(dir) + end +end + + +testall(joinpath(PKGPATH, "benchmark")) 
+testall(joinpath(PKGPATH, "src")) +testall(joinpath(PKGPATH, "test")) + + +print("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), + " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") + +@test tot_errors == 0 From 67efce18dfa3476ee9aa0eeb0ef8c3235bd4fbcc Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 13 Dec 2016 13:08:15 +0100 Subject: [PATCH 0033/1109] fix stuff (JuliaLang/JuliaSyntax.jl#9) --- JuliaSyntax/LICENSE.md | 33 +++++++++++++++++++++++++++++---- JuliaSyntax/REQUIRE | 4 ++-- JuliaSyntax/test/runtests.jl | 1 + 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md index c55684c31049a..f316960a03e7b 100644 --- a/JuliaSyntax/LICENSE.md +++ b/JuliaSyntax/LICENSE.md @@ -1,17 +1,17 @@ The Tokenize.jl package is licensed under the MIT "Expat" License: > Copyright (c) 2016: Kristoffer Carlsson. -> +> > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal > in the Software without restriction, including without limitation the rights > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > copies of the Software, and to permit persons to whom the Software is > furnished to do so, subject to the following conditions: -> +> > The above copyright notice and this permission notice shall be included in all > copies or substantial portions of the Software. -> +> > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -19,4 +19,29 @@ The Tokenize.jl package is licensed under the MIT "Expat" License: > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. -> +> + +The code in src/utilities.jl is extracted from JuliaParser.jl: + +The JuliaParser.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2014: Jake Bolewski. +> +> Permission is hereby granted, free of charge, to any person obtaining +> a copy of this software and associated documentation files (the +> "Software"), to deal in the Software without restriction, including +> without limitation the rights to use, copy, modify, merge, publish, +> distribute, sublicense, and/or sell copies of the Software, and to +> permit persons to whom the Software is furnished to do so, subject to +> the following conditions: +> +> The above copyright notice and this permission notice shall be +> included in all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE index 73391b7c297c7..9e74e5b21d7d5 100644 --- a/JuliaSyntax/REQUIRE +++ b/JuliaSyntax/REQUIRE @@ -1,2 +1,2 @@ -julia 0.4 -Compat 0.9.5 \ No newline at end of file +julia 0.5 +Compat 0.9.5 diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index b2c69acb3cf01..01e7fcd46dd5c 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -37,6 +37,7 @@ function testall(srcdir::AbstractString) seek(buf, 0) str = String(take!(buf)) + collect(Tokenize.tokenize(str)) for token in tokens From 0980441d26031ae77ed9d85bf2bbb1b7f9cf833b Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 13 Dec 2016 13:01:51 +0100 Subject: [PATCH 0034/1109] lexer docs and cleanup --- JuliaSyntax/src/lexer.jl | 186 +++++++++++++++++++++++++--------- JuliaSyntax/src/precompile.jl | 4 +- 2 files changed, 139 insertions(+), 51 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 30eb5abc2bfdd..12ebca4aa5da8 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -50,7 +50,6 @@ end Base.eltype(::Lexer) = Token - function Base.start{T}(l::Lexer{T}) seekstart(l) l.token_startpos = 0 @@ -70,51 +69,116 @@ end Base.done(l::Lexer, isdone) = isdone - function Base.show(io::IO, l::Lexer) println(io, "Lexer at position: ", position(l)) end +""" + startpos(l::Lexer) +Return the latest `Token`'s starting position. +""" startpos(l::Lexer) = l.token_startpos + +""" + startpos!(l::Lexer, i::Int64) + +Set a new starting position. +""" startpos!(l::Lexer, i::Int64) = l.token_startpos = i -tokens(l::Lexer) = l.tokens -io(l::Lexer) = l.io + +""" + prevpos(l::Lexer) + +Return the lexer's previous position. +""" prevpos(l::Lexer) = l.prevpos + +""" + prevpos!(l::Lexer, i::Int64) + +Set the lexer's previous position. 
+""" prevpos!(l::Lexer, i::Int64) = l.prevpos = i + Base.seekstart{I <: IO}(l::Lexer{I}) = seekstart(l.io) Base.seekstart{I <: String}(l::Lexer{I}) = seek(l, 1) +""" + seek2startpos!(l::Lexer) + +Sets the lexer's current position to the beginning of the latest `Token`. +""" +function seek2startpos! end seek2startpos!{I <: IO}(l::Lexer{I}) = seek(l, startpos(l)) seek2startpos!{I <: String}(l::Lexer{I}) = seek(l, startpos(l) + 1) -push!(l::Lexer, t::Token) = push!(l.tokens, t) +""" + peekchar(l::Lexer) + +Returns the next character without changing the lexer's state. +""" +function peekchar end peekchar{I <: IO}(l::Lexer{I}) = peekchar(l.io) peekchar{I <: String}(l::Lexer{I}) = eof(l) ? EOF_CHAR : l.io[position(l)] +""" + position(l::Lexer) + +Returns the current position. +""" +function position end position{I <: String}(l::Lexer{I}) = l.current_pos position{I <: IO}(l::Lexer{I}) = Base.position(l.io) + +""" + eof(l::Lexer) + +Determine whether the end of the lexer's underlying buffer or string has been reached. +""" +function eof end eof{I <: IO}(l::Lexer{I}) = eof(l.io) eof{I <: String}(l::Lexer{I}) = position(l) > sizeof(l.io) + Base.seek{I <: IO}(l::Lexer{I}, pos) = seek(l.io, pos) Base.seek{I <: String}(l::Lexer{I}, pos) = l.current_pos = pos -function ignore!{I <: IO}(l::Lexer{I}) + +""" + start_token!(l::Lexer) + +Updates the lexer's state such that the next `Token` will start at the current +position. +""" +function start_token! end + +function start_token!{I <: IO}(l::Lexer{I}) l.token_startpos = position(l) l.token_start_row = l.current_row l.token_start_col = l.current_col end -function ignore!{I <: String}(l::Lexer{I}) +function start_token!{I <: String}(l::Lexer{I}) l.token_startpos = position(l) - 1 l.token_start_row = l.current_row l.token_start_col = l.current_col end +""" + prevchar(l::Lexer) + +Returns the previous character. Does not change the lexer's state. 
+""" function prevchar(l::Lexer) backup!(l) return readchar(l) end +""" + readchar(l::Lexer) + +Returns the next character and increments the current position. +""" +function readchar end function readchar{I <: IO}(l::Lexer{I}) prevpos!(l, position(l)) @@ -130,13 +194,25 @@ function readchar{I <: String}(l::Lexer{I}) return c end +""" + backup!(l::Lexer) +Decrements the current position and sets the previous position to `-1`, unless +the previous position already is `-1`. +""" function backup!(l::Lexer) - @assert prevpos(l) != -1 + prevpos(l) == -1 && error("prevpos(l) == -1\n Cannot backup! multiple times.") seek(l, prevpos(l)) prevpos!(l, -1) end +""" + accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) + +Consumes the next character `c` if either `f::Function(c)` returns true, `c == f` +for `c::Char` or `c in f` otherwise. Returns `true` if a character has been +consumed and `false` otherwise. +""" function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) c = peekchar(l) if isa(f, Function) @@ -150,6 +226,11 @@ function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) return ok end +""" + accept_batch(l::Lexer, f) + +Consumes all following characters until `accept(l, f)` is `false`. +""" function accept_batch(l::Lexer, f) ok = false while accept(l, f) @@ -158,36 +239,39 @@ function accept_batch(l::Lexer, f) return ok end -function emit(l::Lexer, kind::Kind, str::String) - tok = Token(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, - str) - @debug "emitted token: $tok:" - l.last_token = kind - ignore!(l) - return tok -end +""" + emit(l::Lexer, kind::Kind, + str::String=extract_tokenstring(l), err::TokenError=Tokens.NO_ERR) -function emit(l::Lexer, kind::Kind, err::TokenError=Tokens.UNKNOWN) - str = extract_tokenstring(l) +Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. 
+""" +function emit(l::Lexer, kind::Kind, + str::String=extract_tokenstring(l), err::TokenError=Tokens.NO_ERR) tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, - str) + str, err) @debug "emitted token: $tok:" l.last_token = kind - ignore!(l) + start_token!(l) return tok end +""" + emit_error(l::Lexer, err::TokenError=Tokens.UNKNOWN) + +Returns an `ERROR` token with error `err` and starts a new `Token`. +""" function emit_error(l::Lexer, err::TokenError=Tokens.UNKNOWN) - return emit(l, Tokens.ERROR, err) + return emit(l, Tokens.ERROR, extract_tokenstring(l), err) end -# TODO, just use String mby -function extract_tokenstring{T}(l::Lexer{T}) - isstr = T <: String +""" + extract_tokenstring(l::Lexer) + +Returns all characters since the start of the current `Token` as a `String`. +""" +function extract_tokenstring(l::Lexer) cs = Char[] sizehint!(cs, position(l) - startpos(l)) curr_pos = position(l) @@ -206,28 +290,11 @@ function extract_tokenstring{T}(l::Lexer{T}) return str end -# We just consumed a " or a """ -function read_string(l::Lexer, kind::Tokens.Kind) - while true - c = readchar(l) - if c == '\\' && eof(readchar(l)) - return false - end - if c == '"' - if kind == Tokens.STRING - return true - else - if accept(l, "\"") && accept(l, "\"") - return true - end - end - elseif eof(c) - return false - end - end -end - +""" + next_token(l::Lexer) +Returns the next `Token`. +""" function next_token(l::Lexer) c = readchar(l) @@ -529,8 +596,29 @@ function lex_quote(l::Lexer) end end -# Parse a token starting with a quote. 
-# A '"' has been consumed +# We just consumed a " or a """ +function read_string(l::Lexer, kind::Tokens.Kind) + while true + c = readchar(l) + if c == '\\' && eof(readchar(l)) + return false + end + if c == '"' + if kind == Tokens.STRING + return true + else + if accept(l, "\"") && accept(l, "\"") + return true + end + end + elseif eof(c) + return false + end + end +end + +# Parse a token starting with a forward slash. +# A '/' has been consumed function lex_forwardslash(l::Lexer) if accept(l, "/") # // if accept(l, "=") # //= diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index db988e3a06f8b..f536b2fdb0c01 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -61,9 +61,9 @@ function _precompile_() precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Base.UTF8proc.isdigit),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) precompile(Tokenize.Lexers.iswhitespace, (Char,)) - precompile(Tokenize.Lexers.ignore!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.ignore!, (Tokenize.Lexers.Lexer{String},)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{String},)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.is_identifier_char),)) precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{String}, Function,)) From a2aada9f72ac15e92c7be2db4ec6335093316f9f Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 13 Dec 2016 13:02:11 +0100 Subject: [PATCH 0035/1109] export `untokenize` 
and `Tokens` --- JuliaSyntax/src/Tokenize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 639c8ec3d7104..dd182474146c8 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -8,7 +8,7 @@ include("lexer.jl") import .Lexers: tokenize import .Tokens: untokenize -export tokenize +export tokenize, untokenize, Tokens if VERSION > v"0.5-" include("precompile.jl") From ea183146df666e292832cc5ec843cd1673f7590f Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 13 Dec 2016 13:15:03 +0100 Subject: [PATCH 0036/1109] remove 0.4 support --- JuliaSyntax/src/Tokenize.jl | 6 ++---- JuliaSyntax/src/lexer.jl | 14 ++++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index dd182474146c8..2e87330e1d33a 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,9 +10,7 @@ import .Tokens: untokenize export tokenize, untokenize, Tokens -if VERSION > v"0.5-" - include("precompile.jl") - _precompile_() -end +include("precompile.jl") +_precompile_() end # module diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 12ebca4aa5da8..96f78a0236b96 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -2,7 +2,6 @@ module Lexers include("utilities.jl") -import Base: push! using Compat import Compat.String @@ -39,14 +38,17 @@ end Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1, Tokens.ERROR) +""" + tokenize(x) -# Iterator interface +Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. +`join(untokenize.(tokenize(x)))`. 
+""" tokenize(x) = Lexer(x) -if VERSION > v"v0.5.0-" - Base.iteratorsize(::Lexer) = Base.SizeUnknown() - Base.iteratoreltype(::Lexer) = Base.HasEltype() -end +# Iterator interface +Base.iteratorsize(::Lexer) = Base.SizeUnknown() +Base.iteratoreltype(::Lexer) = Base.HasEltype() Base.eltype(::Lexer) = Token From 47bb44052f82562c01f27418be0062da399ef628 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 13 Dec 2016 16:21:40 +0100 Subject: [PATCH 0037/1109] correctly parse 1.12..1.32 fixes JuliaLang/JuliaSyntax.jl#5 --- JuliaSyntax/src/lexer.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 96f78a0236b96..7473740c36614 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -510,8 +510,13 @@ function lex_digit(l::Lexer) return emit(l, Tokens.INTEGER) end accept_batch(l, isdigit) - if accept(l, '.') # 3213.313.3123 is error - return emit_error(l) + if accept(l, '.') + if peekchar(l) == '.' 
# 1.23..3.21 is valid + backup!(l) + return emit(l, Tokens.FLOAT) + else # 3213.313.3123 is an error + return emit_error(l) + end elseif position(l) > longest # 323213.3232 candidate longest, kind = position(l), Tokens.FLOAT end From 94695ad5f07068f719c6a513193b6a7f8f4d57fe Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 14 Dec 2016 22:50:59 +0100 Subject: [PATCH 0038/1109] [WIP] Tests (JuliaLang/JuliaSyntax.jl#11) * correctly parse 1.12..1.32 fixes JuliaLang/JuliaSyntax.jl#5 * some tests * test JuliaLang/JuliaSyntax.jl#5 --- JuliaSyntax/src/lexer.jl | 2 +- JuliaSyntax/src/token.jl | 10 +-- JuliaSyntax/test/lexer.jl | 123 +++++++++++++++++++++++++++++++++++ JuliaSyntax/test/runtests.jl | 6 +- 4 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 JuliaSyntax/test/lexer.jl diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 7473740c36614..25c68328bbe6c 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -52,7 +52,7 @@ Base.iteratoreltype(::Lexer) = Base.HasEltype() Base.eltype(::Lexer) = Token -function Base.start{T}(l::Lexer{T}) +function Base.start(l::Lexer) seekstart(l) l.token_startpos = 0 l.token_start_row = 1 diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 8426435a33a1f..0bae24ae996e5 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -37,11 +37,11 @@ _add_kws() # Error kind => description TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( -EOF_MULTICOMMENT => "unterminated multi-line comment #= ... =#", -EOF_STRING => "unterminated string literal", -EOF_CHAR => "unterminated character literal", -EOF_CMD => "unterminated cmd literal", -UNKNOWN => "unknown", + EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", + EOF_STRING => "unterminated string literal", + EOF_CHAR => "unterminated character literal", + EOF_CMD => "unterminated cmd literal", + UNKNOWN => "unknown", ) immutable Token diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl new file mode 100644 index 0000000000000..9185704b4be98 --- /dev/null +++ b/JuliaSyntax/test/lexer.jl @@ -0,0 +1,123 @@ +using Tokenize +using Tokenize.Lexers +using Base.Test + +for s in ["a", IOBuffer("a")] + # IOBuffer indexing starts at 0, string indexing at 1 + # difference is only relevant for internals + ob1 = isa(s, IOBuffer) ? 1 : 0 + + l = tokenize(s) + @test Lexers.readchar(l) == 'a' + @test Lexers.prevpos(l) == 1 - ob1 + + @test l.current_pos == 2 - ob1 + l_old = l + @test Lexers.prevchar(l) == 'a' + @test l == l_old + @test Lexers.eof(l) + @test Lexers.readchar(l) == Lexers.EOF_CHAR + + Lexers.backup!(l) + @test Lexers.prevpos(l) == -1 + @test l.current_pos == 2 - ob1 +end + +# correctly tokenizes simple unicode expressions: +str = "𝘋 =2β" +for s in [str, IOBuffer(str)] + l = tokenize(s) + kinds = [Tokens.IDENTIFIER, Tokens.WHITESPACE, Tokens.OP, + Tokens.INTEGER, Tokens.IDENTIFIER, Tokens.ENDMARKER] + token_strs = ["𝘋", " ", "=", "2", "β", ""] + for (i, n) in enumerate(l) + @test Tokens.kind(n) == kinds[i] + @test untokenize(n) == token_strs[i] + @test Tokens.startpos(n) == (1, i) + @test Tokens.endpos(n) == (1, i - 1 + length(token_strs[i])) + end +end + +const T = Tokenize.Tokens + +# correctly tokenizes a complex piece of code +str = """ +function foo!{T<:Bar}(x::{T}=12) + @time (x+x, x+x); +end +@time x+x +y[[1 2 3]] +[1*2,2;3,4] +"string"; 'c' +(a&&b)||(a||b) +# comment +#= comment +is done here =# +2%5 +a'/b' +a.'\\b.' +`command` +12_sin(12) +{} +' +""" + +# Generate the following with +# ``` +# for t in Tokens.kind.(collect(tokenize(str))) +# print("T.", t, ",") +# end +# ``` +# and *check* it afterwards. 
+ +kinds = [T.KEYWORD,T.WHITESPACE,T.IDENTIFIER,T.LBRACE,T.IDENTIFIER, + T.OP,T.IDENTIFIER,T.RBRACE,T.LPAREN,T.IDENTIFIER,T.OP, + T.LBRACE,T.IDENTIFIER,T.RBRACE,T.OP,T.INTEGER,T.RPAREN, + + T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.LPAREN, + T.IDENTIFIER,T.OP,T.IDENTIFIER,T.COMMA,T.WHITESPACE, + T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.SEMICOLON, + + T.WHITESPACE,T.KEYWORD, + + T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, + T.OP,T.IDENTIFIER, + + T.WHITESPACE,T.IDENTIFIER,T.LSQUARE,T.LSQUARE,T.INTEGER,T.WHITESPACE, + T.INTEGER,T.WHITESPACE,T.INTEGER,T.RSQUARE,T.RSQUARE, + + T.WHITESPACE,T.LSQUARE,T.INTEGER,T.OP,T.INTEGER,T.COMMA,T.INTEGER, + T.SEMICOLON,T.INTEGER,T.COMMA,T.INTEGER,T.RSQUARE, + + T.WHITESPACE,T.STRING,T.SEMICOLON,T.WHITESPACE,T.CHAR, + + T.WHITESPACE,T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.OP, + T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN, + + T.WHITESPACE,T.COMMENT, + + T.WHITESPACE,T.COMMENT, + + T.WHITESPACE,T.INTEGER,T.OP,T.INTEGER, + + T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.IDENTIFIER,T.OP, + + T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, + + T.WHITESPACE,T.CMD, + + T.WHITESPACE,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, + + T.WHITESPACE,T.LBRACE,T.RBRACE, + + T.WHITESPACE,T.ERROR,T.ENDMARKER] + +for (i, n) in enumerate(tokenize(str)) + @test Tokens.kind(n) == kinds[i] +end + +# test roundtrippability +@test join(untokenize.(collect(tokenize(str)))) == str + +# test #5 +@test Tokens.kind.(collect(tokenize("1.23..3.21"))) == [T.FLOAT,T.OP,T.FLOAT,T.ENDMARKER] diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 01e7fcd46dd5c..6518106e2b0e5 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -58,7 +58,9 @@ testall(joinpath(PKGPATH, "src")) testall(joinpath(PKGPATH, "test")) -print("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), - " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " 
errors") +println("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), + " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") @test tot_errors == 0 + +include("lexer.jl") From d8f13598a43589ba5bf57023246a5653ed9e055e Mon Sep 17 00:00:00 2001 From: Michael Hatherly Date: Sun, 18 Dec 2016 10:34:31 +0200 Subject: [PATCH 0039/1109] Add 'catch' keyword (JuliaLang/JuliaSyntax.jl#13) --- JuliaSyntax/src/token_kinds.jl | 2 +- JuliaSyntax/test/lexer.jl | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 2f508d2ffc91f..89ade984a65f9 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -14,7 +14,7 @@ FUNCTION, GLOBAL, MACRO, QUOTE, LET, LOCAL, CONST, ABSTRACT, TYPE, BITSTYPE, IMMUTABLE, TYPEALIAS, DO, MODULE, BAREMODULE, USING, IMPORT, EXPORT, IMPORTALL, - END, FALSE, TRUE, ELSE, ELSEIF, FINALLY, + END, FALSE, TRUE, ELSE, ELSEIF, CATCH, FINALLY, end_keywords, begin_literal, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 9185704b4be98..a34d4ecc565aa 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -45,6 +45,11 @@ str = """ function foo!{T<:Bar}(x::{T}=12) @time (x+x, x+x); end +try + foo +catch + bar +end @time x+x y[[1 2 3]] [1*2,2;3,4] @@ -80,6 +85,12 @@ kinds = [T.KEYWORD,T.WHITESPACE,T.IDENTIFIER,T.LBRACE,T.IDENTIFIER, T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.IDENTIFIER, + T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.IDENTIFIER, + T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, T.OP,T.IDENTIFIER, From f782d8773b8e1b0ebf8dbf46b81faa0e5e1af0d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kenta=20Sato=20=28=E4=BD=90=E8=97=A4=20=E5=BB=BA=E5=A4=AA?= =?UTF-8?q?=29?= Date: Mon, 19 Dec 2016 03:27:20 +0900 Subject: [PATCH 0040/1109] fix ishex (JuliaLang/JuliaSyntax.jl#15) --- JuliaSyntax/src/lexer.jl | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 25c68328bbe6c..1b8c46a2a03d6 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -17,7 +17,7 @@ macro debug(ex) return :() end -ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') +ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') iswhitespace(c::Char) = Base.UTF8proc.isspace(c) type Lexer{IO_t <: Union{IO, AbstractString}} From 780ef300112215df17e9107492cd579823f1a760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kenta=20Sato=20=28=E4=BD=90=E8=97=A4=20=E5=BB=BA=E5=A4=AA?= =?UTF-8?q?=29?= Date: Mon, 19 Dec 2016 03:27:58 +0900 Subject: [PATCH 0041/1109] fix methods of iterator interface (JuliaLang/JuliaSyntax.jl#14) --- JuliaSyntax/src/lexer.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 1b8c46a2a03d6..80ebc9eff70da 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -47,10 +47,10 @@ Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. 
tokenize(x) = Lexer(x) # Iterator interface -Base.iteratorsize(::Lexer) = Base.SizeUnknown() -Base.iteratoreltype(::Lexer) = Base.HasEltype() +Base.iteratorsize{IO_t}(::Type{Lexer{IO_t}}) = Base.SizeUnknown() +Base.iteratoreltype{IO_t}(::Type{Lexer{IO_t}}) = Base.HasEltype() -Base.eltype(::Lexer) = Token +Base.eltype{IO_t}(::Type{Lexer{IO_t}}) = Token function Base.start(l::Lexer) seekstart(l) From f0f86e4ed68aede74f34df86116de569fb311126 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sat, 24 Dec 2016 00:48:58 +0100 Subject: [PATCH 0042/1109] update build on win (JuliaLang/JuliaSyntax.jl#16) * update build on win * remove precompile stuff * fix some 32 bit issues --- JuliaSyntax/.travis.yml | 2 +- JuliaSyntax/README.md | 2 +- JuliaSyntax/appveyor.yml | 4 +- JuliaSyntax/src/Tokenize.jl | 3 -- JuliaSyntax/src/lexer.jl | 10 ++--- JuliaSyntax/src/precompile.jl | 75 ----------------------------------- 6 files changed, 9 insertions(+), 87 deletions(-) delete mode 100644 JuliaSyntax/src/precompile.jl diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index 3b361a57ef605..11496537dc964 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -4,7 +4,7 @@ os: - linux - osx julia: - - release + - 0.5 - nightly notifications: email: false diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 3a10905e6b05e..c854ef68b0141 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,6 +1,6 @@ # Tokenize -[![Build Status](https://travis-ci.org/KristofferC/Tokenize.jl.svg?branch=master)](https://travis-ci.org/KristofferC/Tokenize.jl) +[![Build Status](https://travis-ci.org/KristofferC/Tokenize.jl.svg?branch=master)](https://travis-ci.org/KristofferC/Tokenize.jl) [![Build status](https://ci.appveyor.com/api/projects/status/h9d9webkxyhpx790?svg=true)](https://ci.appveyor.com/project/KristofferC/tokenize-jl) `Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize 
module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml index 83b657170b2a5..b503fc06d47b2 100644 --- a/JuliaSyntax/appveyor.yml +++ b/JuliaSyntax/appveyor.yml @@ -1,7 +1,7 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" + - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" + - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 2e87330e1d33a..b56dcf73a71f0 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,7 +10,4 @@ import .Tokens: untokenize export tokenize, untokenize, Tokens -include("precompile.jl") -_precompile_() - end # module diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 80ebc9eff70da..d90279694f38b 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -36,7 +36,7 @@ type Lexer{IO_t <: Union{IO, AbstractString}} last_token::Tokens.Kind end -Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1, Tokens.ERROR) +Lexer(io) = Lexer(io, 1, 1, Int64(-1), Int64(0), 1, 1, Int64(1), Tokens.ERROR) """ tokenize(x) @@ -83,11 +83,11 @@ Return the latest `Token`'s starting position. startpos(l::Lexer) = l.token_startpos """ - startpos!(l::Lexer, i::Int64) + startpos!(l::Lexer, i::Integer) Set a new starting position. """ -startpos!(l::Lexer, i::Int64) = l.token_startpos = i +startpos!(l::Lexer, i::Integer) = l.token_startpos = i """ prevpos(l::Lexer) @@ -97,11 +97,11 @@ Return the lexer's previous position. 
prevpos(l::Lexer) = l.prevpos """ - prevpos!(l::Lexer, i::Int64) + prevpos!(l::Lexer, i::Integer) Set the lexer's previous position. """ -prevpos!(l::Lexer, i::Int64) = l.prevpos = i +prevpos!(l::Lexer, i::Integer) = l.prevpos = i Base.seekstart{I <: IO}(l::Lexer{I}) = seekstart(l.io) Base.seekstart{I <: String}(l::Lexer{I}) = seek(l, 1) diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl deleted file mode 100644 index f536b2fdb0c01..0000000000000 --- a/JuliaSyntax/src/precompile.jl +++ /dev/null @@ -1,75 +0,0 @@ -function _precompile_() - ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) - precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.is_identifier_char, (Char,)) - precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.peekchar, (Base.AbstractIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.next_token, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.ishex, (Char,)) - precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, String,)) - precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{String},)) - 
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String}, String,)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.peekchar, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{String}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Function,)) - precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( 
Base.UTF8proc.isdigit),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) - precompile(Tokenize.Lexers.iswhitespace, (Char,)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{String},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String},typeof( Tokenize.Lexers.is_identifier_char),)) - precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{String}, Function,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{String}, Char,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) - precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) -end From b766c2b0431cfae4b3e6410639aded4c137e780a Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sun, 22 Jan 2017 13:58:53 +0000 Subject: [PATCH 0043/1109] fix RBITSHIFT lexing (JuliaLang/JuliaSyntax.jl#18) * fix RBITSHIFT lexing * add test --- JuliaSyntax/src/lexer.jl | 10 ++++------ JuliaSyntax/test/lexer.jl | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index d90279694f38b..4533a98e12655 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -385,12 +385,10 @@ function lex_greater(l::Lexer) else # >>>?, ? 
not a = return emit(l, Tokens.UNSIGNED_BITSHIFT) end - else # >>? - if accept(l, '=') # >>= - return emit(l, Tokens.RBITSHIFT_EQ) - else accept(l, iswhitespace) # '>> ' - return emit(l, Tokens.RBITSHIFT) - end + elseif accept(l, '=') # >>= + return emit(l, Tokens.RBITSHIFT_EQ) + else # '>>' + return emit(l, Tokens.RBITSHIFT) end elseif accept(l, '=') # >= return emit(l, Tokens.GREATER_EQ) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index a34d4ecc565aa..673ebc74c4ce7 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -132,3 +132,6 @@ end # test #5 @test Tokens.kind.(collect(tokenize("1.23..3.21"))) == [T.FLOAT,T.OP,T.FLOAT,T.ENDMARKER] + +# issue #17 +@test collect(tokenize(">> "))[1].val==">>" From 8991fc7f0ed0e2d7ce60aeca6db8c50f44da4a7d Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sun, 22 Jan 2017 15:19:19 +0000 Subject: [PATCH 0044/1109] restructure operators inline with parser (JuliaLang/JuliaSyntax.jl#19) * fix RBITSHIFT lexing * add etst * add missing operators and precedence structure --- JuliaSyntax/src/lexer.jl | 71 +++++++++++-- JuliaSyntax/src/token_kinds.jl | 184 ++++++++++++++++++++++----------- JuliaSyntax/test/lexer.jl | 12 +++ 3 files changed, 198 insertions(+), 69 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 4533a98e12655..52be0a0a72cde 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -310,12 +310,12 @@ function next_token(l::Lexer) elseif c == '('; return emit(l, Tokens.LPAREN) elseif c == ')'; return emit(l, Tokens.RPAREN) elseif c == ','; return emit(l, Tokens.COMMA) - elseif c == '*'; return emit(l, Tokens.STAR) + elseif c == '*'; return lex_star(l); + elseif c == '^'; return lex_circumflex(l); elseif c == '@'; return emit(l, Tokens.AT_SIGN) elseif c == '?'; return emit(l, Tokens.CONDITIONAL) - elseif c == '$'; return emit(l, Tokens.EX_OR) + elseif c == '$'; return lex_xor(l); elseif c == '~'; return emit(l, Tokens.APPROX) - elseif c == 
'\\'; return emit(l, Tokens.BACKSLASH) elseif c == '#'; return lex_comment(l) elseif c == '='; return lex_equal(l) elseif c == '!'; return lex_exclaim(l) @@ -324,10 +324,12 @@ function next_token(l::Lexer) elseif c == ':'; return lex_colon(l) elseif c == '|'; return lex_bar(l) elseif c == '&'; return lex_amper(l) - elseif c == '\'';return lex_prime(l) + elseif c == '\''; return lex_prime(l) + elseif c == '÷'; return lex_division(l) elseif c == '"'; return lex_quote(l); elseif c == '%'; return lex_percent(l); elseif c == '/'; return lex_forwardslash(l); + elseif c == '\\'; return lex_backslash(l); elseif c == '.'; return lex_dot(l); elseif c == '+'; return lex_plus(l); elseif c == '-'; return lex_minus(l); @@ -392,6 +394,8 @@ function lex_greater(l::Lexer) end elseif accept(l, '=') # >= return emit(l, Tokens.GREATER_EQ) + elseif accept(l, ':') # >: + return emit(l, Tokens.GREATER_COLON) else # '>' return emit(l, Tokens.GREATER) end @@ -435,9 +439,11 @@ end # Lex a colon, a ':' has been consumed function lex_colon(l::Lexer) if accept(l, ':') # '::' - emit(l, Tokens.DECLARATION) + return emit(l, Tokens.DECLARATION) + elseif accept(l, '=') # ':=' + return emit(l, Tokens.COLON_EQ) else - emit(l, Tokens.COLON) + return emit(l, Tokens.COLON) end end @@ -474,15 +480,57 @@ function lex_bar(l::Lexer) end function lex_plus(l::Lexer) - accept(l, '+') && return emit(l, Tokens.PLUSPLUS) + if accept(l, '+') + return emit(l, Tokens.PLUSPLUS) + elseif accept(l, '=') + return emit(l, Tokens.PLUS_EQ) + end return emit(l, Tokens.PLUS) end function lex_minus(l::Lexer) - accept(l, '-') && return emit_error(l) # "--" is an invalid operator + if accept(l, '-') + if accept(l, '>') + return emit(l, Tokens.RIGHT_ARROW) + else + return emit_error(l) # "--" is an invalid operator + end + elseif accept(l, '=') + return emit(l, Tokens.MINUS_EQ) + end return emit(l, Tokens.MINUS) end +function lex_star(l::Lexer) + if accept(l, '*') + return emit_error(l) # "**" is an invalid operator use ^ + 
elseif accept(l, '=') + return emit(l, Tokens.STAR_EQ) + end + return emit(l, Tokens.STAR) +end + +function lex_circumflex(l::Lexer) + if accept(l, '=') + return emit(l, Tokens.CIRCUMFLEX_EQ) + end + return emit(l, Tokens.CIRCUMFLEX_ACCENT) +end + +function lex_division(l::Lexer) + if accept(l, '=') + return emit(l, Tokens.DIVISION_EQ) + end + return emit(l, Tokens.DIVISION_SIGN) +end + +function lex_xor(l::Lexer) + if accept(l, '=') + return emit(l, Tokens.EX_OR_EQ) + end + return emit(l, Tokens.EX_OR) +end + # A digit has been consumed function lex_digit(l::Lexer) @@ -638,6 +686,13 @@ function lex_forwardslash(l::Lexer) end end +function lex_backslash(l::Lexer) + if accept(l, '=') + return emit(l, Tokens.BACKSLASH_EQ) + end + return emit(l, Tokens.BACKSLASH) +end + # TODO .op function lex_dot(l::Lexer) if accept(l, '.') diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 89ade984a65f9..6f7bfa9013997 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -38,71 +38,48 @@ begin_ops, OP, # general - STAR, # * - PLUS, # + - MINUS, # - - PLUSPLUS, # ++ - BACKSLASH, # \ - NOT, # ! - APPROX, # ~ - - DECLARATION, # :: - COLON, # : - PRIME, # ' - DOT,# . - DDOT, # .. - DDDOT, # ... - - LAZY_OR, # || - LAZY_AND, # && - OR, # | - AND, # & - CONDITIONAL, # ? - REM, # % - FWD_SLASH, # / - FWDFWD_SLASH, # // - TRANSPOSE, # .' - ISSUBTYPE, # <: - EX_OR, # $ + # Level 1 + begin_assignments, + EQ, # = + PLUS_EQ, # += + MINUS_EQ, # -= + STAR_EQ, # *= + FWD_SLASH_EQ, # /= + FWDFWD_SLASH_EQ, # //= + OR_EQ, # |= + CIRCUMFLEX_EQ, # ^= + DIVISION_EQ, # ÷= + REM_EQ, # %= + LBITSHIFT_EQ, # <<= + RBITSHIFT_EQ, # >>= + UNSIGNED_BITSHIFT_EQ, # >>>= + BACKSLASH_EQ, # \= + AND_EQ, # &= + COLON_EQ, # := + PAIR_ARROW, # => + APPROX, # ~ + EX_OR_EQ, # $= + end_assignments, - GREATER, # > - LESS, # < - NOT_EQ, # != - NOT_IS, # !== + # Level 2 + begin_conditional, + CONDITIONAL, # ? 
+ end_conditional, - LPIPE, # |> - RPIPE, # <| + # Level 3 + begin_lazyor, + LAZY_OR, # || + end_lazyor, - begin_bitshifts, - LBITSHIFT, # << - RBITSHIFT, # >> - UNSIGNED_BITSHIFT, # >>> - end_bitshifts, + # Level 4 + begin_lazyand, + LAZY_AND, # && + end_lazyand, - begin_assignments, - EQ, # = - EQEQ, # == - EQEQEQ, # === - PAIR_ARROW, # => - GREATER_EQ, # >= - LESS_EQ, # <= - RBITSHIFT_EQ, # >>= - UNSIGNED_BITSHIFT_EQ, # >>>= - LBITSHIFT_EQ, # <<= - OR_EQ, # |= - AND_EQ, # &= - REM_EQ, # %= - FWD_SLASH_EQ, # /= - FWDFWD_SLASH_EQ, # //= - end_assignments, - - begin_unicode_ops, - DIVISION_SIGN, # ÷ - NOT_SIGN, # ¬ - SQUARE_ROOT, # √ - CUBE_ROOT, # ∛ - QUAD_ROOT, # ∜ + # Level 5 + begin_arrow, + RIGHT_ARROW, # --> LEFTWARDS_ARROW, # ← RIGHTWARDS_ARROW, # → LEFT_RIGHT_ARROW, # ↔ @@ -218,10 +195,25 @@ RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, # ⭌ HALFWIDTH_LEFTWARDS_ARROW, # ← HALFWIDTH_RIGHTWARDS_ARROW, # → + end_arrow, + + + # Level 6 + begin_comparison, + ISSUBTYPE, # <: + GREATER_COLON, # >: + GREATER, # > + LESS, # < + GREATER_EQ, # >= GREATER_THAN_OR_EQUAL_TO, # ≥ + LESS_EQ, # <= LESS_THAN_OR_EQUAL_TO, # ≤ + EQEQ, # == + EQEQEQ, # === IDENTICAL_TO, # ≡ + NOT_EQ, # != NOT_EQUAL_TO, # ≠ + NOT_IS, # !== NOT_IDENTICAL_TO, # ≢ ELEMENT_OF, # ∈ NOT_AN_ELEMENT_OF, # ∉ @@ -493,6 +485,26 @@ DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, # ⫺ RIGHT_TACK, # ⊢ LEFT_TACK, # ⊣ + end_comparison, + + # Level 7 + begin_pipe, + LPIPE, # |> + RPIPE, # <| + end_pipe, + + # Level 8 + begin_colon, + COLON, # : + DDOT, # .. 
+ end_colon, + + # Level 9 + begin_plus, + EX_OR, # $ + PLUS, # + + MINUS, # - + PLUSPLUS, # ++ CIRCLED_PLUS, # ⊕ CIRCLED_MINUS, # ⊖ SQUARED_PLUS, # ⊞ @@ -546,8 +558,26 @@ SMALL_VEE_WITH_UNDERBAR, # ⩡ LOGICAL_OR_WITH_DOUBLE_OVERBAR, # ⩢ LOGICAL_OR_WITH_DOUBLE_UNDERBAR, # ⩣ + end_plus, + + # Level 10 + begin_bitshifts, + LBITSHIFT, # << + RBITSHIFT, # >> + UNSIGNED_BITSHIFT, # >>> + end_bitshifts, + + # Level 11 + begin_times, + STAR, # * + FWD_SLASH, # / + DIVISION_SIGN, # ÷ + REM, # % + UNICODE_DOT, # ⋅ RING_OPERATOR, # ∘ MULTIPLICATION_SIGN, # × + BACKSLASH, # \ + AND, # & INTERSECTION, # ∩ LOGICAL_AND, # ∧ CIRCLED_TIMES, # ⊗ @@ -610,6 +640,15 @@ LOGICAL_AND_WITH_DOUBLE_UNDERBAR, # ⩠ TRANSVERSAL_INTERSECTION, # ⫛ MULTISET_MULTIPLICATION, # ⊍ + end_times, + + # Level 12 + begin_rational, + FWDFWD_SLASH, # // + end_rational, + + # Level 13 + begin_power, CIRCUMFLEX_ACCENT, # ^ UPWARDS_ARROW, # ↑ DOWNWARDS_ARROW, # ↓ @@ -641,7 +680,30 @@ DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, # ⥯ HALFWIDTH_UPWARDS_ARROW, # ↑ HALFWIDTH_DOWNWARDS_ARROW, # ↓ - UNICODE_DOT, # ⋅ + end_power, + + # Level 14 + begin_decl, + DECLARATION, # :: + end_decl, + + # Level 15 + begin_dot, + DOT,# . + end_dot, + + NOT, # ! + PRIME, # ' + DDDOT, # ... + OR, # | + TRANSPOSE, # .' 
+ + + begin_unicode_ops, + NOT_SIGN, # ¬ + SQUARE_ROOT, # √ + CUBE_ROOT, # ∛ + QUAD_ROOT, # ∜ end_unicode_ops, end_ops, ) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 673ebc74c4ce7..4c80b7e83b916 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -135,3 +135,15 @@ end # issue #17 @test collect(tokenize(">> "))[1].val==">>" + +# test added operators +@test collect(tokenize("1+=2"))[2].kind == Tokenize.Tokens.PLUS_EQ +@test collect(tokenize("1-=2"))[2].kind == Tokenize.Tokens.MINUS_EQ +@test collect(tokenize("1:=2"))[2].kind == Tokenize.Tokens.COLON_EQ +@test collect(tokenize("1*=2"))[2].kind == Tokenize.Tokens.STAR_EQ +@test collect(tokenize("1^=2"))[2].kind == Tokenize.Tokens.CIRCUMFLEX_EQ +@test collect(tokenize("1÷=2"))[2].kind == Tokenize.Tokens.DIVISION_EQ +@test collect(tokenize("1\\=2"))[2].kind == Tokenize.Tokens.BACKSLASH_EQ +@test collect(tokenize("1\$=2"))[2].kind == Tokenize.Tokens.EX_OR_EQ +@test collect(tokenize("1-->2"))[2].kind == Tokenize.Tokens.RIGHT_ARROW +@test collect(tokenize("1>:2"))[2].kind == Tokenize.Tokens.GREATER_COLON From b2a7e9dfb0247ee48ec2d50911584c46a0578b9a Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 25 Jan 2017 10:42:06 +0000 Subject: [PATCH 0045/1109] add `in` and `isa` as operators (JuliaLang/JuliaSyntax.jl#20) * add newline * add `in` and `isa` as operators * fix 0.5 test --- JuliaSyntax/src/lexer.jl | 11 +++++++++++ JuliaSyntax/src/token_kinds.jl | 2 ++ JuliaSyntax/test/lexer.jl | 12 ++++++++++++ 3 files changed, 25 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 52be0a0a72cde..bccf950dd5c1e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -334,6 +334,7 @@ function next_token(l::Lexer) elseif c == '+'; return lex_plus(l); elseif c == '-'; return lex_minus(l); elseif c == '`'; return lex_cmd(l); + elseif c == 'i'; return lex_i(l); elseif isdigit(c); return lex_digit(l) elseif is_identifier_start_char(c); return 
lex_identifier(l) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) @@ -531,6 +532,16 @@ function lex_xor(l::Lexer) return emit(l, Tokens.EX_OR) end +function lex_i(l::Lexer) + str = lex_identifier(l) + if str.val=="in" + return emit(l, Tokens.IN, "in") + elseif (VERSION >= v"0.6.0-dev.1471" && str.val == "isa") + return emit(l, Tokens.ISA, "isa") + else + return str + end +end # A digit has been consumed function lex_digit(l::Lexer) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 6f7bfa9013997..ac75c25b1ab73 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -216,6 +216,8 @@ NOT_IS, # !== NOT_IDENTICAL_TO, # ≢ ELEMENT_OF, # ∈ + IN, # in + ISA, # isa NOT_AN_ELEMENT_OF, # ∉ CONTAINS_AS_MEMBER, # ∋ DOES_NOT_CONTAIN_AS_MEMBER, # ∌ diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 4c80b7e83b916..a900e3d4a76f5 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -147,3 +147,15 @@ end @test collect(tokenize("1\$=2"))[2].kind == Tokenize.Tokens.EX_OR_EQ @test collect(tokenize("1-->2"))[2].kind == Tokenize.Tokens.RIGHT_ARROW @test collect(tokenize("1>:2"))[2].kind == Tokenize.Tokens.GREATER_COLON + +@test collect(tokenize("1 in 2"))[3].kind == Tokenize.Tokens.IN +@test collect(tokenize("1 in[1]"))[3].kind == Tokenize.Tokens.IN + +if VERSION >= v"0.6.0-dev.1471" + @test collect(tokenize("1 isa 2"))[3].kind == Tokenize.Tokens.ISA + @test collect(tokenize("1 isa[2]"))[3].kind == Tokenize.Tokens.ISA +else + @test collect(tokenize("1 isa 2"))[3].kind == Tokenize.Tokens.IDENTIFIER + @test collect(tokenize("1 isa[2]"))[3].kind == Tokenize.Tokens.IDENTIFIER +end + From 969f0f0fadcefe57b044a25d0987a044ec44d4a9 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 25 Jan 2017 13:01:48 +0100 Subject: [PATCH 0046/1109] fix deprecation (JuliaLang/JuliaSyntax.jl#21) --- JuliaSyntax/src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index c56d4dff40c85..3fd814a4528d3 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -139,7 +139,7 @@ function peekchar(io::IOBuffer) end # this implementation is copied from Base -const _CHTMP = Array(Char, 1) +const _CHTMP = Vector{Char}(1) peekchar(s::IOStream) = begin if ccall(:ios_peekutf8, Int32, (Ptr{Void}, Ptr{Char}), s, _CHTMP) < 0 From 8a1fd727ee895bb3bd14eefbc8f925e598c73379 Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Thu, 26 Jan 2017 08:35:02 +0000 Subject: [PATCH 0047/1109] restructure token kinds --- JuliaSyntax/src/token_kinds.jl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index ac75c25b1ab73..7de3ce23939db 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -10,11 +10,22 @@ begin_keywords, KEYWORD, # general - BEGIN, WHILE, IF, FOR, TRY, RETURN, BREAK, CONTINUE, - FUNCTION, GLOBAL, MACRO, QUOTE, LET, LOCAL, - CONST, ABSTRACT, TYPE, BITSTYPE, IMMUTABLE, TYPEALIAS, - DO, MODULE, BAREMODULE, USING, IMPORT, EXPORT, IMPORTALL, - END, FALSE, TRUE, ELSE, ELSEIF, CATCH, FINALLY, + begin_0arg_kw, + TRUE, FALSE, BREAK, CONTINUE, + end_0arg_kw, + begin_1arg_kw, + ABSTRACT, CONST, EXPORT, GLOBAL, LOCAL, IMPORT, IMPORTALL, RETURN,USING, + end_1arg_kw, + begin_2arg_kw, + BITSTYPE, TYPEALIAS, + end_2arg_kw, + BEGIN, QUOTE, + begin_3arg_kw, + BAREMODULE, DO, FOR, FUNCTION, IMMUTABLE, LET, MACRO, MODULE, TYPE, WHILE, + end_3arg_kw, + IF, ELSEIF, ELSE, + TRY, CATCH, FINALLY, + END, end_keywords, begin_literal, From 0172d38d88dee8a1790f1be533c2f718d43a0b8f Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Thu, 26 Jan 2017 16:45:51 +0000 Subject: [PATCH 0048/1109] make bools literals --- JuliaSyntax/src/token_kinds.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token_kinds.jl 
b/JuliaSyntax/src/token_kinds.jl index 7de3ce23939db..f61186c29ce91 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -11,7 +11,7 @@ begin_keywords, KEYWORD, # general begin_0arg_kw, - TRUE, FALSE, BREAK, CONTINUE, + BREAK, CONTINUE, end_0arg_kw, begin_1arg_kw, ABSTRACT, CONST, EXPORT, GLOBAL, LOCAL, IMPORT, IMPORTALL, RETURN,USING, @@ -36,6 +36,7 @@ TRIPLE_STRING, # """ foo \n """ CHAR, # 'a' CMD, # `cmd ...` + TRUE, FALSE, end_literal, begin_delimiters, From a662d2e7fdee4371bd6c06d05479d91192b2f057 Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Thu, 26 Jan 2017 17:13:04 +0000 Subject: [PATCH 0049/1109] fix lexer, add tests --- JuliaSyntax/src/lexer.jl | 12 ++++++++++++ JuliaSyntax/test/lexer.jl | 3 +++ 2 files changed, 15 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index bccf950dd5c1e..89a9fc289b135 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -335,6 +335,7 @@ function next_token(l::Lexer) elseif c == '-'; return lex_minus(l); elseif c == '`'; return lex_cmd(l); elseif c == 'i'; return lex_i(l); + elseif c == 't' || c == 'f'; return lex_bool(l); elseif isdigit(c); return lex_digit(l) elseif is_identifier_start_char(c); return lex_identifier(l) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) @@ -543,6 +544,17 @@ function lex_i(l::Lexer) end end +function lex_bool(l::Lexer) + str = lex_identifier(l) + if str.val=="true" + return emit(l, Tokens.TRUE, "true") + elseif str.val == "false" + return emit(l, Tokens.FALSE, "false") + else + return str + end +end + # A digit has been consumed function lex_digit(l::Lexer) backup!(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index a900e3d4a76f5..84ea53654e5ad 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -159,3 +159,6 @@ else @test collect(tokenize("1 isa[2]"))[3].kind == Tokenize.Tokens.IDENTIFIER end +@test collect(tokenize("somtext 
true"))[3].kind == Tokenize.Tokens.TRUE +@test collect(tokenize("somtext false"))[3].kind == Tokenize.Tokens.FALSE + From 58ae464dde25e6049b6a30f6d51ee4490f602f23 Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Thu, 26 Jan 2017 17:41:45 +0000 Subject: [PATCH 0050/1109] more tests --- JuliaSyntax/test/lexer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 84ea53654e5ad..19acbb5bc2d61 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -161,4 +161,6 @@ end @test collect(tokenize("somtext true"))[3].kind == Tokenize.Tokens.TRUE @test collect(tokenize("somtext false"))[3].kind == Tokenize.Tokens.FALSE +@test collect(tokenize("somtext tr"))[3].kind == Tokenize.Tokens.IDENTIFIER +@test collect(tokenize("somtext falsething"))[3].kind == Tokenize.Tokens.IDENTIFIER From fc5db9b313509d533e1d776ca495e8c24fae0a6c Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Fri, 27 Jan 2017 11:00:04 +0000 Subject: [PATCH 0051/1109] replace Char[] w IObuffer --- JuliaSyntax/src/lexer.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 89a9fc289b135..4b84fdff54d3b 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -274,8 +274,7 @@ end Returns all characters since the start of the current `Token` as a `String`. 
""" function extract_tokenstring(l::Lexer) - cs = Char[] - sizehint!(cs, position(l) - startpos(l)) + cs = IOBuffer() curr_pos = position(l) seek2startpos!(l) @@ -286,9 +285,9 @@ function extract_tokenstring(l::Lexer) l.current_row += 1 l.current_col = 1 end - push!(cs, c) + write(cs, c) end - str = String(cs) + str = String(take!(cs)) return str end From c30d85a1b5f5aa0e6494ce7e1a6304df62d673a5 Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Fri, 27 Jan 2017 11:34:38 +0000 Subject: [PATCH 0052/1109] use global charstore --- JuliaSyntax/src/lexer.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 4b84fdff54d3b..664ec9540476f 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -1,6 +1,7 @@ module Lexers include("utilities.jl") +global const charstore = IOBuffer() using Compat import Compat.String @@ -274,7 +275,7 @@ end Returns all characters since the start of the current `Token` as a `String`. 
""" function extract_tokenstring(l::Lexer) - cs = IOBuffer() + global charstore curr_pos = position(l) seek2startpos!(l) @@ -285,9 +286,9 @@ function extract_tokenstring(l::Lexer) l.current_row += 1 l.current_col = 1 end - write(cs, c) + write(charstore, c) end - str = String(take!(cs)) + str = String(take!(charstore)) return str end From cacc5bd691f83e38927f53b1b822a07214b917c6 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 30 Jan 2017 11:37:37 +0100 Subject: [PATCH 0053/1109] Update README.md --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index c854ef68b0141..bef962b8d97e5 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -6,7 +6,7 @@ The goals of this package is to be -* Fast, it currently lexes all of Julia source files in less than a second (1.6 million Tokens) +* Fast, it currently lexes all of Julia source files in ~0.7 seconds (523 files, 1.8 million Tokens) * Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. * Non error throwing. Instead of throwing errors a certain error token is returned. 
From efe1b3d710b51ba837c816d7d2c062bcdd58c452 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 30 Jan 2017 11:51:31 +0100 Subject: [PATCH 0054/1109] add untokenize for iterables of tokens --- JuliaSyntax/src/token.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 0bae24ae996e5..2ccf6fb1d8154 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -70,6 +70,17 @@ exactkind(t::Token) = t.kind startpos(t::Token) = t.startpos endpos(t::Token) = t.endpos untokenize(t::Token) = t.val +function untokenize(ts) + if eltype(ts) != Token + throw(ArgumentError("element type of iterator has to be Token")) + end + io = IOBuffer() + for tok in ts + write(io, untokenize(tok)) + end + return String(take!(io)) +end + function Base.show(io::IO, t::Token) start_r, start_c = startpos(t) From a0b83d91c6e480975b6c08b078a082465d0331d6 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 30 Jan 2017 11:51:45 +0100 Subject: [PATCH 0055/1109] clean up testing --- JuliaSyntax/test/lex_yourself.jl | 62 +++++++ JuliaSyntax/test/lexer.jl | 282 ++++++++++++++++--------------- JuliaSyntax/test/runtests.jl | 62 +------ 3 files changed, 212 insertions(+), 194 deletions(-) create mode 100644 JuliaSyntax/test/lex_yourself.jl diff --git a/JuliaSyntax/test/lex_yourself.jl b/JuliaSyntax/test/lex_yourself.jl new file mode 100644 index 0000000000000..77d262fab689f --- /dev/null +++ b/JuliaSyntax/test/lex_yourself.jl @@ -0,0 +1,62 @@ +@testset "lex yourself" begin + +const PKGPATH = joinpath(dirname(@__FILE__), "..") + +global tot_files = 0 +global tot_time = 0.0 +global tot_tokens = 0 +global tot_errors = 0 +function testall(srcdir::AbstractString) + global tot_files, tot_time, tot_tokens, tot_errors + dirs, files = [], [] + + for fname in sort(readdir(srcdir)) + path = joinpath(srcdir, fname) + if isdir(path) + push!(dirs, path) + continue + end + _, ext = splitext(fname) + if ext == ".jl" + 
push!(files, path) + end + end + + if !isempty(files) + for jlpath in files + + fname = splitdir(jlpath)[end] + + buf = IOBuffer() + write(buf, open(readstring, jlpath)) + tot_files += 1 + tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) + tot_tokens += length(tokens) + + seek(buf, 0) + str = String(take!(buf)) + + collect(Tokenize.tokenize(str)) + + for token in tokens + if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR + tot_errors += 1 + end + end + end + end + for dir in dirs + testall(dir) + end +end + +testall(joinpath(PKGPATH, "benchmark")) +testall(joinpath(PKGPATH, "src")) +testall(joinpath(PKGPATH, "test")) + +println("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), + " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") + +@test tot_errors == 0 + +end # testset \ No newline at end of file diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 19acbb5bc2d61..fbe814a569050 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -2,165 +2,181 @@ using Tokenize using Tokenize.Lexers using Base.Test -for s in ["a", IOBuffer("a")] - # IOBuffer indexing starts at 0, string indexing at 1 - # difference is only relevant for internals - ob1 = isa(s, IOBuffer) ? 
1 : 0 - - l = tokenize(s) - @test Lexers.readchar(l) == 'a' - @test Lexers.prevpos(l) == 1 - ob1 - - @test l.current_pos == 2 - ob1 - l_old = l - @test Lexers.prevchar(l) == 'a' - @test l == l_old - @test Lexers.eof(l) - @test Lexers.readchar(l) == Lexers.EOF_CHAR - - Lexers.backup!(l) - @test Lexers.prevpos(l) == -1 - @test l.current_pos == 2 - ob1 -end +const T = Tokenize.Tokens -# correctly tokenizes simple unicode expressions: -str = "𝘋 =2β" -for s in [str, IOBuffer(str)] - l = tokenize(s) - kinds = [Tokens.IDENTIFIER, Tokens.WHITESPACE, Tokens.OP, - Tokens.INTEGER, Tokens.IDENTIFIER, Tokens.ENDMARKER] - token_strs = ["𝘋", " ", "=", "2", "β", ""] - for (i, n) in enumerate(l) - @test Tokens.kind(n) == kinds[i] - @test untokenize(n) == token_strs[i] - @test Tokens.startpos(n) == (1, i) - @test Tokens.endpos(n) == (1, i - 1 + length(token_strs[i])) +@testset "tokens" begin + for s in ["a", IOBuffer("a")] + # IOBuffer indexing starts at 0, string indexing at 1 + # difference is only relevant for internals + ob1 = isa(s, IOBuffer) ? 
1 : 0 + + l = tokenize(s) + @test Lexers.readchar(l) == 'a' + @test Lexers.prevpos(l) == 1 - ob1 + + @test l.current_pos == 2 - ob1 + l_old = l + @test Lexers.prevchar(l) == 'a' + @test l == l_old + @test Lexers.eof(l) + @test Lexers.readchar(l) == Lexers.EOF_CHAR + + Lexers.backup!(l) + @test Lexers.prevpos(l) == -1 + @test l.current_pos == 2 - ob1 end -end +end # testset + +@testset "tokenize unicode" begin + str = "𝘋 =2β" + for s in [str, IOBuffer(str)] + l = tokenize(s) + kinds = [T.IDENTIFIER, T.WHITESPACE, T.OP, + T.INTEGER, T.IDENTIFIER, T.ENDMARKER] + token_strs = ["𝘋", " ", "=", "2", "β", ""] + for (i, n) in enumerate(l) + @test T.kind(n) == kinds[i] + @test untokenize(n) == token_strs[i] + @test T.startpos(n) == (1, i) + @test T.endpos(n) == (1, i - 1 + length(token_strs[i])) + end + end +end # testset -const T = Tokenize.Tokens +@testset "tokenize complex piece of code" begin -# correctly tokenizes a complex piece of code -str = """ -function foo!{T<:Bar}(x::{T}=12) - @time (x+x, x+x); -end -try - foo -catch - bar -end -@time x+x -y[[1 2 3]] -[1*2,2;3,4] -"string"; 'c' -(a&&b)||(a||b) -# comment -#= comment -is done here =# -2%5 -a'/b' -a.'\\b.' -`command` -12_sin(12) -{} -' -""" + str = """ + function foo!{T<:Bar}(x::{T}=12) + @time (x+x, x+x); + end + try + foo + catch + bar + end + @time x+x + y[[1 2 3]] + [1*2,2;3,4] + "string"; 'c' + (a&&b)||(a||b) + # comment + #= comment + is done here =# + 2%5 + a'/b' + a.'\\b.' + `command` + 12_sin(12) + {} + ' + """ + + # Generate the following with + # ``` + # for t in Tokens.kind.(collect(tokenize(str))) + # print("T.", t, ",") + # end + # ``` + # and *check* it afterwards. -# Generate the following with -# ``` -# for t in Tokens.kind.(collect(tokenize(str))) -# print("T.", t, ",") -# end -# ``` -# and *check* it afterwards. 
+ kinds = [T.KEYWORD,T.WHITESPACE,T.IDENTIFIER,T.LBRACE,T.IDENTIFIER, + T.OP,T.IDENTIFIER,T.RBRACE,T.LPAREN,T.IDENTIFIER,T.OP, + T.LBRACE,T.IDENTIFIER,T.RBRACE,T.OP,T.INTEGER,T.RPAREN, -kinds = [T.KEYWORD,T.WHITESPACE,T.IDENTIFIER,T.LBRACE,T.IDENTIFIER, - T.OP,T.IDENTIFIER,T.RBRACE,T.LPAREN,T.IDENTIFIER,T.OP, - T.LBRACE,T.IDENTIFIER,T.RBRACE,T.OP,T.INTEGER,T.RPAREN, + T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.LPAREN, + T.IDENTIFIER,T.OP,T.IDENTIFIER,T.COMMA,T.WHITESPACE, + T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.SEMICOLON, - T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.LPAREN, - T.IDENTIFIER,T.OP,T.IDENTIFIER,T.COMMA,T.WHITESPACE, - T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.SEMICOLON, + T.WHITESPACE,T.KEYWORD, - T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.IDENTIFIER, + T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.IDENTIFIER, + T.WHITESPACE,T.KEYWORD, - T.WHITESPACE,T.KEYWORD, - T.WHITESPACE,T.IDENTIFIER, - T.WHITESPACE,T.KEYWORD, - T.WHITESPACE,T.IDENTIFIER, - T.WHITESPACE,T.KEYWORD, + T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, + T.OP,T.IDENTIFIER, - T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, - T.OP,T.IDENTIFIER, + T.WHITESPACE,T.IDENTIFIER,T.LSQUARE,T.LSQUARE,T.INTEGER,T.WHITESPACE, + T.INTEGER,T.WHITESPACE,T.INTEGER,T.RSQUARE,T.RSQUARE, - T.WHITESPACE,T.IDENTIFIER,T.LSQUARE,T.LSQUARE,T.INTEGER,T.WHITESPACE, - T.INTEGER,T.WHITESPACE,T.INTEGER,T.RSQUARE,T.RSQUARE, + T.WHITESPACE,T.LSQUARE,T.INTEGER,T.OP,T.INTEGER,T.COMMA,T.INTEGER, + T.SEMICOLON,T.INTEGER,T.COMMA,T.INTEGER,T.RSQUARE, - T.WHITESPACE,T.LSQUARE,T.INTEGER,T.OP,T.INTEGER,T.COMMA,T.INTEGER, - T.SEMICOLON,T.INTEGER,T.COMMA,T.INTEGER,T.RSQUARE, + T.WHITESPACE,T.STRING,T.SEMICOLON,T.WHITESPACE,T.CHAR, - T.WHITESPACE,T.STRING,T.SEMICOLON,T.WHITESPACE,T.CHAR, + T.WHITESPACE,T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.OP, + T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN, - 
T.WHITESPACE,T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.OP, - T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN, + T.WHITESPACE,T.COMMENT, - T.WHITESPACE,T.COMMENT, + T.WHITESPACE,T.COMMENT, - T.WHITESPACE,T.COMMENT, + T.WHITESPACE,T.INTEGER,T.OP,T.INTEGER, - T.WHITESPACE,T.INTEGER,T.OP,T.INTEGER, + T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.IDENTIFIER,T.OP, - T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.IDENTIFIER,T.OP, + T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, - T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, + T.WHITESPACE,T.CMD, - T.WHITESPACE,T.CMD, + T.WHITESPACE,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, - T.WHITESPACE,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, + T.WHITESPACE,T.LBRACE,T.RBRACE, - T.WHITESPACE,T.LBRACE,T.RBRACE, + T.WHITESPACE,T.ERROR,T.ENDMARKER] - T.WHITESPACE,T.ERROR,T.ENDMARKER] + for (i, n) in enumerate(tokenize(str)) + @test Tokens.kind(n) == kinds[i] + end + + @testset "roundtrippability" begin + @test join(untokenize.(collect(tokenize(str)))) == str + @test untokenize(collect(tokenize(str))) == str + @test untokenize(tokenize(str)) == str + @test_throws ArgumentError untokenize("blabla") + end +end # testset -for (i, n) in enumerate(tokenize(str)) - @test Tokens.kind(n) == kinds[i] +@testset "issue 5, '..'" begin + @test Tokens.kind.(collect(tokenize("1.23..3.21"))) == [T.FLOAT,T.OP,T.FLOAT,T.ENDMARKER] end -# test roundtrippability -@test join(untokenize.(collect(tokenize(str)))) == str - -# test #5 -@test Tokens.kind.(collect(tokenize("1.23..3.21"))) == [T.FLOAT,T.OP,T.FLOAT,T.ENDMARKER] - -# issue #17 -@test collect(tokenize(">> "))[1].val==">>" - -# test added operators -@test collect(tokenize("1+=2"))[2].kind == Tokenize.Tokens.PLUS_EQ -@test collect(tokenize("1-=2"))[2].kind == Tokenize.Tokens.MINUS_EQ -@test collect(tokenize("1:=2"))[2].kind == Tokenize.Tokens.COLON_EQ -@test collect(tokenize("1*=2"))[2].kind == Tokenize.Tokens.STAR_EQ -@test 
collect(tokenize("1^=2"))[2].kind == Tokenize.Tokens.CIRCUMFLEX_EQ -@test collect(tokenize("1÷=2"))[2].kind == Tokenize.Tokens.DIVISION_EQ -@test collect(tokenize("1\\=2"))[2].kind == Tokenize.Tokens.BACKSLASH_EQ -@test collect(tokenize("1\$=2"))[2].kind == Tokenize.Tokens.EX_OR_EQ -@test collect(tokenize("1-->2"))[2].kind == Tokenize.Tokens.RIGHT_ARROW -@test collect(tokenize("1>:2"))[2].kind == Tokenize.Tokens.GREATER_COLON - -@test collect(tokenize("1 in 2"))[3].kind == Tokenize.Tokens.IN -@test collect(tokenize("1 in[1]"))[3].kind == Tokenize.Tokens.IN - -if VERSION >= v"0.6.0-dev.1471" - @test collect(tokenize("1 isa 2"))[3].kind == Tokenize.Tokens.ISA - @test collect(tokenize("1 isa[2]"))[3].kind == Tokenize.Tokens.ISA -else - @test collect(tokenize("1 isa 2"))[3].kind == Tokenize.Tokens.IDENTIFIER - @test collect(tokenize("1 isa[2]"))[3].kind == Tokenize.Tokens.IDENTIFIER +@testset "issue 17, >>" begin + @test collect(tokenize(">> "))[1].val==">>" end -@test collect(tokenize("somtext true"))[3].kind == Tokenize.Tokens.TRUE -@test collect(tokenize("somtext false"))[3].kind == Tokenize.Tokens.FALSE -@test collect(tokenize("somtext tr"))[3].kind == Tokenize.Tokens.IDENTIFIER -@test collect(tokenize("somtext falsething"))[3].kind == Tokenize.Tokens.IDENTIFIER +@testset "test added operators" begin + @test collect(tokenize("1+=2"))[2].kind == T.PLUS_EQ + @test collect(tokenize("1-=2"))[2].kind == T.MINUS_EQ + @test collect(tokenize("1:=2"))[2].kind == T.COLON_EQ + @test collect(tokenize("1*=2"))[2].kind == T.STAR_EQ + @test collect(tokenize("1^=2"))[2].kind == T.CIRCUMFLEX_EQ + @test collect(tokenize("1÷=2"))[2].kind == T.DIVISION_EQ + @test collect(tokenize("1\\=2"))[2].kind == T.BACKSLASH_EQ + @test collect(tokenize("1\$=2"))[2].kind == T.EX_OR_EQ + @test collect(tokenize("1-->2"))[2].kind == T.RIGHT_ARROW + @test collect(tokenize("1>:2"))[2].kind == T.GREATER_COLON +end + +@testset "infix" begin + @test collect(tokenize("1 in 2"))[3].kind == T.IN + @test 
collect(tokenize("1 in[1]"))[3].kind == T.IN + + if VERSION >= v"0.6.0-dev.1471" + @test collect(tokenize("1 isa 2"))[3].kind == T.ISA + @test collect(tokenize("1 isa[2]"))[3].kind == T.ISA + else + @test collect(tokenize("1 isa 2"))[3].kind == T.IDENTIFIER + @test collect(tokenize("1 isa[2]"))[3].kind == T.IDENTIFIER + end +end + +@testset "tokenizing true/false literals" begin + @test collect(tokenize("somtext true"))[3].kind == T.TRUE + @test collect(tokenize("somtext false"))[3].kind == T.FALSE + @test collect(tokenize("somtext tr"))[3].kind == T.IDENTIFIER + @test collect(tokenize("somtext falsething"))[3].kind == T.IDENTIFIER +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 6518106e2b0e5..55482fe0168cb 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -2,65 +2,5 @@ using Base.Test import Tokenize -const PKGPATH = joinpath(dirname(@__FILE__), "..") - -tot_files = 0 -tot_time = 0.0 -tot_tokens = 0 -tot_errors = 0 -function testall(srcdir::AbstractString) - global tot_files, tot_time, tot_tokens, tot_errors - dirs, files = [], [] - - for fname in sort(readdir(srcdir)) - path = joinpath(srcdir, fname) - if isdir(path) - push!(dirs, path) - continue - end - _, ext = splitext(fname) - if ext == ".jl" - push!(files, path) - end - end - - if !isempty(files) - for jlpath in files - - fname = splitdir(jlpath)[end] - - buf = IOBuffer() - write(buf, open(readstring, jlpath)) - tot_files += 1 - tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) - tot_tokens += length(tokens) - - seek(buf, 0) - str = String(take!(buf)) - - collect(Tokenize.tokenize(str)) - - for token in tokens - if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR - tot_errors += 1 - end - end - end - end - for dir in dirs - testall(dir) - end -end - - -testall(joinpath(PKGPATH, "benchmark")) -testall(joinpath(PKGPATH, "src")) -testall(joinpath(PKGPATH, "test")) - - -println("Lexed ", tot_files, " files in ", @sprintf("%3.4f", 
tot_time), - " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") - -@test tot_errors == 0 - +include("lex_yourself.jl") include("lexer.jl") From e36ff1b134c10e155ed3e2cc6aff00761d44c8ad Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 31 Jan 2017 11:42:52 +0000 Subject: [PATCH 0056/1109] correct order --- JuliaSyntax/src/token_kinds.jl | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index f61186c29ce91..aab754dca4f2a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -78,18 +78,8 @@ begin_conditional, CONDITIONAL, # ? end_conditional, - + # Level 3 - begin_lazyor, - LAZY_OR, # || - end_lazyor, - - # Level 4 - begin_lazyand, - LAZY_AND, # && - end_lazyand, - - # Level 5 begin_arrow, RIGHT_ARROW, # --> LEFTWARDS_ARROW, # ← @@ -209,6 +199,15 @@ HALFWIDTH_RIGHTWARDS_ARROW, # → end_arrow, + # Level 4 + begin_lazyor, + LAZY_OR, # || + end_lazyor, + + # Level 5 + begin_lazyand, + LAZY_AND, # && + end_lazyand, # Level 6 begin_comparison, From b23affa2a5a228c6772c62ffe4932ae406d0ca12 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Thu, 2 Feb 2017 14:34:12 +0000 Subject: [PATCH 0057/1109] Fix juxtaposed dots and function calls (JuliaLang/JuliaSyntax.jl#29) * ds * fix juxtaposed numbers --- JuliaSyntax/src/lexer.jl | 6 ++++++ JuliaSyntax/test/lexer.jl | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 664ec9540476f..3faec8a5ac737 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -577,12 +577,18 @@ function lex_digit(l::Lexer) if peekchar(l) == '.' # 43.. -> [43, ..] 
backup!(l) return emit(l, Tokens.INTEGER) + elseif !(isdigit(peekchar(l)) || iswhitespace(peekchar(l)) || is_identifier_start_char(peekchar(l))) + backup!(l) + return emit(l, Tokens.INTEGER) end accept_batch(l, isdigit) if accept(l, '.') if peekchar(l) == '.' # 1.23..3.21 is valid backup!(l) return emit(l, Tokens.FLOAT) + elseif !(isdigit(peekchar(l)) || iswhitespace(peekchar(l)) || is_identifier_start_char(peekchar(l))) + backup!(l) + return emit(l, Tokens.FLOAT) else # 3213.313.3123 is an error return emit_error(l) end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index fbe814a569050..21d7e6078ea48 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -180,3 +180,15 @@ end @test collect(tokenize("somtext tr"))[3].kind == T.IDENTIFIER @test collect(tokenize("somtext falsething"))[3].kind == T.IDENTIFIER end + + +@testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin + @test (t->t.val=="1234" && t.kind == Tokens.INTEGER)(collect(tokenize("1234.+1"))[1]) + @test (t->t.val=="1234" && t.kind == Tokens.INTEGER)(collect(tokenize("1234 .+1"))[1]) + @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0.+1"))[1]) + @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0 .+1"))[1]) + @test (t->t.val=="1234." && t.kind == Tokens.FLOAT)(collect(tokenize("1234.f(a)"))[1]) + @test (t->t.val=="1234" && t.kind == Tokens.INTEGER)(collect(tokenize("1234 .f(a)"))[1]) + @test (t->t.val=="1234.0." 
&& t.kind == Tokens.ERROR)(collect(tokenize("1234.0.f(a)"))[1]) + @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0 .f(a)"))[1]) +end From 20b393fd3a96f83f2a24cd355f257559752aaad4 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Thu, 2 Feb 2017 15:52:46 +0000 Subject: [PATCH 0058/1109] Anon func (JuliaLang/JuliaSyntax.jl#30) * ds * anon func def --- JuliaSyntax/src/lexer.jl | 2 ++ JuliaSyntax/src/token_kinds.jl | 1 + JuliaSyntax/test/lexer.jl | 5 +++++ 3 files changed, 8 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 3faec8a5ac737..79c9845e8b45e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -497,6 +497,8 @@ function lex_minus(l::Lexer) else return emit_error(l) # "--" is an invalid operator end + elseif accept(l, '>') + return emit(l, Tokens.ANON_FUNC) elseif accept(l, '=') return emit(l, Tokens.MINUS_EQ) end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index aab754dca4f2a..7d5766eff8eb8 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -710,6 +710,7 @@ DDDOT, # ... OR, # | TRANSPOSE, # .' + ANON_FUNC, # -> begin_unicode_ops, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 21d7e6078ea48..d3effecb69c00 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -192,3 +192,8 @@ end @test (t->t.val=="1234.0." 
&& t.kind == Tokens.ERROR)(collect(tokenize("1234.0.f(a)"))[1]) @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0 .f(a)"))[1]) end + + +@testset "lexing anon functions '->' " begin + @test collect(tokenize("a->b"))[2].kind==Tokens.ANON_FUNC +end From 9388b49ffe6b94f4c5f13802c3a606fe7efa275e Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 3 Feb 2017 16:39:21 +0100 Subject: [PATCH 0059/1109] use tuple instead of allocating vector (JuliaLang/JuliaSyntax.jl#33) --- JuliaSyntax/src/lexer.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 79c9845e8b45e..fa6ee4d72167f 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -623,7 +623,7 @@ function lex_digit(l::Lexer) end function lex_prime(l) - if l.last_token ∈ [Tokens.IDENTIFIER, Tokens.DOT, Tokens.RPAREN, Tokens.RSQUARE] + if l.last_token ∈ (Tokens.IDENTIFIER, Tokens.DOT, Tokens.RPAREN, Tokens.RSQUARE) return emit(l, Tokens.PRIME) else while true From 4dc0605ec29cadbbe20556e4a384ff6a5cd95de4 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 3 Feb 2017 16:39:32 +0100 Subject: [PATCH 0060/1109] fix lex comments (JuliaLang/JuliaSyntax.jl#32) --- JuliaSyntax/src/lexer.jl | 4 ++-- JuliaSyntax/test/lexer.jl | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index fa6ee4d72167f..488cf57fd7088 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -325,7 +325,7 @@ function next_token(l::Lexer) elseif c == '|'; return lex_bar(l) elseif c == '&'; return lex_amper(l) elseif c == '\''; return lex_prime(l) - elseif c == '÷'; return lex_division(l) + elseif c == '÷'; return lex_division(l) elseif c == '"'; return lex_quote(l); elseif c == '%'; return lex_percent(l); elseif c == '/'; return lex_forwardslash(l); @@ -351,7 +351,7 @@ function lex_whitespace(l::Lexer) end function 
lex_comment(l::Lexer) - if readchar(l) != '=' + if peekchar(l) != '=' while true c = readchar(l) if c == '\n' || eof(c) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index d3effecb69c00..243f4c748eda9 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -165,7 +165,7 @@ end @test collect(tokenize("1 in 2"))[3].kind == T.IN @test collect(tokenize("1 in[1]"))[3].kind == T.IN - if VERSION >= v"0.6.0-dev.1471" + if VERSION >= v"0.6.0-dev.1471" @test collect(tokenize("1 isa 2"))[3].kind == T.ISA @test collect(tokenize("1 isa[2]"))[3].kind == T.ISA else @@ -197,3 +197,19 @@ end @testset "lexing anon functions '->' " begin @test collect(tokenize("a->b"))[2].kind==Tokens.ANON_FUNC end + +@testset "comments" begin + toks = collect(tokenize(""" + # + \"\"\" + f + \"\"\" + 1 + """)) + + kinds = [T.COMMENT, T.WHITESPACE, + T.TRIPLE_STRING, T.WHITESPACE, + T.INTEGER, T.WHITESPACE, + T.ENDMARKER] + @test T.kind.(toks) == kinds +end From fc4cd417cac5b1e7b05ebb6e14a9e3ced081eb20 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 3 Feb 2017 17:36:03 +0000 Subject: [PATCH 0061/1109] fix prime handling (JuliaLang/JuliaSyntax.jl#34) * fix prime handling * speedup * tests * typo * another typo --- JuliaSyntax/src/lexer.jl | 8 +++++++- JuliaSyntax/test/lexer.jl | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 488cf57fd7088..04e72d68d92e3 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -623,9 +623,15 @@ function lex_digit(l::Lexer) end function lex_prime(l) - if l.last_token ∈ (Tokens.IDENTIFIER, Tokens.DOT, Tokens.RPAREN, Tokens.RSQUARE) + if l.last_token == Tokens.IDENTIFIER || + l.last_token == Tokens.DOT || + l.last_token == Tokens.RPAREN || + l.last_token == Tokens.RSQUARE || l.last_token == Tokens.PRIME return emit(l, Tokens.PRIME) else + if peekchar(l)=='\'' + return emit(l, Tokens.PRIME) + end while true c = readchar(l) if 
eof(c) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 243f4c748eda9..7e11934b697e4 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -213,3 +213,16 @@ end T.ENDMARKER] @test T.kind.(toks) == kinds end + + +@testset "primes" begin + tokens = collect(tokenize( + """ + ImageMagick.save(fn, reinterpret(ARGB32, [0xf0884422]'')) + D = ImageMagick.load(fn) + """)) + @test tokens[16].val==tokens[17].val=="'" + @test all(x->x.val=="'", collect(tokenize("''"))[1:2]) + @test all(x->x.val=="'", collect(tokenize("'''"))[1:3]) + @test all(x->x.val=="'", collect(tokenize("''''"))[1:4]) +end \ No newline at end of file From 1ce03d427e752e9c814bd81e7b367e9164789fa4 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 8 Feb 2017 06:34:28 +0000 Subject: [PATCH 0062/1109] fix lex_bool (JuliaLang/JuliaSyntax.jl#35) * fix lex_bool * add test --- JuliaSyntax/src/lexer.jl | 14 ++++++++++++-- JuliaSyntax/test/lexer.jl | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 04e72d68d92e3..ae187765627f6 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -549,9 +549,19 @@ end function lex_bool(l::Lexer) str = lex_identifier(l) if str.val=="true" - return emit(l, Tokens.TRUE, "true") + l.last_token = Tokens.TRUE + start_token!(l) + return Token(Tokens.TRUE, str.startpos, + str.endpos, + str.startbyte, str.endbyte, + str.val, str.token_error) elseif str.val == "false" - return emit(l, Tokens.FALSE, "false") + l.last_token = Tokens.FALSE + start_token!(l) + return Token(Tokens.FALSE, str.startpos, + str.endpos, + str.startbyte, str.endbyte, + str.val, str.token_error) else return str end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 7e11934b697e4..d6add87744aaf 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -137,6 +137,8 @@ end # testset @test untokenize(tokenize(str)) == str @test_throws ArgumentError 
untokenize("blabla") end + + @test all((t.endbyte - t.startbyte)==sizeof(t.val) for t in tokenize(str)) end # testset @testset "issue 5, '..'" begin From 37d54b4ee5e91659762295cf8942e96479da14be Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 8 Feb 2017 16:21:49 +0000 Subject: [PATCH 0063/1109] Fix lex i (JuliaLang/JuliaSyntax.jl#36) * fix lex_i * fix test --- JuliaSyntax/src/lexer.jl | 14 ++++++++++++-- JuliaSyntax/test/lexer.jl | 5 +++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index ae187765627f6..03f2a5a31fc93 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -538,9 +538,19 @@ end function lex_i(l::Lexer) str = lex_identifier(l) if str.val=="in" - return emit(l, Tokens.IN, "in") + l.last_token = Tokens.IN + start_token!(l) + return Token(Tokens.IN, str.startpos, + str.endpos, + str.startbyte, str.endbyte, + str.val, str.token_error) elseif (VERSION >= v"0.6.0-dev.1471" && str.val == "isa") - return emit(l, Tokens.ISA, "isa") + l.last_token = Tokens.ISA + start_token!(l) + return Token(Tokens.ISA, str.startpos, + str.endpos, + str.startbyte, str.endbyte, + str.val, str.token_error) else return str end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index d6add87744aaf..775d9794b1343 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -227,4 +227,9 @@ end @test all(x->x.val=="'", collect(tokenize("''"))[1:2]) @test all(x->x.val=="'", collect(tokenize("'''"))[1:3]) @test all(x->x.val=="'", collect(tokenize("''''"))[1:4]) +end + +@testset "in/isa bytelength" begin + t = collect(tokenize("x in y"))[3] + @test t.endbyte-t.startbyte==2 end \ No newline at end of file From 03f400dc6469ab480bc90f78ce6044b5892ee87d Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 8 Feb 2017 20:35:27 +0000 Subject: [PATCH 0064/1109] Move OR, remove VERTICAL_LINE (JuliaLang/JuliaSyntax.jl#37) * remove 2nd instance of `|` * fix --- 
JuliaSyntax/src/token_kinds.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 7d5766eff8eb8..616b1663f89a5 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -522,7 +522,7 @@ CIRCLED_MINUS, # ⊖ SQUARED_PLUS, # ⊞ SQUARED_MINUS, # ⊟ - VERTICAL_LINE, # | + OR, # | UNION, # ∪ LOGICAL_OR, # ∨ SQUARE_CUP, # ⊔ @@ -708,7 +708,6 @@ NOT, # ! PRIME, # ' DDDOT, # ... - OR, # | TRANSPOSE, # .' ANON_FUNC, # -> @@ -1123,7 +1122,7 @@ const UNICODE_OPS = Dict{Char, Kind}( '⊖' => CIRCLED_MINUS, '⊞' => SQUARED_PLUS, '⊟' => SQUARED_MINUS, -'|' => VERTICAL_LINE, +'|' => OR, '∪' => UNION, '∨' => LOGICAL_OR, '⊔' => SQUARE_CUP, From d7bbe73afbfd1519c3f761fc1b56b0cb36161f14 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 9 Feb 2017 22:09:45 +0100 Subject: [PATCH 0065/1109] always use emit to emit tokens (JuliaLang/JuliaSyntax.jl#40) --- JuliaSyntax/src/lexer.jl | 47 +++++++++++++--------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 03f2a5a31fc93..971e6866db4e3 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -536,47 +536,30 @@ function lex_xor(l::Lexer) end function lex_i(l::Lexer) - str = lex_identifier(l) - if str.val=="in" - l.last_token = Tokens.IN - start_token!(l) - return Token(Tokens.IN, str.startpos, - str.endpos, - str.startbyte, str.endbyte, - str.val, str.token_error) - elseif (VERSION >= v"0.6.0-dev.1471" && str.val == "isa") - l.last_token = Tokens.ISA - start_token!(l) - return Token(Tokens.ISA, str.startpos, - str.endpos, - str.startbyte, str.endbyte, - str.val, str.token_error) + accept_batch(l, is_identifier_char) + str = extract_tokenstring(l) + if str == "in" + return emit(l, Tokens.IN) + elseif (VERSION >= v"0.6.0-dev.1471" && str == "isa") + return emit(l, Tokens.ISA) else - return str + return emit(l, 
get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER), str) end end function lex_bool(l::Lexer) - str = lex_identifier(l) - if str.val=="true" - l.last_token = Tokens.TRUE - start_token!(l) - return Token(Tokens.TRUE, str.startpos, - str.endpos, - str.startbyte, str.endbyte, - str.val, str.token_error) - elseif str.val == "false" - l.last_token = Tokens.FALSE - start_token!(l) - return Token(Tokens.FALSE, str.startpos, - str.endpos, - str.startbyte, str.endbyte, - str.val, str.token_error) + accept_batch(l, is_identifier_char) + str = extract_tokenstring(l) + if str == "true" + return emit(l, Tokens.TRUE) + elseif str == "false" + return emit(l, Tokens.FALSE) else - return str + return emit(l, get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER), str) end end + # A digit has been consumed function lex_digit(l::Lexer) backup!(l) From 1202d027df8b85ed35e0dd995616c712f15930c5 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 10 Feb 2017 09:27:09 +0100 Subject: [PATCH 0066/1109] static if on version (JuliaLang/JuliaSyntax.jl#42) --- JuliaSyntax/src/lexer.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 971e6866db4e3..9cc2024f75a71 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -538,13 +538,11 @@ end function lex_i(l::Lexer) accept_batch(l, is_identifier_char) str = extract_tokenstring(l) - if str == "in" - return emit(l, Tokens.IN) - elseif (VERSION >= v"0.6.0-dev.1471" && str == "isa") - return emit(l, Tokens.ISA) - else - return emit(l, get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER), str) + str == "in" && return emit(l, Tokens.IN) + @static if VERSION >= v"0.6.0-dev.1471" + str == "isa" && return emit(l, Tokens.ISA) end + return emit(l, get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER), str) end function lex_bool(l::Lexer) From e6a48918268e093a6e4447813f7778d9577012b5 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 10 Feb 2017 10:57:18 +0100 Subject: 
[PATCH 0067/1109] only use buffer internally (JuliaLang/JuliaSyntax.jl#43) --- JuliaSyntax/src/lexer.jl | 47 +++++++++------------------------------ JuliaSyntax/test/lexer.jl | 14 +++++------- 2 files changed, 16 insertions(+), 45 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 9cc2024f75a71..7f43294b70754 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -21,7 +21,7 @@ end ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') iswhitespace(c::Char) = Base.UTF8proc.isspace(c) -type Lexer{IO_t <: Union{IO, AbstractString}} +type Lexer{IO_t <: IO} io::IO_t token_start_row::Int @@ -38,6 +38,7 @@ type Lexer{IO_t <: Union{IO, AbstractString}} end Lexer(io) = Lexer(io, 1, 1, Int64(-1), Int64(0), 1, 1, Int64(1), Tokens.ERROR) +Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ tokenize(x) @@ -73,7 +74,7 @@ end Base.done(l::Lexer, isdone) = isdone function Base.show(io::IO, l::Lexer) - println(io, "Lexer at position: ", position(l)) + print(io, typeof(l), " at position: ", position(l)) end """ @@ -104,47 +105,37 @@ Set the lexer's previous position. """ prevpos!(l::Lexer, i::Integer) = l.prevpos = i -Base.seekstart{I <: IO}(l::Lexer{I}) = seekstart(l.io) -Base.seekstart{I <: String}(l::Lexer{I}) = seek(l, 1) +Base.seekstart(l::Lexer) = seekstart(l.io) """ seek2startpos!(l::Lexer) Sets the lexer's current position to the beginning of the latest `Token`. """ -function seek2startpos! end -seek2startpos!{I <: IO}(l::Lexer{I}) = seek(l, startpos(l)) -seek2startpos!{I <: String}(l::Lexer{I}) = seek(l, startpos(l) + 1) +seek2startpos!(l::Lexer) = seek(l, startpos(l)) """ peekchar(l::Lexer) Returns the next character without changing the lexer's state. """ -function peekchar end -peekchar{I <: IO}(l::Lexer{I}) = peekchar(l.io) -peekchar{I <: String}(l::Lexer{I}) = eof(l) ? EOF_CHAR : l.io[position(l)] +peekchar(l::Lexer) = peekchar(l.io) """ position(l::Lexer) Returns the current position. 
""" -function position end -position{I <: String}(l::Lexer{I}) = l.current_pos -position{I <: IO}(l::Lexer{I}) = Base.position(l.io) +position(l::Lexer) = Base.position(l.io) """ eof(l::Lexer) -Determine whether the end of the lexer's underlying buffer or string has been reached. +Determine whether the end of the lexer's underlying buffer has been reached. """ -function eof end -eof{I <: IO}(l::Lexer{I}) = eof(l.io) -eof{I <: String}(l::Lexer{I}) = position(l) > sizeof(l.io) +eof(l::Lexer) = eof(l.io) -Base.seek{I <: IO}(l::Lexer{I}, pos) = seek(l.io, pos) -Base.seek{I <: String}(l::Lexer{I}, pos) = l.current_pos = pos +Base.seek(l::Lexer, pos) = seek(l.io, pos) """ start_token!(l::Lexer) @@ -152,20 +143,12 @@ Base.seek{I <: String}(l::Lexer{I}, pos) = l.current_pos = pos Updates the lexer's state such that the next `Token` will start at the current position. """ -function start_token! end - -function start_token!{I <: IO}(l::Lexer{I}) +function start_token!(l::Lexer) l.token_startpos = position(l) l.token_start_row = l.current_row l.token_start_col = l.current_col end -function start_token!{I <: String}(l::Lexer{I}) - l.token_startpos = position(l) - 1 - l.token_start_row = l.current_row - l.token_start_col = l.current_col -end - """ prevchar(l::Lexer) @@ -189,14 +172,6 @@ function readchar{I <: IO}(l::Lexer{I}) return c end -function readchar{I <: String}(l::Lexer{I}) - prevpos!(l, position(l)) - eof(l) && return EOF_CHAR - c = l.io[position(l)] - l.current_pos = nextind(l.io, position(l)) - return c -end - """ backup!(l::Lexer) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 775d9794b1343..80f1760360ce0 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -6,15 +6,11 @@ const T = Tokenize.Tokens @testset "tokens" begin for s in ["a", IOBuffer("a")] - # IOBuffer indexing starts at 0, string indexing at 1 - # difference is only relevant for internals - ob1 = isa(s, IOBuffer) ? 
1 : 0 - l = tokenize(s) @test Lexers.readchar(l) == 'a' - @test Lexers.prevpos(l) == 1 - ob1 + @test Lexers.prevpos(l) == 0 - @test l.current_pos == 2 - ob1 + @test l.current_pos == 1 l_old = l @test Lexers.prevchar(l) == 'a' @test l == l_old @@ -23,7 +19,7 @@ const T = Tokenize.Tokens Lexers.backup!(l) @test Lexers.prevpos(l) == -1 - @test l.current_pos == 2 - ob1 + @test l.current_pos == 1 end end # testset @@ -138,7 +134,7 @@ end # testset @test_throws ArgumentError untokenize("blabla") end - @test all((t.endbyte - t.startbyte)==sizeof(t.val) for t in tokenize(str)) + @test all((t.endbyte - t.startbyte + 1)==sizeof(t.val) for t in tokenize(str)) end # testset @testset "issue 5, '..'" begin @@ -231,5 +227,5 @@ end @testset "in/isa bytelength" begin t = collect(tokenize("x in y"))[3] - @test t.endbyte-t.startbyte==2 + @test t.endbyte - t.startbyte + 1 == 2 end \ No newline at end of file From 4da19959b7d7d7d73f27e5d05a8b4fa245565bc6 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 10 Feb 2017 17:28:54 +0100 Subject: [PATCH 0068/1109] lex identifiers faster (JuliaLang/JuliaSyntax.jl#45) * lex identifiers faster * remove special case lexing first chars * fix version check on ISA * fix * add tests and make function a bit smaller * add test * add missing returns --- JuliaSyntax/src/lexer.jl | 257 +++++++++++++++++++++++++++++++++++++- JuliaSyntax/test/lexer.jl | 45 +++++++ 2 files changed, 298 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 7f43294b70754..3720fa305dd75 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -9,6 +9,11 @@ import Compat.String import ..Tokens import ..Tokens: Token, Kind, TokenError, UNICODE_OPS +import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, + DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, + IMPORT, IMPORTALL, MACRO, MODULE, QUOTE, RETURN, TRUE, 
TRY, TYPE, TYPEALIAS, USING, WHILE, ISA, IN + + export tokenize # using Logging @@ -51,7 +56,6 @@ tokenize(x) = Lexer(x) # Iterator interface Base.iteratorsize{IO_t}(::Type{Lexer{IO_t}}) = Base.SizeUnknown() Base.iteratoreltype{IO_t}(::Type{Lexer{IO_t}}) = Base.HasEltype() - Base.eltype{IO_t}(::Type{Lexer{IO_t}}) = Token function Base.start(l::Lexer) @@ -309,10 +313,8 @@ function next_token(l::Lexer) elseif c == '+'; return lex_plus(l); elseif c == '-'; return lex_minus(l); elseif c == '`'; return lex_cmd(l); - elseif c == 'i'; return lex_i(l); - elseif c == 't' || c == 'f'; return lex_bool(l); elseif isdigit(c); return lex_digit(l) - elseif is_identifier_start_char(c); return lex_identifier(l) + elseif is_identifier_start_char(c); return lex_identifier(l, c) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) else emit_error(l) end @@ -731,4 +733,251 @@ function lex_cmd(l::Lexer) end end +function tryread(l, str, k) + for s in str + c = readchar(l) + if c!=s + if !is_identifier_char(c) + backup!(l) + return emit(l, IDENTIFIER) + end + accept_batch(l, is_identifier_char) + return emit(l, IDENTIFIER) + end + end + if is_identifier_char(peekchar(l)) + accept_batch(l, is_identifier_char) + return emit(l, IDENTIFIER) + end + return emit(l, k) +end + +function readrest(l) + accept_batch(l, is_identifier_char) + return emit(l, IDENTIFIER) +end + + +function _doret(c, l) + if !is_identifier_char(c) + backup!(l) + return emit(l, IDENTIFIER) + else + return readrest(l) + end +end + +function lex_identifier(l, c) + if c == 'a' + return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT) + elseif c == 'b' + c = readchar(l) + if c == 'a' + return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE) + elseif c == 'e' + return tryread(l, ('g', 'i', 'n'), BEGIN) + elseif c == 'i' + return tryread(l, ('t', 's', 't', 'y', 'p', 'e'), BITSTYPE) + elseif c == 'r' + return tryread(l, ('e', 'a', 'k'), BREAK) + else + return _doret(c, l) + 
end + elseif c == 'c' + c = readchar(l) + if c == 'a' + return tryread(l, ('t', 'c', 'h'), CATCH) + elseif c == 'o' + c = readchar(l) + if c == 'n' + c = readchar(l) + if c == 's' + return tryread(l, ('t',), CONST) + elseif c == 't' + return tryread(l, ('i', 'n', 'u', 'e'), CONTINUE) + else + return _doret(c, l) + end + else + return _doret(c, l) + end + else + return _doret(c, l) + end + elseif c == 'd' + return tryread(l, ('o'), DO) + elseif c == 'e' + c = readchar(l) + if c == 'l' + c = readchar(l) + if c == 's' + c = readchar(l) + if c == 'e' + c = readchar(l) + if !is_identifier_char(c) + backup!(l) + return emit(l, ELSE) + elseif c == 'i' + return tryread(l, ('f'), ELSEIF) + else + return _doret(c, l) + end + else + return _doret(c, l) + end + else + return _doret(c, l) + end + elseif c == 'n' + return tryread(l, ('d'), END) + elseif c == 'x' + return tryread(l, ('p', 'o', 'r', 't'), EXPORT) + else + return _doret(c, l) + end + elseif c == 'f' + c = readchar(l) + if c == 'a' + return tryread(l, ('l', 's', 'e'), FALSE) + elseif c == 'i' + return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY) + elseif c == 'o' + return tryread(l, ('r'), FOR) + elseif c == 'u' + return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION) + else + return _doret(c, l) + end + elseif c == 'g' + return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL) + elseif c == 'i' + c = readchar(l) + if c == 'f' + c = readchar(l) + if !is_identifier_char(c) + skip(l.io, -Int(!eof(c))) + return emit(l, IF) + else + return readrest(l) + end + elseif c == 'm' + c = readchar(l) + if c == 'm' + return tryread(l, ('u', 't', 'a', 'b', 'l', 'e'), IMMUTABLE) + elseif c == 'p' + c = readchar(l) + if c == 'o' + c = readchar(l) + if c == 'r' + c = readchar(l) + if c == 't' + c = readchar(l) + if !is_identifier_char(c) + skip(l.io, -Int(!eof(c))) + return emit(l, IMPORT) + elseif c == 'a' + return tryread(l, ('l','l'), IMPORTALL) + else + return _doret(c, l) + end + else + return _doret(c, l) + end + else + 
return _doret(c, l) + end + else + return _doret(c, l) + end + else + return _doret(c, l) + end + elseif c == 'n' + c = readchar(l) + if !is_identifier_char(c) + skip(l.io, -Int(!eof(c))) + return emit(l, IN) + else + return readrest(l) + end + elseif (@static VERSION >= v"0.6.0-dev.1471" ? true : false) && c == 's' + return tryread(l, ('a'), ISA) + else + return _doret(c, l) + end + elseif c == 'l' + c = readchar(l) + if c == 'e' + return tryread(l, ('t'), LET) + elseif c == 'o' + return tryread(l, ('c', 'a', 'l'), LOCAL) + else + return _doret(c, l) + end + elseif c == 'm' + c = readchar(l) + if c == 'a' + return tryread(l, ('c', 'r', 'o'), MACRO) + elseif c == 'o' + return tryread(l, ('d', 'u', 'l', 'e'), MODULE) + else + return _doret(c, l) + end + elseif c == 'q' + return tryread(l, ('u', 'o', 't', 'e'), QUOTE) + elseif c == 'r' + return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN) + elseif c == 't' + c = readchar(l) + if c == 'r' + c = readchar(l) + if c == 'u' + return tryread(l, ('e'), TRUE) + elseif c == 'y' + return emit(l, TRY) + else + if !is_identifier_char(c) + backup!(l) + return emit(l, IDENTIFIER) + else + return readrest(l) + end + end + elseif c == 'y' + c = readchar(l) + if c == 'p' + c = readchar(l) + if c == 'e' + c = readchar(l) + if !is_identifier_char(c) + backup!(l) + return emit(l, TYPE) + elseif c == 'a' + return tryread(l, ('l', 'i', 'a', 's'), TYPEALIAS) + else + return _doret(c, l) + end + else + return _doret(c, l) + end + else + return _doret(c, l) + end + else + return _doret(c, l) + end + elseif c == 'u' + return tryread(l, ('s', 'i', 'n', 'g'), USING) + elseif c == 'w' + return tryread(l, ('h', 'i', 'l', 'e'), WHILE) + else + if !is_identifier_char(c) + backup!(l) + return emit(l, IDENTIFIER) + else + return readrest(l) + end + end +end + end # module diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 80f1760360ce0..1ae005557e6df 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ 
-228,4 +228,49 @@ end @testset "in/isa bytelength" begin t = collect(tokenize("x in y"))[3] @test t.endbyte - t.startbyte + 1 == 2 +end + +@testset "keywords" begin + for kw in ["function", + "abstract", + "baremodule", + "begin", + "bitstype", + "break", + "catch", + "const", + "continue", + "do", + "else", + "elseif", + "end", + "export", + #"false", + "finally", + "for", + "function", + "global", + "let", + "local", + "if", + "immutable", + "import", + "importall", + "macro", + "module", + "quote", + "return", + #"true", + "try", + "type", + "typealias", + "using", + "while"] + + @test T.kind(collect(tokenize(kw))[1]) == T.KEYWORD + end +end + +@testset "issue in PR #45" begin + @test length(collect(tokenize("x)")))==3 end \ No newline at end of file From 3e258f5aa6a8130980708baeee85c8a799a9e261 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 10 Feb 2017 20:16:22 +0000 Subject: [PATCH 0069/1109] add invisble keywords (JuliaLang/JuliaSyntax.jl#46) * add invisble keywords * add ccall --- JuliaSyntax/src/token_kinds.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 616b1663f89a5..545f6154f3a2a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -28,6 +28,26 @@ END, end_keywords, + begin_invisble_keywords, + BLOCK, + CALL, + CCALL, + COMPARISON, + COMPREHENSION, + CURLY, + GENERATOR, + KW, + LINE, + MACROCALL, + PARAMETERS, + REF, + TOPLEVEL, + TUPLE, + TYPED_COMPREHENSION, + VCAT, + VECT, + end_invisble_keywords, + begin_literal, LITERAL, # general INTEGER, # 4 From 116951ea036b17402d60926a2cc783d78a5b7c1c Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sat, 11 Feb 2017 09:46:51 +0000 Subject: [PATCH 0070/1109] identifier fixes (JuliaLang/JuliaSyntax.jl#48) --- JuliaSyntax/src/lexer.jl | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 
3720fa305dd75..cf4aed398914d 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -855,7 +855,7 @@ function lex_identifier(l, c) if c == 'f' c = readchar(l) if !is_identifier_char(c) - skip(l.io, -Int(!eof(c))) + backup!(l) return emit(l, IF) else return readrest(l) @@ -873,7 +873,7 @@ function lex_identifier(l, c) if c == 't' c = readchar(l) if !is_identifier_char(c) - skip(l.io, -Int(!eof(c))) + backup!(l) return emit(l, IMPORT) elseif c == 'a' return tryread(l, ('l','l'), IMPORTALL) @@ -895,7 +895,7 @@ function lex_identifier(l, c) elseif c == 'n' c = readchar(l) if !is_identifier_char(c) - skip(l.io, -Int(!eof(c))) + backup!(l) return emit(l, IN) else return readrest(l) @@ -934,14 +934,15 @@ function lex_identifier(l, c) if c == 'u' return tryread(l, ('e'), TRUE) elseif c == 'y' - return emit(l, TRY) - else + c = readchar(l) if !is_identifier_char(c) backup!(l) - return emit(l, IDENTIFIER) + return emit(l, TRY) else - return readrest(l) + return _doret(c, l) end + else + return _doret(c, l) end elseif c == 'y' c = readchar(l) @@ -971,12 +972,7 @@ function lex_identifier(l, c) elseif c == 'w' return tryread(l, ('h', 'i', 'l', 'e'), WHILE) else - if !is_identifier_char(c) - backup!(l) - return emit(l, IDENTIFIER) - else - return readrest(l) - end + return _doret(c, l) end end From ab5b9ff88b99d996fb9ab4c11e3299bef5fab6f7 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sat, 11 Feb 2017 21:28:05 +0100 Subject: [PATCH 0071/1109] add coverage badge --- JuliaSyntax/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index bef962b8d97e5..0e2aaafc189d1 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,6 +1,7 @@ # Tokenize -[![Build Status](https://travis-ci.org/KristofferC/Tokenize.jl.svg?branch=master)](https://travis-ci.org/KristofferC/Tokenize.jl) [![Build 
status](https://ci.appveyor.com/api/projects/status/h9d9webkxyhpx790?svg=true)](https://ci.appveyor.com/project/KristofferC/tokenize-jl) +[![Build Status](https://travis-ci.org/KristofferC/Tokenize.jl.svg?branch=master)](https://travis-ci.org/KristofferC/Tokenize.jl) [![Build status](https://ci.appveyor.com/api/projects/status/h9d9webkxyhpx790?svg=true)](https://ci.appveyor.com/project/KristofferC/tokenize-jl) [![codecov.io](https://codecov.io/github/KristofferC/Tokenize.jl/coverage.svg?branch=master)](https://codecov.io/github/KristofferC/Tokenize.jl?branch=master) + `Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. From 49908858571f1547267bc70c701da5ac87a53c30 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sat, 11 Feb 2017 22:07:16 +0100 Subject: [PATCH 0072/1109] add some tests (JuliaLang/JuliaSyntax.jl#50) --- JuliaSyntax/src/lexer.jl | 23 ----------------------- JuliaSyntax/test/lexer.jl | 9 +++++++++ 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index cf4aed398914d..95af3f7499017 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -512,29 +512,6 @@ function lex_xor(l::Lexer) return emit(l, Tokens.EX_OR) end -function lex_i(l::Lexer) - accept_batch(l, is_identifier_char) - str = extract_tokenstring(l) - str == "in" && return emit(l, Tokens.IN) - @static if VERSION >= v"0.6.0-dev.1471" - str == "isa" && return emit(l, Tokens.ISA) - end - return emit(l, get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER), str) -end - -function lex_bool(l::Lexer) - accept_batch(l, is_identifier_char) - str = extract_tokenstring(l) - if str == "true" - return emit(l, Tokens.TRUE) - elseif str == "false" - return emit(l, Tokens.FALSE) - else - return emit(l, 
get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER), str) - end -end - - # A digit has been consumed function lex_digit(l::Lexer) backup!(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 1ae005557e6df..9ffa358e5c293 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -273,4 +273,13 @@ end @testset "issue in PR #45" begin @test length(collect(tokenize("x)")))==3 +end + +@testset "errors" begin + @test collect(tokenize("#= #= =#"))[1].kind == T.ERROR + @test collect(tokenize("'dsadsa"))[1].kind == T.ERROR + @test collect(tokenize("aa **"))[3].kind == T.ERROR + @test collect(tokenize("aa \" "))[3].kind == T.ERROR + @test collect(tokenize("aa \"\"\" \"dsad\" \"\""))[3].kind == T.ERROR + end \ No newline at end of file From 32ae809ac134c10a7ba6cac447ab002f56c6a7e6 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 13 Feb 2017 10:07:28 +0100 Subject: [PATCH 0073/1109] use better padding for printing (JuliaLang/JuliaSyntax.jl#52) --- JuliaSyntax/README.md | 39 +++++++++++++++++++-------------------- JuliaSyntax/src/token.jl | 7 +++---- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 0e2aaafc189d1..8105e2bf965bd 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -20,16 +20,15 @@ It takes a string or a buffer and creates an iterator that will sequentially ret ```jl julia> collect(tokenize("function f(x) end")) -9-element Array{Tokenize.Tokens.Token,1}: - 1,1-1,8: KEYWORD "function" - 1,9-1,9: WHITESPACE " " - 1,10-1,10: IDENTIFIER "f" - 1,11-1,11: LPAREN "(" - 1,12-1,12: IDENTIFIER "x" - 1,13-1,13: RPAREN ")" - 1,14-1,14: WHITESPACE " " - 1,15-1,17: KEYWORD "end" - 1,18-1,17: ENDMARKER "" + 1,1-1,8 KEYWORD "function" + 1,9-1,9 WHITESPACE " " + 1,10-1,10 IDENTIFIER "f" + 1,11-1,11 LPAREN "(" + 1,12-1,12 IDENTIFIER "x" + 1,13-1,13 RPAREN ")" + 1,14-1,14 WHITESPACE " " + 1,15-1,17 KEYWORD "end" + 1,18-1,17 ENDMARKER "" ``` #### `Token`s @@ 
-40,12 +39,12 @@ The API for a `Token` (non exported from the `Tokenize.Tokens` module) is. ```julia startpos(t)::Tuple{Int, Int} # row and column where the token start -endpos(t)::Tuple{Int, Int} # row and column where the token ends -startbyte(T)::Int64 # byte offset where the token start -endbyte(t)::Int64 # byte offset where the token ends -untokenize(t)::String # the string representation of the token -kind(t)::Token.Kind # The type of the token -exactkind(t)::Token.Kind # The exact type of the token +endpos(t)::Tuple{Int, Int} # row and column where the token ends +startbyte(T)::Int64 # byte offset where the token start +endbyte(t)::Int64 # byte offset where the token ends +untokenize(t)::String # string representation of the token +kind(t)::Token.Kind # kind of the token +exactkind(t)::Token.Kind # exact kind of the token ``` The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators and `KEYWORD` for all keywords while `exactkind` returns a unique kind for all different operators and keywords, ex; @@ -53,11 +52,11 @@ The difference between `kind` and `exactkind` is that `kind` returns `OP` for al ```jl julia> tok = collect(tokenize("⇒"))[1]; -julia> Tokenize.Tokens.kind(tok) -OP::Tokenize.Tokens.Kind = 60 +julia> Tokens.kind(tok) +OP::Tokenize.Tokens.Kind = 90 -julia> Tokenize.Tokens.exactkind(tok) -RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 129 +julia> Tokens.exactkind(tok) +RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 128 ``` All the different `Token.Kind` can be seen in the [`token_kinds.jl` file](https://github.com/KristofferC/Tokenize.jl/blob/master/src/token_kinds.jl) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 2ccf6fb1d8154..2545bf8b46cff 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -86,10 +86,9 @@ function Base.show(io::IO, t::Token) start_r, start_c = startpos(t) end_r, end_c = endpos(t) str = kind(t) == ENDMARKER ? 
"" : untokenize(t) - print(io, start_r, ",", start_c, "-", - end_r, ",", end_c, ":", - " ", kind(t), "\t") - show(io, str) + print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) + print(io, rpad(kind(t), 15, " ")) + print(io, "\"", str, "\"") end Base.print(io::IO, t::Token) = print(io, untokenize(t)) From ef5615ae8ab112a56aabf0c3cc5ae87285891998 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 21 Feb 2017 13:46:39 +0100 Subject: [PATCH 0074/1109] add precompilation directives (JuliaLang/JuliaSyntax.jl#53) --- JuliaSyntax/src/Tokenize.jl | 3 ++ JuliaSyntax/src/_precompile.jl | 68 ++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 JuliaSyntax/src/_precompile.jl diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index b56dcf73a71f0..9d9f3e41303ee 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -10,4 +10,7 @@ import .Tokens: untokenize export tokenize, untokenize, Tokens +include("_precompile.jl") +_precompile_() + end # module diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl new file mode 100644 index 0000000000000..f9d17376bad4e --- /dev/null +++ b/JuliaSyntax/src/_precompile.jl @@ -0,0 +1,68 @@ +function _precompile_() + ccall(:jl_generating_output, Cint, ()) == 1 || return nothing + precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) + precompile(Tokenize.Lexers.peekchar, (Base.AbstractIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.is_identifier_char, (Char,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, String,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + 
precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.ishex, (Char,)) + precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},)) + precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, String,)) + precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_division, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Function,)) + precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) + precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.iswhitespace, (Char,)) + precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Bool,)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) + precompile(Tokenize.Tokens.endpos, (Tokenize.Tokens.Token,)) + precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Bool,)) + precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, Base.AbstractIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Tokens.startpos, 
(Tokenize.Tokens.Token,)) + precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.Token,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.ishex),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) +end From a088a0165ec1acc35a3e5932217a93a279438913 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 24 Feb 2017 06:35:02 +0000 Subject: [PATCH 0075/1109] lex binary numbers (JuliaLang/JuliaSyntax.jl#57) --- JuliaSyntax/src/lexer.jl | 16 ++++++++++++---- JuliaSyntax/test/lexer.jl | 6 +++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 95af3f7499017..5b058459b93b9 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -24,6 +24,7 @@ macro debug(ex) end ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') +isbinary(c::Char) = c == '0' || c == '1' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) type Lexer{IO_t <: IO} @@ -565,10 +566,17 @@ function lex_digit(l::Lexer) seek2startpos!(l) # 0x[0-9A-Fa-f]+ - if accept(l, '0') && accept(l, 'x') - accept(l, "o") - if accept_batch(l, ishex) && position(l) > longest - longest, kind = position(l), Tokens.INTEGER + if accept(l, '0') + if accept(l, 'x') + accept(l, "o") + if accept_batch(l, ishex) && position(l) > longest + longest, kind = position(l), Tokens.INTEGER + end + elseif accept(l, 'b') + accept(l, "o") + if accept_batch(l, isbinary) && position(l) > longest + longest, kind = position(l), Tokens.INTEGER + end end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 9ffa358e5c293..c2828edeb0c76 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -282,4 +282,8 @@ end 
@test collect(tokenize("aa \" "))[3].kind == T.ERROR @test collect(tokenize("aa \"\"\" \"dsad\" \"\""))[3].kind == T.ERROR -end \ No newline at end of file +end + +@testset "lex binary" begin + @test collect(tokenize("0b0101"))[1].kind==T.INTEGER +end From 845afb7b4ad8a27ded6900b66a23215eacf3ed57 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 24 Feb 2017 08:48:32 +0000 Subject: [PATCH 0076/1109] add xor_eq (JuliaLang/JuliaSyntax.jl#58) * add xor_eq * test --- JuliaSyntax/src/lexer.jl | 12 ++++++++++-- JuliaSyntax/src/token_kinds.jl | 1 + JuliaSyntax/test/lexer.jl | 5 +++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 5b058459b93b9..49959a38cd206 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -294,7 +294,8 @@ function next_token(l::Lexer) elseif c == '^'; return lex_circumflex(l); elseif c == '@'; return emit(l, Tokens.AT_SIGN) elseif c == '?'; return emit(l, Tokens.CONDITIONAL) - elseif c == '$'; return lex_xor(l); + elseif c == '$'; return lex_dollar(l); + elseif c == '⊻'; return lex_xor(l); elseif c == '~'; return emit(l, Tokens.APPROX) elseif c == '#'; return lex_comment(l) elseif c == '='; return lex_equal(l) @@ -506,13 +507,20 @@ function lex_division(l::Lexer) return emit(l, Tokens.DIVISION_SIGN) end -function lex_xor(l::Lexer) +function lex_dollar(l::Lexer) if accept(l, '=') return emit(l, Tokens.EX_OR_EQ) end return emit(l, Tokens.EX_OR) end +function lex_xor(l::Lexer) + if accept(l, '=') + return emit(l, Tokens.XOR_EQ) + end + return emit(l, Tokens.XOR) +end + # A digit has been consumed function lex_digit(l::Lexer) backup!(l) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 545f6154f3a2a..9c42deeb13de4 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -92,6 +92,7 @@ PAIR_ARROW, # => APPROX, # ~ EX_OR_EQ, # $= + XOR_EQ, # ⊻= end_assignments, # Level 2 diff --git 
a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index c2828edeb0c76..eed174eaf151e 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -284,6 +284,11 @@ end end +@testset "xor_eq" begin + @test collect(tokenize("1 ⊻= 2"))[3].kind==T.XOR_EQ +end + @testset "lex binary" begin @test collect(tokenize("0b0101"))[1].kind==T.INTEGER end + From 9076be2cf4259a77d342bf740db5425633efbadf Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sun, 26 Feb 2017 15:21:18 +0100 Subject: [PATCH 0077/1109] escape tokenstrings in show (JuliaLang/JuliaSyntax.jl#59) --- JuliaSyntax/src/token.jl | 2 +- JuliaSyntax/test/lexer.jl | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 2545bf8b46cff..f9772f7bc2fab 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -85,7 +85,7 @@ end function Base.show(io::IO, t::Token) start_r, start_c = startpos(t) end_r, end_c = endpos(t) - str = kind(t) == ENDMARKER ? "" : untokenize(t) + str = kind(t) == ENDMARKER ? 
"" : escape_string(untokenize(t)) print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) print(io, rpad(kind(t), 15, " ")) print(io, "\"", str, "\"") diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index eed174eaf151e..9e59594b7cf95 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -292,3 +292,8 @@ end @test collect(tokenize("0b0101"))[1].kind==T.INTEGER end +@testset "show" begin + io = IOBuffer() + show(io, collect(tokenize("\"abc\nd\"ef"))[1]) + @test String(take!(io)) == "1,1-2,2 STRING \"\\\"abc\\nd\\\"\"" +end \ No newline at end of file From 1aa89c4cbfb6e036e90d15e5f6dd0215a237831b Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 1 Mar 2017 12:12:38 +0100 Subject: [PATCH 0078/1109] fix some linting (JuliaLang/JuliaSyntax.jl#60) --- JuliaSyntax/src/lexer.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 49959a38cd206..ab95ac84ffaea 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -229,7 +229,7 @@ end Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. """ function emit(l::Lexer, kind::Kind, - str::String=extract_tokenstring(l), err::TokenError=Tokens.NO_ERR) + str::String = extract_tokenstring(l), err::TokenError = Tokens.NO_ERR) tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, @@ -245,7 +245,7 @@ end Returns an `ERROR` token with error `err` and starts a new `Token`. 
""" -function emit_error(l::Lexer, err::TokenError=Tokens.UNKNOWN) +function emit_error(l::Lexer, err::TokenError = Tokens.UNKNOWN) return emit(l, Tokens.ERROR, extract_tokenstring(l), err) end @@ -317,7 +317,7 @@ function next_token(l::Lexer) elseif c == '`'; return lex_cmd(l); elseif isdigit(c); return lex_digit(l) elseif is_identifier_start_char(c); return lex_identifier(l, c) - elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) + elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR; return emit(l, k) else emit_error(l) end end @@ -574,7 +574,7 @@ function lex_digit(l::Lexer) seek2startpos!(l) # 0x[0-9A-Fa-f]+ - if accept(l, '0') + if accept(l, '0') if accept(l, 'x') accept(l, "o") if accept_batch(l, ishex) && position(l) > longest @@ -600,7 +600,7 @@ function lex_prime(l) l.last_token == Tokens.RSQUARE || l.last_token == Tokens.PRIME return emit(l, Tokens.PRIME) else - if peekchar(l)=='\'' + if peekchar(l) == '\'' return emit(l, Tokens.PRIME) end while true @@ -729,18 +729,18 @@ end function tryread(l, str, k) for s in str c = readchar(l) - if c!=s + if c != s if !is_identifier_char(c) backup!(l) return emit(l, IDENTIFIER) end accept_batch(l, is_identifier_char) - return emit(l, IDENTIFIER) + return emit(l, IDENTIFIER) end end if is_identifier_char(peekchar(l)) accept_batch(l, is_identifier_char) - return emit(l, IDENTIFIER) + return emit(l, IDENTIFIER) end return emit(l, k) end From 9dc27f1e7dd7f030d7cbc97cb6714df2b10cf46f Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 3 Mar 2017 14:37:13 +0000 Subject: [PATCH 0079/1109] add invisible keyword for typed_vcat and typed_hcat (JuliaLang/JuliaSyntax.jl#61) * add invisible keyword for typed_vcat and typed_hcat * and hcat --- JuliaSyntax/src/token_kinds.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 9c42deeb13de4..50d68abdbb519 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ 
b/JuliaSyntax/src/token_kinds.jl @@ -36,6 +36,7 @@ COMPREHENSION, CURLY, GENERATOR, + HCAT, KW, LINE, MACROCALL, @@ -44,6 +45,8 @@ TOPLEVEL, TUPLE, TYPED_COMPREHENSION, + TYPED_HCAT, + TYPED_VCAT, VCAT, VECT, end_invisble_keywords, From 093bc0627ce8504947a4a135f17e1c164b8664dd Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 8 Mar 2017 08:38:20 +0000 Subject: [PATCH 0080/1109] move dddot, precedence is less than assignments' (JuliaLang/JuliaSyntax.jl#65) --- JuliaSyntax/src/token_kinds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 50d68abdbb519..00a5aa42e951a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -73,6 +73,7 @@ begin_ops, OP, # general + DDDOT, # ... # Level 1 begin_assignments, @@ -731,7 +732,6 @@ NOT, # ! PRIME, # ' - DDDOT, # ... TRANSPOSE, # .' ANON_FUNC, # -> From 4606c22ab4d2afb40b49a282ef02dae75ceeface Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 8 Mar 2017 18:11:42 +0100 Subject: [PATCH 0081/1109] add inferred test (JuliaLang/JuliaSyntax.jl#66) --- JuliaSyntax/test/lexer.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 9e59594b7cf95..4ade13235d2d1 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -296,4 +296,9 @@ end io = IOBuffer() show(io, collect(tokenize("\"abc\nd\"ef"))[1]) @test String(take!(io)) == "1,1-2,2 STRING \"\\\"abc\\nd\\\"\"" -end \ No newline at end of file +end + +@testset "inferred" begin + l = tokenize("abc") + Base.Test.@inferred Tokenize.Lexers.next_token(l) +end From 4126518503808dc0ce3d4bae82ef40d4f61bbd73 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 8 Mar 2017 17:12:43 +0000 Subject: [PATCH 0082/1109] recognise STRINGs w/ enclosed interpolations (JuliaLang/JuliaSyntax.jl#64) * recognise STRINGs w/ enclosed interpolations * (type) stabilize lex_quote, add const 
EMPTY_TOKEN * fix trailing $ case --- JuliaSyntax/src/lexer.jl | 36 ++++++++++++++++++++++++++++++------ JuliaSyntax/src/token.jl | 2 ++ JuliaSyntax/test/lexer.jl | 10 ++++++++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index ab95ac84ffaea..3bc6479334536 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -7,7 +7,7 @@ using Compat import Compat.String import ..Tokens -import ..Tokens: Token, Kind, TokenError, UNICODE_OPS +import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, @@ -637,20 +637,20 @@ end # Parse a token starting with a quote. # A '"' has been consumed -function lex_quote(l::Lexer) +function lex_quote(l::Lexer, doemit=true) if accept(l, '"') # "" if accept(l, '"') # """ if read_string(l, Tokens.TRIPLE_STRING) - emit(l, Tokens.TRIPLE_STRING) + return doemit ? emit(l, Tokens.TRIPLE_STRING) : EMPTY_TOKEN else - emit_error(l, Tokens.EOF_STRING) + return emit_error(l, Tokens.EOF_STRING) end else # empty string - return emit(l, Tokens.STRING) + return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN end else # "?, ? != '"' if read_string(l, Tokens.STRING) - emit(l, Tokens.STRING) + return doemit ? 
emit(l, Tokens.STRING) : EMPTY_TOKEN else return emit_error(l, Tokens.EOF_STRING) end @@ -675,6 +675,30 @@ function read_string(l::Lexer, kind::Tokens.Kind) elseif eof(c) return false end + if c == '$' + c = readchar(l) + if c == '"' + if kind == Tokens.STRING + return true + else + if accept(l, "\"") && accept(l, "\"") + return true + end + end + elseif c == '(' + o = 1 + while o > 0 + c = readchar(l) + if c == '(' + o += 1 + elseif c == ')' + o -= 1 + elseif c == '"' + lex_quote(l, false) + end + end + end + end end end diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index f9772f7bc2fab..fca17269166cf 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -61,6 +61,8 @@ function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{In end Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) +const EMPTY_TOKEN = Token() + function kind(t::Token) isoperator(t.kind) && return OP iskeyword(t.kind) && return KEYWORD diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 4ade13235d2d1..619ebdd3c42bd 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -298,7 +298,17 @@ end @test String(take!(io)) == "1,1-2,2 STRING \"\\\"abc\\nd\\\"\"" end + +@testset "interpolation" begin + ts = collect(tokenize(""""str: \$(g("str: \$(h("str"))"))" """)) + @test length(ts)==3 + @test ts[1].kind == Tokens.STRING + ts = collect(tokenize("""\"\$\"""")) + @test ts[1].kind == Tokens.STRING +end + @testset "inferred" begin l = tokenize("abc") Base.Test.@inferred Tokenize.Lexers.next_token(l) end + From 97ec8d9f947881350986ab55788b7f1813ec3a47 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 10 Mar 2017 11:57:16 +0100 Subject: [PATCH 0083/1109] remove unused broken function (JuliaLang/JuliaSyntax.jl#68) --- JuliaSyntax/src/token.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index fca17269166cf..402f3d3ae4a54 100644 --- 
a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -95,6 +95,4 @@ end Base.print(io::IO, t::Token) = print(io, untokenize(t)) -eof(t::Token) = t.kind == Eof - end # module From 1e0d9e9b47c37dd89bbebd07144a142267eca6dc Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 10 Mar 2017 12:28:46 +0100 Subject: [PATCH 0084/1109] remove unused vars (JuliaLang/JuliaSyntax.jl#69) --- JuliaSyntax/src/lexer.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 3bc6479334536..4ddcb1fdbbf5d 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -71,12 +71,12 @@ function Base.start(l::Lexer) false end -function Base.next(l::Lexer, isdone) +function Base.next(l::Lexer, ::Any) t = next_token(l) return t, t.kind == Tokens.ENDMARKER end -Base.done(l::Lexer, isdone) = isdone +Base.done(::Lexer, isdone) = isdone function Base.show(io::IO, l::Lexer) print(io, typeof(l), " at position: ", position(l)) From 1bb7e41ad8e6da74f9105d574f979e5cdde724a6 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Mon, 13 Mar 2017 08:23:06 +0000 Subject: [PATCH 0085/1109] Fix lexing of identifiers followed by not_eq (JuliaLang/JuliaSyntax.jl#71) * fix led_id * fix '!' parsing * fix --- JuliaSyntax/src/lexer.jl | 144 +++++++++++++++++++------------------- JuliaSyntax/test/lexer.jl | 5 ++ 2 files changed, 77 insertions(+), 72 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 4ddcb1fdbbf5d..5aecb1a81416f 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -628,13 +628,6 @@ function lex_amper(l::Lexer) end end -function lex_identifier(l::Lexer) - accept_batch(l, is_identifier_char) - str = extract_tokenstring(l) - kind = get(Tokens.KEYWORDS, str, Tokens.IDENTIFIER) - return emit(l, kind, str) -end - # Parse a token starting with a quote. 
# A '"' has been consumed function lex_quote(l::Lexer, doemit=true) @@ -750,7 +743,7 @@ function lex_cmd(l::Lexer) end end -function tryread(l, str, k) +function tryread(l, str, k, c) for s in str c = readchar(l) if c != s @@ -758,71 +751,78 @@ function tryread(l, str, k) backup!(l) return emit(l, IDENTIFIER) end - accept_batch(l, is_identifier_char) - return emit(l, IDENTIFIER) + return readrest(l, c) end end if is_identifier_char(peekchar(l)) - accept_batch(l, is_identifier_char) - return emit(l, IDENTIFIER) + return readrest(l, c) end return emit(l, k) end -function readrest(l) - accept_batch(l, is_identifier_char) +function readrest(l, c) + while is_identifier_char(c) + if c == '!' && peekchar(l) == '=' + backup!(l) + break + elseif !is_identifier_char(peekchar(l)) + break + end + c = readchar(l) + end + return emit(l, IDENTIFIER) end -function _doret(c, l) +function _doret(l, c) if !is_identifier_char(c) backup!(l) return emit(l, IDENTIFIER) else - return readrest(l) + return readrest(l, c) end end function lex_identifier(l, c) if c == 'a' - return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT) + return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT, c) elseif c == 'b' c = readchar(l) if c == 'a' - return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE) + return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE, c) elseif c == 'e' - return tryread(l, ('g', 'i', 'n'), BEGIN) + return tryread(l, ('g', 'i', 'n'), BEGIN, c) elseif c == 'i' - return tryread(l, ('t', 's', 't', 'y', 'p', 'e'), BITSTYPE) + return tryread(l, ('t', 's', 't', 'y', 'p', 'e'), BITSTYPE, c) elseif c == 'r' - return tryread(l, ('e', 'a', 'k'), BREAK) + return tryread(l, ('e', 'a', 'k'), BREAK, c) else - return _doret(c, l) + return _doret(l, c) end elseif c == 'c' c = readchar(l) if c == 'a' - return tryread(l, ('t', 'c', 'h'), CATCH) + return tryread(l, ('t', 'c', 'h'), CATCH, c) elseif c == 'o' c = readchar(l) if c == 'n' c = readchar(l) 
if c == 's' - return tryread(l, ('t',), CONST) + return tryread(l, ('t',), CONST, c) elseif c == 't' - return tryread(l, ('i', 'n', 'u', 'e'), CONTINUE) + return tryread(l, ('i', 'n', 'u', 'e'), CONTINUE, c) else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end elseif c == 'd' - return tryread(l, ('o'), DO) + return tryread(l, ('o'), DO, c) elseif c == 'e' c = readchar(l) if c == 'l' @@ -835,38 +835,38 @@ function lex_identifier(l, c) backup!(l) return emit(l, ELSE) elseif c == 'i' - return tryread(l, ('f'), ELSEIF) + return tryread(l, ('f'), ELSEIF ,c) else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end elseif c == 'n' - return tryread(l, ('d'), END) + return tryread(l, ('d'), END, c) elseif c == 'x' - return tryread(l, ('p', 'o', 'r', 't'), EXPORT) + return tryread(l, ('p', 'o', 'r', 't'), EXPORT, c) else - return _doret(c, l) + return _doret(l, c) end elseif c == 'f' c = readchar(l) if c == 'a' - return tryread(l, ('l', 's', 'e'), FALSE) + return tryread(l, ('l', 's', 'e'), FALSE, c) elseif c == 'i' - return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY) + return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY, c) elseif c == 'o' - return tryread(l, ('r'), FOR) + return tryread(l, ('r'), FOR, c) elseif c == 'u' - return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION) + return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION, c) else - return _doret(c, l) + return _doret(l, c) end elseif c == 'g' - return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL) + return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL, c) elseif c == 'i' c = readchar(l) if c == 'f' @@ -875,12 +875,12 @@ function lex_identifier(l, c) backup!(l) return emit(l, IF) else - return readrest(l) + return readrest(l, c) end elseif c == 'm' c = readchar(l) if c == 'm' - return tryread(l, 
('u', 't', 'a', 'b', 'l', 'e'), IMMUTABLE) + return tryread(l, ('u', 't', 'a', 'b', 'l', 'e'), IMMUTABLE, c) elseif c == 'p' c = readchar(l) if c == 'o' @@ -893,21 +893,21 @@ function lex_identifier(l, c) backup!(l) return emit(l, IMPORT) elseif c == 'a' - return tryread(l, ('l','l'), IMPORTALL) + return tryread(l, ('l','l'), IMPORTALL, c) else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end elseif c == 'n' c = readchar(l) @@ -915,51 +915,51 @@ function lex_identifier(l, c) backup!(l) return emit(l, IN) else - return readrest(l) + return readrest(l, c) end elseif (@static VERSION >= v"0.6.0-dev.1471" ? true : false) && c == 's' - return tryread(l, ('a'), ISA) + return tryread(l, ('a'), ISA, c) else - return _doret(c, l) + return _doret(l, c) end elseif c == 'l' c = readchar(l) if c == 'e' - return tryread(l, ('t'), LET) + return tryread(l, ('t'), LET, c) elseif c == 'o' - return tryread(l, ('c', 'a', 'l'), LOCAL) + return tryread(l, ('c', 'a', 'l'), LOCAL, c) else - return _doret(c, l) + return _doret(l, c) end elseif c == 'm' c = readchar(l) if c == 'a' - return tryread(l, ('c', 'r', 'o'), MACRO) + return tryread(l, ('c', 'r', 'o'), MACRO, c) elseif c == 'o' - return tryread(l, ('d', 'u', 'l', 'e'), MODULE) + return tryread(l, ('d', 'u', 'l', 'e'), MODULE, c) else - return _doret(c, l) + return _doret(l, c) end elseif c == 'q' - return tryread(l, ('u', 'o', 't', 'e'), QUOTE) + return tryread(l, ('u', 'o', 't', 'e'), QUOTE, c) elseif c == 'r' - return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN) + return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN, c) elseif c == 't' c = readchar(l) if c == 'r' c = readchar(l) if c == 'u' - return tryread(l, ('e'), TRUE) + return tryread(l, ('e'), TRUE, c) elseif c == 'y' c = readchar(l) if !is_identifier_char(c) 
backup!(l) return emit(l, TRY) else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end elseif c == 'y' c = readchar(l) @@ -971,25 +971,25 @@ function lex_identifier(l, c) backup!(l) return emit(l, TYPE) elseif c == 'a' - return tryread(l, ('l', 'i', 'a', 's'), TYPEALIAS) + return tryread(l, ('l', 'i', 'a', 's'), TYPEALIAS, c) else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end else - return _doret(c, l) + return _doret(l, c) end elseif c == 'u' - return tryread(l, ('s', 'i', 'n', 'g'), USING) + return tryread(l, ('s', 'i', 'n', 'g'), USING, c) elseif c == 'w' - return tryread(l, ('h', 'i', 'l', 'e'), WHILE) + return tryread(l, ('h', 'i', 'l', 'e'), WHILE, c) else - return _doret(c, l) + return _doret(l, c) end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 619ebdd3c42bd..3179d89a60591 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -312,3 +312,8 @@ end Base.Test.@inferred Tokenize.Lexers.next_token(l) end +@testset "modifying function names (!) followed by operator" begin + @test collect(tokenize("a!=b"))[2].kind == Tokens.NOT_EQ + @test collect(tokenize("a!!=b"))[2].kind == Tokens.NOT_EQ + @test collect(tokenize("!=b"))[1].kind == Tokens.NOT_EQ +end From a608c0bb5aebe4e83656ee0a1c213cac5fa4f444 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Mon, 13 Mar 2017 10:54:22 +0000 Subject: [PATCH 0086/1109] fix trailing `.` in numbers ambiguity (JuliaLang/JuliaSyntax.jl#72) --- JuliaSyntax/src/lexer.jl | 15 ++++++++++++++- JuliaSyntax/test/lexer.jl | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 5aecb1a81416f..2c864b90dcc9c 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -543,7 +543,20 @@ function lex_digit(l::Lexer) if peekchar(l) == '.' 
# 43.. -> [43, ..] backup!(l) return emit(l, Tokens.INTEGER) - elseif !(isdigit(peekchar(l)) || iswhitespace(peekchar(l)) || is_identifier_start_char(peekchar(l))) + elseif !(isdigit(peekchar(l)) || + iswhitespace(peekchar(l)) || + is_identifier_start_char(peekchar(l)) + || peekchar(l) == '(' + || peekchar(l) == ')' + || peekchar(l) == '[' + || peekchar(l) == ']' + || peekchar(l) == '{' + || peekchar(l) == '}' + || peekchar(l) == ',' + || peekchar(l) == ';' + || peekchar(l) == '@' + || peekchar(l) == '`' + || peekchar(l) == '"') backup!(l) return emit(l, Tokens.INTEGER) end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 3179d89a60591..9bd37653abbad 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -317,3 +317,23 @@ end @test collect(tokenize("a!!=b"))[2].kind == Tokens.NOT_EQ @test collect(tokenize("!=b"))[1].kind == Tokens.NOT_EQ end + + +@testset "floats with trailing `.` " begin + @test collect(tokenize("1.0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.a"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.("))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.["))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.{"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.)"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.]"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.{"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.,"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.;"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.@"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.\"text\" "))[1].kind == Tokens.FLOAT + + @test collect(tokenize("1.+ "))[1].kind == Tokens.INTEGER + @test collect(tokenize("1.⤋"))[1].kind == Tokens.INTEGER + @test collect(tokenize("1.."))[1].kind == Tokens.INTEGER +end \ No newline at end of file From e691f79dc510620760619b2b90877798bd447067 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 14 Mar 2017 11:32:57 +0100 Subject: 
[PATCH 0087/1109] fix infinite loop when lexing strings (JuliaLang/JuliaSyntax.jl#74) --- JuliaSyntax/src/lexer.jl | 17 +++++------------ JuliaSyntax/test/lexer.jl | 10 ++++++++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 2c864b90dcc9c..6eece56d00ee3 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -16,13 +16,6 @@ import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BR export tokenize -# using Logging -# @Logging.configure(level=WARNING) - -macro debug(ex) - return :() -end - ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') isbinary(c::Char) = c == '0' || c == '1' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) @@ -234,7 +227,6 @@ function emit(l::Lexer, kind::Kind, (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, str, err) - @debug "emitted token: $tok:" l.last_token = kind start_token!(l) return tok @@ -649,16 +641,16 @@ function lex_quote(l::Lexer, doemit=true) if read_string(l, Tokens.TRIPLE_STRING) return doemit ? emit(l, Tokens.TRIPLE_STRING) : EMPTY_TOKEN else - return emit_error(l, Tokens.EOF_STRING) + return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN end else # empty string - return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN end else # "?, ? != '"' if read_string(l, Tokens.STRING) - return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN else - return emit_error(l, Tokens.EOF_STRING) + return doemit ? 
emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN end end end @@ -695,6 +687,7 @@ function read_string(l::Lexer, kind::Tokens.Kind) o = 1 while o > 0 c = readchar(l) + eof(c) && return false if c == '(' o += 1 elseif c == ')' diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 9bd37653abbad..ab5d5fc0f28de 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -304,7 +304,13 @@ end @test length(ts)==3 @test ts[1].kind == Tokens.STRING ts = collect(tokenize("""\"\$\"""")) - @test ts[1].kind == Tokens.STRING + @test ts[1].kind == Tokens.STRING + # issue 73: + t_err = collect(tokenize("\"\$(fdsf\""))[1] + @test t_err.kind == Tokens.ERROR + @test t_err.token_error == Tokens.EOF_STRING + @test Tokenize.Tokens.startpos(t_err) == (1,1) + @test Tokenize.Tokens.endpos(t_err) == (1,8) end @testset "inferred" begin @@ -336,4 +342,4 @@ end @test collect(tokenize("1.+ "))[1].kind == Tokens.INTEGER @test collect(tokenize("1.⤋"))[1].kind == Tokens.INTEGER @test collect(tokenize("1.."))[1].kind == Tokens.INTEGER -end \ No newline at end of file +end From 2771bdf633a6af8f2f1d1f2ca59f7c8b604cc841 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 14 Mar 2017 16:20:39 +0000 Subject: [PATCH 0088/1109] lex octals (JuliaLang/JuliaSyntax.jl#75) * lex octals * allow upderscore --- JuliaSyntax/src/lexer.jl | 10 ++++++++-- JuliaSyntax/test/lexer.jl | 12 ++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 6eece56d00ee3..4d27d00503b0a 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -16,8 +16,9 @@ import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BR export tokenize -ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') -isbinary(c::Char) = c == '0' || c == '1' +ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') || c == '_' +isbinary(c::Char) = c == '0' || c == '1' || c == '_' 
+isoctal(c::Char) = '0' ≤ c ≤ '7' || c == '_' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) type Lexer{IO_t <: IO} @@ -590,6 +591,11 @@ function lex_digit(l::Lexer) if accept_batch(l, isbinary) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end + elseif accept(l, 'o') + accept(l, "o") + if accept_batch(l, isoctal) && position(l) > longest + longest, kind = position(l), Tokens.INTEGER + end end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index ab5d5fc0f28de..8ed47a91c3bf9 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -343,3 +343,15 @@ end @test collect(tokenize("1.⤋"))[1].kind == Tokens.INTEGER @test collect(tokenize("1.."))[1].kind == Tokens.INTEGER end + + + +@testset "lex octal" begin + @test collect(tokenize("0o0167"))[1].kind==T.INTEGER +end + +@testset "lex bin/hex/oct w underscores" begin + @test collect(tokenize("0x0167_032"))[1].kind==T.INTEGER + @test collect(tokenize("0b0101001_0100_0101"))[1].kind==T.INTEGER + @test collect(tokenize("0o01054001_0100_0101"))[1].kind==T.INTEGER +end \ No newline at end of file From 3f4deeeaca5e3c78b520bf95ecd77c1c38e4258d Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 14 Mar 2017 17:18:36 +0000 Subject: [PATCH 0089/1109] fix float lexing (JuliaLang/JuliaSyntax.jl#76) * fix float lexing * fix * remove mysterious code --- JuliaSyntax/src/lexer.jl | 10 ++++++---- JuliaSyntax/test/lexer.jl | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 4d27d00503b0a..495b04d2ff248 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -567,12 +567,17 @@ function lex_digit(l::Lexer) elseif position(l) > longest # 323213.3232 candidate longest, kind = position(l), Tokens.FLOAT end - if accept(l, "eE") # 1313.[0-9]*e + if accept(l, "eEf") # 1313.[0-9]*e accept(l, "+-") if accept_batch(l, isdigit) && position(l) > longest longest, kind = position(l), 
Tokens.FLOAT end end + elseif accept(l, "eEf") + accept(l, "+-") + if accept_batch(l, isdigit) && position(l) > longest + longest, kind = position(l), Tokens.FLOAT + end elseif position(l) > longest longest, kind = position(l), Tokens.INTEGER end @@ -582,17 +587,14 @@ function lex_digit(l::Lexer) # 0x[0-9A-Fa-f]+ if accept(l, '0') if accept(l, 'x') - accept(l, "o") if accept_batch(l, ishex) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end elseif accept(l, 'b') - accept(l, "o") if accept_batch(l, isbinary) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end elseif accept(l, 'o') - accept(l, "o") if accept_batch(l, isoctal) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 8ed47a91c3bf9..bbb9078236544 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -354,4 +354,20 @@ end @test collect(tokenize("0x0167_032"))[1].kind==T.INTEGER @test collect(tokenize("0b0101001_0100_0101"))[1].kind==T.INTEGER @test collect(tokenize("0o01054001_0100_0101"))[1].kind==T.INTEGER +end + +@testset "floating points" begin + @test collect(tokenize("1.0e0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.0e-0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.0E0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.0E-0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.0f0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("1.0f-0"))[1].kind == Tokens.FLOAT + + @test collect(tokenize("0e0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("0e+0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("0E0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("201E+0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("2f+0"))[1].kind == Tokens.FLOAT + @test collect(tokenize("2048f0"))[1].kind == Tokens.FLOAT end \ No newline at end of file From 85a3fc582a0de1b5bf1018e2b0086cfbfb1e1956 Mon Sep 17 00:00:00 2001 From: 
ZacNugent Date: Tue, 14 Mar 2017 22:37:11 +0000 Subject: [PATCH 0090/1109] add dict_comprehension (JuliaLang/JuliaSyntax.jl#78) --- JuliaSyntax/src/token_kinds.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 00a5aa42e951a..e66344bdc82c5 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -35,6 +35,7 @@ COMPARISON, COMPREHENSION, CURLY, + DICT_COMPREHENSION, GENERATOR, HCAT, KW, From 3f94302a82a9497ccebb658a54e1463b9d10f2ed Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 15 Mar 2017 07:34:27 +0100 Subject: [PATCH 0091/1109] compact test a bit (JuliaLang/JuliaSyntax.jl#77) --- JuliaSyntax/test/lexer.jl | 162 +++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 80 deletions(-) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index bbb9078236544..361489b3aa118 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -4,6 +4,8 @@ using Base.Test const T = Tokenize.Tokens +tok(str, i = 1) = collect(tokenize(str))[i] + @testset "tokens" begin for s in ["a", IOBuffer("a")] l = tokenize(s) @@ -142,58 +144,58 @@ end # testset end @testset "issue 17, >>" begin - @test collect(tokenize(">> "))[1].val==">>" + @test tok(">> ").val==">>" end @testset "test added operators" begin - @test collect(tokenize("1+=2"))[2].kind == T.PLUS_EQ - @test collect(tokenize("1-=2"))[2].kind == T.MINUS_EQ - @test collect(tokenize("1:=2"))[2].kind == T.COLON_EQ - @test collect(tokenize("1*=2"))[2].kind == T.STAR_EQ - @test collect(tokenize("1^=2"))[2].kind == T.CIRCUMFLEX_EQ - @test collect(tokenize("1÷=2"))[2].kind == T.DIVISION_EQ - @test collect(tokenize("1\\=2"))[2].kind == T.BACKSLASH_EQ - @test collect(tokenize("1\$=2"))[2].kind == T.EX_OR_EQ - @test collect(tokenize("1-->2"))[2].kind == T.RIGHT_ARROW - @test collect(tokenize("1>:2"))[2].kind == T.GREATER_COLON + @test tok("1+=2", 2).kind == T.PLUS_EQ + @test 
tok("1-=2", 2).kind == T.MINUS_EQ + @test tok("1:=2", 2).kind == T.COLON_EQ + @test tok("1*=2", 2).kind == T.STAR_EQ + @test tok("1^=2", 2).kind == T.CIRCUMFLEX_EQ + @test tok("1÷=2", 2).kind == T.DIVISION_EQ + @test tok("1\\=2", 2).kind == T.BACKSLASH_EQ + @test tok("1\$=2", 2).kind == T.EX_OR_EQ + @test tok("1-->2", 2).kind == T.RIGHT_ARROW + @test tok("1>:2", 2).kind == T.GREATER_COLON end @testset "infix" begin - @test collect(tokenize("1 in 2"))[3].kind == T.IN - @test collect(tokenize("1 in[1]"))[3].kind == T.IN + @test tok("1 in 2", 3).kind == T.IN + @test tok("1 in[1]", 3).kind == T.IN if VERSION >= v"0.6.0-dev.1471" - @test collect(tokenize("1 isa 2"))[3].kind == T.ISA - @test collect(tokenize("1 isa[2]"))[3].kind == T.ISA + @test tok("1 isa 2", 3).kind == T.ISA + @test tok("1 isa[2]", 3).kind == T.ISA else - @test collect(tokenize("1 isa 2"))[3].kind == T.IDENTIFIER - @test collect(tokenize("1 isa[2]"))[3].kind == T.IDENTIFIER + @test tok("1 isa 2", 3).kind == T.IDENTIFIER + @test tok("1 isa[2]", 3).kind == T.IDENTIFIER end end @testset "tokenizing true/false literals" begin - @test collect(tokenize("somtext true"))[3].kind == T.TRUE - @test collect(tokenize("somtext false"))[3].kind == T.FALSE - @test collect(tokenize("somtext tr"))[3].kind == T.IDENTIFIER - @test collect(tokenize("somtext falsething"))[3].kind == T.IDENTIFIER + @test tok("somtext true", 3).kind == T.TRUE + @test tok("somtext false", 3).kind == T.FALSE + @test tok("somtext tr", 3).kind == T.IDENTIFIER + @test tok("somtext falsething", 3).kind == T.IDENTIFIER end @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin - @test (t->t.val=="1234" && t.kind == Tokens.INTEGER)(collect(tokenize("1234.+1"))[1]) - @test (t->t.val=="1234" && t.kind == Tokens.INTEGER)(collect(tokenize("1234 .+1"))[1]) - @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0.+1"))[1]) - @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0 
.+1"))[1]) - @test (t->t.val=="1234." && t.kind == Tokens.FLOAT)(collect(tokenize("1234.f(a)"))[1]) - @test (t->t.val=="1234" && t.kind == Tokens.INTEGER)(collect(tokenize("1234 .f(a)"))[1]) - @test (t->t.val=="1234.0." && t.kind == Tokens.ERROR)(collect(tokenize("1234.0.f(a)"))[1]) - @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT)(collect(tokenize("1234.0 .f(a)"))[1]) + @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234.+1")) + @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .+1")) + @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0.+1")) + @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0 .+1")) + @test (t->t.val=="1234." && t.kind == Tokens.FLOAT )(tok("1234.f(a)")) + @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .f(a)")) + @test (t->t.val=="1234.0." && t.kind == Tokens.ERROR )(tok("1234.0.f(a)")) + @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0 .f(a)")) end @testset "lexing anon functions '->' " begin - @test collect(tokenize("a->b"))[2].kind==Tokens.ANON_FUNC + @test tok("a->b", 2).kind==Tokens.ANON_FUNC end @testset "comments" begin @@ -220,13 +222,13 @@ end D = ImageMagick.load(fn) """)) @test tokens[16].val==tokens[17].val=="'" - @test all(x->x.val=="'", collect(tokenize("''"))[1:2]) - @test all(x->x.val=="'", collect(tokenize("'''"))[1:3]) - @test all(x->x.val=="'", collect(tokenize("''''"))[1:4]) + @test all(x->x.val=="'", tok("''", 1:2)) + @test all(x->x.val=="'", tok("'''", 1:3)) + @test all(x->x.val=="'", tok("''''", 1:4)) end @testset "in/isa bytelength" begin - t = collect(tokenize("x in y"))[3] + t = tok("x in y", 3) @test t.endbyte - t.startbyte + 1 == 2 end @@ -267,29 +269,29 @@ end "using", "while"] - @test T.kind(collect(tokenize(kw))[1]) == T.KEYWORD + @test T.kind(tok(kw)) == T.KEYWORD end end @testset "issue in PR #45" begin - @test length(collect(tokenize("x)")))==3 + @test length(collect(tokenize("x)"))) == 3 end @testset "errors" begin - 
@test collect(tokenize("#= #= =#"))[1].kind == T.ERROR - @test collect(tokenize("'dsadsa"))[1].kind == T.ERROR - @test collect(tokenize("aa **"))[3].kind == T.ERROR - @test collect(tokenize("aa \" "))[3].kind == T.ERROR - @test collect(tokenize("aa \"\"\" \"dsad\" \"\""))[3].kind == T.ERROR + @test tok("#= #= =#", 1).kind == T.ERROR + @test tok("'dsadsa", 1).kind == T.ERROR + @test tok("aa **", 3).kind == T.ERROR + @test tok("aa \" ", 3).kind == T.ERROR + @test tok("aa \"\"\" \"dsad\" \"\"",3).kind == T.ERROR end @testset "xor_eq" begin - @test collect(tokenize("1 ⊻= 2"))[3].kind==T.XOR_EQ + @test tok("1 ⊻= 2", 3).kind==T.XOR_EQ end @testset "lex binary" begin - @test collect(tokenize("0b0101"))[1].kind==T.INTEGER + @test tok("0b0101").kind==T.INTEGER end @testset "show" begin @@ -306,7 +308,7 @@ end ts = collect(tokenize("""\"\$\"""")) @test ts[1].kind == Tokens.STRING # issue 73: - t_err = collect(tokenize("\"\$(fdsf\""))[1] + t_err = tok("\"\$(fdsf\"") @test t_err.kind == Tokens.ERROR @test t_err.token_error == Tokens.EOF_STRING @test Tokenize.Tokens.startpos(t_err) == (1,1) @@ -319,55 +321,55 @@ end end @testset "modifying function names (!) 
followed by operator" begin - @test collect(tokenize("a!=b"))[2].kind == Tokens.NOT_EQ - @test collect(tokenize("a!!=b"))[2].kind == Tokens.NOT_EQ - @test collect(tokenize("!=b"))[1].kind == Tokens.NOT_EQ + @test tok("a!=b", 2).kind == Tokens.NOT_EQ + @test tok("a!!=b", 2).kind == Tokens.NOT_EQ + @test tok("!=b", 1).kind == Tokens.NOT_EQ end @testset "floats with trailing `.` " begin - @test collect(tokenize("1.0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.a"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.("))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.["))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.{"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.)"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.]"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.{"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.,"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.;"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.@"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.\"text\" "))[1].kind == Tokens.FLOAT - - @test collect(tokenize("1.+ "))[1].kind == Tokens.INTEGER - @test collect(tokenize("1.⤋"))[1].kind == Tokens.INTEGER - @test collect(tokenize("1.."))[1].kind == Tokens.INTEGER + @test tok("1.0").kind == Tokens.FLOAT + @test tok("1.a").kind == Tokens.FLOAT + @test tok("1.(").kind == Tokens.FLOAT + @test tok("1.[").kind == Tokens.FLOAT + @test tok("1.{").kind == Tokens.FLOAT + @test tok("1.)").kind == Tokens.FLOAT + @test tok("1.]").kind == Tokens.FLOAT + @test tok("1.{").kind == Tokens.FLOAT + @test tok("1.,").kind == Tokens.FLOAT + @test tok("1.;").kind == Tokens.FLOAT + @test tok("1.@").kind == Tokens.FLOAT + @test tok("1.\"text\" ").kind == Tokens.FLOAT + + @test tok("1.+ ").kind == Tokens.INTEGER + @test tok("1.⤋").kind == Tokens.INTEGER + @test tok("1..").kind == Tokens.INTEGER end @testset "lex octal" begin - @test collect(tokenize("0o0167"))[1].kind==T.INTEGER + @test tok("0o0167").kind == 
T.INTEGER end @testset "lex bin/hex/oct w underscores" begin - @test collect(tokenize("0x0167_032"))[1].kind==T.INTEGER - @test collect(tokenize("0b0101001_0100_0101"))[1].kind==T.INTEGER - @test collect(tokenize("0o01054001_0100_0101"))[1].kind==T.INTEGER + @test tok("0x0167_032").kind == T.INTEGER + @test tok("0b0101001_0100_0101").kind == T.INTEGER + @test tok("0o01054001_0100_0101").kind == T.INTEGER end @testset "floating points" begin - @test collect(tokenize("1.0e0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.0e-0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.0E0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.0E-0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.0f0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("1.0f-0"))[1].kind == Tokens.FLOAT - - @test collect(tokenize("0e0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("0e+0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("0E0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("201E+0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("2f+0"))[1].kind == Tokens.FLOAT - @test collect(tokenize("2048f0"))[1].kind == Tokens.FLOAT + @test tok("1.0e0").kind == Tokens.FLOAT + @test tok("1.0e-0").kind == Tokens.FLOAT + @test tok("1.0E0").kind == Tokens.FLOAT + @test tok("1.0E-0").kind == Tokens.FLOAT + @test tok("1.0f0").kind == Tokens.FLOAT + @test tok("1.0f-0").kind == Tokens.FLOAT + + @test tok("0e0").kind == Tokens.FLOAT + @test tok("0e+0").kind == Tokens.FLOAT + @test tok("0E0").kind == Tokens.FLOAT + @test tok("201E+0").kind == Tokens.FLOAT + @test tok("2f+0").kind == Tokens.FLOAT + @test tok("2048f0").kind == Tokens.FLOAT end \ No newline at end of file From c8be2299fcb63258928242c1eaf41ee163898396 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 17 Mar 2017 23:48:16 +0100 Subject: [PATCH 0092/1109] fix lexing 1e (JuliaLang/JuliaSyntax.jl#79) --- JuliaSyntax/src/lexer.jl | 25 ++++++++++++++----------- JuliaSyntax/test/lexer.jl | 7 ++++++- 2 
files changed, 20 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 495b04d2ff248..7aea4a75de35d 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -536,18 +536,18 @@ function lex_digit(l::Lexer) if peekchar(l) == '.' # 43.. -> [43, ..] backup!(l) return emit(l, Tokens.INTEGER) - elseif !(isdigit(peekchar(l)) || - iswhitespace(peekchar(l)) || - is_identifier_start_char(peekchar(l)) - || peekchar(l) == '(' - || peekchar(l) == ')' - || peekchar(l) == '[' - || peekchar(l) == ']' - || peekchar(l) == '{' + elseif !(isdigit(peekchar(l)) || + iswhitespace(peekchar(l)) || + is_identifier_start_char(peekchar(l)) + || peekchar(l) == '(' + || peekchar(l) == ')' + || peekchar(l) == '[' + || peekchar(l) == ']' + || peekchar(l) == '{' || peekchar(l) == '}' - || peekchar(l) == ',' - || peekchar(l) == ';' - || peekchar(l) == '@' + || peekchar(l) == ',' + || peekchar(l) == ';' + || peekchar(l) == '@' || peekchar(l) == '`' || peekchar(l) == '"') backup!(l) @@ -577,6 +577,9 @@ function lex_digit(l::Lexer) accept(l, "+-") if accept_batch(l, isdigit) && position(l) > longest longest, kind = position(l), Tokens.FLOAT + else + backup!(l) + return emit(l, Tokens.INTEGER) end elseif position(l) > longest longest, kind = position(l), Tokens.INTEGER diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 361489b3aa118..535dcb47502f9 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -372,4 +372,9 @@ end @test tok("201E+0").kind == Tokens.FLOAT @test tok("2f+0").kind == Tokens.FLOAT @test tok("2048f0").kind == Tokens.FLOAT -end \ No newline at end of file +end + +@testset "1e1" begin + @test tok("1e", 1).kind == Tokens.INTEGER + @test tok("1e", 2).kind == Tokens.IDENTIFIER +end From 25107386202eb1cceacb72ffb997217a3a77e63d Mon Sep 17 00:00:00 2001 From: m-j-w Date: Tue, 21 Mar 2017 21:08:19 +0100 Subject: [PATCH 0093/1109] Hack to add Julia 0.6 'primitive', 'struct' and 'mutable' 
keywords. (JuliaLang/JuliaSyntax.jl#80) * Hack to add Julia 0.6 'primitive', 'struct' and 'mutable' keywords. * Added 'where' and introduced new token kinds as requested. --- JuliaSyntax/src/lexer.jl | 23 +++++++++++++++++++++-- JuliaSyntax/src/token_kinds.jl | 6 +++--- JuliaSyntax/test/lexer.jl | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 7aea4a75de35d..527bc34aeb83f 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -11,7 +11,8 @@ import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, - IMPORT, IMPORTALL, MACRO, MODULE, QUOTE, RETURN, TRUE, TRY, TYPE, TYPEALIAS, USING, WHILE, ISA, IN + IMPORT, IMPORTALL, MACRO, MODULE, QUOTE, RETURN, TRUE, TRY, TYPE, TYPEALIAS, USING, WHILE, ISA, IN, + MUTABLE, PRIMITIVE, STRUCT, WHERE export tokenize @@ -954,13 +955,19 @@ function lex_identifier(l, c) return tryread(l, ('c', 'r', 'o'), MACRO, c) elseif c == 'o' return tryread(l, ('d', 'u', 'l', 'e'), MODULE, c) + elseif c == 'u' + return tryread(l, ('t', 'a', 'b', 'l', 'e'), MUTABLE, c) else return _doret(l, c) end + elseif c == 'p' + return tryread(l, ('r', 'i', 'm', 'i', 't', 'i', 'v', 'e'), PRIMITIVE, c) elseif c == 'q' return tryread(l, ('u', 'o', 't', 'e'), QUOTE, c) elseif c == 'r' return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN, c) + elseif c == 's' + return tryread(l, ('t', 'r', 'u', 'c', 't'), STRUCT, c) elseif c == 't' c = readchar(l) if c == 'r' @@ -1004,7 +1011,19 @@ function lex_identifier(l, c) elseif c == 'u' return tryread(l, ('s', 'i', 'n', 'g'), USING, c) elseif c == 'w' - return tryread(l, ('h', 'i', 'l', 'e'), WHILE, c) + c = readchar(l) + if c == 'h' + c = readchar(l) + if c == 'e' + return tryread(l, ('r', 'e'), WHERE, c) + 
elseif c == 'i' + return tryread(l, ('l', 'e'), WHILE, c) + else + return _doret(l, c) + end + else + return _doret(l, c) + end else return _doret(l, c) end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index e66344bdc82c5..d95598f86288e 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -14,14 +14,14 @@ BREAK, CONTINUE, end_0arg_kw, begin_1arg_kw, - ABSTRACT, CONST, EXPORT, GLOBAL, LOCAL, IMPORT, IMPORTALL, RETURN,USING, + ABSTRACT, CONST, EXPORT, GLOBAL, LOCAL, IMPORT, IMPORTALL, RETURN, USING, WHERE, end_1arg_kw, begin_2arg_kw, - BITSTYPE, TYPEALIAS, + BITSTYPE, PRIMITIVE, TYPEALIAS, end_2arg_kw, BEGIN, QUOTE, begin_3arg_kw, - BAREMODULE, DO, FOR, FUNCTION, IMMUTABLE, LET, MACRO, MODULE, TYPE, WHILE, + BAREMODULE, DO, FOR, FUNCTION, IMMUTABLE, LET, MACRO, MUTABLE, MODULE, STRUCT, TYPE, WHILE, end_3arg_kw, IF, ELSEIF, ELSE, TRY, CATCH, FINALLY, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 535dcb47502f9..255d5dc0957b4 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -260,13 +260,17 @@ end "importall", "macro", "module", + "mutable", + "primitive", "quote", "return", + "struct", #"true", "try", "type", "typealias", "using", + "where", "while"] @test T.kind(tok(kw)) == T.KEYWORD @@ -378,3 +382,14 @@ end @test tok("1e", 1).kind == Tokens.INTEGER @test tok("1e", 2).kind == Tokens.IDENTIFIER end + +@testset "jl06types" begin + @test tok("mutable").kind == Tokens.MUTABLE + @test tok("primitive").kind == Tokens.PRIMITIVE + @test tok("struct").kind == Tokens.STRUCT + @test tok("where").kind == Tokens.WHERE + @test tok("mutable struct s{T} where T", 1).kind == Tokens.MUTABLE + @test tok("mutable struct s{T} where T", 3).kind == Tokens.STRUCT + @test tok("mutable struct s{T} where T", 10).kind == Tokens.WHERE +end + From c5208da20081122285784fbf86ca4a42253ae462 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 22 Mar 2017 11:53:16 +0000 Subject: [PATCH 
0094/1109] flatten kws, add ROW (JuliaLang/JuliaSyntax.jl#81) --- JuliaSyntax/src/token_kinds.jl | 52 +++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index d95598f86288e..d743c3727491a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -10,22 +10,42 @@ begin_keywords, KEYWORD, # general - begin_0arg_kw, - BREAK, CONTINUE, - end_0arg_kw, - begin_1arg_kw, - ABSTRACT, CONST, EXPORT, GLOBAL, LOCAL, IMPORT, IMPORTALL, RETURN, USING, WHERE, - end_1arg_kw, - begin_2arg_kw, - BITSTYPE, PRIMITIVE, TYPEALIAS, - end_2arg_kw, - BEGIN, QUOTE, - begin_3arg_kw, - BAREMODULE, DO, FOR, FUNCTION, IMMUTABLE, LET, MACRO, MUTABLE, MODULE, STRUCT, TYPE, WHILE, - end_3arg_kw, - IF, ELSEIF, ELSE, - TRY, CATCH, FINALLY, + ABSTRACT, + BAREMODULE, + BEGIN, + BITSTYPE, + BREAK, + CATCH, + CONST, + CONTINUE, + DO, + ELSE, + ELSEIF, END, + EXPORT, + FINALLY, + FOR, + FUNCTION, + GLOBAL, + IF, + IMMUTABLE, + IMPORT, + IMPORTALL, + LET, + LOCAL, + MACRO, + MODULE, + MUTABLE, + PRIMITIVE, + QUOTE, + RETURN, + STRUCT, + TRY, + TYPE, + TYPEALIAS, + USING, + WHERE, + WHILE, end_keywords, begin_invisble_keywords, @@ -43,6 +63,7 @@ MACROCALL, PARAMETERS, REF, + ROW, TOPLEVEL, TUPLE, TYPED_COMPREHENSION, @@ -735,7 +756,6 @@ PRIME, # ' TRANSPOSE, # .' 
ANON_FUNC, # -> - begin_unicode_ops, NOT_SIGN, # ¬ From fdc888a305aa6b851c97f8bc8f7eaae28a141ea5 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Thu, 23 Mar 2017 14:19:08 +0000 Subject: [PATCH 0095/1109] fix for underscores in decimal part of floats (JuliaLang/JuliaSyntax.jl#82) * fix for underscores in decimal part of floats * don't allow _ at start or end, parse FLOATs beggining with `.` --- JuliaSyntax/src/lexer.jl | 47 ++++++++++++++++++++++++--------------- JuliaSyntax/test/lexer.jl | 17 +++++++++++++- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 527bc34aeb83f..e2ebe3ef6f4be 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -17,7 +17,8 @@ import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BR export tokenize -ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') || c == '_' +ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') || c == +'_' isbinary(c::Char) = c == '0' || c == '1' || c == '_' isoctal(c::Char) = '0' ≤ c ≤ '7' || c == '_' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) @@ -309,8 +310,8 @@ function next_token(l::Lexer) elseif c == '+'; return lex_plus(l); elseif c == '-'; return lex_minus(l); elseif c == '`'; return lex_cmd(l); - elseif isdigit(c); return lex_digit(l) elseif is_identifier_start_char(c); return lex_identifier(l, c) + elseif isdigit(c); return lex_digit(l) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR; return emit(l, k) else emit_error(l) end @@ -515,23 +516,29 @@ function lex_xor(l::Lexer) return emit(l, Tokens.XOR) end +function accept_integer(l::Lexer) + !isdigit(peekchar(l)) && return false + while true + if !accept(l, isdigit) + if accept(l, '_') + if !isdigit(peekchar(l)) + backup!(l) + return true + end + else + return true + end + end + end +end + # A digit has been consumed function lex_digit(l::Lexer) backup!(l) longest, kind = position(l), 
Tokens.ERROR - accept_batch(l, isdigit) - - # Accept "_" in digits - while true - if !accept(l, '_') - break - end - if !accept_batch(l, isdigit) - backup!(l) - return emit(l, Tokens.INTEGER) - end - end + # accept_batch(l, isdigit) + accept_integer(l) if accept(l, '.') if peekchar(l) == '.' # 43.. -> [43, ..] @@ -550,11 +557,13 @@ function lex_digit(l::Lexer) || peekchar(l) == ';' || peekchar(l) == '@' || peekchar(l) == '`' - || peekchar(l) == '"') + || peekchar(l) == '"' + || eof(l)) backup!(l) return emit(l, Tokens.INTEGER) end - accept_batch(l, isdigit) + # accept_batch(l, isdigit) + accept_integer(l) if accept(l, '.') if peekchar(l) == '.' # 1.23..3.21 is valid backup!(l) @@ -570,13 +579,13 @@ function lex_digit(l::Lexer) end if accept(l, "eEf") # 1313.[0-9]*e accept(l, "+-") - if accept_batch(l, isdigit) && position(l) > longest + if accept_integer(l) && position(l) > longest longest, kind = position(l), Tokens.FLOAT end end elseif accept(l, "eEf") accept(l, "+-") - if accept_batch(l, isdigit) && position(l) > longest + if accept_integer(l) && position(l) > longest longest, kind = position(l), Tokens.FLOAT else backup!(l) @@ -744,6 +753,8 @@ function lex_dot(l::Lexer) else return emit(l, Tokens.DDOT) end + elseif Base.isdigit(peekchar(l)) + return lex_digit(l) else return emit(l, Tokens.DOT) end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 255d5dc0957b4..7d89f9449a216 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -330,6 +330,15 @@ end @test tok("!=b", 1).kind == Tokens.NOT_EQ end +@testset "lex integers" begin + @test tok("1234").kind == T.INTEGER + @test tok("12_34").kind == T.INTEGER + @test tok("_1234").kind == T.IDENTIFIER + @test tok("1234_").kind == T.INTEGER + @test tok("1234_", 2).kind == T.IDENTIFIER + @test tok("1234x").kind == T.INTEGER + @test tok("1234x", 2).kind == T.IDENTIFIER +end @testset "floats with trailing `.` " begin @test tok("1.0").kind == Tokens.FLOAT @@ -343,6 +352,7 @@ end 
@test tok("1.,").kind == Tokens.FLOAT @test tok("1.;").kind == Tokens.FLOAT @test tok("1.@").kind == Tokens.FLOAT + @test tok("1.").kind == Tokens.FLOAT @test tok("1.\"text\" ").kind == Tokens.FLOAT @test tok("1.+ ").kind == Tokens.INTEGER @@ -356,7 +366,12 @@ end @test tok("0o0167").kind == T.INTEGER end -@testset "lex bin/hex/oct w underscores" begin +@testset "lex float/bin/hex/oct w underscores" begin + @test tok("1_1.11").kind == T.FLOAT + @test tok("11.1_1").kind == T.FLOAT + @test tok("1_1.1_1").kind == T.FLOAT + @test tok("_1.1_1", 1).kind == T.IDENTIFIER + @test tok("_1.1_1", 2).kind == T.FLOAT @test tok("0x0167_032").kind == T.INTEGER @test tok("0b0101001_0100_0101").kind == T.INTEGER @test tok("0o01054001_0100_0101").kind == T.INTEGER From 6cca248f7054d37baf16d0d13261f06bbb1b7c35 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 24 Mar 2017 14:03:56 +0000 Subject: [PATCH 0096/1109] add invisible generator keywords (JuliaLang/JuliaSyntax.jl#83) --- JuliaSyntax/src/token_kinds.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index d743c3727491a..0f80b45eb16a3 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -56,6 +56,8 @@ COMPREHENSION, CURLY, DICT_COMPREHENSION, + FILTER, + FLATTEN, GENERATOR, HCAT, KW, From 7bbb10cdf561254183ead5fca7a9fb0421e859ff Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 4 Apr 2017 20:28:25 +0100 Subject: [PATCH 0097/1109] add triple backquote support (JuliaLang/JuliaSyntax.jl#84) --- JuliaSyntax/src/lexer.jl | 37 +++++++++++++++++++++++++++------- JuliaSyntax/src/token_kinds.jl | 1 + JuliaSyntax/test/lexer.jl | 7 +++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index e2ebe3ef6f4be..3eb1add1b7fd4 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -760,18 +760,41 @@ function lex_dot(l::Lexer) end end -# A ` has been consumed, 
find the next one +# A ` has been consumed function lex_cmd(l::Lexer) - while true - c = readchar(l) - if c == '`' - return emit(l, Tokens.CMD) - elseif eof(c) - return emit_error(l, Tokens.EOF_CMD) + if accept(l, '`') + if accept(l, '`') # TRIPLE_CMD + while true + c = readchar(l) + if c == '`' + c = readchar(l) + if c == '`' + c = readchar(l) + if c == '`' + return emit(l, Tokens.TRIPLE_CMD) + end + end + end + if eof(c) + return emit_error(l, Tokens.EOF_CMD) + end + end + else # empty CMD + return emit(l, Tokens.CMD) + end + else # CMD + while true + c = readchar(l) + if c == '`' + return emit(l, Tokens.CMD) + elseif eof(c) + return emit_error(l, Tokens.EOF_CMD) + end end end end + function tryread(l, str, k, c) for s in str c = readchar(l) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 0f80b45eb16a3..b7a6e482764f6 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -83,6 +83,7 @@ TRIPLE_STRING, # """ foo \n """ CHAR, # 'a' CMD, # `cmd ...` + TRIPLE_CMD, # ```cmd ...``` TRUE, FALSE, end_literal, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 7d89f9449a216..078502ab800b9 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -408,3 +408,10 @@ end @test tok("mutable struct s{T} where T", 10).kind == Tokens.WHERE end +@testset "CMDs" begin + @test tok("`cmd`").kind == T.CMD + @test tok("```cmd```", 1).kind == T.TRIPLE_CMD + @test tok("```cmd```", 2).kind == T.ENDMARKER + @test tok("```cmd````cmd`", 1).kind == T.TRIPLE_CMD + @test tok("```cmd````cmd`", 2).kind == T.CMD +end From e29bc343375e033fa682a7532376ef8dd4b574d3 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 5 Apr 2017 13:44:10 +0100 Subject: [PATCH 0098/1109] GREATER_COLON -> ISSUPERTYPE (JuliaLang/JuliaSyntax.jl#85) --- JuliaSyntax/src/lexer.jl | 2 +- JuliaSyntax/src/token_kinds.jl | 2 +- JuliaSyntax/test/lexer.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 3eb1add1b7fd4..66092be8447f2 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -371,7 +371,7 @@ function lex_greater(l::Lexer) elseif accept(l, '=') # >= return emit(l, Tokens.GREATER_EQ) elseif accept(l, ':') # >: - return emit(l, Tokens.GREATER_COLON) + return emit(l, Tokens.ISSUPERTYPE) else # '>' return emit(l, Tokens.GREATER) end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index b7a6e482764f6..edbf167ce8a79 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -262,7 +262,7 @@ # Level 6 begin_comparison, ISSUBTYPE, # <: - GREATER_COLON, # >: + ISSUPERTYPE, # >: GREATER, # > LESS, # < GREATER_EQ, # >= diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 078502ab800b9..650bf24d7b1e7 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -158,7 +158,7 @@ end @test tok("1\\=2", 2).kind == T.BACKSLASH_EQ @test tok("1\$=2", 2).kind == T.EX_OR_EQ @test tok("1-->2", 2).kind == T.RIGHT_ARROW - @test tok("1>:2", 2).kind == T.GREATER_COLON + @test tok("1>:2", 2).kind == T.ISSUPERTYPE end @testset "infix" begin From 9f87d2a96c39b0e16d6a490647fde42d94371c28 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 18 Apr 2017 19:26:37 +0100 Subject: [PATCH 0099/1109] some more inviisible keywords (JuliaLang/JuliaSyntax.jl#86) --- JuliaSyntax/src/token_kinds.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index edbf167ce8a79..593500c80353c 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -73,6 +73,8 @@ TYPED_VCAT, VCAT, VECT, + x_STR, + x_CMD, end_invisble_keywords, begin_literal, From db15785bf172bf2ee96f2bd423049c2bad165c31 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 19 Apr 2017 16:37:51 +0100 Subject: [PATCH 0100/1109] fix FLOAT parsing (JuliaLang/JuliaSyntax.jl#87) * fix FLOAT 
parsing * test --- JuliaSyntax/src/lexer.jl | 2 ++ JuliaSyntax/test/lexer.jl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 66092be8447f2..935c1aeb319dd 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -558,6 +558,8 @@ function lex_digit(l::Lexer) || peekchar(l) == '@' || peekchar(l) == '`' || peekchar(l) == '"' + || peekchar(l) == ':' + || peekchar(l) == '?' || eof(l)) backup!(l) return emit(l, Tokens.INTEGER) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 650bf24d7b1e7..6e857edb453a7 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -391,6 +391,8 @@ end @test tok("201E+0").kind == Tokens.FLOAT @test tok("2f+0").kind == Tokens.FLOAT @test tok("2048f0").kind == Tokens.FLOAT + @test tok("1.:0").kind == Tokens.FLOAT + @test tok("1.?").kind == Tokens.FLOAT end @testset "1e1" begin From 234e37265e0613266326e83f404fd7f1bed6eee2 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 21 Apr 2017 13:52:34 +0100 Subject: [PATCH 0101/1109] fix prime parsing (JuliaLang/JuliaSyntax.jl#89) * fix prime parsing * use accept --- JuliaSyntax/src/lexer.jl | 14 ++++++++++---- JuliaSyntax/test/lexer.jl | 17 +++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 935c1aeb319dd..8c35de5184dad 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -7,7 +7,7 @@ using Compat import Compat.String import ..Tokens -import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN +import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, @@ -625,11 +625,17 @@ function lex_prime(l) if l.last_token == Tokens.IDENTIFIER || l.last_token == 
Tokens.DOT || l.last_token == Tokens.RPAREN || - l.last_token == Tokens.RSQUARE || l.last_token == Tokens.PRIME + l.last_token == Tokens.RSQUARE || + l.last_token == Tokens.RBRACE || + l.last_token == Tokens.PRIME || isliteral(l.last_token) return emit(l, Tokens.PRIME) else - if peekchar(l) == '\'' - return emit(l, Tokens.PRIME) + if accept(l, '\'') + if accept(l, '\'') + return emit(l, Tokens.CHAR) + else + return emit_error(l, Tokens.EOF_CHAR) + end end while true c = readchar(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 6e857edb453a7..468435fbde52b 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -222,14 +222,15 @@ end D = ImageMagick.load(fn) """)) @test tokens[16].val==tokens[17].val=="'" - @test all(x->x.val=="'", tok("''", 1:2)) - @test all(x->x.val=="'", tok("'''", 1:3)) - @test all(x->x.val=="'", tok("''''", 1:4)) -end - -@testset "in/isa bytelength" begin - t = tok("x in y", 3) - @test t.endbyte - t.startbyte + 1 == 2 + @test tok("'a'").val == "'a'" + @test tok("'a'").kind == Tokens.CHAR + @test tok("'''").val == "'''" + @test tok("'''").kind == Tokens.CHAR + @test tok("''''", 1).kind == Tokens.CHAR + @test tok("''''", 2).kind == Tokens.PRIME + @test tok("()'", 3).kind == Tokens.PRIME + @test tok("{}'", 3).kind == Tokens.PRIME + @test tok("[]'", 3).kind == Tokens.PRIME end @testset "keywords" begin From 1141165df6d70346f4d993363ed4770ee5f1e1ba Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Mon, 1 May 2017 09:57:23 +0100 Subject: [PATCH 0102/1109] use where as operator, fix typo (JuliaLang/JuliaSyntax.jl#90) * make where an operator * fix typo --- JuliaSyntax/src/token_kinds.jl | 12 ++++++++---- JuliaSyntax/test/lexer.jl | 5 ++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 593500c80353c..ffea6751d480e 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -44,11 +44,10 @@ TYPE, 
TYPEALIAS, USING, - WHERE, WHILE, end_keywords, - begin_invisble_keywords, + begin_invisible_keywords, BLOCK, CALL, CCALL, @@ -75,7 +74,7 @@ VECT, x_STR, x_CMD, - end_invisble_keywords, + end_invisible_keywords, begin_literal, LITERAL, # general @@ -746,13 +745,18 @@ HALFWIDTH_UPWARDS_ARROW, # ↑ HALFWIDTH_DOWNWARDS_ARROW, # ↓ end_power, - + # Level 14 begin_decl, DECLARATION, # :: end_decl, # Level 15 + begin_where, + WHERE, + end_where, + + # Level 16 begin_dot, DOT,# . end_dot, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 468435fbde52b..a816aa44dd5f5 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -271,7 +271,6 @@ end "type", "typealias", "using", - "where", "while"] @test T.kind(tok(kw)) == T.KEYWORD @@ -418,3 +417,7 @@ end @test tok("```cmd````cmd`", 1).kind == T.TRIPLE_CMD @test tok("```cmd````cmd`", 2).kind == T.CMD end + +@testset "where" begin + @test tok("a where b", 3).kind == T.WHERE +end From e75e84e2eaa6e8839847c0d299f1eb3c95fd7864 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Sun, 14 May 2017 06:22:19 -0700 Subject: [PATCH 0103/1109] Replace all Int64 with Int (JuliaLang/JuliaSyntax.jl#91) --- JuliaSyntax/README.md | 4 ++-- JuliaSyntax/src/lexer.jl | 8 ++++---- JuliaSyntax/src/token.jl | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 8105e2bf965bd..83654a72b0b0f 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -40,8 +40,8 @@ The API for a `Token` (non exported from the `Tokenize.Tokens` module) is. 
```julia startpos(t)::Tuple{Int, Int} # row and column where the token start endpos(t)::Tuple{Int, Int} # row and column where the token ends -startbyte(T)::Int64 # byte offset where the token start -endbyte(t)::Int64 # byte offset where the token ends +startbyte(T)::Int # byte offset where the token start +endbyte(t)::Int # byte offset where the token ends untokenize(t)::String # string representation of the token kind(t)::Token.Kind # kind of the token exactkind(t)::Token.Kind # exact kind of the token diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 8c35de5184dad..b41a99b2a9733 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -29,17 +29,17 @@ type Lexer{IO_t <: IO} token_start_row::Int token_start_col::Int - prevpos::Int64 - token_startpos::Int64 + prevpos::Int + token_startpos::Int current_row::Int current_col::Int - current_pos::Int64 + current_pos::Int last_token::Tokens.Kind end -Lexer(io) = Lexer(io, 1, 1, Int64(-1), Int64(0), 1, 1, Int64(1), Tokens.ERROR) +Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1, Tokens.ERROR) Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 402f3d3ae4a54..0c55e3d0964af 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -49,14 +49,14 @@ immutable Token # Offsets into a string or buffer startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index endpos::Tuple{Int, Int} - startbyte::Int64 # The byte where the token start in the buffer - endbyte::Int64 # The byte where the token ended in the buffer + startbyte::Int # The byte where the token start in the buffer + endbyte::Int # The byte where the token ended in the buffer val::Compat.UTF8String # The actual string of the token token_error::TokenError end function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, - startbyte::Int64, endbyte::Int64, val::String) + startbyte::Int, endbyte::Int, 
val::String) Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) end Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) From 3943187c2aae72b776f1e4359bbe8820830fe556 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Tue, 16 May 2017 22:36:59 -0700 Subject: [PATCH 0104/1109] Update CI URLs to point to new caching infrastructure (JuliaLang/JuliaSyntax.jl#92) --- JuliaSyntax/appveyor.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml index b503fc06d47b2..501fd3bc79216 100644 --- a/JuliaSyntax/appveyor.yml +++ b/JuliaSyntax/appveyor.yml @@ -1,9 +1,9 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" - - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" - - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" + - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" + - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" branches: only: @@ -17,9 +17,10 @@ notifications: on_build_status_changed: false install: + - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" # Download most recent Julia Windows binary - ps: (new-object net.webclient).DownloadFile( - $("http://s3.amazonaws.com/"+$env:JULIAVERSION), + $env:JULIA_URL, "C:\projects\julia-binary.exe") # Run installer silently, output to C:\projects\julia - C:\projects\julia-binary.exe /S /D=C:\projects\julia From b474b3c3ec404dea4424f803a7b4cf5cdb851cb1 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 24 Jul 2017 03:30:31 -0400 Subject: 
[PATCH 0105/1109] Respect an IO's starting position (JuliaLang/JuliaSyntax.jl#95) when construction a lexer from an IO object --- JuliaSyntax/src/lexer.jl | 16 +++++++++------- JuliaSyntax/test/lexer.jl | 10 ++++++++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index b41a99b2a9733..ff45ece7e607f 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -25,6 +25,7 @@ iswhitespace(c::Char) = Base.UTF8proc.isspace(c) type Lexer{IO_t <: IO} io::IO_t + io_startpos::Int token_start_row::Int token_start_col::Int @@ -39,7 +40,7 @@ type Lexer{IO_t <: IO} last_token::Tokens.Kind end -Lexer(io) = Lexer(io, 1, 1, -1, 0, 1, 1, 1, Tokens.ERROR) +Lexer(io) = Lexer(io, position(io), 1, 1, -1, 0, 1, 1, position(io), Tokens.ERROR) Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ @@ -57,13 +58,13 @@ Base.eltype{IO_t}(::Type{Lexer{IO_t}}) = Token function Base.start(l::Lexer) seekstart(l) - l.token_startpos = 0 + l.token_startpos = position(l) l.token_start_row = 1 l.token_start_col = 1 l.current_row = 1 l.current_col = 1 - l.current_pos = 1 + l.current_pos = l.io_startpos false end @@ -106,7 +107,7 @@ Set the lexer's previous position. """ prevpos!(l::Lexer, i::Integer) = l.prevpos = i -Base.seekstart(l::Lexer) = seekstart(l.io) +Base.seekstart(l::Lexer) = seek(l.io, l.io_startpos) """ seek2startpos!(l::Lexer) @@ -127,7 +128,7 @@ peekchar(l::Lexer) = peekchar(l.io) Returns the current position. 
""" -position(l::Lexer) = Base.position(l.io) +Base.position(l::Lexer) = Base.position(l.io) """ eof(l::Lexer) @@ -688,8 +689,9 @@ end function read_string(l::Lexer, kind::Tokens.Kind) while true c = readchar(l) - if c == '\\' && eof(readchar(l)) - return false + if c == '\\' + eof(readchar(l)) && return false + continue end if c == '"' if kind == Tokens.STRING diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index a816aa44dd5f5..1d803aee2c71c 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -12,7 +12,7 @@ tok(str, i = 1) = collect(tokenize(str))[i] @test Lexers.readchar(l) == 'a' @test Lexers.prevpos(l) == 0 - @test l.current_pos == 1 + @test l.current_pos == 0 l_old = l @test Lexers.prevchar(l) == 'a' @test l == l_old @@ -21,7 +21,7 @@ tok(str, i = 1) = collect(tokenize(str))[i] Lexers.backup!(l) @test Lexers.prevpos(l) == -1 - @test l.current_pos == 1 + @test l.current_pos == 0 end end # testset @@ -421,3 +421,9 @@ end @testset "where" begin @test tok("a where b", 3).kind == T.WHERE end + +@testset "IO position" begin + io = IOBuffer("#1+1") + skip(io, 1) + @test length(collect(tokenize(io))) == 4 +end \ No newline at end of file From 14c0170a571652ba625a727e052f318f252c96ca Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 28 Jul 2017 12:49:40 -0400 Subject: [PATCH 0106/1109] Fix bug in recent IO PR (JuliaLang/JuliaSyntax.jl#96) Somehow I had this locally, but not on the PR. Responsible for the test failures in https://github.com/ZacLN/CSTParser.jl/pull/24. 
--- JuliaSyntax/src/lexer.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index ff45ece7e607f..1e52d17f6b27e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -40,7 +40,7 @@ type Lexer{IO_t <: IO} last_token::Tokens.Kind end -Lexer(io) = Lexer(io, position(io), 1, 1, -1, 0, 1, 1, position(io), Tokens.ERROR) +Lexer(io) = Lexer(io, position(io), 1, 1, -1, position(io), 1, 1, position(io), Tokens.ERROR) Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ From 5f75f4dfdfeb3d388c1e1b40ad01871918e87926 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 28 Jul 2017 13:35:59 -0400 Subject: [PATCH 0107/1109] Handle more complicated interpolations correctly (JuliaLang/JuliaSyntax.jl#94) --- JuliaSyntax/src/lexer.jl | 106 +++++++++++++++++++------------------- JuliaSyntax/test/lexer.jl | 15 +++++- 2 files changed, 65 insertions(+), 56 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 1e52d17f6b27e..a7ed21899fb65 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -17,7 +17,7 @@ import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BR export tokenize -ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') || c == +ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') || c == '_' isbinary(c::Char) = c == '0' || c == '1' || c == '_' isoctal(c::Char) = '0' ≤ c ≤ '7' || c == '_' @@ -325,13 +325,13 @@ function lex_whitespace(l::Lexer) return emit(l, Tokens.WHITESPACE) end -function lex_comment(l::Lexer) +function lex_comment(l::Lexer, doemit=true) if peekchar(l) != '=' while true c = readchar(l) if c == '\n' || eof(c) backup!(l) - return emit(l, Tokens.COMMENT) + return doemit ? 
emit(l, Tokens.COMMENT) : EMPTY_TOKEN end end else @@ -339,7 +339,7 @@ function lex_comment(l::Lexer) n_start, n_end = 1, 0 while true if eof(c) - return emit_error(l, Tokens.EOF_MULTICOMMENT) + return doemit ? emit_error(l, Tokens.EOF_MULTICOMMENT) : EMPTY_TOKEN end nc = readchar(l) if c == '#' && nc == '=' @@ -348,7 +348,7 @@ function lex_comment(l::Lexer) n_end += 1 end if n_start == n_end - return emit(l, Tokens.COMMENT) + return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN end c = nc end @@ -626,8 +626,8 @@ function lex_prime(l) if l.last_token == Tokens.IDENTIFIER || l.last_token == Tokens.DOT || l.last_token == Tokens.RPAREN || - l.last_token == Tokens.RSQUARE || - l.last_token == Tokens.RBRACE || + l.last_token == Tokens.RSQUARE || + l.last_token == Tokens.RBRACE || l.last_token == Tokens.PRIME || isliteral(l.last_token) return emit(l, Tokens.PRIME) else @@ -685,7 +685,28 @@ function lex_quote(l::Lexer, doemit=true) end end -# We just consumed a " or a """ +function string_terminated(l, c, kind::Tokens.Kind) + if (kind == Tokens.STRING || kind == Tokens.TRIPLE_STRING) && c == '"' + if kind == Tokens.STRING + return true + else + if accept(l, "\"") && accept(l, "\"") + return true + end + end + elseif (kind == Tokens.CMD || kind == Tokens.TRIPLE_CMD) && c == '`' + if kind == Tokens.CMD + return true + else + if accept(l, "\`") && accept(l, "\`") + return true + end + end + end + return false +end + +# We just consumed a ", """, `, or ``` function read_string(l::Lexer, kind::Tokens.Kind) while true c = readchar(l) @@ -693,27 +714,17 @@ function read_string(l::Lexer, kind::Tokens.Kind) eof(readchar(l)) && return false continue end - if c == '"' - if kind == Tokens.STRING - return true - else - if accept(l, "\"") && accept(l, "\"") - return true - end - end + if string_terminated(l, c, kind) + return true elseif eof(c) return false end if c == '$' c = readchar(l) - if c == '"' - if kind == Tokens.STRING - return true - else - if accept(l, "\"") && accept(l, 
"\"") - return true - end - end + if string_terminated(l, c, kind) + return true + elseif eof(c) + return false elseif c == '(' o = 1 while o > 0 @@ -725,6 +736,10 @@ function read_string(l::Lexer, kind::Tokens.Kind) o -= 1 elseif c == '"' lex_quote(l, false) + elseif c == '`' + lex_cmd(l, false) + elseif c == '#' + lex_comment(l, false) end end end @@ -771,40 +786,23 @@ function lex_dot(l::Lexer) end # A ` has been consumed -function lex_cmd(l::Lexer) - if accept(l, '`') - if accept(l, '`') # TRIPLE_CMD - while true - c = readchar(l) - if c == '`' - c = readchar(l) - if c == '`' - c = readchar(l) - if c == '`' - return emit(l, Tokens.TRIPLE_CMD) - end - end - end - if eof(c) - return emit_error(l, Tokens.EOF_CMD) - end - end - else # empty CMD - return emit(l, Tokens.CMD) - end - else # CMD - while true - c = readchar(l) - if c == '`' - return emit(l, Tokens.CMD) - elseif eof(c) - return emit_error(l, Tokens.EOF_CMD) - end +# N.B.: cmds do not currently have special parser interpolation support +function lex_cmd(l::Lexer, doemit=true) + kind = Tokens.CMD + if accept(l, '`') # `` + if accept(l, '`') # ``` + kind = Tokens.TRIPLE_CMD + else # empty cmd + return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN end end + while true + c = readchar(l) + eof(c) && return (doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN) + string_terminated(l, c, kind) && return (doemit ? emit(l, kind) : EMPTY_TOKEN) + end end - function tryread(l, str, k, c) for s in str c = readchar(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 1d803aee2c71c..87f2b8c701180 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -321,7 +321,7 @@ end @testset "inferred" begin l = tokenize("abc") - Base.Test.@inferred Tokenize.Lexers.next_token(l) + @test Base.Test.@inferred Tokenize.Lexers.next_token(l).kind == T.IDENTIFIER end @testset "modifying function names (!) 
followed by operator" begin @@ -426,4 +426,15 @@ end io = IOBuffer("#1+1") skip(io, 1) @test length(collect(tokenize(io))) == 4 -end \ No newline at end of file +end + +@testset "complicated interpolations" begin + @test length(collect(tokenize("\"\$(())\""))) == 2 + @test length(collect(tokenize("\"\$(#=inline ) comment=#\"\")\""))) == 2 + @test length(collect(tokenize("\"\$(string(`inline ')' cmd`)\"\")\""))) == 2 + # These would require special interpolation support in the parse (Base issue #3150). + # If that gets implemented, thses should all be adjust to `== 2` + @test length(collect(tokenize("`\$((``))`"))) == 3 + @test length(collect(tokenize("`\$(#=inline ) comment=#``)`"))) == 3 + @test length(collect(tokenize("`\$(\"inline ) string\"*string(``))`"))) == 3 +end From 900cd0930b63ebd6431a4b2373809fc70a52cc85 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Mon, 31 Jul 2017 13:47:34 +0100 Subject: [PATCH 0108/1109] add 5 missing multiplication symbols (JuliaLang/JuliaSyntax.jl#99) * add 5 missing multiplication symbols * ws fixes --- JuliaSyntax/src/token_kinds.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index ffea6751d480e..ce4ee41afd225 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -704,6 +704,11 @@ LOGICAL_AND_WITH_DOUBLE_UNDERBAR, # ⩠ TRANSVERSAL_INTERSECTION, # ⫛ MULTISET_MULTIPLICATION, # ⊍ + WHITE_RIGHT_POINTING_TRIANGLE, # ▷ + JOIN, # ⨝ + LEFT_OUTER_JOIN, # ⟕ + RIGHT_OUTER_JOIN, # ⟖ + FULL_OUTER_JOIN, # ⟗ end_times, # Level 12 @@ -1289,6 +1294,11 @@ const UNICODE_OPS = Dict{Char, Kind}( '⩠' => LOGICAL_AND_WITH_DOUBLE_UNDERBAR, '⫛' => TRANSVERSAL_INTERSECTION, '⊍' => MULTISET_MULTIPLICATION, +'▷' => WHITE_RIGHT_POINTING_TRIANGLE, +'⨝' => JOIN, +'⟕' => LEFT_OUTER_JOIN, +'⟖' => RIGHT_OUTER_JOIN, +'⟗' => FULL_OUTER_JOIN, '^' => CIRCUMFLEX_ACCENT, '↑' => UPWARDS_ARROW, '↓' => DOWNWARDS_ARROW, From 
6e9ba60aa2e6d0dc1f2470e75dd7cfefbecfc457 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 31 Jul 2017 18:18:15 +0200 Subject: [PATCH 0109/1109] use String instead of Compat.UTF8String (JuliaLang/JuliaSyntax.jl#98) --- JuliaSyntax/src/token.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 0c55e3d0964af..8795a87490f38 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -51,7 +51,7 @@ immutable Token endpos::Tuple{Int, Int} startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer - val::Compat.UTF8String # The actual string of the token + val::String # The actual string of the token token_error::TokenError end From b775a7b43720737e19eaa9c0c168158db1f94580 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 31 Jul 2017 13:37:41 -0400 Subject: [PATCH 0110/1109] Accepted empty char literals in the lexer (JuliaLang/JuliaSyntax.jl#97) The one thing I know is that EOF_CHAR is the wrong error here, because it's not actually an EOF (it's a grammar error). However, given that we don't look at the contents of the char literal at all here (and basically accept arbitrary string), I think this error should happen at a higher level, together will all other such errors. As a result, I propose removing it here. --- JuliaSyntax/src/lexer.jl | 6 +++++- JuliaSyntax/test/lexer.jl | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index a7ed21899fb65..0de3641baebf0 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -635,7 +635,11 @@ function lex_prime(l) if accept(l, '\'') return emit(l, Tokens.CHAR) else - return emit_error(l, Tokens.EOF_CHAR) + # Empty char literal + # Arguably this should be an error here, but we generally + # look at the contents of the char literal in the parser, + # so we defer erroring until there. 
+ return emit(l, Tokens.CHAR) end end while true diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 87f2b8c701180..d1faaf3fd3d5d 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -224,6 +224,8 @@ end @test tokens[16].val==tokens[17].val=="'" @test tok("'a'").val == "'a'" @test tok("'a'").kind == Tokens.CHAR + @test tok("''").val == "''" + @test tok("''").kind == Tokens.CHAR @test tok("'''").val == "'''" @test tok("'''").kind == Tokens.CHAR @test tok("''''", 1).kind == Tokens.CHAR From c9228e265173e054242c63c9c71ac8f4c419c5bd Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 3 Aug 2017 16:22:59 +0200 Subject: [PATCH 0111/1109] fix deprecation and drop 0.5 (JuliaLang/JuliaSyntax.jl#100) * fix deprecation and drop 0.5 * remove compat --- JuliaSyntax/.travis.yml | 2 +- JuliaSyntax/REQUIRE | 3 +- JuliaSyntax/appveyor.yml | 4 +- JuliaSyntax/benchmark/lex_base.jl | 3 +- JuliaSyntax/src/Tokenize.jl | 2 +- JuliaSyntax/src/_precompile.jl | 110 +++++++++++++++--------------- JuliaSyntax/src/lexer.jl | 15 ++-- JuliaSyntax/src/token.jl | 38 +++++------ JuliaSyntax/test/lex_yourself.jl | 3 +- 9 files changed, 89 insertions(+), 91 deletions(-) diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index 11496537dc964..575c667fc3bf2 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -4,7 +4,7 @@ os: - linux - osx julia: - - 0.5 + - 0.6 - nightly notifications: email: false diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE index 9e74e5b21d7d5..137767a42af4a 100644 --- a/JuliaSyntax/REQUIRE +++ b/JuliaSyntax/REQUIRE @@ -1,2 +1 @@ -julia 0.5 -Compat 0.9.5 +julia 0.6 diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml index 501fd3bc79216..7a4151d2228af 100644 --- a/JuliaSyntax/appveyor.yml +++ b/JuliaSyntax/appveyor.yml @@ -1,7 +1,7 @@ environment: matrix: - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - - JULIA_URL: 
"https://julialang-s3.julialang.org/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 891c28071037c..4c13f43e0d495 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -28,7 +28,8 @@ function testall(srcdir::AbstractString) fname = splitdir(jlpath)[end] buf = IOBuffer() - write(buf, open(readstring, jlpath)) + print(buf, open(read, jlpath)) + seek(buf, 0) tot_files += 1 tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) tot_tokens += length(tokens) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 9d9f3e41303ee..fca523a90a01b 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -1,4 +1,4 @@ - __precompile__() +__precompile__() module Tokenize diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index f9d17376bad4e..2d02e07826c26 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -1,68 +1,70 @@ +isdefined(Base, :GenericIOBuffer) ? 
(import Base.GenericIOBuffer) : (GenericIOBuffer{T} = Base.AbstractIOBuffer{T}) + function _precompile_() ccall(:jl_generating_output, Cint, ()) == 1 || return nothing precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) - precompile(Tokenize.Lexers.peekchar, (Base.AbstractIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.peekchar, (GenericIOBuffer{Array{UInt8, 1}},)) precompile(Tokenize.Lexers.is_identifier_char, (Char,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_minus, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_minus, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, String,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.ishex, (Char,)) - precompile(Tokenize.Lexers.next_token, 
(Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},)) - precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, String,)) - precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - 
precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Function,)) - precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) - precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, String,)) + precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + 
precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Function,)) + precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) + precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.iswhitespace, (Char,)) - precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Bool,)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) + precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( 
Tokenize.Lexers.is_identifier_char),)) precompile(Tokenize.Tokens.endpos, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Bool,)) - precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, Base.AbstractIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) + precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, GenericIOBuffer{Array{UInt8, 1}},)) precompile(Tokenize.Tokens.startpos, (Tokenize.Tokens.Token,)) precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{Base.AbstractIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.ishex),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) end diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 0de3641baebf0..2541ae8cd2f48 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -1,10 +1,7 @@ module Lexers include("utilities.jl") -global const charstore = IOBuffer() - -using Compat -import Compat.String +global const charstore = IOBuffer() # TODO thread safety? 
import ..Tokens import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral @@ -23,7 +20,7 @@ isbinary(c::Char) = c == '0' || c == '1' || c == '_' isoctal(c::Char) = '0' ≤ c ≤ '7' || c == '_' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) -type Lexer{IO_t <: IO} +mutable struct Lexer{IO_t <: IO} io::IO_t io_startpos::Int @@ -52,9 +49,9 @@ Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. tokenize(x) = Lexer(x) # Iterator interface -Base.iteratorsize{IO_t}(::Type{Lexer{IO_t}}) = Base.SizeUnknown() -Base.iteratoreltype{IO_t}(::Type{Lexer{IO_t}}) = Base.HasEltype() -Base.eltype{IO_t}(::Type{Lexer{IO_t}}) = Token +Base.iteratorsize(::Type{Lexer{IO_t}}) where {IO_t} = Base.SizeUnknown() +Base.iteratoreltype(::Type{Lexer{IO_t}}) where {IO_t} = Base.HasEltype() +Base.eltype(::Type{Lexer{IO_t}}) where {IO_t} = Token function Base.start(l::Lexer) seekstart(l) @@ -168,7 +165,7 @@ Returns the next character and increments the current position. """ function readchar end -function readchar{I <: IO}(l::Lexer{I}) +function readchar(l::Lexer{I}) where {I <: IO} prevpos!(l, position(l)) c = readchar(l.io) return c diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 8795a87490f38..c1a01b9867679 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -1,7 +1,5 @@ module Tokens -using Compat -import Compat.String import Base.eof export Token @@ -27,24 +25,24 @@ _add_kws() # TODO: more @enum(TokenError, - NO_ERR, - EOF_MULTICOMMENT, - EOF_STRING, - EOF_CHAR, - EOF_CMD, - UNKNOWN, + NO_ERR, + EOF_MULTICOMMENT, + EOF_STRING, + EOF_CHAR, + EOF_CMD, + UNKNOWN, ) # Error kind => description TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( - EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", - EOF_STRING => "unterminated string literal", - EOF_CHAR => "unterminated character literal", - EOF_CMD => "unterminated cmd literal", - UNKNOWN => "unknown", + EOF_MULTICOMMENT => "unterminated multi-line comment #= ... =#", + EOF_STRING => "unterminated string literal", + EOF_CHAR => "unterminated character literal", + EOF_CMD => "unterminated cmd literal", + UNKNOWN => "unknown", ) -immutable Token +struct Token kind::Kind # Offsets into a string or buffer startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index @@ -85,12 +83,12 @@ end function Base.show(io::IO, t::Token) - start_r, start_c = startpos(t) - end_r, end_c = endpos(t) - str = kind(t) == ENDMARKER ? "" : escape_string(untokenize(t)) - print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) - print(io, rpad(kind(t), 15, " ")) - print(io, "\"", str, "\"") + start_r, start_c = startpos(t) + end_r, end_c = endpos(t) + str = kind(t) == ENDMARKER ? "" : escape_string(untokenize(t)) + print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) + print(io, rpad(kind(t), 15, " ")) + print(io, "\"", str, "\"") end Base.print(io::IO, t::Token) = print(io, untokenize(t)) diff --git a/JuliaSyntax/test/lex_yourself.jl b/JuliaSyntax/test/lex_yourself.jl index 77d262fab689f..732354a837681 100644 --- a/JuliaSyntax/test/lex_yourself.jl +++ b/JuliaSyntax/test/lex_yourself.jl @@ -28,7 +28,8 @@ function testall(srcdir::AbstractString) fname = splitdir(jlpath)[end] buf = IOBuffer() - write(buf, open(readstring, jlpath)) + print(buf, open(read, jlpath)) + seek(buf, 0) tot_files += 1 tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) tot_tokens += length(tokens) From 480d8f03573536af205a10b26253a5059c25b9ee Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sat, 19 Aug 2017 23:46:43 +0200 Subject: [PATCH 0112/1109] fix some number lexing (JuliaLang/JuliaSyntax.jl#104) --- JuliaSyntax/src/lexer.jl | 55 
+++++++++++++++++++--------------- JuliaSyntax/src/token_kinds.jl | 6 ++-- JuliaSyntax/src/utilities.jl | 14 ++++----- JuliaSyntax/test/lexer.jl | 12 ++++++++ 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 2541ae8cd2f48..6507e5797bb68 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -1,7 +1,6 @@ module Lexers include("utilities.jl") -global const charstore = IOBuffer() # TODO thread safety? import ..Tokens import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral @@ -14,10 +13,9 @@ import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BR export tokenize -ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') || c == -'_' -isbinary(c::Char) = c == '0' || c == '1' || c == '_' -isoctal(c::Char) = '0' ≤ c ≤ '7' || c == '_' +ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') +isbinary(c::Char) = c == '0' || c == '1' +isoctal(c::Char) = '0' ≤ c ≤ '7' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) mutable struct Lexer{IO_t <: IO} @@ -35,9 +33,10 @@ mutable struct Lexer{IO_t <: IO} current_pos::Int last_token::Tokens.Kind + charstore::IOBuffer end -Lexer(io) = Lexer(io, position(io), 1, 1, -1, position(io), 1, 1, position(io), Tokens.ERROR) +Lexer(io) = Lexer(io, position(io), 1, 1, -1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer()) Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ @@ -248,7 +247,7 @@ end Returns all characters since the start of the current `Token` as a `String`. 
""" function extract_tokenstring(l::Lexer) - global charstore + charstore = l.charstore curr_pos = position(l) seek2startpos!(l) @@ -514,12 +513,12 @@ function lex_xor(l::Lexer) return emit(l, Tokens.XOR) end -function accept_integer(l::Lexer) - !isdigit(peekchar(l)) && return false +function accept_number{F}(l::Lexer, f::F) + !f(peekchar(l)) && return false while true - if !accept(l, isdigit) + if !accept(l, f) if accept(l, '_') - if !isdigit(peekchar(l)) + if !f(peekchar(l)) backup!(l) return true end @@ -536,7 +535,7 @@ function lex_digit(l::Lexer) longest, kind = position(l), Tokens.ERROR # accept_batch(l, isdigit) - accept_integer(l) + accept_number(l, isdigit) if accept(l, '.') if peekchar(l) == '.' # 43.. -> [43, ..] @@ -562,13 +561,13 @@ function lex_digit(l::Lexer) backup!(l) return emit(l, Tokens.INTEGER) end - # accept_batch(l, isdigit) - accept_integer(l) + accept_number(l, isdigit) if accept(l, '.') - if peekchar(l) == '.' # 1.23..3.21 is valid + if peekchar(l) == '.' # 1.23.. -> [1.23, ..] backup!(l) return emit(l, Tokens.FLOAT) - elseif !(isdigit(peekchar(l)) || iswhitespace(peekchar(l)) || is_identifier_start_char(peekchar(l))) + elseif !(isdigit(peekchar(l)) || iswhitespace(peekchar(l)) || + is_identifier_start_char(peekchar(l)) || eof(peekchar(l))) # {1.23a, 1.23␣, 1.23EOF} -> [1.23, ?] backup!(l) return emit(l, Tokens.FLOAT) else # 3213.313.3123 is an error @@ -579,14 +578,22 @@ function lex_digit(l::Lexer) end if accept(l, "eEf") # 1313.[0-9]*e accept(l, "+-") - if accept_integer(l) && position(l) > longest - longest, kind = position(l), Tokens.FLOAT + if accept_batch(l, isdigit) + if accept(l, '.' 
) # 1.2e2.3 -> [ERROR, 3] + return emit_error(l) + elseif position(l) > longest + longest, kind = position(l), Tokens.FLOAT + end end end elseif accept(l, "eEf") accept(l, "+-") - if accept_integer(l) && position(l) > longest - longest, kind = position(l), Tokens.FLOAT + if accept_batch(l, isdigit) + if accept(l, '.') # 1e2.3 -> [ERROR, 3] + return emit_error(l) + elseif position(l) > longest + longest, kind = position(l), Tokens.FLOAT + end else backup!(l) return emit(l, Tokens.INTEGER) @@ -600,15 +607,15 @@ function lex_digit(l::Lexer) # 0x[0-9A-Fa-f]+ if accept(l, '0') if accept(l, 'x') - if accept_batch(l, ishex) && position(l) > longest + if accept_number(l, ishex) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end elseif accept(l, 'b') - if accept_batch(l, isbinary) && position(l) > longest + if accept_number(l, isbinary) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end elseif accept(l, 'o') - if accept_batch(l, isoctal) && position(l) > longest + if accept_number(l, isoctal) && position(l) > longest longest, kind = position(l), Tokens.INTEGER end end @@ -956,7 +963,7 @@ function lex_identifier(l, c) elseif c == 'a' return tryread(l, ('l','l'), IMPORTALL, c) else - return _doret(l, c) + return _doret(l, c) end else return _doret(l, c) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index ce4ee41afd225..00369a37c3cba 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -21,7 +21,7 @@ DO, ELSE, ELSEIF, - END, + END, EXPORT, FINALLY, FOR, @@ -85,7 +85,7 @@ CHAR, # 'a' CMD, # `cmd ...` TRIPLE_CMD, # ```cmd ...``` - TRUE, FALSE, + TRUE, FALSE, end_literal, begin_delimiters, @@ -129,7 +129,7 @@ begin_conditional, CONDITIONAL, # ? 
end_conditional, - + # Level 3 begin_arrow, RIGHT_ARROW, # --> diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 3fd814a4528d3..329d7e711f2b0 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -95,12 +95,12 @@ function is_identifier_char(c::Char) cat = UTF8proc.category_code(c) is_cat_id_start(c, cat) && return true if cat == UTF8proc.UTF8PROC_CATEGORY_MN || cat == UTF8proc.UTF8PROC_CATEGORY_MC || - cat == UTF8proc.UTF8PROC_CATEGORY_ND || cat == UTF8proc.UTF8PROC_CATEGORY_PC || - cat == UTF8proc.UTF8PROC_CATEGORY_SK || cat == UTF8proc.UTF8PROC_CATEGORY_ME || - cat == UTF8proc.UTF8PROC_CATEGORY_NO || - (0x2032 <= UInt32(c) <= 0x2034) || # primes - UInt32(c) == 0x0387 || UInt32(c) == 0x19da || - (0x1369 <= UInt32(c) <= 0x1371) + cat == UTF8proc.UTF8PROC_CATEGORY_ND || cat == UTF8proc.UTF8PROC_CATEGORY_PC || + cat == UTF8proc.UTF8PROC_CATEGORY_SK || cat == UTF8proc.UTF8PROC_CATEGORY_ME || + cat == UTF8proc.UTF8PROC_CATEGORY_NO || + (0x2032 <= UInt32(c) <= 0x2034) || # primes + UInt32(c) == 0x0387 || UInt32(c) == 0x19da || + (0x1369 <= UInt32(c) <= 0x1371) return true end return false @@ -149,7 +149,7 @@ peekchar(s::IOStream) = begin end eof(io::IO) = Base.eof(io) -eof(c) = c === EOF_CHAR +eof(c::Char) = c === EOF_CHAR readchar(io::IO) = eof(io) ? 
EOF_CHAR : read(io, Char) takechar(io::IO) = (readchar(io); io) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index d1faaf3fd3d5d..b054ad448d2c4 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -377,6 +377,18 @@ end @test tok("0x0167_032").kind == T.INTEGER @test tok("0b0101001_0100_0101").kind == T.INTEGER @test tok("0o01054001_0100_0101").kind == T.INTEGER + @test T.kind.(collect(tokenize("1.2."))) == [T.ERROR, T.ENDMARKER] + @test tok("1__2").kind == T.INTEGER + @test tok("1.2_3").kind == T.FLOAT + @test tok("1.2_3", 2).kind == T.ENDMARKER + @test T.kind.(collect(tokenize("3e2_2"))) == [T.FLOAT, T.IDENTIFIER, T.ENDMARKER] + @test T.kind.(collect(tokenize("1__2"))) == [T.INTEGER, T.IDENTIFIER, T.ENDMARKER] + @test T.kind.(collect(tokenize("0x2_0_2"))) == [T.INTEGER, T.ENDMARKER] + @test T.kind.(collect(tokenize("0x2__2"))) == [T.INTEGER, T.IDENTIFIER, T.ENDMARKER] + @test T.kind.(collect(tokenize("3_2.5_2"))) == [T.FLOAT, T.ENDMARKER] + @test T.kind.(collect(tokenize("3.2e2.2"))) == [T.ERROR, T.INTEGER, T.ENDMARKER] + @test T.kind.(collect(tokenize("3e2.2"))) == [T.ERROR, T.INTEGER, T.ENDMARKER] + @test T.kind.(collect(tokenize("0b101__101"))) == [T.INTEGER, T.IDENTIFIER, T.ENDMARKER] end @testset "floating points" begin From 5e7b46a7de7adb9eafce00ff3c3f41f87176c096 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sat, 26 Aug 2017 01:51:43 +0200 Subject: [PATCH 0113/1109] fix deprecation (JuliaLang/JuliaSyntax.jl#106) --- JuliaSyntax/src/lexer.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 6507e5797bb68..2faf3887ccef9 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -513,7 +513,7 @@ function lex_xor(l::Lexer) return emit(l, Tokens.XOR) end -function accept_number{F}(l::Lexer, f::F) +function accept_number(l::Lexer, f::F) where F !f(peekchar(l)) && return false while true if !accept(l, f) From 
dc438b7dd29bc4d0d1e38505ae093bccf8d131ff Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 28 Aug 2017 17:14:21 -0400 Subject: [PATCH 0114/1109] Make Tokenize work for more general kinds of IOBuffers (JuliaLang/JuliaSyntax.jl#107) --- JuliaSyntax/src/utilities.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 329d7e711f2b0..c5ae701559263 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -117,7 +117,8 @@ function is_identifier_start_char(c::Char) end -function peekchar(io::IOBuffer) +function peekchar(io::(isdefined(Base, :GenericIOBuffer) ? + Base.GenericIOBuffer : Base.AbstractIOBuffer)) if !io.readable || io.ptr > io.size return EOF_CHAR end From d74629daf1e692966924c8dbf3c31df9893d5fa7 Mon Sep 17 00:00:00 2001 From: ZacLN Date: Wed, 30 Aug 2017 15:26:35 +0000 Subject: [PATCH 0115/1109] Make Tokenize work for more general kinds of IOBuffers (JuliaLang/JuliaSyntax.jl#107) --- JuliaSyntax/src/utilities.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 329d7e711f2b0..c5ae701559263 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -117,7 +117,8 @@ function is_identifier_start_char(c::Char) end -function peekchar(io::IOBuffer) +function peekchar(io::(isdefined(Base, :GenericIOBuffer) ? 
+ Base.GenericIOBuffer : Base.AbstractIOBuffer)) if !io.readable || io.ptr > io.size return EOF_CHAR end From 34fe5135b566b9738241799528beca7a0cdeded1 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 31 Aug 2017 15:01:29 +0200 Subject: [PATCH 0116/1109] allow failure nightly --- JuliaSyntax/.travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index 575c667fc3bf2..9f9c001b46efb 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -6,6 +6,9 @@ os: julia: - 0.6 - nightly +matrix: + allow_failures: + - julia: nightly notifications: email: false # uncomment the following lines to override the default test script From ebb70ba2740c6dc923f153d70a605b40b8c5edf0 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 31 Aug 2017 15:11:02 +0200 Subject: [PATCH 0117/1109] test CSTParser (JuliaLang/JuliaSyntax.jl#105) * test CSTParser * add OMR --- JuliaSyntax/.travis.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index 9f9c001b46efb..841602557235f 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -11,10 +11,11 @@ matrix: - julia: nightly notifications: email: false -# uncomment the following lines to override the default test script -#script: -# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi -# - julia -e 'Pkg.clone(pwd()); Pkg.build("Tokenize"); Pkg.test("Tokenize"; coverage=true)' +script: + - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi + - julia -e 'Pkg.clone(pwd()); Pkg.build("Tokenize"); Pkg.test("Tokenize"; coverage=true)' + - julia -e 'Pkg.clone("https://github.com/ZacLN/CSTParser.jl"); Pkg.test("CSTParser")' + - julia -e 'Pkg.clone("https://github.com/KristofferC/OhMyREPL.jl"); Pkg.test("OhMyREPL")' after_success: # push coverage results to Coveralls - julia -e 'cd(Pkg.dir("Tokenize")); Pkg.add("Coverage"); using Coverage; 
Coveralls.submit(Coveralls.process_folder())' From 2833dbb9273283e768ece987470e28dd48f7d7ed Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Thu, 31 Aug 2017 14:42:38 +0100 Subject: [PATCH 0118/1109] Add missing hex-float lexing (JuliaLang/JuliaSyntax.jl#109) * only store vals for identifiers and literals, fill charstore on the fly * remove duplicate function * add RawToken type * parameterise Lexer * change tokenize signature * Lex hexidecimal float format * add tests * fixes * add tests * fix * fix --- JuliaSyntax/src/_precompile.jl | 2 - JuliaSyntax/src/lexer.jl | 554 ++++++++++++++++++--------------- JuliaSyntax/src/token.jl | 65 +++- JuliaSyntax/src/token_kinds.jl | 75 +++++ JuliaSyntax/src/utilities.jl | 33 +- JuliaSyntax/test/lexer.jl | 25 +- JuliaSyntax/test/runtests.jl | 2 + 7 files changed, 486 insertions(+), 270 deletions(-) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index 2d02e07826c26..b640a48720965 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -27,7 +27,6 @@ function _precompile_() precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.backup!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.ishex, (Char,)) precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},)) @@ -49,7 +48,6 @@ function _precompile_() precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.accept_batch, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Function,)) - precompile(Tokenize.Lexers.extract_tokenstring, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 2faf3887ccef9..6805a9cc0e794 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -3,7 +3,7 @@ module Lexers include("utilities.jl") import ..Tokens -import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral +import ..Tokens: AbstractToken, Token, RawToken, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, @@ -18,14 +18,12 @@ isbinary(c::Char) = c == '0' || c == '1' isoctal(c::Char) = '0' ≤ c ≤ '7' iswhitespace(c::Char) = Base.UTF8proc.isspace(c) -mutable struct Lexer{IO_t <: IO} +mutable struct Lexer{IO_t <: IO, T <: AbstractToken} io::IO_t io_startpos::Int token_start_row::Int token_start_col::Int - - prevpos::Int token_startpos::Int current_row::Int @@ -34,23 +32,29 @@ mutable struct Lexer{IO_t <: IO} last_token::Tokens.Kind charstore::IOBuffer + current_char::Char + doread::Bool end -Lexer(io) = Lexer(io, position(io), 1, 1, -1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer()) -Lexer(str::AbstractString) = Lexer(IOBuffer(str)) +Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} = Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), ' ', false) +Lexer(str::AbstractString, 
T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) """ - tokenize(x) + tokenize(x, T = Token) Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. -`join(untokenize.(tokenize(x)))`. +`join(untokenize.(tokenize(x)))`. Setting `T` chooses the type of token +produced by the lexer (`Token` or `RawToken`). """ -tokenize(x) = Lexer(x) +tokenize(x, ::Type{Token}) = Lexer(x, Token) +tokenize(x, ::Type{RawToken}) = Lexer(x, RawToken) +tokenize(x) = Lexer(x, Token) # Iterator interface -Base.iteratorsize(::Type{Lexer{IO_t}}) where {IO_t} = Base.SizeUnknown() -Base.iteratoreltype(::Type{Lexer{IO_t}}) where {IO_t} = Base.HasEltype() -Base.eltype(::Type{Lexer{IO_t}}) where {IO_t} = Token +Base.iteratorsize(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.SizeUnknown() +Base.iteratoreltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.HasEltype() +Base.eltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = T + function Base.start(l::Lexer) seekstart(l) @@ -89,20 +93,6 @@ Set a new starting position. """ startpos!(l::Lexer, i::Integer) = l.token_startpos = i -""" - prevpos(l::Lexer) - -Return the lexer's previous position. -""" -prevpos(l::Lexer) = l.prevpos - -""" - prevpos!(l::Lexer, i::Integer) - -Set the lexer's previous position. -""" -prevpos!(l::Lexer, i::Integer) = l.prevpos = i - Base.seekstart(l::Lexer) = seek(l.io, l.io_startpos) """ @@ -119,6 +109,13 @@ Returns the next character without changing the lexer's state. """ peekchar(l::Lexer) = peekchar(l.io) +""" +dpeekchar(l::Lexer) + +Returns the next two characters without changing the lexer's state. +""" +dpeekchar(l::Lexer) = dpeekchar(l.io) + """ position(l::Lexer) @@ -147,16 +144,6 @@ function start_token!(l::Lexer) l.token_start_col = l.current_col end -""" - prevchar(l::Lexer) - -Returns the previous character. Does not change the lexer's state. 
-""" -function prevchar(l::Lexer) - backup!(l) - return readchar(l) -end - """ readchar(l::Lexer) @@ -165,21 +152,33 @@ Returns the next character and increments the current position. function readchar end function readchar(l::Lexer{I}) where {I <: IO} - prevpos!(l, position(l)) - c = readchar(l.io) - return c + l.current_char = readchar(l.io) + if l.doread + write(l.charstore, l.current_char) + end + if l.current_char == '\n' + l.current_row += 1 + l.current_col = 1 + elseif !eof(l.current_char) + l.current_col += 1 + end + return l.current_char end -""" - backup!(l::Lexer) +readon(l::Lexer{I,RawToken}) where {I <: IO} = l.current_char +function readon(l::Lexer{I,Token}) where {I <: IO} + if l.charstore.size != 0 + take!(l.charstore) + end + write(l.charstore, l.current_char) + l.doread = true + return l.current_char +end -Decrements the current position and sets the previous position to `-1`, unless -the previous position already is `-1`. -""" -function backup!(l::Lexer) - prevpos(l) == -1 && error("prevpos(l) == -1\n Cannot backup! multiple times.") - seek(l, prevpos(l)) - prevpos!(l, -1) +readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.current_char +function readoff(l::Lexer{I,Token}) where {I <: IO} + l.doread = false + return l.current_char end """ @@ -216,19 +215,33 @@ function accept_batch(l::Lexer, f) end """ - emit(l::Lexer, kind::Kind, - str::String=extract_tokenstring(l), err::TokenError=Tokens.NO_ERR) + emit(l::Lexer, kind::Kind, err::TokenError=Tokens.NO_ERR) Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. 
""" -function emit(l::Lexer, kind::Kind, - str::String = extract_tokenstring(l), err::TokenError = Tokens.NO_ERR) +function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t + if (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) + str = String(take!(l.charstore)) + elseif kind == Tokens.ERROR + str = String(l.io.data[(l.token_startpos + 1):position(l.io)]) + else + str = "" + end tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, str, err) l.last_token = kind - start_token!(l) + readoff(l) + return tok +end + +function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t + tok = RawToken(kind, (l.token_start_row, l.token_start_col), + (l.current_row, l.current_col - 1), + startpos(l), position(l) - 1) + l.last_token = kind + readoff(l) return tok end @@ -238,31 +251,9 @@ end Returns an `ERROR` token with error `err` and starts a new `Token`. """ function emit_error(l::Lexer, err::TokenError = Tokens.UNKNOWN) - return emit(l, Tokens.ERROR, extract_tokenstring(l), err) + return emit(l, Tokens.ERROR, err) end -""" - extract_tokenstring(l::Lexer) - -Returns all characters since the start of the current `Token` as a `String`. -""" -function extract_tokenstring(l::Lexer) - charstore = l.charstore - curr_pos = position(l) - seek2startpos!(l) - - while position(l) < curr_pos - c = readchar(l) - l.current_col += 1 - if c == '\n' - l.current_row += 1 - l.current_col = 1 - end - write(charstore, c) - end - str = String(take!(charstore)) - return str -end """ next_token(l::Lexer) @@ -270,47 +261,92 @@ end Returns the next `Token`. 
""" function next_token(l::Lexer) + start_token!(l) c = readchar(l) - - if eof(c); return emit(l, Tokens.ENDMARKER) - elseif iswhitespace(c); return lex_whitespace(l) - elseif c == '['; return emit(l, Tokens.LSQUARE) - elseif c == ']'; return emit(l, Tokens.RSQUARE) - elseif c == '{'; return emit(l, Tokens.LBRACE) - elseif c == ';'; return emit(l, Tokens.SEMICOLON) - elseif c == '}'; return emit(l, Tokens.RBRACE) - elseif c == '('; return emit(l, Tokens.LPAREN) - elseif c == ')'; return emit(l, Tokens.RPAREN) - elseif c == ','; return emit(l, Tokens.COMMA) - elseif c == '*'; return lex_star(l); - elseif c == '^'; return lex_circumflex(l); - elseif c == '@'; return emit(l, Tokens.AT_SIGN) - elseif c == '?'; return emit(l, Tokens.CONDITIONAL) - elseif c == '$'; return lex_dollar(l); - elseif c == '⊻'; return lex_xor(l); - elseif c == '~'; return emit(l, Tokens.APPROX) - elseif c == '#'; return lex_comment(l) - elseif c == '='; return lex_equal(l) - elseif c == '!'; return lex_exclaim(l) - elseif c == '>'; return lex_greater(l) - elseif c == '<'; return lex_less(l) - elseif c == ':'; return lex_colon(l) - elseif c == '|'; return lex_bar(l) - elseif c == '&'; return lex_amper(l) - elseif c == '\''; return lex_prime(l) - elseif c == '÷'; return lex_division(l) - elseif c == '"'; return lex_quote(l); - elseif c == '%'; return lex_percent(l); - elseif c == '/'; return lex_forwardslash(l); - elseif c == '\\'; return lex_backslash(l); - elseif c == '.'; return lex_dot(l); - elseif c == '+'; return lex_plus(l); - elseif c == '-'; return lex_minus(l); - elseif c == '`'; return lex_cmd(l); - elseif is_identifier_start_char(c); return lex_identifier(l, c) - elseif isdigit(c); return lex_digit(l) - elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR; return emit(l, k) - else emit_error(l) + if eof(c); + return emit(l, Tokens.ENDMARKER) + elseif iswhitespace(c) + readon(l) + return lex_whitespace(l) + elseif c == '[' + return emit(l, Tokens.LSQUARE) + elseif c == ']' + 
return emit(l, Tokens.RSQUARE) + elseif c == '{' + return emit(l, Tokens.LBRACE) + elseif c == ';' + return emit(l, Tokens.SEMICOLON) + elseif c == '}' + return emit(l, Tokens.RBRACE) + elseif c == '(' + return emit(l, Tokens.LPAREN) + elseif c == ')' + return emit(l, Tokens.RPAREN) + elseif c == ',' + return emit(l, Tokens.COMMA) + elseif c == '*' + return lex_star(l); + elseif c == '^' + return lex_circumflex(l); + elseif c == '@' + return emit(l, Tokens.AT_SIGN) + elseif c == '?' + return emit(l, Tokens.CONDITIONAL) + elseif c == '$' + return lex_dollar(l); + elseif c == '⊻' + return lex_xor(l); + elseif c == '~' + return emit(l, Tokens.APPROX) + elseif c == '#' + readon(l) + return lex_comment(l) + elseif c == '=' + return lex_equal(l) + elseif c == '!' + return lex_exclaim(l) + elseif c == '>' + return lex_greater(l) + elseif c == '<' + return lex_less(l) + elseif c == ':' + return lex_colon(l) + elseif c == '|' + return lex_bar(l) + elseif c == '&' + return lex_amper(l) + elseif c == '\'' + return lex_prime(l) + elseif c == '÷' + return lex_division(l) + elseif c == '"' + readon(l) + return lex_quote(l); + elseif c == '%' + return lex_percent(l); + elseif c == '/' + return lex_forwardslash(l); + elseif c == '\\' + return lex_backslash(l); + elseif c == '.' + return lex_dot(l); + elseif c == '+' + return lex_plus(l); + elseif c == '-' + return lex_minus(l); + elseif c == '`' + readon(l) + return lex_cmd(l); + elseif is_identifier_start_char(c) + readon(l) + return lex_identifier(l, c) + elseif isdigit(c) + readon(l) + return lex_digit(l, Tokens.INTEGER) + elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR + return emit(l, k) + else + emit_error(l) end end @@ -324,11 +360,11 @@ end function lex_comment(l::Lexer, doemit=true) if peekchar(l) != '=' while true - c = readchar(l) - if c == '\n' || eof(c) - backup!(l) + pc = peekchar(l) + if pc == '\n' || eof(pc) return doemit ? 
emit(l, Tokens.COMMENT) : EMPTY_TOKEN end + readchar(l) end else c = readchar(l) # consume the '=' @@ -513,116 +549,100 @@ function lex_xor(l::Lexer) return emit(l, Tokens.XOR) end -function accept_number(l::Lexer, f::F) where F - !f(peekchar(l)) && return false +function accept_number(l::Lexer, f::F) where {F} while true - if !accept(l, f) - if accept(l, '_') - if !f(peekchar(l)) - backup!(l) - return true - end - else - return true - end + pc, ppc = dpeekchar(l) + if pc == '_' && !f(ppc) + return + elseif f(pc) || pc == '_' + readchar(l) + else + return end end end # A digit has been consumed -function lex_digit(l::Lexer) - backup!(l) - longest, kind = position(l), Tokens.ERROR - - # accept_batch(l, isdigit) +function lex_digit(l::Lexer, kind) accept_number(l, isdigit) - - if accept(l, '.') - if peekchar(l) == '.' # 43.. -> [43, ..] - backup!(l) - return emit(l, Tokens.INTEGER) - elseif !(isdigit(peekchar(l)) || - iswhitespace(peekchar(l)) || - is_identifier_start_char(peekchar(l)) - || peekchar(l) == '(' - || peekchar(l) == ')' - || peekchar(l) == '[' - || peekchar(l) == ']' - || peekchar(l) == '{' - || peekchar(l) == '}' - || peekchar(l) == ',' - || peekchar(l) == ';' - || peekchar(l) == '@' - || peekchar(l) == '`' - || peekchar(l) == '"' - || peekchar(l) == ':' - || peekchar(l) == '?' - || eof(l)) - backup!(l) - return emit(l, Tokens.INTEGER) + pc,ppc = dpeekchar(l) + if pc == '.' + if ppc == '.' + return emit(l, kind) + elseif (!(isdigit(ppc) || + iswhitespace(ppc) || + is_identifier_start_char(ppc) + || ppc == '(' + || ppc == ')' + || ppc == '[' + || ppc == ']' + || ppc == '{' + || ppc == '}' + || ppc == ',' + || ppc == ';' + || ppc == '@' + || ppc == '`' + || ppc == '"' + || ppc == ':' + || ppc == '?' + || eof(ppc))) + kind = Tokens.INTEGER + + return emit(l, kind) end + readchar(l) + + kind = Tokens.FLOAT accept_number(l, isdigit) - if accept(l, '.') - if peekchar(l) == '.' # 1.23.. -> [1.23, ..] 
- backup!(l) - return emit(l, Tokens.FLOAT) - elseif !(isdigit(peekchar(l)) || iswhitespace(peekchar(l)) || - is_identifier_start_char(peekchar(l)) || eof(peekchar(l))) # {1.23a, 1.23␣, 1.23EOF} -> [1.23, ?] - backup!(l) - return emit(l, Tokens.FLOAT) - else # 3213.313.3123 is an error - return emit_error(l) - end - elseif position(l) > longest # 323213.3232 candidate - longest, kind = position(l), Tokens.FLOAT - end - if accept(l, "eEf") # 1313.[0-9]*e + pc, ppc = dpeekchar(l) + if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-') + kind = Tokens.FLOAT + readchar(l) accept(l, "+-") if accept_batch(l, isdigit) - if accept(l, '.' ) # 1.2e2.3 -> [ERROR, 3] + if accept(l, '.') # 1.2e2.3 -> [ERROR, 3] return emit_error(l) - elseif position(l) > longest - longest, kind = position(l), Tokens.FLOAT end + else + return emit_error(l) end + elseif pc == '.' && (is_identifier_start_char(ppc) || eof(ppc)) + readchar(l) + return emit_error(l) end - elseif accept(l, "eEf") + + elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-') + kind = Tokens.FLOAT + readchar(l) accept(l, "+-") if accept_batch(l, isdigit) - if accept(l, '.') # 1e2.3 -> [ERROR, 3] + if accept(l, '.') # 1.2e2.3 -> [ERROR, 3] return emit_error(l) - elseif position(l) > longest - longest, kind = position(l), Tokens.FLOAT end else - backup!(l) - return emit(l, Tokens.INTEGER) + return emit_error(l) end - elseif position(l) > longest - longest, kind = position(l), Tokens.INTEGER - end - - seek2startpos!(l) - - # 0x[0-9A-Fa-f]+ - if accept(l, '0') - if accept(l, 'x') - if accept_number(l, ishex) && position(l) > longest - longest, kind = position(l), Tokens.INTEGER - end - elseif accept(l, 'b') - if accept_number(l, isbinary) && position(l) > longest - longest, kind = position(l), Tokens.INTEGER + elseif position(l) - startpos(l) == 1 && l.current_char == '0' + kind == Tokens.INTEGER + if pc == 'x' + readchar(l) + accept_number(l, ishex) + if 
accept(l, '.') + accept_number(l, ishex) end - elseif accept(l, 'o') - if accept_number(l, isoctal) && position(l) > longest - longest, kind = position(l), Tokens.INTEGER + if accept(l, "pP") + kind = Tokens.FLOAT + accept(l, "+-") + accept_number(l, isdigit) end + elseif pc == 'b' + readchar(l) + accept_number(l, isbinary) + elseif pc == 'o' + readchar(l) + accept_number(l, isoctal) end end - - seek(l, longest) - return emit(l, kind) end @@ -635,6 +655,7 @@ function lex_prime(l) l.last_token == Tokens.PRIME || isliteral(l.last_token) return emit(l, Tokens.PRIME) else + readon(l) if accept(l, '\'') if accept(l, '\'') return emit(l, Tokens.CHAR) @@ -787,7 +808,8 @@ function lex_dot(l::Lexer) return emit(l, Tokens.DDOT) end elseif Base.isdigit(peekchar(l)) - return lex_digit(l) + readon(l) + return lex_digit(l, Tokens.FLOAT) else return emit(l, Tokens.DOT) end @@ -813,13 +835,14 @@ end function tryread(l, str, k, c) for s in str - c = readchar(l) + c = peekchar(l) if c != s if !is_identifier_char(c) - backup!(l) return emit(l, IDENTIFIER) end return readrest(l, c) + else + readchar(l) end end if is_identifier_char(peekchar(l)) @@ -829,11 +852,9 @@ function tryread(l, str, k, c) end function readrest(l, c) - while is_identifier_char(c) - if c == '!' && peekchar(l) == '=' - backup!(l) - break - elseif !is_identifier_char(peekchar(l)) + while true + pc, ppc = dpeekchar(l) + if !is_identifier_char(pc) || (pc == '!' 
&& ppc == '=') break end c = readchar(l) @@ -845,7 +866,6 @@ end function _doret(l, c) if !is_identifier_char(c) - backup!(l) return emit(l, IDENTIFIER) else return readrest(l, c) @@ -856,29 +876,40 @@ function lex_identifier(l, c) if c == 'a' return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT, c) elseif c == 'b' - c = readchar(l) + c = peekchar(l) if c == 'a' + c = readchar(l) return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE, c) elseif c == 'e' + c = readchar(l) return tryread(l, ('g', 'i', 'n'), BEGIN, c) elseif c == 'i' + c = readchar(l) return tryread(l, ('t', 's', 't', 'y', 'p', 'e'), BITSTYPE, c) elseif c == 'r' + c = readchar(l) return tryread(l, ('e', 'a', 'k'), BREAK, c) else return _doret(l, c) end elseif c == 'c' - c = readchar(l) + c = peekchar(l) if c == 'a' + c = readchar(l) return tryread(l, ('t', 'c', 'h'), CATCH, c) elseif c == 'o' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'n' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 's' + readchar(l) + c = peekchar(l) return tryread(l, ('t',), CONST, c) elseif c == 't' + readchar(l) + c = peekchar(l) return tryread(l, ('i', 'n', 'u', 'e'), CONTINUE, c) else return _doret(l, c) @@ -892,17 +923,20 @@ function lex_identifier(l, c) elseif c == 'd' return tryread(l, ('o'), DO, c) elseif c == 'e' - c = readchar(l) + c = peekchar(l) if c == 'l' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 's' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'e' - c = readchar(l) + readchar(l) + c = peekchar(l) if !is_identifier_char(c) - backup!(l) return emit(l, ELSE) elseif c == 'i' + c = readchar(l) return tryread(l, ('f'), ELSEIF ,c) else return _doret(l, c) @@ -914,21 +948,27 @@ function lex_identifier(l, c) return _doret(l, c) end elseif c == 'n' + c = readchar(l) return tryread(l, ('d'), END, c) elseif c == 'x' + c = readchar(l) return tryread(l, ('p', 'o', 'r', 't'), EXPORT, c) else return _doret(l, c) end elseif c == 'f' - c = readchar(l) + 
c = peekchar(l) if c == 'a' + c = readchar(l) return tryread(l, ('l', 's', 'e'), FALSE, c) elseif c == 'i' + c = readchar(l) return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY, c) elseif c == 'o' + c = readchar(l) return tryread(l, ('r'), FOR, c) elseif c == 'u' + c = readchar(l) return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION, c) else return _doret(l, c) @@ -936,31 +976,37 @@ function lex_identifier(l, c) elseif c == 'g' return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL, c) elseif c == 'i' - c = readchar(l) + c = peekchar(l) if c == 'f' - c = readchar(l) + readchar(l) + c = peekchar(l) if !is_identifier_char(c) - backup!(l) return emit(l, IF) else return readrest(l, c) end elseif c == 'm' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'm' + readchar(l) return tryread(l, ('u', 't', 'a', 'b', 'l', 'e'), IMMUTABLE, c) elseif c == 'p' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'o' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'r' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 't' - c = readchar(l) + readchar(l) + c = peekchar(l) if !is_identifier_char(c) - backup!(l) return emit(l, IMPORT) elseif c == 'a' + c = readchar(l) return tryread(l, ('l','l'), IMPORTALL, c) else return _doret(l, c) @@ -978,34 +1024,40 @@ function lex_identifier(l, c) return _doret(l, c) end elseif c == 'n' - c = readchar(l) + readchar(l) + c = peekchar(l) if !is_identifier_char(c) - backup!(l) return emit(l, IN) else return readrest(l, c) end elseif (@static VERSION >= v"0.6.0-dev.1471" ? 
true : false) && c == 's' + c = readchar(l) return tryread(l, ('a'), ISA, c) else return _doret(l, c) end elseif c == 'l' - c = readchar(l) + c = peekchar(l) if c == 'e' + readchar(l) return tryread(l, ('t'), LET, c) elseif c == 'o' + readchar(l) return tryread(l, ('c', 'a', 'l'), LOCAL, c) else return _doret(l, c) end elseif c == 'm' - c = readchar(l) + c = peekchar(l) if c == 'a' + c = readchar(l) return tryread(l, ('c', 'r', 'o'), MACRO, c) elseif c == 'o' + c = readchar(l) return tryread(l, ('d', 'u', 'l', 'e'), MODULE, c) elseif c == 'u' + c = readchar(l) return tryread(l, ('t', 'a', 'b', 'l', 'e'), MUTABLE, c) else return _doret(l, c) @@ -1019,34 +1071,41 @@ function lex_identifier(l, c) elseif c == 's' return tryread(l, ('t', 'r', 'u', 'c', 't'), STRUCT, c) elseif c == 't' - c = readchar(l) + c = peekchar(l) if c == 'r' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'u' + c = readchar(l) return tryread(l, ('e'), TRUE, c) elseif c == 'y' - c = readchar(l) + readchar(l) + c = peekchar(l) if !is_identifier_char(c) - backup!(l) return emit(l, TRY) else + c = readchar(l) return _doret(l, c) end else return _doret(l, c) end elseif c == 'y' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'p' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'e' - c = readchar(l) + readchar(l) + c = peekchar(l) if !is_identifier_char(c) - backup!(l) return emit(l, TYPE) elseif c == 'a' + c = readchar(l) return tryread(l, ('l', 'i', 'a', 's'), TYPEALIAS, c) else + c = readchar(l) return _doret(l, c) end else @@ -1061,12 +1120,15 @@ function lex_identifier(l, c) elseif c == 'u' return tryread(l, ('s', 'i', 'n', 'g'), USING, c) elseif c == 'w' - c = readchar(l) + c = peekchar(l) if c == 'h' - c = readchar(l) + readchar(l) + c = peekchar(l) if c == 'e' + c = readchar(l) return tryread(l, ('r', 'e'), WHERE, c) elseif c == 'i' + c = readchar(l) return tryread(l, ('l', 'e'), WHILE, c) else return _doret(l, c) diff --git a/JuliaSyntax/src/token.jl 
b/JuliaSyntax/src/token.jl index c1a01b9867679..c61ca4d03fb05 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -42,7 +42,9 @@ TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( UNKNOWN => "unknown", ) -struct Token +abstract type AbstractToken end + +struct Token <: AbstractToken kind::Kind # Offsets into a string or buffer startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index @@ -53,6 +55,15 @@ struct Token token_error::TokenError end +struct RawToken <: AbstractToken + kind::Kind + # Offsets into a string or buffer + startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index + endpos::Tuple{Int, Int} + startbyte::Int # The byte where the token start in the buffer + endbyte::Int # The byte where the token ended in the buffer +end + function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int, val::String) Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) @@ -61,17 +72,50 @@ Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) const EMPTY_TOKEN = Token() -function kind(t::Token) +function kind(t::AbstractToken) isoperator(t.kind) && return OP iskeyword(t.kind) && return KEYWORD return t.kind end -exactkind(t::Token) = t.kind -startpos(t::Token) = t.startpos -endpos(t::Token) = t.endpos -untokenize(t::Token) = t.val +exactkind(t::AbstractToken) = t.kind +startpos(t::AbstractToken) = t.startpos +endpos(t::AbstractToken) = t.endpos +function untokenize(t::Token) + if t.kind == IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == ERROR + return t.val + elseif iskeyword(t.kind) + return lowercase(string(t.kind)) + elseif isoperator(t.kind) + return string(UNICODE_OPS_REVERSE[t.kind]) + elseif t.kind == LPAREN + return "(" + elseif t.kind == LSQUARE + return "[" + elseif t.kind == LBRACE + return "{" + elseif t.kind == RPAREN + return ")" + elseif t.kind == RSQUARE + return 
"]" + elseif t.kind == RBRACE + return "}" + elseif t.kind == AT_SIGN + return "@" + elseif t.kind == COMMA + return "," + elseif t.kind == SEMICOLON + return ";" + else + return "" + end +end + +function untokenize(t::RawToken, str::String) + String(str[1 + (t.startbyte:t.endbyte)]) +end + function untokenize(ts) - if eltype(ts) != Token + if !(eltype(ts) <: AbstractToken) throw(ArgumentError("element type of iterator has to be Token")) end io = IOBuffer() @@ -93,4 +137,11 @@ end Base.print(io::IO, t::Token) = print(io, untokenize(t)) +function Base.show(io::IO, t::RawToken) + start_r, start_c = startpos(t) + end_r, end_c = endpos(t) + print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) + print(io, rpad(kind(t), 15, " ")) +end + end # module diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 00369a37c3cba..d4c0cf7107b9c 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -1331,3 +1331,78 @@ const UNICODE_OPS = Dict{Char, Kind}( '↑' => HALFWIDTH_UPWARDS_ARROW, '↓' => HALFWIDTH_DOWNWARDS_ARROW, '⋅' => UNICODE_DOT) + + +const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() +for (k, v) in UNICODE_OPS + UNICODE_OPS_REVERSE[v] = Symbol(k) +end + +UNICODE_OPS_REVERSE[EQ] = :(=) +UNICODE_OPS_REVERSE[PLUS_EQ] = :(+=) +UNICODE_OPS_REVERSE[MINUS_EQ] = :(-=) +UNICODE_OPS_REVERSE[STAR_EQ] = :(*=) +UNICODE_OPS_REVERSE[FWD_SLASH_EQ] = :(/=) +UNICODE_OPS_REVERSE[FWDFWD_SLASH_EQ] = :(//=) +UNICODE_OPS_REVERSE[OR_EQ] = :(|=) +UNICODE_OPS_REVERSE[CIRCUMFLEX_EQ] = :(^=) +UNICODE_OPS_REVERSE[DIVISION_EQ] = :(÷=) +UNICODE_OPS_REVERSE[REM_EQ] = :(%=) +UNICODE_OPS_REVERSE[LBITSHIFT_EQ] = :(<<=) +UNICODE_OPS_REVERSE[RBITSHIFT_EQ] = :(>>=) +UNICODE_OPS_REVERSE[LBITSHIFT] = :(<<) +UNICODE_OPS_REVERSE[RBITSHIFT] = :(>>) +UNICODE_OPS_REVERSE[UNSIGNED_BITSHIFT] = :(>>>) +UNICODE_OPS_REVERSE[UNSIGNED_BITSHIFT_EQ] = :(>>>=) +UNICODE_OPS_REVERSE[BACKSLASH_EQ] = :(\=) +UNICODE_OPS_REVERSE[AND_EQ] = :(&=) 
+UNICODE_OPS_REVERSE[COLON_EQ] = :(:=) +UNICODE_OPS_REVERSE[PAIR_ARROW] = :(=>) +UNICODE_OPS_REVERSE[APPROX] = :(~) +UNICODE_OPS_REVERSE[EX_OR_EQ] = :($=) +UNICODE_OPS_REVERSE[XOR_EQ] = :(⊻=) +UNICODE_OPS_REVERSE[RIGHT_ARROW] = :(-->) +UNICODE_OPS_REVERSE[LAZY_OR] = :(||) +UNICODE_OPS_REVERSE[LAZY_AND] = :(&&) +UNICODE_OPS_REVERSE[ISSUBTYPE] = :(<:) +UNICODE_OPS_REVERSE[ISSUPERTYPE] = :(>:) +UNICODE_OPS_REVERSE[GREATER] = :(>) +UNICODE_OPS_REVERSE[LESS] = :(<) +UNICODE_OPS_REVERSE[GREATER_EQ] = :(>=) +UNICODE_OPS_REVERSE[GREATER_THAN_OR_EQUAL_TO] = :(≥) +UNICODE_OPS_REVERSE[LESS_EQ] = :(<=) +UNICODE_OPS_REVERSE[LESS_THAN_OR_EQUAL_TO] = :(≤) +UNICODE_OPS_REVERSE[EQEQ] = :(==) +UNICODE_OPS_REVERSE[EQEQEQ] = :(===) +UNICODE_OPS_REVERSE[IDENTICAL_TO] = :(≡) +UNICODE_OPS_REVERSE[NOT_EQ] = :(!=) +UNICODE_OPS_REVERSE[NOT_EQUAL_TO] = :(≠) +UNICODE_OPS_REVERSE[NOT_IS] = :(!==) +UNICODE_OPS_REVERSE[NOT_IDENTICAL_TO] = :(≢) +UNICODE_OPS_REVERSE[IN] = :(in) +UNICODE_OPS_REVERSE[ISA] = :(isa) +UNICODE_OPS_REVERSE[LPIPE] = :(<|) +UNICODE_OPS_REVERSE[RPIPE] = :(|>) +UNICODE_OPS_REVERSE[COLON] = :(:) +UNICODE_OPS_REVERSE[DDOT] = :(..) +UNICODE_OPS_REVERSE[EX_OR] = :($) +UNICODE_OPS_REVERSE[PLUS] = :(+) +UNICODE_OPS_REVERSE[MINUS] = :(-) +UNICODE_OPS_REVERSE[PLUSPLUS] = :(++) +UNICODE_OPS_REVERSE[OR] = :(|) +UNICODE_OPS_REVERSE[STAR] = :(*) +UNICODE_OPS_REVERSE[FWD_SLASH] = :(/) +UNICODE_OPS_REVERSE[REM] = :(%) +UNICODE_OPS_REVERSE[BACKSLASH] = :(\) +UNICODE_OPS_REVERSE[AND] = :(&) +UNICODE_OPS_REVERSE[FWDFWD_SLASH] = :(//) +UNICODE_OPS_REVERSE[CIRCUMFLEX_ACCENT] = :(^) +UNICODE_OPS_REVERSE[DECLARATION] = :(::) +UNICODE_OPS_REVERSE[CONDITIONAL] = :? +UNICODE_OPS_REVERSE[DOT] = :(.) +UNICODE_OPS_REVERSE[NOT] = :(!) +UNICODE_OPS_REVERSE[PRIME] = Symbol(''') +UNICODE_OPS_REVERSE[DDDOT] = :(...) 
+UNICODE_OPS_REVERSE[TRANSPOSE] = Symbol(".'") +UNICODE_OPS_REVERSE[ANON_FUNC] = :(->) +UNICODE_OPS_REVERSE[WHERE] = :where diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index c5ae701559263..c88617e193803 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -122,21 +122,40 @@ function peekchar(io::(isdefined(Base, :GenericIOBuffer) ? if !io.readable || io.ptr > io.size return EOF_CHAR end - ch = convert(UInt8,io.data[io.ptr]) + ch, _ = readutf(io) + return ch +end + +function readutf(io, offset = 0) + ch = convert(UInt8, io.data[io.ptr + offset]) if ch < 0x80 - return convert(Char,ch) + return convert(Char, ch), 0 end - # mimic utf8.next function - trailing = Base.utf8_trailing[ch+1] + trailing = Base.utf8_trailing[ch + 1] c::UInt32 = 0 for j = 1:trailing c += ch c <<= 6 - ch = convert(UInt8,io.data[io.ptr+j]) + ch = convert(UInt8, io.data[io.ptr + j + offset]) end c += ch - c -= Base.utf8_offset[trailing+1] - return convert(Char,c) + c -= Base.utf8_offset[trailing + 1] + return convert(Char, c), trailing +end + +function dpeekchar(io::IOBuffer) + if !io.readable || io.ptr > io.size + return EOF_CHAR, EOF_CHAR + end + ch1, trailing = readutf(io) + offset = trailing + 1 + + if io.ptr + offset > io.size + return ch1, EOF_CHAR + end + ch2, _ = readutf(io, offset) + + return ch1, ch2 end # this implementation is copied from Base diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index b054ad448d2c4..518b8f652efbf 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -10,17 +10,13 @@ tok(str, i = 1) = collect(tokenize(str))[i] for s in ["a", IOBuffer("a")] l = tokenize(s) @test Lexers.readchar(l) == 'a' - @test Lexers.prevpos(l) == 0 @test l.current_pos == 0 l_old = l - @test Lexers.prevchar(l) == 'a' @test l == l_old @test Lexers.eof(l) @test Lexers.readchar(l) == Lexers.EOF_CHAR - Lexers.backup!(l) - @test Lexers.prevpos(l) == -1 @test l.current_pos == 0 end end # testset @@ 
-128,6 +124,9 @@ end # testset for (i, n) in enumerate(tokenize(str)) @test Tokens.kind(n) == kinds[i] end + for (i, n) in enumerate(tokenize(str, Tokens.RawToken)) + @test Tokens.kind(n) == kinds[i] + end @testset "roundtrippability" begin @test join(untokenize.(collect(tokenize(str)))) == str @@ -136,7 +135,7 @@ end # testset @test_throws ArgumentError untokenize("blabla") end - @test all((t.endbyte - t.startbyte + 1)==sizeof(t.val) for t in tokenize(str)) + @test all((t.endbyte - t.startbyte + 1)==sizeof(untokenize(t)) for t in tokenize(str)) end # testset @testset "issue 5, '..'" begin @@ -144,7 +143,7 @@ end # testset end @testset "issue 17, >>" begin - @test tok(">> ").val==">>" + @test untokenize(tok(">> "))==">>" end @@ -221,7 +220,7 @@ end ImageMagick.save(fn, reinterpret(ARGB32, [0xf0884422]'')) D = ImageMagick.load(fn) """)) - @test tokens[16].val==tokens[17].val=="'" + @test string(untokenize(tokens[16]))==string(untokenize(tokens[17]))=="'" @test tok("'a'").val == "'a'" @test tok("'a'").kind == Tokens.CHAR @test tok("''").val == "''" @@ -274,7 +273,7 @@ end "typealias", "using", "while"] - + @test T.kind(tok(kw)) == T.KEYWORD end end @@ -324,6 +323,8 @@ end @testset "inferred" begin l = tokenize("abc") @test Base.Test.@inferred Tokenize.Lexers.next_token(l).kind == T.IDENTIFIER + l = tokenize("abc", Tokens.RawToken) + @test Base.Test.@inferred typeof(Tokenize.Lexers.next_token(l)) == Tokens.RawToken end @testset "modifying function names (!) 
followed by operator" begin @@ -407,6 +408,14 @@ end @test tok("2048f0").kind == Tokens.FLOAT @test tok("1.:0").kind == Tokens.FLOAT @test tok("1.?").kind == Tokens.FLOAT + @test tok("0x00p2").kind == Tokens.FLOAT + @test tok("0x00P2").kind == Tokens.FLOAT + @test tok("0x0.00p23").kind == Tokens.FLOAT + @test tok("0x0.0ap23").kind == Tokens.FLOAT + @test tok("0x0.0_0p2").kind == Tokens.FLOAT + @test tok("0x0_0_0.0_0p2").kind == Tokens.FLOAT + @test tok("0x0p+2").kind == Tokens.FLOAT + @test tok("0x0p-2").kind == Tokens.FLOAT end @testset "1e1" begin diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 55482fe0168cb..032d254435e22 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -3,4 +3,6 @@ using Base.Test import Tokenize include("lex_yourself.jl") +@testset "lexer" begin include("lexer.jl") +end From 4e658cbc77b3cf4520affce17b1262ca572df360 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 1 Sep 2017 10:55:03 +0100 Subject: [PATCH 0119/1109] update precompile.jl (JuliaLang/JuliaSyntax.jl#112) --- JuliaSyntax/src/_precompile.jl | 153 ++++++++++++++++++++------------- 1 file changed, 94 insertions(+), 59 deletions(-) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index b640a48720965..458c26a434bd7 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -2,67 +2,102 @@ isdefined(Base, :GenericIOBuffer) ? 
(import Base.GenericIOBuffer) : (GenericIOBu function _precompile_() ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) - precompile(Tokenize.Lexers.peekchar, (GenericIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.is_identifier_char, (Char,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.tryread, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, String,)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.ishex, (Char,)) - precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},)) - precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Tokens.iskeyword, (Tokenize.Tokens.Kind,)) + precompile(Tokenize.Tokens.isliteral, (Tokenize.Tokens.Kind,)) + precompile(Tokenize.Tokens.isoperator, (Tokenize.Tokens.Kind,)) + precompile(Tokenize.Tokens.Token, (Tokenize.Tokens.Kind,Tuple{Int,Int},Tuple{Int,Int},Int,Int,String)) + precompile(Tokenize.Tokens.Token, ()) precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, String,)) - precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tuple{Char, Char, Char, Char, Char, Char, Char}, 
Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Function,)) - precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Base.UTF8proc.isdigit),)) - precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.iswhitespace, (Char,)) - precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - 
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) - precompile(Tokenize.Tokens.endpos, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) - precompile(Tokenize.Lexers.Type, (Type{Tokenize.Lexers.Lexer}, GenericIOBuffer{Array{UInt8, 1}},)) precompile(Tokenize.Tokens.startpos, (Tokenize.Tokens.Token,)) + precompile(Tokenize.Tokens.endpos, (Tokenize.Tokens.Token,)) precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.RawToken,String)) + precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},)) + precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + + precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) + precompile(Tokenize.Lexers.is_identifier_char, (Char,)) precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) + precompile(Tokenize.Lexers.peekchar, (GenericIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.dpeekchar, (GenericIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.readutf, (GenericIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.readutf, (GenericIOBuffer{Array{UInt8, 1}},Int)) + precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.readchar, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + + precompile(Tokenize.Lexers.ishex, (Char,)) + precompile(Tokenize.Lexers.isbinary, (Char,)) + precompile(Tokenize.Lexers.isoctal, (Char,)) + precompile(Tokenize.Lexers.iswhitespace, (Char,)) + precompile(Tokenize.Lexers.Lexer, (String,)) + precompile(Tokenize.Lexers.Lexer, (String,Type{Tokenize.Tokens.Token})) + precompile(Tokenize.Lexers.Lexer, (String,Type{Tokenize.Tokens.RawToken})) + precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.Token})) + precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.RawToken})) + precompile(Tokenize.Lexers.tokenize, (String,)) + + precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) + precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) + precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) + precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) + precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Int)) + 
precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},Int)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + + + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_prime, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Tokenize.Tokens.Kind)) + precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) + precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_comment, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Bool)) + precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + + precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Char)) + + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Base.UTF8proc.isdigit),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.is_identifier_char),)) + precompile(Tokenize.Lexers.accept, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),)) + precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + end From 8117dba7c820f567b80168525cc8d7c8953161e5 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sat, 16 Sep 2017 11:28:38 +0100 Subject: [PATCH 0120/1109] Add special cstparser token kinds (JuliaLang/JuliaSyntax.jl#113) --- JuliaSyntax/src/token_kinds.jl | 36 ++++++++-------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index d4c0cf7107b9c..69062bb28f6e6 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -47,34 +47,14 @@ WHILE, end_keywords, - begin_invisible_keywords, - BLOCK, - CALL, - CCALL, - COMPARISON, - COMPREHENSION, - CURLY, - DICT_COMPREHENSION, - FILTER, - FLATTEN, - GENERATOR, - HCAT, - KW, - LINE, - MACROCALL, - PARAMETERS, - REF, - ROW, - TOPLEVEL, - TUPLE, - TYPED_COMPREHENSION, - TYPED_HCAT, - TYPED_VCAT, - VCAT, - VECT, - x_STR, - x_CMD, - end_invisible_keywords, + begin_cstparser, + INVISIBLE_BRACKETS, + NOTHING, + WS, + SEMICOLON_WS, + NEWLINE_WS, + EMPTY_WS, + end_cstparser, begin_literal, LITERAL, # general From b4a3671563411cf0260855713a7b9d88c40e8925 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 18 Sep 2017 13:28:16 +0200 Subject: [PATCH 0121/1109] improve benchmarking code (JuliaLang/JuliaSyntax.jl#115) --- 
JuliaSyntax/README.md | 2 +- JuliaSyntax/benchmark/lex_base.jl | 76 +++++++++---------------------- 2 files changed, 22 insertions(+), 56 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 83654a72b0b0f..39af3694a2034 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -7,7 +7,7 @@ The goals of this package is to be -* Fast, it currently lexes all of Julia source files in ~0.7 seconds (523 files, 1.8 million Tokens) +* Fast, it currently lexes all of Julia source files in ~0.3 seconds (295 files, 1.16 million Tokens) * Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. * Non error throwing. Instead of throwing errors a certain error token is returned. diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 4c13f43e0d495..ef5f4f1041bcc 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -1,65 +1,31 @@ import Tokenize -const BASEPATH = abspath(joinpath(JULIA_HOME, "..", "..")) - -tot_files = 0 -tot_time = 0.0 -tot_tokens = 0 -tot_errors = 0 -function testall(srcdir::AbstractString) - global tot_files, tot_time, tot_tokens, tot_errors - dirs, files = [], [] - - for fname in sort(readdir(srcdir)) - path = joinpath(srcdir, fname) - if isdir(path) - push!(dirs, path) - continue - end - _, ext = splitext(fname) - if ext == ".jl" - push!(files, path) - end - end - - if !isempty(files) - for jlpath in files - - fname = splitdir(jlpath)[end] - - buf = IOBuffer() - print(buf, open(read, jlpath)) - seek(buf, 0) - tot_files += 1 - tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) - tot_tokens += length(tokens) - - seek(buf, 0) - str = String(take!(buf)) - collect(Tokenize.tokenize(str)) - - for token in tokens - if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR - tot_errors += 1 +function speed_test() + tot_files = 0 + tot_tokens = 0 + tot_errors = 0 + dir = dirname(Base.find_source_file("base.jl")) + 
for (root, dirs, files) in walkdir(dir) + for file in files + if endswith(file, ".jl") + tot_files += 1 + file = joinpath(root, file) + str = readstring(file) + l = tokenize(str) + while !Tokenize.Lexers.eof(l) + t = Tokenize.Lexers.next_token(l) + tot_tokens += 1 + if t.kind == Tokens.ERROR + tot_errors += 1 + end end end end end - for dir in dirs - testall(dir) - end -end - - -if isdir(BASEPATH) && isdir(joinpath(BASEPATH, "base")) - testall(joinpath(BASEPATH, "examples")) - testall(joinpath(BASEPATH, "test")) - testall(joinpath(BASEPATH, "base")) -else - warn(""" -Could not find julia base sources in $BASEPATH, -perhaps you are using a Julia not built from source?""") + tot_files, tot_tokens, tot_errors end +tot_files, tot_tokens, tot_errors = speed_test() +tot_time = @belapsed speed_test() print("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") From e7d025245ad30b8b3b3316e450b87f80b34629c8 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 18 Sep 2017 20:29:28 +0200 Subject: [PATCH 0122/1109] actually test inferability and make RawToken inferrable (JuliaLang/JuliaSyntax.jl#114) * actually test inferrability and make RawToken inferrable * Update token.jl --- JuliaSyntax/src/lexer.jl | 44 ++++++++++++++++++++------------------- JuliaSyntax/src/token.jl | 21 +++++++++++++------ JuliaSyntax/test/lexer.jl | 4 ++-- 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 6805a9cc0e794..d2170e68c9c16 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -39,6 +39,8 @@ end Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} = Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), ' ', false) Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) +@inline token_type(l::Lexer{IO_t, 
TT}) where {IO_t, TT} = TT + """ tokenize(x, T = Token) @@ -239,7 +241,7 @@ end function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t tok = RawToken(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1) + startpos(l), position(l) - 1, err) l.last_token = kind readoff(l) return tok @@ -316,24 +318,24 @@ function next_token(l::Lexer) elseif c == '&' return lex_amper(l) elseif c == '\'' - return lex_prime(l) + return lex_prime(l) elseif c == '÷' - return lex_division(l) + return lex_division(l) elseif c == '"' readon(l) return lex_quote(l); elseif c == '%' - return lex_percent(l); + return lex_percent(l); elseif c == '/' - return lex_forwardslash(l); + return lex_forwardslash(l); elseif c == '\\' - return lex_backslash(l); + return lex_backslash(l); elseif c == '.' - return lex_dot(l); + return lex_dot(l); elseif c == '+' - return lex_plus(l); + return lex_plus(l); elseif c == '-' - return lex_minus(l); + return lex_minus(l); elseif c == '`' readon(l) return lex_cmd(l); @@ -344,7 +346,7 @@ function next_token(l::Lexer) readon(l) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR - return emit(l, k) + return emit(l, k) else emit_error(l) end @@ -362,7 +364,7 @@ function lex_comment(l::Lexer, doemit=true) while true pc = peekchar(l) if pc == '\n' || eof(pc) - return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN(token_type(l)) end readchar(l) end @@ -371,7 +373,7 @@ function lex_comment(l::Lexer, doemit=true) n_start, n_end = 1, 0 while true if eof(c) - return doemit ? emit_error(l, Tokens.EOF_MULTICOMMENT) : EMPTY_TOKEN + return doemit ? emit_error(l, Tokens.EOF_MULTICOMMENT) : EMPTY_TOKEN(token_type(l)) end nc = readchar(l) if c == '#' && nc == '=' @@ -380,7 +382,7 @@ function lex_comment(l::Lexer, doemit=true) n_end += 1 end if n_start == n_end - return doemit ? 
emit(l, Tokens.COMMENT) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN(token_type(l)) end c = nc end @@ -698,18 +700,18 @@ function lex_quote(l::Lexer, doemit=true) if accept(l, '"') # "" if accept(l, '"') # """ if read_string(l, Tokens.TRIPLE_STRING) - return doemit ? emit(l, Tokens.TRIPLE_STRING) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.TRIPLE_STRING) : EMPTY_TOKEN(token_type(l)) else - return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN + return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN(token_type(l)) end else # empty string - return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN(token_type(l)) end else # "?, ? != '"' if read_string(l, Tokens.STRING) - return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN(token_type(l)) else - return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN + return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN(token_type(l)) end end end @@ -823,13 +825,13 @@ function lex_cmd(l::Lexer, doemit=true) if accept(l, '`') # ``` kind = Tokens.TRIPLE_CMD else # empty cmd - return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN + return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) end end while true c = readchar(l) - eof(c) && return (doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN) - string_terminated(l, c, kind) && return (doemit ? emit(l, kind) : EMPTY_TOKEN) + eof(c) && return (doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN(token_type(l))) + string_terminated(l, c, kind) && return (doemit ? 
emit(l, kind) : EMPTY_TOKEN(token_type(l))) end end diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index c61ca4d03fb05..f20737156ec1f 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -54,6 +54,11 @@ struct Token <: AbstractToken val::String # The actual string of the token token_error::TokenError end +function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, + startbyte::Int, endbyte::Int, val::String) +Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) +end +Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) struct RawToken <: AbstractToken kind::Kind @@ -62,15 +67,19 @@ struct RawToken <: AbstractToken endpos::Tuple{Int, Int} startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer + token_error::TokenError end - -function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, - startbyte::Int, endbyte::Int, val::String) - Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) +function RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, + startbyte::Int, endbyte::Int) +RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR) end -Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) +RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN) + -const EMPTY_TOKEN = Token() +const _EMPTY_TOKEN = Token() +const _EMPTY_RAWTOKEN = RawToken() +EMPTY_TOKEN(::Type{Token}) = _EMPTY_TOKEN +EMPTY_TOKEN(::Type{RawToken}) = _EMPTY_RAWTOKEN function kind(t::AbstractToken) isoperator(t.kind) && return OP diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 518b8f652efbf..0e2122eb027bd 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -322,9 +322,9 @@ end @testset "inferred" begin l = tokenize("abc") - @test Base.Test.@inferred Tokenize.Lexers.next_token(l).kind == T.IDENTIFIER + 
@inferred Tokenize.Lexers.next_token(l) l = tokenize("abc", Tokens.RawToken) - @test Base.Test.@inferred typeof(Tokenize.Lexers.next_token(l)) == Tokens.RawToken + @inferred Tokenize.Lexers.next_token(l) end @testset "modifying function names (!) followed by operator" begin From c35a2d441b0cb8c154b18cc80d95903c4a19e3c3 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 18 Sep 2017 22:42:58 +0200 Subject: [PATCH 0123/1109] inline some functions (JuliaLang/JuliaSyntax.jl#116) --- JuliaSyntax/benchmark/lex_base.jl | 14 +++++++++----- JuliaSyntax/src/lexer.jl | 28 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index ef5f4f1041bcc..32191814b4da1 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -1,6 +1,7 @@ import Tokenize +using BenchmarkTools -function speed_test() +function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens.AbstractToken tot_files = 0 tot_tokens = 0 tot_errors = 0 @@ -11,7 +12,7 @@ function speed_test() tot_files += 1 file = joinpath(root, file) str = readstring(file) - l = tokenize(str) + l = tokenize(str, T) while !Tokenize.Lexers.eof(l) t = Tokenize.Lexers.next_token(l) tot_tokens += 1 @@ -26,6 +27,9 @@ function speed_test() end tot_files, tot_tokens, tot_errors = speed_test() -tot_time = @belapsed speed_test() -print("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), - " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") +tot_time_token = @belapsed speed_test() +tot_time_rawtoken = @belapsed speed_test(Tokenize.Tokens.RawToken) +println("Lexed ", tot_files, " files, with a total of ", tot_tokens, + " tokens with ", tot_errors, " errors") +println("Time Token: ", @sprintf("%3.4f", tot_time_token), " seconds") +println("Time RawToken: ", @sprintf("%3.4f", tot_time_rawtoken), " seconds") \ No newline at end of file diff --git 
a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index d2170e68c9c16..ed50657ba963c 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -13,10 +13,10 @@ import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BR export tokenize -ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') -isbinary(c::Char) = c == '0' || c == '1' -isoctal(c::Char) = '0' ≤ c ≤ '7' -iswhitespace(c::Char) = Base.UTF8proc.isspace(c) +@inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') +@inline isbinary(c::Char) = c == '0' || c == '1' +@inline isoctal(c::Char) = '0' ≤ c ≤ '7' +@inline iswhitespace(c::Char) = Base.UTF8proc.isspace(c) mutable struct Lexer{IO_t <: IO, T <: AbstractToken} io::IO_t @@ -45,7 +45,7 @@ Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexe tokenize(x, T = Token) Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. -`join(untokenize.(tokenize(x)))`. Setting `T` chooses the type of token +`join(untokenize.(tokenize(x)))`. Setting `T` chooses the type of token produced by the lexer (`Token` or `RawToken`). """ tokenize(x, ::Type{Token}) = Lexer(x, Token) @@ -158,7 +158,7 @@ function readchar(l::Lexer{I}) where {I <: IO} if l.doread write(l.charstore, l.current_char) end - if l.current_char == '\n' + if l.current_char == '\n' l.current_row += 1 l.current_col = 1 elseif !eof(l.current_char) @@ -190,7 +190,7 @@ Consumes the next character `c` if either `f::Function(c)` returns true, `c == f for `c::Char` or `c in f` otherwise. Returns `true` if a character has been consumed and `false` otherwise. """ -function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) +@inline function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) c = peekchar(l) if isa(f, Function) ok = f(c) @@ -208,7 +208,7 @@ end Consumes all following characters until `accept(l, f)` is `false`. 
""" -function accept_batch(l::Lexer, f) +@inline function accept_batch(l::Lexer, f) ok = false while accept(l, f) ok = true @@ -265,7 +265,7 @@ Returns the next `Token`. function next_token(l::Lexer) start_token!(l) c = readchar(l) - if eof(c); + if eof(c); return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) readon(l) @@ -347,7 +347,7 @@ function next_token(l::Lexer) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) - else + else emit_error(l) end end @@ -569,7 +569,7 @@ function lex_digit(l::Lexer, kind) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' - if ppc == '.' + if ppc == '.' return emit(l, kind) elseif (!(isdigit(ppc) || iswhitespace(ppc) || @@ -589,11 +589,11 @@ function lex_digit(l::Lexer, kind) || ppc == '?' || eof(ppc))) kind = Tokens.INTEGER - + return emit(l, kind) end readchar(l) - + kind = Tokens.FLOAT accept_number(l, isdigit) pc, ppc = dpeekchar(l) @@ -612,7 +612,7 @@ function lex_digit(l::Lexer, kind) readchar(l) return emit_error(l) end - + elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-') kind = Tokens.FLOAT readchar(l) From be69865e978586c3ea35782519a7e0cee3d1e717 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 20 Sep 2017 21:33:13 +0200 Subject: [PATCH 0124/1109] use using --- JuliaSyntax/benchmark/lex_base.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 32191814b4da1..1cc30b650e0ee 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -1,4 +1,4 @@ -import Tokenize +using Tokenize using BenchmarkTools function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens.AbstractToken @@ -32,4 +32,4 @@ tot_time_rawtoken = @belapsed speed_test(Tokenize.Tokens.RawToken) println("Lexed ", tot_files, " files, with a total of ", tot_tokens, " tokens with ", 
tot_errors, " errors") println("Time Token: ", @sprintf("%3.4f", tot_time_token), " seconds") -println("Time RawToken: ", @sprintf("%3.4f", tot_time_rawtoken), " seconds") \ No newline at end of file +println("Time RawToken: ", @sprintf("%3.4f", tot_time_rawtoken), " seconds") From 5bd25286e6a1e40f099292aa48e0b2fb6541be8d Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sun, 8 Oct 2017 14:01:39 +0100 Subject: [PATCH 0125/1109] return error on invalid binary/octal/hex intege (JuliaLang/JuliaSyntax.jl#118) * return error on invalid binary/octal/hex intege * add tests, fix typo * fix for hex-floats --- JuliaSyntax/src/lexer.jl | 3 +++ JuliaSyntax/test/lexer.jl | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index ed50657ba963c..2f4eb55447d44 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -628,6 +628,7 @@ function lex_digit(l::Lexer, kind) kind == Tokens.INTEGER if pc == 'x' readchar(l) + !(ishex(ppc) || ppc =='.') && return emit_error(l) accept_number(l, ishex) if accept(l, '.') accept_number(l, ishex) @@ -638,9 +639,11 @@ function lex_digit(l::Lexer, kind) accept_number(l, isdigit) end elseif pc == 'b' + !isbinary(ppc) && return emit_error(l) readchar(l) accept_number(l, isbinary) elseif pc == 'o' + !isoctal(ppc) && return emit_error(l) readchar(l) accept_number(l, isoctal) end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 0e2122eb027bd..62f44675a4e06 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -461,3 +461,12 @@ end @test length(collect(tokenize("`\$(#=inline ) comment=#``)`"))) == 3 @test length(collect(tokenize("`\$(\"inline ) string\"*string(``))`"))) == 3 end + + +@testset "hex/bin/octal errors" begin +@test tok("0x").kind == T.ERROR +@test tok("0b").kind == T.ERROR +@test tok("0o").kind == T.ERROR +@test tok("0x 2", 1).kind == T.ERROR +@test tok("0x.1p1").kind == T.FLOAT +end From 
4ae32f8dc626935dd68c71c68e59097d92ce372a Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 13 Feb 2018 10:36:30 +0000 Subject: [PATCH 0126/1109] misc v0.7 fixes (JuliaLang/JuliaSyntax.jl#127) * misc v0.7 fixes * bump ver * fixes * fix EOF_CHAR * revert EOF change, rm dep warning * Change pairarrow precedence * small 0.7 fixes --- JuliaSyntax/.travis.yml | 5 +-- JuliaSyntax/REQUIRE | 2 +- JuliaSyntax/appveyor.yml | 2 - JuliaSyntax/benchmark/lex_base.jl | 2 +- JuliaSyntax/src/_precompile.jl | 16 +++---- JuliaSyntax/src/lexer.jl | 6 +-- JuliaSyntax/src/token_kinds.jl | 5 ++- JuliaSyntax/src/utilities.jl | 71 +++++++++++++++++++++---------- JuliaSyntax/test/lex_yourself.jl | 2 +- JuliaSyntax/test/lexer.jl | 2 +- JuliaSyntax/test/runtests.jl | 2 +- 11 files changed, 69 insertions(+), 46 deletions(-) diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index 841602557235f..f3283dcb2979c 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -4,7 +4,6 @@ os: - linux - osx julia: - - 0.6 - nightly matrix: allow_failures: @@ -14,8 +13,8 @@ notifications: script: - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - julia -e 'Pkg.clone(pwd()); Pkg.build("Tokenize"); Pkg.test("Tokenize"; coverage=true)' - - julia -e 'Pkg.clone("https://github.com/ZacLN/CSTParser.jl"); Pkg.test("CSTParser")' - - julia -e 'Pkg.clone("https://github.com/KristofferC/OhMyREPL.jl"); Pkg.test("OhMyREPL")' + # - julia -e 'Pkg.clone("https://github.com/ZacLN/CSTParser.jl"); Pkg.test("CSTParser")' + # - julia -e 'Pkg.clone("https://github.com/KristofferC/OhMyREPL.jl"); Pkg.test("OhMyREPL")' after_success: # push coverage results to Coveralls - julia -e 'cd(Pkg.dir("Tokenize")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE index 137767a42af4a..4aa321c1e26fe 100644 --- a/JuliaSyntax/REQUIRE +++ b/JuliaSyntax/REQUIRE @@ -1 +1 @@ -julia 0.6 +julia 0.7- diff --git 
a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml index 7a4151d2228af..cd987b7986bb9 100644 --- a/JuliaSyntax/appveyor.yml +++ b/JuliaSyntax/appveyor.yml @@ -1,7 +1,5 @@ environment: matrix: - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 1cc30b650e0ee..7b0b66263b2ef 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -11,7 +11,7 @@ function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens. if endswith(file, ".jl") tot_files += 1 file = joinpath(root, file) - str = readstring(file) + str = read(file, String)::String l = tokenize(str, T) while !Tokenize.Lexers.eof(l) t = Tokenize.Lexers.next_token(l) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index 458c26a434bd7..d07e29300b259 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -1,4 +1,4 @@ -isdefined(Base, :GenericIOBuffer) ? 
(import Base.GenericIOBuffer) : (GenericIOBuffer{T} = Base.AbstractIOBuffer{T}) +import Base: GenericIOBuffer function _precompile_() ccall(:jl_generating_output, Cint, ()) == 1 || return nothing @@ -20,8 +20,6 @@ function _precompile_() precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) precompile(Tokenize.Lexers.peekchar, (GenericIOBuffer{Array{UInt8, 1}},)) precompile(Tokenize.Lexers.dpeekchar, (GenericIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.readutf, (GenericIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.readutf, (GenericIOBuffer{Array{UInt8, 1}},Int)) precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) @@ -49,7 +47,7 @@ function _precompile_() precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},Int)) precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char}, Tokenize.Tokens.Kind,Char)) @@ -58,7 +56,7 @@ function _precompile_() precompile(Tokenize.Lexers.tryread, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) @@ -84,14 +82,14 @@ function _precompile_() precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - + precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tokenize.Tokens.Kind,)) precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Char)) - + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Base.UTF8proc.isdigit),)) + 
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Base.isdigit),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.is_identifier_char),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),)) @@ -99,5 +97,5 @@ function _precompile_() precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),)) precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - + end diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 2f4eb55447d44..970a94814408d 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -16,7 +16,7 @@ export tokenize @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') @inline isbinary(c::Char) = c == '0' || c == '1' @inline isoctal(c::Char) = '0' ≤ c ≤ '7' -@inline iswhitespace(c::Char) = Base.UTF8proc.isspace(c) +@inline iswhitespace(c::Char) = Base.isspace(c) mutable struct Lexer{IO_t <: IO, T <: AbstractToken} io::IO_t @@ -53,8 +53,8 @@ tokenize(x, ::Type{RawToken}) = Lexer(x, RawToken) tokenize(x) = Lexer(x, Token) # Iterator interface -Base.iteratorsize(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.SizeUnknown() -Base.iteratoreltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.HasEltype() +Base.IteratorSize(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.SizeUnknown() +Base.IteratorEltype(::Type{Lexer{IO_t,T}}) 
where {IO_t,T} = Base.HasEltype() Base.eltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = T diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 69062bb28f6e6..45be42fbe0052 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -99,12 +99,15 @@ BACKSLASH_EQ, # \= AND_EQ, # &= COLON_EQ, # := - PAIR_ARROW, # => APPROX, # ~ EX_OR_EQ, # $= XOR_EQ, # ⊻= end_assignments, + begin_pairarrow, + PAIR_ARROW, # => + end_pairarrow, + # Level 2 begin_conditional, CONDITIONAL, # ? diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index c88617e193803..d5e19f431f559 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -26,17 +26,42 @@ The JuliaParser.jl package is licensed under the MIT "Expat" License: > SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =# -import Base.UTF8proc +import Base.Unicode + + +@inline function utf8_trailing(i) + if i < 193 + return 0 + elseif i < 225 + return 1 + elseif i < 241 + return 2 + elseif i < 249 + return 3 + elseif i < 253 + return 4 + else + return 5 + end +end + +const utf8_offset = [0x00000000 + 0x00003080 + 0x000e2080 + 0x03c82080 + 0xfa082080 + 0x82082080] +# const EOF_CHAR = convert(Char,typemax(UInt32)) +const EOF_CHAR = typemax(Char) -const EOF_CHAR = convert(Char,typemax(UInt32)) function is_cat_id_start(ch::Char, cat::Integer) c = UInt32(ch) - return (cat == UTF8proc.UTF8PROC_CATEGORY_LU || cat == UTF8proc.UTF8PROC_CATEGORY_LL || - cat == UTF8proc.UTF8PROC_CATEGORY_LT || cat == UTF8proc.UTF8PROC_CATEGORY_LM || - cat == UTF8proc.UTF8PROC_CATEGORY_LO || cat == UTF8proc.UTF8PROC_CATEGORY_NL || - cat == UTF8proc.UTF8PROC_CATEGORY_SC || # allow currency symbols - cat == UTF8proc.UTF8PROC_CATEGORY_SO || # other symbols + return (cat == Unicode.UTF8PROC_CATEGORY_LU || cat == Unicode.UTF8PROC_CATEGORY_LL || + cat == Unicode.UTF8PROC_CATEGORY_LT || cat == Unicode.UTF8PROC_CATEGORY_LM || + cat == Unicode.UTF8PROC_CATEGORY_LO || cat == 
Unicode.UTF8PROC_CATEGORY_NL || + cat == Unicode.UTF8PROC_CATEGORY_SC || # allow currency symbols + cat == Unicode.UTF8PROC_CATEGORY_SO || # other symbols # math symbol (category Sm) whitelist (c >= 0x2140 && c <= 0x2a1c && @@ -85,6 +110,7 @@ function is_cat_id_start(ch::Char, cat::Integer) end function is_identifier_char(c::Char) + c == EOF_CHAR && return false if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9') || c == '!') @@ -92,12 +118,12 @@ function is_identifier_char(c::Char) elseif (UInt32(c) < 0xA1 || UInt32(c) > 0x10ffff) return false end - cat = UTF8proc.category_code(c) + cat = Unicode.category_code(c) is_cat_id_start(c, cat) && return true - if cat == UTF8proc.UTF8PROC_CATEGORY_MN || cat == UTF8proc.UTF8PROC_CATEGORY_MC || - cat == UTF8proc.UTF8PROC_CATEGORY_ND || cat == UTF8proc.UTF8PROC_CATEGORY_PC || - cat == UTF8proc.UTF8PROC_CATEGORY_SK || cat == UTF8proc.UTF8PROC_CATEGORY_ME || - cat == UTF8proc.UTF8PROC_CATEGORY_NO || + if cat == Unicode.UTF8PROC_CATEGORY_MN || cat == Unicode.UTF8PROC_CATEGORY_MC || + cat == Unicode.UTF8PROC_CATEGORY_ND || cat == Unicode.UTF8PROC_CATEGORY_PC || + cat == Unicode.UTF8PROC_CATEGORY_SK || cat == Unicode.UTF8PROC_CATEGORY_ME || + cat == Unicode.UTF8PROC_CATEGORY_NO || (0x2032 <= UInt32(c) <= 0x2034) || # primes UInt32(c) == 0x0387 || UInt32(c) == 0x19da || (0x1369 <= UInt32(c) <= 0x1371) @@ -107,18 +133,18 @@ function is_identifier_char(c::Char) end function is_identifier_start_char(c::Char) + c == EOF_CHAR && return false if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') return true elseif (UInt32(c) < 0xA1 || UInt32(c) > 0x10ffff) return false end - cat = UTF8proc.category_code(c) + cat = Unicode.category_code(c) return is_cat_id_start(c, cat) end -function peekchar(io::(isdefined(Base, :GenericIOBuffer) ? 
- Base.GenericIOBuffer : Base.AbstractIOBuffer)) +function peekchar(io::Base.GenericIOBuffer) if !io.readable || io.ptr > io.size return EOF_CHAR end @@ -131,7 +157,7 @@ function readutf(io, offset = 0) if ch < 0x80 return convert(Char, ch), 0 end - trailing = Base.utf8_trailing[ch + 1] + trailing = utf8_trailing(ch + 1) c::UInt32 = 0 for j = 1:trailing c += ch @@ -139,7 +165,7 @@ function readutf(io, offset = 0) ch = convert(UInt8, io.data[io.ptr + j + offset]) end c += ch - c -= Base.utf8_offset[trailing + 1] + c -= utf8_offset[trailing + 1] return convert(Char, c), trailing end @@ -149,23 +175,22 @@ function dpeekchar(io::IOBuffer) end ch1, trailing = readutf(io) offset = trailing + 1 - + if io.ptr + offset > io.size return ch1, EOF_CHAR end ch2, _ = readutf(io, offset) - + return ch1, ch2 end # this implementation is copied from Base -const _CHTMP = Vector{Char}(1) - peekchar(s::IOStream) = begin - if ccall(:ios_peekutf8, Int32, (Ptr{Void}, Ptr{Char}), s, _CHTMP) < 0 + _CHTMP = Ref{Char}() + if ccall(:ios_peekutf8, Int32, (Ptr{Nothing}, Ptr{Char}), s, _CHTMP) < 0 return EOF_CHAR end - return _CHTMP[1] + return _CHTMP[] end eof(io::IO) = Base.eof(io) diff --git a/JuliaSyntax/test/lex_yourself.jl b/JuliaSyntax/test/lex_yourself.jl index 732354a837681..73e99d73cdc83 100644 --- a/JuliaSyntax/test/lex_yourself.jl +++ b/JuliaSyntax/test/lex_yourself.jl @@ -1,6 +1,6 @@ @testset "lex yourself" begin -const PKGPATH = joinpath(dirname(@__FILE__), "..") +PKGPATH = joinpath(dirname(@__FILE__), "..") global tot_files = 0 global tot_time = 0.0 diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 62f44675a4e06..6e8362848b647 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -1,6 +1,6 @@ using Tokenize using Tokenize.Lexers -using Base.Test +using Test const T = Tokenize.Tokens diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 032d254435e22..77fc777d005a0 100644 --- a/JuliaSyntax/test/runtests.jl +++ 
b/JuliaSyntax/test/runtests.jl @@ -1,4 +1,4 @@ -using Base.Test +using Test, Printf import Tokenize From 89d8472002dbb2e7380968fe9bf478813bb42365 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 13 Feb 2018 14:44:41 +0100 Subject: [PATCH 0127/1109] add missing methods (JuliaLang/JuliaSyntax.jl#122) --- JuliaSyntax/src/token.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index f20737156ec1f..6905ae6407155 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -89,6 +89,8 @@ end exactkind(t::AbstractToken) = t.kind startpos(t::AbstractToken) = t.startpos endpos(t::AbstractToken) = t.endpos +startbyte(t::AbstractToken) = t.startbyte +endbyte(t::AbstractToken) = t.endbyte function untokenize(t::Token) if t.kind == IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == ERROR return t.val From 2f8b31f6a67c49abd9360c0152531b797b907ec5 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Thu, 14 Jun 2018 19:30:07 +0100 Subject: [PATCH 0128/1109] v0.7 compatability (JuliaLang/JuliaSyntax.jl#129) * misc v0.7 fixes * bump ver * fixes * fix EOF_CHAR * revert EOF change, rm dep warning * Change pairarrow precedence * small 0.7 fixes * dot/suffix ops * eof fix * bin/hex/oct ints * add perp * add unary op dot/suffix tests * some comments * rem codegen * add pair_arrow * fix --- JuliaSyntax/src/lexer.jl | 117 +++++++++++++++++++++++-- JuliaSyntax/src/token.jl | 17 ++-- JuliaSyntax/src/token_kinds.jl | 5 ++ JuliaSyntax/src/utilities.jl | 151 ++++++++++++++++++++++++++++++++- JuliaSyntax/test/lexer.jl | 68 +++++++++++++-- 5 files changed, 338 insertions(+), 20 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 970a94814408d..4b7b14688332d 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -34,9 +34,10 @@ mutable struct Lexer{IO_t <: IO, T <: AbstractToken} charstore::IOBuffer current_char::Char doread::Bool + 
dotop::Bool end -Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} = Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), ' ', false) +Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} = Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), ' ', false, false) Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) @inline token_type(l::Lexer{IO_t, TT}) where {IO_t, TT} = TT @@ -226,22 +227,49 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) str = String(take!(l.charstore)) elseif kind == Tokens.ERROR str = String(l.io.data[(l.token_startpos + 1):position(l.io)]) + elseif optakessuffix(kind) + str = "" + while isopsuffix(peekchar(l)) + str = string(str, readchar(l)) + end else str = "" end - tok = Token(kind, (l.token_start_row, l.token_start_col), + if l.dotop + tok = Token(kind, (l.token_start_row, l.token_start_col-1), + (l.current_row, l.current_col - 1), + startpos(l)-1, position(l) - 1, + str, err, true) + l.dotop = false + else + tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, - str, err) + str, err,false) + end l.last_token = kind readoff(l) return tok end function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t - tok = RawToken(kind, (l.token_start_row, l.token_start_col), + if optakessuffix(kind) + while isopsuffix(peekchar(l)) + readchar(l) + end + end + + if l.dotop + tok = RawToken(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err) + startpos(l), position(l) - 1, err, true) + l.dotop = false + else + tok = RawToken(kind, (l.token_start_row, l.token_start_col), + (l.current_row, l.current_col - 1), + startpos(l), position(l) - 1, err, false) + end + l.last_token = kind 
readoff(l) return tok @@ -627,6 +655,7 @@ function lex_digit(l::Lexer, kind) elseif position(l) - startpos(l) == 1 && l.current_char == '0' kind == Tokens.INTEGER if pc == 'x' + kind = Tokens.HEX_INT readchar(l) !(ishex(ppc) || ppc =='.') && return emit_error(l) accept_number(l, ishex) @@ -642,10 +671,12 @@ function lex_digit(l::Lexer, kind) !isbinary(ppc) && return emit_error(l) readchar(l) accept_number(l, isbinary) + kind = Tokens.BIN_INT elseif pc == 'o' !isoctal(ppc) && return emit_error(l) readchar(l) accept_number(l, isoctal) + kind = Tokens.OCT_INT end end return emit(l, kind) @@ -816,7 +847,81 @@ function lex_dot(l::Lexer) readon(l) return lex_digit(l, Tokens.FLOAT) else - return emit(l, Tokens.DOT) + pc, dpc = dpeekchar(l) + if dotop1(pc) + l.dotop = true + return next_token(l) + elseif pc =='+' + l.dotop = true + readchar(l) + return lex_plus(l) + elseif pc =='-' + l.dotop = true + readchar(l) + return lex_minus(l) + elseif pc =='*' + l.dotop = true + readchar(l) + return lex_star(l) + elseif pc =='/' + l.dotop = true + readchar(l) + return lex_forwardslash(l) + elseif pc =='\\' + l.dotop = true + readchar(l) + return lex_backslash(l) + elseif pc =='^' + l.dotop = true + readchar(l) + return lex_circumflex(l) + elseif pc =='<' + l.dotop = true + readchar(l) + return lex_less(l) + elseif pc =='>' + l.dotop = true + readchar(l) + return lex_greater(l) + elseif pc =='&' + l.dotop = true + readchar(l) + if accept(l, "=") + return emit(l, Tokens.AND_EQ) + else + return emit(l, Tokens.AND) + end + elseif pc =='%' + l.dotop = true + readchar(l) + return lex_percent(l) + elseif pc == '=' && dpc != '>' + l.dotop = true + readchar(l) + return lex_equal(l) + elseif pc == '|' && dpc != '|' + l.dotop = true + readchar(l) + return lex_bar(l) + elseif pc == '!' 
&& dpc == '=' + l.dotop = true + readchar(l) + return lex_exclaim(l) + elseif pc == '⊻' + l.dotop = true + readchar(l) + return lex_xor(l) + elseif pc == '÷' + l.dotop = true + readchar(l) + return lex_division(l) + elseif pc == '=' && dpc == '>' + l.dotop = true + readchar(l) + return lex_equal(l) + else + return emit(l, Tokens.DOT) + end end end diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 6905ae6407155..ae42a589113bd 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -53,12 +53,13 @@ struct Token <: AbstractToken endbyte::Int # The byte where the token ended in the buffer val::String # The actual string of the token token_error::TokenError + dotop::Bool end function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int, val::String) -Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR) +Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false) end -Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN) +Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false) struct RawToken <: AbstractToken kind::Kind @@ -68,12 +69,13 @@ struct RawToken <: AbstractToken startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer token_error::TokenError + dotop::Bool end function RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int) -RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR) +RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false) end -RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN) +RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false) const _EMPTY_TOKEN = Token() @@ -97,7 +99,12 @@ function untokenize(t::Token) elseif iskeyword(t.kind) return lowercase(string(t.kind)) elseif isoperator(t.kind) - return string(UNICODE_OPS_REVERSE[t.kind]) + 
if t.dotop + str = string(".", UNICODE_OPS_REVERSE[t.kind]) + else + str = string(UNICODE_OPS_REVERSE[t.kind]) + end + return string(str, t.val) elseif t.kind == LPAREN return "(" elseif t.kind == LSQUARE diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 45be42fbe0052..3378c2dbb93cf 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -59,6 +59,9 @@ begin_literal, LITERAL, # general INTEGER, # 4 + BIN_INT, # 0b1 + HEX_INT, # 0x0 + OCT_INT, # 0o0 FLOAT, # 3.5, 3.7e+3 STRING, # "foo" TRIPLE_STRING, # """ foo \n """ @@ -532,6 +535,7 @@ DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, # ⫺ RIGHT_TACK, # ⊢ LEFT_TACK, # ⊣ + PERP, # ⟂ end_comparison, # Level 7 @@ -1160,6 +1164,7 @@ const UNICODE_OPS = Dict{Char, Kind}( '⫺' => DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, '⊢' => RIGHT_TACK, '⊣' => LEFT_TACK, +'⟂' => PERP, '⊕' => CIRCLED_PLUS, '⊖' => CIRCLED_MINUS, '⊞' => SQUARED_PLUS, diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index d5e19f431f559..bfe14a9991953 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -82,7 +82,7 @@ function is_cat_id_start(ch::Char, cat::Integer) (c >= 0x266f && (c == 0x266f || c == 0x27d8 || c == 0x27d9 || # ♯, ⟘, ⟙ - (c >= 0x27c0 && c <= 0x27c2) || # ⟀, ⟁, ⟂ + (c >= 0x27c0 && c <= 0x27c1) || # ⟀, ⟁ (c >= 0x29b0 && c <= 0x29b4) || # ⦰, ⦱, ⦲, ⦳, ⦴ (c >= 0x2a00 && c <= 0x2a06) || # ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ (c >= 0x2a09 && c <= 0x2a16) || # ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, @@ -198,3 +198,152 @@ eof(c::Char) = c === EOF_CHAR readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) takechar(io::IO) = (readchar(io); io) + +# Checks whether a Char is an operator, which can not be juxtaposed with another +# Char to be an operator (i.e <=), and can be prefixed by a dot (.) 
+# magic number list created by filtering ops by those that successfully parse +# `a .(op) b` or `.(op)a` and where `length(string(op)) == 1` +@inline function dotop1(c1::Char) + c1 == EOF_CHAR && return false + c = UInt32(c1) + c == 0x00000021 || + c == 0x0000002e || + c == 0x0000007e || + c == 0x000000ac || + c == 0x000000b1 || + c == 0x000000d7 || + c == 0x0000214b || + 0x00002190 <= c <= 0x00002194 || + 0x0000219a <= c <= 0x0000219b || + c == 0x000021a0 || + c == 0x000021a3 || + c == 0x000021a6 || + c == 0x000021ae || + 0x000021ce <= c <= 0x000021cf || + c == 0x000021d2 || + c == 0x000021d4 || + 0x000021f4 <= c <= 0x000021ff || + 0x00002208 <= c <= 0x0000220d || + 0x00002213 <= c <= 0x00002214 || + 0x00002217 <= c <= 0x00002219 || + 0x0000221a <= c <= 0x0000221d || + 0x00002224 <= c <= 0x0000222a || + 0x00002237 <= c <= 0x00002238 || + 0x0000223a <= c <= 0x0000223b || + 0x0000223d <= c <= 0x0000223e || + 0x00002240 <= c <= 0x0000228b || + 0x0000228d <= c <= 0x0000229c || + 0x0000229e <= c <= 0x000022a3 || + c == 0x000022a9 || + c == 0x000022ac || + c == 0x000022ae || + 0x000022b0 <= c <= 0x000022b7 || + 0x000022bc <= c <= 0x000022bd || + 0x000022c4 <= c <= 0x000022c7 || + 0x000022c9 <= c <= 0x000022d3 || + 0x000022d5 <= c <= 0x000022ed || + 0x000022f2 <= c <= 0x000022ff || + c == 0x000025b7 || + c == 0x000027c2 || + 0x000027c8 <= c <= 0x000027c9 || + 0x000027d1 <= c <= 0x000027d2 || + 0x000027d5 <= c <= 0x000027d7 || + 0x000027f0 <= c <= 0x000027f1 || + 0x000027f5 <= c <= 0x000027f7 || + 0x000027f9 <= c <= 0x000027ff || + 0x00002900 <= c <= 0x00002918 || + 0x0000291d <= c <= 0x00002920 || + 0x00002944 <= c <= 0x00002970 || + 0x000029b7 <= c <= 0x000029b8 || + c == 0x000029bc || + 0x000029be <= c <= 0x000029c1 || + c == 0x000029e1 || + 0x000029e3 <= c <= 0x000029e5 || + c == 0x000029f4 || + 0x000029f6 <= c <= 0x000029f7 || + 0x000029fa <= c <= 0x000029fb || + 0x00002a07 <= c <= 0x00002a08 || + c == 0x00002a1d || + 0x00002a22 <= c <= 0x00002a2e || + 0x00002a30 <= 
c <= 0x00002a3d || + 0x00002a40 <= c <= 0x00002a45 || + 0x00002a4a <= c <= 0x00002a58 || + 0x00002a5a <= c <= 0x00002a63 || + 0x00002a66 <= c <= 0x00002a67 || + 0x00002a6a <= c <= 0x00002ad9 || + c == 0x00002adb || + 0x00002af7 <= c <= 0x00002afa || + 0x00002b30 <= c <= 0x00002b44 || + 0x00002b47 <= c <= 0x00002b4c || + 0x0000ffe9 <= c <= 0x0000ffec +end + +# suffix operators +# "₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ′″‴‵‶‷⁗" +@inline function isopsuffix(c1::Char) + c1 == EOF_CHAR && return false + c = UInt32(c1) + 0x000000b2 <= c <= 0x000000b3 || + c == 0x000000b9 || + c == 0x000002b0 || + 0x000002b2 <= c <= 0x000002b3 || + 0x000002b7 <= c <= 0x000002b8 || + 0x000002e1 <= c <= 0x000002e3 || + c == 0x00001d2c || + c == 0x00001d2e || + 0x00001d30 <= c <= 0x00001d31 || + 0x00001d33 <= c <= 0x00001d3a || + c == 0x00001d3c || + 0x00001d3e <= c <= 0x00001d43 || + 0x00001d47 <= c <= 0x00001d49 || + c == 0x00001d4d || + 0x00001d4f <= c <= 0x00001d50 || + c == 0x00001d52 || + 0x00001d56 <= c <= 0x00001d58 || + c == 0x00001d5b || + 0x00001d5d <= c <= 0x00001d6a || + c == 0x00001d9c || + c == 0x00001da0 || + 0x00001da5 <= c <= 0x00001da6 || + c == 0x00001dab || + c == 0x00001db0 || + c == 0x00001db8 || + c == 0x00001dbb || + c == 0x00001dbf || + c == 0x00002009 || + 0x00002032 <= c <= 0x00002037 || + c == 0x00002057 || + 0x00002070 <= c <= 0x00002071 || + 0x00002074 <= c <= 0x0000208e || + 0x00002090 <= c <= 0x00002093 || + 0x00002095 <= c <= 0x0000209c || + 0x00002c7c <= c <= 0x00002c7d +end + + +function optakessuffix(k) + (Tokens.begin_ops < k < Tokens.end_ops) && + !(k == Tokens.DDDOT || + Tokens.EQ <= k <= k == Tokens.XOR_EQ || + k == Tokens.CONDITIONAL || + k == Tokens.RIGHT_ARROW || + k == Tokens.LAZY_OR || + k == Tokens.LAZY_AND || + k == Tokens.ISSUBTYPE || + k == Tokens.ISSUPERTYPE || + k == Tokens.IN || + k == Tokens.ISA || + k == Tokens.COLON_EQUALS || + k == Tokens.DOUBLE_COLON_EQUAL || + k == 
Tokens.COLON || + k == Tokens.DDOT || + k == Tokens.EX_OR || + k == Tokens.DECLARATION || + k == Tokens.WHERE || + k == Tokens.DOT || + k == Tokens.NOT || + k == Tokens.TRANSPOSE || + k == Tokens.ANON_FUNC || + Tokens.NOT_SIGN <= k <= Tokens.QUAD_ROOT + ) +end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 6e8362848b647..7d751f8a451e3 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -296,7 +296,7 @@ end end @testset "lex binary" begin - @test tok("0b0101").kind==T.INTEGER + @test tok("0b0101").kind==T.BIN_INT end @testset "show" begin @@ -366,7 +366,7 @@ end @testset "lex octal" begin - @test tok("0o0167").kind == T.INTEGER + @test tok("0o0167").kind == T.OCT_INT end @testset "lex float/bin/hex/oct w underscores" begin @@ -375,21 +375,21 @@ end @test tok("1_1.1_1").kind == T.FLOAT @test tok("_1.1_1", 1).kind == T.IDENTIFIER @test tok("_1.1_1", 2).kind == T.FLOAT - @test tok("0x0167_032").kind == T.INTEGER - @test tok("0b0101001_0100_0101").kind == T.INTEGER - @test tok("0o01054001_0100_0101").kind == T.INTEGER + @test tok("0x0167_032").kind == T.HEX_INT + @test tok("0b0101001_0100_0101").kind == T.BIN_INT + @test tok("0o01054001_0100_0101").kind == T.OCT_INT @test T.kind.(collect(tokenize("1.2."))) == [T.ERROR, T.ENDMARKER] @test tok("1__2").kind == T.INTEGER @test tok("1.2_3").kind == T.FLOAT @test tok("1.2_3", 2).kind == T.ENDMARKER @test T.kind.(collect(tokenize("3e2_2"))) == [T.FLOAT, T.IDENTIFIER, T.ENDMARKER] @test T.kind.(collect(tokenize("1__2"))) == [T.INTEGER, T.IDENTIFIER, T.ENDMARKER] - @test T.kind.(collect(tokenize("0x2_0_2"))) == [T.INTEGER, T.ENDMARKER] - @test T.kind.(collect(tokenize("0x2__2"))) == [T.INTEGER, T.IDENTIFIER, T.ENDMARKER] + @test T.kind.(collect(tokenize("0x2_0_2"))) == [T.HEX_INT, T.ENDMARKER] + @test T.kind.(collect(tokenize("0x2__2"))) == [T.HEX_INT, T.IDENTIFIER, T.ENDMARKER] @test T.kind.(collect(tokenize("3_2.5_2"))) == [T.FLOAT, T.ENDMARKER] @test 
T.kind.(collect(tokenize("3.2e2.2"))) == [T.ERROR, T.INTEGER, T.ENDMARKER] @test T.kind.(collect(tokenize("3e2.2"))) == [T.ERROR, T.INTEGER, T.ENDMARKER] - @test T.kind.(collect(tokenize("0b101__101"))) == [T.INTEGER, T.IDENTIFIER, T.ENDMARKER] + @test T.kind.(collect(tokenize("0b101__101"))) == [T.BIN_INT, T.IDENTIFIER, T.ENDMARKER] end @testset "floating points" begin @@ -470,3 +470,55 @@ end @test tok("0x 2", 1).kind == T.ERROR @test tok("0x.1p1").kind == T.FLOAT end + + +@testset "dotted and suffixed operators" begin +ops = collect(values(Main.Tokenize.Tokens.UNICODE_OPS_REVERSE)) + +for op in ops + op in (:isa, :in, :where, Symbol('\''), :?, :(:)) && continue + str1 = "$(op)b" + str2 = ".$(op)b" + str3 = "a $op b" + str4 = "a .$op b" + str5 = "a $(op)₁ b" + str6 = "a .$(op)₁ b" + ex1 = Meta.parse(str1, raise = false) + ex2 = Meta.parse(str2, raise = false) + ex3 = Meta.parse(str3, raise = false) + ex4 = Meta.parse(str4, raise = false) + ex5 = Meta.parse(str5, raise = false) + ex6 = Meta.parse(str6, raise = false) + if ex1.head != :error # unary + t1 = collect(tokenize(str1)) + exop1 = ex1.head == :call ? ex1.args[1] : ex1.head + @test Symbol(Tokenize.Tokens.untokenize(t1[1])) == exop1 + if ex2.head != :error + t2 = collect(tokenize(str2)) + exop2 = ex2.head == :call ? ex2.args[1] : ex2.head + @test Symbol(Tokenize.Tokens.untokenize(t2[1])) == exop2 + end + elseif ex3.head != :error # binary + t3 = collect(tokenize(str3)) + exop3 = ex3.head == :call ? ex3.args[1] : ex3.head + @test Symbol(Tokenize.Tokens.untokenize(t3[3])) == exop3 + if ex4.head != :error + t4 = collect(tokenize(str4)) + exop4 = ex4.head == :call ? ex4.args[1] : ex4.head + @test Symbol(Tokenize.Tokens.untokenize(t4[3])) == exop4 + elseif ex5.head != :error + t5 = collect(tokenize(str5)) + exop5 = ex5.head == :call ? ex5.args[1] : ex5.head + @test Symbol(Tokenize.Tokens.untokenize(t5[3])) == exop5 + elseif ex6.head != :error + t6 = collect(tokenize(str6)) + exop6 = ex6.head == :call ? 
ex6.args[1] : ex6.head + @test Symbol(Tokenize.Tokens.untokenize(t6[3])) == exop6 + end + end +end +end + +@testset "perp" begin + @test tok("1 ⟂ 2", 3).kind==T.PERP +end From f6d04d4ad2f953bec9a5af9c100691e4d9945846 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 19 Jun 2018 21:50:10 +0100 Subject: [PATCH 0129/1109] add outer kw (JuliaLang/JuliaSyntax.jl#130) --- JuliaSyntax/src/lexer.jl | 4 +++- JuliaSyntax/src/token_kinds.jl | 1 + JuliaSyntax/test/lexer.jl | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 4b7b14688332d..09313e3c0d781 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -7,7 +7,7 @@ import ..Tokens: AbstractToken, Token, RawToken, Kind, TokenError, UNICODE_OPS, import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, - IMPORT, IMPORTALL, MACRO, MODULE, QUOTE, RETURN, TRUE, TRY, TYPE, TYPEALIAS, USING, WHILE, ISA, IN, + IMPORT, IMPORTALL, MACRO, MODULE, OUTER, QUOTE, RETURN, TRUE, TRY, TYPE, TYPEALIAS, USING, WHILE, ISA, IN, MUTABLE, PRIMITIVE, STRUCT, WHERE @@ -1172,6 +1172,8 @@ function lex_identifier(l, c) else return _doret(l, c) end + elseif c == 'o' + return tryread(l, ('u', 't', 'e', 'r'), OUTER, c) elseif c == 'p' return tryread(l, ('r', 'i', 'm', 'i', 't', 'i', 'v', 'e'), PRIMITIVE, c) elseif c == 'q' diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 3378c2dbb93cf..d7573bc8554ce 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -36,6 +36,7 @@ MACRO, MODULE, MUTABLE, + OUTER, PRIMITIVE, QUOTE, RETURN, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 7d751f8a451e3..626bbeaa63967 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -522,3 +522,7 @@ end @testset "perp" begin @test tok("1 
⟂ 2", 3).kind==T.PERP end + +@testset "outer" begin + @test tok("outer", 1).kind==T.OUTER +end From 254f9aa3f59825330e5d0d90b2b6c635d63c87a0 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 7 Aug 2018 22:14:32 +0100 Subject: [PATCH 0130/1109] Transition to new iteration protocol (JuliaLang/JuliaSyntax.jl#131) * 1.0 compat * update REQUIRE and appveyor.yml * remove commented out `Base.done` overload --- JuliaSyntax/REQUIRE | 2 +- JuliaSyntax/appveyor.yml | 31 +++++++++++++++++-------------- JuliaSyntax/src/Tokenize.jl | 2 -- JuliaSyntax/src/_precompile.jl | 12 ++++++------ JuliaSyntax/src/lexer.jl | 12 ++++++------ 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE index 4aa321c1e26fe..d8e28292cbd3f 100644 --- a/JuliaSyntax/REQUIRE +++ b/JuliaSyntax/REQUIRE @@ -1 +1 @@ -julia 0.7- +julia 0.7-beta2 diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml index cd987b7986bb9..831beb78f8fa9 100644 --- a/JuliaSyntax/appveyor.yml +++ b/JuliaSyntax/appveyor.yml @@ -1,7 +1,17 @@ environment: matrix: - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" + - julia_version: 0.7 + - julia_version: latest + +platform: + - x86 # 32-bit + - x64 # 64-bit + +## uncomment the following lines to allow failures on nightly julia +## (tests will run but not make your overall status red) +#matrix: +# allow_failures: +# - julia_version: latest branches: only: @@ -15,19 +25,12 @@ notifications: on_build_status_changed: false install: - - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" -# Download most recent Julia Windows binary - - ps: (new-object net.webclient).DownloadFile( - $env:JULIA_URL, - "C:\projects\julia-binary.exe") -# Run installer silently, output to C:\projects\julia - - C:\projects\julia-binary.exe /S 
/D=C:\projects\julia + - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) build_script: -# Need to convert from shallow to complete for Pkg.clone to work - - IF EXIST .git\shallow (git fetch --unshallow) - - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"Tokenize\"); Pkg.build(\"Tokenize\")" + - echo "%JL_BUILD_SCRIPT%" + - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" test_script: - - C:\projects\julia\bin\julia -e "Pkg.test(\"Tokenize\")" + - echo "%JL_TEST_SCRIPT%" + - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index fca523a90a01b..8e7f0b4ba6d32 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -1,5 +1,3 @@ -__precompile__() - module Tokenize include("token.jl") diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index d07e29300b259..6de323a2ef639 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -35,12 +35,12 @@ function _precompile_() precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.RawToken})) precompile(Tokenize.Lexers.tokenize, (String,)) - precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.start, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) - precompile(Tokenize.Lexers.next, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) - precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) - precompile(Tokenize.Lexers.done, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) + 
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Int)) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 09313e3c0d781..848119ca7fec9 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -59,7 +59,7 @@ Base.IteratorEltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.HasEltype() Base.eltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = T -function Base.start(l::Lexer) +function Base.iterate(l::Lexer) seekstart(l) l.token_startpos = position(l) l.token_start_row = 1 @@ -68,16 +68,16 @@ function Base.start(l::Lexer) l.current_row = 1 l.current_col = 1 l.current_pos = l.io_startpos - false + t = next_token(l) + return t, t.kind == Tokens.ENDMARKER end -function Base.next(l::Lexer, ::Any) +function Base.iterate(l::Lexer, isdone::Any) + isdone && return nothing t = next_token(l) return t, t.kind == Tokens.ENDMARKER end -Base.done(::Lexer, isdone) = isdone - function Base.show(io::IO, l::Lexer) 
print(io, typeof(l), " at position: ", position(l)) end @@ -883,7 +883,7 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) return lex_greater(l) - elseif pc =='&' + elseif pc =='&' l.dotop = true readchar(l) if accept(l, "=") From e8b4c5c6ac509e7ea275718f02fcf4064c43d172 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Mon, 15 Oct 2018 14:09:02 +0100 Subject: [PATCH 0131/1109] remove old keywords (JuliaLang/JuliaSyntax.jl#133) --- JuliaSyntax/src/lexer.jl | 17 ++++------------- JuliaSyntax/src/token_kinds.jl | 3 --- JuliaSyntax/test/lexer.jl | 3 --- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 848119ca7fec9..f5c930797f7e1 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -5,9 +5,9 @@ include("utilities.jl") import ..Tokens import ..Tokens: AbstractToken, Token, RawToken, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral -import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BITSTYPE, BREAK, CATCH, CONST, CONTINUE, - DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, IMMUTABLE, - IMPORT, IMPORTALL, MACRO, MODULE, OUTER, QUOTE, RETURN, TRUE, TRY, TYPE, TYPEALIAS, USING, WHILE, ISA, IN, +import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BREAK, CATCH, CONST, CONTINUE, + DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, + IMPORT, IMPORTALL, MACRO, MODULE, OUTER, QUOTE, RETURN, TRUE, TRY, TYPE, USING, WHILE, ISA, IN, MUTABLE, PRIMITIVE, STRUCT, WHERE @@ -993,9 +993,6 @@ function lex_identifier(l, c) elseif c == 'e' c = readchar(l) return tryread(l, ('g', 'i', 'n'), BEGIN, c) - elseif c == 'i' - c = readchar(l) - return tryread(l, ('t', 's', 't', 'y', 'p', 'e'), BITSTYPE, c) elseif c == 'r' c = readchar(l) return tryread(l, ('e', 'a', 'k'), BREAK, c) @@ -1098,10 +1095,7 @@ function lex_identifier(l, c) elseif c == 'm' readchar(l) c = peekchar(l) - if c == 'm' - 
readchar(l) - return tryread(l, ('u', 't', 'a', 'b', 'l', 'e'), IMMUTABLE, c) - elseif c == 'p' + if c == 'p' readchar(l) c = peekchar(l) if c == 'o' @@ -1213,9 +1207,6 @@ function lex_identifier(l, c) c = peekchar(l) if !is_identifier_char(c) return emit(l, TYPE) - elseif c == 'a' - c = readchar(l) - return tryread(l, ('l', 'i', 'a', 's'), TYPEALIAS, c) else c = readchar(l) return _doret(l, c) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index d7573bc8554ce..5fb5002387042 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -13,7 +13,6 @@ ABSTRACT, BAREMODULE, BEGIN, - BITSTYPE, BREAK, CATCH, CONST, @@ -28,7 +27,6 @@ FUNCTION, GLOBAL, IF, - IMMUTABLE, IMPORT, IMPORTALL, LET, @@ -43,7 +41,6 @@ STRUCT, TRY, TYPE, - TYPEALIAS, USING, WHILE, end_keywords, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 626bbeaa63967..fd8b72c92581b 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -239,7 +239,6 @@ end "abstract", "baremodule", "begin", - "bitstype", "break", "catch", "const", @@ -257,7 +256,6 @@ end "let", "local", "if", - "immutable", "import", "importall", "macro", @@ -270,7 +268,6 @@ end #"true", "try", "type", - "typealias", "using", "while"] From 034240d85230fefb89ef2e2cab424b920b7cc686 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 1 Nov 2018 15:19:29 -0400 Subject: [PATCH 0132/1109] base.jl doesnt exist anymore --- JuliaSyntax/benchmark/lex_base.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 7b0b66263b2ef..1dc2f17aa841e 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -5,7 +5,7 @@ function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens. 
tot_files = 0 tot_tokens = 0 tot_errors = 0 - dir = dirname(Base.find_source_file("base.jl")) + dir = dirname(Base.find_source_file("int.jl")) for (root, dirs, files) in walkdir(dir) for file in files if endswith(file, ".jl") From c36fd1e505087b19e86b3dc3f3fbd9a7f4138a42 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 1 Nov 2018 15:26:16 -0400 Subject: [PATCH 0133/1109] fix using Printf --- JuliaSyntax/benchmark/lex_base.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 1dc2f17aa841e..2ffedc541a9cf 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -1,5 +1,6 @@ using Tokenize using BenchmarkTools +using Printf function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens.AbstractToken tot_files = 0 From 367af9e4ed42444e853af3d42682bb77daa94a49 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sun, 6 Jan 2019 17:35:18 +0000 Subject: [PATCH 0134/1109] update cmd lexing (JuliaLang/JuliaSyntax.jl#138) --- JuliaSyntax/src/lexer.jl | 23 +++++++++++++---------- JuliaSyntax/test/lexer.jl | 6 +++--- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index f5c930797f7e1..1d7e36d711168 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -926,20 +926,23 @@ function lex_dot(l::Lexer) end # A ` has been consumed -# N.B.: cmds do not currently have special parser interpolation support function lex_cmd(l::Lexer, doemit=true) - kind = Tokens.CMD - if accept(l, '`') # `` - if accept(l, '`') # ``` - kind = Tokens.TRIPLE_CMD + if accept(l, '`') # + if accept(l, '`') # """ + if read_string(l, Tokens.TRIPLE_CMD) + return doemit ? emit(l, Tokens.TRIPLE_CMD) : EMPTY_TOKEN(token_type(l)) + else + return doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN(token_type(l)) + end else # empty cmd return doemit ? 
emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) end - end - while true - c = readchar(l) - eof(c) && return (doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN(token_type(l))) - string_terminated(l, c, kind) && return (doemit ? emit(l, kind) : EMPTY_TOKEN(token_type(l))) + else + if read_string(l, Tokens.CMD) + return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) + else + return doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN(token_type(l)) + end end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index fd8b72c92581b..2d1a3ad383b38 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -454,9 +454,9 @@ end @test length(collect(tokenize("\"\$(string(`inline ')' cmd`)\"\")\""))) == 2 # These would require special interpolation support in the parse (Base issue #3150). # If that gets implemented, thses should all be adjust to `== 2` - @test length(collect(tokenize("`\$((``))`"))) == 3 - @test length(collect(tokenize("`\$(#=inline ) comment=#``)`"))) == 3 - @test length(collect(tokenize("`\$(\"inline ) string\"*string(``))`"))) == 3 + @test length(collect(tokenize("`\$((``))`"))) == 2 + @test length(collect(tokenize("`\$(#=inline ) comment=#``)`"))) == 2 + @test length(collect(tokenize("`\$(\"inline ) string\"*string(``))`"))) == 2 end From 8b919c2e30b058f8c4c39e84cebc6e5221bcee0d Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 8 Feb 2019 13:03:38 +0530 Subject: [PATCH 0135/1109] don't peek Chars (JuliaLang/JuliaSyntax.jl#137) --- JuliaSyntax/src/lexer.jl | 93 +++++++++++++++++++++++---------------- JuliaSyntax/test/lexer.jl | 4 +- 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 1d7e36d711168..e65489f0cd78e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -32,12 +32,31 @@ mutable struct Lexer{IO_t <: IO, T <: AbstractToken} last_token::Tokens.Kind charstore::IOBuffer - current_char::Char + 
chars::Tuple{Char,Char,Char} + charspos::Tuple{Int,Int,Int} doread::Bool dotop::Bool end -Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} = Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), ' ', false, false) +function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} + c1 = ' ' + p1 = position(io) + if eof(io) + c2, p2 = EOF_CHAR, p1 + c3, p3 = EOF_CHAR, p1 + else + c2 = read(io, Char) + p2 = position(io) + if eof(io) + c3, p3 = EOF_CHAR, p1 + else + c3 = read(io, Char) + p3 = position(io) + end + + end + Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), (c1,c2,c3), (p1,p2,p3), false, false) +end Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) @inline token_type(l::Lexer{IO_t, TT}) where {IO_t, TT} = TT @@ -110,27 +129,27 @@ seek2startpos!(l::Lexer) = seek(l, startpos(l)) Returns the next character without changing the lexer's state. """ -peekchar(l::Lexer) = peekchar(l.io) +peekchar(l::Lexer) = l.chars[2] """ dpeekchar(l::Lexer) Returns the next two characters without changing the lexer's state. """ -dpeekchar(l::Lexer) = dpeekchar(l.io) +dpeekchar(l::Lexer) = l.chars[2], l.chars[3] """ position(l::Lexer) Returns the current position. """ -Base.position(l::Lexer) = Base.position(l.io) +Base.position(l::Lexer) = l.charspos[1] """ eof(l::Lexer) Determine whether the end of the lexer's underlying buffer has been reached. -""" +"""# Base.position(l::Lexer) = Base.position(l.io) eof(l::Lexer) = eof(l.io) Base.seek(l::Lexer, pos) = seek(l.io, pos) @@ -142,7 +161,7 @@ Updates the lexer's state such that the next `Token` will start at the current position. 
""" function start_token!(l::Lexer) - l.token_startpos = position(l) + l.token_startpos = l.charspos[1] l.token_start_row = l.current_row l.token_start_col = l.current_col end @@ -155,33 +174,35 @@ Returns the next character and increments the current position. function readchar end function readchar(l::Lexer{I}) where {I <: IO} - l.current_char = readchar(l.io) + c = readchar(l.io) + l.chars = (l.chars[2], l.chars[3], c) + l.charspos = (l.charspos[2], l.charspos[3], position(l.io)) if l.doread - write(l.charstore, l.current_char) + write(l.charstore, l.chars[1]) end - if l.current_char == '\n' + if l.chars[1] == '\n' l.current_row += 1 l.current_col = 1 - elseif !eof(l.current_char) + elseif !eof(l.chars[1]) l.current_col += 1 end - return l.current_char + return l.chars[1] end -readon(l::Lexer{I,RawToken}) where {I <: IO} = l.current_char +readon(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] function readon(l::Lexer{I,Token}) where {I <: IO} if l.charstore.size != 0 take!(l.charstore) end - write(l.charstore, l.current_char) + write(l.charstore, l.chars[1]) l.doread = true - return l.current_char + return l.chars[1] end -readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.current_char +readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] function readoff(l::Lexer{I,Token}) where {I <: IO} l.doread = false - return l.current_char + return l.chars[1] end """ @@ -226,7 +247,7 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) if (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) str = String(take!(l.charstore)) elseif kind == Tokens.ERROR - str = String(l.io.data[(l.token_startpos + 1):position(l.io)]) + str = String(l.io.data[(l.token_startpos + 1):position(l)]) elseif optakessuffix(kind) str = "" while isopsuffix(peekchar(l)) @@ -652,7 +673,7 @@ function lex_digit(l::Lexer, kind) else return emit_error(l) end - elseif position(l) - startpos(l) == 1 && l.current_char == '0' 
+ elseif position(l) - startpos(l) == 1 && l.chars[1] == '0' kind == Tokens.INTEGER if pc == 'x' kind = Tokens.HEX_INT @@ -750,23 +771,19 @@ function lex_quote(l::Lexer, doemit=true) end end -function string_terminated(l, c, kind::Tokens.Kind) - if (kind == Tokens.STRING || kind == Tokens.TRIPLE_STRING) && c == '"' - if kind == Tokens.STRING - return true - else - if accept(l, "\"") && accept(l, "\"") - return true - end - end - elseif (kind == Tokens.CMD || kind == Tokens.TRIPLE_CMD) && c == '`' - if kind == Tokens.CMD - return true - else - if accept(l, "\`") && accept(l, "\`") - return true - end - end +function string_terminated(l, kind::Tokens.Kind) + if kind == Tokens.STRING && l.chars[1] == '"' + return true + elseif kind == Tokens.TRIPLE_STRING && l.chars[1] == l.chars[2] == l.chars[3] == '"' + readchar(l) + readchar(l) + return true + elseif kind == Tokens.CMD && l.chars[1] == '`' + return true + elseif kind == Tokens.TRIPLE_CMD && l.chars[1] == l.chars[2] == l.chars[3] == '`' + readchar(l) + readchar(l) + return true end return false end @@ -779,14 +796,14 @@ function read_string(l::Lexer, kind::Tokens.Kind) eof(readchar(l)) && return false continue end - if string_terminated(l, c, kind) + if string_terminated(l, kind) return true elseif eof(c) return false end if c == '$' c = readchar(l) - if string_terminated(l, c, kind) + if string_terminated(l, kind) return true elseif eof(c) return false diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 2d1a3ad383b38..762ec51132a9b 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -11,13 +11,13 @@ tok(str, i = 1) = collect(tokenize(str))[i] l = tokenize(s) @test Lexers.readchar(l) == 'a' - @test l.current_pos == 0 + # @test l.current_pos == 0 l_old = l @test l == l_old @test Lexers.eof(l) @test Lexers.readchar(l) == Lexers.EOF_CHAR - @test l.current_pos == 0 + # @test l.current_pos == 0 end end # testset From a88bb37cad2b8b18bc66952308dc3002921a3639 Mon Sep 17 00:00:00 
2001 From: Kristoffer Carlsson Date: Thu, 9 May 2019 08:37:49 +0200 Subject: [PATCH 0136/1109] fix startpos for dotted ops (JuliaLang/JuliaSyntax.jl#140) --- JuliaSyntax/src/lexer.jl | 33 ++++++++++----------------------- JuliaSyntax/test/lexer.jl | 4 ++++ 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index e65489f0cd78e..ac2b68bb556eb 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -256,18 +256,11 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) else str = "" end - if l.dotop - tok = Token(kind, (l.token_start_row, l.token_start_col-1), - (l.current_row, l.current_col - 1), - startpos(l)-1, position(l) - 1, - str, err, true) - l.dotop = false - else - tok = Token(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, - str, err,false) - end + tok = Token(kind, (l.token_start_row, l.token_start_col), + (l.current_row, l.current_col - 1), + startpos(l), position(l) - 1, + str, err, l.dotop) + l.dotop = false l.last_token = kind readoff(l) return tok @@ -280,17 +273,11 @@ function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E end end - if l.dotop - tok = RawToken(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err, true) - l.dotop = false - else - tok = RawToken(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err, false) - end - + tok = RawToken(kind, (l.token_start_row, l.token_start_col), + (l.current_row, l.current_col - 1), + startpos(l), position(l) - 1, err, l.dotop) + + l.dotop = false l.last_token = kind readoff(l) return tok diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 762ec51132a9b..c41483c1a7e04 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -523,3 +523,7 
@@ end @testset "outer" begin @test tok("outer", 1).kind==T.OUTER end + +@testset "dot startpos" begin + @test Tokenize.Tokens.startpos(tok("./")) == (1,1) +end \ No newline at end of file From 9665331bd4c49b75b44c2362ca40a953ec3662ba Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 17 May 2019 06:58:41 +0100 Subject: [PATCH 0137/1109] add errortoken for invalid numeric constants (JuliaLang/JuliaSyntax.jl#142) * add errortoken for invalid numeric constants * tests --- JuliaSyntax/src/lexer.jl | 16 ++++++++-------- JuliaSyntax/src/token.jl | 4 ++++ JuliaSyntax/test/lexer.jl | 13 ++++++++++++- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index ac2b68bb556eb..c12579d43f808 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -540,7 +540,7 @@ function lex_minus(l::Lexer) if accept(l, '>') return emit(l, Tokens.RIGHT_ARROW) else - return emit_error(l) # "--" is an invalid operator + return emit_error(l, Tokens.INVALID_OPERATOR) # "--" is an invalid operator end elseif accept(l, '>') return emit(l, Tokens.ANON_FUNC) @@ -552,7 +552,7 @@ end function lex_star(l::Lexer) if accept(l, '*') - return emit_error(l) # "**" is an invalid operator use ^ + return emit_error(l, Tokens.INVALID_OPERATOR) # "**" is an invalid operator use ^ elseif accept(l, '=') return emit(l, Tokens.STAR_EQ) end @@ -639,14 +639,14 @@ function lex_digit(l::Lexer, kind) accept(l, "+-") if accept_batch(l, isdigit) if accept(l, '.') # 1.2e2.3 -> [ERROR, 3] - return emit_error(l) + return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end else return emit_error(l) end elseif pc == '.' 
&& (is_identifier_start_char(ppc) || eof(ppc)) readchar(l) - return emit_error(l) + return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-') @@ -655,7 +655,7 @@ function lex_digit(l::Lexer, kind) accept(l, "+-") if accept_batch(l, isdigit) if accept(l, '.') # 1.2e2.3 -> [ERROR, 3] - return emit_error(l) + return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end else return emit_error(l) @@ -665,7 +665,7 @@ function lex_digit(l::Lexer, kind) if pc == 'x' kind = Tokens.HEX_INT readchar(l) - !(ishex(ppc) || ppc =='.') && return emit_error(l) + !(ishex(ppc) || ppc =='.') && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) accept_number(l, ishex) if accept(l, '.') accept_number(l, ishex) @@ -676,12 +676,12 @@ function lex_digit(l::Lexer, kind) accept_number(l, isdigit) end elseif pc == 'b' - !isbinary(ppc) && return emit_error(l) + !isbinary(ppc) && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) readchar(l) accept_number(l, isbinary) kind = Tokens.BIN_INT elseif pc == 'o' - !isoctal(ppc) && return emit_error(l) + !isoctal(ppc) && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) readchar(l) accept_number(l, isoctal) kind = Tokens.OCT_INT diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index ae42a589113bd..75567a4d9ccea 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -30,6 +30,8 @@ _add_kws() EOF_STRING, EOF_CHAR, EOF_CMD, + INVALID_NUMERIC_CONSTANT, + INVALID_OPERATOR, UNKNOWN, ) @@ -39,6 +41,8 @@ TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( EOF_STRING => "unterminated string literal", EOF_CHAR => "unterminated character literal", EOF_CMD => "unterminated cmd literal", + INVALID_NUMERIC_CONSTANT => "invalid numeric constant", + INVALID_OPERATOR => "invalid operator", UNKNOWN => "unknown", ) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index c41483c1a7e04..8e6e84fc86ba4 100644 --- 
a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -526,4 +526,15 @@ end @testset "dot startpos" begin @test Tokenize.Tokens.startpos(tok("./")) == (1,1) -end \ No newline at end of file +end + +@testset "token errors" begin + @test tok("1.2e2.3",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT + @test tok("1.2.",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT + @test tok("1.2.f",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT + @test tok("0xv",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT + @test tok("0b3",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT + @test tok("0op",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT + @test tok("--",1).token_error === Tokens.INVALID_OPERATOR + @test tok("1**2",2).token_error === Tokens.INVALID_OPERATOR +end From 31d2899271edce3e9be16372012a873342733298 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 17 May 2019 06:58:52 +0100 Subject: [PATCH 0138/1109] add `new` kw kind (JuliaLang/JuliaSyntax.jl#141) --- JuliaSyntax/src/token_kinds.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 5fb5002387042..531c26c7de885 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -34,6 +34,7 @@ MACRO, MODULE, MUTABLE, + NEW, OUTER, PRIMITIVE, QUOTE, From eaf8149026b27184635c565767e0c64ed5afdb64 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 11 Jun 2019 11:04:52 +0200 Subject: [PATCH 0139/1109] update travis (JuliaLang/JuliaSyntax.jl#144) * update travis * Update .travis.yml --- JuliaSyntax/.travis.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index f3283dcb2979c..7cb5f52853c7c 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -4,19 +4,15 @@ os: - linux - osx julia: + - 1.0 + - 1.1 + - 1.2 - nightly matrix: allow_failures: - julia: nightly notifications: email: false -script: - - if [[ -a 
.git/shallow ]]; then git fetch --unshallow; fi - - julia -e 'Pkg.clone(pwd()); Pkg.build("Tokenize"); Pkg.test("Tokenize"; coverage=true)' - # - julia -e 'Pkg.clone("https://github.com/ZacLN/CSTParser.jl"); Pkg.test("CSTParser")' - # - julia -e 'Pkg.clone("https://github.com/KristofferC/OhMyREPL.jl"); Pkg.test("OhMyREPL")' after_success: - # push coverage results to Coveralls - - julia -e 'cd(Pkg.dir("Tokenize")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' # push coverage results to Codecov - - julia -e 'cd(Pkg.dir("Tokenize")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' From 1a3d4c89e7d6d270c199e648ad598aa69c266be7 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 11 Jun 2019 11:05:08 +0200 Subject: [PATCH 0140/1109] add Project file (JuliaLang/JuliaSyntax.jl#143) --- JuliaSyntax/Project.toml | 13 +++++++++++++ JuliaSyntax/REQUIRE | 1 - 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 JuliaSyntax/Project.toml delete mode 100644 JuliaSyntax/REQUIRE diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml new file mode 100644 index 0000000000000..88438383d5e5d --- /dev/null +++ b/JuliaSyntax/Project.toml @@ -0,0 +1,13 @@ +name = "Tokenize" +uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" +version = "0.5.4" + +[compat] +julia = "1" + +[extras] +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test", "Printf"] diff --git a/JuliaSyntax/REQUIRE b/JuliaSyntax/REQUIRE deleted file mode 100644 index d8e28292cbd3f..0000000000000 --- a/JuliaSyntax/REQUIRE +++ /dev/null @@ -1 +0,0 @@ -julia 0.7-beta2 From e091b8761414896b1718cecb24e982a8212c5ae6 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 16 Jul 2019 20:39:10 +0100 Subject: [PATCH 0141/1109] fix dotop starting position 
(JuliaLang/JuliaSyntax.jl#145) --- JuliaSyntax/src/lexer.jl | 6 +++--- JuliaSyntax/test/lexer.jl | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index c12579d43f808..985ccbe591306 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -298,8 +298,8 @@ end Returns the next `Token`. """ -function next_token(l::Lexer) - start_token!(l) +function next_token(l::Lexer, start = true) + start && start_token!(l) c = readchar(l) if eof(c); return emit(l, Tokens.ENDMARKER) @@ -854,7 +854,7 @@ function lex_dot(l::Lexer) pc, dpc = dpeekchar(l) if dotop1(pc) l.dotop = true - return next_token(l) + return next_token(l, false) elseif pc =='+' l.dotop = true readchar(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 8e6e84fc86ba4..21a9af2a00d09 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -526,6 +526,7 @@ end @testset "dot startpos" begin @test Tokenize.Tokens.startpos(tok("./")) == (1,1) + @test Tokenize.Tokens.startbyte(tok(".≤")) == 0 end @testset "token errors" begin From 50be02174fe6347deec7a0a3d408b1d7949e4e23 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 16 Jul 2019 20:39:20 +0100 Subject: [PATCH 0142/1109] add hat suffix (JuliaLang/JuliaSyntax.jl#146) --- JuliaSyntax/src/utilities.jl | 1 + JuliaSyntax/test/lexer.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index bfe14a9991953..cd7aaba616c8a 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -289,6 +289,7 @@ end 0x000002b2 <= c <= 0x000002b3 || 0x000002b7 <= c <= 0x000002b8 || 0x000002e1 <= c <= 0x000002e3 || + c == 0x00000302 || c == 0x00001d2c || c == 0x00001d2e || 0x00001d30 <= c <= 0x00001d31 || diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 21a9af2a00d09..64ebaaeda0464 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ 
-539,3 +539,7 @@ end @test tok("--",1).token_error === Tokens.INVALID_OPERATOR @test tok("1**2",2).token_error === Tokens.INVALID_OPERATOR end + +@testset "hat suffix" begin + @test tok("ŝ", 1).kind==Tokens.IDENTIFIER +end From 46c8d8f62f8b3fd9f4c0fdaf3cf35c4fb58a7acf Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 16 Jul 2019 20:39:31 +0100 Subject: [PATCH 0143/1109] add colon ops (JuliaLang/JuliaSyntax.jl#147) --- JuliaSyntax/src/token_kinds.jl | 14 +++++++++++++- JuliaSyntax/src/utilities.jl | 6 +++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 531c26c7de885..51e6b58ec9bba 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -547,6 +547,12 @@ begin_colon, COLON, # : DDOT, # .. + LDOTS, # … + TRICOLON, # ⁝ + VDOTS, # ⋮ + DDOTS, # ⋱ + ADOTS, # ⋰ + CDOTS, # ⋯ end_colon, # Level 9 @@ -1317,7 +1323,13 @@ const UNICODE_OPS = Dict{Char, Kind}( '⥯' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, '↑' => HALFWIDTH_UPWARDS_ARROW, '↓' => HALFWIDTH_DOWNWARDS_ARROW, -'⋅' => UNICODE_DOT) +'⋅' => UNICODE_DOT, +'…' => LDOTS, +'⁝' => TRICOLON, +'⋮' => VDOTS, +'⋱' => DDOTS, +'⋰' => ADOTS, +'⋯' => CDOTS) const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index cd7aaba616c8a..77d548eb8d374 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -68,7 +68,6 @@ function is_cat_id_start(ch::Char, cat::Integer) ((c >= 0x2140 && c <= 0x2144) || # ⅀, ⅁, ⅂, ⅃, ⅄ c == 0x223f || c == 0x22be || c == 0x22bf || # ∿, ⊾, ⊿ c == 0x22a4 || c == 0x22a5 || # ⊤ ⊥ - (c >= 0x22ee && c <= 0x22f1) || # ⋮, ⋯, ⋰, ⋱ (c >= 0x2202 && c <= 0x2233 && (c == 0x2202 || c == 0x2205 || c == 0x2206 || # ∂, ∅, ∆ @@ -212,6 +211,8 @@ takechar(io::IO) = (readchar(io); io) c == 0x000000ac || c == 0x000000b1 || c == 0x000000d7 || + c == 0x00002026 || + c == 0x0000205d || c == 
0x0000214b || 0x00002190 <= c <= 0x00002194 || 0x0000219a <= c <= 0x0000219b || @@ -241,8 +242,7 @@ takechar(io::IO) = (readchar(io); io) 0x000022bc <= c <= 0x000022bd || 0x000022c4 <= c <= 0x000022c7 || 0x000022c9 <= c <= 0x000022d3 || - 0x000022d5 <= c <= 0x000022ed || - 0x000022f2 <= c <= 0x000022ff || + 0x000022d5 <= c <= 0x000022ff || c == 0x000025b7 || c == 0x000027c2 || 0x000027c8 <= c <= 0x000027c9 || From 53badf3a1f0d72a93f2994f5ae4e95de963e4b71 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 16 Jul 2019 20:40:04 +0100 Subject: [PATCH 0144/1109] fix float parsing with trailing dotted operator (JuliaLang/JuliaSyntax.jl#149) --- JuliaSyntax/src/lexer.jl | 8 ++++++-- JuliaSyntax/src/utilities.jl | 21 +++++++++++++++++++++ JuliaSyntax/test/lexer.jl | 1 + 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 985ccbe591306..58506d15dc444 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -638,7 +638,9 @@ function lex_digit(l::Lexer, kind) readchar(l) accept(l, "+-") if accept_batch(l, isdigit) - if accept(l, '.') # 1.2e2.3 -> [ERROR, 3] + pc,ppc = dpeekchar(l) + if pc === '.' && !dotop2(ppc, ' ') + accept(l, '.') return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end else @@ -654,7 +656,9 @@ function lex_digit(l::Lexer, kind) readchar(l) accept(l, "+-") if accept_batch(l, isdigit) - if accept(l, '.') # 1.2e2.3 -> [ERROR, 3] + pc,ppc = dpeekchar(l) + if pc === '.' 
&& !dotop2(ppc, ' ') + accept(l, '.') return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end else diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 77d548eb8d374..ed3c53ba04720 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -278,6 +278,27 @@ takechar(io::IO) = (readchar(io); io) 0x0000ffe9 <= c <= 0x0000ffec end +function dotop2(pc, dpc) + dotop1(pc) || + pc =='+' || + pc =='-' || + pc =='*' || + pc =='/' || + pc =='\\' || + pc =='^' || + pc =='<' || + pc =='>' || + pc =='&' && dpc === '=' || + pc =='&' || + pc =='%' || + pc == '=' && dpc != '>' || + pc == '|' && dpc != '|' || + pc == '!' && dpc == '=' || + pc == '⊻' || + pc == '÷' || + pc == '=' && dpc == '>' +end + # suffix operators # "₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ′″‴‵‶‷⁗" @inline function isopsuffix(c1::Char) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 64ebaaeda0464..0a90d7b273e8b 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -358,6 +358,7 @@ end @test tok("1.+ ").kind == Tokens.INTEGER @test tok("1.⤋").kind == Tokens.INTEGER @test tok("1..").kind == Tokens.INTEGER + @test T.kind.(collect(tokenize("1f0./1"))) == [T.FLOAT, T.OP, T.INTEGER, T.ENDMARKER] end From 1ec82ea8f6c33be717c1e7a6e31aeaa894686a88 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 16 Jul 2019 21:41:14 +0200 Subject: [PATCH 0145/1109] bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 88438383d5e5d..e7410a7b8c42a 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.4" +version = "0.5.5" [compat] julia = "1" From 424a371c2a7f8363c209e55dcd85e62186039a54 Mon Sep 17 00:00:00 2001 From: janEbert Date: Mon, 29 Jul 
2019 18:35:44 +0000 Subject: [PATCH 0146/1109] Fix outdated links in README (JuliaLang/JuliaSyntax.jl#150) --- JuliaSyntax/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 39af3694a2034..cdc0b248bcf5e 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,6 +1,6 @@ # Tokenize -[![Build Status](https://travis-ci.org/KristofferC/Tokenize.jl.svg?branch=master)](https://travis-ci.org/KristofferC/Tokenize.jl) [![Build status](https://ci.appveyor.com/api/projects/status/h9d9webkxyhpx790?svg=true)](https://ci.appveyor.com/project/KristofferC/tokenize-jl) [![codecov.io](https://codecov.io/github/KristofferC/Tokenize.jl/coverage.svg?branch=master)](https://codecov.io/github/KristofferC/Tokenize.jl?branch=master) +[![Build Status](https://travis-ci.org/JuliaLang/Tokenize.jl.svg?branch=master)](https://travis-ci.org/JuliaLang/Tokenize.jl) [![Build status](https://ci.appveyor.com/api/projects/status/h9d9webkxyhpx790?svg=true)](https://ci.appveyor.com/project/KristofferC/tokenize-jl) [![codecov.io](https://codecov.io/github/JuliaLang/Tokenize.jl/coverage.svg?branch=master)](https://codecov.io/github/JuliaLang/Tokenize.jl?branch=master) `Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. 
@@ -59,4 +59,4 @@ julia> Tokens.exactkind(tok) RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 128 ``` -All the different `Token.Kind` can be seen in the [`token_kinds.jl` file](https://github.com/KristofferC/Tokenize.jl/blob/master/src/token_kinds.jl) +All the different `Token.Kind` can be seen in the [`token_kinds.jl` file](https://github.com/JuliaLang/Tokenize.jl/blob/master/src/token_kinds.jl) From c97a5a0158bd0d4b3e0b28c3d18177a71893f903 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Mon, 5 Aug 2019 21:21:33 +0100 Subject: [PATCH 0147/1109] mark suffixed ops (needed for CSTParser) --- JuliaSyntax/src/lexer.jl | 8 ++++++-- JuliaSyntax/src/token.jl | 12 +++++++----- JuliaSyntax/test/lexer.jl | 8 ++++++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 58506d15dc444..f6ce0ad37ad75 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -244,6 +244,7 @@ end Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. 
""" function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t + suffix = false if (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) str = String(take!(l.charstore)) elseif kind == Tokens.ERROR @@ -252,6 +253,7 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) str = "" while isopsuffix(peekchar(l)) str = string(str, readchar(l)) + suffix = true end else str = "" @@ -259,7 +261,7 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, - str, err, l.dotop) + str, err, l.dotop, suffix) l.dotop = false l.last_token = kind readoff(l) @@ -267,15 +269,17 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) end function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t + suffix = false if optakessuffix(kind) while isopsuffix(peekchar(l)) readchar(l) + suffix = true end end tok = RawToken(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err, l.dotop) + startpos(l), position(l) - 1, err, l.dotop, suffix) l.dotop = false l.last_token = kind diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 75567a4d9ccea..c9f50511da06d 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -58,12 +58,13 @@ struct Token <: AbstractToken val::String # The actual string of the token token_error::TokenError dotop::Bool + suffix::Bool end function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int, val::String) -Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false) +Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false, false) end -Token() = Token(ERROR, 
(0,0), (0,0), 0, 0, "", UNKNOWN, false) +Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false) struct RawToken <: AbstractToken kind::Kind @@ -74,12 +75,13 @@ struct RawToken <: AbstractToken endbyte::Int # The byte where the token ended in the buffer token_error::TokenError dotop::Bool + suffix::Bool end function RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int) -RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false) +RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false) end -RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false) +RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false) const _EMPTY_TOKEN = Token() @@ -133,7 +135,7 @@ function untokenize(t::Token) end function untokenize(t::RawToken, str::String) - String(str[1 + (t.startbyte:t.endbyte)]) + String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) end function untokenize(ts) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 0a90d7b273e8b..cfe8b708ad4e8 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -543,4 +543,12 @@ end @testset "hat suffix" begin @test tok("ŝ", 1).kind==Tokens.IDENTIFIER + @test untokenize(collect(tokenize("ŝ", Tokens.RawToken))[1], "ŝ") == "ŝ" end + +@testset "suffixed op" begin + s = "+¹" + @test Tokens.isoperator(tok(s, 1).kind) + @test untokenize(collect(tokenize(s, Tokens.RawToken))[1], s) == s +end + From f79c6e5708852e7e11fe0e7d88af3e0164d695fb Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 13 Aug 2019 21:37:04 +0200 Subject: [PATCH 0148/1109] Bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index e7410a7b8c42a..a2a5cf3af2021 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = 
"0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.5" +version = "0.5.6" [compat] julia = "1" From 967f3a6ff10319ca46586718995f1555c117f198 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 27 Nov 2019 12:45:06 +0000 Subject: [PATCH 0149/1109] disallow float w/ trailing . juxt w/ op (JuliaLang/JuliaSyntax.jl#152) * disallow float w/ trailing . juxt w/ op * fix tests * reorder tests --- JuliaSyntax/src/_precompile.jl | 7 +++++++ JuliaSyntax/src/lexer.jl | 3 +++ JuliaSyntax/src/utilities.jl | 7 +++++++ JuliaSyntax/test/lexer.jl | 15 +++++++++++---- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index 6de323a2ef639..cb813a4bca49b 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -98,4 +98,11 @@ function _precompile_() precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.isdigit),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Function,)) + + + precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) end diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index f6ce0ad37ad75..5b22d9d39e16e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -611,6 +611,9 @@ function lex_digit(l::Lexer, kind) if pc == 
'.' if ppc == '.' return emit(l, kind) + elseif is_operator_start_char(ppc) && ppc !== ':' + readchar(l) + return emit_error(l) elseif (!(isdigit(ppc) || iswhitespace(ppc) || is_identifier_start_char(ppc) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index ed3c53ba04720..7cca93a2d58d9 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -369,3 +369,10 @@ function optakessuffix(k) Tokens.NOT_SIGN <= k <= Tokens.QUAD_ROOT ) end + +function is_operator_start_char(c::Char) + eof(c) && return false + is_operator_start_char(UInt32(c)) +end +is_operator_start_char(u::UInt32) = u == 0x00000021 || (u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b || (u == 
0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u == 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || (u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 || (u == 
0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u == 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || (u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 || (u == 
0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u == 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || (u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 || (u == 
0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u == 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || (u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 || (u == 
0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u == 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || (u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 || (u == 
0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index cfe8b708ad4e8..bf027e34e5ccf 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -182,7 +182,6 @@ end @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin - @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234.+1")) @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .+1")) @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0.+1")) @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0 .+1")) @@ -355,8 +354,6 @@ end @test tok("1.").kind == Tokens.FLOAT @test tok("1.\"text\" ").kind == Tokens.FLOAT - @test tok("1.+ ").kind == Tokens.INTEGER - @test tok("1.⤋").kind == Tokens.INTEGER @test tok("1..").kind == Tokens.INTEGER @test T.kind.(collect(tokenize("1f0./1"))) == [T.FLOAT, T.OP, T.INTEGER, T.ENDMARKER] end @@ -405,7 +402,6 @@ end @test tok("2f+0").kind == Tokens.FLOAT @test tok("2048f0").kind == Tokens.FLOAT @test tok("1.:0").kind == Tokens.FLOAT - @test tok("1.?").kind == Tokens.FLOAT @test tok("0x00p2").kind == Tokens.FLOAT @test tok("0x00P2").kind == Tokens.FLOAT @test tok("0x0.00p23").kind == Tokens.FLOAT @@ -552,3 +548,14 @@ end @test untokenize(collect(tokenize(s, Tokens.RawToken))[1], s) == s end 
+@testset "invalid float juxt" begin + s = "1.+2" + @test tok(s, 1).kind == Tokens.ERROR + @test Tokens.isoperator(tok(s, 2).kind) + @test (t->t.val=="1234." && t.kind == Tokens.ERROR )(tok("1234.+1")) # requires space before '.' + @test tok("1.+ ").kind == Tokens.ERROR + @test tok("1.⤋").kind == Tokens.ERROR + @test tok("1.?").kind == Tokens.ERROR +end + + From d781538d58ac0ca5c2e083782c4486a16fd9bfd3 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 27 Nov 2019 20:56:12 +0100 Subject: [PATCH 0150/1109] Bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index a2a5cf3af2021..04261944d5a23 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.6" +version = "0.5.7" [compat] julia = "1" From 6a51e8dad628e9c5d874f7b09e85ebde1e93946b Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 25 Mar 2020 10:48:42 +0100 Subject: [PATCH 0151/1109] update Julia test versions (JuliaLang/JuliaSyntax.jl#157) * update Julia test versions * update Julia test versions --- JuliaSyntax/.travis.yml | 3 +-- JuliaSyntax/appveyor.yml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml index 7cb5f52853c7c..f1a635ca33f5b 100644 --- a/JuliaSyntax/.travis.yml +++ b/JuliaSyntax/.travis.yml @@ -5,8 +5,7 @@ os: - osx julia: - 1.0 - - 1.1 - - 1.2 + - 1.4 - nightly matrix: allow_failures: diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml index 831beb78f8fa9..e061db30636b2 100644 --- a/JuliaSyntax/appveyor.yml +++ b/JuliaSyntax/appveyor.yml @@ -1,6 +1,6 @@ environment: matrix: - - julia_version: 0.7 + - julia_version: 1 - julia_version: latest platform: From 1661289e0eaa9c672d2986eb9f44c4255dd31f6c Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 25 Mar 2020 09:58:51 +0000 Subject: 
[PATCH 0152/1109] use Base.is_id_char, etc. (JuliaLang/JuliaSyntax.jl#156) --- JuliaSyntax/src/utilities.jl | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 7cca93a2d58d9..74e6ad04ccc15 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -110,36 +110,12 @@ end function is_identifier_char(c::Char) c == EOF_CHAR && return false - if ((c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z') || c == '_' || - (c >= '0' && c <= '9') || c == '!') - return true - elseif (UInt32(c) < 0xA1 || UInt32(c) > 0x10ffff) - return false - end - cat = Unicode.category_code(c) - is_cat_id_start(c, cat) && return true - if cat == Unicode.UTF8PROC_CATEGORY_MN || cat == Unicode.UTF8PROC_CATEGORY_MC || - cat == Unicode.UTF8PROC_CATEGORY_ND || cat == Unicode.UTF8PROC_CATEGORY_PC || - cat == Unicode.UTF8PROC_CATEGORY_SK || cat == Unicode.UTF8PROC_CATEGORY_ME || - cat == Unicode.UTF8PROC_CATEGORY_NO || - (0x2032 <= UInt32(c) <= 0x2034) || # primes - UInt32(c) == 0x0387 || UInt32(c) == 0x19da || - (0x1369 <= UInt32(c) <= 0x1371) - return true - end - return false + return Base.is_id_char(c) end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') - return true - elseif (UInt32(c) < 0xA1 || UInt32(c) > 0x10ffff) - return false - end - cat = Unicode.category_code(c) - return is_cat_id_start(c, cat) + return Base.is_id_start_char(c) end From acd72f92080ceff6472c6b8f5df949c1fea976c0 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 25 Mar 2020 10:59:08 +0100 Subject: [PATCH 0153/1109] bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 04261944d5a23..5ec0da72e444e 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = 
"Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.7" +version = "0.5.8" [compat] julia = "1" From c9a5ba53372fdc9a30b6f861813956b35ae11a55 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 30 Mar 2020 21:58:45 +0200 Subject: [PATCH 0154/1109] also lex stdlibs in benchmark (JuliaLang/JuliaSyntax.jl#158) --- JuliaSyntax/README.md | 2 +- JuliaSyntax/benchmark/lex_base.jl | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index cdc0b248bcf5e..bb87201312a55 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -7,7 +7,7 @@ The goals of this package is to be -* Fast, it currently lexes all of Julia source files in ~0.3 seconds (295 files, 1.16 million Tokens) +* Fast, it currently lexes all of Julia source files in ~0.25 seconds (580 files, 2 million Tokens) * Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. * Non error throwing. Instead of throwing errors a certain error token is returned. diff --git a/JuliaSyntax/benchmark/lex_base.jl b/JuliaSyntax/benchmark/lex_base.jl index 2ffedc541a9cf..cf58cb9f3e77a 100644 --- a/JuliaSyntax/benchmark/lex_base.jl +++ b/JuliaSyntax/benchmark/lex_base.jl @@ -6,19 +6,21 @@ function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens. 
tot_files = 0 tot_tokens = 0 tot_errors = 0 - dir = dirname(Base.find_source_file("int.jl")) - for (root, dirs, files) in walkdir(dir) - for file in files - if endswith(file, ".jl") - tot_files += 1 - file = joinpath(root, file) - str = read(file, String)::String - l = tokenize(str, T) - while !Tokenize.Lexers.eof(l) - t = Tokenize.Lexers.next_token(l) - tot_tokens += 1 - if t.kind == Tokens.ERROR - tot_errors += 1 + basedir = dirname(Base.find_source_file("int.jl")) + for dir in (basedir, Sys.STDLIB) + for (root, dirs, files) in walkdir(dir) + for file in files + if endswith(file, ".jl") + tot_files += 1 + file = joinpath(root, file) + str = read(file, String)::String + l = tokenize(str, T) + while !Tokenize.Lexers.eof(l) + t = Tokenize.Lexers.next_token(l) + tot_tokens += 1 + if t.kind == Tokens.ERROR + tot_errors += 1 + end end end end From 1045b00cb381d5018466dc701b0fc8ab5bdabfb5 Mon Sep 17 00:00:00 2001 From: Zac Nugent Date: Tue, 12 Jan 2021 13:23:12 +0000 Subject: [PATCH 0155/1109] fix comment lexing --- JuliaSyntax/src/lexer.jl | 4 +++- JuliaSyntax/test/lexer.jl | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 5b22d9d39e16e..444f548c8e289 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -409,6 +409,7 @@ function lex_comment(l::Lexer, doemit=true) readchar(l) end else + pc = '#' c = readchar(l) # consume the '=' n_start, n_end = 1, 0 while true @@ -418,12 +419,13 @@ function lex_comment(l::Lexer, doemit=true) nc = readchar(l) if c == '#' && nc == '=' n_start += 1 - elseif c == '=' && nc == '#' + elseif c == '=' && nc == '#' && pc != '#' n_end += 1 end if n_start == n_end return doemit ? 
emit(l, Tokens.COMMENT) : EMPTY_TOKEN(token_type(l)) end + pc = c c = nc end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index bf027e34e5ccf..77f2c3ce97586 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -559,3 +559,8 @@ end end +@testset "comments" begin + s = "#=# text=#" + @test length(collect(tokenize(s, Tokens.RawToken))) == 2 +end + From 78a7920c503a05d2676fd8fc397237214415dabb Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 12 Jan 2021 17:32:10 +0100 Subject: [PATCH 0156/1109] use github actions instead of travis (JuliaLang/JuliaSyntax.jl#160) * use github actions instead of travis --- JuliaSyntax/.github/workflows/CI.yml | 49 ++++++++++++++++++++++++ JuliaSyntax/.github/workflows/TagBot.yml | 11 ++++++ JuliaSyntax/.travis.yml | 17 -------- JuliaSyntax/README.md | 3 +- JuliaSyntax/appveyor.yml | 36 ----------------- 5 files changed, 61 insertions(+), 55 deletions(-) create mode 100644 JuliaSyntax/.github/workflows/CI.yml create mode 100644 JuliaSyntax/.github/workflows/TagBot.yml delete mode 100644 JuliaSyntax/.travis.yml delete mode 100644 JuliaSyntax/appveyor.yml diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml new file mode 100644 index 0000000000000..08e7ec3027c62 --- /dev/null +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -0,0 +1,49 @@ +name: CI +on: + pull_request: + branches: + - master + push: + branches: + - master + tags: '*' +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.0' + - '1' + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x64 + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: actions/cache@v1 + env: + cache-name: cache-artifacts + with: + 
path: ~/.julia/artifacts + key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + restore-keys: | + ${{ runner.os }}-test-${{ env.cache-name }}- + ${{ runner.os }}-test- + ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v1 + with: + file: lcov.info + diff --git a/JuliaSyntax/.github/workflows/TagBot.yml b/JuliaSyntax/.github/workflows/TagBot.yml new file mode 100644 index 0000000000000..d77d3a0c36d8a --- /dev/null +++ b/JuliaSyntax/.github/workflows/TagBot.yml @@ -0,0 +1,11 @@ +name: TagBot +on: + schedule: + - cron: 0 * * * * +jobs: + TagBot: + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/JuliaSyntax/.travis.yml b/JuliaSyntax/.travis.yml deleted file mode 100644 index f1a635ca33f5b..0000000000000 --- a/JuliaSyntax/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -# Documentation: http://docs.travis-ci.com/user/languages/julia/ -language: julia -os: - - linux - - osx -julia: - - 1.0 - - 1.4 - - nightly -matrix: - allow_failures: - - julia: nightly -notifications: - email: false -after_success: - # push coverage results to Codecov - - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index bb87201312a55..f26f748dddac5 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,7 +1,6 @@ # Tokenize -[![Build Status](https://travis-ci.org/JuliaLang/Tokenize.jl.svg?branch=master)](https://travis-ci.org/JuliaLang/Tokenize.jl) [![Build status](https://ci.appveyor.com/api/projects/status/h9d9webkxyhpx790?svg=true)](https://ci.appveyor.com/project/KristofferC/tokenize-jl) 
[![codecov.io](https://codecov.io/github/JuliaLang/Tokenize.jl/coverage.svg?branch=master)](https://codecov.io/github/JuliaLang/Tokenize.jl?branch=master) - +[![Build Status](https://github.com/KristofferC/Tokenize.jl/workflows/CI/badge.svg)](https://github.com/KristofferC/Tokenize.jl/actions?query=workflows/CI) `Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. diff --git a/JuliaSyntax/appveyor.yml b/JuliaSyntax/appveyor.yml deleted file mode 100644 index e061db30636b2..0000000000000 --- a/JuliaSyntax/appveyor.yml +++ /dev/null @@ -1,36 +0,0 @@ -environment: - matrix: - - julia_version: 1 - - julia_version: latest - -platform: - - x86 # 32-bit - - x64 # 64-bit - -## uncomment the following lines to allow failures on nightly julia -## (tests will run but not make your overall status red) -#matrix: -# allow_failures: -# - julia_version: latest - -branches: - only: - - master - - /release-.*/ - -notifications: - - provider: Email - on_build_success: false - on_build_failure: false - on_build_status_changed: false - -install: - - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) - -build_script: - - echo "%JL_BUILD_SCRIPT%" - - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" - -test_script: - - echo "%JL_TEST_SCRIPT%" - - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" From e106071d7501744792ab8f3f4244c6379e497009 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 13 Jan 2021 11:36:51 +0000 Subject: [PATCH 0157/1109] fix string interpolation of Chars (JuliaLang/JuliaSyntax.jl#162) --- JuliaSyntax/src/lexer.jl | 14 ++++++++------ JuliaSyntax/test/lexer.jl | 4 ++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl 
b/JuliaSyntax/src/lexer.jl index 5b22d9d39e16e..e5d78a9edac26 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -701,7 +701,7 @@ function lex_digit(l::Lexer, kind) return emit(l, kind) end -function lex_prime(l) +function lex_prime(l, doemit = true) if l.last_token == Tokens.IDENTIFIER || l.last_token == Tokens.DOT || l.last_token == Tokens.RPAREN || @@ -713,25 +713,25 @@ function lex_prime(l) readon(l) if accept(l, '\'') if accept(l, '\'') - return emit(l, Tokens.CHAR) + return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) else # Empty char literal # Arguably this should be an error here, but we generally # look at the contents of the char literal in the parser, # so we defer erroring until there. - return emit(l, Tokens.CHAR) + return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) end end while true c = readchar(l) if eof(c) - return emit_error(l, Tokens.EOF_CHAR) + return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN(token_type(l)) elseif c == '\\' if eof(readchar(l)) - return emit_error(l, Tokens.EOF_CHAR) + return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN(token_type(l)) end elseif c == '\'' - return emit(l, Tokens.CHAR) + return doemit ? 
emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) end end end @@ -820,6 +820,8 @@ function read_string(l::Lexer, kind::Tokens.Kind) lex_cmd(l, false) elseif c == '#' lex_comment(l, false) + elseif c == '\'' + lex_prime(l, false) end end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index bf027e34e5ccf..6745c7f512370 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -558,4 +558,8 @@ end @test tok("1.?").kind == Tokens.ERROR end +@testset "interpolation of char within string" begin + s = "\"\$('\"')\"" + @test collect(tokenize(s))[1].kind == Tokenize.Tokens.STRING +end From a6c6f56d72de84d87ccbcd1456704e1619fab9e2 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 13 Jan 2021 12:37:41 +0100 Subject: [PATCH 0158/1109] bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 5ec0da72e444e..6cff936bab2ad 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.8" +version = "0.5.9" [compat] julia = "1" From 8ec91df2690ea8bfbd019c5676bf8f03c015285d Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 13 Jan 2021 14:24:56 +0100 Subject: [PATCH 0159/1109] bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 6cff936bab2ad..c6d0b0fdcbbfa 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.9" +version = "0.5.10" [compat] julia = "1" From 528f40b93dcc7d3f378fad4b7fbd848096c2c66d Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Fri, 22 Jan 2021 22:22:08 +0000 Subject: [PATCH 0160/1109] add missing operator (JuliaLang/JuliaSyntax.jl#164) * add missing operator * make dotop --- 
JuliaSyntax/src/token_kinds.jl | 4 +++- JuliaSyntax/src/utilities.jl | 1 + JuliaSyntax/test/lexer.jl | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 51e6b58ec9bba..ff27bd8c8c2fc 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -233,6 +233,7 @@ RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, # ⭌ HALFWIDTH_LEFTWARDS_ARROW, # ← HALFWIDTH_RIGHTWARDS_ARROW, # → + CIRCLE_ARROW_RIGHT, end_arrow, # Level 4 @@ -1329,7 +1330,8 @@ const UNICODE_OPS = Dict{Char, Kind}( '⋮' => VDOTS, '⋱' => DDOTS, '⋰' => ADOTS, -'⋯' => CDOTS) +'⋯' => CDOTS, +'↻' => CIRCLE_ARROW_RIGHT) const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 74e6ad04ccc15..98cfe2841b380 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -199,6 +199,7 @@ takechar(io::IO) = (readchar(io); io) 0x000021ce <= c <= 0x000021cf || c == 0x000021d2 || c == 0x000021d4 || + c == 0x000021bb || 0x000021f4 <= c <= 0x000021ff || 0x00002208 <= c <= 0x0000220d || 0x00002213 <= c <= 0x00002214 || diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index b1f8c11b0ea73..5a11340d6c5e8 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -568,3 +568,7 @@ end @test length(collect(tokenize(s, Tokens.RawToken))) == 2 end +@testset "circ arrow right op" begin + s = "↻" + @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.CIRCLE_ARROW_RIGHT +end From 30c9b8a86b7214bae16202e17debb3a10979bf68 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Sat, 23 Jan 2021 09:02:05 +0000 Subject: [PATCH 0161/1109] emit error on invalid hexadecimal (JuliaLang/JuliaSyntax.jl#166) * emit error on invalid hexadecimal * typo --- JuliaSyntax/src/lexer.jl | 4 ++++ JuliaSyntax/test/lexer.jl | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl 
index af068e9f698c2..71bf6bd81debb 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -677,16 +677,20 @@ function lex_digit(l::Lexer, kind) kind == Tokens.INTEGER if pc == 'x' kind = Tokens.HEX_INT + isfloat = false readchar(l) !(ishex(ppc) || ppc =='.') && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) accept_number(l, ishex) if accept(l, '.') accept_number(l, ishex) + isfloat = true end if accept(l, "pP") kind = Tokens.FLOAT accept(l, "+-") accept_number(l, isdigit) + elseif isfloat + return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end elseif pc == 'b' !isbinary(ppc) && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 5a11340d6c5e8..582ef7d8faf73 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -568,6 +568,11 @@ end @test length(collect(tokenize(s, Tokens.RawToken))) == 2 end +@testset "invalid hexadecimal" begin + s = "0x." + tok(s, 1).kind === Tokens.ERROR +end + @testset "circ arrow right op" begin s = "↻" @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.CIRCLE_ARROW_RIGHT From 8fd246de9f52283043c7b21a70cb5f7a64d4df05 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 1 Feb 2021 23:17:30 +0100 Subject: [PATCH 0162/1109] Bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index c6d0b0fdcbbfa..9c3a26666bb7e 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.10" +version = "0.5.11" [compat] julia = "1" From a830a47eeac28a3aca3bea43d359d4c44c348bdd Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 3 Feb 2021 11:28:04 +0000 Subject: [PATCH 0163/1109] fix invalid float parse (JuliaLang/JuliaSyntax.jl#167) * fix invalid float parse Co-authored-by: Sebastian Pfitzner --- 
JuliaSyntax/src/lexer.jl | 6 +++++- JuliaSyntax/test/lexer.jl | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 71bf6bd81debb..318b19b7baa8a 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -611,7 +611,11 @@ function lex_digit(l::Lexer, kind) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' - if ppc == '.' + if kind === Tokens.FLOAT + # If we enter the function with kind == FLOAT then a '.' has been parsed. + readchar(l) + return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + elseif ppc == '.' return emit(l, kind) elseif is_operator_start_char(ppc) && ppc !== ':' readchar(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 582ef7d8faf73..c7a7605eff37a 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -577,3 +577,8 @@ end s = "↻" @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.CIRCLE_ARROW_RIGHT end + +@testset "invalid float" begin + s = ".0." 
+ @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.ERROR +end From 2a06a3dabb7c57bb3e079c717f320d91c6883793 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 3 Feb 2021 12:28:20 +0100 Subject: [PATCH 0164/1109] bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 9c3a26666bb7e..ff527193d62e1 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.11" +version = "0.5.12" [compat] julia = "1" From a877c82f7cbf323972c6a1e802d8d59c0893c732 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 10 Feb 2021 14:48:48 +0000 Subject: [PATCH 0165/1109] update list of operators (JuliaLang/JuliaSyntax.jl#169) * update list of operators * fix tests --- JuliaSyntax/src/token_kinds.jl | 68 +++++++++++++++++++++++++++++++++- JuliaSyntax/src/utilities.jl | 25 +++++++++++-- JuliaSyntax/test/lexer.jl | 25 ++++++++++++- 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index ff27bd8c8c2fc..f04fa0f6a52f0 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -123,12 +123,17 @@ LEFT_RIGHT_ARROW, # ↔ LEFTWARDS_ARROW_WITH_STROKE, # ↚ RIGHTWARDS_ARROW_WITH_STROKE, # ↛ + LEFTWARDS_TWO_HEADED_ARROW,# ↞ RIGHTWARDS_TWO_HEADED_ARROW, # ↠ + LEFTWARDS_ARROW_WITH_TAIL, # ↢ RIGHTWARDS_ARROW_WITH_TAIL, # ↣ + LEFTWARDS_ARROW_FROM_BAR,# ↤ RIGHTWARDS_ARROW_FROM_BAR, # ↦ LEFT_RIGHT_ARROW_WITH_STROKE, # ↮ LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, # ⇎ + LEFTWARDS_DOUBLE_ARROW_WITH_STROKE, # ⇍ RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, # ⇏ + LEFTWARDS_DOUBLE_ARROW, # ⇐ RIGHTWARDS_DOUBLE_ARROW, # ⇒ LEFT_RIGHT_DOUBLE_ARROW, # ⇔ RIGHT_ARROW_WITH_SMALL_CIRCLE, # ⇴ @@ -234,6 +239,31 @@ HALFWIDTH_LEFTWARDS_ARROW, # ← HALFWIDTH_RIGHTWARDS_ARROW, # → CIRCLE_ARROW_RIGHT, + 
LEFT_SQUIGGLE_ARROW, # ⇜ + RIGHT_SQUIGGLE_ARROW, # ⇝ + LEFT_WAVE_ARROW, # ↜ + RIGHT_WAVE_ARROW, # ↝ + LEFTWARDS_ARROW_WITH_HOOK, # ↩ + RIGHTWARDS_ARROW_WITH_HOOK, # ↪ + LOOP_ARROW_LEFT, # ↫ + LOOP_ARROW_RIGHT, # ↬ + LEFT_HARPOON_UP, # ↼ + LEFT_HARPOON_DOWN, # ↽ + RIGHT_HARPOON_UP, # ⇀ + RIGHT_HARPOON_DOWN, # ⇁ + RIGHT_LEFT_ARROWS, # ⇄ + LEFT_RIGHT_ARROWS, # ⇆ + LEFT_LEFT_ARROWS, # ⇇ + RIGHT_RIGHT_ARROWS, # ⇉ + LEFT_RIGHT_HARPOONS, # ⇋ + RIGHT_LEFT_HARPOONS, # ⇌ + L_LEFT_ARROW, # ⇚ + R_RIGHT_ARROW, # ⇛ + LEFT_DASH_ARROW, # ⇠ + RIGHT_DASH_ARROW, # ⇢ + CURVE_ARROW_RIGHT, # ↷ + CURVE_ARROW_LEFT,# ↶ + CIRCLE_ARROW_LEFT,# ↺ end_arrow, # Level 4 @@ -615,6 +645,7 @@ SMALL_VEE_WITH_UNDERBAR, # ⩡ LOGICAL_OR_WITH_DOUBLE_OVERBAR, # ⩢ LOGICAL_OR_WITH_DOUBLE_UNDERBAR, # ⩣ + BROKEN_BAR, # ¦ end_plus, # Level 10 @@ -702,6 +733,8 @@ LEFT_OUTER_JOIN, # ⟕ RIGHT_OUTER_JOIN, # ⟖ FULL_OUTER_JOIN, # ⟗ + NOT_SLASH, # ⌿ + BB_SEMI, # ⨟ end_times, # Level 12 @@ -785,12 +818,17 @@ const UNICODE_OPS = Dict{Char, Kind}( '↔' => LEFT_RIGHT_ARROW, '↚' => LEFTWARDS_ARROW_WITH_STROKE, '↛' => RIGHTWARDS_ARROW_WITH_STROKE, +'↞' => LEFTWARDS_TWO_HEADED_ARROW, '↠' => RIGHTWARDS_TWO_HEADED_ARROW, +'↢' => LEFTWARDS_ARROW_WITH_TAIL, '↣' => RIGHTWARDS_ARROW_WITH_TAIL, +'↤' => LEFTWARDS_ARROW_FROM_BAR, '↦' => RIGHTWARDS_ARROW_FROM_BAR, '↮' => LEFT_RIGHT_ARROW_WITH_STROKE, '⇎' => LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, +'⇍' => LEFTWARDS_DOUBLE_ARROW_WITH_STROKE, '⇏' => RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, +'⇐' => LEFTWARDS_DOUBLE_ARROW, '⇒' => RIGHTWARDS_DOUBLE_ARROW, '⇔' => LEFT_RIGHT_DOUBLE_ARROW, '⇴' => RIGHT_ARROW_WITH_SMALL_CIRCLE, @@ -1331,7 +1369,35 @@ const UNICODE_OPS = Dict{Char, Kind}( '⋱' => DDOTS, '⋰' => ADOTS, '⋯' => CDOTS, -'↻' => CIRCLE_ARROW_RIGHT) +'↻' => CIRCLE_ARROW_RIGHT, +'⇜' => LEFT_SQUIGGLE_ARROW, +'⇝' => RIGHT_SQUIGGLE_ARROW, +'↜' => LEFT_WAVE_ARROW, +'↝' => RIGHT_WAVE_ARROW, +'↩' => LEFTWARDS_ARROW_WITH_HOOK, +'↪' => RIGHTWARDS_ARROW_WITH_HOOK, +'↫' => LOOP_ARROW_LEFT, +'↬' => 
LOOP_ARROW_RIGHT, +'↼' => LEFT_HARPOON_UP, +'↽' => LEFT_HARPOON_DOWN, +'⇀' => RIGHT_HARPOON_UP, +'⇁' => RIGHT_HARPOON_DOWN, +'⇄' => RIGHT_LEFT_ARROWS, +'⇆' => LEFT_RIGHT_ARROWS, +'⇇' => LEFT_LEFT_ARROWS, +'⇉' => RIGHT_RIGHT_ARROWS, +'⇋' => LEFT_RIGHT_HARPOONS, +'⇌' => RIGHT_LEFT_HARPOONS, +'⇚' => L_LEFT_ARROW, +'⇛' => R_RIGHT_ARROW, +'⇠' => LEFT_DASH_ARROW, +'⇢' => RIGHT_DASH_ARROW, +'↷' => CURVE_ARROW_RIGHT, +'↶' => CURVE_ARROW_LEFT, +'↺' => CIRCLE_ARROW_LEFT, +'¦' => BROKEN_BAR, +'⌿' => NOT_SLASH, +'⨟' => BB_SEMI) const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 98cfe2841b380..614b8d7e1dba4 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -182,6 +182,7 @@ takechar(io::IO) = (readchar(io); io) c1 == EOF_CHAR && return false c = UInt32(c1) c == 0x00000021 || + c == 0x000000a6 || c == 0x0000002e || c == 0x0000007e || c == 0x000000ac || @@ -191,15 +192,29 @@ takechar(io::IO) = (readchar(io); io) c == 0x0000205d || c == 0x0000214b || 0x00002190 <= c <= 0x00002194 || - 0x0000219a <= c <= 0x0000219b || + 0x0000219a <= c <= 0x0000219e || c == 0x000021a0 || - c == 0x000021a3 || + 0x000021a2 <= c <= 0x000021a4 || + 0x000021aa <= c <= 0x000021ac || c == 0x000021a6 || + c == 0x000021a9 || c == 0x000021ae || - 0x000021ce <= c <= 0x000021cf || + c == 0x000021c0 || + c == 0x000021c1 || + c == 0x000021c4 || + c == 0x000021c6 || + c == 0x000021c7 || + c == 0x000021c9 || + 0x000021cb <= c <= 0x000021cf || c == 0x000021d2 || c == 0x000021d4 || - c == 0x000021bb || + c == 0x000021b6 || + c == 0x000021b7 || + 0x000021ba <= c <= 0x000021bd || + c == 0x000021d0 || + 0x000021da <= c <= 0x000021dd || + c == 0x000021e0 || + c == 0x000021e2 || 0x000021f4 <= c <= 0x000021ff || 0x00002208 <= c <= 0x0000220d || 0x00002213 <= c <= 0x00002214 || @@ -220,6 +235,7 @@ takechar(io::IO) = (readchar(io); io) 0x000022c4 <= c <= 0x000022c7 || 0x000022c9 <= c <= 0x000022d3 || 0x000022d5 <= c <= 
0x000022ff || + c == 0x0000233f || c == 0x000025b7 || c == 0x000027c2 || 0x000027c8 <= c <= 0x000027c9 || @@ -241,6 +257,7 @@ takechar(io::IO) = (readchar(io); io) 0x000029fa <= c <= 0x000029fb || 0x00002a07 <= c <= 0x00002a08 || c == 0x00002a1d || + c == 0x00002a1f || 0x00002a22 <= c <= 0x00002a2e || 0x00002a30 <= c <= 0x00002a3d || 0x00002a40 <= c <= 0x00002a45 || diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index c7a7605eff37a..a2bdc3e30c603 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -483,7 +483,7 @@ for op in ops ex4 = Meta.parse(str4, raise = false) ex5 = Meta.parse(str5, raise = false) ex6 = Meta.parse(str6, raise = false) - if ex1.head != :error # unary + if ex1 isa Expr && ex1.head != :error # unary t1 = collect(tokenize(str1)) exop1 = ex1.head == :call ? ex1.args[1] : ex1.head @test Symbol(Tokenize.Tokens.untokenize(t1[1])) == exop1 @@ -582,3 +582,26 @@ end s = ".0." @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.ERROR end + +@testset "new ops" begin + ops = [raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" + raw"=>" + raw"?" 
+ raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ -->" + raw"||" + raw"&&" + raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:" + raw"<|" + raw"|>" + raw": .. … ⁝ ⋮ ⋱ ⋰ ⋯" + raw"$ + - ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣" + raw"* / ⌿ ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗" + raw"//" + raw"<< >> >>>" + raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" + raw"::" + raw"." 
+ ] + allops = split(join(ops, " "), " ") + @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) +end From 446e7b9129c9ff1357484540aa3bf9496e9bb10e Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Wed, 10 Feb 2021 19:44:40 +0000 Subject: [PATCH 0166/1109] allow PRIME after END kw (JuliaLang/JuliaSyntax.jl#168) --- JuliaSyntax/src/lexer.jl | 3 ++- JuliaSyntax/test/lexer.jl | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 318b19b7baa8a..b9600ab45038e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -717,7 +717,8 @@ function lex_prime(l, doemit = true) l.last_token == Tokens.RPAREN || l.last_token == Tokens.RSQUARE || l.last_token == Tokens.RBRACE || - l.last_token == Tokens.PRIME || isliteral(l.last_token) + l.last_token == Tokens.PRIME || + l.last_token == Tokens.END || isliteral(l.last_token) return emit(l, Tokens.PRIME) else readon(l) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index a2bdc3e30c603..30b0bed43cb61 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -583,6 +583,10 @@ end @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.ERROR end +@testset "allow prime after end" begin + @test tok("begin end'", 4).kind === Tokens.PRIME +end + @testset "new ops" begin ops = [raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" raw"=>" @@ -605,3 +609,4 @@ end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) end + From 27c513adff633bdfebafd87cc3b9e76a22a66346 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 10 Feb 2021 21:18:29 +0100 Subject: [PATCH 0167/1109] 0.5.13 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index ff527193d62e1..4f9be2c012dc6 
100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.12" +version = "0.5.13" [compat] julia = "1" From 667da78d454f5b44af296c3c7f0c3c2ae2d304f0 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Mon, 22 Mar 2021 12:55:58 +0100 Subject: [PATCH 0168/1109] improve string interpolation parsing to allow for single primes again --- JuliaSyntax/src/lexer.jl | 34 +++++++++++++++++----------------- JuliaSyntax/test/lexer.jl | 32 ++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index b9600ab45038e..fca10e97854b9 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -53,7 +53,7 @@ function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} c3 = read(io, Char) p3 = position(io) end - + end Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), (c1,c2,c3), (p1,p2,p3), false, false) end @@ -280,7 +280,7 @@ function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E tok = RawToken(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, err, l.dotop, suffix) - + l.dotop = false l.last_token = kind readoff(l) @@ -717,7 +717,7 @@ function lex_prime(l, doemit = true) l.last_token == Tokens.RPAREN || l.last_token == Tokens.RSQUARE || l.last_token == Tokens.RBRACE || - l.last_token == Tokens.PRIME || + l.last_token == Tokens.PRIME || l.last_token == Tokens.END || isliteral(l.last_token) return emit(l, Tokens.PRIME) else @@ -818,21 +818,21 @@ function read_string(l::Lexer, kind::Tokens.Kind) return false elseif c == '(' o = 1 + l2 = deepcopy(l) while o > 0 - c = readchar(l) - eof(c) && return false - if c == '(' + prevpos = position(l2) + t = next_token(l2) + + for _ in 1:(position(l2) - prevpos) + readchar(l) + end + + 
if Tokens.kind(t) == Tokens.ENDMARKER + return false + elseif Tokens.kind(t) == Tokens.LPAREN o += 1 - elseif c == ')' + elseif Tokens.kind(t) == Tokens.RPAREN o -= 1 - elseif c == '"' - lex_quote(l, false) - elseif c == '`' - lex_cmd(l, false) - elseif c == '#' - lex_comment(l, false) - elseif c == '\'' - lex_prime(l, false) end end end @@ -955,7 +955,7 @@ end # A ` has been consumed function lex_cmd(l::Lexer, doemit=true) - if accept(l, '`') # + if accept(l, '`') # if accept(l, '`') # """ if read_string(l, Tokens.TRIPLE_CMD) return doemit ? emit(l, Tokens.TRIPLE_CMD) : EMPTY_TOKEN(token_type(l)) @@ -965,7 +965,7 @@ function lex_cmd(l::Lexer, doemit=true) else # empty cmd return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) end - else + else if read_string(l, Tokens.CMD) return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) else diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 30b0bed43cb61..99bf3ffeb67f2 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -269,7 +269,7 @@ end "type", "using", "while"] - + @test T.kind(tok(kw)) == T.KEYWORD end end @@ -513,11 +513,11 @@ for op in ops end end -@testset "perp" begin - @test tok("1 ⟂ 2", 3).kind==T.PERP +@testset "perp" begin + @test tok("1 ⟂ 2", 3).kind==T.PERP end -@testset "outer" begin +@testset "outer" begin @test tok("outer", 1).kind==T.OUTER end @@ -537,43 +537,48 @@ end @test tok("1**2",2).token_error === Tokens.INVALID_OPERATOR end -@testset "hat suffix" begin +@testset "hat suffix" begin @test tok("ŝ", 1).kind==Tokens.IDENTIFIER @test untokenize(collect(tokenize("ŝ", Tokens.RawToken))[1], "ŝ") == "ŝ" end -@testset "suffixed op" begin +@testset "suffixed op" begin s = "+¹" @test Tokens.isoperator(tok(s, 1).kind) @test untokenize(collect(tokenize(s, Tokens.RawToken))[1], s) == s end -@testset "invalid float juxt" begin +@testset "invalid float juxt" begin s = "1.+2" @test tok(s, 1).kind == Tokens.ERROR - @test Tokens.isoperator(tok(s, 
2).kind) + @test Tokens.isoperator(tok(s, 2).kind) @test (t->t.val=="1234." && t.kind == Tokens.ERROR )(tok("1234.+1")) # requires space before '.' - @test tok("1.+ ").kind == Tokens.ERROR + @test tok("1.+ ").kind == Tokens.ERROR @test tok("1.⤋").kind == Tokens.ERROR @test tok("1.?").kind == Tokens.ERROR end -@testset "interpolation of char within string" begin +@testset "interpolation of char within string" begin s = "\"\$('\"')\"" @test collect(tokenize(s))[1].kind == Tokenize.Tokens.STRING end -@testset "comments" begin +@testset "interpolation of prime within string" begin + s = "\"\$(a')\"" + @test collect(tokenize(s))[1].kind == Tokenize.Tokens.STRING +end + +@testset "comments" begin s = "#=# text=#" @test length(collect(tokenize(s, Tokens.RawToken))) == 2 end -@testset "invalid hexadecimal" begin +@testset "invalid hexadecimal" begin s = "0x." tok(s, 1).kind === Tokens.ERROR end -@testset "circ arrow right op" begin +@testset "circ arrow right op" begin s = "↻" @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.CIRCLE_ARROW_RIGHT end @@ -609,4 +614,3 @@ end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) end - From 07188fc96add3e754cb0fc373649ee0ea51728da Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Mon, 22 Mar 2021 15:25:53 +0100 Subject: [PATCH 0169/1109] make shallow copy instead --- JuliaSyntax/src/lexer.jl | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index fca10e97854b9..b45e6592240a9 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -59,6 +59,28 @@ function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} end Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) +function Base.copy(l::Lexer{IO_t, TT}) where IO_t where TT + return Lexer{IO_t, TT}( + l.io, 
+ l.io_startpos, + + l.token_start_row, + l.token_start_col, + l.token_startpos, + + l.current_row, + l.current_col, + l.current_pos, + + l.last_token, + l.charstore, + l.chars, + l.charspos, + l.doread, + l.dotop + ) +end + @inline token_type(l::Lexer{IO_t, TT}) where {IO_t, TT} = TT """ @@ -818,10 +840,12 @@ function read_string(l::Lexer, kind::Tokens.Kind) return false elseif c == '(' o = 1 - l2 = deepcopy(l) + l2 = copy(l) while o > 0 prevpos = position(l2) + prevpos_io = position(l2.io) t = next_token(l2) + seek(l.io, prevpos_io) for _ in 1:(position(l2) - prevpos) readchar(l) From 10dbba7a876c364e7ae44f061b69d28cd21e36a3 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 26 Mar 2021 12:24:02 +0100 Subject: [PATCH 0170/1109] 0.5.14 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 4f9be2c012dc6..fbc5a7ad8c976 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.13" +version = "0.5.14" [compat] julia = "1" From d675d75e73a3e9cde5130815f9eacda0bfaf8b2d Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 26 Mar 2021 19:55:02 +0100 Subject: [PATCH 0171/1109] fix string interpolation --- JuliaSyntax/src/lexer.jl | 2 +- JuliaSyntax/test/lexer.jl | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index b45e6592240a9..78fc2d74b0d79 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -73,7 +73,7 @@ function Base.copy(l::Lexer{IO_t, TT}) where IO_t where TT l.current_pos, l.last_token, - l.charstore, + IOBuffer(), l.chars, l.charspos, l.doread, diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 99bf3ffeb67f2..421c3d95e8609 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -303,9 +303,11 @@ end @testset 
"interpolation" begin - ts = collect(tokenize(""""str: \$(g("str: \$(h("str"))"))" """)) + str = """"str: \$(g("str: \$(h("str"))"))" """ + ts = collect(tokenize(str)) @test length(ts)==3 @test ts[1].kind == Tokens.STRING + @test ts[1].val == strip(str) ts = collect(tokenize("""\"\$\"""")) @test ts[1].kind == Tokens.STRING # issue 73: From 29d2fe19507fbce4c651e29b6b3db66178e2be9a Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 26 Mar 2021 21:22:33 +0100 Subject: [PATCH 0172/1109] 0.5.15 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index fbc5a7ad8c976..e2782b791c28f 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.14" +version = "0.5.15" [compat] julia = "1" From e68a6d38815619b8a586212d4460e3c9e61a48fa Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Sun, 28 Mar 2021 11:01:22 +0200 Subject: [PATCH 0173/1109] fix string unicode string interpolation --- JuliaSyntax/src/lexer.jl | 3 +-- JuliaSyntax/test/lexer.jl | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 78fc2d74b0d79..f53bc5894ab61 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -842,12 +842,11 @@ function read_string(l::Lexer, kind::Tokens.Kind) o = 1 l2 = copy(l) while o > 0 - prevpos = position(l2) prevpos_io = position(l2.io) t = next_token(l2) seek(l.io, prevpos_io) - for _ in 1:(position(l2) - prevpos) + while position(l) < position(l2) readchar(l) end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 421c3d95e8609..d556026092154 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -310,12 +310,20 @@ end @test ts[1].val == strip(str) ts = collect(tokenize("""\"\$\"""")) @test ts[1].kind == Tokens.STRING + # issue 73: t_err 
= tok("\"\$(fdsf\"") @test t_err.kind == Tokens.ERROR @test t_err.token_error == Tokens.EOF_STRING @test Tokenize.Tokens.startpos(t_err) == (1,1) @test Tokenize.Tokens.endpos(t_err) == (1,8) + + # issue 178: + str = """"\$uₕx \$(uₕx - ux)" """ + ts = collect(tokenize(str)) + @test length(ts)==3 + @test ts[1].kind == Tokens.STRING + @test ts[1].val == strip(str) end @testset "inferred" begin From 56047d18444f321ad91fb999c7a13d523d40cf0c Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 9 Apr 2021 11:17:58 +0200 Subject: [PATCH 0174/1109] 0.5.16 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index e2782b791c28f..e7299530c8eba 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.15" +version = "0.5.16" [compat] julia = "1" From a76ed33f735261b4daeb43ba5cabff090763e2ba Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 10 Jun 2021 09:52:29 +0200 Subject: [PATCH 0175/1109] fix isopsuffix --- JuliaSyntax/src/utilities.jl | 20 +++++++++++++++----- JuliaSyntax/test/lexer.jl | 3 ++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 614b8d7e1dba4..c5b644f047969 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -294,11 +294,22 @@ function dotop2(pc, dpc) end # suffix operators -# "₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ′″‴‵‶‷⁗" +# https://github.com/JuliaLang/julia/blob/d7d2b0c692eb6ad409d7193ba8d9d42972cbf182/src/flisp/julia_extensions.c#L156-L174 +# +# ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ′″‴‵‶‷⁗ @inline function isopsuffix(c1::Char) c1 == EOF_CHAR && return false c = UInt32(c1) - 0x000000b2 <= c <= 
0x000000b3 || + if (c < 0xa1 || c > 0x10ffff) + return false + end + cat = Base.Unicode.category_code(c) + if (cat == Base.Unicode.UTF8PROC_CATEGORY_MN || + cat == Base.Unicode.UTF8PROC_CATEGORY_MC || + cat == Base.Unicode.UTF8PROC_CATEGORY_ME) + return true + end + return 0x000000b2 <= c <= 0x000000b3 || c == 0x000000b9 || c == 0x000002b0 || 0x000002b2 <= c <= 0x000002b3 || @@ -338,7 +349,7 @@ end function optakessuffix(k) - (Tokens.begin_ops < k < Tokens.end_ops) && + (Tokens.begin_ops < k < Tokens.end_ops) && !(k == Tokens.DDDOT || Tokens.EQ <= k <= k == Tokens.XOR_EQ || k == Tokens.CONDITIONAL || @@ -361,7 +372,7 @@ function optakessuffix(k) k == Tokens.TRANSPOSE || k == Tokens.ANON_FUNC || Tokens.NOT_SIGN <= k <= Tokens.QUAD_ROOT - ) + ) end function is_operator_start_char(c::Char) @@ -369,4 +380,3 @@ function is_operator_start_char(c::Char) is_operator_start_char(UInt32(c)) end is_operator_start_char(u::UInt32) = u == 0x00000021 || (u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 
0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b || (u == 0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u == 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || (u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 
0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 || (u == 0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u == 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || (u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 
0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 || (u == 0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u == 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || (u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 
0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 || (u == 0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u == 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || (u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 
0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 || (u == 0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u == 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || (u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 
0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 || (u == 0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) - diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index d556026092154..9b991402758bb 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -310,7 +310,7 @@ end @test ts[1].val == strip(str) ts = collect(tokenize("""\"\$\"""")) @test ts[1].kind == Tokens.STRING - + # issue 73: t_err = tok("\"\$(fdsf\"") @test t_err.kind == Tokens.ERROR @@ -486,6 +486,7 @@ for op in ops str3 = "a $op b" str4 = "a .$op b" str5 = "a $(op)₁ b" + str5 = "a $(op)\U0304 b" str6 = "a .$(op)₁ b" ex1 = Meta.parse(str1, raise = false) ex2 = Meta.parse(str2, raise = false) From f925452f7261774c1cd5809e0f29819baacc1267 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 10 Jun 2021 10:43:54 +0200 Subject: [PATCH 0176/1109] fix tests --- JuliaSyntax/src/utilities.jl | 44 ++++++++++++------------ JuliaSyntax/test/lexer.jl | 66 ++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 59 deletions(-) diff --git a/JuliaSyntax/src/utilities.jl 
b/JuliaSyntax/src/utilities.jl index c5b644f047969..341bc059b7665 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -350,28 +350,28 @@ end function optakessuffix(k) (Tokens.begin_ops < k < Tokens.end_ops) && - !(k == Tokens.DDDOT || - Tokens.EQ <= k <= k == Tokens.XOR_EQ || - k == Tokens.CONDITIONAL || - k == Tokens.RIGHT_ARROW || - k == Tokens.LAZY_OR || - k == Tokens.LAZY_AND || - k == Tokens.ISSUBTYPE || - k == Tokens.ISSUPERTYPE || - k == Tokens.IN || - k == Tokens.ISA || - k == Tokens.COLON_EQUALS || - k == Tokens.DOUBLE_COLON_EQUAL || - k == Tokens.COLON || - k == Tokens.DDOT || - k == Tokens.EX_OR || - k == Tokens.DECLARATION || - k == Tokens.WHERE || - k == Tokens.DOT || - k == Tokens.NOT || - k == Tokens.TRANSPOSE || - k == Tokens.ANON_FUNC || - Tokens.NOT_SIGN <= k <= Tokens.QUAD_ROOT + !( + k == Tokens.DDDOT || + Tokens.begin_assignments <= k <= Tokens.end_assignments || + k == Tokens.CONDITIONAL || + k == Tokens.LAZY_OR || + k == Tokens.LAZY_AND || + k == Tokens.ISSUBTYPE || + k == Tokens.ISSUPERTYPE || + k == Tokens.IN || + k == Tokens.ISA || + k == Tokens.COLON_EQUALS || + k == Tokens.DOUBLE_COLON_EQUAL || + k == Tokens.COLON || + k == Tokens.DDOT || + k == Tokens.EX_OR || + k == Tokens.DECLARATION || + k == Tokens.WHERE || + k == Tokens.DOT || + k == Tokens.NOT || + k == Tokens.TRANSPOSE || + k == Tokens.ANON_FUNC || + Tokens.NOT_SIGN <= k <= Tokens.QUAD_ROOT ) end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 9b991402758bb..5627ccbbc5559 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -481,44 +481,36 @@ ops = collect(values(Main.Tokenize.Tokens.UNICODE_OPS_REVERSE)) for op in ops op in (:isa, :in, :where, Symbol('\''), :?, :(:)) && continue - str1 = "$(op)b" - str2 = ".$(op)b" - str3 = "a $op b" - str4 = "a .$op b" - str5 = "a $(op)₁ b" - str5 = "a $(op)\U0304 b" - str6 = "a .$(op)₁ b" - ex1 = Meta.parse(str1, raise = false) - ex2 = Meta.parse(str2, raise = false) - 
ex3 = Meta.parse(str3, raise = false) - ex4 = Meta.parse(str4, raise = false) - ex5 = Meta.parse(str5, raise = false) - ex6 = Meta.parse(str6, raise = false) - if ex1 isa Expr && ex1.head != :error # unary - t1 = collect(tokenize(str1)) - exop1 = ex1.head == :call ? ex1.args[1] : ex1.head - @test Symbol(Tokenize.Tokens.untokenize(t1[1])) == exop1 - if ex2.head != :error - t2 = collect(tokenize(str2)) - exop2 = ex2.head == :call ? ex2.args[1] : ex2.head - @test Symbol(Tokenize.Tokens.untokenize(t2[1])) == exop2 + strs1 = [ + "$(op)b", + ".$(op)b", + ] + strs2 = [ + "a $op b", + "a .$op b", + "a $(op)₁ b", + "a $(op)\U0304 b", + "a .$(op)₁ b" + ] + + for str in strs1 + expr = Meta.parse(str, raise = false) + if expr isa Expr && (expr.head != :error && expr.head != :incomplete) + tokens = collect(tokenize(str)) + exop = expr.head == :call ? expr.args[1] : expr.head + @test Symbol(Tokenize.Tokens.untokenize(tokens[1])) == exop + else + break end - elseif ex3.head != :error # binary - t3 = collect(tokenize(str3)) - exop3 = ex3.head == :call ? ex3.args[1] : ex3.head - @test Symbol(Tokenize.Tokens.untokenize(t3[3])) == exop3 - if ex4.head != :error - t4 = collect(tokenize(str4)) - exop4 = ex4.head == :call ? ex4.args[1] : ex4.head - @test Symbol(Tokenize.Tokens.untokenize(t4[3])) == exop4 - elseif ex5.head != :error - t5 = collect(tokenize(str5)) - exop5 = ex5.head == :call ? ex5.args[1] : ex5.head - @test Symbol(Tokenize.Tokens.untokenize(t5[3])) == exop5 - elseif ex6.head != :error - t6 = collect(tokenize(str6)) - exop6 = ex6.head == :call ? ex6.args[1] : ex6.head - @test Symbol(Tokenize.Tokens.untokenize(t6[3])) == exop6 + end + for str in strs2 + expr = Meta.parse(str, raise = false) + if expr isa Expr && (expr.head != :error && expr.head != :incomplete) + tokens = collect(tokenize(str)) + exop = expr.head == :call ? 
expr.args[1] : expr.head + @test Symbol(Tokenize.Tokens.untokenize(tokens[3])) == exop + else + break end end end From b40a05b9e0f9db79b351f2a50bdd1e43758317f4 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 10 Jun 2021 10:48:38 +0200 Subject: [PATCH 0177/1109] prettier tests --- JuliaSyntax/test/lexer.jl | 50 +++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 5627ccbbc5559..0d0b4a62462c5 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -481,36 +481,30 @@ ops = collect(values(Main.Tokenize.Tokens.UNICODE_OPS_REVERSE)) for op in ops op in (:isa, :in, :where, Symbol('\''), :?, :(:)) && continue - strs1 = [ - "$(op)b", - ".$(op)b", - ] - strs2 = [ - "a $op b", - "a .$op b", - "a $(op)₁ b", - "a $(op)\U0304 b", - "a .$(op)₁ b" + strs = [ + 1 => [ # unary + "$(op)b", + ".$(op)b", + ], + 2 => [ # binary + "a $op b", + "a .$op b", + "a $(op)₁ b", + "a $(op)\U0304 b", + "a .$(op)₁ b" + ] ] - for str in strs1 - expr = Meta.parse(str, raise = false) - if expr isa Expr && (expr.head != :error && expr.head != :incomplete) - tokens = collect(tokenize(str)) - exop = expr.head == :call ? expr.args[1] : expr.head - @test Symbol(Tokenize.Tokens.untokenize(tokens[1])) == exop - else - break - end - end - for str in strs2 - expr = Meta.parse(str, raise = false) - if expr isa Expr && (expr.head != :error && expr.head != :incomplete) - tokens = collect(tokenize(str)) - exop = expr.head == :call ? expr.args[1] : expr.head - @test Symbol(Tokenize.Tokens.untokenize(tokens[3])) == exop - else - break + for (arity, container) in strs + for str in container + expr = Meta.parse(str, raise = false) + if expr isa Expr && (expr.head != :error && expr.head != :incomplete) + tokens = collect(tokenize(str)) + exop = expr.head == :call ? expr.args[1] : expr.head + @test Symbol(Tokenize.Tokens.untokenize(tokens[arity == 1 ? 
1 : 3])) == exop + else + break + end end end end From 7cb4dda334ec36068d7971c35d943d5bd4018c81 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 10 Jun 2021 12:00:14 +0200 Subject: [PATCH 0178/1109] fix tests on nightly --- JuliaSyntax/src/utilities.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 341bc059b7665..a58f8f1f97433 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -354,8 +354,12 @@ function optakessuffix(k) k == Tokens.DDDOT || Tokens.begin_assignments <= k <= Tokens.end_assignments || k == Tokens.CONDITIONAL || - k == Tokens.LAZY_OR || - k == Tokens.LAZY_AND || + @static(if Meta.parse("a .&& b").args[1] == :.& + k == Tokens.LAZY_OR || + k == Tokens.LAZY_AND + else + false + end) || k == Tokens.ISSUBTYPE || k == Tokens.ISSUPERTYPE || k == Tokens.IN || From 80c2b710281cf6fbe672f2d689aaed91d3adaae3 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 10 Jun 2021 13:25:17 +0200 Subject: [PATCH 0179/1109] actually fix tests --- JuliaSyntax/src/lexer.jl | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index f53bc5894ab61..8d926ee55cee6 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -327,7 +327,7 @@ Returns the next `Token`. 
function next_token(l::Lexer, start = true) start && start_token!(l) c = readchar(l) - if eof(c); + if eof(c) return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) readon(l) @@ -940,6 +940,11 @@ function lex_dot(l::Lexer) if accept(l, "=") return emit(l, Tokens.AND_EQ) else + @static if Meta.parse("a .&& b").args[1] != :.& + if accept(l, "&") + return emit(l, Tokens.LAZY_AND) + end + end return emit(l, Tokens.AND) end elseif pc =='%' @@ -950,9 +955,19 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) return lex_equal(l) - elseif pc == '|' && dpc != '|' + elseif pc == '|' + @static if Meta.parse("a .&& b").args[1] == :.& + if dpc == '|' + return emit(l, Tokens.DOT) + end + end l.dotop = true readchar(l) + @static if Meta.parse("a .&& b").args[1] != :.& + if accept(l, "|") + return emit(l, Tokens.LAZY_OR) + end + end return lex_bar(l) elseif pc == '!' && dpc == '=' l.dotop = true @@ -970,9 +985,8 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) return lex_equal(l) - else - return emit(l, Tokens.DOT) end + return emit(l, Tokens.DOT) end end From 069683e571348d4c746888bf391c6575e9279370 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 10 Jun 2021 14:17:26 +0200 Subject: [PATCH 0180/1109] clean up code --- JuliaSyntax/src/lexer.jl | 12 +++++++++--- JuliaSyntax/src/utilities.jl | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 8d926ee55cee6..eeb4df0ea3785 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -1,5 +1,11 @@ module Lexers +@static if Meta.parse("a .&& b").args[1] != :.& + const CAN_DOT_LAZY_AND_OR = true +else + const CAN_DOT_LAZY_AND_OR = false +end + include("utilities.jl") import ..Tokens @@ -940,7 +946,7 @@ function lex_dot(l::Lexer) if accept(l, "=") return emit(l, Tokens.AND_EQ) else - @static if Meta.parse("a .&& b").args[1] != :.& + @static if CAN_DOT_LAZY_AND_OR if accept(l, "&") return emit(l, Tokens.LAZY_AND) end @@ -956,14 
+962,14 @@ function lex_dot(l::Lexer) readchar(l) return lex_equal(l) elseif pc == '|' - @static if Meta.parse("a .&& b").args[1] == :.& + @static if !CAN_DOT_LAZY_AND_OR if dpc == '|' return emit(l, Tokens.DOT) end end l.dotop = true readchar(l) - @static if Meta.parse("a .&& b").args[1] != :.& + @static if CAN_DOT_LAZY_AND_OR if accept(l, "|") return emit(l, Tokens.LAZY_OR) end diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index a58f8f1f97433..9f7dc9af260d0 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -354,7 +354,7 @@ function optakessuffix(k) k == Tokens.DDDOT || Tokens.begin_assignments <= k <= Tokens.end_assignments || k == Tokens.CONDITIONAL || - @static(if Meta.parse("a .&& b").args[1] == :.& + @static(if !CAN_DOT_LAZY_AND_OR k == Tokens.LAZY_OR || k == Tokens.LAZY_AND else From 4a7e0afd66f69f4792ed17a3d8004c736496fb09 Mon Sep 17 00:00:00 2001 From: Gustavo Goretkin Date: Thu, 17 Jun 2021 15:12:08 -0400 Subject: [PATCH 0181/1109] 0.5.17 (JuliaLang/JuliaSyntax.jl#183) --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index e7299530c8eba..5ea3b29cf5800 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.16" +version = "0.5.17" [compat] julia = "1" From f1109b8dcc5be2179b11d98ec8e22425d1cd9969 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 2 Jul 2021 12:17:55 +0200 Subject: [PATCH 0182/1109] add LEFT_ARROW --- JuliaSyntax/src/lexer.jl | 3 +++ JuliaSyntax/src/token_kinds.jl | 11 ++++++----- JuliaSyntax/test/lexer.jl | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index eeb4df0ea3785..4967f528b234e 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -496,6 +496,9 @@ function 
lex_less(l::Lexer) return emit(l, Tokens.ISSUBTYPE) elseif accept(l, '|') # <| return emit(l, Tokens.LPIPE) + elseif dpeekchar(l) == ('-', '-') # <-- + readchar(l); readchar(l) + return emit(l, Tokens.LEFT_ARROW) else return emit(l, Tokens.LESS) # '<' end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index f04fa0f6a52f0..8d3f90563f369 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -118,6 +118,7 @@ # Level 3 begin_arrow, RIGHT_ARROW, # --> + LEFT_ARROW, # <-- LEFTWARDS_ARROW, # ← RIGHTWARDS_ARROW, # → LEFT_RIGHT_ARROW, # ↔ @@ -580,7 +581,7 @@ DDOT, # .. LDOTS, # … TRICOLON, # ⁝ - VDOTS, # ⋮ + VDOTS, # ⋮ DDOTS, # ⋱ ADOTS, # ⋰ CDOTS, # ⋯ @@ -1365,13 +1366,13 @@ const UNICODE_OPS = Dict{Char, Kind}( '⋅' => UNICODE_DOT, '…' => LDOTS, '⁝' => TRICOLON, -'⋮' => VDOTS, +'⋮' => VDOTS, '⋱' => DDOTS, '⋰' => ADOTS, '⋯' => CDOTS, -'↻' => CIRCLE_ARROW_RIGHT, +'↻' => CIRCLE_ARROW_RIGHT, '⇜' => LEFT_SQUIGGLE_ARROW, -'⇝' => RIGHT_SQUIGGLE_ARROW, +'⇝' => RIGHT_SQUIGGLE_ARROW, '↜' => LEFT_WAVE_ARROW, '↝' => RIGHT_WAVE_ARROW, '↩' => LEFTWARDS_ARROW_WITH_HOOK, @@ -1396,7 +1397,7 @@ const UNICODE_OPS = Dict{Char, Kind}( '↶' => CURVE_ARROW_LEFT, '↺' => CIRCLE_ARROW_LEFT, '¦' => BROKEN_BAR, -'⌿' => NOT_SLASH, +'⌿' => NOT_SLASH, '⨟' => BB_SEMI) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 0d0b4a62462c5..f74dc9149dad8 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -157,6 +157,7 @@ end @test tok("1\\=2", 2).kind == T.BACKSLASH_EQ @test tok("1\$=2", 2).kind == T.EX_OR_EQ @test tok("1-->2", 2).kind == T.RIGHT_ARROW + @test tok("1<--2", 2).kind == T.LEFT_ARROW @test tok("1>:2", 2).kind == T.ISSUPERTYPE end @@ -593,7 +594,7 @@ end ops = [raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" raw"=>" raw"?" 
- raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ -->" + raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <--" raw"||" raw"&&" raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:" From 929def584c86c5c3155917a79e92568cabc95ae8 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 2 Jul 2021 12:22:30 +0200 Subject: [PATCH 0183/1109] add DOUBLE_ARROW --- JuliaSyntax/src/lexer.jl | 8 ++++++-- JuliaSyntax/src/token_kinds.jl | 1 + JuliaSyntax/test/lexer.jl | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 4967f528b234e..c1eec0ed5c437 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -496,9 +496,13 @@ function lex_less(l::Lexer) return emit(l, Tokens.ISSUBTYPE) elseif accept(l, '|') # <| return emit(l, Tokens.LPIPE) - elseif dpeekchar(l) == ('-', '-') # <-- + elseif dpeekchar(l) == ('-', '-') # <-- or <--> readchar(l); readchar(l) - return emit(l, 
Tokens.LEFT_ARROW) + if accept(l, '>') + return emit(l, Tokens.DOUBLE_ARROW) + else + return emit(l, Tokens.LEFT_ARROW) + end else return emit(l, Tokens.LESS) # '<' end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 8d3f90563f369..89c6928b9ac93 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -119,6 +119,7 @@ begin_arrow, RIGHT_ARROW, # --> LEFT_ARROW, # <-- + DOUBLE_ARROW, # <--> LEFTWARDS_ARROW, # ← RIGHTWARDS_ARROW, # → LEFT_RIGHT_ARROW, # ↔ diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index f74dc9149dad8..0ebd36b215c87 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -158,6 +158,7 @@ end @test tok("1\$=2", 2).kind == T.EX_OR_EQ @test tok("1-->2", 2).kind == T.RIGHT_ARROW @test tok("1<--2", 2).kind == T.LEFT_ARROW + @test tok("1<-->2", 2).kind == T.DOUBLE_ARROW @test tok("1>:2", 2).kind == T.ISSUPERTYPE end @@ -594,7 +595,7 @@ end ops = [raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" raw"=>" raw"?" 
- raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <--" + raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->" raw"||" raw"&&" raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:" From 546b57c143dcb68b5d78021f8ba79480260f3a63 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 2 Jul 2021 12:39:44 +0200 Subject: [PATCH 0184/1109] fix tests --- JuliaSyntax/test/lexer.jl | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 0ebd36b215c87..58c35edafd591 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -592,24 +592,28 @@ end end @testset "new ops" begin - ops = [raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" - raw"=>" - raw"?" 
- raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->" - raw"||" - raw"&&" - raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:" - raw"<|" - raw"|>" - raw": .. … ⁝ ⋮ ⋱ ⋰ ⋯" - raw"$ + - ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣" - raw"* / ⌿ ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗" - raw"//" - raw"<< >> >>>" - raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" - raw"::" - raw"." + ops = [ + raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" + raw"=>" + raw"?" 
+ raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ -->" + raw"||" + raw"&&" + raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:" + raw"<|" + raw"|>" + raw": .. … ⁝ ⋮ ⋱ ⋰ ⋯" + raw"$ + - ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣" + raw"* / ⌿ ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗" + raw"//" + raw"<< >> >>>" + raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" + raw"::" + raw"." 
] + if VERSION >= v"1.6.0" + push!(ops, raw"<-- <-->") + end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) end From c26fdfe1cd5d26a8410575aee056aae696aab628 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 2 Jul 2021 12:42:54 +0200 Subject: [PATCH 0185/1109] add additional operator suffixes --- JuliaSyntax/src/utilities.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/utilities.jl b/JuliaSyntax/src/utilities.jl index 9f7dc9af260d0..98afe0c714d36 100644 --- a/JuliaSyntax/src/utilities.jl +++ b/JuliaSyntax/src/utilities.jl @@ -297,6 +297,7 @@ end # https://github.com/JuliaLang/julia/blob/d7d2b0c692eb6ad409d7193ba8d9d42972cbf182/src/flisp/julia_extensions.c#L156-L174 # # ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ′″‴‵‶‷⁗ + @inline function isopsuffix(c1::Char) c1 == EOF_CHAR && return false c = UInt32(c1) @@ -344,7 +345,8 @@ end 0x00002074 <= c <= 0x0000208e || 0x00002090 <= c <= 0x00002093 || 0x00002095 <= c <= 0x0000209c || - 0x00002c7c <= c <= 0x00002c7d + 0x00002c7c <= c <= 0x00002c7d || + 0x0000a71b <= c <= 0x0000a71d end From 5ae57d3aa2a328fbaec3ceebec5a1082c6e2535c Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Fri, 2 Jul 2021 12:47:20 +0200 Subject: [PATCH 0186/1109] lex all of Base in tests --- JuliaSyntax/test/lex_yourself.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/test/lex_yourself.jl b/JuliaSyntax/test/lex_yourself.jl index 73e99d73cdc83..bb88a31cfec1d 100644 --- a/JuliaSyntax/test/lex_yourself.jl +++ b/JuliaSyntax/test/lex_yourself.jl @@ -54,10 +54,11 @@ end testall(joinpath(PKGPATH, "benchmark")) testall(joinpath(PKGPATH, "src")) testall(joinpath(PKGPATH, "test")) +testall(joinpath(Sys.BINDIR, Base.DATAROOTDIR)) println("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), " seconds with a 
total of ", tot_tokens, " tokens with ", tot_errors, " errors") @test tot_errors == 0 -end # testset \ No newline at end of file +end # testset From 2b43efdcc6b96fba961f25e7199d14959e752e79 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Mon, 5 Jul 2021 16:01:13 +0200 Subject: [PATCH 0187/1109] v0.5.18 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 5ea3b29cf5800..ae15bda992d7b 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.17" +version = "0.5.18" [compat] julia = "1" From f229b16a2d1ad30484d2fd78bc93f397882cf833 Mon Sep 17 00:00:00 2001 From: ZacNugent Date: Tue, 24 Aug 2021 08:27:17 +0100 Subject: [PATCH 0188/1109] Simplify lex_identifier (JuliaLang/JuliaSyntax.jl#188) --- JuliaSyntax/src/_precompile.jl | 12 -- JuliaSyntax/src/lexer.jl | 360 ++++++--------------------------- 2 files changed, 67 insertions(+), 305 deletions(-) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index cb813a4bca49b..f5797cf011efa 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -48,14 +48,6 @@ function _precompile_() precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, 
Tuple{Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) @@ -85,7 +77,6 @@ function _precompile_() precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Char)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,)) @@ -95,13 +86,10 @@ function _precompile_() precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),)) 
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),)) - precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.isdigit),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Char,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Function,)) precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index c1eec0ed5c437..58a04d144195f 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -224,7 +224,6 @@ function readon(l::Lexer{I,Token}) where {I <: IO} end write(l.charstore, l.chars[1]) l.doread = true - return l.chars[1] end readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] @@ -336,7 +335,6 @@ function next_token(l::Lexer, start = true) if eof(c) return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) - readon(l) return lex_whitespace(l) elseif c == '[' return emit(l, Tokens.LSQUARE) @@ -369,7 +367,6 @@ function next_token(l::Lexer, start = true) elseif c == '~' return emit(l, Tokens.APPROX) elseif c == '#' - readon(l) return lex_comment(l) elseif c == '=' return 
lex_equal(l) @@ -390,7 +387,6 @@ function next_token(l::Lexer, start = true) elseif c == '÷' return lex_division(l) elseif c == '"' - readon(l) return lex_quote(l); elseif c == '%' return lex_percent(l); @@ -405,13 +401,10 @@ function next_token(l::Lexer, start = true) elseif c == '-' return lex_minus(l); elseif c == '`' - readon(l) return lex_cmd(l); elseif is_identifier_start_char(c) - readon(l) return lex_identifier(l, c) elseif isdigit(c) - readon(l) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) @@ -423,11 +416,13 @@ end # Lex whitespace, a whitespace char has been consumed function lex_whitespace(l::Lexer) + readon(l) accept_batch(l, iswhitespace) return emit(l, Tokens.WHITESPACE) end function lex_comment(l::Lexer, doemit=true) + readon(l) if peekchar(l) != '=' while true pc = peekchar(l) @@ -643,6 +638,7 @@ end # A digit has been consumed function lex_digit(l::Lexer, kind) + readon(l) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' @@ -796,6 +792,7 @@ end # Parse a token starting with a quote. 
# A '"' has been consumed function lex_quote(l::Lexer, doemit=true) + readon(l) if accept(l, '"') # "" if accept(l, '"') # """ if read_string(l, Tokens.TRIPLE_STRING) @@ -908,7 +905,6 @@ function lex_dot(l::Lexer) return emit(l, Tokens.DDOT) end elseif Base.isdigit(peekchar(l)) - readon(l) return lex_digit(l, Tokens.FLOAT) else pc, dpc = dpeekchar(l) @@ -1005,6 +1001,7 @@ end # A ` has been consumed function lex_cmd(l::Lexer, doemit=true) + readon(l) if accept(l, '`') # if accept(l, '`') # """ if read_string(l, Tokens.TRIPLE_CMD) @@ -1024,305 +1021,82 @@ function lex_cmd(l::Lexer, doemit=true) end end -function tryread(l, str, k, c) - for s in str - c = peekchar(l) - if c != s - if !is_identifier_char(c) - return emit(l, IDENTIFIER) - end - return readrest(l, c) - else - readchar(l) - end - end - if is_identifier_char(peekchar(l)) - return readrest(l, c) +function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} + if T == Token + readon(l) end - return emit(l, k) -end - -function readrest(l, c) + cnt = 1 + h = simple_hash(Int(c), cnt, 1) while true pc, ppc = dpeekchar(l) if !is_identifier_char(pc) || (pc == '!' 
&& ppc == '=') break end c = readchar(l) + cnt += 1 + h = simple_hash(Int(c), cnt, h) end - return emit(l, IDENTIFIER) + return emit(l, get(kw_hash, h, IDENTIFIER)) end - -function _doret(l, c) - if !is_identifier_char(c) - return emit(l, IDENTIFIER) - else - return readrest(l, c) - end +function simple_hash(c, cnt, h) + h = h*c + c + cnt end -function lex_identifier(l, c) - if c == 'a' - return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT, c) - elseif c == 'b' - c = peekchar(l) - if c == 'a' - c = readchar(l) - return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE, c) - elseif c == 'e' - c = readchar(l) - return tryread(l, ('g', 'i', 'n'), BEGIN, c) - elseif c == 'r' - c = readchar(l) - return tryread(l, ('e', 'a', 'k'), BREAK, c) - else - return _doret(l, c) - end - elseif c == 'c' - c = peekchar(l) - if c == 'a' - c = readchar(l) - return tryread(l, ('t', 'c', 'h'), CATCH, c) - elseif c == 'o' - readchar(l) - c = peekchar(l) - if c == 'n' - readchar(l) - c = peekchar(l) - if c == 's' - readchar(l) - c = peekchar(l) - return tryread(l, ('t',), CONST, c) - elseif c == 't' - readchar(l) - c = peekchar(l) - return tryread(l, ('i', 'n', 'u', 'e'), CONTINUE, c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'd' - return tryread(l, ('o'), DO, c) - elseif c == 'e' - c = peekchar(l) - if c == 'l' - readchar(l) - c = peekchar(l) - if c == 's' - readchar(l) - c = peekchar(l) - if c == 'e' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, ELSE) - elseif c == 'i' - c = readchar(l) - return tryread(l, ('f'), ELSEIF ,c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'n' - c = readchar(l) - return tryread(l, ('d'), END, c) - elseif c == 'x' - c = readchar(l) - return tryread(l, ('p', 'o', 'r', 't'), EXPORT, c) - else - return _doret(l, c) - end - elseif c == 'f' - c = peekchar(l) - 
if c == 'a' - c = readchar(l) - return tryread(l, ('l', 's', 'e'), FALSE, c) - elseif c == 'i' - c = readchar(l) - return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY, c) - elseif c == 'o' - c = readchar(l) - return tryread(l, ('r'), FOR, c) - elseif c == 'u' - c = readchar(l) - return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION, c) - else - return _doret(l, c) - end - elseif c == 'g' - return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL, c) - elseif c == 'i' - c = peekchar(l) - if c == 'f' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, IF) - else - return readrest(l, c) - end - elseif c == 'm' - readchar(l) - c = peekchar(l) - if c == 'p' - readchar(l) - c = peekchar(l) - if c == 'o' - readchar(l) - c = peekchar(l) - if c == 'r' - readchar(l) - c = peekchar(l) - if c == 't' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, IMPORT) - elseif c == 'a' - c = readchar(l) - return tryread(l, ('l','l'), IMPORTALL, c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'n' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, IN) - else - return readrest(l, c) - end - elseif (@static VERSION >= v"0.6.0-dev.1471" ? 
true : false) && c == 's' - c = readchar(l) - return tryread(l, ('a'), ISA, c) - else - return _doret(l, c) - end - elseif c == 'l' - c = peekchar(l) - if c == 'e' - readchar(l) - return tryread(l, ('t'), LET, c) - elseif c == 'o' - readchar(l) - return tryread(l, ('c', 'a', 'l'), LOCAL, c) - else - return _doret(l, c) - end - elseif c == 'm' - c = peekchar(l) - if c == 'a' - c = readchar(l) - return tryread(l, ('c', 'r', 'o'), MACRO, c) - elseif c == 'o' - c = readchar(l) - return tryread(l, ('d', 'u', 'l', 'e'), MODULE, c) - elseif c == 'u' - c = readchar(l) - return tryread(l, ('t', 'a', 'b', 'l', 'e'), MUTABLE, c) - else - return _doret(l, c) - end - elseif c == 'o' - return tryread(l, ('u', 't', 'e', 'r'), OUTER, c) - elseif c == 'p' - return tryread(l, ('r', 'i', 'm', 'i', 't', 'i', 'v', 'e'), PRIMITIVE, c) - elseif c == 'q' - return tryread(l, ('u', 'o', 't', 'e'), QUOTE, c) - elseif c == 'r' - return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN, c) - elseif c == 's' - return tryread(l, ('t', 'r', 'u', 'c', 't'), STRUCT, c) - elseif c == 't' - c = peekchar(l) - if c == 'r' - readchar(l) - c = peekchar(l) - if c == 'u' - c = readchar(l) - return tryread(l, ('e'), TRUE, c) - elseif c == 'y' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, TRY) - else - c = readchar(l) - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'y' - readchar(l) - c = peekchar(l) - if c == 'p' - readchar(l) - c = peekchar(l) - if c == 'e' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, TYPE) - else - c = readchar(l) - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'u' - return tryread(l, ('s', 'i', 'n', 'g'), USING, c) - elseif c == 'w' - c = peekchar(l) - if c == 'h' - readchar(l) - c = peekchar(l) - if c == 'e' - c = readchar(l) - return tryread(l, ('r', 'e'), WHERE, c) - elseif c == 'i' - c = readchar(l) - 
return tryread(l, ('l', 'e'), WHILE, c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) +function simple_hash(str) + ind = 1 + cnt = 1 + h = 1 + while ind <= length(str) + h = simple_hash(Int(str[ind]), cnt, h) + cnt += 1 + ind = nextind(str, ind) end + h end +kws = [ +Tokens.ABSTRACT, +Tokens.BAREMODULE, +Tokens.BEGIN, +Tokens.BREAK, +Tokens.CATCH, +Tokens.CONST, +Tokens.CONTINUE, +Tokens.DO, +Tokens.ELSE, +Tokens.ELSEIF, +Tokens.END, +Tokens.EXPORT, +Tokens.FINALLY, +Tokens.FOR, +Tokens.FUNCTION, +Tokens.GLOBAL, +Tokens.IF, +Tokens.IMPORT, +Tokens.IMPORTALL, +Tokens.LET, +Tokens.LOCAL, +Tokens.MACRO, +Tokens.MODULE, +Tokens.MUTABLE, +Tokens.OUTER, +Tokens.PRIMITIVE, +Tokens.QUOTE, +Tokens.RETURN, +Tokens.STRUCT, +Tokens.TRY, +Tokens.TYPE, +Tokens.USING, +Tokens.WHILE, +Tokens.IN, +Tokens.ISA, +Tokens.WHERE, +Tokens.TRUE, +Tokens.FALSE, +] + +const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) + end # module From c5fb880d0f3aae4746bf71721b8e8ddbd18df044 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 24 Aug 2021 09:27:50 +0200 Subject: [PATCH 0189/1109] change optimization level to 1 (JuliaLang/JuliaSyntax.jl#187) --- JuliaSyntax/src/Tokenize.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 8e7f0b4ba6d32..8cf3e8b5f6533 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -1,5 +1,9 @@ module Tokenize +if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optlevel")) + @eval Base.Experimental.@optlevel 1 +end + include("token.jl") include("lexer.jl") From 5b457495e4c8de99fd62f8f433dcc187defbe242 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 24 Aug 2021 09:28:19 +0200 Subject: [PATCH 0190/1109] Bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml 
b/JuliaSyntax/Project.toml index ae15bda992d7b..1f0b6a7815e31 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.18" +version = "0.5.19" [compat] julia = "1" From 7d6f57d317d1bd5207ddd53f8912b91091020891 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 25 Aug 2021 07:01:19 +0200 Subject: [PATCH 0191/1109] Revert "Simplify lex_identifier (JuliaLang/JuliaSyntax.jl#188)" (JuliaLang/JuliaSyntax.jl#190) This reverts commit f229b16a2d1ad30484d2fd78bc93f397882cf833. --- JuliaSyntax/src/_precompile.jl | 12 ++ JuliaSyntax/src/lexer.jl | 360 +++++++++++++++++++++++++++------ 2 files changed, 305 insertions(+), 67 deletions(-) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index f5797cf011efa..cb813a4bca49b 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -48,6 +48,14 @@ function _precompile_() precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 
1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) + precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) @@ -77,6 +85,7 @@ function _precompile_() precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tokenize.Tokens.Kind,)) + precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Char)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,)) @@ -86,10 +95,13 @@ function _precompile_() precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),)) + 
precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.isdigit),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Function,)) precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 58a04d144195f..c1eec0ed5c437 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -224,6 +224,7 @@ function readon(l::Lexer{I,Token}) where {I <: IO} end write(l.charstore, l.chars[1]) l.doread = true + return l.chars[1] end readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] @@ -335,6 +336,7 @@ function next_token(l::Lexer, start = true) if eof(c) return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) + readon(l) return lex_whitespace(l) elseif c == '[' return emit(l, Tokens.LSQUARE) @@ -367,6 +369,7 @@ function next_token(l::Lexer, start = true) elseif c == '~' return emit(l, Tokens.APPROX) elseif c == '#' + readon(l) return lex_comment(l) elseif c == '=' return lex_equal(l) @@ -387,6 +390,7 @@ function next_token(l::Lexer, start = true) elseif c == '÷' return lex_division(l) elseif c == '"' + readon(l) return lex_quote(l); elseif c == '%' return lex_percent(l); @@ -401,10 +405,13 @@ function next_token(l::Lexer, start = true) elseif c == '-' return lex_minus(l); elseif c == '`' 
+ readon(l) return lex_cmd(l); elseif is_identifier_start_char(c) + readon(l) return lex_identifier(l, c) elseif isdigit(c) + readon(l) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) @@ -416,13 +423,11 @@ end # Lex whitespace, a whitespace char has been consumed function lex_whitespace(l::Lexer) - readon(l) accept_batch(l, iswhitespace) return emit(l, Tokens.WHITESPACE) end function lex_comment(l::Lexer, doemit=true) - readon(l) if peekchar(l) != '=' while true pc = peekchar(l) @@ -638,7 +643,6 @@ end # A digit has been consumed function lex_digit(l::Lexer, kind) - readon(l) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' @@ -792,7 +796,6 @@ end # Parse a token starting with a quote. # A '"' has been consumed function lex_quote(l::Lexer, doemit=true) - readon(l) if accept(l, '"') # "" if accept(l, '"') # """ if read_string(l, Tokens.TRIPLE_STRING) @@ -905,6 +908,7 @@ function lex_dot(l::Lexer) return emit(l, Tokens.DDOT) end elseif Base.isdigit(peekchar(l)) + readon(l) return lex_digit(l, Tokens.FLOAT) else pc, dpc = dpeekchar(l) @@ -1001,7 +1005,6 @@ end # A ` has been consumed function lex_cmd(l::Lexer, doemit=true) - readon(l) if accept(l, '`') # if accept(l, '`') # """ if read_string(l, Tokens.TRIPLE_CMD) @@ -1021,82 +1024,305 @@ function lex_cmd(l::Lexer, doemit=true) end end -function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} - if T == Token - readon(l) +function tryread(l, str, k, c) + for s in str + c = peekchar(l) + if c != s + if !is_identifier_char(c) + return emit(l, IDENTIFIER) + end + return readrest(l, c) + else + readchar(l) + end + end + if is_identifier_char(peekchar(l)) + return readrest(l, c) end - cnt = 1 - h = simple_hash(Int(c), cnt, 1) + return emit(l, k) +end + +function readrest(l, c) while true pc, ppc = dpeekchar(l) if !is_identifier_char(pc) || (pc == '!' 
&& ppc == '=') break end c = readchar(l) - cnt += 1 - h = simple_hash(Int(c), cnt, h) end - return emit(l, get(kw_hash, h, IDENTIFIER)) + return emit(l, IDENTIFIER) end -function simple_hash(c, cnt, h) - h = h*c + c + cnt -end -function simple_hash(str) - ind = 1 - cnt = 1 - h = 1 - while ind <= length(str) - h = simple_hash(Int(str[ind]), cnt, h) - cnt += 1 - ind = nextind(str, ind) +function _doret(l, c) + if !is_identifier_char(c) + return emit(l, IDENTIFIER) + else + return readrest(l, c) end - h end -kws = [ -Tokens.ABSTRACT, -Tokens.BAREMODULE, -Tokens.BEGIN, -Tokens.BREAK, -Tokens.CATCH, -Tokens.CONST, -Tokens.CONTINUE, -Tokens.DO, -Tokens.ELSE, -Tokens.ELSEIF, -Tokens.END, -Tokens.EXPORT, -Tokens.FINALLY, -Tokens.FOR, -Tokens.FUNCTION, -Tokens.GLOBAL, -Tokens.IF, -Tokens.IMPORT, -Tokens.IMPORTALL, -Tokens.LET, -Tokens.LOCAL, -Tokens.MACRO, -Tokens.MODULE, -Tokens.MUTABLE, -Tokens.OUTER, -Tokens.PRIMITIVE, -Tokens.QUOTE, -Tokens.RETURN, -Tokens.STRUCT, -Tokens.TRY, -Tokens.TYPE, -Tokens.USING, -Tokens.WHILE, -Tokens.IN, -Tokens.ISA, -Tokens.WHERE, -Tokens.TRUE, -Tokens.FALSE, -] - -const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) +function lex_identifier(l, c) + if c == 'a' + return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT, c) + elseif c == 'b' + c = peekchar(l) + if c == 'a' + c = readchar(l) + return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE, c) + elseif c == 'e' + c = readchar(l) + return tryread(l, ('g', 'i', 'n'), BEGIN, c) + elseif c == 'r' + c = readchar(l) + return tryread(l, ('e', 'a', 'k'), BREAK, c) + else + return _doret(l, c) + end + elseif c == 'c' + c = peekchar(l) + if c == 'a' + c = readchar(l) + return tryread(l, ('t', 'c', 'h'), CATCH, c) + elseif c == 'o' + readchar(l) + c = peekchar(l) + if c == 'n' + readchar(l) + c = peekchar(l) + if c == 's' + readchar(l) + c = peekchar(l) + return tryread(l, ('t',), CONST, c) + elseif c == 't' + readchar(l) + c = peekchar(l) + return 
tryread(l, ('i', 'n', 'u', 'e'), CONTINUE, c) + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end + elseif c == 'd' + return tryread(l, ('o'), DO, c) + elseif c == 'e' + c = peekchar(l) + if c == 'l' + readchar(l) + c = peekchar(l) + if c == 's' + readchar(l) + c = peekchar(l) + if c == 'e' + readchar(l) + c = peekchar(l) + if !is_identifier_char(c) + return emit(l, ELSE) + elseif c == 'i' + c = readchar(l) + return tryread(l, ('f'), ELSEIF ,c) + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end + elseif c == 'n' + c = readchar(l) + return tryread(l, ('d'), END, c) + elseif c == 'x' + c = readchar(l) + return tryread(l, ('p', 'o', 'r', 't'), EXPORT, c) + else + return _doret(l, c) + end + elseif c == 'f' + c = peekchar(l) + if c == 'a' + c = readchar(l) + return tryread(l, ('l', 's', 'e'), FALSE, c) + elseif c == 'i' + c = readchar(l) + return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY, c) + elseif c == 'o' + c = readchar(l) + return tryread(l, ('r'), FOR, c) + elseif c == 'u' + c = readchar(l) + return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION, c) + else + return _doret(l, c) + end + elseif c == 'g' + return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL, c) + elseif c == 'i' + c = peekchar(l) + if c == 'f' + readchar(l) + c = peekchar(l) + if !is_identifier_char(c) + return emit(l, IF) + else + return readrest(l, c) + end + elseif c == 'm' + readchar(l) + c = peekchar(l) + if c == 'p' + readchar(l) + c = peekchar(l) + if c == 'o' + readchar(l) + c = peekchar(l) + if c == 'r' + readchar(l) + c = peekchar(l) + if c == 't' + readchar(l) + c = peekchar(l) + if !is_identifier_char(c) + return emit(l, IMPORT) + elseif c == 'a' + c = readchar(l) + return tryread(l, ('l','l'), IMPORTALL, c) + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) 
+ end + elseif c == 'n' + readchar(l) + c = peekchar(l) + if !is_identifier_char(c) + return emit(l, IN) + else + return readrest(l, c) + end + elseif (@static VERSION >= v"0.6.0-dev.1471" ? true : false) && c == 's' + c = readchar(l) + return tryread(l, ('a'), ISA, c) + else + return _doret(l, c) + end + elseif c == 'l' + c = peekchar(l) + if c == 'e' + readchar(l) + return tryread(l, ('t'), LET, c) + elseif c == 'o' + readchar(l) + return tryread(l, ('c', 'a', 'l'), LOCAL, c) + else + return _doret(l, c) + end + elseif c == 'm' + c = peekchar(l) + if c == 'a' + c = readchar(l) + return tryread(l, ('c', 'r', 'o'), MACRO, c) + elseif c == 'o' + c = readchar(l) + return tryread(l, ('d', 'u', 'l', 'e'), MODULE, c) + elseif c == 'u' + c = readchar(l) + return tryread(l, ('t', 'a', 'b', 'l', 'e'), MUTABLE, c) + else + return _doret(l, c) + end + elseif c == 'o' + return tryread(l, ('u', 't', 'e', 'r'), OUTER, c) + elseif c == 'p' + return tryread(l, ('r', 'i', 'm', 'i', 't', 'i', 'v', 'e'), PRIMITIVE, c) + elseif c == 'q' + return tryread(l, ('u', 'o', 't', 'e'), QUOTE, c) + elseif c == 'r' + return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN, c) + elseif c == 's' + return tryread(l, ('t', 'r', 'u', 'c', 't'), STRUCT, c) + elseif c == 't' + c = peekchar(l) + if c == 'r' + readchar(l) + c = peekchar(l) + if c == 'u' + c = readchar(l) + return tryread(l, ('e'), TRUE, c) + elseif c == 'y' + readchar(l) + c = peekchar(l) + if !is_identifier_char(c) + return emit(l, TRY) + else + c = readchar(l) + return _doret(l, c) + end + else + return _doret(l, c) + end + elseif c == 'y' + readchar(l) + c = peekchar(l) + if c == 'p' + readchar(l) + c = peekchar(l) + if c == 'e' + readchar(l) + c = peekchar(l) + if !is_identifier_char(c) + return emit(l, TYPE) + else + c = readchar(l) + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end + elseif c == 'u' + return tryread(l, ('s', 'i', 'n', 'g'), USING, c) + 
elseif c == 'w' + c = peekchar(l) + if c == 'h' + readchar(l) + c = peekchar(l) + if c == 'e' + c = readchar(l) + return tryread(l, ('r', 'e'), WHERE, c) + elseif c == 'i' + c = readchar(l) + return tryread(l, ('l', 'e'), WHILE, c) + else + return _doret(l, c) + end + else + return _doret(l, c) + end + else + return _doret(l, c) + end +end end # module From e7ebc5b8c73de77f9c65dd30588fabeba717ff25 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 25 Aug 2021 07:02:03 +0200 Subject: [PATCH 0192/1109] Update version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 1f0b6a7815e31..68debc14a0569 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.19" +version = "0.5.20" [compat] julia = "1" From ac64c28ee97b2b57a7662e380d3228845e712ad7 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 26 Aug 2021 09:36:00 +0200 Subject: [PATCH 0193/1109] Simplify lex_identifier --- JuliaSyntax/src/_precompile.jl | 12 -- JuliaSyntax/src/lexer.jl | 365 +++++++-------------------------- JuliaSyntax/test/lexer.jl | 51 +++++ 3 files changed, 123 insertions(+), 305 deletions(-) diff --git a/JuliaSyntax/src/_precompile.jl b/JuliaSyntax/src/_precompile.jl index cb813a4bca49b..f5797cf011efa 100644 --- a/JuliaSyntax/src/_precompile.jl +++ b/JuliaSyntax/src/_precompile.jl @@ -48,14 +48,6 @@ function _precompile_() precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) - precompile(Tokenize.Lexers.tryread, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tuple{Char,Char,Char,Char,Char,Char,Char,Char}, Tokenize.Tokens.Kind,Char)) precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) @@ -85,7 +77,6 @@ function _precompile_() precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tokenize.Tokens.Kind,)) - precompile(Tokenize.Lexers.readrest, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Char)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 
1}},Tokenize.Tokens.Token}, String,)) @@ -95,13 +86,10 @@ function _precompile_() precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),)) - precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers._doret, (Char, Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.iswhitespace),)) precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.isdigit),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Char,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Function,)) precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index c1eec0ed5c437..e26d80c2af802 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -224,7 +224,6 @@ function readon(l::Lexer{I,Token}) where {I <: IO} end write(l.charstore, l.chars[1]) l.doread = true - return l.chars[1] end readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] @@ -336,7 +335,6 @@ function next_token(l::Lexer, start = true) if eof(c) return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) - readon(l) return lex_whitespace(l) 
elseif c == '[' return emit(l, Tokens.LSQUARE) @@ -369,7 +367,6 @@ function next_token(l::Lexer, start = true) elseif c == '~' return emit(l, Tokens.APPROX) elseif c == '#' - readon(l) return lex_comment(l) elseif c == '=' return lex_equal(l) @@ -390,7 +387,6 @@ function next_token(l::Lexer, start = true) elseif c == '÷' return lex_division(l) elseif c == '"' - readon(l) return lex_quote(l); elseif c == '%' return lex_percent(l); @@ -405,13 +401,10 @@ function next_token(l::Lexer, start = true) elseif c == '-' return lex_minus(l); elseif c == '`' - readon(l) return lex_cmd(l); elseif is_identifier_start_char(c) - readon(l) return lex_identifier(l, c) elseif isdigit(c) - readon(l) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR return emit(l, k) @@ -423,11 +416,13 @@ end # Lex whitespace, a whitespace char has been consumed function lex_whitespace(l::Lexer) + readon(l) accept_batch(l, iswhitespace) return emit(l, Tokens.WHITESPACE) end function lex_comment(l::Lexer, doemit=true) + readon(l) if peekchar(l) != '=' while true pc = peekchar(l) @@ -643,6 +638,7 @@ end # A digit has been consumed function lex_digit(l::Lexer, kind) + readon(l) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' @@ -796,6 +792,7 @@ end # Parse a token starting with a quote. 
# A '"' has been consumed function lex_quote(l::Lexer, doemit=true) + readon(l) if accept(l, '"') # "" if accept(l, '"') # """ if read_string(l, Tokens.TRIPLE_STRING) @@ -908,7 +905,6 @@ function lex_dot(l::Lexer) return emit(l, Tokens.DDOT) end elseif Base.isdigit(peekchar(l)) - readon(l) return lex_digit(l, Tokens.FLOAT) else pc, dpc = dpeekchar(l) @@ -1005,6 +1001,7 @@ end # A ` has been consumed function lex_cmd(l::Lexer, doemit=true) + readon(l) if accept(l, '`') # if accept(l, '`') # """ if read_string(l, Tokens.TRIPLE_CMD) @@ -1024,305 +1021,87 @@ function lex_cmd(l::Lexer, doemit=true) end end -function tryread(l, str, k, c) - for s in str - c = peekchar(l) - if c != s - if !is_identifier_char(c) - return emit(l, IDENTIFIER) - end - return readrest(l, c) - else - readchar(l) - end - end - if is_identifier_char(peekchar(l)) - return readrest(l, c) +function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} + if T == Token + readon(l) end - return emit(l, k) -end - -function readrest(l, c) + h = simple_hash(c, 0) while true pc, ppc = dpeekchar(l) if !is_identifier_char(pc) || (pc == '!' && ppc == '=') break end c = readchar(l) + h = simple_hash(c, h) end - return emit(l, IDENTIFIER) + return emit(l, get(kw_hash, h, IDENTIFIER)) end - -function _doret(l, c) - if !is_identifier_char(c) - return emit(l, IDENTIFIER) - else - return readrest(l, c) - end +# This creates a hash using 5 bit per lower case ASCII char. +# It checks its input to be between 'a' and 'z' (because only those chars) +# are valid in keywords, and returns a sentinel value for invalid inputs +# or when the hash is about to overflow. 
+function simple_hash(c, h) + h == UInt64(0xff) && return h + # only 'a' - 'z' actually need to be hashed + 'a' <= c <= 'z' || return UInt64(0xff) + # catch possible overflow by checking the 10 high bits + (h & (UInt64(0x3ff) << (64 - 10))) > 0 && return UInt64(0xff) + UInt64(h) << 5 + UInt8(c - 'a' + 1) end -function lex_identifier(l, c) - if c == 'a' - return tryread(l, ('b', 's', 't', 'r', 'a', 'c', 't'), ABSTRACT, c) - elseif c == 'b' - c = peekchar(l) - if c == 'a' - c = readchar(l) - return tryread(l, ('r', 'e', 'm', 'o', 'd', 'u', 'l', 'e'), BAREMODULE, c) - elseif c == 'e' - c = readchar(l) - return tryread(l, ('g', 'i', 'n'), BEGIN, c) - elseif c == 'r' - c = readchar(l) - return tryread(l, ('e', 'a', 'k'), BREAK, c) - else - return _doret(l, c) - end - elseif c == 'c' - c = peekchar(l) - if c == 'a' - c = readchar(l) - return tryread(l, ('t', 'c', 'h'), CATCH, c) - elseif c == 'o' - readchar(l) - c = peekchar(l) - if c == 'n' - readchar(l) - c = peekchar(l) - if c == 's' - readchar(l) - c = peekchar(l) - return tryread(l, ('t',), CONST, c) - elseif c == 't' - readchar(l) - c = peekchar(l) - return tryread(l, ('i', 'n', 'u', 'e'), CONTINUE, c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'd' - return tryread(l, ('o'), DO, c) - elseif c == 'e' - c = peekchar(l) - if c == 'l' - readchar(l) - c = peekchar(l) - if c == 's' - readchar(l) - c = peekchar(l) - if c == 'e' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, ELSE) - elseif c == 'i' - c = readchar(l) - return tryread(l, ('f'), ELSEIF ,c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'n' - c = readchar(l) - return tryread(l, ('d'), END, c) - elseif c == 'x' - c = readchar(l) - return tryread(l, ('p', 'o', 'r', 't'), EXPORT, c) - else - return _doret(l, c) - end - elseif c == 'f' - c = peekchar(l) - if c == 'a' - c = readchar(l) - 
return tryread(l, ('l', 's', 'e'), FALSE, c) - elseif c == 'i' - c = readchar(l) - return tryread(l, ('n', 'a', 'l', 'l', 'y'), FINALLY, c) - elseif c == 'o' - c = readchar(l) - return tryread(l, ('r'), FOR, c) - elseif c == 'u' - c = readchar(l) - return tryread(l, ('n', 'c', 't', 'i', 'o', 'n'), FUNCTION, c) - else - return _doret(l, c) - end - elseif c == 'g' - return tryread(l, ('l', 'o', 'b', 'a', 'l'), GLOBAL, c) - elseif c == 'i' - c = peekchar(l) - if c == 'f' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, IF) - else - return readrest(l, c) - end - elseif c == 'm' - readchar(l) - c = peekchar(l) - if c == 'p' - readchar(l) - c = peekchar(l) - if c == 'o' - readchar(l) - c = peekchar(l) - if c == 'r' - readchar(l) - c = peekchar(l) - if c == 't' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, IMPORT) - elseif c == 'a' - c = readchar(l) - return tryread(l, ('l','l'), IMPORTALL, c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'n' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, IN) - else - return readrest(l, c) - end - elseif (@static VERSION >= v"0.6.0-dev.1471" ? 
true : false) && c == 's' - c = readchar(l) - return tryread(l, ('a'), ISA, c) - else - return _doret(l, c) - end - elseif c == 'l' - c = peekchar(l) - if c == 'e' - readchar(l) - return tryread(l, ('t'), LET, c) - elseif c == 'o' - readchar(l) - return tryread(l, ('c', 'a', 'l'), LOCAL, c) - else - return _doret(l, c) - end - elseif c == 'm' - c = peekchar(l) - if c == 'a' - c = readchar(l) - return tryread(l, ('c', 'r', 'o'), MACRO, c) - elseif c == 'o' - c = readchar(l) - return tryread(l, ('d', 'u', 'l', 'e'), MODULE, c) - elseif c == 'u' - c = readchar(l) - return tryread(l, ('t', 'a', 'b', 'l', 'e'), MUTABLE, c) - else - return _doret(l, c) - end - elseif c == 'o' - return tryread(l, ('u', 't', 'e', 'r'), OUTER, c) - elseif c == 'p' - return tryread(l, ('r', 'i', 'm', 'i', 't', 'i', 'v', 'e'), PRIMITIVE, c) - elseif c == 'q' - return tryread(l, ('u', 'o', 't', 'e'), QUOTE, c) - elseif c == 'r' - return tryread(l, ('e', 't', 'u', 'r', 'n'), RETURN, c) - elseif c == 's' - return tryread(l, ('t', 'r', 'u', 'c', 't'), STRUCT, c) - elseif c == 't' - c = peekchar(l) - if c == 'r' - readchar(l) - c = peekchar(l) - if c == 'u' - c = readchar(l) - return tryread(l, ('e'), TRUE, c) - elseif c == 'y' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, TRY) - else - c = readchar(l) - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'y' - readchar(l) - c = peekchar(l) - if c == 'p' - readchar(l) - c = peekchar(l) - if c == 'e' - readchar(l) - c = peekchar(l) - if !is_identifier_char(c) - return emit(l, TYPE) - else - c = readchar(l) - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) - end - elseif c == 'u' - return tryread(l, ('s', 'i', 'n', 'g'), USING, c) - elseif c == 'w' - c = peekchar(l) - if c == 'h' - readchar(l) - c = peekchar(l) - if c == 'e' - c = readchar(l) - return tryread(l, ('r', 'e'), WHERE, c) - elseif c == 'i' - c = readchar(l) - 
return tryread(l, ('l', 'e'), WHILE, c) - else - return _doret(l, c) - end - else - return _doret(l, c) - end - else - return _doret(l, c) +function simple_hash(str) + ind = 1 + h = 0 + while ind <= length(str) + h = simple_hash(str[ind], h) + ind = nextind(str, ind) end + h end +kws = [ +Tokens.ABSTRACT, +Tokens.BAREMODULE, +Tokens.BEGIN, +Tokens.BREAK, +Tokens.CATCH, +Tokens.CONST, +Tokens.CONTINUE, +Tokens.DO, +Tokens.ELSE, +Tokens.ELSEIF, +Tokens.END, +Tokens.EXPORT, +Tokens.FINALLY, +Tokens.FOR, +Tokens.FUNCTION, +Tokens.GLOBAL, +Tokens.IF, +Tokens.IMPORT, +Tokens.IMPORTALL, +Tokens.LET, +Tokens.LOCAL, +Tokens.MACRO, +Tokens.MODULE, +Tokens.MUTABLE, +Tokens.OUTER, +Tokens.PRIMITIVE, +Tokens.QUOTE, +Tokens.RETURN, +Tokens.STRUCT, +Tokens.TRY, +Tokens.TYPE, +Tokens.USING, +Tokens.WHILE, +Tokens.IN, +Tokens.ISA, +Tokens.WHERE, +Tokens.TRUE, +Tokens.FALSE, +] + +const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) + end # module diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 58c35edafd591..6392d806625ab 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -617,3 +617,54 @@ end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) end + +@testset "simple_hash" begin + is_kw(x) = uppercase(x) in ( + "ABSTRACT", + "BAREMODULE", + "BEGIN", + "BREAK", + "CATCH", + "CONST", + "CONTINUE", + "DO", + "ELSE", + "ELSEIF", + "END", + "EXPORT", + "FINALLY", + "FOR", + "FUNCTION", + "GLOBAL", + "IF", + "IMPORT", + "IMPORTALL", + "LET", + "LOCAL", + "MACRO", + "MODULE", + "MUTABLE", + "OUTER", + "PRIMITIVE", + "QUOTE", + "RETURN", + "STRUCT", + "TRY", + "TYPE", + "USING", + "WHILE", + "IN", + "ISA", + "WHERE", + "TRUE", + "FALSE", + ) + for len in 1:5 + for cs in Iterators.product(['a':'z' for _ in 1:len]...) 
+ str = String([cs...]) + is_kw(str) && continue + + @test Tokenize.Lexers.simple_hash(str) ∉ keys(Tokenize.Lexers.kw_hash) + end + end +end From 15c109a9e342baabcdda1a72c36ca9f125c49df3 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 26 Aug 2021 09:36:47 +0200 Subject: [PATCH 0194/1109] Bump version --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 68debc14a0569..454daa772a52c 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,6 +1,6 @@ name = "Tokenize" uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.20" +version = "0.5.21" [compat] julia = "1" From 1ca5a7b2c3996ed0aff75965f43fabc381392c2e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 2 Sep 2021 11:34:08 +1000 Subject: [PATCH 0195/1109] Initial commit From 75e76da0daef2fc54397406d6b51ab8c3b67b6c3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 2 Sep 2021 11:34:29 +1000 Subject: [PATCH 0196/1109] Files generated by PkgTemplates PkgTemplates version: 0.7.17 --- JuliaSyntax/.github/workflows/CI.yml | 37 +++++++++++++++++++ .../.github/workflows/CompatHelper.yml | 16 ++++++++ JuliaSyntax/.github/workflows/TagBot.yml | 15 ++++++++ JuliaSyntax/.gitignore | 1 + JuliaSyntax/LICENSE | 21 +++++++++++ JuliaSyntax/Project.toml | 13 +++++++ JuliaSyntax/README.md | 3 ++ JuliaSyntax/src/JuliaSyntax.jl | 5 +++ JuliaSyntax/test/runtests.jl | 6 +++ 9 files changed, 117 insertions(+) create mode 100644 JuliaSyntax/.github/workflows/CI.yml create mode 100644 JuliaSyntax/.github/workflows/CompatHelper.yml create mode 100644 JuliaSyntax/.github/workflows/TagBot.yml create mode 100644 JuliaSyntax/.gitignore create mode 100644 JuliaSyntax/LICENSE create mode 100644 JuliaSyntax/Project.toml create mode 100644 JuliaSyntax/README.md create mode 100644 JuliaSyntax/src/JuliaSyntax.jl create mode 100644 JuliaSyntax/test/runtests.jl diff --git 
a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml new file mode 100644 index 0000000000000..fd8ea2f05d80e --- /dev/null +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -0,0 +1,37 @@ +name: CI +on: + - push + - pull_request +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.0' + - '1.6' + - 'nightly' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: actions/cache@v1 + env: + cache-name: cache-artifacts + with: + path: ~/.julia/artifacts + key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + restore-keys: | + ${{ runner.os }}-test-${{ env.cache-name }}- + ${{ runner.os }}-test- + ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 diff --git a/JuliaSyntax/.github/workflows/CompatHelper.yml b/JuliaSyntax/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000000000..cba9134c670f0 --- /dev/null +++ b/JuliaSyntax/.github/workflows/CompatHelper.yml @@ -0,0 +1,16 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/JuliaSyntax/.github/workflows/TagBot.yml b/JuliaSyntax/.github/workflows/TagBot.yml new file mode 100644 index 0000000000000..f49313b662013 --- /dev/null +++ b/JuliaSyntax/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + 
workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore new file mode 100644 index 0000000000000..b067eddee4ee0 --- /dev/null +++ b/JuliaSyntax/.gitignore @@ -0,0 +1 @@ +/Manifest.toml diff --git a/JuliaSyntax/LICENSE b/JuliaSyntax/LICENSE new file mode 100644 index 0000000000000..7f98356226bb3 --- /dev/null +++ b/JuliaSyntax/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Chris Foster and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml new file mode 100644 index 0000000000000..8272bfe7ad282 --- /dev/null +++ b/JuliaSyntax/Project.toml @@ -0,0 +1,13 @@ +name = "JuliaSyntax" +uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" +authors = ["Chris Foster and contributors"] +version = "0.1.0" + +[compat] +julia = "1" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md new file mode 100644 index 0000000000000..64a7267cc53ad --- /dev/null +++ b/JuliaSyntax/README.md @@ -0,0 +1,3 @@ +# JuliaSyntax + +[![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl new file mode 100644 index 0000000000000..c690938a021cf --- /dev/null +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -0,0 +1,5 @@ +module JuliaSyntax + +# Write your package code here. + +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl new file mode 100644 index 0000000000000..f61b6dc32451f --- /dev/null +++ b/JuliaSyntax/test/runtests.jl @@ -0,0 +1,6 @@ +using JuliaSyntax +using Test + +@testset "JuliaSyntax.jl" begin + # Write your tests here. 
+end From 6f7b585c72addc575059c66939a42efbd6089779 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 17 Nov 2021 12:13:18 +0100 Subject: [PATCH 0197/1109] Use Base.hash and add profiling script --- JuliaSyntax/src/Tokenize.jl | 2 ++ JuliaSyntax/src/lexer.jl | 21 ++++----------- JuliaSyntax/test/profile.jl | 54 +++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 16 deletions(-) create mode 100644 JuliaSyntax/test/profile.jl diff --git a/JuliaSyntax/src/Tokenize.jl b/JuliaSyntax/src/Tokenize.jl index 8cf3e8b5f6533..79175d96fc7b1 100644 --- a/JuliaSyntax/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize.jl @@ -12,6 +12,8 @@ import .Tokens: untokenize export tokenize, untokenize, Tokens +# disable precompilation when profiling runtime performance, as +# it can lead to wrong traces include("_precompile.jl") _precompile_() diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index e26d80c2af802..63973213f5ade 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -714,7 +714,7 @@ function lex_digit(l::Lexer, kind) kind = Tokens.HEX_INT isfloat = false readchar(l) - !(ishex(ppc) || ppc =='.') && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + !(ishex(ppc) || ppc == '.') && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) accept_number(l, ishex) if accept(l, '.') accept_number(l, ishex) @@ -1025,10 +1025,10 @@ function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} if T == Token readon(l) end - h = simple_hash(c, 0) + h = simple_hash(c, UInt64(0)) while true pc, ppc = dpeekchar(l) - if !is_identifier_char(pc) || (pc == '!' && ppc == '=') + if (pc == '!' && ppc == '=') || !is_identifier_char(pc) break end c = readchar(l) @@ -1038,22 +1038,11 @@ function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} return emit(l, get(kw_hash, h, IDENTIFIER)) end -# This creates a hash using 5 bit per lower case ASCII char. 
-# It checks its input to be between 'a' and 'z' (because only those chars) -# are valid in keywords, and returns a sentinel value for invalid inputs -# or when the hash is about to overflow. -function simple_hash(c, h) - h == UInt64(0xff) && return h - # only 'a' - 'z' actually need to be hashed - 'a' <= c <= 'z' || return UInt64(0xff) - # catch possible overflow by checking the 10 high bits - (h & (UInt64(0x3ff) << (64 - 10))) > 0 && return UInt64(0xff) - UInt64(h) << 5 + UInt8(c - 'a' + 1) -end +@inline simple_hash(c::Char, h::UInt64) = hash(c, h) function simple_hash(str) ind = 1 - h = 0 + h = UInt64(0) while ind <= length(str) h = simple_hash(str[ind], h) ind = nextind(str, ind) diff --git a/JuliaSyntax/test/profile.jl b/JuliaSyntax/test/profile.jl new file mode 100644 index 0000000000000..756f3f666e5d0 --- /dev/null +++ b/JuliaSyntax/test/profile.jl @@ -0,0 +1,54 @@ +using Tokenize + +nt = @timed @eval(collect(Tokenize.tokenize("foo + bar"))) +println("First run took $(nt.time) seconds with $(nt.bytes/1e6) MB allocated") + +srcdir = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "..") + +allfiles = [] +for (root, dirs, files) in walkdir(srcdir, follow_symlinks = true) + for file in files + splitext(file)[2] == ".jl" || continue + push!(allfiles, joinpath(root, file)) + end +end + +# warmup +let time_taken = 0.0, allocated = 0.0 + for file in allfiles + content = IOBuffer(read(file, String)) + nt = @timed collect(Tokenize.tokenize(content, Tokens.RawToken)) + time_taken += nt.time + allocated += nt.bytes + end +end + +# actual run +let time_taken = 0.0, allocated = 0.0 + for file in allfiles + content = IOBuffer(read(file, String)) + nt = @timed collect(Tokenize.tokenize(content, Tokens.RawToken)) + time_taken += nt.time + allocated += nt.bytes + end + println("Tokenized $(length(allfiles)) files in $(time_taken) seconds with $(allocated/1e6) MB allocated") +end + +isempty(ARGS) && exit(0) + +using PProf, Profile + +# warm up profiler +let content = 
read(first(allfiles), String) + @profile collect(Tokenize.tokenize(content, Tokens.RawToken)) +end + +Profile.clear() +for file in allfiles + content = read(file, String) + @profile collect(Tokenize.tokenize(content, Tokens.RawToken)) +end +pprof() + +println("Press any key to exit...") +readline() From 662a22d125c2678956c3b7a68388056baa6fa692 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 17 Nov 2021 12:54:58 +0100 Subject: [PATCH 0198/1109] iterate instead of collect in profile script --- JuliaSyntax/test/profile.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/test/profile.jl b/JuliaSyntax/test/profile.jl index 756f3f666e5d0..fd6cd61aeb55f 100644 --- a/JuliaSyntax/test/profile.jl +++ b/JuliaSyntax/test/profile.jl @@ -1,6 +1,6 @@ using Tokenize -nt = @timed @eval(collect(Tokenize.tokenize("foo + bar"))) +nt = @timed @eval(collect(Tokenize.tokenize("foo + bar", Tokens.RawToken))) println("First run took $(nt.time) seconds with $(nt.bytes/1e6) MB allocated") srcdir = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "..") @@ -17,7 +17,7 @@ end let time_taken = 0.0, allocated = 0.0 for file in allfiles content = IOBuffer(read(file, String)) - nt = @timed collect(Tokenize.tokenize(content, Tokens.RawToken)) + nt = @timed for t in Tokenize.tokenize(content, Tokens.RawToken) end time_taken += nt.time allocated += nt.bytes end @@ -27,7 +27,7 @@ end let time_taken = 0.0, allocated = 0.0 for file in allfiles content = IOBuffer(read(file, String)) - nt = @timed collect(Tokenize.tokenize(content, Tokens.RawToken)) + nt = @timed for t in Tokenize.tokenize(content, Tokens.RawToken) end time_taken += nt.time allocated += nt.bytes end From ba8f9784c1be2b60ab2cc2c8ca906510218e8900 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 17 Nov 2021 16:43:31 +0100 Subject: [PATCH 0199/1109] speed up RawToken interpolation --- JuliaSyntax/src/lexer.jl | 49 +++++++++++++--------------------------- 1 file changed, 16 insertions(+), 33 
deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 63973213f5ade..c2b01cfc16b78 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -65,28 +65,6 @@ function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} end Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) -function Base.copy(l::Lexer{IO_t, TT}) where IO_t where TT - return Lexer{IO_t, TT}( - l.io, - l.io_startpos, - - l.token_start_row, - l.token_start_col, - l.token_startpos, - - l.current_row, - l.current_col, - l.current_pos, - - l.last_token, - IOBuffer(), - l.chars, - l.charspos, - l.doread, - l.dotop - ) -end - @inline token_type(l::Lexer{IO_t, TT}) where {IO_t, TT} = TT """ @@ -272,10 +250,10 @@ Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. """ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t suffix = false - if (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) - str = String(take!(l.charstore)) - elseif kind == Tokens.ERROR + if kind in (Tokens.ERROR, Tokens.STRING, Tokens.TRIPLE_STRING, Tokens.CMD, Tokens.TRIPLE_CMD) str = String(l.io.data[(l.token_startpos + 1):position(l)]) + elseif (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) + str = String(take!(l.charstore)) elseif optakessuffix(kind) str = "" while isopsuffix(peekchar(l)) @@ -850,17 +828,18 @@ function read_string(l::Lexer, kind::Tokens.Kind) return false elseif c == '(' o = 1 - l2 = copy(l) + last_token = l.last_token + token_start_row = l.token_start_row + token_start_col = l.token_start_col + token_startpos = l.token_startpos while o > 0 - prevpos_io = position(l2.io) - t = next_token(l2) - seek(l.io, prevpos_io) - - while position(l) < position(l2) - readchar(l) - end + t = next_token(l) if Tokens.kind(t) == Tokens.ENDMARKER + l.last_token 
= last_token + l.token_start_row = token_start_row + l.token_start_col = token_start_col + l.token_startpos = token_startpos return false elseif Tokens.kind(t) == Tokens.LPAREN o += 1 @@ -868,6 +847,10 @@ function read_string(l::Lexer, kind::Tokens.Kind) o -= 1 end end + l.last_token = last_token + l.token_start_row = token_start_row + l.token_start_col = token_start_col + l.token_startpos = token_startpos end end end From f3d6c250f34da193494b8e4a42c5e82aabff861e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 4 Sep 2021 14:08:45 +1000 Subject: [PATCH 0200/1109] Initial experiments on top of Tokenize Trying to figure out what data structures to use for lexing and porting a small part of the flisp parser code. --- JuliaSyntax/Project.toml | 3 + JuliaSyntax/src/JuliaSyntax.jl | 236 ++++++++++++++++++++++++++++++++- 2 files changed, 238 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 8272bfe7ad282..c4188436db7b5 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -3,6 +3,9 @@ uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] version = "0.1.0" +[deps] +Tokenize = "0796e94c-ce3b-5d07-9a54-7f471281c624" + [compat] julia = "1" diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index c690938a021cf..ae4247f0bba9b 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,5 +1,239 @@ module JuliaSyntax -# Write your package code here. +#------------------------------------------------------------------------------- +# Token stream utilities + +import Tokenize +using Tokenize.Tokens: RawToken + +""" +We define a token type which is more suited to parsing than the basic token +types from Tokenize. 
+""" +struct SyntaxToken + raw::RawToken + leading_trivia::RawToken +end + +#= +function Base.show(io::IO, mime::MIME"text/plain", token) + show(io, mime, RawToken()) +end +=# + +kind(tok::SyntaxToken) = tok.raw.kind + +const EMPTY_RAW_TOKEN = RawToken() +const EMPTY_TOKEN = SyntaxToken(RawToken(), RawToken()) + +""" +TokenStream wraps the lexer from Tokenize.jl with a short putback buffer and +condenses syntactically irrelevant whitespace tokens into "syntax trivia" which +are attached to other tokens. +""" +mutable struct TokenStream + lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} + # We buffer up to two tokens here, with `next2` taken before `next1`. It + # suffices to support only a single putback token (which always goes into + # `next2`). The presence of a valid token in `next2` does not imply there's + # one in `next1`. + next1::SyntaxToken + next2::SyntaxToken + hasnext1::Bool + hasnext2::Bool +end + +function TokenStream(code) + lexer = Tokenize.tokenize(code, RawToken) + TokenStream(lexer, EMPTY_TOKEN, EMPTY_TOKEN, false, false) +end + +function Base.show(io::IO, mime::MIME"text/plain", ts::TokenStream) + print(io, TokenStream, ":\n lexer = ") + show(io, mime, ts.lexer) + if ts.hasnext2 + print(io, "\n next2 = ", ts.next2) + end + if ts.hasnext1 + print(io, "\n next1 = ", ts.next1) + end +end + +# Iterator interface +Base.IteratorSize(::Type{TokenStream}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{TokenStream}) = Base.HasEltype() +Base.eltype(::Type{TokenStream}) = SyntaxToken + +function Base.iterate(ts::TokenStream, end_state=false) + end_state && return nothing + t = take_token!(ts) + return t, kind(t) == Tokens.ENDMARKER +end + +function _read_raw_token(lexer::Tokenize.Lexers.Lexer) + c = Tokenize.Lexers.peekchar(lexer) + if isspace(c) + # We lex whitespace slightly differently from Tokenize.jl, as newlines + # are syntactically significant + if Tokenize.Lexers.accept(lexer, '\n') + return Tokenize.Lexers.emit(lexer, Tokens.NEWLINE_WS) + else 
+ Tokenize.Lexers.readon(lexer) + Tokenize.Lexers.accept_batch(lexer, c->isspace(c) && c != '\n') + return Tokenize.Lexers.emit(lexer, Tokens.WHITESPACE) + end + else + return Tokenize.Lexers.next_token(lexer) + end +end + +function _read_token(lexer::Tokenize.Lexers.Lexer) + # No token - do the actual work of taking a token from the lexer + leading_trivia = EMPTY_RAW_TOKEN + raw = _read_raw_token(lexer) + if Tokens.exactkind(raw) == Tokens.WHITESPACE + leading_trivia = raw + raw = _read_raw_token(lexer) + end + return SyntaxToken(raw, leading_trivia) +end + +# Return next token in the stream, but don't remove it. +function peek_token(ts::TokenStream) + ts.hasnext2 && return ts.next2 + ts.hasnext1 && return ts.next1 + ts.next1 = _read_token(ts.lexer) + ts.hasnext1 = true + return ts.next1 +end + +# Like peek_token, but +# * EOF becomes an error +# * Newlines tokens are gobbled (TODO!) +function require_token(ts::TokenStream) + tok = peek_token(ts) + if kind(tok) == Tokens.ENDMARKER + error("incomplete: premature end of input") + end + return tok +end + +# Remove next token from from the stream and return it. +function take_token!(ts::TokenStream) + if ts.hasnext2 + ts.hasnext2 = false + return ts.next2 + end + if ts.hasnext1 + ts.hasnext1 = false + return ts.next1 + end + # This line is a departure from the scheme parser, which requires + # peek_token to be called + return _read_token(ts.lexer) +end + +function put_back!(ts::TokenStream, tok::RawToken) + ts.hasnext2 || error("Cannot put back two tokens") + ts.next2 = tok +end + +function had_space(ts::TokenStream) +end + +#------------------------------------------------------------------------------- + +""" +ParseState carries parser context as we recursively descend into the parse +tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix +literals we're in "whitespace sensitive" mode, and `[x -y]` means [(x) (-y)]. 
+""" +struct ParseState + tokens::TokenStream + + # Disable range colon for parsing ternary conditional operator + range_colon_enabled::Bool + # In space-sensitive mode "x -y" is 2 expressions, not a subtraction + space_sensitive::Bool + # Seeing `for` stops parsing macro arguments and makes a generator + for_generator::Bool + # Treat 'end' like a normal symbol instead of a reserved word + end_symbol::Bool + # Treat newline like ordinary whitespace instead of as a potential separator + whitespace_newline::Bool + # Enable parsing `where` with high precedence + where_enabled::Bool +end + +# Normal context +function ParseState(tokens::TokenStream) + ParseState(tokens, true, false, true, false, false, false) +end + +function ParseState(ps::ParseState; range_colon_enabled=nothing, + space_sensitive=nothing, for_generator=nothing, + end_symbol=nothing, whitespace_newline=nothing, + where_enabled=nothing) + ParseState(ps.tokens, + range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, + space_sensitive === nothing ? ps.space_sensitive : space_sensitive, + for_generator === nothing ? ps.for_generator : for_generator, + end_symbol === nothing ? ps.end_symbol : end_symbol, + whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, + where_enabled === nothing ? 
ps.where_enabled : where_enabled) +end + +take_token!(ps::ParseState) = take_token!(ps.tokens) +require_token(ps::ParseState) = require_token(ps.tokens) +peek_token(ps::ParseState) = peek_token(ps.tokens) +put_back!(ps::ParseState, tok::RawToken) = put_back!(ps.tokens, tok) + + +#------------------------------------------------------------------------------- +# Parser + +function is_closing_token(ps::ParseState, tok) + k = kind(tok) + return k in (Tokens.ELSE, Tokens.ELSEIF, Tokens.CATCH, Tokens.FINALLY, + Tokens.COMMA, Tokens.LPAREN, Tokens.RSQUARE, Tokens.RBRACE, + Tokens.SEMICOLON, Tokens.ENDMARKER) || + k == Tokens.END && !ps.end_symbol +end + +function has_whitespace_prefix(tok::SyntaxToken) + tok.leading_trivia.kind == Tokens.WHITESPACE +end + + +# Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. +function parse_atom(ps::ParseState; checked::Bool=true) + tok = require_token(ps) + tok_kind = kind(tok) + if tok_kind == Tokens.COLON # symbol/expression quote + take_token!(ps) + next = peek_token(ps) + if is_closing_token(ps, next) && (kind(next) != Tokens.KEYWORD || + has_whitespace_prefix(next)) + return Symbol(":") # FIXME: CST NODE ??? + elseif has_whitespace_prefix(next) + error("whitespace not allowed after \":\" used for quoting") + elseif kind(next) == Tokens.NEWLINE_WS + error("newline not allowed after \":\" used for quoting") + else + # Being inside quote makes `end` non-special again. 
issue #27690 + ps1 = ParseState(ps, end_symbol=false) + return Expr(:quote, parse_atom(ps1, checked=false)) + end + elseif tok_kind == Tokens.EQ # misplaced = + error("unexpected `=`") + elseif tok_kind == Tokens.IDENTIFIER + if checked + # FIXME: Check identifier names + end + take_token!(ps) + else + return :heloooo + end +end end From ab214a1c106040de32a49101a642f78d9d58951f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 22 Nov 2021 14:27:15 +1000 Subject: [PATCH 0201/1109] Friendlier more literal names for token kinds Parser source which tests token kinds is easier to read when those tokens are represented as string literals; the flisp code uses character literals to great effect for this purpose. In Julia code we can do even better by introducing a string macro for these purposes. Introduce TK"]" for the token kind of the ']' token and a TokenKinds module to hold the mapping between Symbols and kinds. --- JuliaSyntax/README.md | 13 + JuliaSyntax/src/JuliaSyntax.jl | 45 +- JuliaSyntax/src/token_kinds.jl | 820 +++++++++++++++++++++++++++++++++ 3 files changed, 862 insertions(+), 16 deletions(-) create mode 100644 JuliaSyntax/src/token_kinds.jl diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 64a7267cc53ad..76f27023b2f58 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,3 +1,16 @@ # JuliaSyntax [![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions) + +Yet another Julia frontend, written in Julia. 
+ +Goals: +* Parse Julia code with precise source mapping (concrete syntax trees) +* Avoid worrying about how much work this will be 😅 + +Nice to have: +* Speedy enough for interactive editing +* Production quality error recovery and reporting +* "Compilation as an API" to support all sorts of tooling +* Go further than parsing - macro expansion, syntax desugaring and scope analysis +* Code which is correct, fast and understandable diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index ae4247f0bba9b..f5ac97e0d5b0f 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -4,7 +4,20 @@ module JuliaSyntax # Token stream utilities import Tokenize -using Tokenize.Tokens: RawToken +using Tokenize.Tokens: Tokens, RawToken + +include("token_kinds.jl") + +""" + TK"s" + +The full token kind of a string "s". For example, TK")" is the kind of the +right parenthesis token. +""" +macro TK_str(str) + name = Symbol(str) + return :(TokenKinds.$name) +end """ We define a token type which is more suited to parsing than the basic token @@ -67,7 +80,7 @@ Base.eltype(::Type{TokenStream}) = SyntaxToken function Base.iterate(ts::TokenStream, end_state=false) end_state && return nothing t = take_token!(ts) - return t, kind(t) == Tokens.ENDMARKER + return t, kind(t) == TK"ENDMARKER" end function _read_raw_token(lexer::Tokenize.Lexers.Lexer) @@ -76,11 +89,11 @@ function _read_raw_token(lexer::Tokenize.Lexers.Lexer) # We lex whitespace slightly differently from Tokenize.jl, as newlines # are syntactically significant if Tokenize.Lexers.accept(lexer, '\n') - return Tokenize.Lexers.emit(lexer, Tokens.NEWLINE_WS) + return Tokenize.Lexers.emit(lexer, TK"NEWLINE_WS") else Tokenize.Lexers.readon(lexer) Tokenize.Lexers.accept_batch(lexer, c->isspace(c) && c != '\n') - return Tokenize.Lexers.emit(lexer, Tokens.WHITESPACE) + return Tokenize.Lexers.emit(lexer, TK"WHITESPACE") end else return Tokenize.Lexers.next_token(lexer) @@ -91,7 +104,7 @@ 
function _read_token(lexer::Tokenize.Lexers.Lexer) # No token - do the actual work of taking a token from the lexer leading_trivia = EMPTY_RAW_TOKEN raw = _read_raw_token(lexer) - if Tokens.exactkind(raw) == Tokens.WHITESPACE + if Tokens.exactkind(raw) == TK"WHITESPACE" leading_trivia = raw raw = _read_raw_token(lexer) end @@ -112,7 +125,7 @@ end # * Newlines tokens are gobbled (TODO!) function require_token(ts::TokenStream) tok = peek_token(ts) - if kind(tok) == Tokens.ENDMARKER + if kind(tok) == TK"ENDMARKER" error("incomplete: premature end of input") end return tok @@ -194,14 +207,14 @@ put_back!(ps::ParseState, tok::RawToken) = put_back!(ps.tokens, tok) function is_closing_token(ps::ParseState, tok) k = kind(tok) - return k in (Tokens.ELSE, Tokens.ELSEIF, Tokens.CATCH, Tokens.FINALLY, - Tokens.COMMA, Tokens.LPAREN, Tokens.RSQUARE, Tokens.RBRACE, - Tokens.SEMICOLON, Tokens.ENDMARKER) || - k == Tokens.END && !ps.end_symbol + return k in (TK"else", TK"elseif", TK"catch", TK"finally", + TK",", TK"(", TK"]", TK"}", TK";", + TK"ENDMARKER") || + k == TK"END" && !ps.end_symbol end function has_whitespace_prefix(tok::SyntaxToken) - tok.leading_trivia.kind == Tokens.WHITESPACE + tok.leading_trivia.kind == TK"WHITESPACE" end @@ -209,24 +222,24 @@ end function parse_atom(ps::ParseState; checked::Bool=true) tok = require_token(ps) tok_kind = kind(tok) - if tok_kind == Tokens.COLON # symbol/expression quote + if tok_kind == TK":" # symbol/expression quote take_token!(ps) next = peek_token(ps) - if is_closing_token(ps, next) && (kind(next) != Tokens.KEYWORD || + if is_closing_token(ps, next) && (kind(next) != TK"KEYWORD" || has_whitespace_prefix(next)) return Symbol(":") # FIXME: CST NODE ??? 
elseif has_whitespace_prefix(next) error("whitespace not allowed after \":\" used for quoting") - elseif kind(next) == Tokens.NEWLINE_WS + elseif kind(next) == TK"NEWLINE_WS" error("newline not allowed after \":\" used for quoting") else # Being inside quote makes `end` non-special again. issue #27690 ps1 = ParseState(ps, end_symbol=false) return Expr(:quote, parse_atom(ps1, checked=false)) end - elseif tok_kind == Tokens.EQ # misplaced = + elseif tok_kind == TK"=" # misplaced = error("unexpected `=`") - elseif tok_kind == Tokens.IDENTIFIER + elseif tok_kind == TK"IDENTIFIER" if checked # FIXME: Check identifier names end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl new file mode 100644 index 0000000000000..bde19ce82c6d2 --- /dev/null +++ b/JuliaSyntax/src/token_kinds.jl @@ -0,0 +1,820 @@ +""" + A module to give Tokenize tokens better names! +""" +baremodule TokenKinds + +import Tokenize +const _T = Tokenize.Tokens + +const ENDMARKER = _T.ENDMARKER +const ERROR = _T.ERROR +const COMMENT = _T.COMMENT +const WHITESPACE = _T.WHITESPACE +const IDENTIFIER = _T.IDENTIFIER +const var"@" = _T.AT_SIGN +const var"," = _T.COMMA +const var";" = _T.SEMICOLON + +const BEGIN_KEYWORDS = _T.begin_keywords +const KEYWORD = _T.KEYWORD +const var"abstract" = _T.ABSTRACT +const var"baremodule" = _T.BAREMODULE +const var"begin" = _T.BEGIN +const var"break" = _T.BREAK +const var"catch" = _T.CATCH +const var"const" = _T.CONST +const var"continue" = _T.CONTINUE +const var"do" = _T.DO +const var"else" = _T.ELSE +const var"elseif" = _T.ELSEIF +const var"end" = _T.END +const var"export" = _T.EXPORT +const var"finally" = _T.FINALLY +const var"for" = _T.FOR +const var"function" = _T.FUNCTION +const var"global" = _T.GLOBAL +const var"if" = _T.IF +const var"import" = _T.IMPORT +const var"importall" = _T.IMPORTALL +const var"let" = _T.LET +const var"local" = _T.LOCAL +const var"macro" = _T.MACRO +const var"module" = _T.MODULE +const var"mutable" = _T.MUTABLE +const 
var"new" = _T.NEW +const var"outer" = _T.OUTER +const var"primitive" = _T.PRIMITIVE +const var"quote" = _T.QUOTE +const var"return" = _T.RETURN +const var"struct" = _T.STRUCT +const var"try" = _T.TRY +const var"type" = _T.TYPE +const var"using" = _T.USING +const var"while" = _T.WHILE +const END_KEYWORDS = _T.end_keywords + +const BEGIN_CSTPARSER = _T.begin_cstparser +const invisible_brackets = _T.INVISIBLE_BRACKETS +const nothing = _T.NOTHING +const ws = _T.WS +const semicolon_ws = _T.SEMICOLON_WS +const newline_ws = _T.NEWLINE_WS +const empty_ws = _T.EMPTY_WS +const END_CSTPARSER = _T.end_cstparser + +const BEGIN_LITERAL = _T.begin_literal +const LITERAL = _T.LITERAL +const integer = _T.INTEGER +const bin_int = _T.BIN_INT +const hex_int = _T.HEX_INT +const oct_int = _T.OCT_INT +const float = _T.FLOAT +const string = _T.STRING +const triple_string = _T.TRIPLE_STRING +const char = _T.CHAR +const cmd = _T.CMD +const triple_cmd = _T.TRIPLE_CMD +const var"true" = _T.TRUE +const var"false" = _T.FALSE +const END_LITERAL = _T.end_literal + +const BEGIN_DELIMITERS = _T.begin_delimiters +const var"[" = _T.LSQUARE +const var"]" = _T.RSQUARE +const var"{" = _T.LBRACE +const var"}" = _T.RBRACE +const var"(" = _T.LPAREN +const var")" = _T.RPAREN +const END_DELIMITERS = _T.end_delimiters + +const BEGIN_OPS = _T.begin_ops +const OP = _T.OP +const var"..." 
= _T.DDDOT + +# Level 1 +const BEGIN_ASSIGNMENTS = _T.begin_assignments +const var"=" = _T.EQ +const var"+=" = _T.PLUS_EQ +const var"-=" = _T.MINUS_EQ +const var"*=" = _T.STAR_EQ +const var"/=" = _T.FWD_SLASH_EQ +const var"//=" = _T.FWDFWD_SLASH_EQ +const var"|=" = _T.OR_EQ +const var"^=" = _T.CIRCUMFLEX_EQ +const var"÷=" = _T.DIVISION_EQ +const var"%=" = _T.REM_EQ +const var"<<=" = _T.LBITSHIFT_EQ +const var">>=" = _T.RBITSHIFT_EQ +const var">>>=" = _T.UNSIGNED_BITSHIFT_EQ +const var"\=" = _T.BACKSLASH_EQ +const var"&=" = _T.AND_EQ +const var":=" = _T.COLON_EQ +const var"~" = _T.APPROX +const var"$=" = _T.EX_OR_EQ +const var"⊻=" = _T.XOR_EQ +const END_ASSIGNMENTS = _T.end_assignments + +const BEGIN_PAIRARROW = _T.begin_pairarrow +const var"=>" = _T.PAIR_ARROW +const END_PAIRARROW = _T.end_pairarrow + +# Level 2 +const BEGIN_CONDITIONAL = _T.begin_conditional +const var"?" = _T.CONDITIONAL +const END_CONDITIONAL = _T.end_conditional + +# Level 3 +const BEGIN_ARROW = _T.begin_arrow +const var"-->" = _T.RIGHT_ARROW +const var"<--" = _T.LEFT_ARROW +const var"<-->" = _T.DOUBLE_ARROW +const var"←" = _T.LEFTWARDS_ARROW +const var"→" = _T.RIGHTWARDS_ARROW +const var"↔" = _T.LEFT_RIGHT_ARROW +const var"↚" = _T.LEFTWARDS_ARROW_WITH_STROKE +const var"↛" = _T.RIGHTWARDS_ARROW_WITH_STROKE +const var"↞" = _T.LEFTWARDS_TWO_HEADED_ARROW +const var"↠" = _T.RIGHTWARDS_TWO_HEADED_ARROW +const var"↢" = _T.LEFTWARDS_ARROW_WITH_TAIL +const var"↣" = _T.RIGHTWARDS_ARROW_WITH_TAIL +const var"↤" = _T.LEFTWARDS_ARROW_FROM_BAR +const var"↦" = _T.RIGHTWARDS_ARROW_FROM_BAR +const var"↮" = _T.LEFT_RIGHT_ARROW_WITH_STROKE +const var"⇎" = _T.LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE +const var"⇍" = _T.LEFTWARDS_DOUBLE_ARROW_WITH_STROKE +const var"⇏" = _T.RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE +const var"⇐" = _T.LEFTWARDS_DOUBLE_ARROW +const var"⇒" = _T.RIGHTWARDS_DOUBLE_ARROW +const var"⇔" = _T.LEFT_RIGHT_DOUBLE_ARROW +const var"⇴" = _T.RIGHT_ARROW_WITH_SMALL_CIRCLE +const var"⇶" = 
_T.THREE_RIGHTWARDS_ARROWS +const var"⇷" = _T.LEFTWARDS_ARROW_WITH_VERTICAL_STROKE +const var"⇸" = _T.RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE +const var"⇹" = _T.LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE +const var"⇺" = _T.LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⇻" = _T.RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⇼" = _T.LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⇽" = _T.LEFTWARDS_OPEN_HEADED_ARROW +const var"⇾" = _T.RIGHTWARDS_OPEN_HEADED_ARROW +const var"⇿" = _T.LEFT_RIGHT_OPEN_HEADED_ARROW +const var"⟵" = _T.LONG_LEFTWARDS_ARROW +const var"⟶" = _T.LONG_RIGHTWARDS_ARROW +const var"⟷" = _T.LONG_LEFT_RIGHT_ARROW +const var"⟹" = _T.LONG_RIGHTWARDS_DOUBLE_ARROW +const var"⟺" = _T.LONG_LEFT_RIGHT_DOUBLE_ARROW +const var"⟻" = _T.LONG_LEFTWARDS_ARROW_FROM_BAR +const var"⟼" = _T.LONG_RIGHTWARDS_ARROW_FROM_BAR +const var"⟽" = _T.LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⟾" = _T.LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⟿" = _T.LONG_RIGHTWARDS_SQUIGGLE_ARROW +const var"⤀" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE +const var"⤁" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⤂" = _T.LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE +const var"⤃" = _T.RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE +const var"⤄" = _T.LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE +const var"⤅" = _T.RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR +const var"⤆" = _T.LEFTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⤇" = _T.RIGHTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⤌" = _T.LEFTWARDS_DOUBLE_DASH_ARROW +const var"⤍" = _T.RIGHTWARDS_DOUBLE_DASH_ARROW +const var"⤎" = _T.LEFTWARDS_TRIPLE_DASH_ARROW +const var"⤏" = _T.RIGHTWARDS_TRIPLE_DASH_ARROW +const var"⤐" = _T.RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW +const var"⤑" = _T.RIGHTWARDS_ARROW_WITH_DOTTED_STEM +const var"⤔" = _T.RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⤕" = _T.RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⤖" = 
_T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL +const var"⤗" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⤘" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⤝" = _T.LEFTWARDS_ARROW_TO_BLACK_DIAMOND +const var"⤞" = _T.RIGHTWARDS_ARROW_TO_BLACK_DIAMOND +const var"⤟" = _T.LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND +const var"⤠" = _T.RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND +const var"⥄" = _T.SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW +const var"⥅" = _T.RIGHTWARDS_ARROW_WITH_PLUS_BELOW +const var"⥆" = _T.LEFTWARDS_ARROW_WITH_PLUS_BELOW +const var"⥇" = _T.RIGHTWARDS_ARROW_THROUGH_X +const var"⥈" = _T.LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE +const var"⥊" = _T.LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON +const var"⥋" = _T.LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON +const var"⥎" = _T.LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON +const var"⥐" = _T.LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON +const var"⥒" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR +const var"⥓" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR +const var"⥖" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR +const var"⥗" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR +const var"⥚" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR +const var"⥛" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR +const var"⥞" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR +const var"⥟" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR +const var"⥢" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥤" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥦" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP +const var"⥧" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥨" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP +const var"⥩" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥪" = 
_T.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH +const var"⥫" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH +const var"⥬" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH +const var"⥭" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH +const var"⥰" = _T.RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD +const var"⧴" = _T.RULE_DELAYED +const var"⬱" = _T.THREE_LEFTWARDS_ARROWS +const var"⬰" = _T.LEFT_ARROW_WITH_SMALL_CIRCLE +const var"⬲" = _T.LEFT_ARROW_WITH_CIRCLED_PLUS +const var"⬳" = _T.LONG_LEFTWARDS_SQUIGGLE_ARROW +const var"⬴" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE +const var"⬵" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⬶" = _T.LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR +const var"⬷" = _T.LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW +const var"⬸" = _T.LEFTWARDS_ARROW_WITH_DOTTED_STEM +const var"⬹" = _T.LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⬺" = _T.LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⬻" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL +const var"⬼" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⬽" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⬾" = _T.LEFTWARDS_ARROW_THROUGH_X +const var"⬿" = _T.WAVE_ARROW_POINTING_DIRECTLY_LEFT +const var"⭀" = _T.EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW +const var"⭁" = _T.REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW +const var"⭂" = _T.LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO +const var"⭃" = _T.RIGHTWARDS_ARROW_THROUGH_GREATER_THAN +const var"⭄" = _T.RIGHTWARDS_ARROW_THROUGH_SUPERSET +const var"⭇" = _T.REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW +const var"⭈" = _T.RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO +const var"⭉" = _T.TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW +const var"⭊" = _T.LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO +const var"⭋" = _T.LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR +const var"⭌" = _T.RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR +const var"←" = _T.HALFWIDTH_LEFTWARDS_ARROW 
+const var"→" = _T.HALFWIDTH_RIGHTWARDS_ARROW +const var"↻" = _T.CIRCLE_ARROW_RIGHT +const var"⇜" = _T.LEFT_SQUIGGLE_ARROW +const var"⇝" = _T.RIGHT_SQUIGGLE_ARROW +const var"↜" = _T.LEFT_WAVE_ARROW +const var"↝" = _T.RIGHT_WAVE_ARROW +const var"↩" = _T.LEFTWARDS_ARROW_WITH_HOOK +const var"↪" = _T.RIGHTWARDS_ARROW_WITH_HOOK +const var"↫" = _T.LOOP_ARROW_LEFT +const var"↬" = _T.LOOP_ARROW_RIGHT +const var"↼" = _T.LEFT_HARPOON_UP +const var"↽" = _T.LEFT_HARPOON_DOWN +const var"⇀" = _T.RIGHT_HARPOON_UP +const var"⇁" = _T.RIGHT_HARPOON_DOWN +const var"⇄" = _T.RIGHT_LEFT_ARROWS +const var"⇆" = _T.LEFT_RIGHT_ARROWS +const var"⇇" = _T.LEFT_LEFT_ARROWS +const var"⇉" = _T.RIGHT_RIGHT_ARROWS +const var"⇋" = _T.LEFT_RIGHT_HARPOONS +const var"⇌" = _T.RIGHT_LEFT_HARPOONS +const var"⇚" = _T.L_LEFT_ARROW +const var"⇛" = _T.R_RIGHT_ARROW +const var"⇠" = _T.LEFT_DASH_ARROW +const var"⇢" = _T.RIGHT_DASH_ARROW +const var"↷" = _T.CURVE_ARROW_RIGHT +const var"↶" = _T.CURVE_ARROW_LEFT +const var"↺" = _T.CIRCLE_ARROW_LEFT +const END_ARROW = _T.end_arrow + +# Level 4 +const BEGIN_LAZYOR = _T.begin_lazyor +const var"||" = _T.LAZY_OR +const END_LAZYOR = _T.end_lazyor + +# Level 5 +const BEGIN_LAZYAND = _T.begin_lazyand +const var"&&" = _T.LAZY_AND +const END_LAZYAND = _T.end_lazyand + +# Level 6 +const BEGIN_COMPARISON = _T.begin_comparison +const var"<:" = _T.ISSUBTYPE +const var">:" = _T.ISSUPERTYPE +const var">" = _T.GREATER +const var"<" = _T.LESS +const var">=" = _T.GREATER_EQ +const var"≥" = _T.GREATER_THAN_OR_EQUAL_TO +const var"<=" = _T.LESS_EQ +const var"≤" = _T.LESS_THAN_OR_EQUAL_TO +const var"==" = _T.EQEQ +const var"===" = _T.EQEQEQ +const var"≡" = _T.IDENTICAL_TO +const var"!=" = _T.NOT_EQ +const var"≠" = _T.NOT_EQUAL_TO +const var"!==" = _T.NOT_IS +const var"≢" = _T.NOT_IDENTICAL_TO +const var"∈" = _T.ELEMENT_OF +const var"in" = _T.IN +const var"isa" = _T.ISA +const var"∉" = _T.NOT_AN_ELEMENT_OF +const var"∋" = _T.CONTAINS_AS_MEMBER +const var"∌" = _T.DOES_NOT_CONTAIN_AS_MEMBER 
+const var"⊆" = _T.SUBSET_OF_OR_EQUAL_TO +const var"⊈" = _T.NEITHER_A_SUBSET_OF_NOR_EQUAL_TO +const var"⊂" = _T.SUBSET_OF +const var"⊄" = _T.NOT_A_SUBSET_OF +const var"⊊" = _T.SUBSET_OF_WITH_NOT_EQUAL_TO +const var"∝" = _T.PROPORTIONAL_TO +const var"∊" = _T.SMALL_ELEMENT_OF +const var"∍" = _T.SMALL_CONTAINS_AS_MEMBER +const var"∥" = _T.PARALLEL_TO +const var"∦" = _T.NOT_PARALLEL_TO +const var"∷" = _T.PROPORTION +const var"∺" = _T.GEOMETRIC_PROPORTION +const var"∻" = _T.HOMOTHETIC +const var"∽" = _T.REVERSED_TILDE +const var"∾" = _T.INVERTED_LAZY_S +const var"≁" = _T.NOT_TILDE +const var"≃" = _T.ASYMPTOTICALLY_EQUAL_TO +const var"≄" = _T.NOT_ASYMPTOTICALLY_EQUAL_TO +const var"≅" = _T.APPROXIMATELY_EQUAL_TO +const var"≆" = _T.APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO +const var"≇" = _T.NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO +const var"≈" = _T.ALMOST_EQUAL_TO +const var"≉" = _T.NOT_ALMOST_EQUAL_TO +const var"≊" = _T.ALMOST_EQUAL_OR_EQUAL_TO +const var"≋" = _T.TRIPLE_TILDE +const var"≌" = _T.ALL_EQUAL_TO +const var"≍" = _T.EQUIVALENT_TO +const var"≎" = _T.GEOMETRICALLY_EQUIVALENT_TO +const var"≐" = _T.APPROACHES_THE_LIMIT +const var"≑" = _T.GEOMETRICALLY_EQUAL_TO +const var"≒" = _T.APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF +const var"≓" = _T.IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO +const var"≔" = _T.COLON_EQUALS +const var"≕" = _T.EQUALS_COLON +const var"≖" = _T.RING_IN_EQUAL_TO +const var"≗" = _T.RING_EQUAL_TO +const var"≘" = _T.CORRESPONDS_TO +const var"≙" = _T.ESTIMATES +const var"≚" = _T.EQUIANGULAR_TO +const var"≛" = _T.STAR_EQUALS +const var"≜" = _T.DELTA_EQUAL_TO +const var"≝" = _T.EQUAL_TO_BY_DEFINITION +const var"≞" = _T.MEASURED_BY +const var"≟" = _T.QUESTIONED_EQUAL_TO +const var"≣" = _T.STRICTLY_EQUIVALENT_TO +const var"≦" = _T.LESS_THAN_OVER_EQUAL_TO +const var"≧" = _T.GREATER_THAN_OVER_EQUAL_TO +const var"≨" = _T.LESS_THAN_BUT_NOT_EQUAL_TO +const var"≩" = _T.GREATER_THAN_BUT_NOT_EQUAL_TO +const var"≪" = _T.MUCH_LESS_THAN +const var"≫" = _T.MUCH_GREATER_THAN 
+const var"≬" = _T.BETWEEN +const var"≭" = _T.NOT_EQUIVALENT_TO +const var"≮" = _T.NOT_LESS_THAN +const var"≯" = _T.NOT_GREATER_THAN +const var"≰" = _T.NEITHER_LESS_THAN_NOR_EQUAL_TO +const var"≱" = _T.NEITHER_GREATER_THAN_NOR_EQUAL_TO +const var"≲" = _T.LESS_THAN_OR_EQUIVALENT_TO +const var"≳" = _T.GREATER_THAN_OR_EQUIVALENT_TO +const var"≴" = _T.NEITHER_LESS_THAN_NOR_EQUIVALENT_TO +const var"≵" = _T.NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO +const var"≶" = _T.LESS_THAN_OR_GREATER_THAN +const var"≷" = _T.GREATER_THAN_OR_LESS_THAN +const var"≸" = _T.NEITHER_LESS_THAN_NOR_GREATER_THAN +const var"≹" = _T.NEITHER_GREATER_THAN_NOR_LESS_THAN +const var"≺" = _T.PRECEDES +const var"≻" = _T.SUCCEEDS +const var"≼" = _T.PRECEDES_OR_EQUAL_TO +const var"≽" = _T.SUCCEEDS_OR_EQUAL_TO +const var"≾" = _T.PRECEDES_OR_EQUIVALENT_TO +const var"≿" = _T.SUCCEEDS_OR_EQUIVALENT_TO +const var"⊀" = _T.DOES_NOT_PRECEDE +const var"⊁" = _T.DOES_NOT_SUCCEED +const var"⊃" = _T.SUPERSET_OF +const var"⊅" = _T.NOT_A_SUPERSET_OF +const var"⊇" = _T.SUPERSET_OF_OR_EQUAL_TO +const var"⊉" = _T.NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO +const var"⊋" = _T.SUPERSET_OF_WITH_NOT_EQUAL_TO +const var"⊏" = _T.SQUARE_IMAGE_OF +const var"⊐" = _T.SQUARE_ORIGINAL_OF +const var"⊑" = _T.SQUARE_IMAGE_OF_OR_EQUAL_TO +const var"⊒" = _T.SQUARE_ORIGINAL_OF_OR_EQUAL_TO +const var"⊜" = _T.CIRCLED_EQUALS +const var"⊩" = _T.FORCES +const var"⊬" = _T.DOES_NOT_PROVE +const var"⊮" = _T.DOES_NOT_FORCE +const var"⊰" = _T.PRECEDES_UNDER_RELATION +const var"⊱" = _T.SUCCEEDS_UNDER_RELATION +const var"⊲" = _T.NORMAL_SUBGROUP_OF +const var"⊳" = _T.CONTAINS_AS_NORMAL_SUBGROUP +const var"⊴" = _T.NORMAL_SUBGROUP_OF_OR_EQUAL_TO +const var"⊵" = _T.CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO +const var"⊶" = _T.ORIGINAL_OF +const var"⊷" = _T.IMAGE_OF +const var"⋍" = _T.REVERSED_TILDE_EQUALS +const var"⋐" = _T.DOUBLE_SUBSET +const var"⋑" = _T.DOUBLE_SUPERSET +const var"⋕" = _T.EQUAL_AND_PARALLEL_TO +const var"⋖" = _T.LESS_THAN_WITH_DOT +const var"⋗" = 
_T.GREATER_THAN_WITH_DOT +const var"⋘" = _T.VERY_MUCH_LESS_THAN +const var"⋙" = _T.VERY_MUCH_GREATER_THAN +const var"⋚" = _T.LESS_THAN_EQUAL_TO_OR_GREATER_THAN +const var"⋛" = _T.GREATER_THAN_EQUAL_TO_OR_LESS_THAN +const var"⋜" = _T.EQUAL_TO_OR_LESS_THAN +const var"⋝" = _T.EQUAL_TO_OR_GREATER_THAN +const var"⋞" = _T.EQUAL_TO_OR_PRECEDES +const var"⋟" = _T.EQUAL_TO_OR_SUCCEEDS +const var"⋠" = _T.DOES_NOT_PRECEDE_OR_EQUAL +const var"⋡" = _T.DOES_NOT_SUCCEED_OR_EQUAL +const var"⋢" = _T.NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO +const var"⋣" = _T.NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO +const var"⋤" = _T.SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO +const var"⋥" = _T.SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO +const var"⋦" = _T.LESS_THAN_BUT_NOT_EQUIVALENT_TO +const var"⋧" = _T.GREATER_THAN_BUT_NOT_EQUIVALENT_TO +const var"⋨" = _T.PRECEDES_BUT_NOT_EQUIVALENT_TO +const var"⋩" = _T.SUCCEEDS_BUT_NOT_EQUIVALENT_TO +const var"⋪" = _T.NOT_NORMAL_SUBGROUP_OF +const var"⋫" = _T.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP +const var"⋬" = _T.NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO +const var"⋭" = _T.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL +const var"⋲" = _T.ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE +const var"⋳" = _T.ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋴" = _T.SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋵" = _T.ELEMENT_OF_WITH_DOT_ABOVE +const var"⋶" = _T.ELEMENT_OF_WITH_OVERBAR +const var"⋷" = _T.SMALL_ELEMENT_OF_WITH_OVERBAR +const var"⋸" = _T.ELEMENT_OF_WITH_UNDERBAR +const var"⋹" = _T.ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES +const var"⋺" = _T.CONTAINS_WITH_LONG_HORIZONTAL_STROKE +const var"⋻" = _T.CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋼" = _T.SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋽" = _T.CONTAINS_WITH_OVERBAR +const var"⋾" = _T.SMALL_CONTAINS_WITH_OVERBAR +const var"⋿" = _T.Z_NOTATION_BAG_MEMBERSHIP +const var"⟈" = _T.REVERSE_SOLIDUS_PRECEDING_SUBSET +const var"⟉" = _T.SUPERSET_PRECEDING_SOLIDUS 
+const var"⟒" = _T.ELEMENT_OF_OPENING_UPWARDS +const var"⦷" = _T.CIRCLED_PARALLEL +const var"⧀" = _T.CIRCLED_LESS_THAN +const var"⧁" = _T.CIRCLED_GREATER_THAN +const var"⧡" = _T.INCREASES_AS +const var"⧣" = _T.EQUALS_SIGN_AND_SLANTED_PARALLEL +const var"⧤" = _T.EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE +const var"⧥" = _T.IDENTICAL_TO_AND_SLANTED_PARALLEL +const var"⩦" = _T.EQUALS_SIGN_WITH_DOT_BELOW +const var"⩧" = _T.IDENTICAL_WITH_DOT_ABOVE +const var"⩪" = _T.TILDE_OPERATOR_WITH_DOT_ABOVE +const var"⩫" = _T.TILDE_OPERATOR_WITH_RISING_DOTS +const var"⩬" = _T.SIMILAR_MINUS_SIMILAR +const var"⩭" = _T.CONGRUENT_WITH_DOT_ABOVE +const var"⩮" = _T.EQUALS_WITH_ASTERISK +const var"⩯" = _T.ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT +const var"⩰" = _T.APPROXIMATELY_EQUAL_OR_EQUAL_TO +const var"⩱" = _T.EQUALS_SIGN_ABOVE_PLUS_SIGN +const var"⩲" = _T.PLUS_SIGN_ABOVE_EQUALS_SIGN +const var"⩳" = _T.EQUALS_SIGN_ABOVE_TILDE_OPERATOR +const var"⩴" = _T.DOUBLE_COLON_EQUAL +const var"⩵" = _T.TWO_CONSECUTIVE_EQUALS_SIGNS +const var"⩶" = _T.THREE_CONSECUTIVE_EQUALS_SIGNS +const var"⩷" = _T.EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW +const var"⩸" = _T.EQUIVALENT_WITH_FOUR_DOTS_ABOVE +const var"⩹" = _T.LESS_THAN_WITH_CIRCLE_INSIDE +const var"⩺" = _T.GREATER_THAN_WITH_CIRCLE_INSIDE +const var"⩻" = _T.LESS_THAN_WITH_QUESTION_MARK_ABOVE +const var"⩼" = _T.GREATER_THAN_WITH_QUESTION_MARK_ABOVE +const var"⩽" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO +const var"⩾" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO +const var"⩿" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE +const var"⪀" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE +const var"⪁" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE +const var"⪂" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE +const var"⪃" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT +const var"⪄" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT +const var"⪅" = _T.LESS_THAN_OR_APPROXIMATE +const var"⪆" = _T.GREATER_THAN_OR_APPROXIMATE 
+const var"⪇" = _T.LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO +const var"⪈" = _T.GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO +const var"⪉" = _T.LESS_THAN_AND_NOT_APPROXIMATE +const var"⪊" = _T.GREATER_THAN_AND_NOT_APPROXIMATE +const var"⪋" = _T.LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN +const var"⪌" = _T.GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN +const var"⪍" = _T.LESS_THAN_ABOVE_SIMILAR_OR_EQUAL +const var"⪎" = _T.GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL +const var"⪏" = _T.LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN +const var"⪐" = _T.GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN +const var"⪑" = _T.LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL +const var"⪒" = _T.GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL +const var"⪓" = _T.LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL +const var"⪔" = _T.GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL +const var"⪕" = _T.SLANTED_EQUAL_TO_OR_LESS_THAN +const var"⪖" = _T.SLANTED_EQUAL_TO_OR_GREATER_THAN +const var"⪗" = _T.SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE +const var"⪘" = _T.SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE +const var"⪙" = _T.DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN +const var"⪚" = _T.DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN +const var"⪛" = _T.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN +const var"⪜" = _T.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN +const var"⪝" = _T.SIMILAR_OR_LESS_THAN +const var"⪞" = _T.SIMILAR_OR_GREATER_THAN +const var"⪟" = _T.SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN +const var"⪠" = _T.SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN +const var"⪡" = _T.DOUBLE_NESTED_LESS_THAN +const var"⪢" = _T.DOUBLE_NESTED_GREATER_THAN +const var"⪣" = _T.DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR +const var"⪤" = _T.GREATER_THAN_OVERLAPPING_LESS_THAN +const var"⪥" = _T.GREATER_THAN_BESIDE_LESS_THAN +const var"⪦" = _T.LESS_THAN_CLOSED_BY_CURVE +const var"⪧" = _T.GREATER_THAN_CLOSED_BY_CURVE +const var"⪨" = _T.LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL 
+const var"⪩" = _T.GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL +const var"⪪" = _T.SMALLER_THAN +const var"⪫" = _T.LARGER_THAN +const var"⪬" = _T.SMALLER_THAN_OR_EQUAL_TO +const var"⪭" = _T.LARGER_THAN_OR_EQUAL_TO +const var"⪮" = _T.EQUALS_SIGN_WITH_BUMPY_ABOVE +const var"⪯" = _T.PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN +const var"⪰" = _T.SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN +const var"⪱" = _T.PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO +const var"⪲" = _T.SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO +const var"⪳" = _T.PRECEDES_ABOVE_EQUALS_SIGN +const var"⪴" = _T.SUCCEEDS_ABOVE_EQUALS_SIGN +const var"⪵" = _T.PRECEDES_ABOVE_NOT_EQUAL_TO +const var"⪶" = _T.SUCCEEDS_ABOVE_NOT_EQUAL_TO +const var"⪷" = _T.PRECEDES_ABOVE_ALMOST_EQUAL_TO +const var"⪸" = _T.SUCCEEDS_ABOVE_ALMOST_EQUAL_TO +const var"⪹" = _T.PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO +const var"⪺" = _T.SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO +const var"⪻" = _T.DOUBLE_PRECEDES +const var"⪼" = _T.DOUBLE_SUCCEEDS +const var"⪽" = _T.SUBSET_WITH_DOT +const var"⪾" = _T.SUPERSET_WITH_DOT +const var"⪿" = _T.SUBSET_WITH_PLUS_SIGN_BELOW +const var"⫀" = _T.SUPERSET_WITH_PLUS_SIGN_BELOW +const var"⫁" = _T.SUBSET_WITH_MULTIPLICATION_SIGN_BELOW +const var"⫂" = _T.SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW +const var"⫃" = _T.SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE +const var"⫄" = _T.SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE +const var"⫅" = _T.SUBSET_OF_ABOVE_EQUALS_SIGN +const var"⫆" = _T.SUPERSET_OF_ABOVE_EQUALS_SIGN +const var"⫇" = _T.SUBSET_OF_ABOVE_TILDE_OPERATOR +const var"⫈" = _T.SUPERSET_OF_ABOVE_TILDE_OPERATOR +const var"⫉" = _T.SUBSET_OF_ABOVE_ALMOST_EQUAL_TO +const var"⫊" = _T.SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO +const var"⫋" = _T.SUBSET_OF_ABOVE_NOT_EQUAL_TO +const var"⫌" = _T.SUPERSET_OF_ABOVE_NOT_EQUAL_TO +const var"⫍" = _T.SQUARE_LEFT_OPEN_BOX_OPERATOR +const var"⫎" = _T.SQUARE_RIGHT_OPEN_BOX_OPERATOR +const var"⫏" = _T.CLOSED_SUBSET +const var"⫐" = _T.CLOSED_SUPERSET +const var"⫑" = _T.CLOSED_SUBSET_OR_EQUAL_TO +const var"⫒" = 
_T.CLOSED_SUPERSET_OR_EQUAL_TO +const var"⫓" = _T.SUBSET_ABOVE_SUPERSET +const var"⫔" = _T.SUPERSET_ABOVE_SUBSET +const var"⫕" = _T.SUBSET_ABOVE_SUBSET +const var"⫖" = _T.SUPERSET_ABOVE_SUPERSET +const var"⫗" = _T.SUPERSET_BESIDE_SUBSET +const var"⫘" = _T.SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET +const var"⫙" = _T.ELEMENT_OF_OPENING_DOWNWARDS +const var"⫷" = _T.TRIPLE_NESTED_LESS_THAN +const var"⫸" = _T.TRIPLE_NESTED_GREATER_THAN +const var"⫹" = _T.DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO +const var"⫺" = _T.DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO +const var"⊢" = _T.RIGHT_TACK +const var"⊣" = _T.LEFT_TACK +const var"⟂" = _T.PERP +const END_COMPARISON = _T.end_comparison + +# Level 7 +const BEGIN_PIPE = _T.begin_pipe +const var"|>" = _T.LPIPE +const var"<|" = _T.RPIPE +const END_PIPE = _T.end_pipe + +# Level 8 +const BEGIN_COLON = _T.begin_colon +const var":" = _T.COLON +const var".." = _T.DDOT +const var"…" = _T.LDOTS +const var"⁝" = _T.TRICOLON +const var"⋮" = _T.VDOTS +const var"⋱" = _T.DDOTS +const var"⋰" = _T.ADOTS +const var"⋯" = _T.CDOTS +const END_COLON = _T.end_colon + +# Level 9 +const BEGIN_PLUS = _T.begin_plus +const var"$" = _T.EX_OR +const var"+" = _T.PLUS +const var"-" = _T.MINUS +const var"++" = _T.PLUSPLUS +const var"⊕" = _T.CIRCLED_PLUS +const var"⊖" = _T.CIRCLED_MINUS +const var"⊞" = _T.SQUARED_PLUS +const var"⊟" = _T.SQUARED_MINUS +const var"|" = _T.OR +const var"∪" = _T.UNION +const var"∨" = _T.LOGICAL_OR +const var"⊔" = _T.SQUARE_CUP +const var"±" = _T.PLUS_MINUS_SIGN +const var"∓" = _T.MINUS_OR_PLUS_SIGN +const var"∔" = _T.DOT_PLUS +const var"∸" = _T.DOT_MINUS +const var"≂" = _T.MINUS_TILDE +const var"≏" = _T.DIFFERENCE_BETWEEN +const var"⊎" = _T.MULTISET_UNION +const var"⊻" = _T.XOR +const var"⊽" = _T.NOR +const var"⋎" = _T.CURLY_LOGICAL_OR +const var"⋓" = _T.DOUBLE_UNION +const var"⧺" = _T.DOUBLE_PLUS +const var"⧻" = _T.TRIPLE_PLUS +const var"⨈" = _T.TWO_LOGICAL_OR_OPERATOR +const var"⨢" = _T.PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE 
+const var"⨣" = _T.PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE +const var"⨤" = _T.PLUS_SIGN_WITH_TILDE_ABOVE +const var"⨥" = _T.PLUS_SIGN_WITH_DOT_BELOW +const var"⨦" = _T.PLUS_SIGN_WITH_TILDE_BELOW +const var"⨧" = _T.PLUS_SIGN_WITH_SUBSCRIPT_TWO +const var"⨨" = _T.PLUS_SIGN_WITH_BLACK_TRIANGLE +const var"⨩" = _T.MINUS_SIGN_WITH_COMMA_ABOVE +const var"⨪" = _T.MINUS_SIGN_WITH_DOT_BELOW +const var"⨫" = _T.MINUS_SIGN_WITH_FALLING_DOTS +const var"⨬" = _T.MINUS_SIGN_WITH_RISING_DOTS +const var"⨭" = _T.PLUS_SIGN_IN_LEFT_HALF_CIRCLE +const var"⨮" = _T.PLUS_SIGN_IN_RIGHT_HALF_CIRCLE +const var"⨹" = _T.PLUS_SIGN_IN_TRIANGLE +const var"⨺" = _T.MINUS_SIGN_IN_TRIANGLE +const var"⩁" = _T.UNION_WITH_MINUS_SIGN +const var"⩂" = _T.UNION_WITH_OVERBAR +const var"⩅" = _T.UNION_WITH_LOGICAL_OR +const var"⩊" = _T.UNION_BESIDE_AND_JOINED_WITH_UNION +const var"⩌" = _T.CLOSED_UNION_WITH_SERIFS +const var"⩏" = _T.DOUBLE_SQUARE_UNION +const var"⩐" = _T.CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT +const var"⩒" = _T.LOGICAL_OR_WITH_DOT_ABOVE +const var"⩔" = _T.DOUBLE_LOGICAL_OR +const var"⩖" = _T.TWO_INTERSECTING_LOGICAL_OR +const var"⩗" = _T.SLOPING_LARGE_OR +const var"⩛" = _T.LOGICAL_OR_WITH_MIDDLE_STEM +const var"⩝" = _T.LOGICAL_OR_WITH_HORIZONTAL_DASH +const var"⩡" = _T.SMALL_VEE_WITH_UNDERBAR +const var"⩢" = _T.LOGICAL_OR_WITH_DOUBLE_OVERBAR +const var"⩣" = _T.LOGICAL_OR_WITH_DOUBLE_UNDERBAR +const var"¦" = _T.BROKEN_BAR +const END_PLUS = _T.end_plus + +# Level 10 +const BEGIN_BITSHIFTS = _T.begin_bitshifts +const var"<<" = _T.LBITSHIFT +const var">>" = _T.RBITSHIFT +const var">>>" = _T.UNSIGNED_BITSHIFT +const END_BITSHIFTS = _T.end_bitshifts + +# Level 11 +const BEGIN_TIMES = _T.begin_times +const var"*" = _T.STAR +const var"/" = _T.FWD_SLASH +const var"÷" = _T.DIVISION_SIGN +const var"%" = _T.REM +const var"⋅" = _T.UNICODE_DOT +const var"∘" = _T.RING_OPERATOR +const var"×" = _T.MULTIPLICATION_SIGN +const var"\\" = _T.BACKSLASH +const var"&" = _T.AND +const var"∩" = _T.INTERSECTION +const 
var"∧" = _T.LOGICAL_AND +const var"⊗" = _T.CIRCLED_TIMES +const var"⊘" = _T.CIRCLED_DIVISION_SLASH +const var"⊙" = _T.CIRCLED_DOT_OPERATOR +const var"⊚" = _T.CIRCLED_RING_OPERATOR +const var"⊛" = _T.CIRCLED_ASTERISK_OPERATOR +const var"⊠" = _T.SQUARED_TIMES +const var"⊡" = _T.SQUARED_DOT_OPERATOR +const var"⊓" = _T.SQUARE_CAP +const var"∗" = _T.ASTERISK_OPERATOR +const var"∙" = _T.BULLET_OPERATOR +const var"∤" = _T.DOES_NOT_DIVIDE +const var"⅋" = _T.TURNED_AMPERSAND +const var"≀" = _T.WREATH_PRODUCT +const var"⊼" = _T.NAND +const var"⋄" = _T.DIAMOND_OPERATOR +const var"⋆" = _T.STAR_OPERATOR +const var"⋇" = _T.DIVISION_TIMES +const var"⋉" = _T.LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT +const var"⋊" = _T.RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT +const var"⋋" = _T.LEFT_SEMIDIRECT_PRODUCT +const var"⋌" = _T.RIGHT_SEMIDIRECT_PRODUCT +const var"⋏" = _T.CURLY_LOGICAL_AND +const var"⋒" = _T.DOUBLE_INTERSECTION +const var"⟑" = _T.AND_WITH_DOT +const var"⦸" = _T.CIRCLED_REVERSE_SOLIDUS +const var"⦼" = _T.CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN +const var"⦾" = _T.CIRCLED_WHITE_BULLET +const var"⦿" = _T.CIRCLED_BULLET +const var"⧶" = _T.SOLIDUS_WITH_OVERBAR +const var"⧷" = _T.REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE +const var"⨇" = _T.TWO_LOGICAL_AND_OPERATOR +const var"⨰" = _T.MULTIPLICATION_SIGN_WITH_DOT_ABOVE +const var"⨱" = _T.MULTIPLICATION_SIGN_WITH_UNDERBAR +const var"⨲" = _T.SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED +const var"⨳" = _T.SMASH_PRODUCT +const var"⨴" = _T.MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE +const var"⨵" = _T.MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE +const var"⨶" = _T.CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT +const var"⨷" = _T.MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE +const var"⨸" = _T.CIRCLED_DIVISION_SIGN +const var"⨻" = _T.MULTIPLICATION_SIGN_IN_TRIANGLE +const var"⨼" = _T.INTERIOR_PRODUCT +const var"⨽" = _T.RIGHTHAND_INTERIOR_PRODUCT +const var"⩀" = _T.INTERSECTION_WITH_DOT +const var"⩃" = _T.INTERSECTION_WITH_OVERBAR +const var"⩄" = 
_T.INTERSECTION_WITH_LOGICAL_AND +const var"⩋" = _T.INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION +const var"⩍" = _T.CLOSED_INTERSECTION_WITH_SERIFS +const var"⩎" = _T.DOUBLE_SQUARE_INTERSECTION +const var"⩑" = _T.LOGICAL_AND_WITH_DOT_ABOVE +const var"⩓" = _T.DOUBLE_LOGICAL_AND +const var"⩕" = _T.TWO_INTERSECTING_LOGICAL_AND +const var"⩘" = _T.SLOPING_LARGE_AND +const var"⩚" = _T.LOGICAL_AND_WITH_MIDDLE_STEM +const var"⩜" = _T.LOGICAL_AND_WITH_HORIZONTAL_DASH +const var"⩞" = _T.LOGICAL_AND_WITH_DOUBLE_OVERBAR +const var"⩟" = _T.LOGICAL_AND_WITH_UNDERBAR +const var"⩠" = _T.LOGICAL_AND_WITH_DOUBLE_UNDERBAR +const var"⫛" = _T.TRANSVERSAL_INTERSECTION +const var"⊍" = _T.MULTISET_MULTIPLICATION +const var"▷" = _T.WHITE_RIGHT_POINTING_TRIANGLE +const var"⨝" = _T.JOIN +const var"⟕" = _T.LEFT_OUTER_JOIN +const var"⟖" = _T.RIGHT_OUTER_JOIN +const var"⟗" = _T.FULL_OUTER_JOIN +const var"⌿" = _T.NOT_SLASH +const var"⨟" = _T.BB_SEMI +const END_TIMES = _T.end_times + +# Level 12 +const BEGIN_RATIONAL = _T.begin_rational +const var"//" = _T.FWDFWD_SLASH +const END_RATIONAL = _T.end_rational + +# Level 13 +const BEGIN_POWER = _T.begin_power +const var"^" = _T.CIRCUMFLEX_ACCENT +const var"↑" = _T.UPWARDS_ARROW +const var"↓" = _T.DOWNWARDS_ARROW +const var"⇵" = _T.DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW +const var"⟰" = _T.UPWARDS_QUADRUPLE_ARROW +const var"⟱" = _T.DOWNWARDS_QUADRUPLE_ARROW +const var"⤈" = _T.DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE +const var"⤉" = _T.UPWARDS_ARROW_WITH_HORIZONTAL_STROKE +const var"⤊" = _T.UPWARDS_TRIPLE_ARROW +const var"⤋" = _T.DOWNWARDS_TRIPLE_ARROW +const var"⤒" = _T.UPWARDS_ARROW_TO_BAR +const var"⤓" = _T.DOWNWARDS_ARROW_TO_BAR +const var"⥉" = _T.UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE +const var"⥌" = _T.UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON +const var"⥍" = _T.UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON +const var"⥏" = _T.UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON +const var"⥑" = _T.UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON +const var"⥔" = 
_T.UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR +const var"⥕" = _T.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR +const var"⥘" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR +const var"⥙" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR +const var"⥜" = _T.UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR +const var"⥝" = _T.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR +const var"⥠" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR +const var"⥡" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR +const var"⥣" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT +const var"⥥" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT +const var"⥮" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT +const var"⥯" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT +const var"↑" = _T.HALFWIDTH_UPWARDS_ARROW +const var"↓" = _T.HALFWIDTH_DOWNWARDS_ARROW +const END_POWER = _T.end_power + +# Level 14 +const BEGIN_DECL = _T.begin_decl +const var"::" = _T.DECLARATION +const END_DECL = _T.end_decl + +# Level 15 +const BEGIN_WHERE = _T.begin_where +const var"where" = _T.WHERE +const END_WHERE = _T.end_where + +# Level 16 +const BEGIN_DOT = _T.begin_dot +const var"." = _T.DOT +const END_DOT = _T.end_dot + +const var"!" = _T.NOT +const var"'" = _T.PRIME +const var".'" = _T.TRANSPOSE +const var"->" = _T.ANON_FUNC + +const BEGIN_UNICODE_OPS = _T.begin_unicode_ops +const var"¬" = _T.NOT_SIGN +const var"√" = _T.SQUARE_ROOT +const var"∛" = _T.CUBE_ROOT +const var"∜" = _T.QUAD_ROOT +const END_UNICODE_OPS = _T.end_unicode_ops + +const END_OPS = _T.end_ops + +end + From d18dd661a54d6c09b5b21f3b107f7133801b3b67 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 22 Nov 2021 15:56:44 +1000 Subject: [PATCH 0202/1109] Require VERSION >= 1.4 var_str doesn't exist prior to 1.3, and `var"true" = x` doesn't work prior to 1.4. Can fix this later if necessary. 
--- JuliaSyntax/.github/workflows/CI.yml | 2 +- JuliaSyntax/Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index fd8ea2f05d80e..b01591dd39feb 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: version: - - '1.0' + - '1.4' - '1.6' - 'nightly' os: diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index c4188436db7b5..89bb0d7c80a52 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -7,7 +7,7 @@ version = "0.1.0" Tokenize = "0796e94c-ce3b-5d07-9a54-7f471281c624" [compat] -julia = "1" +julia = "1.4" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From ae15e69910db7de4737db4bde67040ef9c273133 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Mon, 22 Nov 2021 10:14:10 +0100 Subject: [PATCH 0203/1109] more accurate hash --- JuliaSyntax/src/lexer.jl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index c2b01cfc16b78..e7b12be536174 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -1004,11 +1004,13 @@ function lex_cmd(l::Lexer, doemit=true) end end +const MAX_KW_LENGTH = 10 function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} if T == Token readon(l) end h = simple_hash(c, UInt64(0)) + n = 1 while true pc, ppc = dpeekchar(l) if (pc == '!' && ppc == '=') || !is_identifier_char(pc) @@ -1016,12 +1018,23 @@ function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} end c = readchar(l) h = simple_hash(c, h) + n += 1 end - return emit(l, get(kw_hash, h, IDENTIFIER)) + if n > MAX_KW_LENGTH + emit(l, IDENTIFIER) + else + emit(l, get(kw_hash, h, IDENTIFIER)) + end end -@inline simple_hash(c::Char, h::UInt64) = hash(c, h) +# This creates a hash for chars in [a-z] using 5 bit per char. 
+# Requires an additional input-length check somewhere, because +# this only works up to ~12 chars. +@inline function simple_hash(c::Char, h::UInt64) + bytehash = (clamp(c - 'a' + 1, -1, 30) % UInt8) & 0x1f + h << 5 + bytehash +end function simple_hash(str) ind = 1 From d6fbff525a8bc6183bad8a3153689b5c11efdfae Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 23 Nov 2021 11:14:21 +1000 Subject: [PATCH 0204/1109] CompatHelper: add new compat entry for Tokenize at version 0.5 (JuliaLang/JuliaSyntax.jl#1) --- JuliaSyntax/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 89bb0d7c80a52..ec34ef35c06ac 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -7,6 +7,7 @@ version = "0.1.0" Tokenize = "0796e94c-ce3b-5d07-9a54-7f471281c624" [compat] +Tokenize = "0.5" julia = "1.4" [extras] From cb06e6ab5529dd84477df7a688d638f8861e26c2 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 26 Nov 2021 16:24:26 +1000 Subject: [PATCH 0205/1109] Translating some parsing code; fiddling with kinds * Translate a bit of recursive descent parsing code from Base * Start work on a syntax Kind which can combine Tokenize token kinds with other syntax kinds. Still using a hacked version of Tokenize though. 
--- JuliaSyntax/README.md | 25 + JuliaSyntax/src/JuliaSyntax.jl | 181 +++- JuliaSyntax/src/token_kinds.jl | 1629 +++++++++++++++++--------------- JuliaSyntax/test/runtests.jl | 11 +- 4 files changed, 1019 insertions(+), 827 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 76f27023b2f58..77c3ae56e0aea 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -14,3 +14,28 @@ Nice to have: * "Compilation as an API" to support all sorts of tooling * Go further than parsing - macro expansion, syntax desugaring and scope analysis * Code which is correct, fast and understandable + +## Design + +The datastructure design here is hard: +- There's many useful ways to augment a syntax tree depending on use case. +- Analysis algorithms should be able to act on any tree type, ignoring + but carrying augmentations which they don't know about. + +Let's tackle it by prototyping identifying several important work flows: + +1. Precise error reporting in lowering + - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment + location `[a, b]`". But at a precise source location. + - Try something several layers deeper inside lowering. For example "macro + definition not allowed inside a local scope" + +2. Refactoring + - A pass to rename local variables + +3. Incremental reparsing + - Reparse a source file, given a byte range replacement + +4. Formatting + - Re-indent a file + diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index f5ac97e0d5b0f..638e02f2c5e3e 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -4,38 +4,34 @@ module JuliaSyntax # Token stream utilities import Tokenize -using Tokenize.Tokens: Tokens, RawToken +using Tokenize.Tokens: RawToken +const TzTokens = Tokenize.Tokens include("token_kinds.jl") -""" - TK"s" - -The full token kind of a string "s". For example, TK")" is the kind of the -right parenthesis token. 
-""" -macro TK_str(str) - name = Symbol(str) - return :(TokenKinds.$name) -end - """ We define a token type which is more suited to parsing than the basic token types from Tokenize. """ struct SyntaxToken - raw::RawToken + # TODO: Could use a more stripped down version of RawToken which only + # stores byte offsets? leading_trivia::RawToken + raw::RawToken end -#= -function Base.show(io::IO, mime::MIME"text/plain", token) - show(io, mime, RawToken()) +function Base.show(io::IO, t::SyntaxToken) + fullrange = string(lpad(t.leading_trivia.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) + + range = string(lpad(t.raw.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) + print(io, rpad(string(fullrange, "│", range), 17, " "), rpad(kind(t), 15, " ")) end -=# + kind(tok::SyntaxToken) = tok.raw.kind +# summary_kind(tok::SyntaxToken) = TzTokens.kind(tok.raw) + const EMPTY_RAW_TOKEN = RawToken() const EMPTY_TOKEN = SyntaxToken(RawToken(), RawToken()) @@ -80,35 +76,41 @@ Base.eltype(::Type{TokenStream}) = SyntaxToken function Base.iterate(ts::TokenStream, end_state=false) end_state && return nothing t = take_token!(ts) - return t, kind(t) == TK"ENDMARKER" + return t, kind(t) == K"ENDMARKER" end function _read_raw_token(lexer::Tokenize.Lexers.Lexer) c = Tokenize.Lexers.peekchar(lexer) if isspace(c) + Tokenize.Lexers.start_token!(lexer) # We lex whitespace slightly differently from Tokenize.jl, as newlines # are syntactically significant if Tokenize.Lexers.accept(lexer, '\n') - return Tokenize.Lexers.emit(lexer, TK"NEWLINE_WS") + return Tokenize.Lexers.emit(lexer, K"NEWLINE_WS") else Tokenize.Lexers.readon(lexer) Tokenize.Lexers.accept_batch(lexer, c->isspace(c) && c != '\n') - return Tokenize.Lexers.emit(lexer, TK"WHITESPACE") + return Tokenize.Lexers.emit(lexer, K"WHITESPACE") end else - return Tokenize.Lexers.next_token(lexer) + return Tokenize.Lexers.next_token(lexer) end end function _read_token(lexer::Tokenize.Lexers.Lexer) # No token - do the actual work of taking a token 
from the lexer - leading_trivia = EMPTY_RAW_TOKEN - raw = _read_raw_token(lexer) - if Tokens.exactkind(raw) == TK"WHITESPACE" + raw = _read_raw_token(lexer) + if TzTokens.exactkind(raw) in (K"WHITESPACE", K"COMMENT") + # TODO: *Combine* comments with whitespace here to get a single leading + # trivia item per real token. leading_trivia = raw - raw = _read_raw_token(lexer) + raw = _read_raw_token(lexer) + else + leading_trivia = RawToken(K"ERROR", (0,0), (0,0), + raw.startbyte, raw.startbyte-1, + TzTokens.NO_ERR, false, false) end - return SyntaxToken(raw, leading_trivia) + return SyntaxToken(leading_trivia, raw) end # Return next token in the stream, but don't remove it. @@ -120,18 +122,18 @@ function peek_token(ts::TokenStream) return ts.next1 end -# Like peek_token, but +# Like peek_token, but # * EOF becomes an error # * Newlines tokens are gobbled (TODO!) function require_token(ts::TokenStream) tok = peek_token(ts) - if kind(tok) == TK"ENDMARKER" + if kind(tok) == K"ENDMARKER" error("incomplete: premature end of input") end return tok end -# Remove next token from from the stream and return it. +# Remove next token from the stream and return it. 
function take_token!(ts::TokenStream) if ts.hasnext2 ts.hasnext2 = false @@ -154,6 +156,9 @@ end function had_space(ts::TokenStream) end +is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" + + #------------------------------------------------------------------------------- """ @@ -201,52 +206,138 @@ require_token(ps::ParseState) = require_token(ps.tokens) peek_token(ps::ParseState) = peek_token(ps.tokens) put_back!(ps::ParseState, tok::RawToken) = put_back!(ps.tokens, tok) - #------------------------------------------------------------------------------- # Parser +include("syntax_tree.jl") + function is_closing_token(ps::ParseState, tok) k = kind(tok) - return k in (TK"else", TK"elseif", TK"catch", TK"finally", - TK",", TK"(", TK"]", TK"}", TK";", - TK"ENDMARKER") || - k == TK"END" && !ps.end_symbol + return k in (K"else", K"elseif", K"catch", K"finally", + K",", K")", K"]", K"}", K";", + K"ENDMARKER") || (k == K"END" && !ps.end_symbol) end function has_whitespace_prefix(tok::SyntaxToken) - tok.leading_trivia.kind == TK"WHITESPACE" + tok.leading_trivia.kind == K"WHITESPACE" end +function TODO(str) + error("TODO: $str") +end # Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. -function parse_atom(ps::ParseState; checked::Bool=true) +function parse_atom(ps::ParseState; checked::Bool=true)::SyntaxNode tok = require_token(ps) tok_kind = kind(tok) - if tok_kind == TK":" # symbol/expression quote + # TODO: Reorder these to put most likely tokens first + if tok_kind == K":" # symbol/expression quote take_token!(ps) next = peek_token(ps) - if is_closing_token(ps, next) && (kind(next) != TK"KEYWORD" || + if is_closing_token(ps, next) && (kind(next) != K"KEYWORD" || has_whitespace_prefix(next)) - return Symbol(":") # FIXME: CST NODE ??? 
+ return SyntaxNode(tok) elseif has_whitespace_prefix(next) error("whitespace not allowed after \":\" used for quoting") - elseif kind(next) == TK"NEWLINE_WS" + elseif kind(next) == K"NEWLINE_WS" error("newline not allowed after \":\" used for quoting") else # Being inside quote makes `end` non-special again. issue #27690 ps1 = ParseState(ps, end_symbol=false) - return Expr(:quote, parse_atom(ps1, checked=false)) + return SyntaxNode(K"quote", parse_atom(ps1, checked=false)) end - elseif tok_kind == TK"=" # misplaced = + elseif tok_kind == K"=" # misplaced = error("unexpected `=`") - elseif tok_kind == TK"IDENTIFIER" + elseif tok_kind == K"IDENTIFIER" if checked - # FIXME: Check identifier names + TODO("Checked identifier names") end take_token!(ps) + return SyntaxNode(tok) + elseif tok_kind == K"VAR_IDENTIFIER" + take_token!(ps) + return SyntaxNode(tok) + elseif tok_kind == K"(" # parens or tuple + take_token!(ps) + return parse_paren(ps, checked) + elseif tok_kind == K"[" # cat expression + take_token!(ps) + TODO("""parse_cat(ps, K"]", ps.end_symbol)""") + elseif tok_kind == K"{" # cat expression + take_token!(ps) + TODO("""parse_cat(ps, K"}", ps.end_symbol)""") + elseif tok_kind == K"`" + TODO("(macrocall (core @cmd) ...)") + # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), + elseif isliteral(tok_kind) + take_token!(ps) + return SyntaxNode(tok) + elseif is_closing_token(tok) + error("unexpected: $tok") else - return :heloooo + error("invalid syntax: `$tok`") end end +# parse `a@b@c@...` for some @ +# +# `is_separator` - predicate +# `head` the expression head to yield in the result, e.g. 
"a;b" => (block a b) +# `is_closer` - predicate to identify tokens that stop parsing +# however, this doesn't consume the closing token, just looks at it +function parse_Nary(ps::ParseState, down::Function, is_separator::Function, + result_kind, is_closer::Function) +end + +# flisp: parse-docstring +# Parse statement with possible docstring +function parse_statement_with_doc(ps::ParseState) + parse_eq(ps) + # TODO: Detect docstrings +end + +#------------------------------------------------------------------------------- + +# the principal non-terminals follow, in increasing precedence order + +#function parse_block(ps::ParseState, down=parse_eq) +#end + +# flisp: parse-stmts +# `;` at the top level produces a sequence of top level expressions +function parse_statements(ps::ParseState) + parse_Nary(ps, parse_statement) +end + +# flisp: parse-eq +function parse_eq(ps::ParseState) + parse_assignment(ps, parse_comma) +end + +# flisp: parse-eq* +# parse_eq_2 is used where commas are special, for example in an argument list +# function parse_eq_2 + +function parse_assignment(ps::ParseState, down) + ex = down(ps) + t = peek_token(ps) + if !is_prec_assignment(t) + return ex + end + take_token!(ps) + if kind(t) == K"~" + # ~ is the only non-syntactic assignment-precedence operator + TODO("Turn ~ into a call node") + else + SyntaxNode + end +end + +#------------------------------------------------------------------------------- + +function parse(code) + tokens = JuliaSyntax.TokenStream(code) + parse_statements(tokens) +end + end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index bde19ce82c6d2..2a68fb91212db 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -1,820 +1,889 @@ + +#= +@enum(SyntaxKind, + Call +) + +# A type to multiplex token kinds from various libraries +struct Kind + id::UInt32 +end + +Kind(k::TzTokens.Kind) = Kind(0x00010000 | UInt32(k)) +Kind(k::SyntaxKind) = Kind(0x00020000 | UInt32(k)) + 
+_kind_namespace(k::Kind) = k.id >> 16 +_kind_code(k::Kind) = k.id & 0xffff + +function Base.show(io::IO, k::Kind) + ns = _kind_namespace(k) + code = _kind_code(k) + if ns == 1 + # Basic token kinds from Tokenize + print(io, Kind, "(") + show(io, Tokenize.Tokens.Kind(code)) + print(io, ")") + elseif ns == 2 + # Syntax node kinds, defined here + print(io, Kind, "(") + show(io, SyntaxKind(code)) + print(io, ")") + else + print(io, typeof(Kind), "(", k.id, ")") + end +end + +function Base.:(==)(k1::Kind, k2::Kind) + k1.id == k2.id +end +=# + +using Tokenize.Tokens: Kind, isliteral, iskeyword + +kind(k::Kind) = k + +""" + K"s" + +The full kind of a string "s". For example, K")" is the kind of the +right parenthesis token. +""" +macro K_str(str) + name = Symbol(str) + return :(Kinds.$name) +end + + """ A module to give Tokenize tokens better names! """ -baremodule TokenKinds +baremodule Kinds + +import ..JuliaSyntax: JuliaSyntax, Kind import Tokenize -const _T = Tokenize.Tokens - -const ENDMARKER = _T.ENDMARKER -const ERROR = _T.ERROR -const COMMENT = _T.COMMENT -const WHITESPACE = _T.WHITESPACE -const IDENTIFIER = _T.IDENTIFIER -const var"@" = _T.AT_SIGN -const var"," = _T.COMMA -const var";" = _T.SEMICOLON - -const BEGIN_KEYWORDS = _T.begin_keywords -const KEYWORD = _T.KEYWORD -const var"abstract" = _T.ABSTRACT -const var"baremodule" = _T.BAREMODULE -const var"begin" = _T.BEGIN -const var"break" = _T.BREAK -const var"catch" = _T.CATCH -const var"const" = _T.CONST -const var"continue" = _T.CONTINUE -const var"do" = _T.DO -const var"else" = _T.ELSE -const var"elseif" = _T.ELSEIF -const var"end" = _T.END -const var"export" = _T.EXPORT -const var"finally" = _T.FINALLY -const var"for" = _T.FOR -const var"function" = _T.FUNCTION -const var"global" = _T.GLOBAL -const var"if" = _T.IF -const var"import" = _T.IMPORT -const var"importall" = _T.IMPORTALL -const var"let" = _T.LET -const var"local" = _T.LOCAL -const var"macro" = _T.MACRO -const var"module" = _T.MODULE -const 
var"mutable" = _T.MUTABLE -const var"new" = _T.NEW -const var"outer" = _T.OUTER -const var"primitive" = _T.PRIMITIVE -const var"quote" = _T.QUOTE -const var"return" = _T.RETURN -const var"struct" = _T.STRUCT -const var"try" = _T.TRY -const var"type" = _T.TYPE -const var"using" = _T.USING -const var"while" = _T.WHILE -const END_KEYWORDS = _T.end_keywords - -const BEGIN_CSTPARSER = _T.begin_cstparser -const invisible_brackets = _T.INVISIBLE_BRACKETS -const nothing = _T.NOTHING -const ws = _T.WS -const semicolon_ws = _T.SEMICOLON_WS -const newline_ws = _T.NEWLINE_WS -const empty_ws = _T.EMPTY_WS -const END_CSTPARSER = _T.end_cstparser - -const BEGIN_LITERAL = _T.begin_literal -const LITERAL = _T.LITERAL -const integer = _T.INTEGER -const bin_int = _T.BIN_INT -const hex_int = _T.HEX_INT -const oct_int = _T.OCT_INT -const float = _T.FLOAT -const string = _T.STRING -const triple_string = _T.TRIPLE_STRING -const char = _T.CHAR -const cmd = _T.CMD -const triple_cmd = _T.TRIPLE_CMD -const var"true" = _T.TRUE -const var"false" = _T.FALSE -const END_LITERAL = _T.end_literal - -const BEGIN_DELIMITERS = _T.begin_delimiters -const var"[" = _T.LSQUARE -const var"]" = _T.RSQUARE -const var"{" = _T.LBRACE -const var"}" = _T.RBRACE -const var"(" = _T.LPAREN -const var")" = _T.RPAREN -const END_DELIMITERS = _T.end_delimiters - -const BEGIN_OPS = _T.begin_ops -const OP = _T.OP -const var"..." 
= _T.DDDOT + +macro _K(sym) + :(Tokenize.Tokens.$sym) +# :(Kind(Tokenize.Tokens.$sym)) +end + +const ENDMARKER = @_K ENDMARKER +const ERROR = @_K ERROR +const COMMENT = @_K COMMENT +const WHITESPACE = @_K WHITESPACE +const IDENTIFIER = @_K IDENTIFIER +const var"@" = @_K AT_SIGN +const var"," = @_K COMMA +const var";" = @_K SEMICOLON + +const BEGIN_KEYWORDS = @_K begin_keywords +const KEYWORD = @_K KEYWORD +const var"abstract" = @_K ABSTRACT +const var"baremodule" = @_K BAREMODULE +const var"begin" = @_K BEGIN +const var"break" = @_K BREAK +const var"catch" = @_K CATCH +const var"const" = @_K CONST +const var"continue" = @_K CONTINUE +const var"do" = @_K DO +const var"else" = @_K ELSE +const var"elseif" = @_K ELSEIF +const var"end" = @_K END +const var"export" = @_K EXPORT +const var"finally" = @_K FINALLY +const var"for" = @_K FOR +const var"function" = @_K FUNCTION +const var"global" = @_K GLOBAL +const var"if" = @_K IF +const var"import" = @_K IMPORT +const var"importall" = @_K IMPORTALL +const var"let" = @_K LET +const var"local" = @_K LOCAL +const var"macro" = @_K MACRO +const var"module" = @_K MODULE +const var"mutable" = @_K MUTABLE +const var"new" = @_K NEW +const var"outer" = @_K OUTER +const var"primitive" = @_K PRIMITIVE +const var"quote" = @_K QUOTE +const var"return" = @_K RETURN +const var"struct" = @_K STRUCT +const var"try" = @_K TRY +const var"type" = @_K TYPE +const var"using" = @_K USING +const var"while" = @_K WHILE +const END_KEYWORDS = @_K end_keywords + +const BEGIN_CSTPARSER = @_K begin_cstparser +const INVISIBLE_BRACKETS = @_K INVISIBLE_BRACKETS +const NOTHING = @_K NOTHING +const WS = @_K WS +const SEMICOLON_WS = @_K SEMICOLON_WS +const NEWLINE_WS = @_K NEWLINE_WS +const EMPTY_WS = @_K EMPTY_WS +const END_CSTPARSER = @_K end_cstparser + +const BEGIN_LITERAL = @_K begin_literal +const LITERAL = @_K LITERAL +const integer = @_K INTEGER +const bin_int = @_K BIN_INT +const hex_int = @_K HEX_INT +const oct_int = @_K OCT_INT +const float = @_K 
FLOAT +const string = @_K STRING +const triple_string = @_K TRIPLE_STRING +const char = @_K CHAR +const cmd = @_K CMD +const triple_cmd = @_K TRIPLE_CMD +const var"true" = @_K TRUE +const var"false" = @_K FALSE +const END_LITERAL = @_K end_literal + +const BEGIN_DELIMITERS = @_K begin_delimiters +const var"[" = @_K LSQUARE +const var"]" = @_K RSQUARE +const var"{" = @_K LBRACE +const var"}" = @_K RBRACE +const var"(" = @_K LPAREN +const var")" = @_K RPAREN +const END_DELIMITERS = @_K end_delimiters + +const BEGIN_OPS = @_K begin_ops +const OP = @_K OP +const var"..." = @_K DDDOT # Level 1 -const BEGIN_ASSIGNMENTS = _T.begin_assignments -const var"=" = _T.EQ -const var"+=" = _T.PLUS_EQ -const var"-=" = _T.MINUS_EQ -const var"*=" = _T.STAR_EQ -const var"/=" = _T.FWD_SLASH_EQ -const var"//=" = _T.FWDFWD_SLASH_EQ -const var"|=" = _T.OR_EQ -const var"^=" = _T.CIRCUMFLEX_EQ -const var"÷=" = _T.DIVISION_EQ -const var"%=" = _T.REM_EQ -const var"<<=" = _T.LBITSHIFT_EQ -const var">>=" = _T.RBITSHIFT_EQ -const var">>>=" = _T.UNSIGNED_BITSHIFT_EQ -const var"\=" = _T.BACKSLASH_EQ -const var"&=" = _T.AND_EQ -const var":=" = _T.COLON_EQ -const var"~" = _T.APPROX -const var"$=" = _T.EX_OR_EQ -const var"⊻=" = _T.XOR_EQ -const END_ASSIGNMENTS = _T.end_assignments - -const BEGIN_PAIRARROW = _T.begin_pairarrow -const var"=>" = _T.PAIR_ARROW -const END_PAIRARROW = _T.end_pairarrow +const BEGIN_ASSIGNMENTS = @_K begin_assignments +const var"=" = @_K EQ +const var"+=" = @_K PLUS_EQ +const var"-=" = @_K MINUS_EQ +const var"*=" = @_K STAR_EQ +const var"/=" = @_K FWD_SLASH_EQ +const var"//=" = @_K FWDFWD_SLASH_EQ +const var"|=" = @_K OR_EQ +const var"^=" = @_K CIRCUMFLEX_EQ +const var"÷=" = @_K DIVISION_EQ +const var"%=" = @_K REM_EQ +const var"<<=" = @_K LBITSHIFT_EQ +const var">>=" = @_K RBITSHIFT_EQ +const var">>>=" = @_K UNSIGNED_BITSHIFT_EQ +const var"\=" = @_K BACKSLASH_EQ +const var"&=" = @_K AND_EQ +const var":=" = @_K COLON_EQ +const var"~" = @_K APPROX +const var"$=" = @_K 
EX_OR_EQ +const var"⊻=" = @_K XOR_EQ +const END_ASSIGNMENTS = @_K end_assignments + +const BEGIN_PAIRARROW = @_K begin_pairarrow +const var"=>" = @_K PAIR_ARROW +const END_PAIRARROW = @_K end_pairarrow # Level 2 -const BEGIN_CONDITIONAL = _T.begin_conditional -const var"?" = _T.CONDITIONAL -const END_CONDITIONAL = _T.end_conditional +const BEGIN_CONDITIONAL = @_K begin_conditional +const var"?" = @_K CONDITIONAL +const END_CONDITIONAL = @_K end_conditional # Level 3 -const BEGIN_ARROW = _T.begin_arrow -const var"-->" = _T.RIGHT_ARROW -const var"<--" = _T.LEFT_ARROW -const var"<-->" = _T.DOUBLE_ARROW -const var"←" = _T.LEFTWARDS_ARROW -const var"→" = _T.RIGHTWARDS_ARROW -const var"↔" = _T.LEFT_RIGHT_ARROW -const var"↚" = _T.LEFTWARDS_ARROW_WITH_STROKE -const var"↛" = _T.RIGHTWARDS_ARROW_WITH_STROKE -const var"↞" = _T.LEFTWARDS_TWO_HEADED_ARROW -const var"↠" = _T.RIGHTWARDS_TWO_HEADED_ARROW -const var"↢" = _T.LEFTWARDS_ARROW_WITH_TAIL -const var"↣" = _T.RIGHTWARDS_ARROW_WITH_TAIL -const var"↤" = _T.LEFTWARDS_ARROW_FROM_BAR -const var"↦" = _T.RIGHTWARDS_ARROW_FROM_BAR -const var"↮" = _T.LEFT_RIGHT_ARROW_WITH_STROKE -const var"⇎" = _T.LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE -const var"⇍" = _T.LEFTWARDS_DOUBLE_ARROW_WITH_STROKE -const var"⇏" = _T.RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE -const var"⇐" = _T.LEFTWARDS_DOUBLE_ARROW -const var"⇒" = _T.RIGHTWARDS_DOUBLE_ARROW -const var"⇔" = _T.LEFT_RIGHT_DOUBLE_ARROW -const var"⇴" = _T.RIGHT_ARROW_WITH_SMALL_CIRCLE -const var"⇶" = _T.THREE_RIGHTWARDS_ARROWS -const var"⇷" = _T.LEFTWARDS_ARROW_WITH_VERTICAL_STROKE -const var"⇸" = _T.RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE -const var"⇹" = _T.LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE -const var"⇺" = _T.LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⇻" = _T.RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⇼" = _T.LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⇽" = _T.LEFTWARDS_OPEN_HEADED_ARROW -const var"⇾" = _T.RIGHTWARDS_OPEN_HEADED_ARROW -const var"⇿" = 
_T.LEFT_RIGHT_OPEN_HEADED_ARROW -const var"⟵" = _T.LONG_LEFTWARDS_ARROW -const var"⟶" = _T.LONG_RIGHTWARDS_ARROW -const var"⟷" = _T.LONG_LEFT_RIGHT_ARROW -const var"⟹" = _T.LONG_RIGHTWARDS_DOUBLE_ARROW -const var"⟺" = _T.LONG_LEFT_RIGHT_DOUBLE_ARROW -const var"⟻" = _T.LONG_LEFTWARDS_ARROW_FROM_BAR -const var"⟼" = _T.LONG_RIGHTWARDS_ARROW_FROM_BAR -const var"⟽" = _T.LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⟾" = _T.LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⟿" = _T.LONG_RIGHTWARDS_SQUIGGLE_ARROW -const var"⤀" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE -const var"⤁" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⤂" = _T.LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE -const var"⤃" = _T.RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE -const var"⤄" = _T.LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE -const var"⤅" = _T.RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR -const var"⤆" = _T.LEFTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⤇" = _T.RIGHTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⤌" = _T.LEFTWARDS_DOUBLE_DASH_ARROW -const var"⤍" = _T.RIGHTWARDS_DOUBLE_DASH_ARROW -const var"⤎" = _T.LEFTWARDS_TRIPLE_DASH_ARROW -const var"⤏" = _T.RIGHTWARDS_TRIPLE_DASH_ARROW -const var"⤐" = _T.RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW -const var"⤑" = _T.RIGHTWARDS_ARROW_WITH_DOTTED_STEM -const var"⤔" = _T.RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⤕" = _T.RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⤖" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL -const var"⤗" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⤘" = _T.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⤝" = _T.LEFTWARDS_ARROW_TO_BLACK_DIAMOND -const var"⤞" = _T.RIGHTWARDS_ARROW_TO_BLACK_DIAMOND -const var"⤟" = _T.LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND -const var"⤠" = _T.RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND -const var"⥄" = _T.SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW -const var"⥅" = 
_T.RIGHTWARDS_ARROW_WITH_PLUS_BELOW -const var"⥆" = _T.LEFTWARDS_ARROW_WITH_PLUS_BELOW -const var"⥇" = _T.RIGHTWARDS_ARROW_THROUGH_X -const var"⥈" = _T.LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE -const var"⥊" = _T.LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON -const var"⥋" = _T.LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON -const var"⥎" = _T.LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON -const var"⥐" = _T.LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON -const var"⥒" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR -const var"⥓" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR -const var"⥖" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR -const var"⥗" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR -const var"⥚" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR -const var"⥛" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR -const var"⥞" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR -const var"⥟" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR -const var"⥢" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥤" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥦" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP -const var"⥧" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥨" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP -const var"⥩" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥪" = _T.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH -const var"⥫" = _T.LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH -const var"⥬" = _T.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH -const var"⥭" = _T.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH -const var"⥰" = _T.RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD -const var"⧴" = _T.RULE_DELAYED -const var"⬱" = _T.THREE_LEFTWARDS_ARROWS -const var"⬰" = _T.LEFT_ARROW_WITH_SMALL_CIRCLE -const var"⬲" = _T.LEFT_ARROW_WITH_CIRCLED_PLUS -const var"⬳" = _T.LONG_LEFTWARDS_SQUIGGLE_ARROW -const var"⬴" = 
_T.LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE -const var"⬵" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⬶" = _T.LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR -const var"⬷" = _T.LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW -const var"⬸" = _T.LEFTWARDS_ARROW_WITH_DOTTED_STEM -const var"⬹" = _T.LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⬺" = _T.LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⬻" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL -const var"⬼" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⬽" = _T.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⬾" = _T.LEFTWARDS_ARROW_THROUGH_X -const var"⬿" = _T.WAVE_ARROW_POINTING_DIRECTLY_LEFT -const var"⭀" = _T.EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW -const var"⭁" = _T.REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW -const var"⭂" = _T.LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO -const var"⭃" = _T.RIGHTWARDS_ARROW_THROUGH_GREATER_THAN -const var"⭄" = _T.RIGHTWARDS_ARROW_THROUGH_SUPERSET -const var"⭇" = _T.REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW -const var"⭈" = _T.RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO -const var"⭉" = _T.TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW -const var"⭊" = _T.LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO -const var"⭋" = _T.LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR -const var"⭌" = _T.RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR -const var"←" = _T.HALFWIDTH_LEFTWARDS_ARROW -const var"→" = _T.HALFWIDTH_RIGHTWARDS_ARROW -const var"↻" = _T.CIRCLE_ARROW_RIGHT -const var"⇜" = _T.LEFT_SQUIGGLE_ARROW -const var"⇝" = _T.RIGHT_SQUIGGLE_ARROW -const var"↜" = _T.LEFT_WAVE_ARROW -const var"↝" = _T.RIGHT_WAVE_ARROW -const var"↩" = _T.LEFTWARDS_ARROW_WITH_HOOK -const var"↪" = _T.RIGHTWARDS_ARROW_WITH_HOOK -const var"↫" = _T.LOOP_ARROW_LEFT -const var"↬" = _T.LOOP_ARROW_RIGHT -const var"↼" = _T.LEFT_HARPOON_UP -const var"↽" = _T.LEFT_HARPOON_DOWN -const var"⇀" = _T.RIGHT_HARPOON_UP -const var"⇁" = _T.RIGHT_HARPOON_DOWN 
-const var"⇄" = _T.RIGHT_LEFT_ARROWS -const var"⇆" = _T.LEFT_RIGHT_ARROWS -const var"⇇" = _T.LEFT_LEFT_ARROWS -const var"⇉" = _T.RIGHT_RIGHT_ARROWS -const var"⇋" = _T.LEFT_RIGHT_HARPOONS -const var"⇌" = _T.RIGHT_LEFT_HARPOONS -const var"⇚" = _T.L_LEFT_ARROW -const var"⇛" = _T.R_RIGHT_ARROW -const var"⇠" = _T.LEFT_DASH_ARROW -const var"⇢" = _T.RIGHT_DASH_ARROW -const var"↷" = _T.CURVE_ARROW_RIGHT -const var"↶" = _T.CURVE_ARROW_LEFT -const var"↺" = _T.CIRCLE_ARROW_LEFT -const END_ARROW = _T.end_arrow +const BEGIN_ARROW = @_K begin_arrow +const var"-->" = @_K RIGHT_ARROW +const var"<--" = @_K LEFT_ARROW +const var"<-->" = @_K DOUBLE_ARROW +const var"←" = @_K LEFTWARDS_ARROW +const var"→" = @_K RIGHTWARDS_ARROW +const var"↔" = @_K LEFT_RIGHT_ARROW +const var"↚" = @_K LEFTWARDS_ARROW_WITH_STROKE +const var"↛" = @_K RIGHTWARDS_ARROW_WITH_STROKE +const var"↞" = @_K LEFTWARDS_TWO_HEADED_ARROW +const var"↠" = @_K RIGHTWARDS_TWO_HEADED_ARROW +const var"↢" = @_K LEFTWARDS_ARROW_WITH_TAIL +const var"↣" = @_K RIGHTWARDS_ARROW_WITH_TAIL +const var"↤" = @_K LEFTWARDS_ARROW_FROM_BAR +const var"↦" = @_K RIGHTWARDS_ARROW_FROM_BAR +const var"↮" = @_K LEFT_RIGHT_ARROW_WITH_STROKE +const var"⇎" = @_K LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE +const var"⇍" = @_K LEFTWARDS_DOUBLE_ARROW_WITH_STROKE +const var"⇏" = @_K RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE +const var"⇐" = @_K LEFTWARDS_DOUBLE_ARROW +const var"⇒" = @_K RIGHTWARDS_DOUBLE_ARROW +const var"⇔" = @_K LEFT_RIGHT_DOUBLE_ARROW +const var"⇴" = @_K RIGHT_ARROW_WITH_SMALL_CIRCLE +const var"⇶" = @_K THREE_RIGHTWARDS_ARROWS +const var"⇷" = @_K LEFTWARDS_ARROW_WITH_VERTICAL_STROKE +const var"⇸" = @_K RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE +const var"⇹" = @_K LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE +const var"⇺" = @_K LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⇻" = @_K RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⇼" = @_K LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⇽" = @_K LEFTWARDS_OPEN_HEADED_ARROW +const var"⇾" 
= @_K RIGHTWARDS_OPEN_HEADED_ARROW +const var"⇿" = @_K LEFT_RIGHT_OPEN_HEADED_ARROW +const var"⟵" = @_K LONG_LEFTWARDS_ARROW +const var"⟶" = @_K LONG_RIGHTWARDS_ARROW +const var"⟷" = @_K LONG_LEFT_RIGHT_ARROW +const var"⟹" = @_K LONG_RIGHTWARDS_DOUBLE_ARROW +const var"⟺" = @_K LONG_LEFT_RIGHT_DOUBLE_ARROW +const var"⟻" = @_K LONG_LEFTWARDS_ARROW_FROM_BAR +const var"⟼" = @_K LONG_RIGHTWARDS_ARROW_FROM_BAR +const var"⟽" = @_K LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⟾" = @_K LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⟿" = @_K LONG_RIGHTWARDS_SQUIGGLE_ARROW +const var"⤀" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE +const var"⤁" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⤂" = @_K LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE +const var"⤃" = @_K RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE +const var"⤄" = @_K LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE +const var"⤅" = @_K RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR +const var"⤆" = @_K LEFTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⤇" = @_K RIGHTWARDS_DOUBLE_ARROW_FROM_BAR +const var"⤌" = @_K LEFTWARDS_DOUBLE_DASH_ARROW +const var"⤍" = @_K RIGHTWARDS_DOUBLE_DASH_ARROW +const var"⤎" = @_K LEFTWARDS_TRIPLE_DASH_ARROW +const var"⤏" = @_K RIGHTWARDS_TRIPLE_DASH_ARROW +const var"⤐" = @_K RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW +const var"⤑" = @_K RIGHTWARDS_ARROW_WITH_DOTTED_STEM +const var"⤔" = @_K RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⤕" = @_K RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⤖" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL +const var"⤗" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⤘" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⤝" = @_K LEFTWARDS_ARROW_TO_BLACK_DIAMOND +const var"⤞" = @_K RIGHTWARDS_ARROW_TO_BLACK_DIAMOND +const var"⤟" = @_K LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND +const var"⤠" = @_K RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND +const var"⥄" = 
@_K SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW +const var"⥅" = @_K RIGHTWARDS_ARROW_WITH_PLUS_BELOW +const var"⥆" = @_K LEFTWARDS_ARROW_WITH_PLUS_BELOW +const var"⥇" = @_K RIGHTWARDS_ARROW_THROUGH_X +const var"⥈" = @_K LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE +const var"⥊" = @_K LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON +const var"⥋" = @_K LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON +const var"⥎" = @_K LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON +const var"⥐" = @_K LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON +const var"⥒" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR +const var"⥓" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR +const var"⥖" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR +const var"⥗" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR +const var"⥚" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR +const var"⥛" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR +const var"⥞" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR +const var"⥟" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR +const var"⥢" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥤" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥦" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP +const var"⥧" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥨" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP +const var"⥩" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN +const var"⥪" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH +const var"⥫" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH +const var"⥬" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH +const var"⥭" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH +const var"⥰" = @_K RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD +const var"⧴" = @_K RULE_DELAYED +const var"⬱" = @_K THREE_LEFTWARDS_ARROWS +const var"⬰" = @_K LEFT_ARROW_WITH_SMALL_CIRCLE +const var"⬲" = @_K 
LEFT_ARROW_WITH_CIRCLED_PLUS +const var"⬳" = @_K LONG_LEFTWARDS_SQUIGGLE_ARROW +const var"⬴" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE +const var"⬵" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE +const var"⬶" = @_K LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR +const var"⬷" = @_K LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW +const var"⬸" = @_K LEFTWARDS_ARROW_WITH_DOTTED_STEM +const var"⬹" = @_K LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⬺" = @_K LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⬻" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL +const var"⬼" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +const var"⬽" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +const var"⬾" = @_K LEFTWARDS_ARROW_THROUGH_X +const var"⬿" = @_K WAVE_ARROW_POINTING_DIRECTLY_LEFT +const var"⭀" = @_K EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW +const var"⭁" = @_K REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW +const var"⭂" = @_K LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO +const var"⭃" = @_K RIGHTWARDS_ARROW_THROUGH_GREATER_THAN +const var"⭄" = @_K RIGHTWARDS_ARROW_THROUGH_SUPERSET +const var"⭇" = @_K REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW +const var"⭈" = @_K RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO +const var"⭉" = @_K TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW +const var"⭊" = @_K LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO +const var"⭋" = @_K LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR +const var"⭌" = @_K RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR +const var"←" = @_K HALFWIDTH_LEFTWARDS_ARROW +const var"→" = @_K HALFWIDTH_RIGHTWARDS_ARROW +const var"↻" = @_K CIRCLE_ARROW_RIGHT +const var"⇜" = @_K LEFT_SQUIGGLE_ARROW +const var"⇝" = @_K RIGHT_SQUIGGLE_ARROW +const var"↜" = @_K LEFT_WAVE_ARROW +const var"↝" = @_K RIGHT_WAVE_ARROW +const var"↩" = @_K LEFTWARDS_ARROW_WITH_HOOK +const var"↪" = @_K RIGHTWARDS_ARROW_WITH_HOOK +const var"↫" = @_K LOOP_ARROW_LEFT +const var"↬" = @_K LOOP_ARROW_RIGHT +const var"↼" = @_K 
LEFT_HARPOON_UP +const var"↽" = @_K LEFT_HARPOON_DOWN +const var"⇀" = @_K RIGHT_HARPOON_UP +const var"⇁" = @_K RIGHT_HARPOON_DOWN +const var"⇄" = @_K RIGHT_LEFT_ARROWS +const var"⇆" = @_K LEFT_RIGHT_ARROWS +const var"⇇" = @_K LEFT_LEFT_ARROWS +const var"⇉" = @_K RIGHT_RIGHT_ARROWS +const var"⇋" = @_K LEFT_RIGHT_HARPOONS +const var"⇌" = @_K RIGHT_LEFT_HARPOONS +const var"⇚" = @_K L_LEFT_ARROW +const var"⇛" = @_K R_RIGHT_ARROW +const var"⇠" = @_K LEFT_DASH_ARROW +const var"⇢" = @_K RIGHT_DASH_ARROW +const var"↷" = @_K CURVE_ARROW_RIGHT +const var"↶" = @_K CURVE_ARROW_LEFT +const var"↺" = @_K CIRCLE_ARROW_LEFT +const END_ARROW = @_K end_arrow # Level 4 -const BEGIN_LAZYOR = _T.begin_lazyor -const var"||" = _T.LAZY_OR -const END_LAZYOR = _T.end_lazyor +const BEGIN_LAZYOR = @_K begin_lazyor +const var"||" = @_K LAZY_OR +const END_LAZYOR = @_K end_lazyor # Level 5 -const BEGIN_LAZYAND = _T.begin_lazyand -const var"&&" = _T.LAZY_AND -const END_LAZYAND = _T.end_lazyand +const BEGIN_LAZYAND = @_K begin_lazyand +const var"&&" = @_K LAZY_AND +const END_LAZYAND = @_K end_lazyand # Level 6 -const BEGIN_COMPARISON = _T.begin_comparison -const var"<:" = _T.ISSUBTYPE -const var">:" = _T.ISSUPERTYPE -const var">" = _T.GREATER -const var"<" = _T.LESS -const var">=" = _T.GREATER_EQ -const var"≥" = _T.GREATER_THAN_OR_EQUAL_TO -const var"<=" = _T.LESS_EQ -const var"≤" = _T.LESS_THAN_OR_EQUAL_TO -const var"==" = _T.EQEQ -const var"===" = _T.EQEQEQ -const var"≡" = _T.IDENTICAL_TO -const var"!=" = _T.NOT_EQ -const var"≠" = _T.NOT_EQUAL_TO -const var"!==" = _T.NOT_IS -const var"≢" = _T.NOT_IDENTICAL_TO -const var"∈" = _T.ELEMENT_OF -const var"in" = _T.IN -const var"isa" = _T.ISA -const var"∉" = _T.NOT_AN_ELEMENT_OF -const var"∋" = _T.CONTAINS_AS_MEMBER -const var"∌" = _T.DOES_NOT_CONTAIN_AS_MEMBER -const var"⊆" = _T.SUBSET_OF_OR_EQUAL_TO -const var"⊈" = _T.NEITHER_A_SUBSET_OF_NOR_EQUAL_TO -const var"⊂" = _T.SUBSET_OF -const var"⊄" = _T.NOT_A_SUBSET_OF -const var"⊊" = 
_T.SUBSET_OF_WITH_NOT_EQUAL_TO -const var"∝" = _T.PROPORTIONAL_TO -const var"∊" = _T.SMALL_ELEMENT_OF -const var"∍" = _T.SMALL_CONTAINS_AS_MEMBER -const var"∥" = _T.PARALLEL_TO -const var"∦" = _T.NOT_PARALLEL_TO -const var"∷" = _T.PROPORTION -const var"∺" = _T.GEOMETRIC_PROPORTION -const var"∻" = _T.HOMOTHETIC -const var"∽" = _T.REVERSED_TILDE -const var"∾" = _T.INVERTED_LAZY_S -const var"≁" = _T.NOT_TILDE -const var"≃" = _T.ASYMPTOTICALLY_EQUAL_TO -const var"≄" = _T.NOT_ASYMPTOTICALLY_EQUAL_TO -const var"≅" = _T.APPROXIMATELY_EQUAL_TO -const var"≆" = _T.APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO -const var"≇" = _T.NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO -const var"≈" = _T.ALMOST_EQUAL_TO -const var"≉" = _T.NOT_ALMOST_EQUAL_TO -const var"≊" = _T.ALMOST_EQUAL_OR_EQUAL_TO -const var"≋" = _T.TRIPLE_TILDE -const var"≌" = _T.ALL_EQUAL_TO -const var"≍" = _T.EQUIVALENT_TO -const var"≎" = _T.GEOMETRICALLY_EQUIVALENT_TO -const var"≐" = _T.APPROACHES_THE_LIMIT -const var"≑" = _T.GEOMETRICALLY_EQUAL_TO -const var"≒" = _T.APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF -const var"≓" = _T.IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO -const var"≔" = _T.COLON_EQUALS -const var"≕" = _T.EQUALS_COLON -const var"≖" = _T.RING_IN_EQUAL_TO -const var"≗" = _T.RING_EQUAL_TO -const var"≘" = _T.CORRESPONDS_TO -const var"≙" = _T.ESTIMATES -const var"≚" = _T.EQUIANGULAR_TO -const var"≛" = _T.STAR_EQUALS -const var"≜" = _T.DELTA_EQUAL_TO -const var"≝" = _T.EQUAL_TO_BY_DEFINITION -const var"≞" = _T.MEASURED_BY -const var"≟" = _T.QUESTIONED_EQUAL_TO -const var"≣" = _T.STRICTLY_EQUIVALENT_TO -const var"≦" = _T.LESS_THAN_OVER_EQUAL_TO -const var"≧" = _T.GREATER_THAN_OVER_EQUAL_TO -const var"≨" = _T.LESS_THAN_BUT_NOT_EQUAL_TO -const var"≩" = _T.GREATER_THAN_BUT_NOT_EQUAL_TO -const var"≪" = _T.MUCH_LESS_THAN -const var"≫" = _T.MUCH_GREATER_THAN -const var"≬" = _T.BETWEEN -const var"≭" = _T.NOT_EQUIVALENT_TO -const var"≮" = _T.NOT_LESS_THAN -const var"≯" = _T.NOT_GREATER_THAN -const var"≰" = 
_T.NEITHER_LESS_THAN_NOR_EQUAL_TO -const var"≱" = _T.NEITHER_GREATER_THAN_NOR_EQUAL_TO -const var"≲" = _T.LESS_THAN_OR_EQUIVALENT_TO -const var"≳" = _T.GREATER_THAN_OR_EQUIVALENT_TO -const var"≴" = _T.NEITHER_LESS_THAN_NOR_EQUIVALENT_TO -const var"≵" = _T.NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO -const var"≶" = _T.LESS_THAN_OR_GREATER_THAN -const var"≷" = _T.GREATER_THAN_OR_LESS_THAN -const var"≸" = _T.NEITHER_LESS_THAN_NOR_GREATER_THAN -const var"≹" = _T.NEITHER_GREATER_THAN_NOR_LESS_THAN -const var"≺" = _T.PRECEDES -const var"≻" = _T.SUCCEEDS -const var"≼" = _T.PRECEDES_OR_EQUAL_TO -const var"≽" = _T.SUCCEEDS_OR_EQUAL_TO -const var"≾" = _T.PRECEDES_OR_EQUIVALENT_TO -const var"≿" = _T.SUCCEEDS_OR_EQUIVALENT_TO -const var"⊀" = _T.DOES_NOT_PRECEDE -const var"⊁" = _T.DOES_NOT_SUCCEED -const var"⊃" = _T.SUPERSET_OF -const var"⊅" = _T.NOT_A_SUPERSET_OF -const var"⊇" = _T.SUPERSET_OF_OR_EQUAL_TO -const var"⊉" = _T.NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO -const var"⊋" = _T.SUPERSET_OF_WITH_NOT_EQUAL_TO -const var"⊏" = _T.SQUARE_IMAGE_OF -const var"⊐" = _T.SQUARE_ORIGINAL_OF -const var"⊑" = _T.SQUARE_IMAGE_OF_OR_EQUAL_TO -const var"⊒" = _T.SQUARE_ORIGINAL_OF_OR_EQUAL_TO -const var"⊜" = _T.CIRCLED_EQUALS -const var"⊩" = _T.FORCES -const var"⊬" = _T.DOES_NOT_PROVE -const var"⊮" = _T.DOES_NOT_FORCE -const var"⊰" = _T.PRECEDES_UNDER_RELATION -const var"⊱" = _T.SUCCEEDS_UNDER_RELATION -const var"⊲" = _T.NORMAL_SUBGROUP_OF -const var"⊳" = _T.CONTAINS_AS_NORMAL_SUBGROUP -const var"⊴" = _T.NORMAL_SUBGROUP_OF_OR_EQUAL_TO -const var"⊵" = _T.CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO -const var"⊶" = _T.ORIGINAL_OF -const var"⊷" = _T.IMAGE_OF -const var"⋍" = _T.REVERSED_TILDE_EQUALS -const var"⋐" = _T.DOUBLE_SUBSET -const var"⋑" = _T.DOUBLE_SUPERSET -const var"⋕" = _T.EQUAL_AND_PARALLEL_TO -const var"⋖" = _T.LESS_THAN_WITH_DOT -const var"⋗" = _T.GREATER_THAN_WITH_DOT -const var"⋘" = _T.VERY_MUCH_LESS_THAN -const var"⋙" = _T.VERY_MUCH_GREATER_THAN -const var"⋚" = 
_T.LESS_THAN_EQUAL_TO_OR_GREATER_THAN -const var"⋛" = _T.GREATER_THAN_EQUAL_TO_OR_LESS_THAN -const var"⋜" = _T.EQUAL_TO_OR_LESS_THAN -const var"⋝" = _T.EQUAL_TO_OR_GREATER_THAN -const var"⋞" = _T.EQUAL_TO_OR_PRECEDES -const var"⋟" = _T.EQUAL_TO_OR_SUCCEEDS -const var"⋠" = _T.DOES_NOT_PRECEDE_OR_EQUAL -const var"⋡" = _T.DOES_NOT_SUCCEED_OR_EQUAL -const var"⋢" = _T.NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO -const var"⋣" = _T.NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO -const var"⋤" = _T.SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO -const var"⋥" = _T.SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO -const var"⋦" = _T.LESS_THAN_BUT_NOT_EQUIVALENT_TO -const var"⋧" = _T.GREATER_THAN_BUT_NOT_EQUIVALENT_TO -const var"⋨" = _T.PRECEDES_BUT_NOT_EQUIVALENT_TO -const var"⋩" = _T.SUCCEEDS_BUT_NOT_EQUIVALENT_TO -const var"⋪" = _T.NOT_NORMAL_SUBGROUP_OF -const var"⋫" = _T.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP -const var"⋬" = _T.NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO -const var"⋭" = _T.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL -const var"⋲" = _T.ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE -const var"⋳" = _T.ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋴" = _T.SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋵" = _T.ELEMENT_OF_WITH_DOT_ABOVE -const var"⋶" = _T.ELEMENT_OF_WITH_OVERBAR -const var"⋷" = _T.SMALL_ELEMENT_OF_WITH_OVERBAR -const var"⋸" = _T.ELEMENT_OF_WITH_UNDERBAR -const var"⋹" = _T.ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES -const var"⋺" = _T.CONTAINS_WITH_LONG_HORIZONTAL_STROKE -const var"⋻" = _T.CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋼" = _T.SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋽" = _T.CONTAINS_WITH_OVERBAR -const var"⋾" = _T.SMALL_CONTAINS_WITH_OVERBAR -const var"⋿" = _T.Z_NOTATION_BAG_MEMBERSHIP -const var"⟈" = _T.REVERSE_SOLIDUS_PRECEDING_SUBSET -const var"⟉" = _T.SUPERSET_PRECEDING_SOLIDUS -const var"⟒" = _T.ELEMENT_OF_OPENING_UPWARDS -const var"⦷" = _T.CIRCLED_PARALLEL -const var"⧀" = _T.CIRCLED_LESS_THAN -const 
var"⧁" = _T.CIRCLED_GREATER_THAN -const var"⧡" = _T.INCREASES_AS -const var"⧣" = _T.EQUALS_SIGN_AND_SLANTED_PARALLEL -const var"⧤" = _T.EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE -const var"⧥" = _T.IDENTICAL_TO_AND_SLANTED_PARALLEL -const var"⩦" = _T.EQUALS_SIGN_WITH_DOT_BELOW -const var"⩧" = _T.IDENTICAL_WITH_DOT_ABOVE -const var"⩪" = _T.TILDE_OPERATOR_WITH_DOT_ABOVE -const var"⩫" = _T.TILDE_OPERATOR_WITH_RISING_DOTS -const var"⩬" = _T.SIMILAR_MINUS_SIMILAR -const var"⩭" = _T.CONGRUENT_WITH_DOT_ABOVE -const var"⩮" = _T.EQUALS_WITH_ASTERISK -const var"⩯" = _T.ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT -const var"⩰" = _T.APPROXIMATELY_EQUAL_OR_EQUAL_TO -const var"⩱" = _T.EQUALS_SIGN_ABOVE_PLUS_SIGN -const var"⩲" = _T.PLUS_SIGN_ABOVE_EQUALS_SIGN -const var"⩳" = _T.EQUALS_SIGN_ABOVE_TILDE_OPERATOR -const var"⩴" = _T.DOUBLE_COLON_EQUAL -const var"⩵" = _T.TWO_CONSECUTIVE_EQUALS_SIGNS -const var"⩶" = _T.THREE_CONSECUTIVE_EQUALS_SIGNS -const var"⩷" = _T.EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW -const var"⩸" = _T.EQUIVALENT_WITH_FOUR_DOTS_ABOVE -const var"⩹" = _T.LESS_THAN_WITH_CIRCLE_INSIDE -const var"⩺" = _T.GREATER_THAN_WITH_CIRCLE_INSIDE -const var"⩻" = _T.LESS_THAN_WITH_QUESTION_MARK_ABOVE -const var"⩼" = _T.GREATER_THAN_WITH_QUESTION_MARK_ABOVE -const var"⩽" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO -const var"⩾" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO -const var"⩿" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE -const var"⪀" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE -const var"⪁" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE -const var"⪂" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE -const var"⪃" = _T.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT -const var"⪄" = _T.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT -const var"⪅" = _T.LESS_THAN_OR_APPROXIMATE -const var"⪆" = _T.GREATER_THAN_OR_APPROXIMATE -const var"⪇" = _T.LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO -const var"⪈" = _T.GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO -const 
var"⪉" = _T.LESS_THAN_AND_NOT_APPROXIMATE -const var"⪊" = _T.GREATER_THAN_AND_NOT_APPROXIMATE -const var"⪋" = _T.LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN -const var"⪌" = _T.GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN -const var"⪍" = _T.LESS_THAN_ABOVE_SIMILAR_OR_EQUAL -const var"⪎" = _T.GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL -const var"⪏" = _T.LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN -const var"⪐" = _T.GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN -const var"⪑" = _T.LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL -const var"⪒" = _T.GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL -const var"⪓" = _T.LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL -const var"⪔" = _T.GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL -const var"⪕" = _T.SLANTED_EQUAL_TO_OR_LESS_THAN -const var"⪖" = _T.SLANTED_EQUAL_TO_OR_GREATER_THAN -const var"⪗" = _T.SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE -const var"⪘" = _T.SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE -const var"⪙" = _T.DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN -const var"⪚" = _T.DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN -const var"⪛" = _T.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN -const var"⪜" = _T.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN -const var"⪝" = _T.SIMILAR_OR_LESS_THAN -const var"⪞" = _T.SIMILAR_OR_GREATER_THAN -const var"⪟" = _T.SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN -const var"⪠" = _T.SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN -const var"⪡" = _T.DOUBLE_NESTED_LESS_THAN -const var"⪢" = _T.DOUBLE_NESTED_GREATER_THAN -const var"⪣" = _T.DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR -const var"⪤" = _T.GREATER_THAN_OVERLAPPING_LESS_THAN -const var"⪥" = _T.GREATER_THAN_BESIDE_LESS_THAN -const var"⪦" = _T.LESS_THAN_CLOSED_BY_CURVE -const var"⪧" = _T.GREATER_THAN_CLOSED_BY_CURVE -const var"⪨" = _T.LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL -const var"⪩" = _T.GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL -const var"⪪" = _T.SMALLER_THAN -const var"⪫" = 
_T.LARGER_THAN -const var"⪬" = _T.SMALLER_THAN_OR_EQUAL_TO -const var"⪭" = _T.LARGER_THAN_OR_EQUAL_TO -const var"⪮" = _T.EQUALS_SIGN_WITH_BUMPY_ABOVE -const var"⪯" = _T.PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN -const var"⪰" = _T.SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN -const var"⪱" = _T.PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO -const var"⪲" = _T.SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO -const var"⪳" = _T.PRECEDES_ABOVE_EQUALS_SIGN -const var"⪴" = _T.SUCCEEDS_ABOVE_EQUALS_SIGN -const var"⪵" = _T.PRECEDES_ABOVE_NOT_EQUAL_TO -const var"⪶" = _T.SUCCEEDS_ABOVE_NOT_EQUAL_TO -const var"⪷" = _T.PRECEDES_ABOVE_ALMOST_EQUAL_TO -const var"⪸" = _T.SUCCEEDS_ABOVE_ALMOST_EQUAL_TO -const var"⪹" = _T.PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO -const var"⪺" = _T.SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO -const var"⪻" = _T.DOUBLE_PRECEDES -const var"⪼" = _T.DOUBLE_SUCCEEDS -const var"⪽" = _T.SUBSET_WITH_DOT -const var"⪾" = _T.SUPERSET_WITH_DOT -const var"⪿" = _T.SUBSET_WITH_PLUS_SIGN_BELOW -const var"⫀" = _T.SUPERSET_WITH_PLUS_SIGN_BELOW -const var"⫁" = _T.SUBSET_WITH_MULTIPLICATION_SIGN_BELOW -const var"⫂" = _T.SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW -const var"⫃" = _T.SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE -const var"⫄" = _T.SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE -const var"⫅" = _T.SUBSET_OF_ABOVE_EQUALS_SIGN -const var"⫆" = _T.SUPERSET_OF_ABOVE_EQUALS_SIGN -const var"⫇" = _T.SUBSET_OF_ABOVE_TILDE_OPERATOR -const var"⫈" = _T.SUPERSET_OF_ABOVE_TILDE_OPERATOR -const var"⫉" = _T.SUBSET_OF_ABOVE_ALMOST_EQUAL_TO -const var"⫊" = _T.SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO -const var"⫋" = _T.SUBSET_OF_ABOVE_NOT_EQUAL_TO -const var"⫌" = _T.SUPERSET_OF_ABOVE_NOT_EQUAL_TO -const var"⫍" = _T.SQUARE_LEFT_OPEN_BOX_OPERATOR -const var"⫎" = _T.SQUARE_RIGHT_OPEN_BOX_OPERATOR -const var"⫏" = _T.CLOSED_SUBSET -const var"⫐" = _T.CLOSED_SUPERSET -const var"⫑" = _T.CLOSED_SUBSET_OR_EQUAL_TO -const var"⫒" = _T.CLOSED_SUPERSET_OR_EQUAL_TO -const var"⫓" = _T.SUBSET_ABOVE_SUPERSET -const var"⫔" = _T.SUPERSET_ABOVE_SUBSET -const 
var"⫕" = _T.SUBSET_ABOVE_SUBSET -const var"⫖" = _T.SUPERSET_ABOVE_SUPERSET -const var"⫗" = _T.SUPERSET_BESIDE_SUBSET -const var"⫘" = _T.SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET -const var"⫙" = _T.ELEMENT_OF_OPENING_DOWNWARDS -const var"⫷" = _T.TRIPLE_NESTED_LESS_THAN -const var"⫸" = _T.TRIPLE_NESTED_GREATER_THAN -const var"⫹" = _T.DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO -const var"⫺" = _T.DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO -const var"⊢" = _T.RIGHT_TACK -const var"⊣" = _T.LEFT_TACK -const var"⟂" = _T.PERP -const END_COMPARISON = _T.end_comparison +const BEGIN_COMPARISON = @_K begin_comparison +const var"<:" = @_K ISSUBTYPE +const var">:" = @_K ISSUPERTYPE +const var">" = @_K GREATER +const var"<" = @_K LESS +const var">=" = @_K GREATER_EQ +const var"≥" = @_K GREATER_THAN_OR_EQUAL_TO +const var"<=" = @_K LESS_EQ +const var"≤" = @_K LESS_THAN_OR_EQUAL_TO +const var"==" = @_K EQEQ +const var"===" = @_K EQEQEQ +const var"≡" = @_K IDENTICAL_TO +const var"!=" = @_K NOT_EQ +const var"≠" = @_K NOT_EQUAL_TO +const var"!==" = @_K NOT_IS +const var"≢" = @_K NOT_IDENTICAL_TO +const var"∈" = @_K ELEMENT_OF +const var"in" = @_K IN +const var"isa" = @_K ISA +const var"∉" = @_K NOT_AN_ELEMENT_OF +const var"∋" = @_K CONTAINS_AS_MEMBER +const var"∌" = @_K DOES_NOT_CONTAIN_AS_MEMBER +const var"⊆" = @_K SUBSET_OF_OR_EQUAL_TO +const var"⊈" = @_K NEITHER_A_SUBSET_OF_NOR_EQUAL_TO +const var"⊂" = @_K SUBSET_OF +const var"⊄" = @_K NOT_A_SUBSET_OF +const var"⊊" = @_K SUBSET_OF_WITH_NOT_EQUAL_TO +const var"∝" = @_K PROPORTIONAL_TO +const var"∊" = @_K SMALL_ELEMENT_OF +const var"∍" = @_K SMALL_CONTAINS_AS_MEMBER +const var"∥" = @_K PARALLEL_TO +const var"∦" = @_K NOT_PARALLEL_TO +const var"∷" = @_K PROPORTION +const var"∺" = @_K GEOMETRIC_PROPORTION +const var"∻" = @_K HOMOTHETIC +const var"∽" = @_K REVERSED_TILDE +const var"∾" = @_K INVERTED_LAZY_S +const var"≁" = @_K NOT_TILDE +const var"≃" = @_K ASYMPTOTICALLY_EQUAL_TO +const var"≄" = @_K NOT_ASYMPTOTICALLY_EQUAL_TO +const 
var"≅" = @_K APPROXIMATELY_EQUAL_TO +const var"≆" = @_K APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO +const var"≇" = @_K NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO +const var"≈" = @_K ALMOST_EQUAL_TO +const var"≉" = @_K NOT_ALMOST_EQUAL_TO +const var"≊" = @_K ALMOST_EQUAL_OR_EQUAL_TO +const var"≋" = @_K TRIPLE_TILDE +const var"≌" = @_K ALL_EQUAL_TO +const var"≍" = @_K EQUIVALENT_TO +const var"≎" = @_K GEOMETRICALLY_EQUIVALENT_TO +const var"≐" = @_K APPROACHES_THE_LIMIT +const var"≑" = @_K GEOMETRICALLY_EQUAL_TO +const var"≒" = @_K APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF +const var"≓" = @_K IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO +const var"≔" = @_K COLON_EQUALS +const var"≕" = @_K EQUALS_COLON +const var"≖" = @_K RING_IN_EQUAL_TO +const var"≗" = @_K RING_EQUAL_TO +const var"≘" = @_K CORRESPONDS_TO +const var"≙" = @_K ESTIMATES +const var"≚" = @_K EQUIANGULAR_TO +const var"≛" = @_K STAR_EQUALS +const var"≜" = @_K DELTA_EQUAL_TO +const var"≝" = @_K EQUAL_TO_BY_DEFINITION +const var"≞" = @_K MEASURED_BY +const var"≟" = @_K QUESTIONED_EQUAL_TO +const var"≣" = @_K STRICTLY_EQUIVALENT_TO +const var"≦" = @_K LESS_THAN_OVER_EQUAL_TO +const var"≧" = @_K GREATER_THAN_OVER_EQUAL_TO +const var"≨" = @_K LESS_THAN_BUT_NOT_EQUAL_TO +const var"≩" = @_K GREATER_THAN_BUT_NOT_EQUAL_TO +const var"≪" = @_K MUCH_LESS_THAN +const var"≫" = @_K MUCH_GREATER_THAN +const var"≬" = @_K BETWEEN +const var"≭" = @_K NOT_EQUIVALENT_TO +const var"≮" = @_K NOT_LESS_THAN +const var"≯" = @_K NOT_GREATER_THAN +const var"≰" = @_K NEITHER_LESS_THAN_NOR_EQUAL_TO +const var"≱" = @_K NEITHER_GREATER_THAN_NOR_EQUAL_TO +const var"≲" = @_K LESS_THAN_OR_EQUIVALENT_TO +const var"≳" = @_K GREATER_THAN_OR_EQUIVALENT_TO +const var"≴" = @_K NEITHER_LESS_THAN_NOR_EQUIVALENT_TO +const var"≵" = @_K NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO +const var"≶" = @_K LESS_THAN_OR_GREATER_THAN +const var"≷" = @_K GREATER_THAN_OR_LESS_THAN +const var"≸" = @_K NEITHER_LESS_THAN_NOR_GREATER_THAN +const var"≹" = @_K 
NEITHER_GREATER_THAN_NOR_LESS_THAN +const var"≺" = @_K PRECEDES +const var"≻" = @_K SUCCEEDS +const var"≼" = @_K PRECEDES_OR_EQUAL_TO +const var"≽" = @_K SUCCEEDS_OR_EQUAL_TO +const var"≾" = @_K PRECEDES_OR_EQUIVALENT_TO +const var"≿" = @_K SUCCEEDS_OR_EQUIVALENT_TO +const var"⊀" = @_K DOES_NOT_PRECEDE +const var"⊁" = @_K DOES_NOT_SUCCEED +const var"⊃" = @_K SUPERSET_OF +const var"⊅" = @_K NOT_A_SUPERSET_OF +const var"⊇" = @_K SUPERSET_OF_OR_EQUAL_TO +const var"⊉" = @_K NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO +const var"⊋" = @_K SUPERSET_OF_WITH_NOT_EQUAL_TO +const var"⊏" = @_K SQUARE_IMAGE_OF +const var"⊐" = @_K SQUARE_ORIGINAL_OF +const var"⊑" = @_K SQUARE_IMAGE_OF_OR_EQUAL_TO +const var"⊒" = @_K SQUARE_ORIGINAL_OF_OR_EQUAL_TO +const var"⊜" = @_K CIRCLED_EQUALS +const var"⊩" = @_K FORCES +const var"⊬" = @_K DOES_NOT_PROVE +const var"⊮" = @_K DOES_NOT_FORCE +const var"⊰" = @_K PRECEDES_UNDER_RELATION +const var"⊱" = @_K SUCCEEDS_UNDER_RELATION +const var"⊲" = @_K NORMAL_SUBGROUP_OF +const var"⊳" = @_K CONTAINS_AS_NORMAL_SUBGROUP +const var"⊴" = @_K NORMAL_SUBGROUP_OF_OR_EQUAL_TO +const var"⊵" = @_K CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO +const var"⊶" = @_K ORIGINAL_OF +const var"⊷" = @_K IMAGE_OF +const var"⋍" = @_K REVERSED_TILDE_EQUALS +const var"⋐" = @_K DOUBLE_SUBSET +const var"⋑" = @_K DOUBLE_SUPERSET +const var"⋕" = @_K EQUAL_AND_PARALLEL_TO +const var"⋖" = @_K LESS_THAN_WITH_DOT +const var"⋗" = @_K GREATER_THAN_WITH_DOT +const var"⋘" = @_K VERY_MUCH_LESS_THAN +const var"⋙" = @_K VERY_MUCH_GREATER_THAN +const var"⋚" = @_K LESS_THAN_EQUAL_TO_OR_GREATER_THAN +const var"⋛" = @_K GREATER_THAN_EQUAL_TO_OR_LESS_THAN +const var"⋜" = @_K EQUAL_TO_OR_LESS_THAN +const var"⋝" = @_K EQUAL_TO_OR_GREATER_THAN +const var"⋞" = @_K EQUAL_TO_OR_PRECEDES +const var"⋟" = @_K EQUAL_TO_OR_SUCCEEDS +const var"⋠" = @_K DOES_NOT_PRECEDE_OR_EQUAL +const var"⋡" = @_K DOES_NOT_SUCCEED_OR_EQUAL +const var"⋢" = @_K NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO +const var"⋣" = @_K 
NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO +const var"⋤" = @_K SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO +const var"⋥" = @_K SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO +const var"⋦" = @_K LESS_THAN_BUT_NOT_EQUIVALENT_TO +const var"⋧" = @_K GREATER_THAN_BUT_NOT_EQUIVALENT_TO +const var"⋨" = @_K PRECEDES_BUT_NOT_EQUIVALENT_TO +const var"⋩" = @_K SUCCEEDS_BUT_NOT_EQUIVALENT_TO +const var"⋪" = @_K NOT_NORMAL_SUBGROUP_OF +const var"⋫" = @_K DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP +const var"⋬" = @_K NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO +const var"⋭" = @_K DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL +const var"⋲" = @_K ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE +const var"⋳" = @_K ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋴" = @_K SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋵" = @_K ELEMENT_OF_WITH_DOT_ABOVE +const var"⋶" = @_K ELEMENT_OF_WITH_OVERBAR +const var"⋷" = @_K SMALL_ELEMENT_OF_WITH_OVERBAR +const var"⋸" = @_K ELEMENT_OF_WITH_UNDERBAR +const var"⋹" = @_K ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES +const var"⋺" = @_K CONTAINS_WITH_LONG_HORIZONTAL_STROKE +const var"⋻" = @_K CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋼" = @_K SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +const var"⋽" = @_K CONTAINS_WITH_OVERBAR +const var"⋾" = @_K SMALL_CONTAINS_WITH_OVERBAR +const var"⋿" = @_K Z_NOTATION_BAG_MEMBERSHIP +const var"⟈" = @_K REVERSE_SOLIDUS_PRECEDING_SUBSET +const var"⟉" = @_K SUPERSET_PRECEDING_SOLIDUS +const var"⟒" = @_K ELEMENT_OF_OPENING_UPWARDS +const var"⦷" = @_K CIRCLED_PARALLEL +const var"⧀" = @_K CIRCLED_LESS_THAN +const var"⧁" = @_K CIRCLED_GREATER_THAN +const var"⧡" = @_K INCREASES_AS +const var"⧣" = @_K EQUALS_SIGN_AND_SLANTED_PARALLEL +const var"⧤" = @_K EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE +const var"⧥" = @_K IDENTICAL_TO_AND_SLANTED_PARALLEL +const var"⩦" = @_K EQUALS_SIGN_WITH_DOT_BELOW +const var"⩧" = @_K IDENTICAL_WITH_DOT_ABOVE +const var"⩪" = @_K TILDE_OPERATOR_WITH_DOT_ABOVE 
+const var"⩫" = @_K TILDE_OPERATOR_WITH_RISING_DOTS +const var"⩬" = @_K SIMILAR_MINUS_SIMILAR +const var"⩭" = @_K CONGRUENT_WITH_DOT_ABOVE +const var"⩮" = @_K EQUALS_WITH_ASTERISK +const var"⩯" = @_K ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT +const var"⩰" = @_K APPROXIMATELY_EQUAL_OR_EQUAL_TO +const var"⩱" = @_K EQUALS_SIGN_ABOVE_PLUS_SIGN +const var"⩲" = @_K PLUS_SIGN_ABOVE_EQUALS_SIGN +const var"⩳" = @_K EQUALS_SIGN_ABOVE_TILDE_OPERATOR +const var"⩴" = @_K DOUBLE_COLON_EQUAL +const var"⩵" = @_K TWO_CONSECUTIVE_EQUALS_SIGNS +const var"⩶" = @_K THREE_CONSECUTIVE_EQUALS_SIGNS +const var"⩷" = @_K EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW +const var"⩸" = @_K EQUIVALENT_WITH_FOUR_DOTS_ABOVE +const var"⩹" = @_K LESS_THAN_WITH_CIRCLE_INSIDE +const var"⩺" = @_K GREATER_THAN_WITH_CIRCLE_INSIDE +const var"⩻" = @_K LESS_THAN_WITH_QUESTION_MARK_ABOVE +const var"⩼" = @_K GREATER_THAN_WITH_QUESTION_MARK_ABOVE +const var"⩽" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO +const var"⩾" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO +const var"⩿" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE +const var"⪀" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE +const var"⪁" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE +const var"⪂" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE +const var"⪃" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT +const var"⪄" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT +const var"⪅" = @_K LESS_THAN_OR_APPROXIMATE +const var"⪆" = @_K GREATER_THAN_OR_APPROXIMATE +const var"⪇" = @_K LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO +const var"⪈" = @_K GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO +const var"⪉" = @_K LESS_THAN_AND_NOT_APPROXIMATE +const var"⪊" = @_K GREATER_THAN_AND_NOT_APPROXIMATE +const var"⪋" = @_K LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN +const var"⪌" = @_K GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN +const var"⪍" = @_K LESS_THAN_ABOVE_SIMILAR_OR_EQUAL +const var"⪎" = @_K 
GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL +const var"⪏" = @_K LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN +const var"⪐" = @_K GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN +const var"⪑" = @_K LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL +const var"⪒" = @_K GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL +const var"⪓" = @_K LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL +const var"⪔" = @_K GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL +const var"⪕" = @_K SLANTED_EQUAL_TO_OR_LESS_THAN +const var"⪖" = @_K SLANTED_EQUAL_TO_OR_GREATER_THAN +const var"⪗" = @_K SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE +const var"⪘" = @_K SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE +const var"⪙" = @_K DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN +const var"⪚" = @_K DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN +const var"⪛" = @_K DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN +const var"⪜" = @_K DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN +const var"⪝" = @_K SIMILAR_OR_LESS_THAN +const var"⪞" = @_K SIMILAR_OR_GREATER_THAN +const var"⪟" = @_K SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN +const var"⪠" = @_K SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN +const var"⪡" = @_K DOUBLE_NESTED_LESS_THAN +const var"⪢" = @_K DOUBLE_NESTED_GREATER_THAN +const var"⪣" = @_K DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR +const var"⪤" = @_K GREATER_THAN_OVERLAPPING_LESS_THAN +const var"⪥" = @_K GREATER_THAN_BESIDE_LESS_THAN +const var"⪦" = @_K LESS_THAN_CLOSED_BY_CURVE +const var"⪧" = @_K GREATER_THAN_CLOSED_BY_CURVE +const var"⪨" = @_K LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL +const var"⪩" = @_K GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL +const var"⪪" = @_K SMALLER_THAN +const var"⪫" = @_K LARGER_THAN +const var"⪬" = @_K SMALLER_THAN_OR_EQUAL_TO +const var"⪭" = @_K LARGER_THAN_OR_EQUAL_TO +const var"⪮" = @_K EQUALS_SIGN_WITH_BUMPY_ABOVE +const var"⪯" = @_K PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN +const var"⪰" = @_K SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN +const var"⪱" = @_K 
PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO +const var"⪲" = @_K SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO +const var"⪳" = @_K PRECEDES_ABOVE_EQUALS_SIGN +const var"⪴" = @_K SUCCEEDS_ABOVE_EQUALS_SIGN +const var"⪵" = @_K PRECEDES_ABOVE_NOT_EQUAL_TO +const var"⪶" = @_K SUCCEEDS_ABOVE_NOT_EQUAL_TO +const var"⪷" = @_K PRECEDES_ABOVE_ALMOST_EQUAL_TO +const var"⪸" = @_K SUCCEEDS_ABOVE_ALMOST_EQUAL_TO +const var"⪹" = @_K PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO +const var"⪺" = @_K SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO +const var"⪻" = @_K DOUBLE_PRECEDES +const var"⪼" = @_K DOUBLE_SUCCEEDS +const var"⪽" = @_K SUBSET_WITH_DOT +const var"⪾" = @_K SUPERSET_WITH_DOT +const var"⪿" = @_K SUBSET_WITH_PLUS_SIGN_BELOW +const var"⫀" = @_K SUPERSET_WITH_PLUS_SIGN_BELOW +const var"⫁" = @_K SUBSET_WITH_MULTIPLICATION_SIGN_BELOW +const var"⫂" = @_K SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW +const var"⫃" = @_K SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE +const var"⫄" = @_K SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE +const var"⫅" = @_K SUBSET_OF_ABOVE_EQUALS_SIGN +const var"⫆" = @_K SUPERSET_OF_ABOVE_EQUALS_SIGN +const var"⫇" = @_K SUBSET_OF_ABOVE_TILDE_OPERATOR +const var"⫈" = @_K SUPERSET_OF_ABOVE_TILDE_OPERATOR +const var"⫉" = @_K SUBSET_OF_ABOVE_ALMOST_EQUAL_TO +const var"⫊" = @_K SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO +const var"⫋" = @_K SUBSET_OF_ABOVE_NOT_EQUAL_TO +const var"⫌" = @_K SUPERSET_OF_ABOVE_NOT_EQUAL_TO +const var"⫍" = @_K SQUARE_LEFT_OPEN_BOX_OPERATOR +const var"⫎" = @_K SQUARE_RIGHT_OPEN_BOX_OPERATOR +const var"⫏" = @_K CLOSED_SUBSET +const var"⫐" = @_K CLOSED_SUPERSET +const var"⫑" = @_K CLOSED_SUBSET_OR_EQUAL_TO +const var"⫒" = @_K CLOSED_SUPERSET_OR_EQUAL_TO +const var"⫓" = @_K SUBSET_ABOVE_SUPERSET +const var"⫔" = @_K SUPERSET_ABOVE_SUBSET +const var"⫕" = @_K SUBSET_ABOVE_SUBSET +const var"⫖" = @_K SUPERSET_ABOVE_SUPERSET +const var"⫗" = @_K SUPERSET_BESIDE_SUBSET +const var"⫘" = @_K SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET +const var"⫙" = @_K ELEMENT_OF_OPENING_DOWNWARDS +const var"⫷" = @_K 
TRIPLE_NESTED_LESS_THAN +const var"⫸" = @_K TRIPLE_NESTED_GREATER_THAN +const var"⫹" = @_K DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO +const var"⫺" = @_K DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO +const var"⊢" = @_K RIGHT_TACK +const var"⊣" = @_K LEFT_TACK +const var"⟂" = @_K PERP +const END_COMPARISON = @_K end_comparison # Level 7 -const BEGIN_PIPE = _T.begin_pipe -const var"|>" = _T.LPIPE -const var"<|" = _T.RPIPE -const END_PIPE = _T.end_pipe +const BEGIN_PIPE = @_K begin_pipe +const var"|>" = @_K LPIPE +const var"<|" = @_K RPIPE +const END_PIPE = @_K end_pipe # Level 8 -const BEGIN_COLON = _T.begin_colon -const var":" = _T.COLON -const var".." = _T.DDOT -const var"…" = _T.LDOTS -const var"⁝" = _T.TRICOLON -const var"⋮" = _T.VDOTS -const var"⋱" = _T.DDOTS -const var"⋰" = _T.ADOTS -const var"⋯" = _T.CDOTS -const END_COLON = _T.end_colon +const BEGIN_COLON = @_K begin_colon +const var":" = @_K COLON +const var".." = @_K DDOT +const var"…" = @_K LDOTS +const var"⁝" = @_K TRICOLON +const var"⋮" = @_K VDOTS +const var"⋱" = @_K DDOTS +const var"⋰" = @_K ADOTS +const var"⋯" = @_K CDOTS +const END_COLON = @_K end_colon # Level 9 -const BEGIN_PLUS = _T.begin_plus -const var"$" = _T.EX_OR -const var"+" = _T.PLUS -const var"-" = _T.MINUS -const var"++" = _T.PLUSPLUS -const var"⊕" = _T.CIRCLED_PLUS -const var"⊖" = _T.CIRCLED_MINUS -const var"⊞" = _T.SQUARED_PLUS -const var"⊟" = _T.SQUARED_MINUS -const var"|" = _T.OR -const var"∪" = _T.UNION -const var"∨" = _T.LOGICAL_OR -const var"⊔" = _T.SQUARE_CUP -const var"±" = _T.PLUS_MINUS_SIGN -const var"∓" = _T.MINUS_OR_PLUS_SIGN -const var"∔" = _T.DOT_PLUS -const var"∸" = _T.DOT_MINUS -const var"≂" = _T.MINUS_TILDE -const var"≏" = _T.DIFFERENCE_BETWEEN -const var"⊎" = _T.MULTISET_UNION -const var"⊻" = _T.XOR -const var"⊽" = _T.NOR -const var"⋎" = _T.CURLY_LOGICAL_OR -const var"⋓" = _T.DOUBLE_UNION -const var"⧺" = _T.DOUBLE_PLUS -const var"⧻" = _T.TRIPLE_PLUS -const var"⨈" = _T.TWO_LOGICAL_OR_OPERATOR -const var"⨢" = 
_T.PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE -const var"⨣" = _T.PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE -const var"⨤" = _T.PLUS_SIGN_WITH_TILDE_ABOVE -const var"⨥" = _T.PLUS_SIGN_WITH_DOT_BELOW -const var"⨦" = _T.PLUS_SIGN_WITH_TILDE_BELOW -const var"⨧" = _T.PLUS_SIGN_WITH_SUBSCRIPT_TWO -const var"⨨" = _T.PLUS_SIGN_WITH_BLACK_TRIANGLE -const var"⨩" = _T.MINUS_SIGN_WITH_COMMA_ABOVE -const var"⨪" = _T.MINUS_SIGN_WITH_DOT_BELOW -const var"⨫" = _T.MINUS_SIGN_WITH_FALLING_DOTS -const var"⨬" = _T.MINUS_SIGN_WITH_RISING_DOTS -const var"⨭" = _T.PLUS_SIGN_IN_LEFT_HALF_CIRCLE -const var"⨮" = _T.PLUS_SIGN_IN_RIGHT_HALF_CIRCLE -const var"⨹" = _T.PLUS_SIGN_IN_TRIANGLE -const var"⨺" = _T.MINUS_SIGN_IN_TRIANGLE -const var"⩁" = _T.UNION_WITH_MINUS_SIGN -const var"⩂" = _T.UNION_WITH_OVERBAR -const var"⩅" = _T.UNION_WITH_LOGICAL_OR -const var"⩊" = _T.UNION_BESIDE_AND_JOINED_WITH_UNION -const var"⩌" = _T.CLOSED_UNION_WITH_SERIFS -const var"⩏" = _T.DOUBLE_SQUARE_UNION -const var"⩐" = _T.CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT -const var"⩒" = _T.LOGICAL_OR_WITH_DOT_ABOVE -const var"⩔" = _T.DOUBLE_LOGICAL_OR -const var"⩖" = _T.TWO_INTERSECTING_LOGICAL_OR -const var"⩗" = _T.SLOPING_LARGE_OR -const var"⩛" = _T.LOGICAL_OR_WITH_MIDDLE_STEM -const var"⩝" = _T.LOGICAL_OR_WITH_HORIZONTAL_DASH -const var"⩡" = _T.SMALL_VEE_WITH_UNDERBAR -const var"⩢" = _T.LOGICAL_OR_WITH_DOUBLE_OVERBAR -const var"⩣" = _T.LOGICAL_OR_WITH_DOUBLE_UNDERBAR -const var"¦" = _T.BROKEN_BAR -const END_PLUS = _T.end_plus +const BEGIN_PLUS = @_K begin_plus +const var"$" = @_K EX_OR +const var"+" = @_K PLUS +const var"-" = @_K MINUS +const var"++" = @_K PLUSPLUS +const var"⊕" = @_K CIRCLED_PLUS +const var"⊖" = @_K CIRCLED_MINUS +const var"⊞" = @_K SQUARED_PLUS +const var"⊟" = @_K SQUARED_MINUS +const var"|" = @_K OR +const var"∪" = @_K UNION +const var"∨" = @_K LOGICAL_OR +const var"⊔" = @_K SQUARE_CUP +const var"±" = @_K PLUS_MINUS_SIGN +const var"∓" = @_K MINUS_OR_PLUS_SIGN +const var"∔" = @_K DOT_PLUS +const var"∸" = @_K 
DOT_MINUS +const var"≂" = @_K MINUS_TILDE +const var"≏" = @_K DIFFERENCE_BETWEEN +const var"⊎" = @_K MULTISET_UNION +const var"⊻" = @_K XOR +const var"⊽" = @_K NOR +const var"⋎" = @_K CURLY_LOGICAL_OR +const var"⋓" = @_K DOUBLE_UNION +const var"⧺" = @_K DOUBLE_PLUS +const var"⧻" = @_K TRIPLE_PLUS +const var"⨈" = @_K TWO_LOGICAL_OR_OPERATOR +const var"⨢" = @_K PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE +const var"⨣" = @_K PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE +const var"⨤" = @_K PLUS_SIGN_WITH_TILDE_ABOVE +const var"⨥" = @_K PLUS_SIGN_WITH_DOT_BELOW +const var"⨦" = @_K PLUS_SIGN_WITH_TILDE_BELOW +const var"⨧" = @_K PLUS_SIGN_WITH_SUBSCRIPT_TWO +const var"⨨" = @_K PLUS_SIGN_WITH_BLACK_TRIANGLE +const var"⨩" = @_K MINUS_SIGN_WITH_COMMA_ABOVE +const var"⨪" = @_K MINUS_SIGN_WITH_DOT_BELOW +const var"⨫" = @_K MINUS_SIGN_WITH_FALLING_DOTS +const var"⨬" = @_K MINUS_SIGN_WITH_RISING_DOTS +const var"⨭" = @_K PLUS_SIGN_IN_LEFT_HALF_CIRCLE +const var"⨮" = @_K PLUS_SIGN_IN_RIGHT_HALF_CIRCLE +const var"⨹" = @_K PLUS_SIGN_IN_TRIANGLE +const var"⨺" = @_K MINUS_SIGN_IN_TRIANGLE +const var"⩁" = @_K UNION_WITH_MINUS_SIGN +const var"⩂" = @_K UNION_WITH_OVERBAR +const var"⩅" = @_K UNION_WITH_LOGICAL_OR +const var"⩊" = @_K UNION_BESIDE_AND_JOINED_WITH_UNION +const var"⩌" = @_K CLOSED_UNION_WITH_SERIFS +const var"⩏" = @_K DOUBLE_SQUARE_UNION +const var"⩐" = @_K CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT +const var"⩒" = @_K LOGICAL_OR_WITH_DOT_ABOVE +const var"⩔" = @_K DOUBLE_LOGICAL_OR +const var"⩖" = @_K TWO_INTERSECTING_LOGICAL_OR +const var"⩗" = @_K SLOPING_LARGE_OR +const var"⩛" = @_K LOGICAL_OR_WITH_MIDDLE_STEM +const var"⩝" = @_K LOGICAL_OR_WITH_HORIZONTAL_DASH +const var"⩡" = @_K SMALL_VEE_WITH_UNDERBAR +const var"⩢" = @_K LOGICAL_OR_WITH_DOUBLE_OVERBAR +const var"⩣" = @_K LOGICAL_OR_WITH_DOUBLE_UNDERBAR +const var"¦" = @_K BROKEN_BAR +const END_PLUS = @_K end_plus # Level 10 -const BEGIN_BITSHIFTS = _T.begin_bitshifts -const var"<<" = _T.LBITSHIFT -const var">>" = _T.RBITSHIFT -const 
var">>>" = _T.UNSIGNED_BITSHIFT -const END_BITSHIFTS = _T.end_bitshifts +const BEGIN_BITSHIFTS = @_K begin_bitshifts +const var"<<" = @_K LBITSHIFT +const var">>" = @_K RBITSHIFT +const var">>>" = @_K UNSIGNED_BITSHIFT +const END_BITSHIFTS = @_K end_bitshifts # Level 11 -const BEGIN_TIMES = _T.begin_times -const var"*" = _T.STAR -const var"/" = _T.FWD_SLASH -const var"÷" = _T.DIVISION_SIGN -const var"%" = _T.REM -const var"⋅" = _T.UNICODE_DOT -const var"∘" = _T.RING_OPERATOR -const var"×" = _T.MULTIPLICATION_SIGN -const var"\\" = _T.BACKSLASH -const var"&" = _T.AND -const var"∩" = _T.INTERSECTION -const var"∧" = _T.LOGICAL_AND -const var"⊗" = _T.CIRCLED_TIMES -const var"⊘" = _T.CIRCLED_DIVISION_SLASH -const var"⊙" = _T.CIRCLED_DOT_OPERATOR -const var"⊚" = _T.CIRCLED_RING_OPERATOR -const var"⊛" = _T.CIRCLED_ASTERISK_OPERATOR -const var"⊠" = _T.SQUARED_TIMES -const var"⊡" = _T.SQUARED_DOT_OPERATOR -const var"⊓" = _T.SQUARE_CAP -const var"∗" = _T.ASTERISK_OPERATOR -const var"∙" = _T.BULLET_OPERATOR -const var"∤" = _T.DOES_NOT_DIVIDE -const var"⅋" = _T.TURNED_AMPERSAND -const var"≀" = _T.WREATH_PRODUCT -const var"⊼" = _T.NAND -const var"⋄" = _T.DIAMOND_OPERATOR -const var"⋆" = _T.STAR_OPERATOR -const var"⋇" = _T.DIVISION_TIMES -const var"⋉" = _T.LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT -const var"⋊" = _T.RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT -const var"⋋" = _T.LEFT_SEMIDIRECT_PRODUCT -const var"⋌" = _T.RIGHT_SEMIDIRECT_PRODUCT -const var"⋏" = _T.CURLY_LOGICAL_AND -const var"⋒" = _T.DOUBLE_INTERSECTION -const var"⟑" = _T.AND_WITH_DOT -const var"⦸" = _T.CIRCLED_REVERSE_SOLIDUS -const var"⦼" = _T.CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN -const var"⦾" = _T.CIRCLED_WHITE_BULLET -const var"⦿" = _T.CIRCLED_BULLET -const var"⧶" = _T.SOLIDUS_WITH_OVERBAR -const var"⧷" = _T.REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE -const var"⨇" = _T.TWO_LOGICAL_AND_OPERATOR -const var"⨰" = _T.MULTIPLICATION_SIGN_WITH_DOT_ABOVE -const var"⨱" = _T.MULTIPLICATION_SIGN_WITH_UNDERBAR -const var"⨲" = 
_T.SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED -const var"⨳" = _T.SMASH_PRODUCT -const var"⨴" = _T.MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE -const var"⨵" = _T.MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE -const var"⨶" = _T.CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT -const var"⨷" = _T.MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE -const var"⨸" = _T.CIRCLED_DIVISION_SIGN -const var"⨻" = _T.MULTIPLICATION_SIGN_IN_TRIANGLE -const var"⨼" = _T.INTERIOR_PRODUCT -const var"⨽" = _T.RIGHTHAND_INTERIOR_PRODUCT -const var"⩀" = _T.INTERSECTION_WITH_DOT -const var"⩃" = _T.INTERSECTION_WITH_OVERBAR -const var"⩄" = _T.INTERSECTION_WITH_LOGICAL_AND -const var"⩋" = _T.INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION -const var"⩍" = _T.CLOSED_INTERSECTION_WITH_SERIFS -const var"⩎" = _T.DOUBLE_SQUARE_INTERSECTION -const var"⩑" = _T.LOGICAL_AND_WITH_DOT_ABOVE -const var"⩓" = _T.DOUBLE_LOGICAL_AND -const var"⩕" = _T.TWO_INTERSECTING_LOGICAL_AND -const var"⩘" = _T.SLOPING_LARGE_AND -const var"⩚" = _T.LOGICAL_AND_WITH_MIDDLE_STEM -const var"⩜" = _T.LOGICAL_AND_WITH_HORIZONTAL_DASH -const var"⩞" = _T.LOGICAL_AND_WITH_DOUBLE_OVERBAR -const var"⩟" = _T.LOGICAL_AND_WITH_UNDERBAR -const var"⩠" = _T.LOGICAL_AND_WITH_DOUBLE_UNDERBAR -const var"⫛" = _T.TRANSVERSAL_INTERSECTION -const var"⊍" = _T.MULTISET_MULTIPLICATION -const var"▷" = _T.WHITE_RIGHT_POINTING_TRIANGLE -const var"⨝" = _T.JOIN -const var"⟕" = _T.LEFT_OUTER_JOIN -const var"⟖" = _T.RIGHT_OUTER_JOIN -const var"⟗" = _T.FULL_OUTER_JOIN -const var"⌿" = _T.NOT_SLASH -const var"⨟" = _T.BB_SEMI -const END_TIMES = _T.end_times +const BEGIN_TIMES = @_K begin_times +const var"*" = @_K STAR +const var"/" = @_K FWD_SLASH +const var"÷" = @_K DIVISION_SIGN +const var"%" = @_K REM +const var"⋅" = @_K UNICODE_DOT +const var"∘" = @_K RING_OPERATOR +const var"×" = @_K MULTIPLICATION_SIGN +const var"\\" = @_K BACKSLASH +const var"&" = @_K AND +const var"∩" = @_K INTERSECTION +const var"∧" = @_K LOGICAL_AND +const var"⊗" = @_K CIRCLED_TIMES +const var"⊘" = @_K 
CIRCLED_DIVISION_SLASH +const var"⊙" = @_K CIRCLED_DOT_OPERATOR +const var"⊚" = @_K CIRCLED_RING_OPERATOR +const var"⊛" = @_K CIRCLED_ASTERISK_OPERATOR +const var"⊠" = @_K SQUARED_TIMES +const var"⊡" = @_K SQUARED_DOT_OPERATOR +const var"⊓" = @_K SQUARE_CAP +const var"∗" = @_K ASTERISK_OPERATOR +const var"∙" = @_K BULLET_OPERATOR +const var"∤" = @_K DOES_NOT_DIVIDE +const var"⅋" = @_K TURNED_AMPERSAND +const var"≀" = @_K WREATH_PRODUCT +const var"⊼" = @_K NAND +const var"⋄" = @_K DIAMOND_OPERATOR +const var"⋆" = @_K STAR_OPERATOR +const var"⋇" = @_K DIVISION_TIMES +const var"⋉" = @_K LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT +const var"⋊" = @_K RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT +const var"⋋" = @_K LEFT_SEMIDIRECT_PRODUCT +const var"⋌" = @_K RIGHT_SEMIDIRECT_PRODUCT +const var"⋏" = @_K CURLY_LOGICAL_AND +const var"⋒" = @_K DOUBLE_INTERSECTION +const var"⟑" = @_K AND_WITH_DOT +const var"⦸" = @_K CIRCLED_REVERSE_SOLIDUS +const var"⦼" = @_K CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN +const var"⦾" = @_K CIRCLED_WHITE_BULLET +const var"⦿" = @_K CIRCLED_BULLET +const var"⧶" = @_K SOLIDUS_WITH_OVERBAR +const var"⧷" = @_K REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE +const var"⨇" = @_K TWO_LOGICAL_AND_OPERATOR +const var"⨰" = @_K MULTIPLICATION_SIGN_WITH_DOT_ABOVE +const var"⨱" = @_K MULTIPLICATION_SIGN_WITH_UNDERBAR +const var"⨲" = @_K SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED +const var"⨳" = @_K SMASH_PRODUCT +const var"⨴" = @_K MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE +const var"⨵" = @_K MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE +const var"⨶" = @_K CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT +const var"⨷" = @_K MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE +const var"⨸" = @_K CIRCLED_DIVISION_SIGN +const var"⨻" = @_K MULTIPLICATION_SIGN_IN_TRIANGLE +const var"⨼" = @_K INTERIOR_PRODUCT +const var"⨽" = @_K RIGHTHAND_INTERIOR_PRODUCT +const var"⩀" = @_K INTERSECTION_WITH_DOT +const var"⩃" = @_K INTERSECTION_WITH_OVERBAR +const var"⩄" = @_K INTERSECTION_WITH_LOGICAL_AND +const var"⩋" = 
@_K INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION +const var"⩍" = @_K CLOSED_INTERSECTION_WITH_SERIFS +const var"⩎" = @_K DOUBLE_SQUARE_INTERSECTION +const var"⩑" = @_K LOGICAL_AND_WITH_DOT_ABOVE +const var"⩓" = @_K DOUBLE_LOGICAL_AND +const var"⩕" = @_K TWO_INTERSECTING_LOGICAL_AND +const var"⩘" = @_K SLOPING_LARGE_AND +const var"⩚" = @_K LOGICAL_AND_WITH_MIDDLE_STEM +const var"⩜" = @_K LOGICAL_AND_WITH_HORIZONTAL_DASH +const var"⩞" = @_K LOGICAL_AND_WITH_DOUBLE_OVERBAR +const var"⩟" = @_K LOGICAL_AND_WITH_UNDERBAR +const var"⩠" = @_K LOGICAL_AND_WITH_DOUBLE_UNDERBAR +const var"⫛" = @_K TRANSVERSAL_INTERSECTION +const var"⊍" = @_K MULTISET_MULTIPLICATION +const var"▷" = @_K WHITE_RIGHT_POINTING_TRIANGLE +const var"⨝" = @_K JOIN +const var"⟕" = @_K LEFT_OUTER_JOIN +const var"⟖" = @_K RIGHT_OUTER_JOIN +const var"⟗" = @_K FULL_OUTER_JOIN +const var"⌿" = @_K NOT_SLASH +const var"⨟" = @_K BB_SEMI +const END_TIMES = @_K end_times # Level 12 -const BEGIN_RATIONAL = _T.begin_rational -const var"//" = _T.FWDFWD_SLASH -const END_RATIONAL = _T.end_rational +const BEGIN_RATIONAL = @_K begin_rational +const var"//" = @_K FWDFWD_SLASH +const END_RATIONAL = @_K end_rational # Level 13 -const BEGIN_POWER = _T.begin_power -const var"^" = _T.CIRCUMFLEX_ACCENT -const var"↑" = _T.UPWARDS_ARROW -const var"↓" = _T.DOWNWARDS_ARROW -const var"⇵" = _T.DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW -const var"⟰" = _T.UPWARDS_QUADRUPLE_ARROW -const var"⟱" = _T.DOWNWARDS_QUADRUPLE_ARROW -const var"⤈" = _T.DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE -const var"⤉" = _T.UPWARDS_ARROW_WITH_HORIZONTAL_STROKE -const var"⤊" = _T.UPWARDS_TRIPLE_ARROW -const var"⤋" = _T.DOWNWARDS_TRIPLE_ARROW -const var"⤒" = _T.UPWARDS_ARROW_TO_BAR -const var"⤓" = _T.DOWNWARDS_ARROW_TO_BAR -const var"⥉" = _T.UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE -const var"⥌" = _T.UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON -const var"⥍" = _T.UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON -const var"⥏" = _T.UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON -const 
var"⥑" = _T.UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON -const var"⥔" = _T.UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR -const var"⥕" = _T.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR -const var"⥘" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR -const var"⥙" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR -const var"⥜" = _T.UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR -const var"⥝" = _T.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR -const var"⥠" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR -const var"⥡" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR -const var"⥣" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT -const var"⥥" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT -const var"⥮" = _T.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT -const var"⥯" = _T.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT -const var"↑" = _T.HALFWIDTH_UPWARDS_ARROW -const var"↓" = _T.HALFWIDTH_DOWNWARDS_ARROW -const END_POWER = _T.end_power +const BEGIN_POWER = @_K begin_power +const var"^" = @_K CIRCUMFLEX_ACCENT +const var"↑" = @_K UPWARDS_ARROW +const var"↓" = @_K DOWNWARDS_ARROW +const var"⇵" = @_K DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW +const var"⟰" = @_K UPWARDS_QUADRUPLE_ARROW +const var"⟱" = @_K DOWNWARDS_QUADRUPLE_ARROW +const var"⤈" = @_K DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE +const var"⤉" = @_K UPWARDS_ARROW_WITH_HORIZONTAL_STROKE +const var"⤊" = @_K UPWARDS_TRIPLE_ARROW +const var"⤋" = @_K DOWNWARDS_TRIPLE_ARROW +const var"⤒" = @_K UPWARDS_ARROW_TO_BAR +const var"⤓" = @_K DOWNWARDS_ARROW_TO_BAR +const var"⥉" = @_K UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE +const var"⥌" = @_K UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON +const var"⥍" = @_K UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON +const var"⥏" = @_K UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON +const var"⥑" = @_K UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON +const var"⥔" = @_K UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR +const var"⥕" = @_K 
DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR +const var"⥘" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR +const var"⥙" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR +const var"⥜" = @_K UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR +const var"⥝" = @_K DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR +const var"⥠" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR +const var"⥡" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR +const var"⥣" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT +const var"⥥" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT +const var"⥮" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT +const var"⥯" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT +const var"↑" = @_K HALFWIDTH_UPWARDS_ARROW +const var"↓" = @_K HALFWIDTH_DOWNWARDS_ARROW +const END_POWER = @_K end_power # Level 14 -const BEGIN_DECL = _T.begin_decl -const var"::" = _T.DECLARATION -const END_DECL = _T.end_decl +const BEGIN_DECL = @_K begin_decl +const var"::" = @_K DECLARATION +const END_DECL = @_K end_decl # Level 15 -const BEGIN_WHERE = _T.begin_where -const var"where" = _T.WHERE -const END_WHERE = _T.end_where +const BEGIN_WHERE = @_K begin_where +const var"where" = @_K WHERE +const END_WHERE = @_K end_where # Level 16 -const BEGIN_DOT = _T.begin_dot -const var"." = _T.DOT -const END_DOT = _T.end_dot - -const var"!" = _T.NOT -const var"'" = _T.PRIME -const var".'" = _T.TRANSPOSE -const var"->" = _T.ANON_FUNC - -const BEGIN_UNICODE_OPS = _T.begin_unicode_ops -const var"¬" = _T.NOT_SIGN -const var"√" = _T.SQUARE_ROOT -const var"∛" = _T.CUBE_ROOT -const var"∜" = _T.QUAD_ROOT -const END_UNICODE_OPS = _T.end_unicode_ops - -const END_OPS = _T.end_ops +const BEGIN_DOT = @_K begin_dot +const var"." = @_K DOT +const END_DOT = @_K end_dot + +const var"!" 
= @_K NOT +const var"'" = @_K PRIME +const var".'" = @_K TRANSPOSE +const var"->" = @_K ANON_FUNC + +const BEGIN_UNICODE_OPS = @_K begin_unicode_ops +const var"¬" = @_K NOT_SIGN +const var"√" = @_K SQUARE_ROOT +const var"∛" = @_K CUBE_ROOT +const var"∜" = @_K QUAD_ROOT +const END_UNICODE_OPS = @_K end_unicode_ops + +const END_OPS = @_K end_ops + + +# Our custom syntax tokens + +const toplevel = @_K TOPLEVEL +const call = @_K CALL +const block = @_K BLOCK end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index f61b6dc32451f..0ef55803e1eed 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,6 +1,13 @@ using JuliaSyntax using Test -@testset "JuliaSyntax.jl" begin +#@testset "JuliaSyntax.jl" begin # Write your tests here. -end +#end + + +code = """ +[1,2, 3] +""" + + From 51f75459edf61ad688bfd67b15dd116c69a118e3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 28 Nov 2021 22:04:01 +1000 Subject: [PATCH 0206/1109] Raw syntax tree data structure - prototyping --- JuliaSyntax/README.md | 102 +++++++++- JuliaSyntax/src/JuliaSyntax.jl | 334 +-------------------------------- JuliaSyntax/src/lexer.jl | 156 +++++++++++++++ JuliaSyntax/src/parser.jl | 194 +++++++++++++++++++ JuliaSyntax/src/syntax_tree.jl | 60 ++++++ JuliaSyntax/src/token_kinds.jl | 98 +++++++--- JuliaSyntax/test/runtests.jl | 40 ++++ 7 files changed, 625 insertions(+), 359 deletions(-) create mode 100644 JuliaSyntax/src/lexer.jl create mode 100644 JuliaSyntax/src/parser.jl create mode 100644 JuliaSyntax/src/syntax_tree.jl diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 77c3ae56e0aea..eb55c1af6d940 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -17,12 +17,25 @@ Nice to have: ## Design -The datastructure design here is hard: -- There's many useful ways to augment a syntax tree depending on use case. +The tree datastructure design here is hard: + +1. 
The symbolic part of compilation (the compiler frontend) incrementally + abstracts the source text, but errors along the way should refer back to the + source. + - The tree must be a lossless representation of the source text + - Some aspects of the source text (comments, most whitespace) are irrelevant + to parsing. + - More aspects of the source text are irrelevant after we have an abstract + syntax tree of the surface syntax. Some good examples here are the + parentheses in `2*(x + y)` and the explicit vs implicit multiplication + symbol in `2*x` vs `2x`. + +2. There's various type of *analyses* +- There's many useful ways to augment, a syntax tree depending on use case. - Analysis algorithms should be able to act on any tree type, ignoring but carrying augmentations which they don't know about. -Let's tackle it by prototyping identifying several important work flows: +Let's tackle it by prototyping several important work flows: 1. Precise error reporting in lowering - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment @@ -39,3 +52,86 @@ Let's tackle it by prototyping identifying several important work flows: 4. Formatting - Re-indent a file + +## Tree design + +Raw syntax tree (RST / "Green tree") + +We want RawSyntaxNode to be +* *structurally minimal* — For efficiency and generality +* *immutable* — For efficiency (& thread safety?) 
+* *complete* — To preserve parser knowledge + +``` +for i = 1:10 + a + 2 + # hi + c + #= hey + ho =# +end +``` + +The simplest idea possible is to have: +* Leaf nodes are a single token +* Children are in source order + +``` +- - trivia +I - identifier +L - literal + +[for] + - "for" + - " " + [=] + I "i" + - " " + - "=" + - " " + [call] + I "1" + - ":" + L "10" + - "\n " + [call] + I "a" + - " " + I "+" + - " " + L "2" + - "\n " + - "# hi" + - "\n " + I "c" + - "\n " + - #= hey\n ho =#' + - "\n" + - "end" +``` + +Call represents a challange for the AST vs RST in terms of node placement / +iteration for infix operators vs normal prefix function calls. + +- The normal problem of `a + 1` vs `+(a, 1)` +- Or even worse, `a + 1 + 2` vs `+(a, 1, 2)` + +Clearly in the AST's *interface* we need to abstract over this placement. For +example with something like the normal Julia AST. But in the RST we only need +to distinguish between infix and prefix. + + + +## Fun research questions + +* Given the raw tree (the green tree, in Roslyn terminology) can we regress a + model of indentiation? Such that formatting rules for new code is defined + implicitly by a software project's existing style? 
+ +# Resources + +* [Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) + - [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees +* Andy Chu (the author of the OIL shell) has written some things about syntax + - https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern + - [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 638e02f2c5e3e..25355d5537a0b 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,343 +1,13 @@ module JuliaSyntax -#------------------------------------------------------------------------------- -# Token stream utilities - import Tokenize using Tokenize.Tokens: RawToken const TzTokens = Tokenize.Tokens include("token_kinds.jl") - -""" -We define a token type which is more suited to parsing than the basic token -types from Tokenize. -""" -struct SyntaxToken - # TODO: Could use a more stripped down version of RawToken which only - # stores byte offsets? - leading_trivia::RawToken - raw::RawToken -end - -function Base.show(io::IO, t::SyntaxToken) - fullrange = string(lpad(t.leading_trivia.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) - - range = string(lpad(t.raw.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) - print(io, rpad(string(fullrange, "│", range), 17, " "), rpad(kind(t), 15, " ")) -end - - -kind(tok::SyntaxToken) = tok.raw.kind - -# summary_kind(tok::SyntaxToken) = TzTokens.kind(tok.raw) - -const EMPTY_RAW_TOKEN = RawToken() -const EMPTY_TOKEN = SyntaxToken(RawToken(), RawToken()) - -""" -TokenStream wraps the lexer from Tokenize.jl with a short putback buffer and -condenses syntactically irrelevant whitespace tokens into "syntax trivia" which -are attached to other tokens. 
-""" -mutable struct TokenStream - lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} - # We buffer up to two tokens here, with `next2` taken before `next1`. It - # suffices to support only a single putback token (which always goes into - # `next2`). The presence of a valid token in `next2` does not imply there's - # one in `next1`. - next1::SyntaxToken - next2::SyntaxToken - hasnext1::Bool - hasnext2::Bool -end - -function TokenStream(code) - lexer = Tokenize.tokenize(code, RawToken) - TokenStream(lexer, EMPTY_TOKEN, EMPTY_TOKEN, false, false) -end - -function Base.show(io::IO, mime::MIME"text/plain", ts::TokenStream) - print(io, TokenStream, ":\n lexer = ") - show(io, mime, ts.lexer) - if ts.hasnext2 - print(io, "\n next2 = ", ts.next2) - end - if ts.hasnext1 - print(io, "\n next1 = ", ts.next1) - end -end - -# Iterator interface -Base.IteratorSize(::Type{TokenStream}) = Base.SizeUnknown() -Base.IteratorEltype(::Type{TokenStream}) = Base.HasEltype() -Base.eltype(::Type{TokenStream}) = SyntaxToken - -function Base.iterate(ts::TokenStream, end_state=false) - end_state && return nothing - t = take_token!(ts) - return t, kind(t) == K"ENDMARKER" -end - -function _read_raw_token(lexer::Tokenize.Lexers.Lexer) - c = Tokenize.Lexers.peekchar(lexer) - if isspace(c) - Tokenize.Lexers.start_token!(lexer) - # We lex whitespace slightly differently from Tokenize.jl, as newlines - # are syntactically significant - if Tokenize.Lexers.accept(lexer, '\n') - return Tokenize.Lexers.emit(lexer, K"NEWLINE_WS") - else - Tokenize.Lexers.readon(lexer) - Tokenize.Lexers.accept_batch(lexer, c->isspace(c) && c != '\n') - return Tokenize.Lexers.emit(lexer, K"WHITESPACE") - end - else - return Tokenize.Lexers.next_token(lexer) - end -end - -function _read_token(lexer::Tokenize.Lexers.Lexer) - # No token - do the actual work of taking a token from the lexer - raw = _read_raw_token(lexer) - if TzTokens.exactkind(raw) in (K"WHITESPACE", K"COMMENT") - # TODO: *Combine* comments with whitespace here 
to get a single leading - # trivia item per real token. - leading_trivia = raw - raw = _read_raw_token(lexer) - else - leading_trivia = RawToken(K"ERROR", (0,0), (0,0), - raw.startbyte, raw.startbyte-1, - TzTokens.NO_ERR, false, false) - end - return SyntaxToken(leading_trivia, raw) -end - -# Return next token in the stream, but don't remove it. -function peek_token(ts::TokenStream) - ts.hasnext2 && return ts.next2 - ts.hasnext1 && return ts.next1 - ts.next1 = _read_token(ts.lexer) - ts.hasnext1 = true - return ts.next1 -end - -# Like peek_token, but -# * EOF becomes an error -# * Newlines tokens are gobbled (TODO!) -function require_token(ts::TokenStream) - tok = peek_token(ts) - if kind(tok) == K"ENDMARKER" - error("incomplete: premature end of input") - end - return tok -end - -# Remove next token from the stream and return it. -function take_token!(ts::TokenStream) - if ts.hasnext2 - ts.hasnext2 = false - return ts.next2 - end - if ts.hasnext1 - ts.hasnext1 = false - return ts.next1 - end - # This line is a departure from the scheme parser, which requires - # peek_token to be called - return _read_token(ts.lexer) -end - -function put_back!(ts::TokenStream, tok::RawToken) - ts.hasnext2 || error("Cannot put back two tokens") - ts.next2 = tok -end - -function had_space(ts::TokenStream) -end - -is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" - - -#------------------------------------------------------------------------------- - -""" -ParseState carries parser context as we recursively descend into the parse -tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix -literals we're in "whitespace sensitive" mode, and `[x -y]` means [(x) (-y)]. 
-""" -struct ParseState - tokens::TokenStream - - # Disable range colon for parsing ternary conditional operator - range_colon_enabled::Bool - # In space-sensitive mode "x -y" is 2 expressions, not a subtraction - space_sensitive::Bool - # Seeing `for` stops parsing macro arguments and makes a generator - for_generator::Bool - # Treat 'end' like a normal symbol instead of a reserved word - end_symbol::Bool - # Treat newline like ordinary whitespace instead of as a potential separator - whitespace_newline::Bool - # Enable parsing `where` with high precedence - where_enabled::Bool -end - -# Normal context -function ParseState(tokens::TokenStream) - ParseState(tokens, true, false, true, false, false, false) -end - -function ParseState(ps::ParseState; range_colon_enabled=nothing, - space_sensitive=nothing, for_generator=nothing, - end_symbol=nothing, whitespace_newline=nothing, - where_enabled=nothing) - ParseState(ps.tokens, - range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, - space_sensitive === nothing ? ps.space_sensitive : space_sensitive, - for_generator === nothing ? ps.for_generator : for_generator, - end_symbol === nothing ? ps.end_symbol : end_symbol, - whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, - where_enabled === nothing ? 
ps.where_enabled : where_enabled) -end - -take_token!(ps::ParseState) = take_token!(ps.tokens) -require_token(ps::ParseState) = require_token(ps.tokens) -peek_token(ps::ParseState) = peek_token(ps.tokens) -put_back!(ps::ParseState, tok::RawToken) = put_back!(ps.tokens, tok) - -#------------------------------------------------------------------------------- -# Parser +include("lexer.jl") include("syntax_tree.jl") - -function is_closing_token(ps::ParseState, tok) - k = kind(tok) - return k in (K"else", K"elseif", K"catch", K"finally", - K",", K")", K"]", K"}", K";", - K"ENDMARKER") || (k == K"END" && !ps.end_symbol) -end - -function has_whitespace_prefix(tok::SyntaxToken) - tok.leading_trivia.kind == K"WHITESPACE" -end - -function TODO(str) - error("TODO: $str") -end - -# Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. -function parse_atom(ps::ParseState; checked::Bool=true)::SyntaxNode - tok = require_token(ps) - tok_kind = kind(tok) - # TODO: Reorder these to put most likely tokens first - if tok_kind == K":" # symbol/expression quote - take_token!(ps) - next = peek_token(ps) - if is_closing_token(ps, next) && (kind(next) != K"KEYWORD" || - has_whitespace_prefix(next)) - return SyntaxNode(tok) - elseif has_whitespace_prefix(next) - error("whitespace not allowed after \":\" used for quoting") - elseif kind(next) == K"NEWLINE_WS" - error("newline not allowed after \":\" used for quoting") - else - # Being inside quote makes `end` non-special again. 
issue #27690 - ps1 = ParseState(ps, end_symbol=false) - return SyntaxNode(K"quote", parse_atom(ps1, checked=false)) - end - elseif tok_kind == K"=" # misplaced = - error("unexpected `=`") - elseif tok_kind == K"IDENTIFIER" - if checked - TODO("Checked identifier names") - end - take_token!(ps) - return SyntaxNode(tok) - elseif tok_kind == K"VAR_IDENTIFIER" - take_token!(ps) - return SyntaxNode(tok) - elseif tok_kind == K"(" # parens or tuple - take_token!(ps) - return parse_paren(ps, checked) - elseif tok_kind == K"[" # cat expression - take_token!(ps) - TODO("""parse_cat(ps, K"]", ps.end_symbol)""") - elseif tok_kind == K"{" # cat expression - take_token!(ps) - TODO("""parse_cat(ps, K"}", ps.end_symbol)""") - elseif tok_kind == K"`" - TODO("(macrocall (core @cmd) ...)") - # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), - elseif isliteral(tok_kind) - take_token!(ps) - return SyntaxNode(tok) - elseif is_closing_token(tok) - error("unexpected: $tok") - else - error("invalid syntax: `$tok`") - end -end - -# parse `a@b@c@...` for some @ -# -# `is_separator` - predicate -# `head` the expression head to yield in the result, e.g. 
"a;b" => (block a b) -# `is_closer` - predicate to identify tokens that stop parsing -# however, this doesn't consume the closing token, just looks at it -function parse_Nary(ps::ParseState, down::Function, is_separator::Function, - result_kind, is_closer::Function) -end - -# flisp: parse-docstring -# Parse statement with possible docstring -function parse_statement_with_doc(ps::ParseState) - parse_eq(ps) - # TODO: Detect docstrings -end - -#------------------------------------------------------------------------------- - -# the principal non-terminals follow, in increasing precedence order - -#function parse_block(ps::ParseState, down=parse_eq) -#end - -# flisp: parse-stmts -# `;` at the top level produces a sequence of top level expressions -function parse_statements(ps::ParseState) - parse_Nary(ps, parse_statement) -end - -# flisp: parse-eq -function parse_eq(ps::ParseState) - parse_assignment(ps, parse_comma) -end - -# flisp: parse-eq* -# parse_eq_2 is used where commas are special, for example in an argument list -# function parse_eq_2 - -function parse_assignment(ps::ParseState, down) - ex = down(ps) - t = peek_token(ps) - if !is_prec_assignment(t) - return ex - end - take_token!(ps) - if kind(t) == K"~" - # ~ is the only non-syntactic assignment-precedence operator - TODO("Turn ~ into a call node") - else - SyntaxNode - end -end - -#------------------------------------------------------------------------------- - -function parse(code) - tokens = JuliaSyntax.TokenStream(code) - parse_statements(tokens) -end +include("parser.jl") end diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl new file mode 100644 index 0000000000000..339ebce59c3dc --- /dev/null +++ b/JuliaSyntax/src/lexer.jl @@ -0,0 +1,156 @@ +#------------------------------------------------------------------------------- +""" +`SyntaxToken` covers a contiguous range of the source text which contains a +token *relevant for parsing*, with a possibly-irrelevant prefix of "token +trivia". 
Trivial tokens include +* Whitespace +* Comments + +Note that "triviality" of tokens is context-dependent in general. For example, +the parentheses in `(1+2)*3` are important for parsing but are irrelevant after +the abstract syntax tree is constructed. +""" +struct SyntaxToken + # TODO: Could use a more stripped down version of RawToken which only + # stores byte offsets? + leading_trivia::RawToken + raw::RawToken +end + +function Base.show(io::IO, t::SyntaxToken) + fullrange = string(lpad(t.leading_trivia.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) + + range = string(lpad(t.raw.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) + print(io, rpad(string(fullrange, "│", range), 17, " "), rpad(kind(t), 15, " ")) +end + + +kind(tok::SyntaxToken) = tok.raw.kind + +# summary_kind(tok::SyntaxToken) = TzTokens.kind(tok.raw) + +const EMPTY_RAW_TOKEN = RawToken() +const EMPTY_TOKEN = SyntaxToken(RawToken(), RawToken()) + + +#------------------------------------------------------------------------------- +""" +TokenStream wraps the lexer from Tokenize.jl with a short putback buffer and +condenses syntactically irrelevant whitespace tokens into "syntax trivia" which +are attached to other tokens. +""" +mutable struct TokenStream + lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} + # We buffer up to two tokens here, with `next2` taken before `next1`. It + # suffices to support only a single putback token (which always goes into + # `next2`). The presence of a valid token in `next2` does not imply there's + # one in `next1`. 
+ next1::SyntaxToken + next2::SyntaxToken + hasnext1::Bool + hasnext2::Bool +end + +function TokenStream(code) + lexer = Tokenize.tokenize(code, RawToken) + TokenStream(lexer, EMPTY_TOKEN, EMPTY_TOKEN, false, false) +end + +function Base.show(io::IO, mime::MIME"text/plain", ts::TokenStream) + print(io, TokenStream, ":\n lexer = ") + show(io, mime, ts.lexer) + if ts.hasnext2 + print(io, "\n next2 = ", ts.next2) + end + if ts.hasnext1 + print(io, "\n next1 = ", ts.next1) + end +end + +# Iterator interface +Base.IteratorSize(::Type{TokenStream}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{TokenStream}) = Base.HasEltype() +Base.eltype(::Type{TokenStream}) = SyntaxToken + +function Base.iterate(ts::TokenStream, end_state=false) + end_state && return nothing + t = take_token!(ts) + return t, kind(t) == K"ENDMARKER" +end + +function _read_raw_token(lexer::Tokenize.Lexers.Lexer) + c = Tokenize.Lexers.peekchar(lexer) + if isspace(c) + Tokenize.Lexers.start_token!(lexer) + # We lex whitespace slightly differently from Tokenize.jl, as newlines + # are syntactically significant + if Tokenize.Lexers.accept(lexer, '\n') + return Tokenize.Lexers.emit(lexer, K"NEWLINE_WS") + else + Tokenize.Lexers.readon(lexer) + Tokenize.Lexers.accept_batch(lexer, c->isspace(c) && c != '\n') + return Tokenize.Lexers.emit(lexer, K"WHITESPACE") + end + else + return Tokenize.Lexers.next_token(lexer) + end +end + +function _read_token(lexer::Tokenize.Lexers.Lexer) + # No token - do the actual work of taking a token from the lexer + raw = _read_raw_token(lexer) + if TzTokens.exactkind(raw) in (K"WHITESPACE", K"COMMENT") + # TODO: *Combine* comments with whitespace here to get a single leading + # trivia item per real token. 
+ leading_trivia = raw + raw = _read_raw_token(lexer) + else + leading_trivia = RawToken(K"ERROR", (0,0), (0,0), + raw.startbyte, raw.startbyte-1, + TzTokens.NO_ERR, false, false) + end + return SyntaxToken(leading_trivia, raw) +end + +# Return next token in the stream, but don't remove it. +function peek_token(ts::TokenStream) + ts.hasnext2 && return ts.next2 + ts.hasnext1 && return ts.next1 + ts.next1 = _read_token(ts.lexer) + ts.hasnext1 = true + return ts.next1 +end + +# Like peek_token, but +# * EOF becomes an error +# * Newlines tokens are gobbled (TODO!) +function require_token(ts::TokenStream) + tok = peek_token(ts) + if kind(tok) == K"ENDMARKER" + error("incomplete: premature end of input") + end + return tok +end + +# Remove next token from the stream and return it. +function take_token!(ts::TokenStream) + if ts.hasnext2 + ts.hasnext2 = false + return ts.next2 + end + if ts.hasnext1 + ts.hasnext1 = false + return ts.next1 + end + # This line is a departure from the scheme parser, which requires + # peek_token to be called + return _read_token(ts.lexer) +end + +function put_back!(ts::TokenStream, tok::RawToken) + ts.hasnext2 || error("Cannot put back two tokens") + ts.next2 = tok +end + +is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" + diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl new file mode 100644 index 0000000000000..65e9b90c7625c --- /dev/null +++ b/JuliaSyntax/src/parser.jl @@ -0,0 +1,194 @@ +""" +ParseState carries parser context as we recursively descend into the parse +tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix +literals we're in "whitespace sensitive" mode, and `[x -y]` means [(x) (-y)]. +""" +struct ParseState + tokens::TokenStream + # Vesion of Julia we're parsing this code for. May be different from VERSION! 
+ julia_version::VersionNumber + + # Disable range colon for parsing ternary conditional operator + range_colon_enabled::Bool + # In space-sensitive mode "x -y" is 2 expressions, not a subtraction + space_sensitive::Bool + # Seeing `for` stops parsing macro arguments and makes a generator + for_generator::Bool + # Treat 'end' like a normal symbol instead of a reserved word + end_symbol::Bool + # Treat newline like ordinary whitespace instead of as a potential separator + whitespace_newline::Bool + # Enable parsing `where` with high precedence + where_enabled::Bool +end + +# Normal context +function ParseState(tokens::TokenStream; julia_version=VERSION) + ParseState(tokens, julia_version, true, false, true, false, false, false) +end + +function ParseState(ps::ParseState; range_colon_enabled=nothing, + space_sensitive=nothing, for_generator=nothing, + end_symbol=nothing, whitespace_newline=nothing, + where_enabled=nothing) + ParseState(ps.tokens, ps.julia_version, + range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, + space_sensitive === nothing ? ps.space_sensitive : space_sensitive, + for_generator === nothing ? ps.for_generator : for_generator, + end_symbol === nothing ? ps.end_symbol : end_symbol, + whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, + where_enabled === nothing ? 
ps.where_enabled : where_enabled) +end + +take_token!(ps::ParseState) = take_token!(ps.tokens) +require_token(ps::ParseState) = require_token(ps.tokens) +peek_token(ps::ParseState) = peek_token(ps.tokens) +put_back!(ps::ParseState, tok::RawToken) = put_back!(ps.tokens, tok) + +#------------------------------------------------------------------------------- +# Parser + +function is_closing_token(ps::ParseState, tok) + k = kind(tok) + return k in (K"else", K"elseif", K"catch", K"finally", + K",", K")", K"]", K"}", K";", + K"EndMarker") || (k == K"end" && !ps.end_symbol) +end + +function has_whitespace_prefix(tok::SyntaxToken) + tok.leading_trivia.kind == K" " +end + +function TODO(str) + error("TODO: $str") +end + +# Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. +function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode + tok = require_token(ps) + tok_kind = kind(tok) + # TODO: Reorder these to put most likely tokens first + if tok_kind == K":" # symbol/expression quote + take_token!(ps) + next = peek_token(ps) + if is_closing_token(ps, next) && (kind(next) != K"Keyword" || + has_whitespace_prefix(next)) + return RawSyntaxNode(tok) + elseif has_whitespace_prefix(next) + error("whitespace not allowed after \":\" used for quoting") + elseif kind(next) == K"NewlineWs" + error("newline not allowed after \":\" used for quoting") + else + # Being inside quote makes `end` non-special again. 
issue #27690 + ps1 = ParseState(ps, end_symbol=false) + return RawSyntaxNode(K"quote", parse_atom(ps1, checked=false)) + end + elseif tok_kind == K"=" # misplaced = + error("unexpected `=`") + elseif tok_kind == K"Identifier" + if checked + TODO("Checked identifier names") + end + take_token!(ps) + return RawSyntaxNode(tok) + elseif tok_kind == K"VarIdentifier" + take_token!(ps) + return RawSyntaxNode(tok) + elseif tok_kind == K"(" # parens or tuple + take_token!(ps) + return parse_paren(ps, checked) + elseif tok_kind == K"[" # cat expression + # NB: Avoid take_token! here? It's better to not consume tokens early + # take_token!(ps) + vex = parse_cat(ps, tok, K"]", ps.end_symbol) + elseif tok_kind == K"{" # cat expression + take_token!(ps) + TODO("""parse_cat(ps, K"}", )""") + elseif tok_kind == K"`" + TODO("(macrocall (core @cmd) ...)") + # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), + elseif isliteral(tok_kind) + take_token!(ps) + return RawSyntaxNode(tok) + elseif is_closing_token(tok) + error("unexpected: $tok") + else + error("invalid syntax: `$tok`") + end +end + +# parse `a@b@c@...` for some @ +# +# `is_separator` - predicate +# `head` the expression head to yield in the result, e.g. 
"a;b" => (block a b) +# `is_closer` - predicate to identify tokens that stop parsing +# however, this doesn't consume the closing token, just looks at it +function parse_Nary(ps::ParseState, down::Function, is_separator::Function, + result_kind, is_closer::Function) +end + +# flisp: parse-docstring +# Parse statement with possible docstring +function parse_statement_with_doc(ps::ParseState) + parse_eq(ps) + # TODO: Detect docstrings +end + +# flisp: parse-cat +# Parse syntax inside of `[]` or `{}` +function parse_cat(ps0::ParseState, opening_tok, closer, last_end_symbol::Bool) + ps = ParseState(ps0, range_colon_enabled=true, + space_sensitive=true, + where_enabled=true, + whitespace_newline=false, + for_generator=true) + if require_token(ps) == closer + take_token!(ps) + return + end +end + +#------------------------------------------------------------------------------- + +# the principal non-terminals follow, in increasing precedence order + +#function parse_block(ps::ParseState, down=parse_eq) +#end + +# flisp: parse-stmts +# `;` at the top level produces a sequence of top level expressions +function parse_statements(ps::ParseState) + parse_Nary(ps, parse_statement) +end + +# flisp: parse-eq +function parse_eq(ps::ParseState) + parse_assignment(ps, parse_comma) +end + +# flisp: parse-eq* +# parse_eq_2 is used where commas are special, for example in an argument list +# function parse_eq_2 + +function parse_assignment(ps::ParseState, down) + ex = down(ps) + t = peek_token(ps) + if !is_prec_assignment(t) + return ex + end + take_token!(ps) + if kind(t) == K"~" + # ~ is the only non-syntactic assignment-precedence operator + TODO("Turn ~ into a call node") + else + RawSyntaxNode + end +end + +#------------------------------------------------------------------------------- + +function parse(code) + tokens = JuliaSyntax.TokenStream(code) + parse_statements(tokens) +end + diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl new file mode 100644 
index 0000000000000..9d467f68f7ca9 --- /dev/null +++ b/JuliaSyntax/src/syntax_tree.jl @@ -0,0 +1,60 @@ +#------------------------------------------------------------------------------- +# Syntax tree types + +# Rules of concrete syntax: +# +# * Every byte is covered by the tree +# * The children (including trivia) cover the span of the parent + +# The rawest version of a parse tree node. +struct RawSyntaxNode + kind::Kind + span::Int + args::Vector{RawSyntaxNode} + # has_diagnostics::Bool +end + +function RawSyntaxNode(kind::Kind, span::Int) + RawSyntaxNode(kind, span, RawSyntaxNode[]) +end + +function RawSyntaxNode(raw::TzTokens.RawToken) + span = 1 + raw.endbyte - raw.startbyte + RawSyntaxNode(kind(raw), span) +end + +function RawSyntaxNode(kind::Kind, args::RawSyntaxNode...) + span = sum(x.span for x in args) + RawSyntaxNode(kind, span, RawSyntaxNode[args...]) +end + +function _show_node(io, node, indent, pos, str) + if isempty(node.args) + line = string(rpad(node.span, 4), indent, _kind_str(node.kind)) + if isnothing(str) + println(io, line) + else + println(io, rpad(line, 40), repr(str[pos:pos + node.span - 1])) + end + else + println(io, rpad(node.span, 4), indent, '[', _kind_str(node.kind), "]") + new_indent = indent*" " + p = pos + for a in node.args + _show_node(io, a, new_indent, p, str) + p += a.span + end + end +end + +function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode) + _show_node(io, node, "", 1, nothing) +end + +function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode, str::String) + _show_node(io, node, "", 1, str) +end + +kind(node::RawSyntaxNode) = node.kind + + diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 2a68fb91212db..5be275d2a0108 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -41,6 +41,7 @@ end using Tokenize.Tokens: Kind, isliteral, iskeyword kind(k::Kind) = k +kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) """ K"s" @@ -53,9 +54,52 
@@ macro K_str(str) return :(Kinds.$name) end +function _kind_str(k::Kind) + if k in (K"Identifier", K"VarIdentifier") + "I" + elseif isliteral(k) + "L" + elseif k == K"Comment" + "C" + elseif k == K"Whitespace" + "W" + elseif k == K"NewlineWs" + "N" + elseif iskeyword(k) + lowercase(string(k)) + elseif TzTokens.isoperator(k) + string(TzTokens.UNICODE_OPS_REVERSE[k]) + elseif k == K"(" + "(" + elseif k == K"[" + "[" + elseif k == K"{" + "{" + elseif k == K")" + ")" + elseif k == K"]" + "]" + elseif k == K"}" + "}" + elseif k == K"@" + "@" + elseif k == K"," + "," + elseif k == K";" + ";" + else + lowercase(string(k)) + end +end """ - A module to give Tokenize tokens better names! +A module to giving literal names to token kinds + +Rules: +* Kinds which correspond to exactly one textural form are represented with that + text. This includes keywords like K"for" and operators like K"*". +* Kinds which represent many textural forms have UpperCamelCase names. This + includes kinds like K"Identifier" and K"Comment". 
""" baremodule Kinds @@ -68,17 +112,18 @@ macro _K(sym) # :(Kind(Tokenize.Tokens.$sym)) end -const ENDMARKER = @_K ENDMARKER -const ERROR = @_K ERROR -const COMMENT = @_K COMMENT -const WHITESPACE = @_K WHITESPACE -const IDENTIFIER = @_K IDENTIFIER +const EndMarker = @_K ENDMARKER +const Error = @_K ERROR +const Comment = @_K COMMENT +const Whitespace = @_K WHITESPACE +const Identifier = @_K IDENTIFIER +const VarIdentifier = @_K VAR_IDENTIFIER const var"@" = @_K AT_SIGN const var"," = @_K COMMA const var";" = @_K SEMICOLON const BEGIN_KEYWORDS = @_K begin_keywords -const KEYWORD = @_K KEYWORD +const Keyword = @_K KEYWORD const var"abstract" = @_K ABSTRACT const var"baremodule" = @_K BAREMODULE const var"begin" = @_K BEGIN @@ -116,26 +161,26 @@ const var"while" = @_K WHILE const END_KEYWORDS = @_K end_keywords const BEGIN_CSTPARSER = @_K begin_cstparser -const INVISIBLE_BRACKETS = @_K INVISIBLE_BRACKETS -const NOTHING = @_K NOTHING -const WS = @_K WS -const SEMICOLON_WS = @_K SEMICOLON_WS -const NEWLINE_WS = @_K NEWLINE_WS -const EMPTY_WS = @_K EMPTY_WS +const InvisibleBrackets = @_K INVISIBLE_BRACKETS +const Nothing = @_K NOTHING +const Ws = @_K WS +const SemicolonWs = @_K SEMICOLON_WS +const NewlineWs = @_K NEWLINE_WS +const EmptyWs = @_K EMPTY_WS const END_CSTPARSER = @_K end_cstparser const BEGIN_LITERAL = @_K begin_literal -const LITERAL = @_K LITERAL -const integer = @_K INTEGER -const bin_int = @_K BIN_INT -const hex_int = @_K HEX_INT -const oct_int = @_K OCT_INT -const float = @_K FLOAT -const string = @_K STRING -const triple_string = @_K TRIPLE_STRING -const char = @_K CHAR -const cmd = @_K CMD -const triple_cmd = @_K TRIPLE_CMD +const Literal = @_K LITERAL +const Integer = @_K INTEGER +const BinInt = @_K BIN_INT +const HexInt = @_K HEX_INT +const OctInt = @_K OCT_INT +const Float = @_K FLOAT +const String = @_K STRING +const TripleString = @_K TRIPLE_STRING +const Char = @_K CHAR +const Cmd = @_K CMD +const TripleCmd = @_K TRIPLE_CMD const var"true" = @_K 
TRUE const var"false" = @_K FALSE const END_LITERAL = @_K end_literal @@ -878,12 +923,17 @@ const END_UNICODE_OPS = @_K end_unicode_ops const END_OPS = @_K end_ops +# Cute synonyms +const var" " = @_K WHITESPACE +const var"\n" = @_K NEWLINE_WS # Our custom syntax tokens +const BEGIN_SYNTAX_KINDS = @_K begin_syntax_kinds const toplevel = @_K TOPLEVEL const call = @_K CALL const block = @_K BLOCK +const END_SYNTAX_KINDS = @_K end_syntax_kinds end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 0ef55803e1eed..90777124af4b5 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -10,4 +10,44 @@ code = """ [1,2, 3] """ +using JuliaSyntax: RawSyntaxNode +using JuliaSyntax: Kind, @K_str +const N = RawSyntaxNode + +code = """ +for i = 1:10 + a + 2 + # hi + c +end +""" + +t = +N(K"for", + N(K"for", 3), + N(K" ", 1), + N(K"=", + N(K"Identifier", 1), + N(K" ", 1), + N(K"=", 1), + N(K" ", 1), + N(K"call", + N(K"Integer", 1), + N(K":", 1), + N(K"Integer", 2))), + N(K"\n", 5), + N(K"call", + N(K"Identifier", 1), + N(K" ", 1), + N(K"+", 1), + N(K" ", 1), + N(K"Integer", 1)), + N(K"\n", 5), + N(K"Comment", 4), + N(K"\n", 5), + N(K"Identifier", 1), + N(K"\n", 1), + N(K"end", 3)) + +show(stdout, MIME"text/plain"(), t, code) From cdeea425bf7c75a2f4fa69d3aa611fddee797a9b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 24 Nov 2021 15:13:58 +1000 Subject: [PATCH 0207/1109] Lex the special form var"..." 
as kind VAR_IDENTIFIER --- JuliaSyntax/src/lexer.jl | 19 +++++++++++++++++-- JuliaSyntax/src/token.jl | 4 +++- JuliaSyntax/src/token_kinds.jl | 1 + JuliaSyntax/test/lexer.jl | 6 ++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index e7b12be536174..667a818279ee6 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -252,7 +252,7 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) suffix = false if kind in (Tokens.ERROR, Tokens.STRING, Tokens.TRIPLE_STRING, Tokens.CMD, Tokens.TRIPLE_CMD) str = String(l.io.data[(l.token_startpos + 1):position(l)]) - elseif (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) + elseif (kind == Tokens.IDENTIFIER || kind == Tokens.VAR_IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) str = String(take!(l.charstore)) elseif optakessuffix(kind) str = "" @@ -790,6 +790,16 @@ function lex_quote(l::Lexer, doemit=true) end end +# Lex var"..." identifiers. 
+# The prefix `var"` has been consumed +function lex_var(l::Lexer) + if read_string(l, Tokens.STRING) + return emit(l, Tokens.VAR_IDENTIFIER) + else + return emit_error(l, Tokens.EOF_VAR) + end +end + function string_terminated(l, kind::Tokens.Kind) if kind == Tokens.STRING && l.chars[1] == '"' return true @@ -1024,7 +1034,11 @@ function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} if n > MAX_KW_LENGTH emit(l, IDENTIFIER) else - emit(l, get(kw_hash, h, IDENTIFIER)) + if h == var_kw_hash && accept(l, '"') + return lex_var(l) + else + return emit(l, get(kw_hash, h, IDENTIFIER)) + end end end @@ -1088,5 +1102,6 @@ Tokens.FALSE, ] const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) +const var_kw_hash = simple_hash("var") end # module diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index c9f50511da06d..55abb84e35c3a 100644 --- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -30,6 +30,7 @@ _add_kws() EOF_STRING, EOF_CHAR, EOF_CMD, + EOF_VAR, INVALID_NUMERIC_CONSTANT, INVALID_OPERATOR, UNKNOWN, @@ -41,6 +42,7 @@ TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( EOF_STRING => "unterminated string literal", EOF_CHAR => "unterminated character literal", EOF_CMD => "unterminated cmd literal", + EOF_VAR => "unterminated var\"...\" identifier", INVALID_NUMERIC_CONSTANT => "invalid numeric constant", INVALID_OPERATOR => "invalid operator", UNKNOWN => "unknown", @@ -100,7 +102,7 @@ endpos(t::AbstractToken) = t.endpos startbyte(t::AbstractToken) = t.startbyte endbyte(t::AbstractToken) = t.endbyte function untokenize(t::Token) - if t.kind == IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == ERROR + if t.kind == IDENTIFIER || t.kind == VAR_IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == ERROR return t.val elseif iskeyword(t.kind) return lowercase(string(t.kind)) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl 
index 89c6928b9ac93..2cafbdcb7f571 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -4,6 +4,7 @@ COMMENT, # aadsdsa, #= fdsf #= WHITESPACE, # '\n \t' IDENTIFIER, # foo, Σxx + VAR_IDENTIFIER, # var"#1" AT_SIGN, # @ COMMA, #, SEMICOLON, # ; diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index 6392d806625ab..957cad69ee9f3 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -182,6 +182,12 @@ end @test tok("somtext falsething", 3).kind == T.IDENTIFIER end +@testset "tokenizing var identifiers" begin + t = tok("var\"#1\"") + @test t.kind == T.VAR_IDENTIFIER && untokenize(t) == "var\"#1\"" + t = tok("var\" \"") + @test t.kind == T.VAR_IDENTIFIER && untokenize(t) == "var\" \"" +end @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .+1")) From c7395aed47177d3fc8e25eeca0ed9d82e8c1663b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 29 Nov 2021 15:18:12 +1000 Subject: [PATCH 0208/1109] More comments about tree design --- JuliaSyntax/README.md | 24 +++++++++++++----------- JuliaSyntax/src/syntax_tree.jl | 11 ++++++++--- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index eb55c1af6d940..b6d93537864db 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -37,19 +37,20 @@ The tree datastructure design here is hard: Let's tackle it by prototyping several important work flows: -1. Precise error reporting in lowering +* Syntax transformations + - Choose some macros to implement +* Refactoring + - A pass to rename local variables +* Precise error reporting in lowering - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment location `[a, b]`". But at a precise source location. - Try something several layers deeper inside lowering. For example "macro definition not allowed inside a local scope" - -2. 
Refactoring +* Refactoring - A pass to rename local variables - -3. Incremental reparsing +* Incremental reparsing - Reparse a source file, given a byte range replacement - -4. Formatting +* Formatting - Re-indent a file @@ -131,7 +132,8 @@ to distinguish between infix and prefix. # Resources * [Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) - - [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees -* Andy Chu (the author of the OIL shell) has written some things about syntax - - https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern - - [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) + - [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) + - [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) +* Andy Chu (the author of the OIL shell) has written some things about this + - Collected links about lossless syntax in [a wiki page](https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern) + - A blog post [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9d467f68f7ca9..b83fc57bd2fa2 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -1,10 +1,15 @@ #------------------------------------------------------------------------------- # Syntax tree types -# Rules of concrete syntax: +# Desired rules of lossless syntax trees: # -# * Every byte is covered by the tree -# * The children (including trivia) cover the span of the parent +# * Every source byte is covered by the tree +# * The children (including trivia) cover the full span of the parent +# * Children occur in source order +# +# Additionally +# * Nodes should be position-independent so that reparsing doesn't disturb them, +# and so 
that it's possible to pool and reuse them (especially leaf nodes!) # The rawest version of a parse tree node. struct RawSyntaxNode From 4e337c045baaca7b39d60abaae6bba1b5285e94d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 29 Nov 2021 22:04:10 +1000 Subject: [PATCH 0209/1109] Prototype of AST built on top of raw tree. * Tag raw nodes with a bit more information to preserve what the parser knows about them. We keep information about - Whether a node is syntax trivia or not. This can be quite context-dependent. - Ordering of children. For example infix vs prefix call syntax. * Add a SyntaxNode AST type on top of the RawSyntaxNode type. This has - Child ordering consistent with Julia's standard AST - Leaves are always stored in SyntaxNodes, rather than being represented as their values. - Values in leaves are parsed and stored eagerly * Add utilities for printing the source text, highlighting parts based on nodes of the raw or abstract syntax trees. --- JuliaSyntax/src/syntax_tree.jl | 186 ++++++++++++++++++++++++++++++--- JuliaSyntax/src/token_kinds.jl | 4 +- JuliaSyntax/test/runtests.jl | 77 ++++++++------ 3 files changed, 221 insertions(+), 46 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index b83fc57bd2fa2..79f38f3609603 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -15,26 +15,55 @@ struct RawSyntaxNode kind::Kind span::Int - args::Vector{RawSyntaxNode} + flags::UInt32 + args::Union{Tuple{},Vector{RawSyntaxNode}} # has_diagnostics::Bool end -function RawSyntaxNode(kind::Kind, span::Int) - RawSyntaxNode(kind, span, RawSyntaxNode[]) +const _RawFlags = UInt32 +TRIVIA_FLAG = 0x00000001 +INFIX_FLAG = 0x00000002 + +function raw_flags(; trivia::Bool=false, infix::Bool=false) + flags = _RawFlags(0) + trivia && (flags |= TRIVIA_FLAG) + infix && (flags |= INFIX_FLAG) + return flags::_RawFlags +end + +function RawSyntaxNode(kind::Kind, span::Int, flags::_RawFlags=0x00000000) + 
RawSyntaxNode(kind, span, flags, ()) end function RawSyntaxNode(raw::TzTokens.RawToken) span = 1 + raw.endbyte - raw.startbyte - RawSyntaxNode(kind(raw), span) + RawSyntaxNode(kind(raw), span, 0, FIXME) end -function RawSyntaxNode(kind::Kind, args::RawSyntaxNode...) +function RawSyntaxNode(kind::Kind, flags::_RawFlags, args::RawSyntaxNode...) span = sum(x.span for x in args) - RawSyntaxNode(kind, span, RawSyntaxNode[args...]) + RawSyntaxNode(kind, span, flags, RawSyntaxNode[args...]) +end + +function RawSyntaxNode(kind::Kind, args::RawSyntaxNode...) + RawSyntaxNode(kind, _RawFlags(0), args...) end -function _show_node(io, node, indent, pos, str) - if isempty(node.args) +# Acessors / predicates +haschildren(node::RawSyntaxNode) = !(node.args isa Tuple{}) +children(node::RawSyntaxNode) = node.args + +istrivia(node::RawSyntaxNode) = node.flags & TRIVIA_FLAG != 0 +isinfix(node::RawSyntaxNode) = node.flags & INFIX_FLAG != 0 + +kind(node::RawSyntaxNode) = node.kind + +# Pretty printing +function _show_raw_node(io, node, indent, pos, str, show_trivia) + if !show_trivia && istrivia(node) + return + end + if !haschildren(node) line = string(rpad(node.span, 4), indent, _kind_str(node.kind)) if isnothing(str) println(io, line) @@ -46,20 +75,151 @@ function _show_node(io, node, indent, pos, str) new_indent = indent*" " p = pos for a in node.args - _show_node(io, a, new_indent, p, str) + _show_raw_node(io, a, new_indent, p, str, show_trivia) p += a.span end end end function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode) - _show_node(io, node, "", 1, nothing) + _show_raw_node(io, node, "", 1, nothing, true) end -function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode, str::String) - _show_node(io, node, "", 1, str) +function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode, str::String; show_trivia=true) + _show_raw_node(io, node, "", 1, str, show_trivia) end -kind(node::RawSyntaxNode) = node.kind 
+#------------------------------------------------------------------------------- +# AST interface, built on top of raw tree + +mutable struct SyntaxNode + raw::RawSyntaxNode + position::Int + parent::Union{Nothing,SyntaxNode} + head::Symbol + val::Any +end +function SyntaxNode(raw::RawSyntaxNode, position::Int, code::String) + if !haschildren(raw) + # Leaf node + k = raw.kind + val_range = position:position + raw.span - 1 + val_str = @view code[val_range] + # Here we parse the values eagerly rather than representing them as + # strings. Maybe this is good. Maybe not. + if k == K"Integer" + val = Base.parse(Int, val_str) + elseif k == K"Identifier" + val = Symbol(val_str) + elseif isoperator(k) + val = Symbol(val_str) + else + error("Can't parse literal of kind $k") + end + return SyntaxNode(raw, position, nothing, :leaf, val) + else + k = raw.kind + head = k == K"call" ? :call : + k == K"toplevel" ? :toplevel : + k == K"block" ? :block : + k == K"for" ? :for : + k == K"=" ? :(=) : + error("Unknown head of kind $k") + cs = SyntaxNode[] + pos = position + for (i,rawchild) in enumerate(children(raw)) + if !istrivia(rawchild) + push!(cs, SyntaxNode(rawchild, pos, code)) + end + pos += rawchild.span + end + # Julia's standard `Expr` ASTs have children stored in a canonical + # order which is not always source order. + # + # Swizzle the children here as necessary to get the canonical order. + if isinfix(raw) + cs[2], cs[1] = cs[1], cs[2] + end + node = SyntaxNode(raw, position, nothing, head, cs) + for c in cs + c.parent = node + end + return node + end +end +haschildren(node::SyntaxNode) = node.head !== :leaf +children(node::SyntaxNode) = haschildren(node) ? 
node.val::Vector{SyntaxNode} : () + +function _show_syntax_node(io, node, indent) + if !haschildren(node) + line = string(rpad(node.position, 4), indent, node.val) + println(io, line) + # rpad(line, 40), repr(str[node.position:node.position + node.span - 1])) + else + println(io, rpad(node.position, 4), indent, '[', _kind_str(kind(node.raw)), ']') + new_indent = indent*" " + for n in children(node) + _show_syntax_node(io, n, new_indent) + end + end +end + +function Base.show(io::IO, ::MIME"text/plain", node::SyntaxNode) + _show_syntax_node(io, node, "") +end + + +#------------------------------------------------------------------------------- +# Tree utilities +""" + child(node, i1, i2, ...) + +Get child at a tree path. If indexing accessed children, it would be +`node[i1][i2][...]` +""" +function child(node, path::Integer...) + n = node + for index in path + n = children(n)[index] + end + return n +end + +""" +Get absolute position and span of the child of `node` at the given tree `path`. +""" +function child_position_span(node::RawSyntaxNode, path::Int...) + n = node + p = 1 + for index in path + cs = children(n) + for i = 1:index-1 + p += cs[i].span + end + n = cs[index] + end + return n, p, n.span +end + +function child_position_span(node::SyntaxNode, path::Int...) + n = child(node, path...) + n, n.position, n.raw.span +end + +""" +Print the code, highlighting the part covered by `node` at tree `path`. +""" +function highlight(code::String, node, path::Int...) + node, p, span = child_position_span(node, path...) 
+ q = p + span + print(code[1:p-1]) + first = true + for linepart in split(code[p:q-1], '\n') + first || print('\n') + print("\e[48;2;20;50;20m", linepart, "\e[0;0m") + first = false + end + print(code[q:end]) +end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 5be275d2a0108..e712027c0abfc 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -38,7 +38,7 @@ function Base.:(==)(k1::Kind, k2::Kind) end =# -using Tokenize.Tokens: Kind, isliteral, iskeyword +using Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator kind(k::Kind) = k kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) @@ -67,7 +67,7 @@ function _kind_str(k::Kind) "N" elseif iskeyword(k) lowercase(string(k)) - elseif TzTokens.isoperator(k) + elseif isoperator(k) string(TzTokens.UNICODE_OPS_REVERSE[k]) elseif k == K"(" "(" diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 90777124af4b5..840014c5a1436 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -6,15 +6,18 @@ using Test #end -code = """ -[1,2, 3] -""" +using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags +using JuliaSyntax: Kind, @K_str, children, child -using JuliaSyntax: RawSyntaxNode -using JuliaSyntax: Kind, @K_str - -const N = RawSyntaxNode +# Trivia nodes +T(k, s) = RawSyntaxNode(k, s, raw_flags(trivia=true)) +# Non-trivia nodes +N(k, s) = RawSyntaxNode(k, s) +N(k, args::RawSyntaxNode...) = RawSyntaxNode(k, args...) +# Non-trivia, infix form +NI(k, args::RawSyntaxNode...) = RawSyntaxNode(k, raw_flags(infix=true), args...) 
+# For this code: code = """ for i = 1:10 a + 2 @@ -23,31 +26,43 @@ for i = 1:10 end """ +# We'd like to produce something the following raw tree t = N(K"for", - N(K"for", 3), - N(K" ", 1), - N(K"=", - N(K"Identifier", 1), - N(K" ", 1), - N(K"=", 1), - N(K" ", 1), - N(K"call", - N(K"Integer", 1), - N(K":", 1), - N(K"Integer", 2))), - N(K"\n", 5), - N(K"call", - N(K"Identifier", 1), - N(K" ", 1), - N(K"+", 1), - N(K" ", 1), - N(K"Integer", 1)), - N(K"\n", 5), - N(K"Comment", 4), - N(K"\n", 5), + T(K"for", 3), + T(K" ", 1), + N(K"=", + N(K"Identifier", 1), + T(K" ", 1), + T(K"=", 1), + T(K" ", 1), + NI(K"call", + N(K"Integer", 1), + N(K":", 1), + N(K"Integer", 2))), + T(K"\n", 5), + N(K"block", + NI(K"call", + N(K"Identifier", 1), + T(K" ", 1), + N(K"+", 1), + T(K" ", 1), + N(K"Integer", 1)), + T(K"\n", 5), + T(K"Comment", 4), + T(K"\n", 5), N(K"Identifier", 1), - N(K"\n", 1), - N(K"end", 3)) + T(K"\n", 1)), + T(K"end", 3)) + +println("\nRawSyntaxNode") +show(stdout, MIME"text/plain"(), t, code, show_trivia=true) + +println("\nSyntaxNode") + +# And the following AST +s = SyntaxNode(t, 1, code) + +#code = "42" +#SyntaxNode(N(K"Integer", 2), 1, code) -show(stdout, MIME"text/plain"(), t, code) From 156ccb9af9a9d812b157f162c677803e972f55f2 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 30 Nov 2021 13:08:06 +1000 Subject: [PATCH 0210/1109] Add SourceFile abstraction --- JuliaSyntax/src/JuliaSyntax.jl | 4 ++++ JuliaSyntax/src/source_files.jl | 32 ++++++++++++++++++++++++++++++++ JuliaSyntax/src/syntax_tree.jl | 24 ++++++++++-------------- JuliaSyntax/src/utils.jl | 18 ++++++++++++++++++ JuliaSyntax/test/runtests.jl | 17 +++++++++++++++-- 5 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 JuliaSyntax/src/source_files.jl create mode 100644 JuliaSyntax/src/utils.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 25355d5537a0b..23aa4643ac5e1 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ 
b/JuliaSyntax/src/JuliaSyntax.jl @@ -4,6 +4,10 @@ import Tokenize using Tokenize.Tokens: RawToken const TzTokens = Tokenize.Tokens +include("utils.jl") + +include("source_files.jl") + include("token_kinds.jl") include("lexer.jl") diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl new file mode 100644 index 0000000000000..09b249cfd6a32 --- /dev/null +++ b/JuliaSyntax/src/source_files.jl @@ -0,0 +1,32 @@ +""" + SourceFile(code [, filename]) + +A UTF-8 source code string with associated file name and indexing structures. +""" +struct SourceFile + # We use `code::String` for now but it could be some other UTF-8 based + # string data structure with byte-based indexing. + # + # For example a rope data structure may be good for incremental editing + # https://en.wikipedia.org/wiki/Rope_(data_structure) + code::String + filename::String + # Probably want to maintain a map of byte_offset -> (line, column) here + # somewhere as well. +end + +function SourceFile(code::AbstractString) + SourceFile(code, "unknown.jl") +end + +function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) + header = "Source: $(source.filename)" + print(io, header, '\n', + repeat('-', textwidth(header)), '\n', + source.code) +end + +function Base.getindex(source::SourceFile, rng::AbstractRange) + @view source.code[rng] +end + diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 79f38f3609603..5ac67a947f23b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -93,6 +93,7 @@ end # AST interface, built on top of raw tree mutable struct SyntaxNode + source::SourceFile raw::RawSyntaxNode position::Int parent::Union{Nothing,SyntaxNode} @@ -100,12 +101,12 @@ mutable struct SyntaxNode val::Any end -function SyntaxNode(raw::RawSyntaxNode, position::Int, code::String) +function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Int) if !haschildren(raw) # Leaf node k = raw.kind val_range = 
position:position + raw.span - 1 - val_str = @view code[val_range] + val_str = source[val_range] # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. if k == K"Integer" @@ -117,7 +118,7 @@ function SyntaxNode(raw::RawSyntaxNode, position::Int, code::String) else error("Can't parse literal of kind $k") end - return SyntaxNode(raw, position, nothing, :leaf, val) + return SyntaxNode(source, raw, position, nothing, :leaf, val) else k = raw.kind head = k == K"call" ? :call : @@ -130,7 +131,7 @@ function SyntaxNode(raw::RawSyntaxNode, position::Int, code::String) pos = position for (i,rawchild) in enumerate(children(raw)) if !istrivia(rawchild) - push!(cs, SyntaxNode(rawchild, pos, code)) + push!(cs, SyntaxNode(source, rawchild, pos)) end pos += rawchild.span end @@ -141,7 +142,7 @@ function SyntaxNode(raw::RawSyntaxNode, position::Int, code::String) if isinfix(raw) cs[2], cs[1] = cs[1], cs[2] end - node = SyntaxNode(raw, position, nothing, head, cs) + node = SyntaxNode(source, raw, position, nothing, head, cs) for c in cs c.parent = node end @@ -211,15 +212,10 @@ end """ Print the code, highlighting the part covered by `node` at tree `path`. """ -function highlight(code::String, node, path::Int...) +function highlight(code::String, node, path::Int...; color=(40,40,70)) node, p, span = child_position_span(node, path...) 
q = p + span - print(code[1:p-1]) - first = true - for linepart in split(code[p:q-1], '\n') - first || print('\n') - print("\e[48;2;20;50;20m", linepart, "\e[0;0m") - first = false - end - print(code[q:end]) + print(stdout, code[1:p-1]) + _printstyled(stdout, code[p:q-1]; color) + print(stdout, code[q:end]) end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl new file mode 100644 index 0000000000000..1458cab19b768 --- /dev/null +++ b/JuliaSyntax/src/utils.jl @@ -0,0 +1,18 @@ + +""" + Like printstyled, but allows providing RGB colors for true color terminals +""" +function _printstyled(io::IO, text; color) + if length(color) != 3 || !all(0 .<= color .< 256) + error("Invalid ansi color $color") + end + colcode = "\e[48;2;$(color[1]);$(color[2]);$(color[3])m" + colreset = "\e[0;0m" + first = true + for linepart in split(text, '\n') + first || print('\n') + print(colcode, linepart, colreset) + first = false + end +end + diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 840014c5a1436..308ea2ec3c6c0 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -6,8 +6,10 @@ using Test #end +using JuliaSyntax: SourceFile using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags using JuliaSyntax: Kind, @K_str, children, child +using JuliaSyntax: highlight # Trivia nodes T(k, s) = RawSyntaxNode(k, s, raw_flags(trivia=true)) @@ -26,6 +28,8 @@ for i = 1:10 end """ +source = SourceFile(code) + # We'd like to produce something the following raw tree t = N(K"for", @@ -40,8 +44,8 @@ N(K"for", N(K"Integer", 1), N(K":", 1), N(K"Integer", 2))), - T(K"\n", 5), N(K"block", + T(K"\n", 5), NI(K"call", N(K"Identifier", 1), T(K" ", 1), @@ -61,8 +65,17 @@ show(stdout, MIME"text/plain"(), t, code, show_trivia=true) println("\nSyntaxNode") # And the following AST -s = SyntaxNode(t, 1, code) +s = SyntaxNode(source, t, 1) #code = "42" #SyntaxNode(N(K"Integer", 2), 1, code) +# Simulate the following Undescores.jl - like macro: + +# 
@U f(x+_, y) +# ↦ +# f(_1 -> x+_1, y) + +# macro U(ex) +# end + From a28f17a3f884f880bd05c995ceab3feaf7f91285 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 30 Nov 2021 16:59:15 +1000 Subject: [PATCH 0211/1109] SourceFile: add utility to look up line numbers from byte offset. Also fix two-argument form of show() for SyntaxNode to use a simple S-Expr --- JuliaSyntax/src/source_files.jl | 22 +++++++++++++++---- JuliaSyntax/src/syntax_tree.jl | 38 +++++++++++++++++++++++++++++++-- JuliaSyntax/test/runtests.jl | 2 +- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 09b249cfd6a32..871f277c5e538 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -11,12 +11,22 @@ struct SourceFile # https://en.wikipedia.org/wiki/Rope_(data_structure) code::String filename::String - # Probably want to maintain a map of byte_offset -> (line, column) here - # somewhere as well. + # String index of start of every line + line_starts::Vector{Int} end -function SourceFile(code::AbstractString) - SourceFile(code, "unknown.jl") +function SourceFile(code::AbstractString; filename="unknown.jl") + line_starts = Int[0] + for i in eachindex(code) + # The line is considered to start after the `\n` + code[i] == '\n' && push!(line_starts, i+1) + end + SourceFile(code, filename, line_starts) +end + +# Get line number of the given byte within the code +function line_number(source::SourceFile, byte_index) + searchsortedlast(source.line_starts, byte_index) end function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) @@ -30,3 +40,7 @@ function Base.getindex(source::SourceFile, rng::AbstractRange) @view source.code[rng] end +function Base.getindex(source::SourceFile, i::Int) + source.code[i] +end + diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 5ac67a947f23b..e9242674e6a62 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ 
b/JuliaSyntax/src/syntax_tree.jl @@ -154,12 +154,20 @@ haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () function _show_syntax_node(io, node, indent) + pos_width = 20 + fname = node.source.filename + maxw = pos_width - 5 + if length(fname) > maxw + fname = node.source.filename[nextind(node.source.filename, end-maxw-1):end] + end + lno = (line_number(node.source, node.position)) + pos = rpad("$fname:$lno", pos_width)*"│" if !haschildren(node) - line = string(rpad(node.position, 4), indent, node.val) + line = string(pos, indent, node.val) println(io, line) # rpad(line, 40), repr(str[node.position:node.position + node.span - 1])) else - println(io, rpad(node.position, 4), indent, '[', _kind_str(kind(node.raw)), ']') + println(io, pos, indent, '[', _kind_str(kind(node.raw)), ']') new_indent = indent*" " for n in children(node) _show_syntax_node(io, n, new_indent) @@ -167,10 +175,36 @@ function _show_syntax_node(io, node, indent) end end +function _show_syntax_node_compact(io, node) + if !haschildren(node) + print(io, node.val) + else + print(io, "($(_kind_str(kind(node.raw))) ") + first = true + for n in children(node) + first || print(io, ' ') + _show_syntax_node_compact(io, n) + first = false + end + print(io, ')') + end +end + function Base.show(io::IO, ::MIME"text/plain", node::SyntaxNode) _show_syntax_node(io, node, "") end +function Base.show(io::IO, node::SyntaxNode) + _show_syntax_node_compact(io, node) +end + +function Base.push!(node::SyntaxNode, child::SyntaxNode) + if !haschildren(node) + error("Cannot add children") + end + args = node.val::Vector{SyntaxNode} + push!(args, child) +end #------------------------------------------------------------------------------- # Tree utilities diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 308ea2ec3c6c0..3f33b29693a71 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -28,7 
+28,7 @@ for i = 1:10 end """ -source = SourceFile(code) +source = SourceFile(code, filename="none.jl") # We'd like to produce something the following raw tree t = From bafb771ca3a69cb3165b50cdc73bb2b46e50ae92 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 30 Nov 2021 23:03:40 +1000 Subject: [PATCH 0212/1109] Use case: show how SyntaxNode works for macro expansion This shows that SyntaxNode works nicely for simple macros based on interpolating expressions into one another. In particular it shows how precise source information from multiple files can coexist within the same syntax tree. --- JuliaSyntax/src/source_files.jl | 13 ++--- JuliaSyntax/src/syntax_tree.jl | 25 ++++++--- JuliaSyntax/test/runtests.jl | 92 ++++++++++++++++++++++++++++++--- 3 files changed, 110 insertions(+), 20 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 871f277c5e538..567eb4fa8b273 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -10,12 +10,12 @@ struct SourceFile # For example a rope data structure may be good for incremental editing # https://en.wikipedia.org/wiki/Rope_(data_structure) code::String - filename::String + filename::Union{Nothing,String} # String index of start of every line line_starts::Vector{Int} end -function SourceFile(code::AbstractString; filename="unknown.jl") +function SourceFile(code::AbstractString; filename=nothing) line_starts = Int[0] for i in eachindex(code) # The line is considered to start after the `\n` @@ -30,10 +30,11 @@ function line_number(source::SourceFile, byte_index) end function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) - header = "Source: $(source.filename)" - print(io, header, '\n', - repeat('-', textwidth(header)), '\n', - source.code) + if !isnothing(source.filename) + print(io, source.filename, '\n', + repeat('-', textwidth(source.filename)), '\n') + end + print(io, source.code) end function Base.getindex(source::SourceFile, 
rng::AbstractRange) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index e9242674e6a62..48f5ac32bc5cf 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -101,7 +101,7 @@ mutable struct SyntaxNode val::Any end -function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Int) +function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Integer=1) if !haschildren(raw) # Leaf node k = raw.kind @@ -113,6 +113,8 @@ function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Int) val = Base.parse(Int, val_str) elseif k == K"Identifier" val = Symbol(val_str) + elseif k == K"String" + val = unescape_string(source[position+1:position+raw.span-2]) elseif isoperator(k) val = Symbol(val_str) else @@ -126,6 +128,8 @@ function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Int) k == K"block" ? :block : k == K"for" ? :for : k == K"=" ? :(=) : + k == K"$" ? :$ : + k == K"quote" ? :quote : error("Unknown head of kind $k") cs = SyntaxNode[] pos = position @@ -150,20 +154,27 @@ function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Int) end end +function interpolate_literal(node::SyntaxNode, val) + @assert node.head == :$ + SyntaxNode(node.source, node.raw, node.position, node.parent, :leaf, val) +end + haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? 
node.val::Vector{SyntaxNode} : () function _show_syntax_node(io, node, indent) pos_width = 20 fname = node.source.filename - maxw = pos_width - 5 - if length(fname) > maxw - fname = node.source.filename[nextind(node.source.filename, end-maxw-1):end] + if !isnothing(fname) + maxw = pos_width - 5 + if length(fname) > maxw + fname = fname[nextind(fname, end-maxw-1):end] + end end lno = (line_number(node.source, node.position)) - pos = rpad("$fname:$lno", pos_width)*"│" + pos = rpad("$fname:$lno", pos_width)*" │" if !haschildren(node) - line = string(pos, indent, node.val) + line = string(pos, indent, repr(node.val)) println(io, line) # rpad(line, 40), repr(str[node.position:node.position + node.span - 1])) else @@ -177,7 +188,7 @@ end function _show_syntax_node_compact(io, node) if !haschildren(node) - print(io, node.val) + print(io, repr(node.val)) else print(io, "($(_kind_str(kind(node.raw))) ") first = true diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 3f33b29693a71..ae578b0d314a5 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -65,17 +65,95 @@ show(stdout, MIME"text/plain"(), t, code, show_trivia=true) println("\nSyntaxNode") # And the following AST -s = SyntaxNode(source, t, 1) +s = SyntaxNode(source, t) #code = "42" #SyntaxNode(N(K"Integer", 2), 1, code) -# Simulate the following Undescores.jl - like macro: +#------------------------------------------------------------------------------- +# The following shows that SyntaxNode works nicely for simple macros based on +# interpolating expressions into one another. In particular it shows how +# precise source information from multiple files can coexist within the same +# syntax tree. -# @U f(x+_, y) -# ↦ -# f(_1 -> x+_1, y) +# First, here's the functionality that we're going to implement as a normal +# Julia macro. It's similar to the standard @show macro. 
+macro show2(ex) + name = sprint(Base.show_unquoted, ex) + quote + value = $(esc(ex)) + println($name, " = ", value) + value + end +end + +# Now, how would this be implemented if we were to do it with SyntaxNode? +# We don't have a parser which is capable of producing our tree structures yet, +# so we need to hand construct all our trees. +function at_show2(ex::SyntaxNode) + code = String(read(@__FILE__)) + name = sprint(JuliaSyntax._show_syntax_node_compact, ex) + quote_begin = (@__LINE__) + 1 + quote + value = $ex + println($name, " = ", value) + value + end + raw = N(K"block", + T(K"quote", 5), + T(K"\n", 9), + N(K"=", + N(K"Identifier", 5), + T(K" ", 1), + T(K"=", 1), + T(K" ", 1), + N(K"$", + T(K"$", 1), + N(K"Identifier", 2)), + T(K"\n", 9)), + N(K"call", + N(K"Identifier", 7), + T(K"(", 1), + N(K"$", + T(K"$", 1), + N(K"Identifier", 4)), + T(K",", 1), + T(K" ", 1), + N(K"String", 5), + T(K",", 1), + T(K" ", 1), + N(K"Identifier", 5), + T(K")", 1)), + T(K"\n", 9), + N(K"Identifier", 5), + T(K"\n", 5), + T(K"end", 3)) + source = SourceFile(code, filename=@__FILE__) + block = SyntaxNode(source, raw, source.line_starts[quote_begin]+4) + # Now that we have the block, we need to interpolate into it. + + # Inserting a SyntaxNode `ex` is simple: + block.val[1].val[2] = ex + # The interpolation of a Julia *value* should inherit the source location + # of the $ interpolation expression. This is different to the + # interpolation of a SyntaxNode, which should just be inserted as-is. + block.val[2].val[2] = JuliaSyntax.interpolate_literal(block.val[2].val[2], name) + block +end + +# Usage of at_show2() -# macro U(ex) -# end +# Let's have some simple expression to pass to at_show2. 
This will be +# attributed to a different file foo.jl +code2 = "foo + 42" +source2 = SourceFile(code2, filename="foo.jl") +s2 = SyntaxNode(source2, NI(K"call", + N(K"Identifier", 3), + T(K" ", 1), + N(K"+", 1), + T(K" ", 1), + N(K"Integer", 2))) +# Calling at_show2, we see that the precise source information is preserved for +# both the surrounding expression and the interpolated fragments. +show(stdout, MIME"text/plain"(), at_show2(s2)) From 7d1f0686c43ce54feff6fe819d77045c8f0b396e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 30 Nov 2021 23:27:40 +1000 Subject: [PATCH 0213/1109] Add column lookup to SourceFile and use it in SyntaxNode display --- JuliaSyntax/src/source_files.jl | 16 +++++++++++++++- JuliaSyntax/src/syntax_tree.jl | 8 ++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 567eb4fa8b273..0d914445668f4 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -16,7 +16,7 @@ struct SourceFile end function SourceFile(code::AbstractString; filename=nothing) - line_starts = Int[0] + line_starts = Int[1] for i in eachindex(code) # The line is considered to start after the `\n` code[i] == '\n' && push!(line_starts, i+1) @@ -29,6 +29,20 @@ function line_number(source::SourceFile, byte_index) searchsortedlast(source.line_starts, byte_index) end +""" +Get line number and character within the line at the given byte index. 
+""" +function source_location(source::SourceFile, byte_index) + line = searchsortedlast(source.line_starts, byte_index) + i = source.line_starts[line] + column = 1 + while i < byte_index + i = nextind(source.code, i) + column += 1 + end + line, column +end + function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) if !isnothing(source.filename) print(io, source.filename, '\n', diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 48f5ac32bc5cf..18a19f1cae1c1 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -163,16 +163,16 @@ haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () function _show_syntax_node(io, node, indent) - pos_width = 20 + pos_width = 25 fname = node.source.filename if !isnothing(fname) - maxw = pos_width - 5 + maxw = pos_width - 10 if length(fname) > maxw fname = fname[nextind(fname, end-maxw-1):end] end end - lno = (line_number(node.source, node.position)) - pos = rpad("$fname:$lno", pos_width)*" │" + line, col = source_location(node.source, node.position) + pos = rpad("$fname:$line:$col", pos_width)*" │" if !haschildren(node) line = string(pos, indent, repr(node.val)) println(io, line) From 4af12a71a8cdfc4b702beded7c2e34a0edcf27a2 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 30 Nov 2021 23:37:33 +1000 Subject: [PATCH 0214/1109] More detailed thoughts about prototyping of the tree data structures. --- JuliaSyntax/README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index b6d93537864db..c91ada17c67cd 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -38,20 +38,21 @@ The tree datastructure design here is hard: Let's tackle it by prototyping several important work flows: * Syntax transformations - - Choose some macros to implement + - Choose some macros to implement. 
This is a basic test of mixing source + trees from different files while preserving precise source locations. +* Formatting + - Re-indent a file. This tests the handling of syntax trivia. * Refactoring - - A pass to rename local variables + - A pass to rename local variables. This tests how information from further + down the compilation pipeline can be attached to the syntax tree and used + to modify the source code. * Precise error reporting in lowering - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment location `[a, b]`". But at a precise source location. - - Try something several layers deeper inside lowering. For example "macro + - Try something several layers deeper inside lowering? For example "macro definition not allowed inside a local scope" -* Refactoring - - A pass to rename local variables * Incremental reparsing - Reparse a source file, given a byte range replacement -* Formatting - - Re-indent a file ## Tree design From 51781f23f2cfc6130f5f6437f3701d4b2209e6e4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 1 Dec 2021 13:41:54 +1000 Subject: [PATCH 0215/1109] Improve pretty printing of SyntaxNode with byte ranges and file names --- JuliaSyntax/src/syntax_tree.jl | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 18a19f1cae1c1..6f4bd5890c916 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -162,26 +162,26 @@ end haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? 
node.val::Vector{SyntaxNode} : () -function _show_syntax_node(io, node, indent) - pos_width = 25 +function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename - if !isnothing(fname) - maxw = pos_width - 10 - if length(fname) > maxw - fname = fname[nextind(fname, end-maxw-1):end] - end - end + #@info "" fname print_fname current_filename[] line, col = source_location(node.source, node.position) - pos = rpad("$fname:$line:$col", pos_width)*" │" - if !haschildren(node) - line = string(pos, indent, repr(node.val)) - println(io, line) - # rpad(line, 40), repr(str[node.position:node.position + node.span - 1])) - else - println(io, pos, indent, '[', _kind_str(kind(node.raw)), ']') + posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+node.raw.span,6))│" + nodestr = !haschildren(node) ? + repr(node.val) : + "[$(_kind_str(kind(node.raw)))]" + treestr = string(indent, nodestr) + # Add filename if it's changed from the previous node + if fname != current_filename[] + #println(io, "# ", fname) + treestr = string(rpad(treestr, 40), "│$fname") + current_filename[] = fname + end + println(io, posstr, treestr) + if haschildren(node) new_indent = indent*" " for n in children(node) - _show_syntax_node(io, n, new_indent) + _show_syntax_node(io, current_filename, n, new_indent) end end end @@ -202,7 +202,8 @@ function _show_syntax_node_compact(io, node) end function Base.show(io::IO, ::MIME"text/plain", node::SyntaxNode) - _show_syntax_node(io, node, "") + println(io, "line:col│ byte_range │ tree │ file_name") + _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "") end function Base.show(io::IO, node::SyntaxNode) From 3e37e3ae65e1266b593bf521a6203cf883c7a29f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 1 Dec 2021 16:45:59 +1000 Subject: [PATCH 0216/1109] Comparisions with the rust-analyzer design --- JuliaSyntax/README.md | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 
insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index c91ada17c67cd..8aade1f74fdad 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -132,9 +132,45 @@ to distinguish between infix and prefix. # Resources -* [Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) - - [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) - - [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) +## C# Roslyn + +[Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) +* [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) +* [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) + +## + +## Oil shell * Andy Chu (the author of the OIL shell) has written some things about this - Collected links about lossless syntax in [a wiki page](https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern) - A blog post [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) + +## Rust-analyzer + +`rust-analyzer` seems to be very close to what I'm buildin here, and has come +to the same conclusions on green tree layout with explicit trivia nodes. Their +document on internals +[here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md) +is great. Points of note: + +* They have *three* trees! + 1. Green trees exactly like mine (pretty much all the same design + decisions, including trivia storage) + 2. Untyped red syntax trees somewhat like ours, but much more minimal. For + example, these don't attempt to reorder children. + 3. A typed AST layer with a type for each expression head. The AST searches + for children by dynamically traversing the child list each time, rather + than having a single canonical ordering. 
+* "Parser does not see whitespace nodes. Instead, they are attached to the + tree in the TreeSink layer." This may be relevant to us - it's a pain to + attach whitespace to otherwise significant tokens, and inefficient to + allocate and pass around a list of whitespace trivia. +* "In practice, incremental reparsing doesn't actually matter much for IDE + use-cases, parsing from scratch seems to be fast enough." +* There's various comments about macros... Rust macro expansion seems quite + different from Julia (it appears it may be interleaved with parsing??) + +In general I think it's unclear whether we want typed ASTs in Julia and we +particularly need to deal with the fact that `Expr` is the existing public +interface. Could we have `Expr2` wrap `SyntaxNode`? + From e7c5e6db029f9e2067e869cbcb407f3cb794e71d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 1 Dec 2021 16:47:52 +1000 Subject: [PATCH 0217/1109] Store UInt32 span in RawSyntaxNode + improve printing --- JuliaSyntax/src/syntax_tree.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 6f4bd5890c916..fe18facf1ec72 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -14,7 +14,7 @@ # The rawest version of a parse tree node. 
struct RawSyntaxNode kind::Kind - span::Int + span::UInt32 flags::UInt32 args::Union{Tuple{},Vector{RawSyntaxNode}} # has_diagnostics::Bool @@ -63,15 +63,16 @@ function _show_raw_node(io, node, indent, pos, str, show_trivia) if !show_trivia && istrivia(node) return end + posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span, 6)) |" if !haschildren(node) - line = string(rpad(node.span, 4), indent, _kind_str(node.kind)) + line = string(posstr, indent, _kind_str(node.kind)) if isnothing(str) println(io, line) else println(io, rpad(line, 40), repr(str[pos:pos + node.span - 1])) end else - println(io, rpad(node.span, 4), indent, '[', _kind_str(node.kind), "]") + println(io, posstr, indent, '[', _kind_str(node.kind), "]") new_indent = indent*" " p = pos for a in node.args From df615a34769e05b4e9f34ed82f9e69aed3fa43e3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 1 Dec 2021 16:48:21 +1000 Subject: [PATCH 0218/1109] setchild! for setting SyntaxNode children --- JuliaSyntax/src/syntax_tree.jl | 21 +++++++++++++++++++++ JuliaSyntax/test/runtests.jl | 30 +++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index fe18facf1ec72..2634120a5f097 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -235,6 +235,27 @@ function child(node, path::Integer...) return n end +function setchild!(node::SyntaxNode, path, x) + n1 = child(node, path[1:end-1]...) + n1.val[path[end]] = x +end + +# We can overload multidimensional Base.getindex / Base.setindex! for node +# types. +# +# The justification for this is to view a tree as a multidimensional ragged +# array, where descending depthwise into the tree corresponds to dimensions of +# the array. +# +# However... this analogy is only good for complete trees at a given depth (= +# dimension). But the syntax is oh-so-handy! +function Base.getindex(node::Union{SyntaxNode,RawSyntaxNode}, path::Int...) 
+ child(node, path...) +end +function Base.setindex!(node::SyntaxNode, x::SyntaxNode, path::Int...) + setchild!(node, path, x) +end + """ Get absolute position and span of the child of `node` at the given tree `path`. """ diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index ae578b0d314a5..691a4f272c3ef 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -8,9 +8,12 @@ using Test using JuliaSyntax: SourceFile using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags -using JuliaSyntax: Kind, @K_str, children, child +using JuliaSyntax: Kind, @K_str, children, child, setchild! using JuliaSyntax: highlight +#------------------------------------------------------------------------------- +# Raw syntax tree and AST layering + # Trivia nodes T(k, s) = RawSyntaxNode(k, s, raw_flags(trivia=true)) # Non-trivia nodes @@ -59,18 +62,21 @@ N(K"for", T(K"\n", 1)), T(K"end", 3)) +# And the following AST +s = SyntaxNode(source, t) + println("\nRawSyntaxNode") show(stdout, MIME"text/plain"(), t, code, show_trivia=true) println("\nSyntaxNode") - -# And the following AST -s = SyntaxNode(source, t) +show(stdout, MIME"text/plain"(), s) #code = "42" #SyntaxNode(N(K"Integer", 2), 1, code) #------------------------------------------------------------------------------- +# # Macros and expression interpolation + # The following shows that SyntaxNode works nicely for simple macros based on # interpolating expressions into one another. In particular it shows how # precise source information from multiple files can coexist within the same @@ -93,6 +99,8 @@ end function at_show2(ex::SyntaxNode) code = String(read(@__FILE__)) name = sprint(JuliaSyntax._show_syntax_node_compact, ex) + # The following quote block is not used directly, but the text for it is + # re-read from `code`. 
quote_begin = (@__LINE__) + 1 quote value = $ex @@ -132,12 +140,13 @@ function at_show2(ex::SyntaxNode) block = SyntaxNode(source, raw, source.line_starts[quote_begin]+4) # Now that we have the block, we need to interpolate into it. - # Inserting a SyntaxNode `ex` is simple: - block.val[1].val[2] = ex + # Interpolating a SyntaxNode `ex` is simple: + setchild!(block, (1, 2), ex) # The interpolation of a Julia *value* should inherit the source location # of the $ interpolation expression. This is different to the # interpolation of a SyntaxNode, which should just be inserted as-is. - block.val[2].val[2] = JuliaSyntax.interpolate_literal(block.val[2].val[2], name) + setchild!(block, (2, 2), + JuliaSyntax.interpolate_literal(block.val[2].val[2], name)) block end @@ -156,4 +165,11 @@ s2 = SyntaxNode(source2, NI(K"call", # Calling at_show2, we see that the precise source information is preserved for # both the surrounding expression and the interpolated fragments. +println("\nInterpolation example") show(stdout, MIME"text/plain"(), at_show2(s2)) + + +#------------------------------------------------------------------------------- +# # Formatting + + From 224aeb2de2de0be38a74c3d6f9914507c92c0788 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 3 Dec 2021 12:38:16 +1000 Subject: [PATCH 0219/1109] More comparative notes to rust-analyzer --- JuliaSyntax/README.md | 39 ++++++++++++++++++++++++++++++++++----- JuliaSyntax/src/lexer.jl | 16 ++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 8aade1f74fdad..8060976fb8b73 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -155,18 +155,22 @@ is great. Points of note: * They have *three* trees! 1. Green trees exactly like mine (pretty much all the same design - decisions, including trivia storage) - 2. Untyped red syntax trees somewhat like ours, but much more minimal. For + decisions, including trivia storage). 
Though note that the team are still + [toying with](https://github.com/rust-analyzer/rust-analyzer/issues/6584) + the idea of using the Roslyn model of trivia. + 2. Untyped red syntax trees somewhat like mine, but much more minimal. For example, these don't attempt to reorder children. 3. A typed AST layer with a type for each expression head. The AST searches for children by dynamically traversing the child list each time, rather - than having a single canonical ordering. + than having a single canonical ordering or remembering the placement of + children which the parser knew. * "Parser does not see whitespace nodes. Instead, they are attached to the tree in the TreeSink layer." This may be relevant to us - it's a pain to attach whitespace to otherwise significant tokens, and inefficient to - allocate and pass around a list of whitespace trivia. + allocate and pass around a dynamic list of whitespace trivia. * "In practice, incremental reparsing doesn't actually matter much for IDE - use-cases, parsing from scratch seems to be fast enough." + use-cases, parsing from scratch seems to be fast enough." (I wonder why + they've implemented incremental parsing then?) * There's various comments about macros... Rust macro expansion seems quite different from Julia (it appears it may be interleaved with parsing??) @@ -174,3 +178,28 @@ In general I think it's unclear whether we want typed ASTs in Julia and we particularly need to deal with the fact that `Expr` is the existing public interface. Could we have `Expr2` wrap `SyntaxNode`? +* A related very useful set of blog posts which discuss using the rust syntax + tree library (rowan) for representing of a non-rust toy language is here + https://dev.to/cad97/lossless-syntax-trees-280c + +Not all the design decisions in `rust-analyzer` are finalized but the +[architecture document](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/architecture.md) +is a fantastic source of design inspiration. 
+ +Highlights: +* "The parser is independent of the particular tree structure and particular + representation of the tokens. It transforms one flat stream of events into + another flat stream of events." This seems great, let's adopt it! +* TODO + + +## General resources about parsing + +* [Modern parser generator](https://matklad.github.io/2018/06/06/modern-parser-generator.html) + has a lot of practical notes on writing parsers. Highlights: + - Encourages writing tests for handwritten parsers as inline comments + - Mentions [Pratt parsers](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) for operator precedence. + - Some discussion of error recovery + +## `rust-analyzer` + diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 339ebce59c3dc..08b72cb7cce74 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -51,6 +51,22 @@ mutable struct TokenStream hasnext2::Bool end +# TODO: replace TokenStream with "ParseStream"/"ParserIO"/? interface +# +# This would be an I/O interface for the parser +# - Input: Provides input tokens to the parser +# - Output: Accepts tree output events from the parser +# +# Such an interface can be used to decouple parsing from the input and output +# representations as is done in rust-analyzer's TreeSink. Part of the point of +# this is to have a place to preserve whitespace trivia outside the parser. ( +# The rust TextTreeSink is oddly named, as it appears to be used for both +# getting tokens and emitting nodes... 
see +# https://github.com/rust-analyzer/rust-analyzer/blob/4691a0647b2c96cc475d8bbe7c31fe194d1443e7/crates/syntax/src/parsing/text_tree_sink.rs ) +# +# struct ParseStream +# end + function TokenStream(code) lexer = Tokenize.tokenize(code, RawToken) TokenStream(lexer, EMPTY_TOKEN, EMPTY_TOKEN, false, false) From c043170494b33bec8514db066236401bff306f94 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 3 Dec 2021 22:10:14 +1000 Subject: [PATCH 0220/1109] More notes about syntax tree design options --- JuliaSyntax/src/syntax_tree.jl | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 2634120a5f097..be94c02881397 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -11,18 +11,35 @@ # * Nodes should be position-independent so that reparsing doesn't disturb them, # and so that it's possible to pool and reuse them (especially leaf nodes!) -# The rawest version of a parse tree node. +""" +The rawest version of a lossless syntax tree. + +Design principles: +* Tree should remember what the lexer and parser knew about the source code +* Be position-independent so nodes can be interned and reused + +Design alternatives to explore: +* Maybe allow some loss of local parser state if it can be derived again + quickly? Particularly in the ordering of children. +* Store strings for tokens? (Surprisingly, rust-analyzer does this. It could be + efficient if the strings or nodes are interned for the parsing session?) +* Never construct this tree? Instead serialize it to Vector{UInt8} in an + efficient but compact format? Could this be more flexible with storing parser + state and beat the interning approach? We could also store the source tokens + in the serialization and discard the source text. (Caveat - unclear that this + could deal with incremental parsing...) 
+""" struct RawSyntaxNode kind::Kind span::UInt32 flags::UInt32 args::Union{Tuple{},Vector{RawSyntaxNode}} - # has_diagnostics::Bool end const _RawFlags = UInt32 TRIVIA_FLAG = 0x00000001 INFIX_FLAG = 0x00000002 +# DIAGNOSTICS_FLAG function raw_flags(; trivia::Bool=false, infix::Bool=false) flags = _RawFlags(0) @@ -93,6 +110,11 @@ end #------------------------------------------------------------------------------- # AST interface, built on top of raw tree +""" +Design options: +* rust-analyzer treats their version of an untyped syntax node as a cursor into + the green tree. They deallocate aggressively. +""" mutable struct SyntaxNode source::SourceFile raw::RawSyntaxNode From a3cd8b4cf0a8495ee3fee101e522e7b3f9eb4b77 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 4 Dec 2021 15:25:19 +1000 Subject: [PATCH 0221/1109] Rewrite TokenStream -> ParseStream ParseStream handles building the syntax tree and consuming whitespace tokens so that the parser can be decoupled from these considerations. 
--- JuliaSyntax/src/JuliaSyntax.jl | 3 +- JuliaSyntax/src/lexer.jl | 172 ----------------------- JuliaSyntax/src/parse_stream.jl | 165 ++++++++++++++++++++++ JuliaSyntax/src/parser.jl | 6 +- JuliaSyntax/src/syntax_tree.jl | 10 +- JuliaSyntax/src/token_kinds.jl | 19 +-- JuliaSyntax/test/parse_stream.jl | 52 +++++++ JuliaSyntax/test/runtests.jl | 163 +-------------------- JuliaSyntax/test/syntax_interpolation.jl | 92 ++++++++++++ JuliaSyntax/test/syntax_trees.jl | 55 ++++++++ 10 files changed, 393 insertions(+), 344 deletions(-) delete mode 100644 JuliaSyntax/src/lexer.jl create mode 100644 JuliaSyntax/src/parse_stream.jl create mode 100644 JuliaSyntax/test/parse_stream.jl create mode 100644 JuliaSyntax/test/syntax_interpolation.jl create mode 100644 JuliaSyntax/test/syntax_trees.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 23aa4643ac5e1..c9dd4e7c6ea70 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -9,9 +9,10 @@ include("utils.jl") include("source_files.jl") include("token_kinds.jl") -include("lexer.jl") include("syntax_tree.jl") +include("parse_stream.jl") + include("parser.jl") end diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl deleted file mode 100644 index 08b72cb7cce74..0000000000000 --- a/JuliaSyntax/src/lexer.jl +++ /dev/null @@ -1,172 +0,0 @@ -#------------------------------------------------------------------------------- -""" -`SyntaxToken` covers a contiguous range of the source text which contains a -token *relevant for parsing*, with a possibly-irrelevant prefix of "token -trivia". Trivial tokens include -* Whitespace -* Comments - -Note that "triviality" of tokens is context-dependent in general. For example, -the parentheses in `(1+2)*3` are important for parsing but are irrelevant after -the abstract syntax tree is constructed. -""" -struct SyntaxToken - # TODO: Could use a more stripped down version of RawToken which only - # stores byte offsets? 
- leading_trivia::RawToken - raw::RawToken -end - -function Base.show(io::IO, t::SyntaxToken) - fullrange = string(lpad(t.leading_trivia.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) - - range = string(lpad(t.raw.startbyte+1, 3), ":", rpad(t.raw.endbyte+1, 3)) - print(io, rpad(string(fullrange, "│", range), 17, " "), rpad(kind(t), 15, " ")) -end - - -kind(tok::SyntaxToken) = tok.raw.kind - -# summary_kind(tok::SyntaxToken) = TzTokens.kind(tok.raw) - -const EMPTY_RAW_TOKEN = RawToken() -const EMPTY_TOKEN = SyntaxToken(RawToken(), RawToken()) - - -#------------------------------------------------------------------------------- -""" -TokenStream wraps the lexer from Tokenize.jl with a short putback buffer and -condenses syntactically irrelevant whitespace tokens into "syntax trivia" which -are attached to other tokens. -""" -mutable struct TokenStream - lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} - # We buffer up to two tokens here, with `next2` taken before `next1`. It - # suffices to support only a single putback token (which always goes into - # `next2`). The presence of a valid token in `next2` does not imply there's - # one in `next1`. - next1::SyntaxToken - next2::SyntaxToken - hasnext1::Bool - hasnext2::Bool -end - -# TODO: replace TokenStream with "ParseStream"/"ParserIO"/? interface -# -# This would be an I/O interface for the parser -# - Input: Provides input tokens to the parser -# - Output: Accepts tree output events from the parser -# -# Such an interface can be used to decouple parsing from the input and output -# representations as is done in rust-analyzer's TreeSink. Part of the point of -# this is to have a place to preserve whitespace trivia outside the parser. ( -# The rust TextTreeSink is oddly named, as it appears to be used for both -# getting tokens and emitting nodes... 
see -# https://github.com/rust-analyzer/rust-analyzer/blob/4691a0647b2c96cc475d8bbe7c31fe194d1443e7/crates/syntax/src/parsing/text_tree_sink.rs ) -# -# struct ParseStream -# end - -function TokenStream(code) - lexer = Tokenize.tokenize(code, RawToken) - TokenStream(lexer, EMPTY_TOKEN, EMPTY_TOKEN, false, false) -end - -function Base.show(io::IO, mime::MIME"text/plain", ts::TokenStream) - print(io, TokenStream, ":\n lexer = ") - show(io, mime, ts.lexer) - if ts.hasnext2 - print(io, "\n next2 = ", ts.next2) - end - if ts.hasnext1 - print(io, "\n next1 = ", ts.next1) - end -end - -# Iterator interface -Base.IteratorSize(::Type{TokenStream}) = Base.SizeUnknown() -Base.IteratorEltype(::Type{TokenStream}) = Base.HasEltype() -Base.eltype(::Type{TokenStream}) = SyntaxToken - -function Base.iterate(ts::TokenStream, end_state=false) - end_state && return nothing - t = take_token!(ts) - return t, kind(t) == K"ENDMARKER" -end - -function _read_raw_token(lexer::Tokenize.Lexers.Lexer) - c = Tokenize.Lexers.peekchar(lexer) - if isspace(c) - Tokenize.Lexers.start_token!(lexer) - # We lex whitespace slightly differently from Tokenize.jl, as newlines - # are syntactically significant - if Tokenize.Lexers.accept(lexer, '\n') - return Tokenize.Lexers.emit(lexer, K"NEWLINE_WS") - else - Tokenize.Lexers.readon(lexer) - Tokenize.Lexers.accept_batch(lexer, c->isspace(c) && c != '\n') - return Tokenize.Lexers.emit(lexer, K"WHITESPACE") - end - else - return Tokenize.Lexers.next_token(lexer) - end -end - -function _read_token(lexer::Tokenize.Lexers.Lexer) - # No token - do the actual work of taking a token from the lexer - raw = _read_raw_token(lexer) - if TzTokens.exactkind(raw) in (K"WHITESPACE", K"COMMENT") - # TODO: *Combine* comments with whitespace here to get a single leading - # trivia item per real token. 
- leading_trivia = raw - raw = _read_raw_token(lexer) - else - leading_trivia = RawToken(K"ERROR", (0,0), (0,0), - raw.startbyte, raw.startbyte-1, - TzTokens.NO_ERR, false, false) - end - return SyntaxToken(leading_trivia, raw) -end - -# Return next token in the stream, but don't remove it. -function peek_token(ts::TokenStream) - ts.hasnext2 && return ts.next2 - ts.hasnext1 && return ts.next1 - ts.next1 = _read_token(ts.lexer) - ts.hasnext1 = true - return ts.next1 -end - -# Like peek_token, but -# * EOF becomes an error -# * Newlines tokens are gobbled (TODO!) -function require_token(ts::TokenStream) - tok = peek_token(ts) - if kind(tok) == K"ENDMARKER" - error("incomplete: premature end of input") - end - return tok -end - -# Remove next token from the stream and return it. -function take_token!(ts::TokenStream) - if ts.hasnext2 - ts.hasnext2 = false - return ts.next2 - end - if ts.hasnext1 - ts.hasnext1 = false - return ts.next1 - end - # This line is a departure from the scheme parser, which requires - # peek_token to be called - return _read_token(ts.lexer) -end - -function put_back!(ts::TokenStream, tok::RawToken) - ts.hasnext2 || error("Cannot put back two tokens") - ts.next2 = tok -end - -is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" - diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl new file mode 100644 index 0000000000000..aa475a3fe0b24 --- /dev/null +++ b/JuliaSyntax/src/parse_stream.jl @@ -0,0 +1,165 @@ +#------------------------------------------------------------------------------- +""" +`SyntaxToken` covers a contiguous range of the source text which contains a +token *relevant for parsing*. Syntax trivia (comments and whitespace) is dealt +with separately, though `SyntaxToken` does include some minimal information +about whether these were present. + +This does not include tokens include +* Whitespace +* Comments + +Note that "triviality" of tokens is context-dependent in general. 
For example, +the parentheses in `(1+2)*3` are important for parsing but are irrelevant after +the abstract syntax tree is constructed. +""" +struct SyntaxToken + raw::RawToken + # Flags for leading whitespace + had_whitespace::Bool + had_newline::Bool +end + +function Base.show(io::IO, tok::SyntaxToken) + range = string(lpad(start_byte(tok), 3), ":", rpad(end_byte(tok), 3)) + print(io, rpad(range, 17, " "), rpad(kind(tok), 15, " ")) +end + +kind(tok::SyntaxToken) = tok.raw.kind +start_byte(tok::SyntaxToken) = tok.raw.startbyte + 1 +end_byte(tok::SyntaxToken) = tok.raw.endbyte + 1 +span(tok::SyntaxToken) = end_byte(tok) - start_byte(tok) + 1 + + +#------------------------------------------------------------------------------- + +""" +ParseStream provides an IO interface for the parser. It +- Wraps the lexer from Tokenize.jl with a short lookahead buffer +- Removes whitespace and comment tokens, shifting them into the output implicitly +- Provides a begin_node/end_node interface to emit the parsed tree structure + +This is simililar to rust-analyzer's +[TextTreeSink](https://github.com/rust-analyzer/rust-analyzer/blob/4691a0647b2c96cc475d8bbe7c31fe194d1443e7/crates/syntax/src/parsing/text_tree_sink.rs) +""" +mutable struct ParseStream + lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} + lookahead::Vector{SyntaxToken} + trivia_buf::Vector{SyntaxToken} + current_end_byte::Int # Byte index of the last *consumed* token + pending_node_stack::Vector{Tuple{Vector{RawSyntaxNode},Int}} +end + +function ParseStream(code) + lexer = Tokenize.tokenize(code, RawToken) + ParseStream(lexer, SyntaxToken[], SyntaxToken[], 0, Vector{RawSyntaxNode}[]) +end + +function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) + print(io, ParseStream, ":\n lexer = ") + show(io, mime, stream.lexer) +end + +# Iterator interface +#= +Base.IteratorSize(::Type{ParseStream}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{ParseStream}) = Base.HasEltype() +Base.eltype(::Type{ParseStream}) 
= SyntaxToken + +function Base.iterate(stream::ParseStream, end_state=false) + end_state && return nothing + t = peek(stream) + bump!() + return t, kind(t) == K"EndMarker" +end +=# + +# Read one nontrivia token; shift trivia into stream.trivia_buf +function _read_token(stream::ParseStream) + had_whitespace = false + had_newline = false + while true + raw = Tokenize.Lexers.next_token(stream.lexer) + k = TzTokens.exactkind(raw) + if k in (K"Whitespace", K"Comment", K"NewlineWs") + had_whitespace = true + had_newline = k == K"NewlineWs" + push!(stream.trivia_buf, SyntaxToken(raw, false, false)) + continue + end + return SyntaxToken(raw, had_whitespace, had_newline) + end +end + +""" + peek(stream [, n=1]) + +Look ahead in the stream `n` tokens. +""" +function peek(stream::ParseStream, n::Integer=1) + if length(stream.lookahead) < n + for i=1:(n-length(stream.lookahead)) + push!(stream.lookahead, _read_token(stream)) + end + end + return stream.lookahead[n] +end + +function _current_node_children(stream::ParseStream) + last(stream.pending_node_stack)[1] +end + +""" + bump!(stream) + +Remove next token from the stream and add it as a syntax leaf to the current +output node. + +`bump!` returns `nothing` to make synchronization with the output stream +clearer. To see token values use `peek()`. +""" +function bump!(stream::ParseStream, flags::_RawFlags=EMPTY_FLAGS) + if isempty(stream.pending_node_stack) + error("Cannot bump! stream outside begin_node-end_node pair because this would loose input tokens") + end + tok = isempty(stream.lookahead) ? + _read_token(stream) : + popfirst!(stream.lookahead) # TODO: use a circular buffer? 
+ while true + if #==# isempty(stream.trivia_buf) || + start_byte(first(stream.trivia_buf)) > start_byte(tok) + break + end + t = popfirst!(stream.trivia_buf) + trivia_node = RawSyntaxNode(kind(t), span(t), TRIVIA_FLAG) + push!(_current_node_children(stream), trivia_node) + end + node = RawSyntaxNode(kind(tok), span(tok), flags) + push!(_current_node_children(stream), node) + stream.current_end_byte = end_byte(tok) + nothing +end + + +#------------------------------------------------------------------------------- +# ParseStream tree output interface + +function begin_node(stream::ParseStream) + # TODO: Add a trivia heuristic here and in end_node so that whitespace and + # comments attach to nodes more usefully. May need some hint from the + # parser (eg, designating nodes which tend to be "block" vs "inline"?) for + # this to work well. + node_begin_byte = stream.current_end_byte + 1 + push!(stream.pending_node_stack, (RawSyntaxNode[], node_begin_byte)) + nothing +end + +function end_node(stream::ParseStream, k::Kind, flags::_RawFlags=EMPTY_FLAGS) + children, node_begin_byte = pop!(stream.pending_node_stack) + span = stream.current_end_byte - node_begin_byte + 1 + node = RawSyntaxNode(k, span, flags, children) + if !isempty(stream.pending_node_stack) + push!(last(stream.pending_node_stack)[1], node) + end + return node +end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 65e9b90c7625c..be3d3de45bc2f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -4,7 +4,7 @@ tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix literals we're in "whitespace sensitive" mode, and `[x -y]` means [(x) (-y)]. """ struct ParseState - tokens::TokenStream + tokens::ParseStream # Vesion of Julia we're parsing this code for. May be different from VERSION! 
julia_version::VersionNumber @@ -23,7 +23,7 @@ struct ParseState end # Normal context -function ParseState(tokens::TokenStream; julia_version=VERSION) +function ParseState(tokens::ParseStream; julia_version=VERSION) ParseState(tokens, julia_version, true, false, true, false, false, false) end @@ -188,7 +188,7 @@ end #------------------------------------------------------------------------------- function parse(code) - tokens = JuliaSyntax.TokenStream(code) + tokens = JuliaSyntax.ParseStream(code) parse_statements(tokens) end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index be94c02881397..e53bb53b91609 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -37,6 +37,7 @@ struct RawSyntaxNode end const _RawFlags = UInt32 +EMPTY_FLAGS = 0x00000000 TRIVIA_FLAG = 0x00000001 INFIX_FLAG = 0x00000002 # DIAGNOSTICS_FLAG @@ -48,7 +49,7 @@ function raw_flags(; trivia::Bool=false, infix::Bool=false) return flags::_RawFlags end -function RawSyntaxNode(kind::Kind, span::Int, flags::_RawFlags=0x00000000) +function RawSyntaxNode(kind::Kind, span::Int, flags::_RawFlags=EMPTY_FLAGS) RawSyntaxNode(kind, span, flags, ()) end @@ -80,13 +81,16 @@ function _show_raw_node(io, node, indent, pos, str, show_trivia) if !show_trivia && istrivia(node) return end - posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span, 6)) |" + posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span, 6)) │" if !haschildren(node) line = string(posstr, indent, _kind_str(node.kind)) + if !istrivia(node) + line = rpad(line, 40) * "✔" + end if isnothing(str) println(io, line) else - println(io, rpad(line, 40), repr(str[pos:pos + node.span - 1])) + println(io, rpad(line, 42), ' ', repr(str[pos:pos + node.span - 1])) end else println(io, posstr, indent, '[', _kind_str(node.kind), "]") diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index e712027c0abfc..1943ebb9065f8 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ 
b/JuliaSyntax/src/token_kinds.jl @@ -40,9 +40,6 @@ end using Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator -kind(k::Kind) = k -kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) - """ K"s" @@ -54,17 +51,22 @@ macro K_str(str) return :(Kinds.$name) end +kind(k::Kind) = k +kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) + +is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" + function _kind_str(k::Kind) if k in (K"Identifier", K"VarIdentifier") - "I" + "Identifier" elseif isliteral(k) - "L" + "Literal" elseif k == K"Comment" - "C" + "Comment" elseif k == K"Whitespace" - "W" + "Whitespace" elseif k == K"NewlineWs" - "N" + "NewlineWs" elseif iskeyword(k) lowercase(string(k)) elseif isoperator(k) @@ -932,6 +934,7 @@ const var"\n" = @_K NEWLINE_WS const BEGIN_SYNTAX_KINDS = @_K begin_syntax_kinds const toplevel = @_K TOPLEVEL const call = @_K CALL +const ref = @_K REF const block = @_K BLOCK const END_SYNTAX_KINDS = @_K end_syntax_kinds diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl new file mode 100644 index 0000000000000..0f28f11b9314a --- /dev/null +++ b/JuliaSyntax/test/parse_stream.jl @@ -0,0 +1,52 @@ +# Prototype ParseStream interface +# +# Here we test the ParseStream interface, by taking input code and checking +# that the correct sequence of begin_node, end_node and bump!() produces a +# valid parse tree. + +code = """ +for i = 1:10 + xx[i] + 2 + # hi + yy +end +""" + +st = ParseStream(code) + +# Here we manually issue parse events in the order a Julia parser would issue +# them (if such a parser existed... which it doesn't yet!) 
+begin_node(st) + bump!(st, TRIVIA_FLAG) # for + begin_node(st) + bump!(st) # 'i' + bump!(st, TRIVIA_FLAG) # = + begin_node(st) + bump!(st) # 1 + bump!(st) # : + bump!(st) # 10 + end_node(st, K"call", INFIX_FLAG) + end_node(st, K"=") + begin_node(st) + begin_node(st) # [call] + begin_node(st) # [ref] + bump!(st) # xx + bump!(st, TRIVIA_FLAG) # [ + bump!(st) # i + bump!(st, TRIVIA_FLAG) # ] + end_node(st, K"ref") + bump!(st) # + + bump!(st) # 2 + end_node(st, K"call", INFIX_FLAG) + bump!(st) # yy + end_node(st, K"block") + bump!(st, TRIVIA_FLAG) # end +t = end_node(st, K"for") + +# ## Input code +println("-----------------------") +print(code) +println() + +# ## Output tree +show(stdout, MIME"text/plain"(), t, code, show_trivia=true) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 691a4f272c3ef..7448d45e58d57 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,18 +1,13 @@ using JuliaSyntax using Test -#@testset "JuliaSyntax.jl" begin - # Write your tests here. -#end - - using JuliaSyntax: SourceFile -using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags +using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG using JuliaSyntax: Kind, @K_str, children, child, setchild! using JuliaSyntax: highlight +using JuliaSyntax: ParseStream, bump!, peek, begin_node, end_node -#------------------------------------------------------------------------------- -# Raw syntax tree and AST layering +# Shortcuts for defining raw syntax nodes # Trivia nodes T(k, s) = RawSyntaxNode(k, s, raw_flags(trivia=true)) @@ -22,154 +17,8 @@ N(k, args::RawSyntaxNode...) = RawSyntaxNode(k, args...) # Non-trivia, infix form NI(k, args::RawSyntaxNode...) = RawSyntaxNode(k, raw_flags(infix=true), args...) 
-# For this code: -code = """ -for i = 1:10 - a + 2 - # hi - c -end -""" - -source = SourceFile(code, filename="none.jl") - -# We'd like to produce something the following raw tree -t = -N(K"for", - T(K"for", 3), - T(K" ", 1), - N(K"=", - N(K"Identifier", 1), - T(K" ", 1), - T(K"=", 1), - T(K" ", 1), - NI(K"call", - N(K"Integer", 1), - N(K":", 1), - N(K"Integer", 2))), - N(K"block", - T(K"\n", 5), - NI(K"call", - N(K"Identifier", 1), - T(K" ", 1), - N(K"+", 1), - T(K" ", 1), - N(K"Integer", 1)), - T(K"\n", 5), - T(K"Comment", 4), - T(K"\n", 5), - N(K"Identifier", 1), - T(K"\n", 1)), - T(K"end", 3)) - -# And the following AST -s = SyntaxNode(source, t) - -println("\nRawSyntaxNode") -show(stdout, MIME"text/plain"(), t, code, show_trivia=true) - -println("\nSyntaxNode") -show(stdout, MIME"text/plain"(), s) - -#code = "42" -#SyntaxNode(N(K"Integer", 2), 1, code) - -#------------------------------------------------------------------------------- -# # Macros and expression interpolation - -# The following shows that SyntaxNode works nicely for simple macros based on -# interpolating expressions into one another. In particular it shows how -# precise source information from multiple files can coexist within the same -# syntax tree. - -# First, here's the functionality that we're going to implement as a normal -# Julia macro. It's similar to the standard @show macro. -macro show2(ex) - name = sprint(Base.show_unquoted, ex) - quote - value = $(esc(ex)) - println($name, " = ", value) - value - end -end - -# Now, how would this be implemented if we were to do it with SyntaxNode? -# We don't have a parser which is capable of producing our tree structures yet, -# so we need to hand construct all our trees. -function at_show2(ex::SyntaxNode) - code = String(read(@__FILE__)) - name = sprint(JuliaSyntax._show_syntax_node_compact, ex) - # The following quote block is not used directly, but the text for it is - # re-read from `code`. 
- quote_begin = (@__LINE__) + 1 - quote - value = $ex - println($name, " = ", value) - value - end - raw = N(K"block", - T(K"quote", 5), - T(K"\n", 9), - N(K"=", - N(K"Identifier", 5), - T(K" ", 1), - T(K"=", 1), - T(K" ", 1), - N(K"$", - T(K"$", 1), - N(K"Identifier", 2)), - T(K"\n", 9)), - N(K"call", - N(K"Identifier", 7), - T(K"(", 1), - N(K"$", - T(K"$", 1), - N(K"Identifier", 4)), - T(K",", 1), - T(K" ", 1), - N(K"String", 5), - T(K",", 1), - T(K" ", 1), - N(K"Identifier", 5), - T(K")", 1)), - T(K"\n", 9), - N(K"Identifier", 5), - T(K"\n", 5), - T(K"end", 3)) - source = SourceFile(code, filename=@__FILE__) - block = SyntaxNode(source, raw, source.line_starts[quote_begin]+4) - # Now that we have the block, we need to interpolate into it. - - # Interpolating a SyntaxNode `ex` is simple: - setchild!(block, (1, 2), ex) - # The interpolation of a Julia *value* should inherit the source location - # of the $ interpolation expression. This is different to the - # interpolation of a SyntaxNode, which should just be inserted as-is. - setchild!(block, (2, 2), - JuliaSyntax.interpolate_literal(block.val[2].val[2], name)) - block -end - -# Usage of at_show2() - -# Let's have some simple expression to pass to at_show2. This will be -# attributed to a different file foo.jl -code2 = "foo + 42" -source2 = SourceFile(code2, filename="foo.jl") -s2 = SyntaxNode(source2, NI(K"call", - N(K"Identifier", 3), - T(K" ", 1), - N(K"+", 1), - T(K" ", 1), - N(K"Integer", 2))) - -# Calling at_show2, we see that the precise source information is preserved for -# both the surrounding expression and the interpolated fragments. 
-println("\nInterpolation example") -show(stdout, MIME"text/plain"(), at_show2(s2)) - - -#------------------------------------------------------------------------------- -# # Formatting +include("syntax_trees.jl") +include("syntax_interpolation.jl") +include("parse_stream.jl") diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/test/syntax_interpolation.jl new file mode 100644 index 0000000000000..ecbe3fe061b02 --- /dev/null +++ b/JuliaSyntax/test/syntax_interpolation.jl @@ -0,0 +1,92 @@ +# # Macros and expression interpolation + +# The following shows that SyntaxNode works nicely for simple macros based on +# interpolating expressions into one another. In particular it shows how +# precise source information from multiple files can coexist within the same +# syntax tree. + +# First, here's the functionality that we're going to implement as a normal +# Julia macro. It's similar to the standard @show macro. +macro show2(ex) + name = sprint(Base.show_unquoted, ex) + quote + value = $(esc(ex)) + println($name, " = ", value) + value + end +end + +# Now, how would this be implemented if we were to do it with SyntaxNode? +# We don't have a parser which is capable of producing our tree structures yet, +# so we need to hand construct all our trees. +function at_show2(ex::SyntaxNode) + code = String(read(@__FILE__)) + name = sprint(JuliaSyntax._show_syntax_node_compact, ex) + # The following quote block is not used directly, but the text for it is + # re-read from `code`. 
+ quote_begin = (@__LINE__) + 1 + quote + value = $ex + println($name, " = ", value) + value + end + raw = N(K"block", + T(K"quote", 5), + T(K"\n", 9), + N(K"=", + N(K"Identifier", 5), + T(K" ", 1), + T(K"=", 1), + T(K" ", 1), + N(K"$", + T(K"$", 1), + N(K"Identifier", 2)), + T(K"\n", 9)), + N(K"call", + N(K"Identifier", 7), + T(K"(", 1), + N(K"$", + T(K"$", 1), + N(K"Identifier", 4)), + T(K",", 1), + T(K" ", 1), + N(K"String", 5), + T(K",", 1), + T(K" ", 1), + N(K"Identifier", 5), + T(K")", 1)), + T(K"\n", 9), + N(K"Identifier", 5), + T(K"\n", 5), + T(K"end", 3)) + source = SourceFile(code, filename=@__FILE__) + block = SyntaxNode(source, raw, source.line_starts[quote_begin]+4) + # Now that we have the block, we need to interpolate into it. + + # Interpolating a SyntaxNode `ex` is simple: + setchild!(block, (1, 2), ex) + # The interpolation of a Julia *value* should inherit the source location + # of the $ interpolation expression. This is different to the + # interpolation of a SyntaxNode, which should just be inserted as-is. + setchild!(block, (2, 2), + JuliaSyntax.interpolate_literal(block.val[2].val[2], name)) + block +end + +# Usage of at_show2() + +# Let's have some simple expression to pass to at_show2. This will be +# attributed to a different file foo.jl +code2 = "foo + 42" +source2 = SourceFile(code2, filename="foo.jl") +s2 = SyntaxNode(source2, NI(K"call", + N(K"Identifier", 3), + T(K" ", 1), + N(K"+", 1), + T(K" ", 1), + N(K"Integer", 2))) + +# Calling at_show2, we see that the precise source information is preserved for +# both the surrounding expression and the interpolated fragments. 
+println("\nInterpolation example") +show(stdout, MIME"text/plain"(), at_show2(s2)) diff --git a/JuliaSyntax/test/syntax_trees.jl b/JuliaSyntax/test/syntax_trees.jl new file mode 100644 index 0000000000000..cb0f35f1d1ae4 --- /dev/null +++ b/JuliaSyntax/test/syntax_trees.jl @@ -0,0 +1,55 @@ +#------------------------------------------------------------------------------- +# Raw syntax tree and AST layering + +# For this code: +code = """ +for i = 1:10 + a + 2 + # hi + c +end +""" + +source = SourceFile(code, filename="none.jl") + +# We'd like to produce something the following raw tree +t = +N(K"for", + T(K"for", 3), + T(K" ", 1), + N(K"=", + N(K"Identifier", 1), + T(K" ", 1), + T(K"=", 1), + T(K" ", 1), + NI(K"call", + N(K"Integer", 1), + N(K":", 1), + N(K"Integer", 2))), + N(K"block", + T(K"\n", 5), + NI(K"call", + N(K"Identifier", 1), + T(K" ", 1), + N(K"+", 1), + T(K" ", 1), + N(K"Integer", 1)), + T(K"\n", 5), + T(K"Comment", 4), + T(K"\n", 5), + N(K"Identifier", 1), + T(K"\n", 1)), + T(K"end", 3)) + +# And the following AST +s = SyntaxNode(source, t) + +println("\nRawSyntaxNode") +show(stdout, MIME"text/plain"(), t, code, show_trivia=true) + +println("\nSyntaxNode") +show(stdout, MIME"text/plain"(), s) + +#code = "42" +#SyntaxNode(N(K"Integer", 2), 1, code) + From b718db3fedd2f3ed9f6c8cb1510a3fa57648bebb Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 7 Dec 2021 12:19:27 +1000 Subject: [PATCH 0222/1109] Fix ParseStream interface to allow extending parent nodes leftward When parsing infix constructs, child nodes are discovered before their parents so there's a need to reparent nodes which have already been emitted into the event stream. This change adds emit(::ParseStream), allowing a leftmost byte position from position(::ParseStream) to be saved and passed to that to emit any number of parent nodes retroactively. 
This is a bit like rust-analyzer's event stream, but recording the current position keeps the state in the parser rather than requiring Start markers in the event stream. There's no need to abort a node - simply don't emit() it and no chasing a linked list of forward_parent indices. (Instead, we need to iterate backward through the text spans to find children for each node when building the tree.) --- JuliaSyntax/README.md | 4 +- JuliaSyntax/src/parse_stream.jl | 173 ++++++++++++++++++------------- JuliaSyntax/src/syntax_tree.jl | 2 +- JuliaSyntax/test/parse_stream.jl | 74 ++++++++----- JuliaSyntax/test/runtests.jl | 6 +- 5 files changed, 156 insertions(+), 103 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 8060976fb8b73..ad9a807d66b43 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -198,7 +198,9 @@ Highlights: * [Modern parser generator](https://matklad.github.io/2018/06/06/modern-parser-generator.html) has a lot of practical notes on writing parsers. Highlights: - Encourages writing tests for handwritten parsers as inline comments - - Mentions [Pratt parsers](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) for operator precedence. + - Mentions Pratt parsers for simple operator precedence parsing. 
Good articles: + - [From Aleksey Kladov (matklad - the main rust-analyzer author, etc)](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) + - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) - Some discussion of error recovery ## `rust-analyzer` diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index aa475a3fe0b24..245e7bc87d9a0 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -21,23 +21,40 @@ struct SyntaxToken end function Base.show(io::IO, tok::SyntaxToken) - range = string(lpad(start_byte(tok), 3), ":", rpad(end_byte(tok), 3)) + range = string(lpad(first_byte(tok), 3), ":", rpad(last_byte(tok), 3)) print(io, rpad(range, 17, " "), rpad(kind(tok), 15, " ")) end kind(tok::SyntaxToken) = tok.raw.kind -start_byte(tok::SyntaxToken) = tok.raw.startbyte + 1 -end_byte(tok::SyntaxToken) = tok.raw.endbyte + 1 -span(tok::SyntaxToken) = end_byte(tok) - start_byte(tok) + 1 +first_byte(tok::SyntaxToken) = tok.raw.startbyte + 1 +last_byte(tok::SyntaxToken) = tok.raw.endbyte + 1 +span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1 +Base.:(~)(tok::SyntaxToken, k::Kind) = kind(tok) == k +Base.:(~)(k::Kind, tok::SyntaxToken) = kind(tok) == k #------------------------------------------------------------------------------- +struct TextSpan + kind::Kind + flags::_RawFlags + first_byte::Int + last_byte::Int +end + +function TextSpan(raw::RawToken, flags::_RawFlags) + TextSpan(raw.kind, flags, raw.startbyte + 1, raw.endbyte + 1) +end + +kind(span::TextSpan) = span.kind +first_byte(span::TextSpan) = span.first_byte +last_byte(span::TextSpan) = span.last_byte +span(span::TextSpan) = last_byte(span) - first_byte(span) + 1 + """ ParseStream provides an IO interface for the parser. 
It - Wraps the lexer from Tokenize.jl with a short lookahead buffer - Removes whitespace and comment tokens, shifting them into the output implicitly -- Provides a begin_node/end_node interface to emit the parsed tree structure This is simililar to rust-analyzer's [TextTreeSink](https://github.com/rust-analyzer/rust-analyzer/blob/4691a0647b2c96cc475d8bbe7c31fe194d1443e7/crates/syntax/src/parsing/text_tree_sink.rs) @@ -45,36 +62,25 @@ This is simililar to rust-analyzer's mutable struct ParseStream lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} lookahead::Vector{SyntaxToken} - trivia_buf::Vector{SyntaxToken} - current_end_byte::Int # Byte index of the last *consumed* token - pending_node_stack::Vector{Tuple{Vector{RawSyntaxNode},Int}} + lookahead_trivia::Vector{TextSpan} + spans::Vector{TextSpan} + # First byte of next token + next_byte::Int end function ParseStream(code) lexer = Tokenize.tokenize(code, RawToken) - ParseStream(lexer, SyntaxToken[], SyntaxToken[], 0, Vector{RawSyntaxNode}[]) + ParseStream(lexer, + Vector{SyntaxToken}(), + Vector{TextSpan}(), + Vector{TextSpan}(), + 1) end function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) - print(io, ParseStream, ":\n lexer = ") - show(io, mime, stream.lexer) -end - -# Iterator interface -#= -Base.IteratorSize(::Type{ParseStream}) = Base.SizeUnknown() -Base.IteratorEltype(::Type{ParseStream}) = Base.HasEltype() -Base.eltype(::Type{ParseStream}) = SyntaxToken - -function Base.iterate(stream::ParseStream, end_state=false) - end_state && return nothing - t = peek(stream) - bump!() - return t, kind(t) == K"EndMarker" + println(io, "ParseStream at position $(stream.next_byte)") end -=# -# Read one nontrivia token; shift trivia into stream.trivia_buf function _read_token(stream::ParseStream) had_whitespace = false had_newline = false @@ -84,7 +90,7 @@ function _read_token(stream::ParseStream) if k in (K"Whitespace", K"Comment", K"NewlineWs") had_whitespace = true had_newline = k == K"NewlineWs" - 
push!(stream.trivia_buf, SyntaxToken(raw, false, false)) + push!(stream.lookahead_trivia, TextSpan(raw, TRIVIA_FLAG)) continue end return SyntaxToken(raw, had_whitespace, had_newline) @@ -94,7 +100,7 @@ end """ peek(stream [, n=1]) -Look ahead in the stream `n` tokens. +Look ahead in the stream `n` tokens, returning a SyntaxToken """ function peek(stream::ParseStream, n::Integer=1) if length(stream.lookahead) < n @@ -105,61 +111,88 @@ function peek(stream::ParseStream, n::Integer=1) return stream.lookahead[n] end -function _current_node_children(stream::ParseStream) - last(stream.pending_node_stack)[1] -end - """ - bump!(stream) - -Remove next token from the stream and add it as a syntax leaf to the current -output node. + bump(stream [, flags=EMPTY_FLAGS]) -`bump!` returns `nothing` to make synchronization with the output stream -clearer. To see token values use `peek()`. +Shift the current token into the output as a new text span with the given +`flags`. """ -function bump!(stream::ParseStream, flags::_RawFlags=EMPTY_FLAGS) - if isempty(stream.pending_node_stack) - error("Cannot bump! stream outside begin_node-end_node pair because this would loose input tokens") - end +function bump(stream::ParseStream, flags=EMPTY_FLAGS) tok = isempty(stream.lookahead) ? _read_token(stream) : popfirst!(stream.lookahead) # TODO: use a circular buffer? 
- while true - if #==# isempty(stream.trivia_buf) || - start_byte(first(stream.trivia_buf)) > start_byte(tok) - break - end - t = popfirst!(stream.trivia_buf) - trivia_node = RawSyntaxNode(kind(t), span(t), TRIVIA_FLAG) - push!(_current_node_children(stream), trivia_node) + # Bump trivia tokens into output + while !isempty(stream.lookahead_trivia) && + first_byte(first(stream.lookahead_trivia)) <= first_byte(tok) + trivia_span = popfirst!(stream.lookahead_trivia) + push!(stream.spans, trivia_span) end - node = RawSyntaxNode(kind(tok), span(tok), flags) - push!(_current_node_children(stream), node) - stream.current_end_byte = end_byte(tok) + span = TextSpan(kind(tok), flags, first_byte(tok), last_byte(tok)) + push!(stream.spans, span) + stream.next_byte = last_byte(tok) + 1 nothing end +function Base.position(stream::ParseStream) + return stream.next_byte +end + +""" + emit(stream, start_position, kind [, flags = EMPTY_FLAGS]) + +Emit a new text span into the output which covers source bytes from +`start_position` to the end of the most recent token which was `bump()`'ed. +The `start_position` of the span should be a previous return value of +`position()`. +""" +function emit(stream::ParseStream, start_position::Integer, kind::Kind, + flags = EMPTY_FLAGS) + push!(stream.spans, TextSpan(kind, flags, start_position, stream.next_byte-1)) + return nothing +end + #------------------------------------------------------------------------------- -# ParseStream tree output interface - -function begin_node(stream::ParseStream) - # TODO: Add a trivia heuristic here and in end_node so that whitespace and - # comments attach to nodes more usefully. May need some hint from the - # parser (eg, designating nodes which tend to be "block" vs "inline"?) for - # this to work well. 
- node_begin_byte = stream.current_end_byte + 1 - push!(stream.pending_node_stack, (RawSyntaxNode[], node_begin_byte)) - nothing +# Tree construction +# +# Note that this is largely independent of RawSyntaxNode, and could easily be +# made completely independent with a tree builder interface. + +function _push_node!(stack, text_span::TextSpan, children=nothing) + if isnothing(children) + node = RawSyntaxNode(kind(text_span), span(text_span), text_span.flags) + push!(stack, (text_span=text_span, node=node)) + else + node = RawSyntaxNode(kind(text_span), span(text_span), text_span.flags, children) + push!(stack, (text_span=text_span, node=node)) + end end -function end_node(stream::ParseStream, k::Kind, flags::_RawFlags=EMPTY_FLAGS) - children, node_begin_byte = pop!(stream.pending_node_stack) - span = stream.current_end_byte - node_begin_byte + 1 - node = RawSyntaxNode(k, span, flags, children) - if !isempty(stream.pending_node_stack) - push!(last(stream.pending_node_stack)[1], node) +function to_tree(st) + stack = Vector{@NamedTuple{text_span::TextSpan,node::RawSyntaxNode}}() + _push_node!(stack, st.spans[1]) + for i = 2:length(st.spans) + text_span = st.spans[i] + + if first_byte(text_span) > last_byte(stack[end].text_span) + # A leaf node (span covering a single token): + # [a][b][stack[end]] + # [text_span] + _push_node!(stack, text_span) + continue + end + # An interior node, span covering multiple tokens: + # + # [a][b][stack[end]] + # [ text_span] + j = length(stack) + while j > 1 && first_byte(text_span) < first_byte(stack[j].text_span) + j -= 1 + end + children = [stack[k].node for k = j:length(stack)] + resize!(stack, j-1) + _push_node!(stack, text_span, children) end - return node + return only(stack).node end + diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index e53bb53b91609..a632fe4893c32 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -81,7 +81,7 @@ function _show_raw_node(io, node, 
indent, pos, str, show_trivia) if !show_trivia && istrivia(node) return end - posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span, 6)) │" + posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span-1, 6)) │" if !haschildren(node) line = string(posstr, indent, _kind_str(node.kind)) if !istrivia(node) diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 0f28f11b9314a..4fd89bfceec6a 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -1,8 +1,7 @@ # Prototype ParseStream interface # # Here we test the ParseStream interface, by taking input code and checking -# that the correct sequence of begin_node, end_node and bump!() produces a -# valid parse tree. +# that the correct sequence of emit() and bump() produces a valid parse tree. code = """ for i = 1:10 @@ -16,32 +15,51 @@ st = ParseStream(code) # Here we manually issue parse events in the order a Julia parser would issue # them (if such a parser existed... which it doesn't yet!) -begin_node(st) - bump!(st, TRIVIA_FLAG) # for - begin_node(st) - bump!(st) # 'i' - bump!(st, TRIVIA_FLAG) # = - begin_node(st) - bump!(st) # 1 - bump!(st) # : - bump!(st) # 10 - end_node(st, K"call", INFIX_FLAG) - end_node(st, K"=") - begin_node(st) - begin_node(st) # [call] - begin_node(st) # [ref] - bump!(st) # xx - bump!(st, TRIVIA_FLAG) # [ - bump!(st) # i - bump!(st, TRIVIA_FLAG) # ] - end_node(st, K"ref") - bump!(st) # + - bump!(st) # 2 - end_node(st, K"call", INFIX_FLAG) - bump!(st) # yy - end_node(st, K"block") - bump!(st, TRIVIA_FLAG) # end -t = end_node(st, K"for") +@testset "ParseStream" begin + p1 = position(st) + @test peek(st) ~ K"for" + bump(st, TRIVIA_FLAG) + p2 = position(st) + @test peek(st) ~ K"Identifier" # 'i' + bump(st) + @test peek(st) ~ K"=" + bump(st, TRIVIA_FLAG) + p3 = position(st) + @test peek(st) ~ K"Integer" # 1 + bump(st) + @test peek(st) ~ K":" + bump(st) # : + @test peek(st) ~ K"Integer" # 10 + bump(st) # 10 + emit(st, p3, K"call", INFIX_FLAG) + emit(st, 
p2, K"=") + p4 = position(st) + p5 = position(st) # [call] + p6 = position(st) # [ref] + @test peek(st) ~ K"Identifier" # 'xx' + bump(st) + @test peek(st) ~ K"[" + bump(st, TRIVIA_FLAG) + @test peek(st) ~ K"Identifier" # 'i' + bump(st) + @test peek(st) ~ K"]" + bump(st, TRIVIA_FLAG) + emit(st, p6, K"ref") + @test peek(st) ~ K"+" + bump(st) + @test peek(st) ~ K"Integer" # 2 + bump(st) + emit(st, p5, K"call", INFIX_FLAG) + @test peek(st) ~ K"Identifier" # 'yy' + bump(st) + emit(st, p4, K"block") + bump(st, TRIVIA_FLAG) # end + emit(st, p1, K"for") + bump(st, TRIVIA_FLAG) # \n + emit(st, p1, K"toplevel") +end + +t = JuliaSyntax.to_tree(st) # ## Input code println("-----------------------") diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 7448d45e58d57..cdd3061c651fc 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -5,7 +5,7 @@ using JuliaSyntax: SourceFile using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG using JuliaSyntax: Kind, @K_str, children, child, setchild! using JuliaSyntax: highlight -using JuliaSyntax: ParseStream, bump!, peek, begin_node, end_node +using JuliaSyntax: ParseStream, bump, peek, emit # Shortcuts for defining raw syntax nodes @@ -18,7 +18,7 @@ N(k, args::RawSyntaxNode...) = RawSyntaxNode(k, args...) NI(k, args::RawSyntaxNode...) = RawSyntaxNode(k, raw_flags(infix=true), args...) -include("syntax_trees.jl") -include("syntax_interpolation.jl") +#include("syntax_trees.jl") +#include("syntax_interpolation.jl") include("parse_stream.jl") From f892043999d6c9feb2651ce6862d5979b27214e1 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 7 Dec 2021 14:46:01 +1000 Subject: [PATCH 0223/1109] Prototype parser for simple expressions This prototype for an extremely basic grammar shows that the ParseStream interface is at least somewhat practical! 
--- JuliaSyntax/src/parse_stream.jl | 42 +++++++++++++-- JuliaSyntax/src/parser.jl | 28 +++++----- JuliaSyntax/src/syntax_tree.jl | 33 ++++++++---- JuliaSyntax/test/parse_stream.jl | 28 +++++----- JuliaSyntax/test/runtests.jl | 13 +++-- JuliaSyntax/test/simple_parser.jl | 85 +++++++++++++++++++++++++++++++ 6 files changed, 182 insertions(+), 47 deletions(-) create mode 100644 JuliaSyntax/test/simple_parser.jl diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 245e7bc87d9a0..ecb533b492313 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -51,6 +51,11 @@ first_byte(span::TextSpan) = span.first_byte last_byte(span::TextSpan) = span.last_byte span(span::TextSpan) = last_byte(span) - first_byte(span) + 1 +struct Diagnostic + text_span::TextSpan + message::String +end + """ ParseStream provides an IO interface for the parser. It - Wraps the lexer from Tokenize.jl with a short lookahead buffer @@ -64,6 +69,7 @@ mutable struct ParseStream lookahead::Vector{SyntaxToken} lookahead_trivia::Vector{TextSpan} spans::Vector{TextSpan} + diagnostics::Vector{Diagnostic} # First byte of next token next_byte::Int end @@ -74,6 +80,7 @@ function ParseStream(code) Vector{SyntaxToken}(), Vector{TextSpan}(), Vector{TextSpan}(), + Vector{Diagnostic}(), 1) end @@ -98,11 +105,11 @@ function _read_token(stream::ParseStream) end """ - peek(stream [, n=1]) + peek_token(stream [, n=1]) Look ahead in the stream `n` tokens, returning a SyntaxToken """ -function peek(stream::ParseStream, n::Integer=1) +function peek_token(stream::ParseStream, n::Integer=1) if length(stream.lookahead) < n for i=1:(n-length(stream.lookahead)) push!(stream.lookahead, _read_token(stream)) @@ -111,6 +118,15 @@ function peek(stream::ParseStream, n::Integer=1) return stream.lookahead[n] end +""" + peek_token(stream [, n=1]) + +Look ahead in the stream `n` tokens, returning a Kind +""" +function peek(stream::ParseStream, n::Integer=1) + 
kind(peek_token(stream, n)) +end + """ bump(stream [, flags=EMPTY_FLAGS]) @@ -146,8 +162,15 @@ The `start_position` of the span should be a previous return value of `position()`. """ function emit(stream::ParseStream, start_position::Integer, kind::Kind, - flags = EMPTY_FLAGS) - push!(stream.spans, TextSpan(kind, flags, start_position, stream.next_byte-1)) + flags::_RawFlags = EMPTY_FLAGS; error=nothing) + if !isnothing(error) + flags |= ERROR_FLAG + end + text_span = TextSpan(kind, flags, start_position, stream.next_byte-1) + if !isnothing(error) + push!(stream.diagnostics, Diagnostic(text_span, error)) + end + push!(stream.spans, text_span) return nothing end @@ -168,7 +191,7 @@ function _push_node!(stack, text_span::TextSpan, children=nothing) end end -function to_tree(st) +function to_raw_tree(st) stack = Vector{@NamedTuple{text_span::TextSpan,node::RawSyntaxNode}}() _push_node!(stack, st.spans[1]) for i = 2:length(st.spans) @@ -196,3 +219,12 @@ function to_tree(st) return only(stack).node end +function show_diagnostic(io::IO, diagnostic, code) + printstyled(io, "Error: ", color=:light_red) + print(io, diagnostic.message, ":\n") + p = first_byte(diagnostic.text_span) + q = last_byte(diagnostic.text_span) + print(io, code[1:p-1]) + _printstyled(io, code[p:q]; color=(100,40,40)) + print(io, code[q+1:end], '\n') +end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index be3d3de45bc2f..f8efb008d9e90 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1,10 +1,10 @@ """ ParseState carries parser context as we recursively descend into the parse tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix -literals we're in "whitespace sensitive" mode, and `[x -y]` means [(x) (-y)]. +literals we're in `space_sensitive` mode, and `[x -y]` means [(x) (-y)]. """ struct ParseState - tokens::ParseStream + stream::ParseStream # Vesion of Julia we're parsing this code for. May be different from VERSION! 
julia_version::VersionNumber @@ -23,15 +23,15 @@ struct ParseState end # Normal context -function ParseState(tokens::ParseStream; julia_version=VERSION) - ParseState(tokens, julia_version, true, false, true, false, false, false) +function ParseState(stream::ParseStream; julia_version=VERSION) + ParseState(stream, julia_version, true, false, true, false, false, false) end function ParseState(ps::ParseState; range_colon_enabled=nothing, space_sensitive=nothing, for_generator=nothing, end_symbol=nothing, whitespace_newline=nothing, where_enabled=nothing) - ParseState(ps.tokens, ps.julia_version, + ParseState(ps.stream, ps.julia_version, range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, space_sensitive === nothing ? ps.space_sensitive : space_sensitive, for_generator === nothing ? ps.for_generator : for_generator, @@ -40,10 +40,9 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, where_enabled === nothing ? ps.where_enabled : where_enabled) end -take_token!(ps::ParseState) = take_token!(ps.tokens) -require_token(ps::ParseState) = require_token(ps.tokens) -peek_token(ps::ParseState) = peek_token(ps.tokens) -put_back!(ps::ParseState, tok::RawToken) = put_back!(ps.tokens, tok) +peek(ps::ParseState, args...) = peek(ps.stream, args...) +bump(ps::ParseState, args...) = bump(ps.stream, args...) +emit(ps::ParseState, args...) = emit(ps.stream, args...) #------------------------------------------------------------------------------- # Parser @@ -56,13 +55,16 @@ function is_closing_token(ps::ParseState, tok) end function has_whitespace_prefix(tok::SyntaxToken) - tok.leading_trivia.kind == K" " + tok.had_whitespace end function TODO(str) error("TODO: $str") end + +#= + # Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. 
function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode tok = require_token(ps) @@ -148,6 +150,8 @@ function parse_cat(ps0::ParseState, opening_tok, closer, last_end_symbol::Bool) end end +=# + #------------------------------------------------------------------------------- # the principal non-terminals follow, in increasing precedence order @@ -188,7 +192,7 @@ end #------------------------------------------------------------------------------- function parse(code) - tokens = JuliaSyntax.ParseStream(code) - parse_statements(tokens) + stream = ParseStream(code) + parse_statements(stream) end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index a632fe4893c32..eef0256905e3c 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -40,7 +40,7 @@ const _RawFlags = UInt32 EMPTY_FLAGS = 0x00000000 TRIVIA_FLAG = 0x00000001 INFIX_FLAG = 0x00000002 -# DIAGNOSTICS_FLAG +ERROR_FLAG = 0x80000000 function raw_flags(; trivia::Bool=false, infix::Bool=false) flags = _RawFlags(0) @@ -73,6 +73,7 @@ children(node::RawSyntaxNode) = node.args istrivia(node::RawSyntaxNode) = node.flags & TRIVIA_FLAG != 0 isinfix(node::RawSyntaxNode) = node.flags & INFIX_FLAG != 0 +iserror(node::RawSyntaxNode) = node.flags & ERROR_FLAG != 0 kind(node::RawSyntaxNode) = node.kind @@ -82,18 +83,28 @@ function _show_raw_node(io, node, indent, pos, str, show_trivia) return end posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span-1, 6)) │" - if !haschildren(node) + is_leaf = !haschildren(node) + if is_leaf line = string(posstr, indent, _kind_str(node.kind)) - if !istrivia(node) - line = rpad(line, 40) * "✔" - end - if isnothing(str) - println(io, line) - else - println(io, rpad(line, 42), ' ', repr(str[pos:pos + node.span - 1])) - end else - println(io, posstr, indent, '[', _kind_str(node.kind), "]") + line = string(posstr, indent, '[', _kind_str(node.kind), "]") + end + if !istrivia(node) && is_leaf + line = rpad(line, 40) * "✔" + end + if 
iserror(node) + line = rpad(line, 41) * "✘" + end + if is_leaf && !isnothing(str) + line = string(rpad(line, 43), ' ', repr(str[pos:pos + node.span - 1])) + end + line = line*"\n" + if iserror(node) + printstyled(io, line, color=:light_red) + else + print(io, line) + end + if !is_leaf new_indent = indent*" " p = pos for a in node.args diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 4fd89bfceec6a..6fb1b00eb98a7 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -17,40 +17,40 @@ st = ParseStream(code) # them (if such a parser existed... which it doesn't yet!) @testset "ParseStream" begin p1 = position(st) - @test peek(st) ~ K"for" + @test peek(st) == K"for" bump(st, TRIVIA_FLAG) p2 = position(st) - @test peek(st) ~ K"Identifier" # 'i' + @test peek(st) == K"Identifier" # 'i' bump(st) - @test peek(st) ~ K"=" + @test peek(st) == K"=" bump(st, TRIVIA_FLAG) p3 = position(st) - @test peek(st) ~ K"Integer" # 1 + @test peek(st) == K"Integer" # 1 bump(st) - @test peek(st) ~ K":" + @test peek(st) == K":" bump(st) # : - @test peek(st) ~ K"Integer" # 10 + @test peek(st) == K"Integer" # 10 bump(st) # 10 emit(st, p3, K"call", INFIX_FLAG) emit(st, p2, K"=") p4 = position(st) p5 = position(st) # [call] p6 = position(st) # [ref] - @test peek(st) ~ K"Identifier" # 'xx' + @test peek(st) == K"Identifier" # 'xx' bump(st) - @test peek(st) ~ K"[" + @test peek(st) == K"[" bump(st, TRIVIA_FLAG) - @test peek(st) ~ K"Identifier" # 'i' + @test peek(st) == K"Identifier" # 'i' bump(st) - @test peek(st) ~ K"]" + @test peek(st) == K"]" bump(st, TRIVIA_FLAG) emit(st, p6, K"ref") - @test peek(st) ~ K"+" + @test peek(st) == K"+" bump(st) - @test peek(st) ~ K"Integer" # 2 + @test peek(st) == K"Integer" # 2 bump(st) emit(st, p5, K"call", INFIX_FLAG) - @test peek(st) ~ K"Identifier" # 'yy' + @test peek(st) == K"Identifier" # 'yy' bump(st) emit(st, p4, K"block") bump(st, TRIVIA_FLAG) # end @@ -59,7 +59,7 @@ st = ParseStream(code) 
emit(st, p1, K"toplevel") end -t = JuliaSyntax.to_tree(st) +t = JuliaSyntax.to_raw_tree(st) # ## Input code println("-----------------------") diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index cdd3061c651fc..f20b88dceac78 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -2,8 +2,11 @@ using JuliaSyntax using Test using JuliaSyntax: SourceFile -using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG -using JuliaSyntax: Kind, @K_str, children, child, setchild! + +using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG, + children, child, setchild! + +using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator using JuliaSyntax: highlight using JuliaSyntax: ParseStream, bump, peek, emit @@ -18,7 +21,7 @@ N(k, args::RawSyntaxNode...) = RawSyntaxNode(k, args...) NI(k, args::RawSyntaxNode...) = RawSyntaxNode(k, raw_flags(infix=true), args...) -#include("syntax_trees.jl") -#include("syntax_interpolation.jl") +include("syntax_trees.jl") +include("syntax_interpolation.jl") include("parse_stream.jl") - +include("simple_parser.jl") diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/test/simple_parser.jl new file mode 100644 index 0000000000000..5262ae0ff8aa7 --- /dev/null +++ b/JuliaSyntax/test/simple_parser.jl @@ -0,0 +1,85 @@ +# Example parser for a very basic grammar +# +# This is simple but has some problems, most notably that expressions and terms +# aren't recursive so things like `a + b + c` can't be parsed! 
+# +# expression ::= +# term | term "+" term | term "-" term +# +# term ::= +# atom | atom "*" atom | atom "/" atom +# +# atom ::= +# literal | identifier | "(" expression ")" | "-" atom | "+" atom +# + +function parse_atom(st) + p = position(st) + k = peek(st) + if k == K"Identifier" || isliteral(k) + bump(st) + elseif k in (K"-", K"+") + bump(st) + parse_atom(st) + emit(st, p, K"call") + elseif k == K"(" + bump(st, TRIVIA_FLAG) + parse_expression(st) + if peek(st) == K")" + bump(st, TRIVIA_FLAG) + # emit(st, p, K"(") + else + emit(st, p, K"(", + error="Expected `)` following expression") + end + else + bump(st) + emit(st, p, K"Error", + error="Expected literal, identifier or opening parenthesis") + end +end + +function parse_term(st) + p = position(st) + parse_atom(st) + k = peek(st) + if k in (K"*", K"/") + bump(st) + parse_atom(st) + emit(st, p, K"call", INFIX_FLAG) + end +end + +function parse_expression(st) + p = position(st) + parse_term(st) + k = peek(st) + if k in (K"+", K"-") + bump(st) + parse_term(st) + emit(st, p, K"call", INFIX_FLAG) + end +end + +function parse_and_show(production::Function, code) + st = ParseStream(code) + production(st) + t = JuliaSyntax.to_raw_tree(st) + show(stdout, MIME"text/plain"(), t, code, show_trivia=true) + if !isempty(st.diagnostics) + println() + for d in st.diagnostics + JuliaSyntax.show_diagnostic(stdout, d, code) + end + end + t +end + +println() +println("Example diagnostics:") +parse_and_show(parse_expression, "(x + a*y) * (b") + +println() +println("Example good parse:") +parse_and_show(parse_expression, "(x + a*y) * b") +nothing From 57972442450cfb4485e6081f54eaa263b71040c3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 8 Dec 2021 12:35:16 +1000 Subject: [PATCH 0224/1109] Crude tool for converting flisp defines to julia functions --- JuliaSyntax/tools/flisp_defines_to_julia.jl | 74 +++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 JuliaSyntax/tools/flisp_defines_to_julia.jl diff 
--git a/JuliaSyntax/tools/flisp_defines_to_julia.jl b/JuliaSyntax/tools/flisp_defines_to_julia.jl new file mode 100644 index 0000000000000..59763feeb3b83 --- /dev/null +++ b/JuliaSyntax/tools/flisp_defines_to_julia.jl @@ -0,0 +1,74 @@ +function _replace(s, pairs::Pair...) + for p in pairs + s = replace(s, p) + end + return s +end + +# Convert flisp definitions and comments to psuedo-Julia to reflect the +# structure of the existing flisp parser. +# +# Surrounded with all this compiler technology, but still resorting to a pile +# of regexs? 😂😱 +function juliafy_flisp(fl_input, jl_output) + prev_newline = false + had_comment = false + for line in readlines(fl_input) + if occursin(r"^\(define *\(", line) + had_comment && println(jl_output, "#") + println(jl_output, "# flisp: $line") + m = match(r"\(define *\(([-a-zA-Z?_=0-9*><:!]+) *([^)]*)", replace(line, "-"=>"_")) + isnothing(m) && @error "no match for line" line + funcname = m[1] + funcname = _replace(funcname, + r"(.*)\?"=>s"is_\1", + "=" => "equals", + "*" => "_star", + ">" => "_gt", + "<" => "_lt", + ":" => "_", + ) + funcargs = _replace(m[2], + r" *\(" => ";", + r" +" => ", ", + "." => "_", + r"([-a-zA-Z?_=]+)\?" 
=> s"is_\1", + r", *#t" => "=true", + r", *#f" => "=false", + ";" => "; ", + ) + if startswith(funcname, "parse_") + funcargs = "ps::ParseState, "*funcargs + end + text = """ + function $funcname($funcargs) + TODO("$funcname unimplemented") + end + """ + ex = Meta.parse(text, raise=false) + if Meta.isexpr(ex, :error) + @warn "Generated bad code" message=ex.args[1] code=Text(text) + end + print(jl_output, text) + prev_newline = false + had_comment = false + elseif occursin(r"^;;", line) + println(jl_output, replace(line, r"^;;" => "#")) + prev_newline = false + had_comment = true + elseif line == "" + if !prev_newline + println(jl_output) + end + prev_newline = true + had_comment = false + end + end +end + +open("/home/chris/dev/julia/src/julia-parser.scm", "r") do fl_input + open(joinpath(@__DIR__, "julia_parser_scm.jl"), "w") do jl_output + juliafy_flisp(fl_input, jl_output) + end +end + From 3a18cf5a9e6ab467bdb309a342952103bf946d45 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 8 Dec 2021 13:16:18 +1000 Subject: [PATCH 0225/1109] Add binding_power function Useful for a Pratt parser, if we go in that direction. --- JuliaSyntax/src/token_kinds.jl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 1943ebb9065f8..731a9320ff51a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -56,6 +56,30 @@ kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" +""" +Get the "binding power" (precedence level) of an operator kind +""" +function binding_power(k::Kind) + return k < K"END_ASSIGNMENTS" ? 1 : + k < K"END_CONDITIONAL" ? 2 : + k < K"END_ARROW" ? 3 : + k < K"END_LAZYOR" ? 4 : + k < K"END_LAZYAND" ? 5 : + k < K"END_COMPARISON" ? 6 : + k < K"END_PIPE" ? 7 : + k < K"END_COLON" ? 8 : + k < K"END_PLUS" ? 9 : + k < K"END_BITSHIFTS" ? 10 : + k < K"END_TIMES" ? 
11 : + k < K"END_RATIONAL" ? 12 : + k < K"END_POWER" ? 13 : + k < K"END_DECL" ? 14 : + k < K"END_WHERE" ? 15 : + k < K"END_DOT" ? 16 : + k < K"END_OPS" ? 17 : # ?? unary ops + error("Not an operator") +end + function _kind_str(k::Kind) if k in (K"Identifier", K"VarIdentifier") "Identifier" From 55cc6d024b8fe1a903c32a0356f727481446d71c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 8 Dec 2021 14:00:31 +1000 Subject: [PATCH 0226/1109] Musings on AST design --- JuliaSyntax/README.md | 63 +++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index ad9a807d66b43..1035a41d47232 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -57,12 +57,15 @@ Let's tackle it by prototyping several important work flows: ## Tree design -Raw syntax tree (RST / "Green tree") +### Raw syntax tree / Green tree -We want RawSyntaxNode to be +Raw syntax tree (RST, or "Green tree" in the terminology from Roslyn) + +We want GreenNode to be * *structurally minimal* — For efficiency and generality * *immutable* — For efficiency (& thread safety?) * *complete* — To preserve parser knowledge +* *token agnostic* — To allow use with any source language ``` for i = 1:10 @@ -116,13 +119,49 @@ Call represents a challange for the AST vs RST in terms of node placement / iteration for infix operators vs normal prefix function calls. - The normal problem of `a + 1` vs `+(a, 1)` -- Or even worse, `a + 1 + 2` vs `+(a, 1, 2)` +- Or worse, `a + 1 + 2` vs `+(a, 1, 2)` Clearly in the AST's *interface* we need to abstract over this placement. For -example with something like the normal Julia AST. But in the RST we only need -to distinguish between infix and prefix. - +example with something like the normal Julia AST's iteration order. + +### Abstract syntax tree + +By pointing to green tree nodes, AST nodes become tracable back to the original +source. 
+ +Unlike other languages, designing a new AST is tricky because the existing +`Expr` is a very public API used in every macro expansion. User-defined +macro expansions interpose between the source text and lowering, and using +`Expr` looses source information in many ways. + +There seems to be a few ways forward: +* Maybe we can give `Expr` some new semi-hidden fields to point back to the + green tree nodes that the `Expr` or its `args` list came from? +* We can use the existing `Expr` during macro expansion and try to recover + source information after macro expansion using heuristics. Likely the + presence of correct hygiene can help with this. +* Introducing a new AST would be possible if it were opt-in for new-style + macros only. Fixing hygiene should go along with this. Design challenge: How + do we make manipulating expressions reasonable when literals need to carry + source location? + +One option which may help bridge between locationless ASTs and something new +may be to have wrappers for the small number of literal types we need to cover. +For example: + +```julia +SourceSymbol <: AbstractSymbol +SourceInt <: Integer +SourceString <: AbstractString +``` +Having source location attached to symbols would potentially solve most of the +hygine problem. There's still the problem of macro helper functions which use +symbol literals; we can't very well be changing the meaning of `:x`! Perhaps +the trick there is to try capturing the current module at the location of the +interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to +`Core._expr(:call, :+, :y, x)`, but it could expand it to something like +`Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? ## Fun research questions @@ -138,12 +177,6 @@ to distinguish between infix and prefix. 
* [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) * [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) -## - -## Oil shell -* Andy Chu (the author of the OIL shell) has written some things about this - - Collected links about lossless syntax in [a wiki page](https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern) - - A blog post [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) ## Rust-analyzer @@ -192,6 +225,10 @@ Highlights: another flat stream of events." This seems great, let's adopt it! * TODO +## Oil shell +* Andy Chu (the author of the OIL shell) has written some things about this + - Collected links about lossless syntax in [a wiki page](https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern) + - A blog post [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) ## General resources about parsing @@ -203,5 +240,3 @@ Highlights: - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) - Some discussion of error recovery -## `rust-analyzer` - From b5915554427700f1d81fa5b472e4a0570658834f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 8 Dec 2021 14:01:08 +1000 Subject: [PATCH 0227/1109] Rename RawSyntaxNode -> GreenNode For some reason, I just find GreenNode to be a very memorable name. Even though the color comes arbitrarily from the Roslyn team's whiteboard markers... it seems this terminology has stuck. 
--- JuliaSyntax/src/parse_stream.jl | 8 +++--- JuliaSyntax/src/parser.jl | 14 +++++----- JuliaSyntax/src/syntax_tree.jl | 44 ++++++++++++++++---------------- JuliaSyntax/test/runtests.jl | 10 ++++---- JuliaSyntax/test/syntax_trees.jl | 2 +- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ecb533b492313..fd89003677e8e 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -178,21 +178,21 @@ end #------------------------------------------------------------------------------- # Tree construction # -# Note that this is largely independent of RawSyntaxNode, and could easily be +# Note that this is largely independent of GreenNode, and could easily be # made completely independent with a tree builder interface. function _push_node!(stack, text_span::TextSpan, children=nothing) if isnothing(children) - node = RawSyntaxNode(kind(text_span), span(text_span), text_span.flags) + node = GreenNode(kind(text_span), span(text_span), text_span.flags) push!(stack, (text_span=text_span, node=node)) else - node = RawSyntaxNode(kind(text_span), span(text_span), text_span.flags, children) + node = GreenNode(kind(text_span), span(text_span), text_span.flags, children) push!(stack, (text_span=text_span, node=node)) end end function to_raw_tree(st) - stack = Vector{@NamedTuple{text_span::TextSpan,node::RawSyntaxNode}}() + stack = Vector{@NamedTuple{text_span::TextSpan,node::GreenNode}}() _push_node!(stack, st.spans[1]) for i = 2:length(st.spans) text_span = st.spans[i] diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f8efb008d9e90..0aaa7c5ef0a16 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -66,7 +66,7 @@ end #= # Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. 
-function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode +function parse_atom(ps::ParseState; checked::Bool=true)::GreenNode tok = require_token(ps) tok_kind = kind(tok) # TODO: Reorder these to put most likely tokens first @@ -75,7 +75,7 @@ function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode next = peek_token(ps) if is_closing_token(ps, next) && (kind(next) != K"Keyword" || has_whitespace_prefix(next)) - return RawSyntaxNode(tok) + return GreenNode(tok) elseif has_whitespace_prefix(next) error("whitespace not allowed after \":\" used for quoting") elseif kind(next) == K"NewlineWs" @@ -83,7 +83,7 @@ function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode else # Being inside quote makes `end` non-special again. issue #27690 ps1 = ParseState(ps, end_symbol=false) - return RawSyntaxNode(K"quote", parse_atom(ps1, checked=false)) + return GreenNode(K"quote", parse_atom(ps1, checked=false)) end elseif tok_kind == K"=" # misplaced = error("unexpected `=`") @@ -92,10 +92,10 @@ function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode TODO("Checked identifier names") end take_token!(ps) - return RawSyntaxNode(tok) + return GreenNode(tok) elseif tok_kind == K"VarIdentifier" take_token!(ps) - return RawSyntaxNode(tok) + return GreenNode(tok) elseif tok_kind == K"(" # parens or tuple take_token!(ps) return parse_paren(ps, checked) @@ -111,7 +111,7 @@ function parse_atom(ps::ParseState; checked::Bool=true)::RawSyntaxNode # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), elseif isliteral(tok_kind) take_token!(ps) - return RawSyntaxNode(tok) + return GreenNode(tok) elseif is_closing_token(tok) error("unexpected: $tok") else @@ -185,7 +185,7 @@ function parse_assignment(ps::ParseState, down) # ~ is the only non-syntactic assignment-precedence operator TODO("Turn ~ into a call node") else - RawSyntaxNode + GreenNode end end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 
eef0256905e3c..556c247aef060 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -29,11 +29,11 @@ Design alternatives to explore: in the serialization and discard the source text. (Caveat - unclear that this could deal with incremental parsing...) """ -struct RawSyntaxNode +struct GreenNode kind::Kind span::UInt32 flags::UInt32 - args::Union{Tuple{},Vector{RawSyntaxNode}} + args::Union{Tuple{},Vector{GreenNode}} end const _RawFlags = UInt32 @@ -49,33 +49,33 @@ function raw_flags(; trivia::Bool=false, infix::Bool=false) return flags::_RawFlags end -function RawSyntaxNode(kind::Kind, span::Int, flags::_RawFlags=EMPTY_FLAGS) - RawSyntaxNode(kind, span, flags, ()) +function GreenNode(kind::Kind, span::Int, flags::_RawFlags=EMPTY_FLAGS) + GreenNode(kind, span, flags, ()) end -function RawSyntaxNode(raw::TzTokens.RawToken) +function GreenNode(raw::TzTokens.RawToken) span = 1 + raw.endbyte - raw.startbyte - RawSyntaxNode(kind(raw), span, 0, FIXME) + GreenNode(kind(raw), span, 0, FIXME) end -function RawSyntaxNode(kind::Kind, flags::_RawFlags, args::RawSyntaxNode...) +function GreenNode(kind::Kind, flags::_RawFlags, args::GreenNode...) span = sum(x.span for x in args) - RawSyntaxNode(kind, span, flags, RawSyntaxNode[args...]) + GreenNode(kind, span, flags, GreenNode[args...]) end -function RawSyntaxNode(kind::Kind, args::RawSyntaxNode...) - RawSyntaxNode(kind, _RawFlags(0), args...) +function GreenNode(kind::Kind, args::GreenNode...) + GreenNode(kind, _RawFlags(0), args...) 
end # Acessors / predicates -haschildren(node::RawSyntaxNode) = !(node.args isa Tuple{}) -children(node::RawSyntaxNode) = node.args +haschildren(node::GreenNode) = !(node.args isa Tuple{}) +children(node::GreenNode) = node.args -istrivia(node::RawSyntaxNode) = node.flags & TRIVIA_FLAG != 0 -isinfix(node::RawSyntaxNode) = node.flags & INFIX_FLAG != 0 -iserror(node::RawSyntaxNode) = node.flags & ERROR_FLAG != 0 +istrivia(node::GreenNode) = node.flags & TRIVIA_FLAG != 0 +isinfix(node::GreenNode) = node.flags & INFIX_FLAG != 0 +iserror(node::GreenNode) = node.flags & ERROR_FLAG != 0 -kind(node::RawSyntaxNode) = node.kind +kind(node::GreenNode) = node.kind # Pretty printing function _show_raw_node(io, node, indent, pos, str, show_trivia) @@ -114,11 +114,11 @@ function _show_raw_node(io, node, indent, pos, str, show_trivia) end end -function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode) +function Base.show(io::IO, ::MIME"text/plain", node::GreenNode) _show_raw_node(io, node, "", 1, nothing, true) end -function Base.show(io::IO, ::MIME"text/plain", node::RawSyntaxNode, str::String; show_trivia=true) +function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::String; show_trivia=true) _show_raw_node(io, node, "", 1, str, show_trivia) end @@ -132,14 +132,14 @@ Design options: """ mutable struct SyntaxNode source::SourceFile - raw::RawSyntaxNode + raw::GreenNode position::Int parent::Union{Nothing,SyntaxNode} head::Symbol val::Any end -function SyntaxNode(source::SourceFile, raw::RawSyntaxNode, position::Integer=1) +function SyntaxNode(source::SourceFile, raw::GreenNode, position::Integer=1) if !haschildren(raw) # Leaf node k = raw.kind @@ -286,7 +286,7 @@ end # # However... this analogy is only good for complete trees at a given depth (= # dimension). But the syntax is oh-so-handy! -function Base.getindex(node::Union{SyntaxNode,RawSyntaxNode}, path::Int...) +function Base.getindex(node::Union{SyntaxNode,GreenNode}, path::Int...) child(node, path...) 
end function Base.setindex!(node::SyntaxNode, x::SyntaxNode, path::Int...) @@ -296,7 +296,7 @@ end """ Get absolute position and span of the child of `node` at the given tree `path`. """ -function child_position_span(node::RawSyntaxNode, path::Int...) +function child_position_span(node::GreenNode, path::Int...) n = node p = 1 for index in path diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index f20b88dceac78..05afa697637f4 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -3,7 +3,7 @@ using Test using JuliaSyntax: SourceFile -using JuliaSyntax: RawSyntaxNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG, +using JuliaSyntax: GreenNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild! using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator @@ -13,12 +13,12 @@ using JuliaSyntax: ParseStream, bump, peek, emit # Shortcuts for defining raw syntax nodes # Trivia nodes -T(k, s) = RawSyntaxNode(k, s, raw_flags(trivia=true)) +T(k, s) = GreenNode(k, s, raw_flags(trivia=true)) # Non-trivia nodes -N(k, s) = RawSyntaxNode(k, s) -N(k, args::RawSyntaxNode...) = RawSyntaxNode(k, args...) +N(k, s) = GreenNode(k, s) +N(k, args::GreenNode...) = GreenNode(k, args...) # Non-trivia, infix form -NI(k, args::RawSyntaxNode...) = RawSyntaxNode(k, raw_flags(infix=true), args...) +NI(k, args::GreenNode...) = GreenNode(k, raw_flags(infix=true), args...) 
include("syntax_trees.jl") diff --git a/JuliaSyntax/test/syntax_trees.jl b/JuliaSyntax/test/syntax_trees.jl index cb0f35f1d1ae4..ed57bca4d85a5 100644 --- a/JuliaSyntax/test/syntax_trees.jl +++ b/JuliaSyntax/test/syntax_trees.jl @@ -44,7 +44,7 @@ N(K"for", # And the following AST s = SyntaxNode(source, t) -println("\nRawSyntaxNode") +println("\nGreenNode") show(stdout, MIME"text/plain"(), t, code, show_trivia=true) println("\nSyntaxNode") From 44c055ea88d07e35f2bcd01cafda726fe0f0a29a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 8 Dec 2021 14:37:11 +1000 Subject: [PATCH 0228/1109] Links to Rust's diagnostics system --- JuliaSyntax/README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 1035a41d47232..ac7629baa930a 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -5,15 +5,15 @@ Yet another Julia frontend, written in Julia. Goals: -* Parse Julia code with precise source mapping (concrete syntax trees) +* Parse Julia code with precise source mapping * Avoid worrying about how much work this will be 😅 Nice to have: * Speedy enough for interactive editing * Production quality error recovery and reporting * "Compilation as an API" to support all sorts of tooling +* Make the code easy to maintain in parallel with Julia's flisp frontend * Go further than parsing - macro expansion, syntax desugaring and scope analysis -* Code which is correct, fast and understandable ## Design @@ -225,10 +225,17 @@ Highlights: another flat stream of events." This seems great, let's adopt it! 
* TODO -## Oil shell -* Andy Chu (the author of the OIL shell) has written some things about this - - Collected links about lossless syntax in [a wiki page](https://github.com/oilshell/oil/wiki/Lossless-Syntax-Tree-Pattern) - - A blog post [From AST to Lossless Syntax Tree](https://www.oilshell.org/blog/2017/02/11.html) +## Diagnostics + +Rust is renowned for having great compiler diagnostics, so it's probably a good +place to get inspiration from. + +Some resources: +* [rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html) +* The source of the Rust compiler's diagnostics system: + - The [`println!` macro](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_builtin_macros/src/format.rs) + shows how these can be emitted from macros + - The parser's [diagnostics.rs](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_parse/src/parser/diagnostics.rs) ## General resources about parsing From 008009a84ebd390fc741cd6e0e11aeb610fbee3d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 9 Dec 2021 14:33:08 +1000 Subject: [PATCH 0229/1109] Split out green tree and make it language independent Language independence of the green tree allows us to use the low-level data structure to overlay the text of any formal language. For example, it would be neat to have an s-expression parser :-) It also allows for neater experimentation with the representation of the GreenNode's head. For example, do we want a 16-bit Kind? Do we want flags in there? How about memorizing the layout of the child nodes which the parser knew about? 
--- JuliaSyntax/src/JuliaSyntax.jl | 2 + JuliaSyntax/src/green_tree.jl | 115 +++++++++++++++++++++++++ JuliaSyntax/src/parse_stream.jl | 27 +++--- JuliaSyntax/src/syntax_tree.jl | 145 +++++++------------------------- JuliaSyntax/test/runtests.jl | 13 +-- 5 files changed, 168 insertions(+), 134 deletions(-) create mode 100644 JuliaSyntax/src/green_tree.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index c9dd4e7c6ea70..6239cd268b486 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -8,6 +8,8 @@ include("utils.jl") include("source_files.jl") +include("green_tree.jl") + include("token_kinds.jl") include("syntax_tree.jl") diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl new file mode 100644 index 0000000000000..e6bb4a8ba6043 --- /dev/null +++ b/JuliaSyntax/src/green_tree.jl @@ -0,0 +1,115 @@ +""" + GreenNode(head, span) + GreenNode(head, children...) + +A "green tree" is a lossless syntax tree which overlays all the source text and +where + +* Nodes cover a contiguous span of bytes in the text +* Node children are ordered in the same order as the text +* Nodes are immutable and don't know their absolute position, so can be cached + and reused + +As implementation choices, we choose that: + +* Nodes are homogenously typed at the language level so they can be stored + concretely, with the `head` defining the node type. Normally this would + include a "syntax kind" enumeration, but it can also include flags and record + information the parser knew about the layout of the child nodes. +* For simplicity and uniformity, leaf nodes cover a single token in the source. + This is like rust-analyzer, but different from Roslyn where leaves can + include syntax trivia. 
+ +Design principles: +* Tree should remember what the lexer and parser knew about the source code +* Be position-independent so nodes can be interned and reused +* Be a low level textural overlay which is language independent. + +Design alternatives to explore: +* Maybe allow some loss of local parser state if it can be derived again + quickly? Particularly in the ordering of children. +* Store strings for tokens? (Surprisingly, rust-analyzer does this. It could be + efficient if the strings or nodes are interned for the parsing session?) +* Never construct this tree? Instead serialize it to Vector{UInt8} in an + efficient but compact format? Could this be more flexible with storing parser + state and beat the interning approach? We could also store the source tokens + in the serialization and discard the source text. (Caveat - unclear that this + could deal with incremental parsing...) +""" +struct GreenNode{Head} + head::Head + span::UInt32 + args::Union{Tuple{},Vector{GreenNode{Head}}} +end + +function GreenNode(head::Head, span::Integer) where {Head} + GreenNode{Head}(head, span, ()) +end + +function GreenNode(head::Head, span::Integer, args::Vector{GreenNode{Head}}) where {Head} + GreenNode{Head}(head, span, args) +end + +function GreenNode(head::Head, args::GreenNode{Head}...) 
where {Head} + span = sum(x.span for x in args) + GreenNode{Head}(head, span, GreenNode{Head}[args...]) +end + + +# Accessors / predicates +haschildren(node::GreenNode) = !(node.args isa Tuple{}) +children(node::GreenNode) = node.args +span(node::GreenNode) = node.span +head(node::GreenNode) = node.head + +# Predicates +istrivia(node::GreenNode) = istrivia(node.head) +iserror(node::GreenNode) = iserror(node.head) + +Base.summary(node::GreenNode) = summary(node.head) + +# Pretty printing +function _show_green_node(io, node, indent, pos, str, show_trivia) + if !show_trivia && istrivia(node) + return + end + posstr = "$(lpad(pos, 6)):$(rpad(pos+span(node)-1, 6)) │" + is_leaf = !haschildren(node) + if is_leaf + line = string(posstr, indent, summary(node)) + else + line = string(posstr, indent, '[', summary(node), "]") + end + if !istrivia(node) && is_leaf + line = rpad(line, 40) * "✔" + end + if iserror(node) + line = rpad(line, 41) * "✘" + end + if is_leaf && !isnothing(str) + line = string(rpad(line, 43), ' ', repr(str[pos:pos + span(node) - 1])) + end + line = line*"\n" + if iserror(node) + printstyled(io, line, color=:light_red) + else + print(io, line) + end + if !is_leaf + new_indent = indent*" " + p = pos + for x in children(node) + _show_green_node(io, x, new_indent, p, str, show_trivia) + p += x.span + end + end +end + +function Base.show(io::IO, ::MIME"text/plain", node::GreenNode) + _show_green_node(io, node, "", 1, nothing, true) +end + +function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::String; show_trivia=true) + _show_green_node(io, node, "", 1, str, show_trivia) +end + diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index fd89003677e8e..ca916dd7355c2 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -36,20 +36,21 @@ Base.:(~)(k::Kind, tok::SyntaxToken) = kind(tok) == k #------------------------------------------------------------------------------- struct TextSpan - 
kind::Kind - flags::_RawFlags + head::SyntaxHead first_byte::Int last_byte::Int end -function TextSpan(raw::RawToken, flags::_RawFlags) - TextSpan(raw.kind, flags, raw.startbyte + 1, raw.endbyte + 1) +function TextSpan(raw::RawToken, flags::RawFlags) + TextSpan(SyntaxHead(raw.kind, flags), raw.startbyte + 1, raw.endbyte + 1) end -kind(span::TextSpan) = span.kind -first_byte(span::TextSpan) = span.first_byte -last_byte(span::TextSpan) = span.last_byte -span(span::TextSpan) = last_byte(span) - first_byte(span) + 1 +head(text_span::TextSpan) = text_span.head +kind(text_span::TextSpan) = kind(text_span.head) +flags(text_span::TextSpan) = flags(text_span.head) +first_byte(text_span::TextSpan) = text_span.first_byte +last_byte(text_span::TextSpan) = text_span.last_byte +span(text_span::TextSpan) = last_byte(text_span) - first_byte(text_span) + 1 struct Diagnostic text_span::TextSpan @@ -143,7 +144,7 @@ function bump(stream::ParseStream, flags=EMPTY_FLAGS) trivia_span = popfirst!(stream.lookahead_trivia) push!(stream.spans, trivia_span) end - span = TextSpan(kind(tok), flags, first_byte(tok), last_byte(tok)) + span = TextSpan(SyntaxHead(kind(tok), flags), first_byte(tok), last_byte(tok)) push!(stream.spans, span) stream.next_byte = last_byte(tok) + 1 nothing @@ -162,11 +163,11 @@ The `start_position` of the span should be a previous return value of `position()`. 
""" function emit(stream::ParseStream, start_position::Integer, kind::Kind, - flags::_RawFlags = EMPTY_FLAGS; error=nothing) + flags::RawFlags = EMPTY_FLAGS; error=nothing) if !isnothing(error) flags |= ERROR_FLAG end - text_span = TextSpan(kind, flags, start_position, stream.next_byte-1) + text_span = TextSpan(SyntaxHead(kind, flags), start_position, stream.next_byte-1) if !isnothing(error) push!(stream.diagnostics, Diagnostic(text_span, error)) end @@ -183,10 +184,10 @@ end function _push_node!(stack, text_span::TextSpan, children=nothing) if isnothing(children) - node = GreenNode(kind(text_span), span(text_span), text_span.flags) + node = GreenNode(head(text_span), span(text_span)) push!(stack, (text_span=text_span, node=node)) else - node = GreenNode(kind(text_span), span(text_span), text_span.flags, children) + node = GreenNode(head(text_span), span(text_span), children) push!(stack, (text_span=text_span, node=node)) end end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 556c247aef060..32b9943966399 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -1,126 +1,39 @@ #------------------------------------------------------------------------------- # Syntax tree types -# Desired rules of lossless syntax trees: -# -# * Every source byte is covered by the tree -# * The children (including trivia) cover the full span of the parent -# * Children occur in source order -# -# Additionally -# * Nodes should be position-independent so that reparsing doesn't disturb them, -# and so that it's possible to pool and reuse them (especially leaf nodes!) - -""" -The rawest version of a lossless syntax tree. - -Design principles: -* Tree should remember what the lexer and parser knew about the source code -* Be position-independent so nodes can be interned and reused - -Design alternatives to explore: -* Maybe allow some loss of local parser state if it can be derived again - quickly? 
Particularly in the ordering of children. -* Store strings for tokens? (Surprisingly, rust-analyzer does this. It could be - efficient if the strings or nodes are interned for the parsing session?) -* Never construct this tree? Instead serialize it to Vector{UInt8} in an - efficient but compact format? Could this be more flexible with storing parser - state and beat the interning approach? We could also store the source tokens - in the serialization and discard the source text. (Caveat - unclear that this - could deal with incremental parsing...) -""" -struct GreenNode - kind::Kind - span::UInt32 - flags::UInt32 - args::Union{Tuple{},Vector{GreenNode}} -end +#------------------------------------------------------------------------------- -const _RawFlags = UInt32 +const RawFlags = UInt32 EMPTY_FLAGS = 0x00000000 TRIVIA_FLAG = 0x00000001 INFIX_FLAG = 0x00000002 ERROR_FLAG = 0x80000000 -function raw_flags(; trivia::Bool=false, infix::Bool=false) - flags = _RawFlags(0) - trivia && (flags |= TRIVIA_FLAG) - infix && (flags |= INFIX_FLAG) - return flags::_RawFlags -end - -function GreenNode(kind::Kind, span::Int, flags::_RawFlags=EMPTY_FLAGS) - GreenNode(kind, span, flags, ()) +struct SyntaxHead + kind::Kind + flags::RawFlags end -function GreenNode(raw::TzTokens.RawToken) - span = 1 + raw.endbyte - raw.startbyte - GreenNode(kind(raw), span, 0, FIXME) -end +kind(head::SyntaxHead) = head.kind +flags(head::SyntaxHead) = head.flags -function GreenNode(kind::Kind, flags::_RawFlags, args::GreenNode...) - span = sum(x.span for x in args) - GreenNode(kind, span, flags, GreenNode[args...]) +function Base.summary(head::SyntaxHead) + _kind_str(kind(head)) end -function GreenNode(kind::Kind, args::GreenNode...) - GreenNode(kind, _RawFlags(0), args...) 
-end - -# Acessors / predicates -haschildren(node::GreenNode) = !(node.args isa Tuple{}) -children(node::GreenNode) = node.args - -istrivia(node::GreenNode) = node.flags & TRIVIA_FLAG != 0 -isinfix(node::GreenNode) = node.flags & INFIX_FLAG != 0 -iserror(node::GreenNode) = node.flags & ERROR_FLAG != 0 - -kind(node::GreenNode) = node.kind - -# Pretty printing -function _show_raw_node(io, node, indent, pos, str, show_trivia) - if !show_trivia && istrivia(node) - return - end - posstr = "$(lpad(pos, 6)):$(rpad(pos+node.span-1, 6)) │" - is_leaf = !haschildren(node) - if is_leaf - line = string(posstr, indent, _kind_str(node.kind)) - else - line = string(posstr, indent, '[', _kind_str(node.kind), "]") - end - if !istrivia(node) && is_leaf - line = rpad(line, 40) * "✔" - end - if iserror(node) - line = rpad(line, 41) * "✘" - end - if is_leaf && !isnothing(str) - line = string(rpad(line, 43), ' ', repr(str[pos:pos + node.span - 1])) - end - line = line*"\n" - if iserror(node) - printstyled(io, line, color=:light_red) - else - print(io, line) - end - if !is_leaf - new_indent = indent*" " - p = pos - for a in node.args - _show_raw_node(io, a, new_indent, p, str, show_trivia) - p += a.span - end - end +function raw_flags(; trivia::Bool=false, infix::Bool=false) + flags = RawFlags(0) + trivia && (flags |= TRIVIA_FLAG) + infix && (flags |= INFIX_FLAG) + return flags::RawFlags end -function Base.show(io::IO, ::MIME"text/plain", node::GreenNode) - _show_raw_node(io, node, "", 1, nothing, true) -end +kind(node::GreenNode{SyntaxHead}) = head(node).kind +flags(node::GreenNode{SyntaxHead}) = head(node).flags -function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::String; show_trivia=true) - _show_raw_node(io, node, "", 1, str, show_trivia) -end +istrivia(node::GreenNode{SyntaxHead}) = flags(node) & TRIVIA_FLAG != 0 +isinfix(node::GreenNode{SyntaxHead}) = flags(node) & INFIX_FLAG != 0 +iserror(node::GreenNode{SyntaxHead}) = flags(node) & ERROR_FLAG != 0 
#------------------------------------------------------------------------------- # AST interface, built on top of raw tree @@ -132,18 +45,18 @@ Design options: """ mutable struct SyntaxNode source::SourceFile - raw::GreenNode + raw::GreenNode{SyntaxHead} position::Int parent::Union{Nothing,SyntaxNode} head::Symbol val::Any end -function SyntaxNode(source::SourceFile, raw::GreenNode, position::Integer=1) +function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) if !haschildren(raw) # Leaf node - k = raw.kind - val_range = position:position + raw.span - 1 + k = kind(raw) + val_range = position:position + span(raw) - 1 val_str = source[val_range] # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. @@ -152,7 +65,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode, position::Integer=1) elseif k == K"Identifier" val = Symbol(val_str) elseif k == K"String" - val = unescape_string(source[position+1:position+raw.span-2]) + val = unescape_string(source[position+1:position+span(raw)-2]) elseif isoperator(k) val = Symbol(val_str) else @@ -160,7 +73,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode, position::Integer=1) end return SyntaxNode(source, raw, position, nothing, :leaf, val) else - k = raw.kind + k = kind(raw) head = k == K"call" ? :call : k == K"toplevel" ? :toplevel : k == K"block" ? :block : @@ -200,11 +113,13 @@ end haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? 
node.val::Vector{SyntaxNode} : () +span(node::SyntaxNode) = span(node.raw) + function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename #@info "" fname print_fname current_filename[] line, col = source_location(node.source, node.position) - posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+node.raw.span,6))│" + posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node),6))│" nodestr = !haschildren(node) ? repr(node.val) : "[$(_kind_str(kind(node.raw)))]" @@ -302,7 +217,7 @@ function child_position_span(node::GreenNode, path::Int...) for index in path cs = children(n) for i = 1:index-1 - p += cs[i].span + p += span(cs[i]) end n = cs[index] end @@ -311,7 +226,7 @@ end function child_position_span(node::SyntaxNode, path::Int...) n = child(node, path...) - n, n.position, n.raw.span + n, n.position, span(n) end """ diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 05afa697637f4..cdc15ad370e2b 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -3,8 +3,9 @@ using Test using JuliaSyntax: SourceFile -using JuliaSyntax: GreenNode, SyntaxNode, raw_flags, TRIVIA_FLAG, INFIX_FLAG, - children, child, setchild! +using JuliaSyntax: GreenNode, SyntaxNode, + raw_flags, TRIVIA_FLAG, INFIX_FLAG, + children, child, setchild!, SyntaxHead using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator using JuliaSyntax: highlight @@ -13,12 +14,12 @@ using JuliaSyntax: ParseStream, bump, peek, emit # Shortcuts for defining raw syntax nodes # Trivia nodes -T(k, s) = GreenNode(k, s, raw_flags(trivia=true)) +T(k, s) = GreenNode(SyntaxHead(k, raw_flags(trivia=true)), s, ) # Non-trivia nodes -N(k, s) = GreenNode(k, s) -N(k, args::GreenNode...) = GreenNode(k, args...) +N(k, s) = GreenNode(SyntaxHead(k, raw_flags()), s) +N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, raw_flags()), args...) 
# Non-trivia, infix form -NI(k, args::GreenNode...) = GreenNode(k, raw_flags(infix=true), args...) +NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, raw_flags(infix=true)), args...) include("syntax_trees.jl") From f13d45ab492deb44f91f8286759c21254411a75a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 9 Dec 2021 14:50:42 +1000 Subject: [PATCH 0230/1109] Move ParseState out of parser.jl This reserves parser.jl for the definition of the parsing functions only, which should be somewhat clearer than mixing it with functions which act on the parser's supporting data structures. --- JuliaSyntax/src/parse_stream.jl | 68 ++++++++++++++++++++++++++++----- JuliaSyntax/src/parser.jl | 47 ----------------------- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ca916dd7355c2..a19db14f8586f 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -57,6 +57,17 @@ struct Diagnostic message::String end +function show_diagnostic(io::IO, diagnostic::Diagnostic, code) + printstyled(io, "Error: ", color=:light_red) + print(io, diagnostic.message, ":\n") + p = first_byte(diagnostic.text_span) + q = last_byte(diagnostic.text_span) + print(io, code[1:p-1]) + _printstyled(io, code[p:q]; color=(100,40,40)) + print(io, code[q+1:end], '\n') +end + +#------------------------------------------------------------------------------- """ ParseStream provides an IO interface for the parser. It - Wraps the lexer from Tokenize.jl with a short lookahead buffer @@ -176,8 +187,7 @@ function emit(stream::ParseStream, start_position::Integer, kind::Kind, end -#------------------------------------------------------------------------------- -# Tree construction +# Tree construction from the list of text spans held by ParseStream # # Note that this is largely independent of GreenNode, and could easily be # made completely independent with a tree builder interface. 
@@ -220,12 +230,50 @@ function to_raw_tree(st) return only(stack).node end -function show_diagnostic(io::IO, diagnostic, code) - printstyled(io, "Error: ", color=:light_red) - print(io, diagnostic.message, ":\n") - p = first_byte(diagnostic.text_span) - q = last_byte(diagnostic.text_span) - print(io, code[1:p-1]) - _printstyled(io, code[p:q]; color=(100,40,40)) - print(io, code[q+1:end], '\n') + +#------------------------------------------------------------------------------- +""" +ParseState carries parser context as we recursively descend into the parse +tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix +literals we're in `space_sensitive` mode, and `[x -y]` means [(x) (-y)]. +""" +struct ParseState + stream::ParseStream + # Vesion of Julia we're parsing this code for. May be different from VERSION! + julia_version::VersionNumber + + # Disable range colon for parsing ternary conditional operator + range_colon_enabled::Bool + # In space-sensitive mode "x -y" is 2 expressions, not a subtraction + space_sensitive::Bool + # Seeing `for` stops parsing macro arguments and makes a generator + for_generator::Bool + # Treat 'end' like a normal symbol instead of a reserved word + end_symbol::Bool + # Treat newline like ordinary whitespace instead of as a potential separator + whitespace_newline::Bool + # Enable parsing `where` with high precedence + where_enabled::Bool +end + +# Normal context +function ParseState(stream::ParseStream; julia_version=VERSION) + ParseState(stream, julia_version, true, false, true, false, false, false) end + +function ParseState(ps::ParseState; range_colon_enabled=nothing, + space_sensitive=nothing, for_generator=nothing, + end_symbol=nothing, whitespace_newline=nothing, + where_enabled=nothing) + ParseState(ps.stream, ps.julia_version, + range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, + space_sensitive === nothing ? ps.space_sensitive : space_sensitive, + for_generator === nothing ? 
ps.for_generator : for_generator, + end_symbol === nothing ? ps.end_symbol : end_symbol, + whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, + where_enabled === nothing ? ps.where_enabled : where_enabled) +end + +peek(ps::ParseState, args...) = peek(ps.stream, args...) +bump(ps::ParseState, args...) = bump(ps.stream, args...) +emit(ps::ParseState, args...) = emit(ps.stream, args...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0aaa7c5ef0a16..06a6a966490c5 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1,49 +1,3 @@ -""" -ParseState carries parser context as we recursively descend into the parse -tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix -literals we're in `space_sensitive` mode, and `[x -y]` means [(x) (-y)]. -""" -struct ParseState - stream::ParseStream - # Vesion of Julia we're parsing this code for. May be different from VERSION! - julia_version::VersionNumber - - # Disable range colon for parsing ternary conditional operator - range_colon_enabled::Bool - # In space-sensitive mode "x -y" is 2 expressions, not a subtraction - space_sensitive::Bool - # Seeing `for` stops parsing macro arguments and makes a generator - for_generator::Bool - # Treat 'end' like a normal symbol instead of a reserved word - end_symbol::Bool - # Treat newline like ordinary whitespace instead of as a potential separator - whitespace_newline::Bool - # Enable parsing `where` with high precedence - where_enabled::Bool -end - -# Normal context -function ParseState(stream::ParseStream; julia_version=VERSION) - ParseState(stream, julia_version, true, false, true, false, false, false) -end - -function ParseState(ps::ParseState; range_colon_enabled=nothing, - space_sensitive=nothing, for_generator=nothing, - end_symbol=nothing, whitespace_newline=nothing, - where_enabled=nothing) - ParseState(ps.stream, ps.julia_version, - range_colon_enabled === nothing ? 
ps.range_colon_enabled : range_colon_enabled, - space_sensitive === nothing ? ps.space_sensitive : space_sensitive, - for_generator === nothing ? ps.for_generator : for_generator, - end_symbol === nothing ? ps.end_symbol : end_symbol, - whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, - where_enabled === nothing ? ps.where_enabled : where_enabled) -end - -peek(ps::ParseState, args...) = peek(ps.stream, args...) -bump(ps::ParseState, args...) = bump(ps.stream, args...) -emit(ps::ParseState, args...) = emit(ps.stream, args...) - #------------------------------------------------------------------------------- # Parser @@ -195,4 +149,3 @@ function parse(code) stream = ParseStream(code) parse_statements(stream) end - From 72ebdb7df373de2faa2176e17550cdd9b31a5bdd Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 9 Dec 2021 16:16:41 +1000 Subject: [PATCH 0231/1109] Copy over high level structure of flisp-based parser These function definitions and comments were automatically reformatted into Julia code with the help of the script in tools/flisp_defines_to_julia.jl with very light editing. The idea is to try mirroring the structure and naming of the flisp-based parser as closely as possible so that * The code is mutually understandable * The edge cases of Julia syntax are easy to capture by reusing the same recursive parser. Perhaps this won't actually mesh well with the need to produce a lossless syntax tree, but let's try it. 
--- JuliaSyntax/src/parser.jl | 748 +++++++++++++++++++- JuliaSyntax/tools/flisp_defines_to_julia.jl | 6 +- 2 files changed, 740 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 06a6a966490c5..b593b94bef935 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1,5 +1,11 @@ #------------------------------------------------------------------------------- -# Parser + +# Parser Utils + +function TODO(str) + error("TODO: $str") +end + function is_closing_token(ps::ParseState, tok) k = kind(tok) @@ -12,10 +18,735 @@ function has_whitespace_prefix(tok::SyntaxToken) tok.had_whitespace end -function TODO(str) - error("TODO: $str") +#------------------------------------------------------------------------------- +# Parser +# +# The definitions and top-level comments here were automatically generated to +# match the structure of Julia's official flisp-based parser. +# +# This is to make both codebases mutually understandable and make porting +# changes simple. + + +# parse left-to-right binary operator +# produces structures like (+ (+ (+ 2 3) 4) 5) +# +# flisp: (define-macro (parse-LtoR s down ops) +function parse_LtoR(ps::ParseState, down, ops) +end + +# parse right-to-left binary operator +# produces structures like (= a (= b (= c d))) +# (define-macro (parse-RtoL s down ops syntactic self) +# flisp: +function parse_RtoL(ps::ParseState, down, ops) +end + +# flisp: (define (line-number-node s) +function line_number_node(s) + TODO("line_number_node unimplemented") +end + +# parse a@b@c@... as (@ a b c ...) for some operator @ +# ops: operators to look for +# head: the expression head to yield in the result, e.g. "a;b" => (block a b) +# closer?: predicate to identify tokens that stop parsing +# however, this doesn't consume the closing token, just looks at it +# ow, my eyes!! +# +# flisp: (define (parse-Nary s down ops head closer? 
add-linenums) +function parse_Nary(ps::ParseState, down, ops, head, is_closer, add_linenums) + TODO("parse_Nary unimplemented") +end + +# the principal non-terminals follow, in increasing precedence order + +# flisp: (define (parse-block s (down parse-eq)) +function parse_block(ps::ParseState; down, parse_eq) + TODO("parse_block unimplemented") +end + +# ";" at the top level produces a sequence of top level expressions +# +# flisp: (define (parse-stmts s) +function parse_stmts(ps::ParseState) + TODO("parse_stmts unimplemented") +end + +# flisp: (define (parse-eq s) (parse-assignment s parse-comma)) +function parse_eq(ps::ParseState) + TODO("parse_eq unimplemented") +end + +# symbol tokens that do not simply parse to themselves when appearing alone as +# an element of an argument list + +# parse-eq* is used where commas are special, for example in an argument list +# +# flisp: (define (parse-eq* s) +function parse_eq_star(ps::ParseState) + TODO("parse_eq_star unimplemented") +end + +# flisp: (define (eventually-call? ex) +function is_eventually_call(ex) + TODO("is_eventually_call unimplemented") +end + +# flisp: (define (add-line-number blk linenode) +function add_line_number(blk, linenode) + TODO("add_line_number unimplemented") +end + +# flisp: (define (short-form-function-loc ex lno) +function short_form_function_loc(ex, lno) + TODO("short_form_function_loc unimplemented") +end + +# flisp: (define (parse-assignment s down) +function parse_assignment(ps::ParseState, down) + TODO("parse_assignment unimplemented") +end + +# parse-comma is needed for commas outside parens, for example a = b,c +# +# flisp: (define (parse-comma s) +function parse_comma(ps::ParseState) + TODO("parse_comma unimplemented") +end + +# flisp: (define (parse-pair s) (parse-RtoL s parse-cond is-prec-pair? 
#f parse-pair)) +function parse_pair(ps::ParseState) + TODO("parse_pair unimplemented") +end + +# flisp: (define (parse-cond s) +function parse_cond(ps::ParseState) + TODO("parse_cond unimplemented") +end + +# flisp: (define (parse-arrow s) (parse-RtoL s parse-or is-prec-arrow? (eq? t '-->) parse-arrow)) +function parse_arrow(ps::ParseState) + TODO("parse_arrow unimplemented") +end +# flisp: (define (parse-or s) (parse-RtoL s parse-and is-prec-lazy-or? #t parse-or)) +function parse_or(ps::ParseState) + TODO("parse_or unimplemented") +end +# flisp: (define (parse-and s) (parse-RtoL s parse-comparison is-prec-lazy-and? #t parse-and)) +function parse_and(ps::ParseState) + TODO("parse_and unimplemented") +end + +# flisp: (define (parse-comparison s) +function parse_comparison(ps::ParseState) + TODO("parse_comparison unimplemented") +end + +# flisp: (define (parse-pipe< s) (parse-RtoL s parse-pipe> is-prec-pipe s) (parse-LtoR s parse-range is-prec-pipe>?)) +function parse_pipe_gt(ps::ParseState) + TODO("parse_pipe_gt unimplemented") +end + +# parse ranges and postfix ... +# colon is strange; 3 arguments with 2 colons yields one call: +# 1:2 => (call : 1 2) +# 1:2:3 => (call : 1 2 3) +# +# flisp: (define (parse-range s) +function parse_range(ps::ParseState) + TODO("parse_range unimplemented") +end + +# parse left to right chains of a certain binary operator +# returns a list of arguments +# +# flisp: (define (parse-chain s down op) +function parse_chain(ps::ParseState, down, op) + TODO("parse_chain unimplemented") +end + +# parse left to right, combining chains of a certain operator into 1 call +# e.g. a+b+c => (call + a b c) +# +# flisp: (define (parse-with-chains s down ops chain-ops) +function parse_with_chains(ps::ParseState, down, ops, chain_ops) + TODO("parse_with_chains unimplemented") +end + +# flisp: (define (parse-expr s) (parse-with-chains s parse-term is-prec-plus? 
'(+ ++))) +function parse_expr(ps::ParseState) + TODO("parse_expr unimplemented") +end +# flisp: (define (parse-term s) (parse-with-chains s parse-rational is-prec-times? '(*))) +function parse_term(ps::ParseState) + TODO("parse_term unimplemented") +end +# flisp: (define (parse-rational s) (parse-LtoR s parse-shift is-prec-rational?)) +function parse_rational(ps::ParseState) + TODO("parse_rational unimplemented") +end +# flisp: (define (parse-shift s) (parse-LtoR s parse-unary-subtype is-prec-bitshift?)) +function parse_shift(ps::ParseState) + TODO("parse_shift unimplemented") +end + +# parse `<: A where B` as `<: (A where B)` (issue #21545) +# +# flisp: (define (parse-unary-subtype s) +function parse_unary_subtype(ps::ParseState) + TODO("parse_unary_subtype unimplemented") +end + +# flisp: (define (parse-where-chain s first) +function parse_where_chain(ps::ParseState, first) + TODO("parse_where_chain unimplemented") +end + +# flisp: (define (parse-where s down) +function parse_where(ps::ParseState, down) + TODO("parse_where unimplemented") +end + +# given an expression and the next token, is there a juxtaposition +# operator between them? +# +# flisp: (define (juxtapose? 
s expr t) +function is_juxtapose(s, expr, t) + TODO("is_juxtapose unimplemented") +end + +# flisp: (define (parse-juxtapose s) +function parse_juxtapose(ps::ParseState) + TODO("parse_juxtapose unimplemented") +end + +# flisp: (define (maybe-negate op num) +function maybe_negate(op, num) + TODO("maybe_negate unimplemented") +end + +# operators handled by parse-unary at the start of an expression + +# flisp: (define (parse-unary s) +function parse_unary(ps::ParseState) + TODO("parse_unary unimplemented") +end + +# flisp: (define (fix-syntactic-unary e) +function fix_syntactic_unary(e) + TODO("fix_syntactic_unary unimplemented") +end + +# flisp: (define (parse-unary-call s op un spc) +function parse_unary_call(ps::ParseState, op, un, spc) + TODO("parse_unary_call unimplemented") +end + +# handle ^ and .^ +# -2^3 is parsed as -(2^3), so call parse-decl for the first argument, +# and parse-unary from then on (to handle 2^-3) +# +# flisp: (define (parse-factor s) +function parse_factor(ps::ParseState) + TODO("parse_factor unimplemented") +end + +# flisp: (define (parse-factor-with-initial-ex s ex0 (tok #f)) +function parse_factor_with_initial_ex(ps::ParseState, ex0; tok=false) + TODO("parse_factor_with_initial_ex unimplemented") +end + +# flisp: (define (parse-factor-after s) (parse-RtoL s parse-juxtapose is-prec-power? 
#f parse-factor-after)) +function parse_factor_after(ps::ParseState) + TODO("parse_factor_after unimplemented") +end + +# flisp: (define (parse-decl s) +function parse_decl(ps::ParseState) + TODO("parse_decl unimplemented") +end + +# flisp: (define (parse-decl-with-initial-ex s ex) +function parse_decl_with_initial_ex(ps::ParseState, ex) + TODO("parse_decl_with_initial_ex unimplemented") +end + +# parse function call, indexing, dot, and transpose expressions +# also handles looking for syntactic reserved words +# +# flisp: (define (parse-call s) +function parse_call(ps::ParseState) + TODO("parse_call unimplemented") +end + +# flisp: (define (parse-call-with-initial-ex s ex tok) +function parse_call_with_initial_ex(ps::ParseState, ex, tok) + TODO("parse_call_with_initial_ex unimplemented") +end + +# flisp: (define (parse-unary-prefix s) +function parse_unary_prefix(ps::ParseState) + TODO("parse_unary_prefix unimplemented") +end + +# flisp: (define (parse-def s is-func anon) +function parse_def(ps::ParseState, is_func, anon) + TODO("parse_def unimplemented") +end + +# flisp: (define (disallowed-space-error lno ex t) +function disallowed_space_error(lno, ex, t) + TODO("disallowed_space_error unimplemented") +end + +# flisp: (define (disallow-space s ex t) +function disallow_space(s, ex, t) + TODO("disallow_space unimplemented") +end + +# string macro suffix for given delimiter t +# +# flisp: (define (macsuffix t) +function macsuffix(t) + TODO("macsuffix unimplemented") +end + +# flisp: (define (parse-call-chain s ex macrocall?) 
+function parse_call_chain(ps::ParseState, ex, is_macrocall) + TODO("parse_call_chain unimplemented") +end + +# flisp: (define (expect-end s word) +function expect_end(s, word) + TODO("expect_end unimplemented") +end + +# flisp: (define (expect-end-error t word) +function expect_end_error(t, word) + TODO("expect_end_error unimplemented") +end + +# flisp: (define (parse-subtype-spec s) +function parse_subtype_spec(ps::ParseState) + TODO("parse_subtype_spec unimplemented") +end + +# flisp: (define (valid-func-sig? paren sig) +function is_valid_func_sig(paren, sig) + TODO("is_valid_func_sig unimplemented") +end + +# flisp: (define (valid-1arg-func-sig? sig) +function is_valid_1arg_func_sig(sig) + TODO("is_valid_1arg_func_sig unimplemented") +end + +# flisp: (define (unwrap-where x) +function unwrap_where(x) + TODO("unwrap_where unimplemented") +end + +# flisp: (define (rewrap-where x w) +function rewrap_where(x, w) + TODO("rewrap_where unimplemented") +end + +# flisp: (define (parse-struct-def s mut? 
word) +function parse_struct_def(ps::ParseState, is_mut, word) + TODO("parse_struct_def unimplemented") +end + +# consume any number of line endings from a token stream +# +# flisp: (define (take-lineendings s) +function take_lineendings(s) + TODO("take_lineendings unimplemented") +end + +# parse expressions or blocks introduced by syntactic reserved words +# +# flisp: (define (parse-resword s word) +function parse_resword(ps::ParseState, word) + TODO("parse_resword unimplemented") +end + +# flisp: (define (parse-do s) +function parse_do(ps::ParseState) + TODO("parse_do unimplemented") +end + +# flisp: (define (macrocall-to-atsym e) +function macrocall_to_atsym(e) + TODO("macrocall_to_atsym unimplemented") +end + +# flisp: (define (parse-imports s word) +function parse_imports(ps::ParseState, word) + TODO("parse_imports unimplemented") +end + +# flisp: (define (parse-macro-name s) +function parse_macro_name(ps::ParseState) + TODO("parse_macro_name unimplemented") +end + +# flisp: (define (parse-atsym s) +function parse_atsym(ps::ParseState) + TODO("parse_atsym unimplemented") +end + +# flisp: (define (parse-import-dots s) +function parse_import_dots(ps::ParseState) + TODO("parse_import_dots unimplemented") +end + +# flisp: (define (parse-import-path s word) +function parse_import_path(ps::ParseState, word) + TODO("parse_import_path unimplemented") +end + +# flisp: (define (parse-import s word from) +function parse_import(ps::ParseState, word, from) + TODO("parse_import unimplemented") +end + +# parse comma-separated assignments, like "i=1:n,j=1:m,..." 
+# +# flisp: (define (parse-comma-separated s what) +function parse_comma_separated(ps::ParseState, what) + TODO("parse_comma_separated unimplemented") +end + +# flisp: (define (parse-comma-separated-assignments s) +function parse_comma_separated_assignments(ps::ParseState) + TODO("parse_comma_separated_assignments unimplemented") end +# as above, but allows both "i=r" and "i in r" +# +# flisp: (define (parse-iteration-spec s) +function parse_iteration_spec(ps::ParseState) + TODO("parse_iteration_spec unimplemented") +end + +# flisp: (define (parse-comma-separated-iters s) +function parse_comma_separated_iters(ps::ParseState) + TODO("parse_comma_separated_iters unimplemented") +end + +# flisp: (define (parse-space-separated-exprs s) +function parse_space_separated_exprs(ps::ParseState) + TODO("parse_space_separated_exprs unimplemented") +end + +# flisp: (define (has-parameters? lst) +function is_has_parameters(lst) + TODO("is_has_parameters unimplemented") +end + +# flisp: (define (to-kws lst) +function to_kws(lst) + TODO("to_kws unimplemented") +end + +# like parse-arglist, but with `for` parsed as a generator +# +# flisp: (define (parse-call-arglist s closer) +function parse_call_arglist(ps::ParseState, closer) + TODO("parse_call_arglist unimplemented") +end + +# handle function call argument list, or any comma-delimited list. +# . an extra comma at the end is allowed +# . expressions after a ; are enclosed in (parameters ...) +# . an expression followed by ... becomes (... 
x) +# +# flisp: (define (parse-arglist s closer (add-linenums #f)) +function parse_arglist(ps::ParseState, closer; add_linenums=false) + TODO("parse_arglist unimplemented") +end + +# flisp: (define (parse-vect s first closer) +function parse_vect(ps::ParseState, first, closer) + TODO("parse_vect unimplemented") +end + +# flisp: (define (parse-generator s first) +function parse_generator(ps::ParseState, first) + TODO("parse_generator unimplemented") +end + +# flisp: (define (parse-comprehension s first closer) +function parse_comprehension(ps::ParseState, first, closer) + TODO("parse_comprehension unimplemented") +end + +# flisp: (define (parse-array s first closer gotnewline last-end-symbol) +function parse_array(ps::ParseState, first, closer, gotnewline, last_end_symbol) + TODO("parse_array unimplemented") +end + +# flisp: (define (expect-space-before s t) +function expect_space_before(s, t) + TODO("expect_space_before unimplemented") +end + +# flisp: (define (parse-cat s closer last-end-symbol) +function parse_cat(ps::ParseState, closer, last_end_symbol) + TODO("parse_cat unimplemented") +end + +# flisp: (define (kw-to-= e) (if (kwarg? e) (cons '= (cdr e)) e)) +function kw_to_equals(e) + TODO("kw_to_equals unimplemented") +end +# flisp: (define (=-to-kw e) (if (assignment? e) (cons 'kw (cdr e)) e)) +function equals_to_kw(e) + TODO("equals_to_kw unimplemented") +end + +# translate nested (parameters ...) expressions to a statement block if possible +# this allows us to first parse tuples using parse-arglist +# +# flisp: (define (parameters-to-block e) +function parameters_to_block(e) + TODO("parameters_to_block unimplemented") +end + +# flisp: (define (rm-linenums e) +function rm_linenums(e) + TODO("rm_linenums unimplemented") +end + +# convert an arglist to a tuple or block expr +# leading-semi? means we saw (; ...) +# comma? means there was a comma after the first expression +# +# flisp: (define (arglist-to-tuple s leading-semi? comma? args . 
first) +function arglist_to_tuple(s, is_leading_semi, is_comma, args, _, first) + TODO("arglist_to_tuple unimplemented") +end + +# flisp: (define (tuple-to-arglist e) +function tuple_to_arglist(e) + TODO("tuple_to_arglist unimplemented") +end + +# flisp: (define (parse-paren s (checked #t)) (car (parse-paren- s checked))) +function parse_paren(ps::ParseState; checked=true) + TODO("parse_paren unimplemented") +end + +# return (expr . arglist) where arglist is #t iff this isn't just a parenthesized expr +# +# flisp: (define (parse-paren- s checked) +function parse_paren_(ps::ParseState, checked) + TODO("parse_paren_ unimplemented") +end + +# flisp: (define (not-eof-for delim c) +function not_eof_for(delim, c) + TODO("not_eof_for unimplemented") +end + +# flisp: (define (take-char p) +function take_char(p) + TODO("take_char unimplemented") +end + +# map the first element of lst +# +# flisp: (define (map-first f lst) +function map_first(f, lst) + TODO("map_first unimplemented") +end + +# map the elements of lst where (pred index) is true +# e.g., (map-at odd? 
(lambda (x) 0) '(a b c d)) -> '(a 0 c 0) +# +# flisp: (define (map-at pred f lst) +function map_at(pred, f, lst) + TODO("map_at unimplemented") +end + +# flisp: (define (parse-raw-literal s delim) +function parse_raw_literal(ps::ParseState, delim) + TODO("parse_raw_literal unimplemented") +end + +# flisp: (define (unescape-parsed-string-literal strs) +function unescape_parsed_string_literal(strs) + TODO("unescape_parsed_string_literal unimplemented") +end + +# flisp: (define (strip-escaped-newline s raw) +function strip_escaped_newline(s, raw) + TODO("strip_escaped_newline unimplemented") +end + +# remove `\` followed by a newline +# +# flisp: (define (strip-escaped-newline- s) +function strip_escaped_newline_(s) + TODO("strip_escaped_newline_ unimplemented") +end + +# flisp: (define (parse-string-literal s delim raw) +function parse_string_literal(ps::ParseState, delim, raw) + TODO("parse_string_literal unimplemented") +end + +# flisp: (define (strip-leading-newline s) +function strip_leading_newline(s) + TODO("strip_leading_newline unimplemented") +end + +# flisp: (define (dedent-triplequoted-string lst) +function dedent_triplequoted_string(lst) + TODO("dedent_triplequoted_string unimplemented") +end + +# flisp: (define (triplequoted-string-indentation lst) +function triplequoted_string_indentation(lst) + TODO("triplequoted_string_indentation unimplemented") +end + +# flisp: (define (triplequoted-string-indentation- s) +function triplequoted_string_indentation_(s) + TODO("triplequoted_string_indentation_ unimplemented") +end + +# return the longest common prefix of the elements of l +# e.g., (longest-common-prefix ((1 2) (1 4))) -> (1) +# +# flisp: (define (longest-common-prefix l) +function longest_common_prefix(l) + TODO("longest_common_prefix unimplemented") +end + +# return the longest common prefix of lists a & b +# +# flisp: (define (longest-common-prefix2 a b) +function longest_common_prefix2(a, b) + TODO("longest_common_prefix2 unimplemented") +end + +# 
flisp: (define (longest-common-prefix2- a b p) +function longest_common_prefix2_(a, b, p) + TODO("longest_common_prefix2_ unimplemented") +end + +# flisp: (define (string-split s sep) +function string_split(s, sep) + TODO("string_split unimplemented") +end + +# flisp: (define (string-split- s sep start splits) +function string_split_(s, sep, start, splits) + TODO("string_split_ unimplemented") +end + +# replace all occurrences of a in s with b +# +# flisp: (define (string-replace s a b) +function string_replace(s, a, b) + TODO("string_replace unimplemented") +end + +# flisp: (define (ends-interpolated-atom? c) +function is_ends_interpolated_atom(c) + TODO("is_ends_interpolated_atom unimplemented") +end + +# flisp: (define (parse-interpolate s) +function parse_interpolate(ps::ParseState) + TODO("parse_interpolate unimplemented") +end + +# raw = raw string literal +# when raw is #t, unescape only \\ and delimiter +# otherwise do full unescaping, and parse interpolations too +# +# flisp: (define (parse-string-literal- n p s delim raw) +function parse_string_literal_(n, p, s, delim, raw) + TODO("parse_string_literal_ unimplemented") +end + +# flisp: (define (not-eof-1 c) +function not_eof_1(c) + TODO("not_eof_1 unimplemented") +end + +# flisp: (define (unescape-string s) +function unescape_string_(s) + TODO("unescape_string_ unimplemented") +end + +# parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. +# +# flisp: (define (parse-atom s (checked #t)) +function parse_atom(ps::ParseState; checked=true) + TODO("parse_atom unimplemented") +end + +# flisp: (define (valid-modref? e) +function is_valid_modref(e) + TODO("is_valid_modref unimplemented") +end + +# flisp: (define (macroify-name e . 
suffixes) +function macroify_name(e, _, suffixes) + TODO("macroify_name unimplemented") +end + +# flisp: (define (macroify-call s call startloc) +function macroify_call(s, call, startloc) + TODO("macroify_call unimplemented") +end + +# flisp: (define (called-macro-name e) +function called_macro_name(e) + TODO("called_macro_name unimplemented") +end + +# flisp: (define (maybe-docstring s e) +function maybe_docstring(s, e) + TODO("maybe_docstring unimplemented") +end + +# flisp: (define (simple-string-literal? e) (string? e)) +function is_simple_string_literal(e) + TODO("is_simple_string_literal unimplemented") +end + +# flisp: (define (doc-string-literal? s e) +function is_doc_string_literal(s, e) + TODO("is_doc_string_literal unimplemented") +end + +# flisp: (define (parse-docstring s production) +function parse_docstring(ps::ParseState, production) + TODO("parse_docstring unimplemented") +end + +# --- main entry point --- + +# can optionally specify which grammar production to parse. +# default is parse-stmts. +# +# flisp: (define (julia-parse s . 
production) +function julia_parse(s, _, production) + TODO("julia_parse unimplemented") +end + +#------------------------------------------------------------------------------- +#------------------------------------------------------------------------------- + #= @@ -104,14 +835,6 @@ function parse_cat(ps0::ParseState, opening_tok, closer, last_end_symbol::Bool) end end -=# - -#------------------------------------------------------------------------------- - -# the principal non-terminals follow, in increasing precedence order - -#function parse_block(ps::ParseState, down=parse_eq) -#end # flisp: parse-stmts # `;` at the top level produces a sequence of top level expressions @@ -149,3 +872,6 @@ function parse(code) stream = ParseStream(code) parse_statements(stream) end + +=# + diff --git a/JuliaSyntax/tools/flisp_defines_to_julia.jl b/JuliaSyntax/tools/flisp_defines_to_julia.jl index 59763feeb3b83..a8e45dc23da2a 100644 --- a/JuliaSyntax/tools/flisp_defines_to_julia.jl +++ b/JuliaSyntax/tools/flisp_defines_to_julia.jl @@ -38,7 +38,7 @@ function juliafy_flisp(fl_input, jl_output) ";" => "; ", ) if startswith(funcname, "parse_") - funcargs = "ps::ParseState, "*funcargs + funcargs = replace(funcargs, r"^ *s\b"=>"ps::ParseState") end text = """ function $funcname($funcargs) @@ -52,8 +52,8 @@ function juliafy_flisp(fl_input, jl_output) print(jl_output, text) prev_newline = false had_comment = false - elseif occursin(r"^;;", line) - println(jl_output, replace(line, r"^;;" => "#")) + elseif occursin(r"^;", line) + println(jl_output, replace(line, r"^;+" => "#")) prev_newline = false had_comment = true elseif line == "" From d0b84f902065d8e7332581525dafe20df6232371 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 10 Dec 2021 01:14:56 +1000 Subject: [PATCH 0232/1109] Start porting flisp parser Parsing of various of the simpler constructs * assignment * ranges * pairs * arrows * comparison chains * pipes and various other operators with simple rules --- 
JuliaSyntax/src/green_tree.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 65 ++++++- JuliaSyntax/src/parser.jl | 307 +++++++++++++++++++++++++++----- JuliaSyntax/src/token_kinds.jl | 129 +++++++++----- 4 files changed, 407 insertions(+), 96 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index e6bb4a8ba6043..403efa27f9d6b 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -87,7 +87,7 @@ function _show_green_node(io, node, indent, pos, str, show_trivia) line = rpad(line, 41) * "✘" end if is_leaf && !isnothing(str) - line = string(rpad(line, 43), ' ', repr(str[pos:pos + span(node) - 1])) + line = string(rpad(line, 43), ' ', repr(str[pos:prevind(str, pos + span(node))])) end line = line*"\n" if iserror(node) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index a19db14f8586f..ee56e6b104d4b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -157,8 +157,36 @@ function bump(stream::ParseStream, flags=EMPTY_FLAGS) end span = TextSpan(SyntaxHead(kind(tok), flags), first_byte(tok), last_byte(tok)) push!(stream.spans, span) + mark = lastindex(stream.spans) stream.next_byte = last_byte(tok) + 1 - nothing + mark +end + +""" +Hack: Reset flags of an existing token in the output stream + +This is necessary on some occasions when we don't know whether a token will +have TRIVIA_FLAG set until. 
+""" +function set_flags!(stream::ParseStream, mark, flags) + text_span = stream.spans[mark] + stream.spans[mark] = TextSpan(SyntaxHead(kind(text_span), flags), + first_byte(text_span), last_byte(text_span)) +end + +#= +function accept(stream::ParseStream, k::Kind) + if peek(stream) != k + return false + else + bump(stream, TRIVIA_FLAG) + end +end +=# + +function bump(stream::ParseStream, k::Kind, flags=EMPTY_FLAGS) + @assert peek(stream) == k + bump(stream, flags) end function Base.position(stream::ParseStream) @@ -166,19 +194,19 @@ function Base.position(stream::ParseStream) end """ - emit(stream, start_position, kind [, flags = EMPTY_FLAGS]) + emit(stream, start_mark, kind, flags = EMPTY_FLAGS; error=nothing) Emit a new text span into the output which covers source bytes from -`start_position` to the end of the most recent token which was `bump()`'ed. -The `start_position` of the span should be a previous return value of +`start_mark` to the end of the most recent token which was `bump()`'ed. +The `start_mark` of the span should be a previous return value of `position()`. """ -function emit(stream::ParseStream, start_position::Integer, kind::Kind, +function emit(stream::ParseStream, start_mark::Integer, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) if !isnothing(error) flags |= ERROR_FLAG end - text_span = TextSpan(SyntaxHead(kind, flags), start_position, stream.next_byte-1) + text_span = TextSpan(SyntaxHead(kind, flags), start_mark, stream.next_byte-1) if !isnothing(error) push!(stream.diagnostics, Diagnostic(text_span, error)) end @@ -186,6 +214,16 @@ function emit(stream::ParseStream, start_position::Integer, kind::Kind, return nothing end +""" +Emit a diagnostic at the position of the next token +""" +function emit_diagnostic(stream::ParseStream; error) + byte = first_byte(peek_token(stream)) + # It's a bit weird to require supplying a SyntaxHead here... 
+ text_span = TextSpan(SyntaxHead(K"Error", EMPTY_FLAGS), byte, byte) + push!(stream.diagnostics, Diagnostic(text_span, error)) +end + # Tree construction from the list of text spans held by ParseStream # @@ -230,6 +268,11 @@ function to_raw_tree(st) return only(stack).node end +function show_diagnostics(io::IO, stream::ParseStream, code) + for d in stream.diagnostics + show_diagnostic(io, d, code) + end +end #------------------------------------------------------------------------------- """ @@ -274,6 +317,10 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, where_enabled === nothing ? ps.where_enabled : where_enabled) end -peek(ps::ParseState, args...) = peek(ps.stream, args...) -bump(ps::ParseState, args...) = bump(ps.stream, args...) -emit(ps::ParseState, args...) = emit(ps.stream, args...) +peek(ps::ParseState, args...) = peek(ps.stream, args...) +peek_token(ps::ParseState, args...) = peek_token(ps.stream, args...) +bump(ps::ParseState, args...) = bump(ps.stream, args...) +set_flags!(ps::ParseState, args...) = set_flags!(ps.stream, args...) +Base.position(ps::ParseState, args...) = position(ps.stream, args...) +emit(ps::ParseState, args...; kws...) = emit(ps.stream, args...; kws...) +emit_diagnostic(ps::ParseState, args...; kws...) = emit_diagnostic(ps.stream, args...; kws...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b593b94bef935..c4b21d8d270bb 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -6,6 +6,14 @@ function TODO(str) error("TODO: $str") end +# Placeholder - bump an identifier in place of a production we haven't +# implemented yet. 
+function bumpTODO(ps::ParseState) + if peek(ps) != K"Identifier" + error("bump ident - TODO") + end + bump(ps) +end function is_closing_token(ps::ParseState, tok) k = kind(tok) @@ -32,14 +40,35 @@ end # produces structures like (+ (+ (+ 2 3) 4) 5) # # flisp: (define-macro (parse-LtoR s down ops) -function parse_LtoR(ps::ParseState, down, ops) +function parse_LtoR(ps::ParseState, down, is_op) + mark = position(ps) + down(ps) + while is_op(peek(ps)) + bump(ps) + down(ps) + emit(ps, mark, K"call", INFIX_FLAG) + end end # parse right-to-left binary operator # produces structures like (= a (= b (= c d))) # (define-macro (parse-RtoL s down ops syntactic self) # flisp: -function parse_RtoL(ps::ParseState, down, ops) +function parse_RtoL(ps::ParseState, down, is_op, syntactic, self) + mark = position(ps) + down(ps) + k = peek(ps) + if is_op(k) + if (syntactic isa Bool && syntactic) || syntactic(k) + bump(ps, TRIVIA_FLAG) + self(ps) + emit(ps, mark, k) + else + bump(ps) + self(ps) + emit(ps, mark, K"call", INFIX_FLAG) + end + end end # flisp: (define (line-number-node s) @@ -105,94 +134,307 @@ end # flisp: (define (parse-assignment s down) function parse_assignment(ps::ParseState, down) - TODO("parse_assignment unimplemented") + mark = position(ps) + down(ps) + k = peek(ps) + if !is_prec_assignment(k) + return + end + if k == K"~" + bump(ps) + if ps.space_sensitive # && ... + # Prefix operator ~x ? + TODO("parse_assignment... ~ not implemented") + else + parse_assignment(ps, down) + # ~ is the only non-syntactic assignment-precedence operator. 
+ emit(ps, mark, K"call", INFIX_FLAG) + end + else + bump(ps, TRIVIA_FLAG) + parse_assignment(ps, down) + emit(ps, mark, k) + end end # parse-comma is needed for commas outside parens, for example a = b,c # # flisp: (define (parse-comma s) function parse_comma(ps::ParseState) - TODO("parse_comma unimplemented") + mark = position(ps) + n_commas = 0 + parse_pair(ps) + first = true + while true + if peek(ps) != K"," + if !first || n_commas > 0 + # FIXME: is use of n_commas correct here? flisp comments say: + # () => (tuple) + # (ex2 ex1) => (tuple ex1 ex2) + # (ex1,) => (tuple ex1) + emit(ps, mark, K"tuple") + end + return + end + first = false + bump(ps, K",", TRIVIA_FLAG) + n_commas += 1 + if peek(ps) == K"=" + # Test: + # x, = ... + continue + end + parse_pair(ps) + end end # flisp: (define (parse-pair s) (parse-RtoL s parse-cond is-prec-pair? #f parse-pair)) function parse_pair(ps::ParseState) - TODO("parse_pair unimplemented") + parse_RtoL(ps, parse_cond, is_prec_pair, false, parse_pair) end # flisp: (define (parse-cond s) function parse_cond(ps::ParseState) - TODO("parse_cond unimplemented") + bumpTODO(ps) + #= + mark = position(ps) + parse_arrow(ps) + t = peek_token(ps) + if kind(t) != K"?" + return + end + flags = EMPTY_FLAGS + if !t.had_whitespace + emit_diagnostic(ps, error="space required before `?` operator") + flags |= ERROR_FLAG + end + bump(ps, TRIVIA_FLAG) + f + + Tricky whitespace-newline! + =# end +# Parse arrows +# x → y ==> (call-i x → y) +# x <--> y ==> (call-i x <--> y) +# x --> y ==> (x --> y) # The only syntactic arrow +# # flisp: (define (parse-arrow s) (parse-RtoL s parse-or is-prec-arrow? (eq? t '-->) parse-arrow)) function parse_arrow(ps::ParseState) - TODO("parse_arrow unimplemented") + parse_RtoL(ps, parse_or, is_prec_arrow, ==(K"-->"), parse_arrow) end + +# x || y || z ==> (call-i x || (call-i y || z)) +# # flisp: (define (parse-or s) (parse-RtoL s parse-and is-prec-lazy-or? 
#t parse-or)) function parse_or(ps::ParseState) - TODO("parse_or unimplemented") + parse_RtoL(ps, parse_and, is_prec_lazy_or, true, parse_or) end + # flisp: (define (parse-and s) (parse-RtoL s parse-comparison is-prec-lazy-and? #t parse-and)) function parse_and(ps::ParseState) - TODO("parse_and unimplemented") + parse_RtoL(ps, parse_comparison, is_prec_lazy_and, true, parse_and) end +# Parse comparison chains like +# x > y ==> (call-i > x y) +# x < y < z ==> (comparison x < y < z) +# x == y < z ==> (comparison x == y < z) +# # flisp: (define (parse-comparison s) function parse_comparison(ps::ParseState) - TODO("parse_comparison unimplemented") + mark = position(ps) + parse_pipe_lt(ps) + n_comparisons = 0 + op_pos = 0 + initial_kind = peek(ps) + while is_prec_comparison(peek(ps)) + n_comparisons += 1 + op_pos = bump(ps) + parse_pipe_lt(ps) + end + if n_comparisons == 1 + if initial_kind in (K"<:", K">:") + # Type comparisons are syntactic and have their kind encoded in the head + # x <: y ==> (<: x y) + # x >: y ==> (>: x y) + set_flags!(ps, op_pos, TRIVIA_FLAG) + emit(ps, mark, initial_kind) + else + emit(ps, mark, K"call", INFIX_FLAG) + end + elseif n_comparisons > 1 + emit(ps, mark, K"comparison", INFIX_FLAG) + end end +# x |> y |> z ==> ((x |> y) |> z) # flisp: (define (parse-pipe< s) (parse-RtoL s parse-pipe> is-prec-pipe (x <| (y <| z)) # flisp: (define (parse-pipe> s) (parse-LtoR s parse-range is-prec-pipe>?)) function parse_pipe_gt(ps::ParseState) - TODO("parse_pipe_gt unimplemented") + parse_LtoR(ps, parse_range, is_prec_pipe_gt) end # parse ranges and postfix ... 
# colon is strange; 3 arguments with 2 colons yields one call: -# 1:2 => (call : 1 2) -# 1:2:3 => (call : 1 2 3) +# 1:2 ==> (call-i 1 : 2) +# 1:2:3 ==> (call-i 1 : 2 3) +# Chaining gives +# a:b:c:d:e ==> (call-i (call-i a : b c) : d e) # # flisp: (define (parse-range s) function parse_range(ps::ParseState) - TODO("parse_range unimplemented") + mark = position(ps) + parse_expr(ps) + initial_kind = peek(ps) + if initial_kind != K":" && is_prec_colon(initial_kind) + # a..b ==> (call-i a .. b) + # a … b ==> (call-i a … b) + bump(ps) + parse_expr(ps) + emit(ps, mark, K"call", INFIX_FLAG) + elseif initial_kind == K":" && ps.range_colon_enabled + # a ? b : c ==> (if a b c) + # a ? b : c:d ==> (if a b (call-i c : d)) + n_colons = 0 + while peek(ps) == K":" + if ps.space_sensitive && + peek_token(ps).had_whitespace && + !peek_token(ps, 2).had_whitespace + # Tricky cases in space sensitive mode + # [1 :a] ==> (vcat 1 (quote a)) + # [1 2:3 :a] ==> (vcat 1 (call-i 2 : 3) (quote a)) + break + end + n_colons += 1 + bump(ps, n_colons == 1 ? EMPTY_FLAGS : TRIVIA_FLAG) + t2 = peek_token(ps) + if is_closing_token(ps, kind(t2)) + # 1: } ==> (call-i-e 1 :) + # 1:2: } ==> (call-i-e 1 : 2) + emit(ps, mark, K"call", INFIX_FLAG, + error="missing last argument in range expression") + emit_diagnostic(ps, error="found unexpected closing token") + return + end + if t2.had_newline + # Error message for people coming from python + # === + # 1: + # 2 + # ==> + # (call-i-e 1 :) + # === + emit(ps, mark, K"call", INFIX_FLAG|ERROR_FLAG) + emit_diagnostic(ps, error="line break after `:` in range expression") + return + elseif kind(t2) in (K"<", K">") && !t2.had_whitespace + # :> and :< are not operators + ks = untokenize(kind(t2)) + emit_diagnostic(ps, error="Invalid `:$ks` found - did you mean `$ks:`?") + end + parse_expr(ps) + if n_colons == 2 + emit(ps, mark, K"call", INFIX_FLAG) + n_colons = 0 + end + end + if n_colons > 0 + emit(ps, mark, K"call", INFIX_FLAG) + end + end + + # x... ==> (... 
x) + # x:y... ==> (... (call-i x : y)) + # x..y... ==> (... (call-i x .. y)) # flisp parser fails here + if peek(ps) == K"..." + bump(ps, TRIVIA_FLAG) + emit(ps, mark, K"...") + end end # parse left to right chains of a certain binary operator -# returns a list of arguments # # flisp: (define (parse-chain s down op) -function parse_chain(ps::ParseState, down, op) - TODO("parse_chain unimplemented") +function parse_chain(ps::ParseState, down, op_kind) + mark = position(ps) + down(ps) + first = true + while (t = peek_token(ps); kind(t) == op_kind) + if ps.space_sensitive && t.had_whitespace && + is_both_unary_and_binary(kind(t)) && + !peek_token(ps, 2).had_whitespace + # [x +y] ==> (hcat x (call + y)) + break + end + bump(ps, first ? EMPTY_FLAGS : TRIVIA_FLAG) + first = false + down(ps) + end + emit(ps, mark, K"call", INFIX_FLAG) end # parse left to right, combining chains of a certain operator into 1 call -# e.g. a+b+c => (call + a b c) +# a + b + c ==> (call-i a + b c) # # flisp: (define (parse-with-chains s down ops chain-ops) -function parse_with_chains(ps::ParseState, down, ops, chain_ops) - TODO("parse_with_chains unimplemented") +function parse_with_chains(ps::ParseState, down, is_op, chain_ops) + TODO("parse_with_chains") + #= + mark = position(ps) + down(ps) + chain_op = K"Nothing" + while (t = peek_token(ps); is_op(kind(t))) + if ps.space_sensitive && t.had_whitespace && + is_both_unary_and_binary(kind(t)) && + !peek_token(ps, 2).had_whitespace + # [x+y +z] ==> (hcat (call-i x y) (call + z)) + break + end + op_is_trivia = false + if chain_op != K"Nothing" + if kind(t) != chain_op + # Finish the chain + emit(ps, mark, K"call", INFIX_FLAG) + chain_op = K"Nothing" + else + op_is_trivia = true + end + end + if chain_op == K"Nothing" && kind(t) in chain_ops + chain_op = kind(t) + bump(ps, op_is_trivia ? 
TRIVIA_FLAG : EMPTY_FLAGS) + down(ps) + if !(kind(t) in chain_ops) + end + end + =# end # flisp: (define (parse-expr s) (parse-with-chains s parse-term is-prec-plus? '(+ ++))) function parse_expr(ps::ParseState) - TODO("parse_expr unimplemented") + bumpTODO(ps) end + # flisp: (define (parse-term s) (parse-with-chains s parse-rational is-prec-times? '(*))) function parse_term(ps::ParseState) TODO("parse_term unimplemented") end + # flisp: (define (parse-rational s) (parse-LtoR s parse-shift is-prec-rational?)) function parse_rational(ps::ParseState) - TODO("parse_rational unimplemented") + parse_LtoR(ps, parse_shift, is_prec_rational) end + # flisp: (define (parse-shift s) (parse-LtoR s parse-unary-subtype is-prec-bitshift?)) function parse_shift(ps::ParseState) - TODO("parse_shift unimplemented") + parse_LtoR(ps, parse_unary_subtype, is_prec_bitshift) end # parse `<: A where B` as `<: (A where B)` (issue #21545) @@ -263,7 +505,7 @@ end # flisp: (define (parse-factor-after s) (parse-RtoL s parse-juxtapose is-prec-power? 
#f parse-factor-after)) function parse_factor_after(ps::ParseState) - TODO("parse_factor_after unimplemented") + parse_RtoL(ps, parse_juxtapose, is_prec_power, false, parse_factor_after) end # flisp: (define (parse-decl s) @@ -851,21 +1093,6 @@ end # parse_eq_2 is used where commas are special, for example in an argument list # function parse_eq_2 -function parse_assignment(ps::ParseState, down) - ex = down(ps) - t = peek_token(ps) - if !is_prec_assignment(t) - return ex - end - take_token!(ps) - if kind(t) == K"~" - # ~ is the only non-syntactic assignment-precedence operator - TODO("Turn ~ into a call node") - else - GreenNode - end -end - #------------------------------------------------------------------------------- function parse(code) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 731a9320ff51a..a9366e49cc1ad 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -54,7 +54,35 @@ end kind(k::Kind) = k kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) -is_prec_assignment(tok) = K"BEGIN_ASSIGNMENTS" < kind(tok) < K"END_ASSIGNMENTS" +# Predicates for operator precedence +is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" +is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" +is_prec_conditional(t) = K"BEGIN_CONDITIONAL" < kind(t) < K"END_CONDITIONAL" +is_prec_arrow(t) = K"BEGIN_ARROW" < kind(t) < K"END_ARROW" +is_prec_lazy_or(t) = K"BEGIN_LAZYOR" < kind(t) < K"END_LAZYOR" +is_prec_lazy_and(t) = K"BEGIN_LAZYAND" < kind(t) < K"END_LAZYAND" +is_prec_comparison(t) = K"BEGIN_COMPARISON" < kind(t) < K"END_COMPARISON" +is_prec_pipe(t) = K"BEGIN_PIPE" < kind(t) < K"END_PIPE" +is_prec_colon(t) = K"BEGIN_COLON" < kind(t) < K"END_COLON" +is_prec_plus(t) = K"BEGIN_PLUS" < kind(t) < K"END_PLUS" +is_prec_bitshift(t) = K"BEGIN_BITSHIFTS" < kind(t) < K"END_BITSHIFTS" +is_prec_times(t) = K"BEGIN_TIMES" < kind(t) < K"END_TIMES" +is_prec_rational(t) = K"BEGIN_RATIONAL" < 
kind(t) < K"END_RATIONAL" +is_prec_power(t) = K"BEGIN_POWER" < kind(t) < K"END_POWER" +is_prec_decl(t) = K"BEGIN_DECL" < kind(t) < K"END_DECL" +is_prec_where(t) = K"BEGIN_WHERE" < kind(t) < K"END_WHERE" +is_prec_dot(t) = K"BEGIN_DOT" < kind(t) < K"END_DOT" +is_prec_unicode_ops(t) = K"BEGIN_UNICODE_OPS" < kind(t) < K"END_UNICODE_OPS" + +is_prec_pipe_lt(t) = kind(t) == K"<|" +is_prec_pipe_gt(t) = kind(t) == K"|>" + +# Operators which are boty unary and binary +function is_both_unary_and_binary(t) + # TODO: Do we need to check dotop as well here? + kind(t) in (K"$", K"&", K"~", # <- dotop disallowed? + K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed +end """ Get the "binding power" (precedence level) of an operator kind @@ -81,41 +109,22 @@ function binding_power(k::Kind) end function _kind_str(k::Kind) - if k in (K"Identifier", K"VarIdentifier") - "Identifier" - elseif isliteral(k) - "Literal" - elseif k == K"Comment" - "Comment" - elseif k == K"Whitespace" - "Whitespace" - elseif k == K"NewlineWs" - "NewlineWs" - elseif iskeyword(k) - lowercase(string(k)) - elseif isoperator(k) - string(TzTokens.UNICODE_OPS_REVERSE[k]) - elseif k == K"(" - "(" - elseif k == K"[" - "[" - elseif k == K"{" - "{" - elseif k == K")" - ")" - elseif k == K"]" - "]" - elseif k == K"}" - "}" - elseif k == K"@" - "@" - elseif k == K"," - "," - elseif k == K";" - ";" - else - lowercase(string(k)) - end + u = untokenize(k) + return !isnothing(u) ? u : + k in (K"Identifier", K"VarIdentifier") ? "Identifier" : + isliteral(k) ? "Literal" : + k == K"Comment" ? "Comment" : + k == K"Whitespace" ? "Whitespace" : + k == K"NewlineWs" ? "NewlineWs" : + lowercase(string(k)) +end + +""" +Return the string representation of a token kind, or `nothing` if the kind +represents a class of tokens like K"Identifier". 
+""" +function untokenize(k::Kind) + get(_kind_to_str, k, nothing) end """ @@ -168,7 +177,6 @@ const var"function" = @_K FUNCTION const var"global" = @_K GLOBAL const var"if" = @_K IF const var"import" = @_K IMPORT -const var"importall" = @_K IMPORTALL const var"let" = @_K LET const var"local" = @_K LOCAL const var"macro" = @_K MACRO @@ -713,8 +721,8 @@ const END_COMPARISON = @_K end_comparison # Level 7 const BEGIN_PIPE = @_K begin_pipe -const var"|>" = @_K LPIPE -const var"<|" = @_K RPIPE +const var"<|" = @_K LPIPE +const var"|>" = @_K RPIPE const END_PIPE = @_K end_pipe # Level 8 @@ -949,18 +957,47 @@ const END_UNICODE_OPS = @_K end_unicode_ops const END_OPS = @_K end_ops -# Cute synonyms +# (Too?) cute synonyms const var" " = @_K WHITESPACE const var"\n" = @_K NEWLINE_WS # Our custom syntax tokens - const BEGIN_SYNTAX_KINDS = @_K begin_syntax_kinds -const toplevel = @_K TOPLEVEL -const call = @_K CALL -const ref = @_K REF -const block = @_K BLOCK +const block = @_K BLOCK +const call = @_K CALL +const comparison = @_K COMPARISON +const curly = @_K CURLY +const string = @_K STRING_INTERP +const toplevel = @_K TOPLEVEL +const tuple = @_K TUPLE +const ref = @_K REF +const vect = @_K VECT +const braces = @_K BRACES +const bracescat = @_K BRACESCAT +const hcat = @_K HCAT +const vcat = @_K VCAT +const ncat = @_K NCAT +const typed_hcat = @_K TYPED_HCAT +const typed_vcat = @_K TYPED_VCAT +const typed_ncat = @_K TYPED_NCAT +const generator = @_K GENERATOR +const flatten = @_K FLATTEN +const comprehension = @_K COMPREHENSION +const typed_comprehension = @_K TYPED_COMPREHENSION const END_SYNTAX_KINDS = @_K end_syntax_kinds -end +end # module Kinds +# Mapping from kinds to their unique string representation, if it exists +const _kind_to_str = + Dict{Kind,String}(k=>string(s) for (k,s) in TzTokens.UNICODE_OPS_REVERSE) +for c in "([{}])@,;" + _kind_to_str[getfield(Kinds, Symbol(c))] = string(c) +end +for kw in split("""abstract baremodule begin break catch const + continue do 
else elseif end export finally for + function global if import let local + macro module mutable new outer primitive quote + return struct try type using while""") + _kind_to_str[getfield(Kinds, Symbol(kw))] = kw +end From 2f71060c6b86a545bef02b3fce63e77153a8b32b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 10 Dec 2021 18:20:59 +1000 Subject: [PATCH 0233/1109] Context-sensitive newline tokens + top level parsing Allow peek(::ParseState) to produce newline tokens (or transparently discard as whitespace) depending on context. This required changing how trivia is handled in ParseStream - for now it's stored in the lookahead buffer along with the other tokens and we just search inside peek(). Add invisible "implied" tokens such as K"core_@doc" to allow more uniform tree structure when an implied token doesn't appear in the source text. Add parse_all driver function Add several top-level productions: * parse_block * parse_stmts * parse_docstring And some operator handling: * parse_eq * parse_with_chains * parse_expr * parse_term --- JuliaSyntax/src/parse_stream.jl | 192 +++++++++++++----- JuliaSyntax/src/parser.jl | 337 ++++++++++++++++++------------- JuliaSyntax/src/token_kinds.jl | 10 + JuliaSyntax/test/parse_stream.jl | 11 +- 4 files changed, 359 insertions(+), 191 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ee56e6b104d4b..39cd883f266e1 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -30,6 +30,10 @@ first_byte(tok::SyntaxToken) = tok.raw.startbyte + 1 last_byte(tok::SyntaxToken) = tok.raw.endbyte + 1 span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1 +is_dotted(tok::SyntaxToken) = tok.raw.dotop +is_suffixed(tok::SyntaxToken) = tok.raw.suffix +is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok) + Base.:(~)(tok::SyntaxToken, k::Kind) = kind(tok) == k Base.:(~)(k::Kind, tok::SyntaxToken) = kind(tok) == k @@ -79,11 +83,12 @@ This is simililar to 
rust-analyzer's mutable struct ParseStream lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} lookahead::Vector{SyntaxToken} - lookahead_trivia::Vector{TextSpan} spans::Vector{TextSpan} diagnostics::Vector{Diagnostic} # First byte of next token next_byte::Int + # Counter for number of peek()s we've done without making progress via a bump() + peek_count::Int end function ParseStream(code) @@ -91,28 +96,52 @@ function ParseStream(code) ParseStream(lexer, Vector{SyntaxToken}(), Vector{TextSpan}(), - Vector{TextSpan}(), Vector{Diagnostic}(), - 1) + 1, + 0) end function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) println(io, "ParseStream at position $(stream.next_byte)") end -function _read_token(stream::ParseStream) +# Buffer up until the next non-whitespace token. +# This can buffer more than strictly necessary when newlines are significant, +# but this is not a big problem. +function _buffer_lookahead_tokens(stream::ParseStream) had_whitespace = false - had_newline = false + had_newline = false while true raw = Tokenize.Lexers.next_token(stream.lexer) k = TzTokens.exactkind(raw) - if k in (K"Whitespace", K"Comment", K"NewlineWs") - had_whitespace = true - had_newline = k == K"NewlineWs" - push!(stream.lookahead_trivia, TextSpan(raw, TRIVIA_FLAG)) - continue + + was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") + was_newline = k == K"NewlineWs" + had_whitespace |= was_whitespace + had_newline |= was_newline + push!(stream.lookahead, SyntaxToken(raw, had_whitespace, had_newline)) + if !was_whitespace + break + end + end +end + +function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) + i = 1 + while true + if i > length(stream.lookahead) + _buffer_lookahead_tokens(stream) + end + k = kind(stream.lookahead[i]) + is_skipped = k ∈ (K"Whitespace", K"Comment") || + (k == K"NewlineWs" && skip_newlines) + if !is_skipped + if n == 1 + return i + end + n -= 1 end - return SyntaxToken(raw, had_whitespace, had_newline) + i += 1 
end end @@ -121,13 +150,12 @@ end Look ahead in the stream `n` tokens, returning a SyntaxToken """ -function peek_token(stream::ParseStream, n::Integer=1) - if length(stream.lookahead) < n - for i=1:(n-length(stream.lookahead)) - push!(stream.lookahead, _read_token(stream)) - end +function peek_token(stream::ParseStream, n::Integer=1, skip_newlines=false) + stream.peek_count += 1 + if stream.peek_count > 100_000 + error("The parser seems stuck at byte $(position(stream))") end - return stream.lookahead[n] + stream.lookahead[_lookahead_index(stream, n, skip_newlines)] end """ @@ -135,8 +163,8 @@ end Look ahead in the stream `n` tokens, returning a Kind """ -function peek(stream::ParseStream, n::Integer=1) - kind(peek_token(stream, n)) +function peek(stream::ParseStream, n::Integer=1, skip_newlines=false) + kind(peek_token(stream, n, skip_newlines)) end """ @@ -145,32 +173,47 @@ end Shift the current token into the output as a new text span with the given `flags`. """ -function bump(stream::ParseStream, flags=EMPTY_FLAGS) - tok = isempty(stream.lookahead) ? - _read_token(stream) : - popfirst!(stream.lookahead) # TODO: use a circular buffer? - # Bump trivia tokens into output - while !isempty(stream.lookahead_trivia) && - first_byte(first(stream.lookahead_trivia)) <= first_byte(tok) - trivia_span = popfirst!(stream.lookahead_trivia) - push!(stream.spans, trivia_span) +function bump(stream::ParseStream, flags=EMPTY_FLAGS, skip_newlines=false) + n = _lookahead_index(stream, 1, skip_newlines) + for i=1:n + tok = stream.lookahead[i] + k = kind(tok) + if k == K"EndMarker" + break + end + is_skipped_ws = k ∈ (K"Whitespace", K"Comment") || + (k == K"NewlineWs" && skip_newlines) + f = is_skipped_ws ? 
TRIVIA_FLAG : flags + span = TextSpan(SyntaxHead(kind(tok), f), first_byte(tok), last_byte(tok)) + push!(stream.spans, span) end - span = TextSpan(SyntaxHead(kind(tok), flags), first_byte(tok), last_byte(tok)) - push!(stream.spans, span) - mark = lastindex(stream.spans) - stream.next_byte = last_byte(tok) + 1 - mark + Base._deletebeg!(stream.lookahead, n) + stream.next_byte = last_byte(last(stream.spans)) + 1 + # Defuse the time bomb + stream.peek_count = 0 + # Return last token location in output if needed for set_flags! + return lastindex(stream.spans) +end + +function bump_invisible(stream::ParseStream, kind) + emit(stream, position(stream), kind) + return lastindex(stream.spans) end """ -Hack: Reset flags of an existing token in the output stream +Hack: Reset kind or flags of an existing token in the output stream This is necessary on some occasions when we don't know whether a token will -have TRIVIA_FLAG set until. +have TRIVIA_FLAG set until after consuming more input, or when we need to +insert a invisible token like core_@doc but aren't yet sure it'll be needed - +see bump_invisible() """ -function set_flags!(stream::ParseStream, mark, flags) +function reset_token!(stream::ParseStream, mark; + kind=nothing, flags=nothing) text_span = stream.spans[mark] - stream.spans[mark] = TextSpan(SyntaxHead(kind(text_span), flags), + k = isnothing(kind) ? (@__MODULE__).kind(text_span) : kind + f = isnothing(flags) ? 
(@__MODULE__).flags(text_span) : flags + stream.spans[mark] = TextSpan(SyntaxHead(k, f), first_byte(text_span), last_byte(text_span)) end @@ -184,10 +227,12 @@ function accept(stream::ParseStream, k::Kind) end =# +#= function bump(stream::ParseStream, k::Kind, flags=EMPTY_FLAGS) @assert peek(stream) == k bump(stream, flags) end +=# function Base.position(stream::ParseStream) return stream.next_byte @@ -242,11 +287,14 @@ end function to_raw_tree(st) stack = Vector{@NamedTuple{text_span::TextSpan,node::GreenNode}}() - _push_node!(stack, st.spans[1]) - for i = 2:length(st.spans) - text_span = st.spans[i] + for text_span in st.spans + if kind(text_span) == K"TOMBSTONE" + # Ignore invisible tokens which were created but never finalized. + # See bump_invisible() + continue + end - if first_byte(text_span) > last_byte(stack[end].text_span) + if isempty(stack) || first_byte(text_span) > last_byte(stack[end].text_span) # A leaf node (span covering a single token): # [a][b][stack[end]] # [text_span] @@ -258,13 +306,14 @@ function to_raw_tree(st) # [a][b][stack[end]] # [ text_span] j = length(stack) - while j > 1 && first_byte(text_span) < first_byte(stack[j].text_span) + while j > 1 && first_byte(text_span) <= first_byte(stack[j-1].text_span) j -= 1 end children = [stack[k].node for k = j:length(stack)] resize!(stack, j-1) _push_node!(stack, text_span, children) end + # show(stdout, MIME"text/plain"(), stack[1].node) return only(stack).node end @@ -317,10 +366,55 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, where_enabled === nothing ? ps.where_enabled : where_enabled) end -peek(ps::ParseState, args...) = peek(ps.stream, args...) -peek_token(ps::ParseState, args...) = peek_token(ps.stream, args...) -bump(ps::ParseState, args...) = bump(ps.stream, args...) -set_flags!(ps::ParseState, args...) = set_flags!(ps.stream, args...) -Base.position(ps::ParseState, args...) = position(ps.stream, args...) -emit(ps::ParseState, args...; kws...) 
= emit(ps.stream, args...; kws...) -emit_diagnostic(ps::ParseState, args...; kws...) = emit_diagnostic(ps.stream, args...; kws...) +function peek(ps::ParseState, n=1; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek(ps.stream, n, skip_nl) +end + +peek_token(ps::ParseState, n=1) = peek_token(ps.stream, n, ps.whitespace_newline) + +function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + bump(ps.stream, flags, skip_nl) +end + +function bump_newlines(ps::ParseState) + while peek(ps) == K"NewlineWs" + bump(ps, TRIVIA_FLAG) + end +end + +""" +Bump a new zero-width "invisible" token at the current stream position. These +can be useful in several situations, for example, + +* Implicit multiplication - the * is invisible + `2x ==> (call 2 * x)` +* Docstrings - the macro name is invisible + `"doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1))` +* Big integer literals - again, an invisible macro name + `11111111111111111111 ==> (macrocall (core @int128_str) . 11111111111111111111)` + +By default if no `kind` is provided then the invisible token stays invisible +and will be discarded unless `reset_token!(kind=...)` is used. +""" +function bump_invisible(ps::ParseState, kind=K"TOMBSTONE") + bump_invisible(ps.stream, kind) +end + +function reset_token!(ps::ParseState, args...; kws...) + reset_token!(ps.stream, args...; kws...) +end + +function Base.position(ps::ParseState, args...) + position(ps.stream, args...) +end + +function emit(ps::ParseState, args...; kws...) + emit(ps.stream, args...; kws...) +end + +function emit_diagnostic(ps::ParseState, args...; kws...) + emit_diagnostic(ps.stream, args...; kws...) 
+end + diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c4b21d8d270bb..34b652f55b3c0 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -6,13 +6,14 @@ function TODO(str) error("TODO: $str") end -# Placeholder - bump an identifier in place of a production we haven't -# implemented yet. +# Placeholder - bump an identifier or literal in place of a production we +# haven't implemented yet. function bumpTODO(ps::ParseState) - if peek(ps) != K"Identifier" - error("bump ident - TODO") + if peek(ps) == K"Identifier" || isliteral(peek(ps)) + bump(ps) + else + error("bumpTODO - got unexpected $(peek(ps))") end - bump(ps) end function is_closing_token(ps::ParseState, tok) @@ -71,50 +72,109 @@ function parse_RtoL(ps::ParseState, down, is_op, syntactic, self) end end -# flisp: (define (line-number-node s) -function line_number_node(s) - TODO("line_number_node unimplemented") -end - -# parse a@b@c@... as (@ a b c ...) for some operator @ -# ops: operators to look for -# head: the expression head to yield in the result, e.g. "a;b" => (block a b) -# closer?: predicate to identify tokens that stop parsing -# however, this doesn't consume the closing token, just looks at it -# ow, my eyes!! +# parse block-like structures +# +# `delimiters` are a set of token kinds acting as delimiters; `closing_tokens` +# stop the parsing. +# +# Returns true if the block was nontrivial and a node needs to be emitted by +# the caller. # # flisp: (define (parse-Nary s down ops head closer? 
add-linenums) -function parse_Nary(ps::ParseState, down, ops, head, is_closer, add_linenums) - TODO("parse_Nary unimplemented") +function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) + bump_newlines(ps) + k = peek(ps) + if k in closing_tokens + return true + end + # Skip leading operator + n_delims = 0 + if k in delimiters + bump(ps, TRIVIA_FLAG) + n_delims += 1 + else + down(ps) + end + while peek(ps) in delimiters + bump(ps, TRIVIA_FLAG) + n_delims += 1 + k = peek(ps) + if k == K"EndMarker" || k in closing_tokens + break + elseif k in delimiters + # ignore empty delimited sections + # a;;;b ==> (block a b) + continue + end + down(ps) + end + return n_delims != 0 end # the principal non-terminals follow, in increasing precedence order +# Parse a newline or semicolon-delimited list of expressions. +# Repeated delimiters are allowed but ignored +# (a;b;c) ==> (block a b c) +# (a;;;b;;) ==> (block a b) +# === +# begin +# a +# b +# end +# ==> (block a b) +# # flisp: (define (parse-block s (down parse-eq)) -function parse_block(ps::ParseState; down, parse_eq) - TODO("parse_block unimplemented") +function parse_block(ps::ParseState, down=parse_eq) + mark = position(ps) + if parse_Nary(ps, down, (K"NewlineWs", K";"), + (K"end", K"else", K"elseif", K"catch", K"finally")) + emit(ps, mark, K"block") + end end # ";" at the top level produces a sequence of top level expressions # +# a;b;c ==> (toplevel a b c) +# a;;;b;; ==> (toplevel a b) +# # flisp: (define (parse-stmts s) function parse_stmts(ps::ParseState) - TODO("parse_stmts unimplemented") + mark = position(ps) + do_emit = parse_Nary(ps, parse_docstring, (K";",), (K"NewlineWs",)) + # check for unparsed junk after an expression + junk_mark = position(ps) + while peek(ps) ∉ (K"EndMarker", K"NewlineWs") + # Error recovery + bump(ps) + end + if junk_mark != position(ps) + emit(ps, junk_mark, K"Error", + error="Extra tokens after end of expression") + end + if do_emit + emit(ps, mark, K"toplevel") + end end # 
flisp: (define (parse-eq s) (parse-assignment s parse-comma)) function parse_eq(ps::ParseState) - TODO("parse_eq unimplemented") + parse_assignment(ps, parse_comma) end -# symbol tokens that do not simply parse to themselves when appearing alone as -# an element of an argument list - -# parse-eq* is used where commas are special, for example in an argument list +# parse_eq_ is used where commas are special, for example in an argument list # # flisp: (define (parse-eq* s) -function parse_eq_star(ps::ParseState) - TODO("parse_eq_star unimplemented") +function parse_eq_(ps::ParseState) + k = peek(ps) + k2 = peek(ps,2) + if (isliteral(k) || k == K"Identifier") && k2 in (K",", K")", K"}", K"]") + # optimization: skip checking the whole precedence stack if we have a + # simple token followed by a common closing token + bump(ps) + else + parse_assignment(ps, parse_pair) + end end # flisp: (define (eventually-call? ex) @@ -122,11 +182,6 @@ function is_eventually_call(ex) TODO("is_eventually_call unimplemented") end -# flisp: (define (add-line-number blk linenode) -function add_line_number(blk, linenode) - TODO("add_line_number unimplemented") -end - # flisp: (define (short-form-function-loc ex lno) function short_form_function_loc(ex, lno) TODO("short_form_function_loc unimplemented") @@ -177,7 +232,7 @@ function parse_comma(ps::ParseState) return end first = false - bump(ps, K",", TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) n_commas += 1 if peek(ps) == K"=" # Test: @@ -195,8 +250,6 @@ end # flisp: (define (parse-cond s) function parse_cond(ps::ParseState) - bumpTODO(ps) - #= mark = position(ps) parse_arrow(ps) t = peek_token(ps) @@ -210,9 +263,6 @@ function parse_cond(ps::ParseState) end bump(ps, TRIVIA_FLAG) f - - Tricky whitespace-newline! 
- =# end # Parse arrows @@ -259,7 +309,7 @@ function parse_comparison(ps::ParseState) # Type comparisons are syntactic and have their kind encoded in the head # x <: y ==> (<: x y) # x >: y ==> (>: x y) - set_flags!(ps, op_pos, TRIVIA_FLAG) + reset_token!(ps, op_pos, flags=TRIVIA_FLAG) emit(ps, mark, initial_kind) else emit(ps, mark, K"call", INFIX_FLAG) @@ -328,9 +378,7 @@ function parse_range(ps::ParseState) # === # 1: # 2 - # ==> - # (call-i-e 1 :) - # === + # ==> (call-i-e 1 :) emit(ps, mark, K"call", INFIX_FLAG|ERROR_FLAG) emit_diagnostic(ps, error="line break after `:` in range expression") return @@ -359,13 +407,10 @@ function parse_range(ps::ParseState) end end -# parse left to right chains of a certain binary operator +# parse left to right chains of a given binary operator # # flisp: (define (parse-chain s down op) function parse_chain(ps::ParseState, down, op_kind) - mark = position(ps) - down(ps) - first = true while (t = peek_token(ps); kind(t) == op_kind) if ps.space_sensitive && t.had_whitespace && is_both_unary_and_binary(kind(t)) && @@ -373,75 +418,67 @@ function parse_chain(ps::ParseState, down, op_kind) # [x +y] ==> (hcat x (call + y)) break end - bump(ps, first ? 
EMPTY_FLAGS : TRIVIA_FLAG) - first = false + bump(ps, TRIVIA_FLAG) down(ps) end - emit(ps, mark, K"call", INFIX_FLAG) end -# parse left to right, combining chains of a certain operator into 1 call -# a + b + c ==> (call-i a + b c) +# Parse left to right, combining any of `chain_ops` into one call +# a - b - c ==> (call-i (call-i a - b) - c) # -# flisp: (define (parse-with-chains s down ops chain-ops) +# flisp: parse-with-chains function parse_with_chains(ps::ParseState, down, is_op, chain_ops) - TODO("parse_with_chains") - #= mark = position(ps) down(ps) - chain_op = K"Nothing" while (t = peek_token(ps); is_op(kind(t))) if ps.space_sensitive && t.had_whitespace && is_both_unary_and_binary(kind(t)) && !peek_token(ps, 2).had_whitespace - # [x+y +z] ==> (hcat (call-i x y) (call + z)) + # The following is two elements of a hcat + # [x+y +z] ==> (hcat (call-i x + y) (call + z)) + # Conversely + # [x+y+z] ==> (hcat (call-i x + y z)) + # [x+y + z] ==> (hcat (call-i x + y z)) break end - op_is_trivia = false - if chain_op != K"Nothing" - if kind(t) != chain_op - # Finish the chain - emit(ps, mark, K"call", INFIX_FLAG) - chain_op = K"Nothing" - else - op_is_trivia = true - end - end - if chain_op == K"Nothing" && kind(t) in chain_ops - chain_op = kind(t) - bump(ps, op_is_trivia ? TRIVIA_FLAG : EMPTY_FLAGS) + bump(ps) down(ps) - if !(kind(t) in chain_ops) + if kind(t) in chain_ops && !is_decorated(t) + # a + b + c ==> (call-i a + b c) + # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c) + # a .+ b .+ c ==> (call-i (call-i a .+ b) .+ c) + parse_chain(ps, down, kind(t)) end + emit(ps, mark, K"call", INFIX_FLAG) end - =# end -# flisp: (define (parse-expr s) (parse-with-chains s parse-term is-prec-plus? '(+ ++))) +# flisp: parse-expr function parse_expr(ps::ParseState) - bumpTODO(ps) + parse_with_chains(ps, parse_term, is_prec_plus, (K"+", K"++")) end -# flisp: (define (parse-term s) (parse-with-chains s parse-rational is-prec-times? 
'(*))) +# flisp: parse-term function parse_term(ps::ParseState) - TODO("parse_term unimplemented") + parse_with_chains(ps, parse_rational, is_prec_times, (K"*",)) end -# flisp: (define (parse-rational s) (parse-LtoR s parse-shift is-prec-rational?)) +# flisp: parse-rational function parse_rational(ps::ParseState) parse_LtoR(ps, parse_shift, is_prec_rational) end -# flisp: (define (parse-shift s) (parse-LtoR s parse-unary-subtype is-prec-bitshift?)) +# flisp: parse-shift function parse_shift(ps::ParseState) parse_LtoR(ps, parse_unary_subtype, is_prec_bitshift) end # parse `<: A where B` as `<: (A where B)` (issue #21545) # -# flisp: (define (parse-unary-subtype s) +# flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) - TODO("parse_unary_subtype unimplemented") + bumpTODO(ps) + #TODO("parse_unary_subtype unimplemented") end # flisp: (define (parse-where-chain s first) @@ -738,9 +775,20 @@ function expect_space_before(s, t) TODO("expect_space_before unimplemented") end +# Parse syntax inside of `[]` or `{}` +# # flisp: (define (parse-cat s closer last-end-symbol) function parse_cat(ps::ParseState, closer, last_end_symbol) TODO("parse_cat unimplemented") + ps = ParseState(ps0, range_colon_enabled=true, + space_sensitive=true, + where_enabled=true, + whitespace_newline=false, + for_generator=true) + if require_token(ps) == closer + take_token!(ps) + return + end end # flisp: (define (kw-to-= e) (if (kwarg? e) (cons '= (cdr e)) e)) @@ -971,19 +1019,78 @@ function is_doc_string_literal(s, e) TODO("is_doc_string_literal unimplemented") end +# Parse docstrings attached by a space or single newline # flisp: (define (parse-docstring s production) -function parse_docstring(ps::ParseState, production) - TODO("parse_docstring unimplemented") +function parse_docstring(ps::ParseState, down=parse_eq) + mark = position(ps) + # TODO? This is not quite equivalent to the flisp parser which accepts + # more than just a string. For example: + #! 
("doc") foo ==> (macrocall core_@doc "doc" foo) + maybe_doc = peek(ps) in (K"String", K"TripleString") + atdoc_mark = bump_invisible(ps) + down(ps) + if maybe_doc + is_doc = true + k = peek(ps) + if is_closing_token(ps, k) + is_doc = false + elseif k == K"NewlineWs" + k2 = peek(ps, 2) + if is_closing_token(ps, k2) || k2 == K"NewlineWs" + is_doc = false + else + # Allow a single newline + # === + # "doc" + # foo + # ===> (macrocall core_@doc "doc" foo) + bump(ps, TRIVIA_FLAG) # NewlineWs + end + end + if is_doc + reset_token!(ps, atdoc_mark, kind=K"core_@doc") + down(ps) + emit(ps, mark, K"macrocall") + end + end end -# --- main entry point --- -# can optionally specify which grammar production to parse. -# default is parse-stmts. -# -# flisp: (define (julia-parse s . production) -function julia_parse(s, _, production) - TODO("julia_parse unimplemented") +""" + parse_all(input) + +Parse a sequence of top level statements. + +`input` may be a `ParseStream` or other input source which will be passed to +the `ParseStream` constructor. The `ParseStream` is returned. + +flisp: parse-all +""" +function parse_all(stream::ParseStream) + ps = ParseState(stream) + mark = position(ps) + while true + if peek(ps, skip_newlines=true) == K"EndMarker" + # As a special case, allow early end of input if there is + # nothing left but whitespace + # === + # # a + # + # #= b =# # c + # ==> (toplevel) + bump(ps, skip_newlines=true) + break + else + parse_stmts(ps) + end + end + emit(ps, mark, K"toplevel") + return ps.stream +end + +function parse_all(code, args...) + stream = ParseStream(code) + return parse_all(ParseState(stream), args...) end #------------------------------------------------------------------------------- @@ -1046,59 +1153,7 @@ function parse_atom(ps::ParseState; checked::Bool=true)::GreenNode end end -# parse `a@b@c@...` for some @ -# -# `is_separator` - predicate -# `head` the expression head to yield in the result, e.g. 
"a;b" => (block a b) -# `is_closer` - predicate to identify tokens that stop parsing -# however, this doesn't consume the closing token, just looks at it -function parse_Nary(ps::ParseState, down::Function, is_separator::Function, - result_kind, is_closer::Function) -end - -# flisp: parse-docstring -# Parse statement with possible docstring -function parse_statement_with_doc(ps::ParseState) - parse_eq(ps) - # TODO: Detect docstrings -end - -# flisp: parse-cat -# Parse syntax inside of `[]` or `{}` -function parse_cat(ps0::ParseState, opening_tok, closer, last_end_symbol::Bool) - ps = ParseState(ps0, range_colon_enabled=true, - space_sensitive=true, - where_enabled=true, - whitespace_newline=false, - for_generator=true) - if require_token(ps) == closer - take_token!(ps) - return - end -end - - -# flisp: parse-stmts -# `;` at the top level produces a sequence of top level expressions -function parse_statements(ps::ParseState) - parse_Nary(ps, parse_statement) -end - -# flisp: parse-eq -function parse_eq(ps::ParseState) - parse_assignment(ps, parse_comma) -end - -# flisp: parse-eq* -# parse_eq_2 is used where commas are special, for example in an argument list -# function parse_eq_2 - #------------------------------------------------------------------------------- -function parse(code) - stream = ParseStream(code) - parse_statements(stream) -end - =# diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index a9366e49cc1ad..a1499e9bd6bbe 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -961,6 +961,15 @@ const END_OPS = @_K end_ops const var" " = @_K WHITESPACE const var"\n" = @_K NEWLINE_WS +const BEGIN_INVISIBLE_TOKENS = @_K begin_invisible_tokens +const TOMBSTONE = @_K TOMBSTONE +const var"core_@doc" = @_K CORE_AT_DOC +const var"core_@cmd" = @_K CORE_AT_CMD +const var"core_@int128_str" = @_K CORE_AT_INT128_STR +const var"core_@uint128_str" = @_K CORE_AT_UINT128_STR +const var"core_@big_str" = @_K 
CORE_AT_BIG_STR +const END_INVISIBLE_TOKENS = @_K end_invisible_tokens + # Our custom syntax tokens const BEGIN_SYNTAX_KINDS = @_K begin_syntax_kinds const block = @_K BLOCK @@ -968,6 +977,7 @@ const call = @_K CALL const comparison = @_K COMPARISON const curly = @_K CURLY const string = @_K STRING_INTERP +const macrocall = @_K MACROCALL const toplevel = @_K TOPLEVEL const tuple = @_K TUPLE const ref = @_K REF diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 6fb1b00eb98a7..30b46d5aafd14 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -33,6 +33,8 @@ st = ParseStream(code) bump(st) # 10 emit(st, p3, K"call", INFIX_FLAG) emit(st, p2, K"=") + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) p4 = position(st) p5 = position(st) # [call] p6 = position(st) # [ref] @@ -50,12 +52,19 @@ st = ParseStream(code) @test peek(st) == K"Integer" # 2 bump(st) emit(st, p5, K"call", INFIX_FLAG) + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) @test peek(st) == K"Identifier" # 'yy' bump(st) emit(st, p4, K"block") + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) bump(st, TRIVIA_FLAG) # end emit(st, p1, K"for") - bump(st, TRIVIA_FLAG) # \n + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) emit(st, p1, K"toplevel") end From 1002aa519785db8d12b5abb0c42a392edc6ee879 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 10 Dec 2021 19:00:32 +1000 Subject: [PATCH 0234/1109] Implement parse_cond --- JuliaSyntax/src/parse_stream.jl | 5 ++++- JuliaSyntax/src/parser.jl | 40 ++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 39cd883f266e1..7f700c84c19d8 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -371,7 +371,10 @@ function peek(ps::ParseState, n=1; skip_newlines=nothing) 
peek(ps.stream, n, skip_nl) end -peek_token(ps::ParseState, n=1) = peek_token(ps.stream, n, ps.whitespace_newline) +function peek_token(ps::ParseState, n=1; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek_token(ps.stream, n, skip_nl) +end function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 34b652f55b3c0..d66baef5b42f0 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -162,10 +162,10 @@ function parse_eq(ps::ParseState) parse_assignment(ps, parse_comma) end -# parse_eq_ is used where commas are special, for example in an argument list +# parse_eq_star is used where commas are special, for example in an argument list # # flisp: (define (parse-eq* s) -function parse_eq_(ps::ParseState) +function parse_eq_star(ps::ParseState) k = peek(ps) k2 = peek(ps,2) if (isliteral(k) || k == K"Identifier") && k2 in (K",", K")", K"}", K"]") @@ -256,13 +256,41 @@ function parse_cond(ps::ParseState) if kind(t) != K"?" return end - flags = EMPTY_FLAGS + cond_flags = EMPTY_FLAGS if !t.had_whitespace + # a? b : c emit_diagnostic(ps, error="space required before `?` operator") - flags |= ERROR_FLAG + cond_flags |= ERROR_FLAG end - bump(ps, TRIVIA_FLAG) - f + bump(ps, TRIVIA_FLAG) # ? + t = peek_token(ps, skip_newlines=true) + if !t.had_whitespace + # a ?b : c + emit_diagnostic(ps, error="space required after `?` operator") + cond_flags |= ERROR_FLAG + end + parse_eq_star(ParseState(ps, range_colon_enabled=false)) + t = peek_token(ps) + if kind(t) != K":" + # a ? b: ==> (if-e a b) + emit(ps, mark, K"if", cond_flags, + error="colon expected in `?` expression") + return + end + if !t.had_whitespace + # a ? 
b: c + emit_diagnostic(ps, error="space required before `:` in `?` expression") + cond_flags |= ERROR_FLAG + end + bump(ps, TRIVIA_FLAG) # : + t = peek_token(ps, skip_newlines=true) + if !t.had_whitespace + # a ? b :c + emit_diagnostic(ps, error="space required after `:` in `?` expression") + cond_flags |= ERROR_FLAG + end + parse_eq_star(ps) + emit(ps, mark, K"if", cond_flags) end # Parse arrows From 5138de02336a8a8dae4c199c2210656066ca9178 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 10 Dec 2021 22:00:35 +1000 Subject: [PATCH 0235/1109] More parsing * parse_decl_with_initial_ex * parse_unary_prefix * parse_def * parse_call Parts of * parse_call_chain * parse_where * parse_factor --- JuliaSyntax/src/parse_stream.jl | 5 +- JuliaSyntax/src/parser.jl | 228 ++++++++++++++++++++++++++------ JuliaSyntax/src/token_kinds.jl | 4 + 3 files changed, 192 insertions(+), 45 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 7f700c84c19d8..d5ebd54939d9b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -262,10 +262,11 @@ end """ Emit a diagnostic at the position of the next token """ -function emit_diagnostic(stream::ParseStream; error) +function emit_diagnostic(stream::ParseStream, mark=nothing; error) byte = first_byte(peek_token(stream)) + mark = isnothing(mark) ? byte : mark # It's a bit weird to require supplying a SyntaxHead here... 
- text_span = TextSpan(SyntaxHead(K"Error", EMPTY_FLAGS), byte, byte) + text_span = TextSpan(SyntaxHead(K"Error", EMPTY_FLAGS), mark, byte) push!(stream.diagnostics, Diagnostic(text_span, error)) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d66baef5b42f0..d421c5e58d3ee 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -16,13 +16,28 @@ function bumpTODO(ps::ParseState) end end -function is_closing_token(ps::ParseState, tok) - k = kind(tok) +function is_closing_token(ps::ParseState, t) + k = kind(t) return k in (K"else", K"elseif", K"catch", K"finally", K",", K")", K"]", K"}", K";", K"EndMarker") || (k == K"end" && !ps.end_symbol) end +function is_initial_reserved_word(ps::ParseState, t) + k = kind(t) + is_iresword = k in ( + K"begin", K"while", K"if", K"for", K"try", K"return", K"break", + K"continue", K"function", K"macro", K"quote", K"let", K"local", + K"global", K"const", K"do", K"struct", K"module", K"baremodule", + K"using", K"import", K"export") + # `begin` means firstindex(a) inside a[...] + return is_iresword && !(k == K"begin" && ps.end_symbol) +end + +function is_syntactic_unary_op(t) + kind(t) in (K"$", K"&", K"::") +end + function has_whitespace_prefix(tok::SyntaxToken) tok.had_whitespace end @@ -60,7 +75,7 @@ function parse_RtoL(ps::ParseState, down, is_op, syntactic, self) down(ps) k = peek(ps) if is_op(k) - if (syntactic isa Bool && syntactic) || syntactic(k) + if syntactic isa Bool ? 
syntactic : syntactic(k) bump(ps, TRIVIA_FLAG) self(ps) emit(ps, mark, k) @@ -182,11 +197,6 @@ function is_eventually_call(ex) TODO("is_eventually_call unimplemented") end -# flisp: (define (short-form-function-loc ex lno) -function short_form_function_loc(ex, lno) - TODO("short_form_function_loc unimplemented") -end - # flisp: (define (parse-assignment s down) function parse_assignment(ps::ParseState, down) mark = position(ps) @@ -298,19 +308,19 @@ end # x <--> y ==> (call-i x <--> y) # x --> y ==> (x --> y) # The only syntactic arrow # -# flisp: (define (parse-arrow s) (parse-RtoL s parse-or is-prec-arrow? (eq? t '-->) parse-arrow)) +# flisp: parse-arrow function parse_arrow(ps::ParseState) parse_RtoL(ps, parse_or, is_prec_arrow, ==(K"-->"), parse_arrow) end # x || y || z ==> (call-i x || (call-i y || z)) # -# flisp: (define (parse-or s) (parse-RtoL s parse-and is-prec-lazy-or? #t parse-or)) +# flisp: parse-or function parse_or(ps::ParseState) parse_RtoL(ps, parse_and, is_prec_lazy_or, true, parse_or) end -# flisp: (define (parse-and s) (parse-RtoL s parse-comparison is-prec-lazy-and? 
#t parse-and)) +# flisp: parse-and function parse_and(ps::ParseState) parse_RtoL(ps, parse_comparison, is_prec_lazy_and, true, parse_and) end @@ -320,7 +330,7 @@ end # x < y < z ==> (comparison x < y < z) # x == y < z ==> (comparison x == y < z) # -# flisp: (define (parse-comparison s) +# flisp: parse-comparison function parse_comparison(ps::ParseState) mark = position(ps) parse_pipe_lt(ps) @@ -348,13 +358,13 @@ function parse_comparison(ps::ParseState) end # x |> y |> z ==> ((x |> y) |> z) -# flisp: (define (parse-pipe< s) (parse-RtoL s parse-pipe> is-prec-pipe (x <| (y <| z)) -# flisp: (define (parse-pipe> s) (parse-LtoR s parse-range is-prec-pipe>?)) +# flisp: parse-pipe> function parse_pipe_gt(ps::ParseState) parse_LtoR(ps, parse_range, is_prec_pipe_gt) end @@ -366,7 +376,7 @@ end # Chaining gives # a:b:c:d:e ==> (call-i (call-i a : b c) : d e) # -# flisp: (define (parse-range s) +# flisp: parse-range function parse_range(ps::ParseState) mark = position(ps) parse_expr(ps) @@ -505,18 +515,37 @@ end # # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) - bumpTODO(ps) + parse_where(ps, parse_juxtapose) #TODO("parse_unary_subtype unimplemented") end -# flisp: (define (parse-where-chain s first) -function parse_where_chain(ps::ParseState, first) - TODO("parse_where_chain unimplemented") +# flisp: parse-where-chain +function parse_where_chain(ps0::ParseState, mark) + ps = ParseState(ps0, where_enabled=false) + while peek(ps) == K"where" + bump(ps, TRIVIA_FLAG) # where + k = peek(ps) + if k == K"{" + # x where {T,S} ==> (where x T S) + TODO("bracescat, braces etc allowed here??") + parse_cat(ps, K"}", ps.end_symbol) + emit(ps, mark, K"where") + else + parse_comparison(ps) + emit(ps, mark, K"where") + end + end end # flisp: (define (parse-where s down) function parse_where(ps::ParseState, down) - TODO("parse_where unimplemented") + # `where` needs to be below unary for the following to work + # +(x::T,y::T) where {T} = x + mark = position(ps) + 
down(ps) + if ps.where_enabled && peek(ps) == K"where" + parse_where_chain(ps, mark) + end end # given an expression and the next token, is there a juxtaposition @@ -529,7 +558,8 @@ end # flisp: (define (parse-juxtapose s) function parse_juxtapose(ps::ParseState) - TODO("parse_juxtapose unimplemented") + parse_unary(ps) + #TODO("parse_juxtapose unimplemented") end # flisp: (define (maybe-negate op num) @@ -558,52 +588,138 @@ end # -2^3 is parsed as -(2^3), so call parse-decl for the first argument, # and parse-unary from then on (to handle 2^-3) # -# flisp: (define (parse-factor s) +# flisp: parse-factor function parse_factor(ps::ParseState) TODO("parse_factor unimplemented") + mark = position(ps) + parse_unary_prefix(ps) + parse_factor_with_initial_ex(ps, mark) end -# flisp: (define (parse-factor-with-initial-ex s ex0 (tok #f)) -function parse_factor_with_initial_ex(ps::ParseState, ex0; tok=false) +# flisp: parse-factor-with-initial-ex +function parse_factor_with_initial_ex(ps::ParseState, mark) TODO("parse_factor_with_initial_ex unimplemented") + parse_call_with_initial_ex(ps, mark) + parse_decl_with_initial_ex(ps, mark) + if is_prec_power(peek(ps)) + bump(ps) + parse_factor_after(ps) + emit(ps, mark, K"call", INFIX_FLAG) + end end -# flisp: (define (parse-factor-after s) (parse-RtoL s parse-juxtapose is-prec-power? 
#f parse-factor-after)) +# flisp: parse-factor-after function parse_factor_after(ps::ParseState) parse_RtoL(ps, parse_juxtapose, is_prec_power, false, parse_factor_after) end -# flisp: (define (parse-decl s) +# Parse type declarations and lambda syntax +# a->b ==> (-> a b) +# a::b ==> (:: a b) +# +# flisp: parse-decl function parse_decl(ps::ParseState) - TODO("parse_decl unimplemented") + mark = position(ps) + parse_call(ps) + parse_decl_with_initial_ex(ps, mark) end -# flisp: (define (parse-decl-with-initial-ex s ex) -function parse_decl_with_initial_ex(ps::ParseState, ex) - TODO("parse_decl_with_initial_ex unimplemented") +# flisp: parse-decl-with-initial-ex +function parse_decl_with_initial_ex(ps::ParseState, mark) + while peek(ps) == K"::" + # a::b::c ==> (:: (:: a b) c) + bump(ps, TRIVIA_FLAG) + parse_where(ps, parse_call) + emit(ps, mark, K"::") + end + if peek(ps) == K"->" + # a::b->c ==> (-> (:: a b) c) + bump(ps, TRIVIA_FLAG) + # -> is unusual: it binds tightly on the left and + # loosely on the right. 
+ parse_eq_star(ps) + emit(ps, mark, K"->") + end end # parse function call, indexing, dot, and transpose expressions # also handles looking for syntactic reserved words # -# flisp: (define (parse-call s) +# flisp: parse-call function parse_call(ps::ParseState) - TODO("parse_call unimplemented") + mark = position(ps) + parse_unary_prefix(ps) + parse_call_with_initial_ex(ps, mark) end # flisp: (define (parse-call-with-initial-ex s ex tok) function parse_call_with_initial_ex(ps::ParseState, ex, tok) - TODO("parse_call_with_initial_ex unimplemented") + k = peek(ps) + if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") + parse_resword(ps, mark) + else + parse_call_chain(ps, mark, false) + end end -# flisp: (define (parse-unary-prefix s) +# parse syntactic unary operators +# +# &a ==> (& a) +# ::a ==> (:: a) +# $a ==> ($ a) +# +# flisp: parse-unary-prefix function parse_unary_prefix(ps::ParseState) - TODO("parse_unary_prefix unimplemented") + mark = position(ps) + k = peek(ps) + if is_syntactic_unary_op(k) + k2 = peek(ps, 2) + if k in (K"&", K"$") && (is_closing_token(ps, k2) || k2 == K"NewlineWs") + # (&) ==> (&) + # === + # x = $ + # ==> (= x &) + bump(ps) + else + bump(ps, TRIVIA_FLAG) + if k in (K"&", K"::") + parse_where(ps, parse_call) + else + # $$$a ==> ($ ($ ($ a))) + parse_unary_prefix(ps) + end + emit(ps, mark, k) + end + else + parse_atom(ps) + end end -# flisp: (define (parse-def s is-func anon) +# Parse function and macro signatures +# +# flisp: parse-def function parse_def(ps::ParseState, is_func, anon) - TODO("parse_def unimplemented") + mark = position(ps) + flags = EMPTY_FLAGS + k = peek(ps) + parse_unary_prefix(ps) + if (is_func && iskeyword(k)) || is_initial_reserved_word(ps, k) + # Forbid things like + # function begin() end ==> (function-e begin (call)) + emit_diagnostic(ps, mark, + error="invalid $(is_func ? "function" : "macro") name") + # FIXME: Which node does this error go with? 
+ flags |= ERROR_FLAGS + end + parse_call_chain(ps, mark, false) + if is_func && peek(ps) == K"::" + bump(ps, TRIVIA_FLAG) + parse_call(ps) + emit(ps, mark, K"::") + end + if peek(ps) == K"where" + parse_where_chain(ps, mark) + end end # flisp: (define (disallowed-space-error lno ex t) @@ -611,9 +727,11 @@ function disallowed_space_error(lno, ex, t) TODO("disallowed_space_error unimplemented") end -# flisp: (define (disallow-space s ex t) -function disallow_space(s, ex, t) - TODO("disallow_space unimplemented") +# flisp: disallow-space +function disallow_space(ps, t) + if t.had_whitespace + emit_diagnostic(ps, mark, "space disallowed before $t") + end end # string macro suffix for given delimiter t @@ -624,8 +742,31 @@ function macsuffix(t) end # flisp: (define (parse-call-chain s ex macrocall?) -function parse_call_chain(ps::ParseState, ex, is_macrocall) - TODO("parse_call_chain unimplemented") +function parse_call_chain(ps::ParseState, mark, is_macrocall) + TODO("parse_call_chain") + while true + t = peek_token(ps) + k = kind(t) + if (ps.space_sensitive && t.had_whitespace && + k in (K"(", K"[", K"{", K"'", K"\"", K"\\")) || + (is_number(k) && k == K"(") + # 2(...) is multiply, not call + # FIXME: Is this `break` correct ? + break + end + if k == K"(" + disallow_space(ps, t) + bump(ps, TRIVIA_FLAG) + parse_call_arglist(ps, K")") + elseif k == K"[" + elseif k == K"." + elseif k == K"'" + elseif k == K"{" + elseif k in (K"\"", K"`") + else + break + end + end end # flisp: (define (expect-end s word) @@ -1009,7 +1150,8 @@ end # # flisp: (define (parse-atom s (checked #t)) function parse_atom(ps::ParseState; checked=true) - TODO("parse_atom unimplemented") + bumpTODO(ps) + #TODO("parse_atom unimplemented") end # flisp: (define (valid-modref? 
e) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index a1499e9bd6bbe..985f5fe6837ba 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -84,6 +84,10 @@ function is_both_unary_and_binary(t) K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed end +function is_number(t) + kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") +end + """ Get the "binding power" (precedence level) of an operator kind """ From 2c69c56cd6e72314893b807f0d7042b4d69b43be Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 11 Dec 2021 15:20:02 +1000 Subject: [PATCH 0236/1109] Mention RSLint in README --- JuliaSyntax/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index ac7629baa930a..8f914c97baa40 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -225,6 +225,24 @@ Highlights: another flat stream of events." This seems great, let's adopt it! * TODO +## RSLint + +[RSLint](https://rslint.org/dev) is a linter for javascript, built in Rust. It +uses the same parsing infrastructure and green tree libraries `rust-analyzer`. +There's an excellent and friendly high level overview of how all this works in +the rslint [parsing devdocs](https://rslint.org/dev/parsing.html). + +Points of note: + +* Backtracking and restarting the parser on error is actually quite simple in + the architecture we (mostly) share with `rust-analyzer`: + > ... events allow us to cheaply backtrack the parser by simply draining + > the events and resetting the token source cursor back to some place. + +* The section on [error + recovery](https://rslint.org/dev/parsing.html#error-recovery) is interesting; + they talk about various error recovery strategies. 
+ ## Diagnostics Rust is renowned for having great compiler diagnostics, so it's probably a good From 7f57e4a683cafae68ee2c2aeee09dbcbfe0957a9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 11 Dec 2021 15:21:42 +1000 Subject: [PATCH 0237/1109] Copy parser micro tests from comments to test suite Fix s-expression printing for SyntaxNode to deal with a wider range of heads. Also various small fixes to micro tests and a bit of trivial code movement. --- JuliaSyntax/src/parser.jl | 187 +++++++++++++---------- JuliaSyntax/src/source_files.jl | 8 +- JuliaSyntax/src/syntax_tree.jl | 25 +-- JuliaSyntax/src/token_kinds.jl | 8 +- JuliaSyntax/test/parser.jl | 101 ++++++++++++ JuliaSyntax/test/runtests.jl | 2 + JuliaSyntax/test/syntax_interpolation.jl | 2 +- 7 files changed, 234 insertions(+), 99 deletions(-) create mode 100644 JuliaSyntax/test/parser.jl diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d421c5e58d3ee..a2c6258288225 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2,6 +2,19 @@ # Parser Utils +# Like flisp: require-token +# +# * Skips newlines searching for the next token +# * Emits an error node if we've hit the end of the input +function peek_token_or_emit_incomplete(ps::ParseState, k, flags, mark) + t = peek_token(ps, skip_newlines=true) + if kind(t) == K"EndMarker" + emit(ps, mark, k, flags, + error="incomplete: premature end of input") + end + return t +end + function TODO(str) error("TODO: $str") end @@ -55,7 +68,7 @@ end # parse left-to-right binary operator # produces structures like (+ (+ (+ 2 3) 4) 5) # -# flisp: (define-macro (parse-LtoR s down ops) +# flisp: parse-LtoR function parse_LtoR(ps::ParseState, down, is_op) mark = position(ps) down(ps) @@ -68,8 +81,8 @@ end # parse right-to-left binary operator # produces structures like (= a (= b (= c d))) -# (define-macro (parse-RtoL s down ops syntactic self) -# flisp: +# +# flisp: parse-RtoL function parse_RtoL(ps::ParseState, down, is_op, 
syntactic, self) mark = position(ps) down(ps) @@ -260,6 +273,7 @@ end # flisp: (define (parse-cond s) function parse_cond(ps::ParseState) + cond_kind = K"if" mark = position(ps) parse_arrow(ps) t = peek_token(ps) @@ -273,7 +287,8 @@ function parse_cond(ps::ParseState) cond_flags |= ERROR_FLAG end bump(ps, TRIVIA_FLAG) # ? - t = peek_token(ps, skip_newlines=true) + t = peek_token_or_emit_incomplete(ps, cond_kind, cond_flags, mark) + kind(t) == K"EndMarker" && return if !t.had_whitespace # a ?b : c emit_diagnostic(ps, error="space required after `?` operator") @@ -293,7 +308,8 @@ function parse_cond(ps::ParseState) cond_flags |= ERROR_FLAG end bump(ps, TRIVIA_FLAG) # : - t = peek_token(ps, skip_newlines=true) + t = peek_token_or_emit_incomplete(ps, cond_kind, cond_flags, mark) + kind(t) == K"EndMarker" && return if !t.had_whitespace # a ? b :c emit_diagnostic(ps, error="space required after `:` in `?` expression") @@ -306,27 +322,29 @@ end # Parse arrows # x → y ==> (call-i x → y) # x <--> y ==> (call-i x <--> y) -# x --> y ==> (x --> y) # The only syntactic arrow +# x --> y ==> (--> x y) # The only syntactic arrow # # flisp: parse-arrow function parse_arrow(ps::ParseState) parse_RtoL(ps, parse_or, is_prec_arrow, ==(K"-->"), parse_arrow) end -# x || y || z ==> (call-i x || (call-i y || z)) +# x || y || z ==> (|| x (|| y z)) # # flisp: parse-or function parse_or(ps::ParseState) parse_RtoL(ps, parse_and, is_prec_lazy_or, true, parse_or) end +# x && y && z ==> (&& x (&& y z)) +# # flisp: parse-and function parse_and(ps::ParseState) parse_RtoL(ps, parse_comparison, is_prec_lazy_and, true, parse_and) end # Parse comparison chains like -# x > y ==> (call-i > x y) +# x > y ==> (call-i x > y) # x < y < z ==> (comparison x < y < z) # x == y < z ==> (comparison x == y < z) # @@ -344,7 +362,7 @@ function parse_comparison(ps::ParseState) end if n_comparisons == 1 if initial_kind in (K"<:", K">:") - # Type comparisons are syntactic and have their kind encoded in the head + # Type 
comparisons are syntactic # x <: y ==> (<: x y) # x >: y ==> (>: x y) reset_token!(ps, op_pos, flags=TRIVIA_FLAG) @@ -353,17 +371,17 @@ function parse_comparison(ps::ParseState) emit(ps, mark, K"call", INFIX_FLAG) end elseif n_comparisons > 1 - emit(ps, mark, K"comparison", INFIX_FLAG) + emit(ps, mark, K"comparison") end end -# x |> y |> z ==> ((x |> y) |> z) +# x <| y <| z ==> (call-i x <| (call-i y <| z)) # flisp: parse-pipe< function parse_pipe_lt(ps::ParseState) parse_RtoL(ps, parse_pipe_gt, is_prec_pipe_lt, false, parse_pipe_lt) end -# x <| y <| z ==> (x <| (y <| z)) +# x |> y |> z ==> (call-i (call-i x |> y) |> z) # flisp: parse-pipe> function parse_pipe_gt(ps::ParseState) parse_LtoR(ps, parse_range, is_prec_pipe_gt) @@ -388,7 +406,6 @@ function parse_range(ps::ParseState) parse_expr(ps) emit(ps, mark, K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled - # a ? b : c ==> (if a b c) # a ? b : c:d ==> (if a b (call-i c : d)) n_colons = 0 while peek(ps) == K":" @@ -462,7 +479,6 @@ function parse_chain(ps::ParseState, down, op_kind) end # Parse left to right, combining any of `chain_ops` into one call -# a - b - c ==> (call-i (call-i a - b) - c) # # flisp: parse-with-chains function parse_with_chains(ps::ParseState, down, is_op, chain_ops) @@ -483,19 +499,24 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) down(ps) if kind(t) in chain_ops && !is_decorated(t) # a + b + c ==> (call-i a + b c) - # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c) - # a .+ b .+ c ==> (call-i (call-i a .+ b) .+ c) parse_chain(ps, down, kind(t)) end + # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c) + # a .+ b .+ c ==> (call-i (call-i a .+ b) .+ c) emit(ps, mark, K"call", INFIX_FLAG) end end +# a - b - c ==> (call-i (call-i a - b) - c) +# a + b + c ==> (call-i a + b c) +# # flisp: parse-expr function parse_expr(ps::ParseState) parse_with_chains(ps, parse_term, is_prec_plus, (K"+", K"++")) end +# a * b * c ==> (call-i a * b c) +# # flisp: 
parse-term function parse_term(ps::ParseState) parse_with_chains(ps, parse_rational, is_prec_times, (K"*",)) @@ -515,6 +536,9 @@ end # # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) + k = peek(ps, skip_newlines=true) + if k == K"EndMarker" + end parse_where(ps, parse_juxtapose) #TODO("parse_unary_subtype unimplemented") end @@ -571,7 +595,8 @@ end # flisp: (define (parse-unary s) function parse_unary(ps::ParseState) - TODO("parse_unary unimplemented") + bumpTODO(ps) + #TODO("parse_unary unimplemented") end # flisp: (define (fix-syntactic-unary e) @@ -614,8 +639,8 @@ function parse_factor_after(ps::ParseState) end # Parse type declarations and lambda syntax -# a->b ==> (-> a b) # a::b ==> (:: a b) +# a->b ==> (-> a b) # # flisp: parse-decl function parse_decl(ps::ParseState) @@ -653,7 +678,7 @@ function parse_call(ps::ParseState) end # flisp: (define (parse-call-with-initial-ex s ex tok) -function parse_call_with_initial_ex(ps::ParseState, ex, tok) +function parse_call_with_initial_ex(ps::ParseState, mark) k = peek(ps) if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") parse_resword(ps, mark) @@ -743,6 +768,7 @@ end # flisp: (define (parse-call-chain s ex macrocall?) 
function parse_call_chain(ps::ParseState, mark, is_macrocall) + bumpTODO(ps); return TODO("parse_call_chain") while true t = peek_token(ps) @@ -1152,6 +1178,58 @@ end function parse_atom(ps::ParseState; checked=true) bumpTODO(ps) #TODO("parse_atom unimplemented") + #= + tok = require_token(ps) + tok_kind = kind(tok) + # TODO: Reorder these to put most likely tokens first + if tok_kind == K":" # symbol/expression quote + take_token!(ps) + next = peek_token(ps) + if is_closing_token(ps, next) && (kind(next) != K"Keyword" || + has_whitespace_prefix(next)) + return GreenNode(tok) + elseif has_whitespace_prefix(next) + error("whitespace not allowed after \":\" used for quoting") + elseif kind(next) == K"NewlineWs" + error("newline not allowed after \":\" used for quoting") + else + # Being inside quote makes `end` non-special again. issue #27690 + ps1 = ParseState(ps, end_symbol=false) + return GreenNode(K"quote", parse_atom(ps1, checked=false)) + end + elseif tok_kind == K"=" # misplaced = + error("unexpected `=`") + elseif tok_kind == K"Identifier" + if checked + TODO("Checked identifier names") + end + take_token!(ps) + return GreenNode(tok) + elseif tok_kind == K"VarIdentifier" + take_token!(ps) + return GreenNode(tok) + elseif tok_kind == K"(" # parens or tuple + take_token!(ps) + return parse_paren(ps, checked) + elseif tok_kind == K"[" # cat expression + # NB: Avoid take_token! here? It's better to not consume tokens early + # take_token!(ps) + vex = parse_cat(ps, tok, K"]", ps.end_symbol) + elseif tok_kind == K"{" # cat expression + take_token!(ps) + TODO("""parse_cat(ps, K"}", )""") + elseif tok_kind == K"`" + TODO("(macrocall (core @cmd) ...)") + # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), + elseif isliteral(tok_kind) + take_token!(ps) + return GreenNode(tok) + elseif is_closing_token(tok) + error("unexpected: $tok") + else + error("invalid syntax: `$tok`") + end + =# end # flisp: (define (valid-modref? 
e) @@ -1196,6 +1274,8 @@ function parse_docstring(ps::ParseState, down=parse_eq) # TODO? This is not quite equivalent to the flisp parser which accepts # more than just a string. For example: #! ("doc") foo ==> (macrocall core_@doc "doc" foo) + # TODO: Also, all these TOMBSTONEs are inefficient. Perhaps we can improve + # things? maybe_doc = peek(ps) in (K"String", K"TripleString") atdoc_mark = bump_invisible(ps) down(ps) @@ -1213,7 +1293,7 @@ function parse_docstring(ps::ParseState, down=parse_eq) # === # "doc" # foo - # ===> (macrocall core_@doc "doc" foo) + # ==> (macrocall core_@doc "doc" foo) bump(ps, TRIVIA_FLAG) # NewlineWs end end @@ -1226,6 +1306,9 @@ function parse_docstring(ps::ParseState, down=parse_eq) end +#------------------------------------------------------------------------------- +# Parser entry points + """ parse_all(input) @@ -1263,67 +1346,3 @@ function parse_all(code, args...) return parse_all(ParseState(stream), args...) end -#------------------------------------------------------------------------------- -#------------------------------------------------------------------------------- - - -#= - -# Parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. -function parse_atom(ps::ParseState; checked::Bool=true)::GreenNode - tok = require_token(ps) - tok_kind = kind(tok) - # TODO: Reorder these to put most likely tokens first - if tok_kind == K":" # symbol/expression quote - take_token!(ps) - next = peek_token(ps) - if is_closing_token(ps, next) && (kind(next) != K"Keyword" || - has_whitespace_prefix(next)) - return GreenNode(tok) - elseif has_whitespace_prefix(next) - error("whitespace not allowed after \":\" used for quoting") - elseif kind(next) == K"NewlineWs" - error("newline not allowed after \":\" used for quoting") - else - # Being inside quote makes `end` non-special again. 
issue #27690 - ps1 = ParseState(ps, end_symbol=false) - return GreenNode(K"quote", parse_atom(ps1, checked=false)) - end - elseif tok_kind == K"=" # misplaced = - error("unexpected `=`") - elseif tok_kind == K"Identifier" - if checked - TODO("Checked identifier names") - end - take_token!(ps) - return GreenNode(tok) - elseif tok_kind == K"VarIdentifier" - take_token!(ps) - return GreenNode(tok) - elseif tok_kind == K"(" # parens or tuple - take_token!(ps) - return parse_paren(ps, checked) - elseif tok_kind == K"[" # cat expression - # NB: Avoid take_token! here? It's better to not consume tokens early - # take_token!(ps) - vex = parse_cat(ps, tok, K"]", ps.end_symbol) - elseif tok_kind == K"{" # cat expression - take_token!(ps) - TODO("""parse_cat(ps, K"}", )""") - elseif tok_kind == K"`" - TODO("(macrocall (core @cmd) ...)") - # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), - elseif isliteral(tok_kind) - take_token!(ps) - return GreenNode(tok) - elseif is_closing_token(tok) - error("unexpected: $tok") - else - error("invalid syntax: `$tok`") - end -end - -#------------------------------------------------------------------------------- - -=# - diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 0d914445668f4..e16d548202296 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -52,7 +52,13 @@ function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) end function Base.getindex(source::SourceFile, rng::AbstractRange) - @view source.code[rng] + i = first(rng) + # Convert byte range into unicode String character range. + # Assumes valid unicode! (SubString doesn't give us a reliable way to opt + # out of the valid unicode check. The SubString{String} inner constructor + # has some @boundscheck, but using @inbounds depends on inlining choices.) 
+ j = prevind(source.code, last(rng)+1) + @view source.code[i:j] end function Base.getindex(source::SourceFile, i::Int) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 32b9943966399..b49a02c01dbcc 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -68,20 +68,17 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val = unescape_string(source[position+1:position+span(raw)-2]) elseif isoperator(k) val = Symbol(val_str) + elseif k == K"core_@doc" + val = GlobalRef(Core, :var"@doc") else error("Can't parse literal of kind $k") end return SyntaxNode(source, raw, position, nothing, :leaf, val) else k = kind(raw) - head = k == K"call" ? :call : - k == K"toplevel" ? :toplevel : - k == K"block" ? :block : - k == K"for" ? :for : - k == K"=" ? :(=) : - k == K"$" ? :$ : - k == K"quote" ? :quote : - error("Unknown head of kind $k") + str = untokenize(k) + head = !isnothing(str) ? Symbol(str) : + error("Can't untokenize head of kind $k") cs = SyntaxNode[] pos = position for (i,rawchild) in enumerate(children(raw)) @@ -119,7 +116,7 @@ function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename #@info "" fname print_fname current_filename[] line, col = source_location(node.source, node.position) - posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node),6))│" + posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node)-1,6))│" nodestr = !haschildren(node) ? 
repr(node.val) : "[$(_kind_str(kind(node.raw)))]" @@ -139,7 +136,7 @@ function _show_syntax_node(io, current_filename, node, indent) end end -function _show_syntax_node_compact(io, node) +function _show_syntax_node_sexpr(io, node) if !haschildren(node) print(io, repr(node.val)) else @@ -147,7 +144,7 @@ function _show_syntax_node_compact(io, node) first = true for n in children(node) first || print(io, ' ') - _show_syntax_node_compact(io, n) + _show_syntax_node_sexpr(io, n) first = false end print(io, ')') @@ -159,8 +156,12 @@ function Base.show(io::IO, ::MIME"text/plain", node::SyntaxNode) _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "") end +function Base.show(io::IO, ::MIME"text/x.sexpression", node::SyntaxNode) + _show_syntax_node_sexpr(io, node) +end + function Base.show(io::IO, node::SyntaxNode) - _show_syntax_node_compact(io, node) + _show_syntax_node_sexpr(io, node) end function Base.push!(node::SyntaxNode, child::SyntaxNode) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 985f5fe6837ba..57b3717d08d6a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -1012,6 +1012,12 @@ for kw in split("""abstract baremodule begin break catch const continue do else elseif end export finally for function global if import let local macro module mutable new outer primitive quote - return struct try type using while""") + return struct try type using while + + block call comparison curly string macrocall + toplevel tuple ref vect braces bracescat hcat + vcat ncat typed_hcat typed_vcat typed_ncat generator + flatten comprehension typed_comprehension + """) _kind_to_str[getfield(Kinds, Symbol(kw))] = kw end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl new file mode 100644 index 0000000000000..a009570dee9d8 --- /dev/null +++ b/JuliaSyntax/test/parser.jl @@ -0,0 +1,101 @@ +function test_parse(production, code) + stream = ParseStream(code) + 
production(JuliaSyntax.ParseState(stream)) + t = JuliaSyntax.to_raw_tree(stream) + @test Text(sprint(JuliaSyntax.show_diagnostics, stream, code)) == Text("") + s = SyntaxNode(SourceFile(code), t) + sprint(show, MIME("text/x.sexpression"), s) +end + +# TODO: +# * Extract the following test cases from the source itself. +# * Use only the green tree to generate the S-expressions +# (add flag annotations to heads) +tests = [ + JuliaSyntax.parse_block => [ + "a;b;c" => "(block :a :b :c)" + "a;;;b;;" => "(block :a :b)" + "a\nb" => "(block :a :b)" + ], + JuliaSyntax.parse_stmts => [ + "a;b;c" => "(toplevel :a :b :c)" + "a;;;b;;" => "(toplevel :a :b)" + ], + JuliaSyntax.parse_cond => [ + "a ? b : c" => "(if :a :b :c)" + #"a ?\nb : c" => "(if :a :b :c)" + #"a ? b :\nc" => "(if :a :b :c)" + ], + JuliaSyntax.parse_arrow => [ + "x → y" => "(call :→ :x :y)" + "x <--> y" => "(call :<--> :x :y)" + "x --> y" => "(--> :x :y)" + ], + JuliaSyntax.parse_or => [ + "x || y || z" => "(|| :x (|| :y :z))" + ], + JuliaSyntax.parse_and => [ + "x && y && z" => "(&& :x (&& :y :z))" + ], + JuliaSyntax.parse_comparison => [ + "x > y" => "(call :> :x :y)" + "x < y < z" => "(comparison :x :< :y :< :z)" + "x == y < z" => "(comparison :x :(==) :y :< :z)" + "x <: y" => "(<: :x :y)" + "x >: y" => "(>: :x :y)" + ], + JuliaSyntax.parse_pipe_lt => [ + "x <| y <| z" => "(call :<| :x (call :<| :y :z))" + ], + JuliaSyntax.parse_pipe_gt => [ + "x |> y |> z" => "(call :|> (call :|> :x :y) :z)" + ], + JuliaSyntax.parse_range => [ + "1:2" => "(call :(:) 1 2)" + "1:2:3" => "(call :(:) 1 2 3)" + "a:b:c:d:e" => "(call :(:) (call :(:) :a :b :c) :d :e)" + ], + JuliaSyntax.parse_range => [ + "a..b" => "(call :.. :a :b)" + "a … b" => "(call :… :a :b)" + # a ? b : c:d ==> (if a b (call-i c : d)) + # [1 :a] ==> (vcat 1 (quote a)) + # [1 2:3 :a] ==> (vcat 1 (call-i 2 : 3) (quote a)) + "x..." => "(... :x)" + "x:y..." => "(... (call :(:) :x :y))" + "x..y..." => "(... (call :.. 
:x :y))" + ], + JuliaSyntax.parse_expr => [ + # "[x +y]" ==> "(hcat x (call + y))" + # [x+y +z] ==> (hcat (call-i x + y) (call + z)) + # Conversely + # [x+y+z] ==> (hcat (call-i x + y z)) + # [x+y + z] ==> (hcat (call-i x + y z)) + "a - b - c" => "(call :- (call :- :a :b) :c)" + "a + b + c" => "(call :+ :a :b :c)" + "a +₁ b +₁ c" => "(call :+₁ (call :+₁ :a :b) :c)" + "a .+ b .+ c" => "(call :.+ (call :.+ :a :b) :c)" + ], + JuliaSyntax.parse_term => [ + "a * b * c" => "(call :* :a :b :c)" + ], + JuliaSyntax.parse_decl => [ + #"a::b" => "(:: a b)" + #"a->b" => "(-> a b)" + ], + JuliaSyntax.parse_unary_prefix => [ + #"&a" => "(& :a)" + #"::a" => "(:: :a)" + #"\$a" => "(\$ :a)" + ], + JuliaSyntax.parse_docstring => [ + "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" + ], +] + +@testset "$production" for (production, test_specs) in tests + for (input,output) in test_specs + @test test_parse(production, input) == output + end +end + diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index cdc15ad370e2b..855e8445ab4c5 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -10,6 +10,7 @@ using JuliaSyntax: GreenNode, SyntaxNode, using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator using JuliaSyntax: highlight using JuliaSyntax: ParseStream, bump, peek, emit +using JuliaSyntax: ParseState # Shortcuts for defining raw syntax nodes @@ -26,3 +27,4 @@ include("syntax_trees.jl") include("syntax_interpolation.jl") include("parse_stream.jl") include("simple_parser.jl") +include("parser.jl") diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/test/syntax_interpolation.jl index ecbe3fe061b02..3c5658e5b79f1 100644 --- a/JuliaSyntax/test/syntax_interpolation.jl +++ b/JuliaSyntax/test/syntax_interpolation.jl @@ -21,7 +21,7 @@ end # so we need to hand construct all our trees. 
function at_show2(ex::SyntaxNode) code = String(read(@__FILE__)) - name = sprint(JuliaSyntax._show_syntax_node_compact, ex) + name = sprint(show, MIME"text/x.sexpression"(), ex) # The following quote block is not used directly, but the text for it is # re-read from `code`. quote_begin = (@__LINE__) + 1 From 4da543b4f6ccc7c83eafa333a97a379663421900 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 11 Dec 2021 21:20:41 +1000 Subject: [PATCH 0238/1109] Rename internal TextSpan -> TaggedRange TextSpan wasn't a very good name as it includes an expression head in addition to the range of bytes (which is a range, not just a width) --- JuliaSyntax/src/parse_stream.jl | 39 ++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index d5ebd54939d9b..c060e981093ea 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -39,25 +39,28 @@ Base.:(~)(k::Kind, tok::SyntaxToken) = kind(tok) == k #------------------------------------------------------------------------------- -struct TextSpan +# Range in the source text which will become a node in the tree. Can be either +# a token (leaf node of the tree) or an interior node, depending on how nodes +# overlap. 
+struct TaggedRange head::SyntaxHead first_byte::Int last_byte::Int end -function TextSpan(raw::RawToken, flags::RawFlags) - TextSpan(SyntaxHead(raw.kind, flags), raw.startbyte + 1, raw.endbyte + 1) +function TaggedRange(raw::RawToken, flags::RawFlags) + TaggedRange(SyntaxHead(raw.kind, flags), raw.startbyte + 1, raw.endbyte + 1) end -head(text_span::TextSpan) = text_span.head -kind(text_span::TextSpan) = kind(text_span.head) -flags(text_span::TextSpan) = flags(text_span.head) -first_byte(text_span::TextSpan) = text_span.first_byte -last_byte(text_span::TextSpan) = text_span.last_byte -span(text_span::TextSpan) = last_byte(text_span) - first_byte(text_span) + 1 +head(text_span::TaggedRange) = text_span.head +kind(text_span::TaggedRange) = kind(text_span.head) +flags(text_span::TaggedRange) = flags(text_span.head) +first_byte(text_span::TaggedRange) = text_span.first_byte +last_byte(text_span::TaggedRange) = text_span.last_byte +span(text_span::TaggedRange) = last_byte(text_span) - first_byte(text_span) + 1 struct Diagnostic - text_span::TextSpan + text_span::TaggedRange message::String end @@ -83,7 +86,7 @@ This is simililar to rust-analyzer's mutable struct ParseStream lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} lookahead::Vector{SyntaxToken} - spans::Vector{TextSpan} + spans::Vector{TaggedRange} diagnostics::Vector{Diagnostic} # First byte of next token next_byte::Int @@ -95,7 +98,7 @@ function ParseStream(code) lexer = Tokenize.tokenize(code, RawToken) ParseStream(lexer, Vector{SyntaxToken}(), - Vector{TextSpan}(), + Vector{TaggedRange}(), Vector{Diagnostic}(), 1, 0) @@ -184,7 +187,7 @@ function bump(stream::ParseStream, flags=EMPTY_FLAGS, skip_newlines=false) is_skipped_ws = k ∈ (K"Whitespace", K"Comment") || (k == K"NewlineWs" && skip_newlines) f = is_skipped_ws ? 
TRIVIA_FLAG : flags - span = TextSpan(SyntaxHead(kind(tok), f), first_byte(tok), last_byte(tok)) + span = TaggedRange(SyntaxHead(kind(tok), f), first_byte(tok), last_byte(tok)) push!(stream.spans, span) end Base._deletebeg!(stream.lookahead, n) @@ -213,7 +216,7 @@ function reset_token!(stream::ParseStream, mark; text_span = stream.spans[mark] k = isnothing(kind) ? (@__MODULE__).kind(text_span) : kind f = isnothing(flags) ? (@__MODULE__).flags(text_span) : flags - stream.spans[mark] = TextSpan(SyntaxHead(k, f), + stream.spans[mark] = TaggedRange(SyntaxHead(k, f), first_byte(text_span), last_byte(text_span)) end @@ -251,7 +254,7 @@ function emit(stream::ParseStream, start_mark::Integer, kind::Kind, if !isnothing(error) flags |= ERROR_FLAG end - text_span = TextSpan(SyntaxHead(kind, flags), start_mark, stream.next_byte-1) + text_span = TaggedRange(SyntaxHead(kind, flags), start_mark, stream.next_byte-1) if !isnothing(error) push!(stream.diagnostics, Diagnostic(text_span, error)) end @@ -266,7 +269,7 @@ function emit_diagnostic(stream::ParseStream, mark=nothing; error) byte = first_byte(peek_token(stream)) mark = isnothing(mark) ? byte : mark # It's a bit weird to require supplying a SyntaxHead here... - text_span = TextSpan(SyntaxHead(K"Error", EMPTY_FLAGS), mark, byte) + text_span = TaggedRange(SyntaxHead(K"Error", EMPTY_FLAGS), mark, byte) push!(stream.diagnostics, Diagnostic(text_span, error)) end @@ -276,7 +279,7 @@ end # Note that this is largely independent of GreenNode, and could easily be # made completely independent with a tree builder interface. 
-function _push_node!(stack, text_span::TextSpan, children=nothing) +function _push_node!(stack, text_span::TaggedRange, children=nothing) if isnothing(children) node = GreenNode(head(text_span), span(text_span)) push!(stack, (text_span=text_span, node=node)) @@ -287,7 +290,7 @@ function _push_node!(stack, text_span::TextSpan, children=nothing) end function to_raw_tree(st) - stack = Vector{@NamedTuple{text_span::TextSpan,node::GreenNode}}() + stack = Vector{@NamedTuple{text_span::TaggedRange,node::GreenNode}}() for text_span in st.spans if kind(text_span) == K"TOMBSTONE" # Ignore invisible tokens which were created but never finalized. From 7d8d294297550d37fb0c0e6a3417733a3f105014 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 12 Dec 2021 06:51:30 +1000 Subject: [PATCH 0239/1109] Remove Kinds module as it seems the string macro is sufficient This also removes a lot of use of var"" which can only be a good thing! --- JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/token_kinds.jl | 1795 ++++++++++------------ JuliaSyntax/src/tokens.jl | 107 ++ JuliaSyntax/test/syntax_interpolation.jl | 20 +- JuliaSyntax/test/syntax_trees.jl | 18 +- 5 files changed, 950 insertions(+), 992 deletions(-) create mode 100644 JuliaSyntax/src/tokens.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 6239cd268b486..1baf970536cff 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -10,7 +10,7 @@ include("source_files.jl") include("green_tree.jl") -include("token_kinds.jl") +include("tokens.jl") include("syntax_tree.jl") include("parse_stream.jl") diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 57b3717d08d6a..4af753bce456c 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -1,1012 +1,863 @@ - -#= -@enum(SyntaxKind, - Call -) - -# A type to multiplex token kinds from various libraries -struct Kind - id::UInt32 -end - -Kind(k::TzTokens.Kind) = 
Kind(0x00010000 | UInt32(k)) -Kind(k::SyntaxKind) = Kind(0x00020000 | UInt32(k)) - -_kind_namespace(k::Kind) = k.id >> 16 -_kind_code(k::Kind) = k.id & 0xffff - -function Base.show(io::IO, k::Kind) - ns = _kind_namespace(k) - code = _kind_code(k) - if ns == 1 - # Basic token kinds from Tokenize - print(io, Kind, "(") - show(io, Tokenize.Tokens.Kind(code)) - print(io, ")") - elseif ns == 2 - # Syntax node kinds, defined here - print(io, Kind, "(") - show(io, SyntaxKind(code)) - print(io, ")") - else - print(io, typeof(Kind), "(", k.id, ")") - end -end - -function Base.:(==)(k1::Kind, k2::Kind) - k1.id == k2.id -end -=# - -using Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator - -""" - K"s" - -The full kind of a string "s". For example, K")" is the kind of the -right parenthesis token. -""" -macro K_str(str) - name = Symbol(str) - return :(Kinds.$name) -end - -kind(k::Kind) = k -kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) - -# Predicates for operator precedence -is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" -is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" -is_prec_conditional(t) = K"BEGIN_CONDITIONAL" < kind(t) < K"END_CONDITIONAL" -is_prec_arrow(t) = K"BEGIN_ARROW" < kind(t) < K"END_ARROW" -is_prec_lazy_or(t) = K"BEGIN_LAZYOR" < kind(t) < K"END_LAZYOR" -is_prec_lazy_and(t) = K"BEGIN_LAZYAND" < kind(t) < K"END_LAZYAND" -is_prec_comparison(t) = K"BEGIN_COMPARISON" < kind(t) < K"END_COMPARISON" -is_prec_pipe(t) = K"BEGIN_PIPE" < kind(t) < K"END_PIPE" -is_prec_colon(t) = K"BEGIN_COLON" < kind(t) < K"END_COLON" -is_prec_plus(t) = K"BEGIN_PLUS" < kind(t) < K"END_PLUS" -is_prec_bitshift(t) = K"BEGIN_BITSHIFTS" < kind(t) < K"END_BITSHIFTS" -is_prec_times(t) = K"BEGIN_TIMES" < kind(t) < K"END_TIMES" -is_prec_rational(t) = K"BEGIN_RATIONAL" < kind(t) < K"END_RATIONAL" -is_prec_power(t) = K"BEGIN_POWER" < kind(t) < K"END_POWER" -is_prec_decl(t) = K"BEGIN_DECL" < kind(t) < K"END_DECL" -is_prec_where(t) = 
K"BEGIN_WHERE" < kind(t) < K"END_WHERE" -is_prec_dot(t) = K"BEGIN_DOT" < kind(t) < K"END_DOT" -is_prec_unicode_ops(t) = K"BEGIN_UNICODE_OPS" < kind(t) < K"END_UNICODE_OPS" - -is_prec_pipe_lt(t) = kind(t) == K"<|" -is_prec_pipe_gt(t) = kind(t) == K"|>" - -# Operators which are boty unary and binary -function is_both_unary_and_binary(t) - # TODO: Do we need to check dotop as well here? - kind(t) in (K"$", K"&", K"~", # <- dotop disallowed? - K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed -end - -function is_number(t) - kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") -end - -""" -Get the "binding power" (precedence level) of an operator kind -""" -function binding_power(k::Kind) - return k < K"END_ASSIGNMENTS" ? 1 : - k < K"END_CONDITIONAL" ? 2 : - k < K"END_ARROW" ? 3 : - k < K"END_LAZYOR" ? 4 : - k < K"END_LAZYAND" ? 5 : - k < K"END_COMPARISON" ? 6 : - k < K"END_PIPE" ? 7 : - k < K"END_COLON" ? 8 : - k < K"END_PLUS" ? 9 : - k < K"END_BITSHIFTS" ? 10 : - k < K"END_TIMES" ? 11 : - k < K"END_RATIONAL" ? 12 : - k < K"END_POWER" ? 13 : - k < K"END_DECL" ? 14 : - k < K"END_WHERE" ? 15 : - k < K"END_DOT" ? 16 : - k < K"END_OPS" ? 17 : # ?? unary ops - error("Not an operator") -end - -function _kind_str(k::Kind) - u = untokenize(k) - return !isnothing(u) ? u : - k in (K"Identifier", K"VarIdentifier") ? "Identifier" : - isliteral(k) ? "Literal" : - k == K"Comment" ? "Comment" : - k == K"Whitespace" ? "Whitespace" : - k == K"NewlineWs" ? "NewlineWs" : - lowercase(string(k)) -end - -""" -Return the string representation of a token kind, or `nothing` if the kind -represents a class of tokens like K"Identifier". -""" -function untokenize(k::Kind) - get(_kind_to_str, k, nothing) -end - -""" -A module to giving literal names to token kinds - -Rules: -* Kinds which correspond to exactly one textural form are represented with that - text. This includes keywords like K"for" and operators like K"*". 
-* Kinds which represent many textural forms have UpperCamelCase names. This - includes kinds like K"Identifier" and K"Comment". -""" -baremodule Kinds - -import ..JuliaSyntax: JuliaSyntax, Kind - -import Tokenize - -macro _K(sym) - :(Tokenize.Tokens.$sym) -# :(Kind(Tokenize.Tokens.$sym)) -end - -const EndMarker = @_K ENDMARKER -const Error = @_K ERROR -const Comment = @_K COMMENT -const Whitespace = @_K WHITESPACE -const Identifier = @_K IDENTIFIER -const VarIdentifier = @_K VAR_IDENTIFIER -const var"@" = @_K AT_SIGN -const var"," = @_K COMMA -const var";" = @_K SEMICOLON - -const BEGIN_KEYWORDS = @_K begin_keywords -const Keyword = @_K KEYWORD -const var"abstract" = @_K ABSTRACT -const var"baremodule" = @_K BAREMODULE -const var"begin" = @_K BEGIN -const var"break" = @_K BREAK -const var"catch" = @_K CATCH -const var"const" = @_K CONST -const var"continue" = @_K CONTINUE -const var"do" = @_K DO -const var"else" = @_K ELSE -const var"elseif" = @_K ELSEIF -const var"end" = @_K END -const var"export" = @_K EXPORT -const var"finally" = @_K FINALLY -const var"for" = @_K FOR -const var"function" = @_K FUNCTION -const var"global" = @_K GLOBAL -const var"if" = @_K IF -const var"import" = @_K IMPORT -const var"let" = @_K LET -const var"local" = @_K LOCAL -const var"macro" = @_K MACRO -const var"module" = @_K MODULE -const var"mutable" = @_K MUTABLE -const var"new" = @_K NEW -const var"outer" = @_K OUTER -const var"primitive" = @_K PRIMITIVE -const var"quote" = @_K QUOTE -const var"return" = @_K RETURN -const var"struct" = @_K STRUCT -const var"try" = @_K TRY -const var"type" = @_K TYPE -const var"using" = @_K USING -const var"while" = @_K WHILE -const END_KEYWORDS = @_K end_keywords - -const BEGIN_CSTPARSER = @_K begin_cstparser -const InvisibleBrackets = @_K INVISIBLE_BRACKETS -const Nothing = @_K NOTHING -const Ws = @_K WS -const SemicolonWs = @_K SEMICOLON_WS -const NewlineWs = @_K NEWLINE_WS -const EmptyWs = @_K EMPTY_WS -const END_CSTPARSER = @_K end_cstparser - 
-const BEGIN_LITERAL = @_K begin_literal -const Literal = @_K LITERAL -const Integer = @_K INTEGER -const BinInt = @_K BIN_INT -const HexInt = @_K HEX_INT -const OctInt = @_K OCT_INT -const Float = @_K FLOAT -const String = @_K STRING -const TripleString = @_K TRIPLE_STRING -const Char = @_K CHAR -const Cmd = @_K CMD -const TripleCmd = @_K TRIPLE_CMD -const var"true" = @_K TRUE -const var"false" = @_K FALSE -const END_LITERAL = @_K end_literal - -const BEGIN_DELIMITERS = @_K begin_delimiters -const var"[" = @_K LSQUARE -const var"]" = @_K RSQUARE -const var"{" = @_K LBRACE -const var"}" = @_K RBRACE -const var"(" = @_K LPAREN -const var")" = @_K RPAREN -const END_DELIMITERS = @_K end_delimiters - -const BEGIN_OPS = @_K begin_ops -const OP = @_K OP -const var"..." = @_K DDDOT +# Mapping from token string identifiers to enumeration values as used in @K_str + +const _str_to_kind = let Ts = TzTokens +Dict([ +"EndMarker" => Ts.ENDMARKER +"Error" => Ts.ERROR +"Comment" => Ts.COMMENT +"Whitespace" => Ts.WHITESPACE +"Identifier" => Ts.IDENTIFIER +"VarIdentifier" => Ts.VAR_IDENTIFIER +"@" => Ts.AT_SIGN +"," => Ts.COMMA +";" => Ts.SEMICOLON + +"BEGIN_KEYWORDS" => Ts.begin_keywords +"Keyword" => Ts.KEYWORD +"abstract" => Ts.ABSTRACT +"baremodule" => Ts.BAREMODULE +"begin" => Ts.BEGIN +"break" => Ts.BREAK +"catch" => Ts.CATCH +"const" => Ts.CONST +"continue" => Ts.CONTINUE +"do" => Ts.DO +"else" => Ts.ELSE +"elseif" => Ts.ELSEIF +"end" => Ts.END +"export" => Ts.EXPORT +"finally" => Ts.FINALLY +"for" => Ts.FOR +"function" => Ts.FUNCTION +"global" => Ts.GLOBAL +"if" => Ts.IF +"import" => Ts.IMPORT +"let" => Ts.LET +"local" => Ts.LOCAL +"macro" => Ts.MACRO +"module" => Ts.MODULE +"mutable" => Ts.MUTABLE +"new" => Ts.NEW +"outer" => Ts.OUTER +"primitive" => Ts.PRIMITIVE +"quote" => Ts.QUOTE +"return" => Ts.RETURN +"struct" => Ts.STRUCT +"try" => Ts.TRY +"type" => Ts.TYPE +"using" => Ts.USING +"while" => Ts.WHILE +"END_KEYWORDS" => Ts.end_keywords + +"BEGIN_CSTPARSER" => 
Ts.begin_cstparser +"InvisibleBrackets" => Ts.INVISIBLE_BRACKETS +"Nothing" => Ts.NOTHING +"Ws" => Ts.WS +"SemicolonWs" => Ts.SEMICOLON_WS +"NewlineWs" => Ts.NEWLINE_WS +"EmptyWs" => Ts.EMPTY_WS +"END_CSTPARSER" => Ts.end_cstparser + +"BEGIN_LITERAL" => Ts.begin_literal +"Literal" => Ts.LITERAL +"Integer" => Ts.INTEGER +"BinInt" => Ts.BIN_INT +"HexInt" => Ts.HEX_INT +"OctInt" => Ts.OCT_INT +"Float" => Ts.FLOAT +"String" => Ts.STRING +"TripleString" => Ts.TRIPLE_STRING +"Char" => Ts.CHAR +"Cmd" => Ts.CMD +"TripleCmd" => Ts.TRIPLE_CMD +"true" => Ts.TRUE +"false" => Ts.FALSE +"END_LITERAL" => Ts.end_literal + +"BEGIN_DELIMITERS" => Ts.begin_delimiters +"[" => Ts.LSQUARE +"]" => Ts.RSQUARE +"{" => Ts.LBRACE +"}" => Ts.RBRACE +"(" => Ts.LPAREN +")" => Ts.RPAREN +"END_DELIMITERS" => Ts.end_delimiters + +"BEGIN_OPS" => Ts.begin_ops +"OP" => Ts.OP +"..." => Ts.DDDOT # Level 1 -const BEGIN_ASSIGNMENTS = @_K begin_assignments -const var"=" = @_K EQ -const var"+=" = @_K PLUS_EQ -const var"-=" = @_K MINUS_EQ -const var"*=" = @_K STAR_EQ -const var"/=" = @_K FWD_SLASH_EQ -const var"//=" = @_K FWDFWD_SLASH_EQ -const var"|=" = @_K OR_EQ -const var"^=" = @_K CIRCUMFLEX_EQ -const var"÷=" = @_K DIVISION_EQ -const var"%=" = @_K REM_EQ -const var"<<=" = @_K LBITSHIFT_EQ -const var">>=" = @_K RBITSHIFT_EQ -const var">>>=" = @_K UNSIGNED_BITSHIFT_EQ -const var"\=" = @_K BACKSLASH_EQ -const var"&=" = @_K AND_EQ -const var":=" = @_K COLON_EQ -const var"~" = @_K APPROX -const var"$=" = @_K EX_OR_EQ -const var"⊻=" = @_K XOR_EQ -const END_ASSIGNMENTS = @_K end_assignments - -const BEGIN_PAIRARROW = @_K begin_pairarrow -const var"=>" = @_K PAIR_ARROW -const END_PAIRARROW = @_K end_pairarrow +"BEGIN_ASSIGNMENTS" => Ts.begin_assignments +"=" => Ts.EQ +"+=" => Ts.PLUS_EQ +"-=" => Ts.MINUS_EQ +"*=" => Ts.STAR_EQ +"/=" => Ts.FWD_SLASH_EQ +"//=" => Ts.FWDFWD_SLASH_EQ +"|=" => Ts.OR_EQ +"^=" => Ts.CIRCUMFLEX_EQ +"÷=" => Ts.DIVISION_EQ +"%=" => Ts.REM_EQ +"<<=" => Ts.LBITSHIFT_EQ +">>=" => 
Ts.RBITSHIFT_EQ +">>>=" => Ts.UNSIGNED_BITSHIFT_EQ +"\\=" => Ts.BACKSLASH_EQ +"&=" => Ts.AND_EQ +":=" => Ts.COLON_EQ +"~" => Ts.APPROX +"\$=" => Ts.EX_OR_EQ +"⊻=" => Ts.XOR_EQ +"END_ASSIGNMENTS" => Ts.end_assignments + +"BEGIN_PAIRARROW" => Ts.begin_pairarrow +"=>Ts." => Ts.PAIR_ARROW +"END_PAIRARROW" => Ts.end_pairarrow # Level 2 -const BEGIN_CONDITIONAL = @_K begin_conditional -const var"?" = @_K CONDITIONAL -const END_CONDITIONAL = @_K end_conditional +"BEGIN_CONDITIONAL" => Ts.begin_conditional +"?" => Ts.CONDITIONAL +"END_CONDITIONAL" => Ts.end_conditional # Level 3 -const BEGIN_ARROW = @_K begin_arrow -const var"-->" = @_K RIGHT_ARROW -const var"<--" = @_K LEFT_ARROW -const var"<-->" = @_K DOUBLE_ARROW -const var"←" = @_K LEFTWARDS_ARROW -const var"→" = @_K RIGHTWARDS_ARROW -const var"↔" = @_K LEFT_RIGHT_ARROW -const var"↚" = @_K LEFTWARDS_ARROW_WITH_STROKE -const var"↛" = @_K RIGHTWARDS_ARROW_WITH_STROKE -const var"↞" = @_K LEFTWARDS_TWO_HEADED_ARROW -const var"↠" = @_K RIGHTWARDS_TWO_HEADED_ARROW -const var"↢" = @_K LEFTWARDS_ARROW_WITH_TAIL -const var"↣" = @_K RIGHTWARDS_ARROW_WITH_TAIL -const var"↤" = @_K LEFTWARDS_ARROW_FROM_BAR -const var"↦" = @_K RIGHTWARDS_ARROW_FROM_BAR -const var"↮" = @_K LEFT_RIGHT_ARROW_WITH_STROKE -const var"⇎" = @_K LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE -const var"⇍" = @_K LEFTWARDS_DOUBLE_ARROW_WITH_STROKE -const var"⇏" = @_K RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE -const var"⇐" = @_K LEFTWARDS_DOUBLE_ARROW -const var"⇒" = @_K RIGHTWARDS_DOUBLE_ARROW -const var"⇔" = @_K LEFT_RIGHT_DOUBLE_ARROW -const var"⇴" = @_K RIGHT_ARROW_WITH_SMALL_CIRCLE -const var"⇶" = @_K THREE_RIGHTWARDS_ARROWS -const var"⇷" = @_K LEFTWARDS_ARROW_WITH_VERTICAL_STROKE -const var"⇸" = @_K RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE -const var"⇹" = @_K LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE -const var"⇺" = @_K LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⇻" = @_K RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⇼" = @_K 
LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⇽" = @_K LEFTWARDS_OPEN_HEADED_ARROW -const var"⇾" = @_K RIGHTWARDS_OPEN_HEADED_ARROW -const var"⇿" = @_K LEFT_RIGHT_OPEN_HEADED_ARROW -const var"⟵" = @_K LONG_LEFTWARDS_ARROW -const var"⟶" = @_K LONG_RIGHTWARDS_ARROW -const var"⟷" = @_K LONG_LEFT_RIGHT_ARROW -const var"⟹" = @_K LONG_RIGHTWARDS_DOUBLE_ARROW -const var"⟺" = @_K LONG_LEFT_RIGHT_DOUBLE_ARROW -const var"⟻" = @_K LONG_LEFTWARDS_ARROW_FROM_BAR -const var"⟼" = @_K LONG_RIGHTWARDS_ARROW_FROM_BAR -const var"⟽" = @_K LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⟾" = @_K LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⟿" = @_K LONG_RIGHTWARDS_SQUIGGLE_ARROW -const var"⤀" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE -const var"⤁" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⤂" = @_K LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE -const var"⤃" = @_K RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE -const var"⤄" = @_K LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE -const var"⤅" = @_K RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR -const var"⤆" = @_K LEFTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⤇" = @_K RIGHTWARDS_DOUBLE_ARROW_FROM_BAR -const var"⤌" = @_K LEFTWARDS_DOUBLE_DASH_ARROW -const var"⤍" = @_K RIGHTWARDS_DOUBLE_DASH_ARROW -const var"⤎" = @_K LEFTWARDS_TRIPLE_DASH_ARROW -const var"⤏" = @_K RIGHTWARDS_TRIPLE_DASH_ARROW -const var"⤐" = @_K RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW -const var"⤑" = @_K RIGHTWARDS_ARROW_WITH_DOTTED_STEM -const var"⤔" = @_K RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⤕" = @_K RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⤖" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL -const var"⤗" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⤘" = @_K RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⤝" = @_K LEFTWARDS_ARROW_TO_BLACK_DIAMOND -const var"⤞" = @_K RIGHTWARDS_ARROW_TO_BLACK_DIAMOND -const var"⤟" = @_K 
LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND -const var"⤠" = @_K RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND -const var"⥄" = @_K SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW -const var"⥅" = @_K RIGHTWARDS_ARROW_WITH_PLUS_BELOW -const var"⥆" = @_K LEFTWARDS_ARROW_WITH_PLUS_BELOW -const var"⥇" = @_K RIGHTWARDS_ARROW_THROUGH_X -const var"⥈" = @_K LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE -const var"⥊" = @_K LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON -const var"⥋" = @_K LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON -const var"⥎" = @_K LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON -const var"⥐" = @_K LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON -const var"⥒" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR -const var"⥓" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR -const var"⥖" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR -const var"⥗" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR -const var"⥚" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR -const var"⥛" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR -const var"⥞" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR -const var"⥟" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR -const var"⥢" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥤" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥦" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP -const var"⥧" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥨" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP -const var"⥩" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN -const var"⥪" = @_K LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH -const var"⥫" = @_K LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH -const var"⥬" = @_K RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH -const var"⥭" = @_K RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH -const var"⥰" = @_K RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD -const var"⧴" = @_K 
RULE_DELAYED -const var"⬱" = @_K THREE_LEFTWARDS_ARROWS -const var"⬰" = @_K LEFT_ARROW_WITH_SMALL_CIRCLE -const var"⬲" = @_K LEFT_ARROW_WITH_CIRCLED_PLUS -const var"⬳" = @_K LONG_LEFTWARDS_SQUIGGLE_ARROW -const var"⬴" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE -const var"⬵" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE -const var"⬶" = @_K LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR -const var"⬷" = @_K LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW -const var"⬸" = @_K LEFTWARDS_ARROW_WITH_DOTTED_STEM -const var"⬹" = @_K LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⬺" = @_K LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⬻" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL -const var"⬼" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -const var"⬽" = @_K LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -const var"⬾" = @_K LEFTWARDS_ARROW_THROUGH_X -const var"⬿" = @_K WAVE_ARROW_POINTING_DIRECTLY_LEFT -const var"⭀" = @_K EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW -const var"⭁" = @_K REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW -const var"⭂" = @_K LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO -const var"⭃" = @_K RIGHTWARDS_ARROW_THROUGH_GREATER_THAN -const var"⭄" = @_K RIGHTWARDS_ARROW_THROUGH_SUPERSET -const var"⭇" = @_K REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW -const var"⭈" = @_K RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO -const var"⭉" = @_K TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW -const var"⭊" = @_K LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO -const var"⭋" = @_K LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR -const var"⭌" = @_K RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR -const var"←" = @_K HALFWIDTH_LEFTWARDS_ARROW -const var"→" = @_K HALFWIDTH_RIGHTWARDS_ARROW -const var"↻" = @_K CIRCLE_ARROW_RIGHT -const var"⇜" = @_K LEFT_SQUIGGLE_ARROW -const var"⇝" = @_K RIGHT_SQUIGGLE_ARROW -const var"↜" = @_K LEFT_WAVE_ARROW -const var"↝" = @_K RIGHT_WAVE_ARROW -const var"↩" = @_K LEFTWARDS_ARROW_WITH_HOOK -const var"↪" = 
@_K RIGHTWARDS_ARROW_WITH_HOOK -const var"↫" = @_K LOOP_ARROW_LEFT -const var"↬" = @_K LOOP_ARROW_RIGHT -const var"↼" = @_K LEFT_HARPOON_UP -const var"↽" = @_K LEFT_HARPOON_DOWN -const var"⇀" = @_K RIGHT_HARPOON_UP -const var"⇁" = @_K RIGHT_HARPOON_DOWN -const var"⇄" = @_K RIGHT_LEFT_ARROWS -const var"⇆" = @_K LEFT_RIGHT_ARROWS -const var"⇇" = @_K LEFT_LEFT_ARROWS -const var"⇉" = @_K RIGHT_RIGHT_ARROWS -const var"⇋" = @_K LEFT_RIGHT_HARPOONS -const var"⇌" = @_K RIGHT_LEFT_HARPOONS -const var"⇚" = @_K L_LEFT_ARROW -const var"⇛" = @_K R_RIGHT_ARROW -const var"⇠" = @_K LEFT_DASH_ARROW -const var"⇢" = @_K RIGHT_DASH_ARROW -const var"↷" = @_K CURVE_ARROW_RIGHT -const var"↶" = @_K CURVE_ARROW_LEFT -const var"↺" = @_K CIRCLE_ARROW_LEFT -const END_ARROW = @_K end_arrow +"BEGIN_ARROW" => Ts.begin_arrow +"-->" => Ts.RIGHT_ARROW +"<--" => Ts.LEFT_ARROW +"<-->" => Ts.DOUBLE_ARROW +"←" => Ts.LEFTWARDS_ARROW +"→" => Ts.RIGHTWARDS_ARROW +"↔" => Ts.LEFT_RIGHT_ARROW +"↚" => Ts.LEFTWARDS_ARROW_WITH_STROKE +"↛" => Ts.RIGHTWARDS_ARROW_WITH_STROKE +"↞" => Ts.LEFTWARDS_TWO_HEADED_ARROW +"↠" => Ts.RIGHTWARDS_TWO_HEADED_ARROW +"↢" => Ts.LEFTWARDS_ARROW_WITH_TAIL +"↣" => Ts.RIGHTWARDS_ARROW_WITH_TAIL +"↤" => Ts.LEFTWARDS_ARROW_FROM_BAR +"↦" => Ts.RIGHTWARDS_ARROW_FROM_BAR +"↮" => Ts.LEFT_RIGHT_ARROW_WITH_STROKE +"⇎" => Ts.LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE +"⇍" => Ts.LEFTWARDS_DOUBLE_ARROW_WITH_STROKE +"⇏" => Ts.RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE +"⇐" => Ts.LEFTWARDS_DOUBLE_ARROW +"⇒" => Ts.RIGHTWARDS_DOUBLE_ARROW +"⇔" => Ts.LEFT_RIGHT_DOUBLE_ARROW +"⇴" => Ts.RIGHT_ARROW_WITH_SMALL_CIRCLE +"⇶" => Ts.THREE_RIGHTWARDS_ARROWS +"⇷" => Ts.LEFTWARDS_ARROW_WITH_VERTICAL_STROKE +"⇸" => Ts.RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE +"⇹" => Ts.LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE +"⇺" => Ts.LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE +"⇻" => Ts.RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE +"⇼" => Ts.LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE +"⇽" => Ts.LEFTWARDS_OPEN_HEADED_ARROW +"⇾" => 
Ts.RIGHTWARDS_OPEN_HEADED_ARROW +"⇿" => Ts.LEFT_RIGHT_OPEN_HEADED_ARROW +"⟵" => Ts.LONG_LEFTWARDS_ARROW +"⟶" => Ts.LONG_RIGHTWARDS_ARROW +"⟷" => Ts.LONG_LEFT_RIGHT_ARROW +"⟹" => Ts.LONG_RIGHTWARDS_DOUBLE_ARROW +"⟺" => Ts.LONG_LEFT_RIGHT_DOUBLE_ARROW +"⟻" => Ts.LONG_LEFTWARDS_ARROW_FROM_BAR +"⟼" => Ts.LONG_RIGHTWARDS_ARROW_FROM_BAR +"⟽" => Ts.LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR +"⟾" => Ts.LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR +"⟿" => Ts.LONG_RIGHTWARDS_SQUIGGLE_ARROW +"⤀" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE +"⤁" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE +"⤂" => Ts.LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE +"⤃" => Ts.RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE +"⤄" => Ts.LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE +"⤅" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR +"⤆" => Ts.LEFTWARDS_DOUBLE_ARROW_FROM_BAR +"⤇" => Ts.RIGHTWARDS_DOUBLE_ARROW_FROM_BAR +"⤌" => Ts.LEFTWARDS_DOUBLE_DASH_ARROW +"⤍" => Ts.RIGHTWARDS_DOUBLE_DASH_ARROW +"⤎" => Ts.LEFTWARDS_TRIPLE_DASH_ARROW +"⤏" => Ts.RIGHTWARDS_TRIPLE_DASH_ARROW +"⤐" => Ts.RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW +"⤑" => Ts.RIGHTWARDS_ARROW_WITH_DOTTED_STEM +"⤔" => Ts.RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +"⤕" => Ts.RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +"⤖" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL +"⤗" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +"⤘" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +"⤝" => Ts.LEFTWARDS_ARROW_TO_BLACK_DIAMOND +"⤞" => Ts.RIGHTWARDS_ARROW_TO_BLACK_DIAMOND +"⤟" => Ts.LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND +"⤠" => Ts.RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND +"⥄" => Ts.SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW +"⥅" => Ts.RIGHTWARDS_ARROW_WITH_PLUS_BELOW +"⥆" => Ts.LEFTWARDS_ARROW_WITH_PLUS_BELOW +"⥇" => Ts.RIGHTWARDS_ARROW_THROUGH_X +"⥈" => Ts.LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE +"⥊" => Ts.LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON +"⥋" => Ts.LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON 
+"⥎" => Ts.LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON +"⥐" => Ts.LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON +"⥒" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR +"⥓" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR +"⥖" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR +"⥗" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR +"⥚" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR +"⥛" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR +"⥞" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR +"⥟" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR +"⥢" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN +"⥤" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN +"⥦" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP +"⥧" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN +"⥨" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP +"⥩" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN +"⥪" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH +"⥫" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH +"⥬" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH +"⥭" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH +"⥰" => Ts.RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD +"⧴" => Ts.RULE_DELAYED +"⬱" => Ts.THREE_LEFTWARDS_ARROWS +"⬰" => Ts.LEFT_ARROW_WITH_SMALL_CIRCLE +"⬲" => Ts.LEFT_ARROW_WITH_CIRCLED_PLUS +"⬳" => Ts.LONG_LEFTWARDS_SQUIGGLE_ARROW +"⬴" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE +"⬵" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE +"⬶" => Ts.LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR +"⬷" => Ts.LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW +"⬸" => Ts.LEFTWARDS_ARROW_WITH_DOTTED_STEM +"⬹" => Ts.LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +"⬺" => Ts.LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +"⬻" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL +"⬼" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE +"⬽" => 
Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE +"⬾" => Ts.LEFTWARDS_ARROW_THROUGH_X +"⬿" => Ts.WAVE_ARROW_POINTING_DIRECTLY_LEFT +"⭀" => Ts.EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW +"⭁" => Ts.REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW +"⭂" => Ts.LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO +"⭃" => Ts.RIGHTWARDS_ARROW_THROUGH_GREATER_THAN +"⭄" => Ts.RIGHTWARDS_ARROW_THROUGH_SUPERSET +"⭇" => Ts.REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW +"⭈" => Ts.RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO +"⭉" => Ts.TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW +"⭊" => Ts.LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO +"⭋" => Ts.LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR +"⭌" => Ts.RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR +"←" => Ts.HALFWIDTH_LEFTWARDS_ARROW +"→" => Ts.HALFWIDTH_RIGHTWARDS_ARROW +"↻" => Ts.CIRCLE_ARROW_RIGHT +"⇜" => Ts.LEFT_SQUIGGLE_ARROW +"⇝" => Ts.RIGHT_SQUIGGLE_ARROW +"↜" => Ts.LEFT_WAVE_ARROW +"↝" => Ts.RIGHT_WAVE_ARROW +"↩" => Ts.LEFTWARDS_ARROW_WITH_HOOK +"↪" => Ts.RIGHTWARDS_ARROW_WITH_HOOK +"↫" => Ts.LOOP_ARROW_LEFT +"↬" => Ts.LOOP_ARROW_RIGHT +"↼" => Ts.LEFT_HARPOON_UP +"↽" => Ts.LEFT_HARPOON_DOWN +"⇀" => Ts.RIGHT_HARPOON_UP +"⇁" => Ts.RIGHT_HARPOON_DOWN +"⇄" => Ts.RIGHT_LEFT_ARROWS +"⇆" => Ts.LEFT_RIGHT_ARROWS +"⇇" => Ts.LEFT_LEFT_ARROWS +"⇉" => Ts.RIGHT_RIGHT_ARROWS +"⇋" => Ts.LEFT_RIGHT_HARPOONS +"⇌" => Ts.RIGHT_LEFT_HARPOONS +"⇚" => Ts.L_LEFT_ARROW +"⇛" => Ts.R_RIGHT_ARROW +"⇠" => Ts.LEFT_DASH_ARROW +"⇢" => Ts.RIGHT_DASH_ARROW +"↷" => Ts.CURVE_ARROW_RIGHT +"↶" => Ts.CURVE_ARROW_LEFT +"↺" => Ts.CIRCLE_ARROW_LEFT +"END_ARROW" => Ts.end_arrow # Level 4 -const BEGIN_LAZYOR = @_K begin_lazyor -const var"||" = @_K LAZY_OR -const END_LAZYOR = @_K end_lazyor +"BEGIN_LAZYOR" => Ts.begin_lazyor +"||" => Ts.LAZY_OR +"END_LAZYOR" => Ts.end_lazyor # Level 5 -const BEGIN_LAZYAND = @_K begin_lazyand -const var"&&" = @_K LAZY_AND -const END_LAZYAND = @_K end_lazyand +"BEGIN_LAZYAND" => Ts.begin_lazyand +"&&" => Ts.LAZY_AND +"END_LAZYAND" => Ts.end_lazyand 
# Level 6 -const BEGIN_COMPARISON = @_K begin_comparison -const var"<:" = @_K ISSUBTYPE -const var">:" = @_K ISSUPERTYPE -const var">" = @_K GREATER -const var"<" = @_K LESS -const var">=" = @_K GREATER_EQ -const var"≥" = @_K GREATER_THAN_OR_EQUAL_TO -const var"<=" = @_K LESS_EQ -const var"≤" = @_K LESS_THAN_OR_EQUAL_TO -const var"==" = @_K EQEQ -const var"===" = @_K EQEQEQ -const var"≡" = @_K IDENTICAL_TO -const var"!=" = @_K NOT_EQ -const var"≠" = @_K NOT_EQUAL_TO -const var"!==" = @_K NOT_IS -const var"≢" = @_K NOT_IDENTICAL_TO -const var"∈" = @_K ELEMENT_OF -const var"in" = @_K IN -const var"isa" = @_K ISA -const var"∉" = @_K NOT_AN_ELEMENT_OF -const var"∋" = @_K CONTAINS_AS_MEMBER -const var"∌" = @_K DOES_NOT_CONTAIN_AS_MEMBER -const var"⊆" = @_K SUBSET_OF_OR_EQUAL_TO -const var"⊈" = @_K NEITHER_A_SUBSET_OF_NOR_EQUAL_TO -const var"⊂" = @_K SUBSET_OF -const var"⊄" = @_K NOT_A_SUBSET_OF -const var"⊊" = @_K SUBSET_OF_WITH_NOT_EQUAL_TO -const var"∝" = @_K PROPORTIONAL_TO -const var"∊" = @_K SMALL_ELEMENT_OF -const var"∍" = @_K SMALL_CONTAINS_AS_MEMBER -const var"∥" = @_K PARALLEL_TO -const var"∦" = @_K NOT_PARALLEL_TO -const var"∷" = @_K PROPORTION -const var"∺" = @_K GEOMETRIC_PROPORTION -const var"∻" = @_K HOMOTHETIC -const var"∽" = @_K REVERSED_TILDE -const var"∾" = @_K INVERTED_LAZY_S -const var"≁" = @_K NOT_TILDE -const var"≃" = @_K ASYMPTOTICALLY_EQUAL_TO -const var"≄" = @_K NOT_ASYMPTOTICALLY_EQUAL_TO -const var"≅" = @_K APPROXIMATELY_EQUAL_TO -const var"≆" = @_K APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO -const var"≇" = @_K NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO -const var"≈" = @_K ALMOST_EQUAL_TO -const var"≉" = @_K NOT_ALMOST_EQUAL_TO -const var"≊" = @_K ALMOST_EQUAL_OR_EQUAL_TO -const var"≋" = @_K TRIPLE_TILDE -const var"≌" = @_K ALL_EQUAL_TO -const var"≍" = @_K EQUIVALENT_TO -const var"≎" = @_K GEOMETRICALLY_EQUIVALENT_TO -const var"≐" = @_K APPROACHES_THE_LIMIT -const var"≑" = @_K GEOMETRICALLY_EQUAL_TO -const var"≒" = @_K 
APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF -const var"≓" = @_K IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO -const var"≔" = @_K COLON_EQUALS -const var"≕" = @_K EQUALS_COLON -const var"≖" = @_K RING_IN_EQUAL_TO -const var"≗" = @_K RING_EQUAL_TO -const var"≘" = @_K CORRESPONDS_TO -const var"≙" = @_K ESTIMATES -const var"≚" = @_K EQUIANGULAR_TO -const var"≛" = @_K STAR_EQUALS -const var"≜" = @_K DELTA_EQUAL_TO -const var"≝" = @_K EQUAL_TO_BY_DEFINITION -const var"≞" = @_K MEASURED_BY -const var"≟" = @_K QUESTIONED_EQUAL_TO -const var"≣" = @_K STRICTLY_EQUIVALENT_TO -const var"≦" = @_K LESS_THAN_OVER_EQUAL_TO -const var"≧" = @_K GREATER_THAN_OVER_EQUAL_TO -const var"≨" = @_K LESS_THAN_BUT_NOT_EQUAL_TO -const var"≩" = @_K GREATER_THAN_BUT_NOT_EQUAL_TO -const var"≪" = @_K MUCH_LESS_THAN -const var"≫" = @_K MUCH_GREATER_THAN -const var"≬" = @_K BETWEEN -const var"≭" = @_K NOT_EQUIVALENT_TO -const var"≮" = @_K NOT_LESS_THAN -const var"≯" = @_K NOT_GREATER_THAN -const var"≰" = @_K NEITHER_LESS_THAN_NOR_EQUAL_TO -const var"≱" = @_K NEITHER_GREATER_THAN_NOR_EQUAL_TO -const var"≲" = @_K LESS_THAN_OR_EQUIVALENT_TO -const var"≳" = @_K GREATER_THAN_OR_EQUIVALENT_TO -const var"≴" = @_K NEITHER_LESS_THAN_NOR_EQUIVALENT_TO -const var"≵" = @_K NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO -const var"≶" = @_K LESS_THAN_OR_GREATER_THAN -const var"≷" = @_K GREATER_THAN_OR_LESS_THAN -const var"≸" = @_K NEITHER_LESS_THAN_NOR_GREATER_THAN -const var"≹" = @_K NEITHER_GREATER_THAN_NOR_LESS_THAN -const var"≺" = @_K PRECEDES -const var"≻" = @_K SUCCEEDS -const var"≼" = @_K PRECEDES_OR_EQUAL_TO -const var"≽" = @_K SUCCEEDS_OR_EQUAL_TO -const var"≾" = @_K PRECEDES_OR_EQUIVALENT_TO -const var"≿" = @_K SUCCEEDS_OR_EQUIVALENT_TO -const var"⊀" = @_K DOES_NOT_PRECEDE -const var"⊁" = @_K DOES_NOT_SUCCEED -const var"⊃" = @_K SUPERSET_OF -const var"⊅" = @_K NOT_A_SUPERSET_OF -const var"⊇" = @_K SUPERSET_OF_OR_EQUAL_TO -const var"⊉" = @_K NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO -const var"⊋" = @_K SUPERSET_OF_WITH_NOT_EQUAL_TO 
-const var"⊏" = @_K SQUARE_IMAGE_OF -const var"⊐" = @_K SQUARE_ORIGINAL_OF -const var"⊑" = @_K SQUARE_IMAGE_OF_OR_EQUAL_TO -const var"⊒" = @_K SQUARE_ORIGINAL_OF_OR_EQUAL_TO -const var"⊜" = @_K CIRCLED_EQUALS -const var"⊩" = @_K FORCES -const var"⊬" = @_K DOES_NOT_PROVE -const var"⊮" = @_K DOES_NOT_FORCE -const var"⊰" = @_K PRECEDES_UNDER_RELATION -const var"⊱" = @_K SUCCEEDS_UNDER_RELATION -const var"⊲" = @_K NORMAL_SUBGROUP_OF -const var"⊳" = @_K CONTAINS_AS_NORMAL_SUBGROUP -const var"⊴" = @_K NORMAL_SUBGROUP_OF_OR_EQUAL_TO -const var"⊵" = @_K CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO -const var"⊶" = @_K ORIGINAL_OF -const var"⊷" = @_K IMAGE_OF -const var"⋍" = @_K REVERSED_TILDE_EQUALS -const var"⋐" = @_K DOUBLE_SUBSET -const var"⋑" = @_K DOUBLE_SUPERSET -const var"⋕" = @_K EQUAL_AND_PARALLEL_TO -const var"⋖" = @_K LESS_THAN_WITH_DOT -const var"⋗" = @_K GREATER_THAN_WITH_DOT -const var"⋘" = @_K VERY_MUCH_LESS_THAN -const var"⋙" = @_K VERY_MUCH_GREATER_THAN -const var"⋚" = @_K LESS_THAN_EQUAL_TO_OR_GREATER_THAN -const var"⋛" = @_K GREATER_THAN_EQUAL_TO_OR_LESS_THAN -const var"⋜" = @_K EQUAL_TO_OR_LESS_THAN -const var"⋝" = @_K EQUAL_TO_OR_GREATER_THAN -const var"⋞" = @_K EQUAL_TO_OR_PRECEDES -const var"⋟" = @_K EQUAL_TO_OR_SUCCEEDS -const var"⋠" = @_K DOES_NOT_PRECEDE_OR_EQUAL -const var"⋡" = @_K DOES_NOT_SUCCEED_OR_EQUAL -const var"⋢" = @_K NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO -const var"⋣" = @_K NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO -const var"⋤" = @_K SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO -const var"⋥" = @_K SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO -const var"⋦" = @_K LESS_THAN_BUT_NOT_EQUIVALENT_TO -const var"⋧" = @_K GREATER_THAN_BUT_NOT_EQUIVALENT_TO -const var"⋨" = @_K PRECEDES_BUT_NOT_EQUIVALENT_TO -const var"⋩" = @_K SUCCEEDS_BUT_NOT_EQUIVALENT_TO -const var"⋪" = @_K NOT_NORMAL_SUBGROUP_OF -const var"⋫" = @_K DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP -const var"⋬" = @_K NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO -const var"⋭" = @_K DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL -const var"⋲" = 
@_K ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE -const var"⋳" = @_K ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋴" = @_K SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋵" = @_K ELEMENT_OF_WITH_DOT_ABOVE -const var"⋶" = @_K ELEMENT_OF_WITH_OVERBAR -const var"⋷" = @_K SMALL_ELEMENT_OF_WITH_OVERBAR -const var"⋸" = @_K ELEMENT_OF_WITH_UNDERBAR -const var"⋹" = @_K ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES -const var"⋺" = @_K CONTAINS_WITH_LONG_HORIZONTAL_STROKE -const var"⋻" = @_K CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋼" = @_K SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -const var"⋽" = @_K CONTAINS_WITH_OVERBAR -const var"⋾" = @_K SMALL_CONTAINS_WITH_OVERBAR -const var"⋿" = @_K Z_NOTATION_BAG_MEMBERSHIP -const var"⟈" = @_K REVERSE_SOLIDUS_PRECEDING_SUBSET -const var"⟉" = @_K SUPERSET_PRECEDING_SOLIDUS -const var"⟒" = @_K ELEMENT_OF_OPENING_UPWARDS -const var"⦷" = @_K CIRCLED_PARALLEL -const var"⧀" = @_K CIRCLED_LESS_THAN -const var"⧁" = @_K CIRCLED_GREATER_THAN -const var"⧡" = @_K INCREASES_AS -const var"⧣" = @_K EQUALS_SIGN_AND_SLANTED_PARALLEL -const var"⧤" = @_K EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE -const var"⧥" = @_K IDENTICAL_TO_AND_SLANTED_PARALLEL -const var"⩦" = @_K EQUALS_SIGN_WITH_DOT_BELOW -const var"⩧" = @_K IDENTICAL_WITH_DOT_ABOVE -const var"⩪" = @_K TILDE_OPERATOR_WITH_DOT_ABOVE -const var"⩫" = @_K TILDE_OPERATOR_WITH_RISING_DOTS -const var"⩬" = @_K SIMILAR_MINUS_SIMILAR -const var"⩭" = @_K CONGRUENT_WITH_DOT_ABOVE -const var"⩮" = @_K EQUALS_WITH_ASTERISK -const var"⩯" = @_K ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT -const var"⩰" = @_K APPROXIMATELY_EQUAL_OR_EQUAL_TO -const var"⩱" = @_K EQUALS_SIGN_ABOVE_PLUS_SIGN -const var"⩲" = @_K PLUS_SIGN_ABOVE_EQUALS_SIGN -const var"⩳" = @_K EQUALS_SIGN_ABOVE_TILDE_OPERATOR -const var"⩴" = @_K DOUBLE_COLON_EQUAL -const var"⩵" = @_K TWO_CONSECUTIVE_EQUALS_SIGNS -const var"⩶" = @_K THREE_CONSECUTIVE_EQUALS_SIGNS -const 
var"⩷" = @_K EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW -const var"⩸" = @_K EQUIVALENT_WITH_FOUR_DOTS_ABOVE -const var"⩹" = @_K LESS_THAN_WITH_CIRCLE_INSIDE -const var"⩺" = @_K GREATER_THAN_WITH_CIRCLE_INSIDE -const var"⩻" = @_K LESS_THAN_WITH_QUESTION_MARK_ABOVE -const var"⩼" = @_K GREATER_THAN_WITH_QUESTION_MARK_ABOVE -const var"⩽" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO -const var"⩾" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO -const var"⩿" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE -const var"⪀" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE -const var"⪁" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE -const var"⪂" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE -const var"⪃" = @_K LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT -const var"⪄" = @_K GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT -const var"⪅" = @_K LESS_THAN_OR_APPROXIMATE -const var"⪆" = @_K GREATER_THAN_OR_APPROXIMATE -const var"⪇" = @_K LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO -const var"⪈" = @_K GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO -const var"⪉" = @_K LESS_THAN_AND_NOT_APPROXIMATE -const var"⪊" = @_K GREATER_THAN_AND_NOT_APPROXIMATE -const var"⪋" = @_K LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN -const var"⪌" = @_K GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN -const var"⪍" = @_K LESS_THAN_ABOVE_SIMILAR_OR_EQUAL -const var"⪎" = @_K GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL -const var"⪏" = @_K LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN -const var"⪐" = @_K GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN -const var"⪑" = @_K LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL -const var"⪒" = @_K GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL -const var"⪓" = @_K LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL -const var"⪔" = @_K GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL -const var"⪕" = @_K SLANTED_EQUAL_TO_OR_LESS_THAN -const var"⪖" = @_K SLANTED_EQUAL_TO_OR_GREATER_THAN -const var"⪗" = @_K 
SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE -const var"⪘" = @_K SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE -const var"⪙" = @_K DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN -const var"⪚" = @_K DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN -const var"⪛" = @_K DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN -const var"⪜" = @_K DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN -const var"⪝" = @_K SIMILAR_OR_LESS_THAN -const var"⪞" = @_K SIMILAR_OR_GREATER_THAN -const var"⪟" = @_K SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN -const var"⪠" = @_K SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN -const var"⪡" = @_K DOUBLE_NESTED_LESS_THAN -const var"⪢" = @_K DOUBLE_NESTED_GREATER_THAN -const var"⪣" = @_K DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR -const var"⪤" = @_K GREATER_THAN_OVERLAPPING_LESS_THAN -const var"⪥" = @_K GREATER_THAN_BESIDE_LESS_THAN -const var"⪦" = @_K LESS_THAN_CLOSED_BY_CURVE -const var"⪧" = @_K GREATER_THAN_CLOSED_BY_CURVE -const var"⪨" = @_K LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL -const var"⪩" = @_K GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL -const var"⪪" = @_K SMALLER_THAN -const var"⪫" = @_K LARGER_THAN -const var"⪬" = @_K SMALLER_THAN_OR_EQUAL_TO -const var"⪭" = @_K LARGER_THAN_OR_EQUAL_TO -const var"⪮" = @_K EQUALS_SIGN_WITH_BUMPY_ABOVE -const var"⪯" = @_K PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN -const var"⪰" = @_K SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN -const var"⪱" = @_K PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO -const var"⪲" = @_K SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO -const var"⪳" = @_K PRECEDES_ABOVE_EQUALS_SIGN -const var"⪴" = @_K SUCCEEDS_ABOVE_EQUALS_SIGN -const var"⪵" = @_K PRECEDES_ABOVE_NOT_EQUAL_TO -const var"⪶" = @_K SUCCEEDS_ABOVE_NOT_EQUAL_TO -const var"⪷" = @_K PRECEDES_ABOVE_ALMOST_EQUAL_TO -const var"⪸" = @_K SUCCEEDS_ABOVE_ALMOST_EQUAL_TO -const var"⪹" = @_K PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO -const var"⪺" = @_K SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO -const var"⪻" = @_K DOUBLE_PRECEDES -const var"⪼" = @_K DOUBLE_SUCCEEDS -const var"⪽" = @_K SUBSET_WITH_DOT 
-const var"⪾" = @_K SUPERSET_WITH_DOT -const var"⪿" = @_K SUBSET_WITH_PLUS_SIGN_BELOW -const var"⫀" = @_K SUPERSET_WITH_PLUS_SIGN_BELOW -const var"⫁" = @_K SUBSET_WITH_MULTIPLICATION_SIGN_BELOW -const var"⫂" = @_K SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW -const var"⫃" = @_K SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE -const var"⫄" = @_K SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE -const var"⫅" = @_K SUBSET_OF_ABOVE_EQUALS_SIGN -const var"⫆" = @_K SUPERSET_OF_ABOVE_EQUALS_SIGN -const var"⫇" = @_K SUBSET_OF_ABOVE_TILDE_OPERATOR -const var"⫈" = @_K SUPERSET_OF_ABOVE_TILDE_OPERATOR -const var"⫉" = @_K SUBSET_OF_ABOVE_ALMOST_EQUAL_TO -const var"⫊" = @_K SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO -const var"⫋" = @_K SUBSET_OF_ABOVE_NOT_EQUAL_TO -const var"⫌" = @_K SUPERSET_OF_ABOVE_NOT_EQUAL_TO -const var"⫍" = @_K SQUARE_LEFT_OPEN_BOX_OPERATOR -const var"⫎" = @_K SQUARE_RIGHT_OPEN_BOX_OPERATOR -const var"⫏" = @_K CLOSED_SUBSET -const var"⫐" = @_K CLOSED_SUPERSET -const var"⫑" = @_K CLOSED_SUBSET_OR_EQUAL_TO -const var"⫒" = @_K CLOSED_SUPERSET_OR_EQUAL_TO -const var"⫓" = @_K SUBSET_ABOVE_SUPERSET -const var"⫔" = @_K SUPERSET_ABOVE_SUBSET -const var"⫕" = @_K SUBSET_ABOVE_SUBSET -const var"⫖" = @_K SUPERSET_ABOVE_SUPERSET -const var"⫗" = @_K SUPERSET_BESIDE_SUBSET -const var"⫘" = @_K SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET -const var"⫙" = @_K ELEMENT_OF_OPENING_DOWNWARDS -const var"⫷" = @_K TRIPLE_NESTED_LESS_THAN -const var"⫸" = @_K TRIPLE_NESTED_GREATER_THAN -const var"⫹" = @_K DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO -const var"⫺" = @_K DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO -const var"⊢" = @_K RIGHT_TACK -const var"⊣" = @_K LEFT_TACK -const var"⟂" = @_K PERP -const END_COMPARISON = @_K end_comparison +"BEGIN_COMPARISON" => Ts.begin_comparison +"<:" => Ts.ISSUBTYPE +">:" => Ts.ISSUPERTYPE +">" => Ts.GREATER +"<" => Ts.LESS +">=" => Ts.GREATER_EQ +"≥" => Ts.GREATER_THAN_OR_EQUAL_TO +"<=" => Ts.LESS_EQ +"≤" => Ts.LESS_THAN_OR_EQUAL_TO +"==" => Ts.EQEQ +"===" => Ts.EQEQEQ +"≡" => 
Ts.IDENTICAL_TO +"!=" => Ts.NOT_EQ +"≠" => Ts.NOT_EQUAL_TO +"!==" => Ts.NOT_IS +"≢" => Ts.NOT_IDENTICAL_TO +"∈" => Ts.ELEMENT_OF +"in" => Ts.IN +"isa" => Ts.ISA +"∉" => Ts.NOT_AN_ELEMENT_OF +"∋" => Ts.CONTAINS_AS_MEMBER +"∌" => Ts.DOES_NOT_CONTAIN_AS_MEMBER +"⊆" => Ts.SUBSET_OF_OR_EQUAL_TO +"⊈" => Ts.NEITHER_A_SUBSET_OF_NOR_EQUAL_TO +"⊂" => Ts.SUBSET_OF +"⊄" => Ts.NOT_A_SUBSET_OF +"⊊" => Ts.SUBSET_OF_WITH_NOT_EQUAL_TO +"∝" => Ts.PROPORTIONAL_TO +"∊" => Ts.SMALL_ELEMENT_OF +"∍" => Ts.SMALL_CONTAINS_AS_MEMBER +"∥" => Ts.PARALLEL_TO +"∦" => Ts.NOT_PARALLEL_TO +"∷" => Ts.PROPORTION +"∺" => Ts.GEOMETRIC_PROPORTION +"∻" => Ts.HOMOTHETIC +"∽" => Ts.REVERSED_TILDE +"∾" => Ts.INVERTED_LAZY_S +"≁" => Ts.NOT_TILDE +"≃" => Ts.ASYMPTOTICALLY_EQUAL_TO +"≄" => Ts.NOT_ASYMPTOTICALLY_EQUAL_TO +"≅" => Ts.APPROXIMATELY_EQUAL_TO +"≆" => Ts.APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO +"≇" => Ts.NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO +"≈" => Ts.ALMOST_EQUAL_TO +"≉" => Ts.NOT_ALMOST_EQUAL_TO +"≊" => Ts.ALMOST_EQUAL_OR_EQUAL_TO +"≋" => Ts.TRIPLE_TILDE +"≌" => Ts.ALL_EQUAL_TO +"≍" => Ts.EQUIVALENT_TO +"≎" => Ts.GEOMETRICALLY_EQUIVALENT_TO +"≐" => Ts.APPROACHES_THE_LIMIT +"≑" => Ts.GEOMETRICALLY_EQUAL_TO +"≒" => Ts.APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF +"≓" => Ts.IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO +"≔" => Ts.COLON_EQUALS +"≕" => Ts.EQUALS_COLON +"≖" => Ts.RING_IN_EQUAL_TO +"≗" => Ts.RING_EQUAL_TO +"≘" => Ts.CORRESPONDS_TO +"≙" => Ts.ESTIMATES +"≚" => Ts.EQUIANGULAR_TO +"≛" => Ts.STAR_EQUALS +"≜" => Ts.DELTA_EQUAL_TO +"≝" => Ts.EQUAL_TO_BY_DEFINITION +"≞" => Ts.MEASURED_BY +"≟" => Ts.QUESTIONED_EQUAL_TO +"≣" => Ts.STRICTLY_EQUIVALENT_TO +"≦" => Ts.LESS_THAN_OVER_EQUAL_TO +"≧" => Ts.GREATER_THAN_OVER_EQUAL_TO +"≨" => Ts.LESS_THAN_BUT_NOT_EQUAL_TO +"≩" => Ts.GREATER_THAN_BUT_NOT_EQUAL_TO +"≪" => Ts.MUCH_LESS_THAN +"≫" => Ts.MUCH_GREATER_THAN +"≬" => Ts.BETWEEN +"≭" => Ts.NOT_EQUIVALENT_TO +"≮" => Ts.NOT_LESS_THAN +"≯" => Ts.NOT_GREATER_THAN +"≰" => Ts.NEITHER_LESS_THAN_NOR_EQUAL_TO +"≱" => 
Ts.NEITHER_GREATER_THAN_NOR_EQUAL_TO +"≲" => Ts.LESS_THAN_OR_EQUIVALENT_TO +"≳" => Ts.GREATER_THAN_OR_EQUIVALENT_TO +"≴" => Ts.NEITHER_LESS_THAN_NOR_EQUIVALENT_TO +"≵" => Ts.NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO +"≶" => Ts.LESS_THAN_OR_GREATER_THAN +"≷" => Ts.GREATER_THAN_OR_LESS_THAN +"≸" => Ts.NEITHER_LESS_THAN_NOR_GREATER_THAN +"≹" => Ts.NEITHER_GREATER_THAN_NOR_LESS_THAN +"≺" => Ts.PRECEDES +"≻" => Ts.SUCCEEDS +"≼" => Ts.PRECEDES_OR_EQUAL_TO +"≽" => Ts.SUCCEEDS_OR_EQUAL_TO +"≾" => Ts.PRECEDES_OR_EQUIVALENT_TO +"≿" => Ts.SUCCEEDS_OR_EQUIVALENT_TO +"⊀" => Ts.DOES_NOT_PRECEDE +"⊁" => Ts.DOES_NOT_SUCCEED +"⊃" => Ts.SUPERSET_OF +"⊅" => Ts.NOT_A_SUPERSET_OF +"⊇" => Ts.SUPERSET_OF_OR_EQUAL_TO +"⊉" => Ts.NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO +"⊋" => Ts.SUPERSET_OF_WITH_NOT_EQUAL_TO +"⊏" => Ts.SQUARE_IMAGE_OF +"⊐" => Ts.SQUARE_ORIGINAL_OF +"⊑" => Ts.SQUARE_IMAGE_OF_OR_EQUAL_TO +"⊒" => Ts.SQUARE_ORIGINAL_OF_OR_EQUAL_TO +"⊜" => Ts.CIRCLED_EQUALS +"⊩" => Ts.FORCES +"⊬" => Ts.DOES_NOT_PROVE +"⊮" => Ts.DOES_NOT_FORCE +"⊰" => Ts.PRECEDES_UNDER_RELATION +"⊱" => Ts.SUCCEEDS_UNDER_RELATION +"⊲" => Ts.NORMAL_SUBGROUP_OF +"⊳" => Ts.CONTAINS_AS_NORMAL_SUBGROUP +"⊴" => Ts.NORMAL_SUBGROUP_OF_OR_EQUAL_TO +"⊵" => Ts.CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO +"⊶" => Ts.ORIGINAL_OF +"⊷" => Ts.IMAGE_OF +"⋍" => Ts.REVERSED_TILDE_EQUALS +"⋐" => Ts.DOUBLE_SUBSET +"⋑" => Ts.DOUBLE_SUPERSET +"⋕" => Ts.EQUAL_AND_PARALLEL_TO +"⋖" => Ts.LESS_THAN_WITH_DOT +"⋗" => Ts.GREATER_THAN_WITH_DOT +"⋘" => Ts.VERY_MUCH_LESS_THAN +"⋙" => Ts.VERY_MUCH_GREATER_THAN +"⋚" => Ts.LESS_THAN_EQUAL_TO_OR_GREATER_THAN +"⋛" => Ts.GREATER_THAN_EQUAL_TO_OR_LESS_THAN +"⋜" => Ts.EQUAL_TO_OR_LESS_THAN +"⋝" => Ts.EQUAL_TO_OR_GREATER_THAN +"⋞" => Ts.EQUAL_TO_OR_PRECEDES +"⋟" => Ts.EQUAL_TO_OR_SUCCEEDS +"⋠" => Ts.DOES_NOT_PRECEDE_OR_EQUAL +"⋡" => Ts.DOES_NOT_SUCCEED_OR_EQUAL +"⋢" => Ts.NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO +"⋣" => Ts.NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO +"⋤" => Ts.SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO +"⋥" => 
Ts.SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO +"⋦" => Ts.LESS_THAN_BUT_NOT_EQUIVALENT_TO +"⋧" => Ts.GREATER_THAN_BUT_NOT_EQUIVALENT_TO +"⋨" => Ts.PRECEDES_BUT_NOT_EQUIVALENT_TO +"⋩" => Ts.SUCCEEDS_BUT_NOT_EQUIVALENT_TO +"⋪" => Ts.NOT_NORMAL_SUBGROUP_OF +"⋫" => Ts.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP +"⋬" => Ts.NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO +"⋭" => Ts.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL +"⋲" => Ts.ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE +"⋳" => Ts.ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +"⋴" => Ts.SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +"⋵" => Ts.ELEMENT_OF_WITH_DOT_ABOVE +"⋶" => Ts.ELEMENT_OF_WITH_OVERBAR +"⋷" => Ts.SMALL_ELEMENT_OF_WITH_OVERBAR +"⋸" => Ts.ELEMENT_OF_WITH_UNDERBAR +"⋹" => Ts.ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES +"⋺" => Ts.CONTAINS_WITH_LONG_HORIZONTAL_STROKE +"⋻" => Ts.CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +"⋼" => Ts.SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE +"⋽" => Ts.CONTAINS_WITH_OVERBAR +"⋾" => Ts.SMALL_CONTAINS_WITH_OVERBAR +"⋿" => Ts.Z_NOTATION_BAG_MEMBERSHIP +"⟈" => Ts.REVERSE_SOLIDUS_PRECEDING_SUBSET +"⟉" => Ts.SUPERSET_PRECEDING_SOLIDUS +"⟒" => Ts.ELEMENT_OF_OPENING_UPWARDS +"⦷" => Ts.CIRCLED_PARALLEL +"⧀" => Ts.CIRCLED_LESS_THAN +"⧁" => Ts.CIRCLED_GREATER_THAN +"⧡" => Ts.INCREASES_AS +"⧣" => Ts.EQUALS_SIGN_AND_SLANTED_PARALLEL +"⧤" => Ts.EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE +"⧥" => Ts.IDENTICAL_TO_AND_SLANTED_PARALLEL +"⩦" => Ts.EQUALS_SIGN_WITH_DOT_BELOW +"⩧" => Ts.IDENTICAL_WITH_DOT_ABOVE +"⩪" => Ts.TILDE_OPERATOR_WITH_DOT_ABOVE +"⩫" => Ts.TILDE_OPERATOR_WITH_RISING_DOTS +"⩬" => Ts.SIMILAR_MINUS_SIMILAR +"⩭" => Ts.CONGRUENT_WITH_DOT_ABOVE +"⩮" => Ts.EQUALS_WITH_ASTERISK +"⩯" => Ts.ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT +"⩰" => Ts.APPROXIMATELY_EQUAL_OR_EQUAL_TO +"⩱" => Ts.EQUALS_SIGN_ABOVE_PLUS_SIGN +"⩲" => Ts.PLUS_SIGN_ABOVE_EQUALS_SIGN +"⩳" => Ts.EQUALS_SIGN_ABOVE_TILDE_OPERATOR +"⩴" => Ts.DOUBLE_COLON_EQUAL +"⩵" => 
Ts.TWO_CONSECUTIVE_EQUALS_SIGNS +"⩶" => Ts.THREE_CONSECUTIVE_EQUALS_SIGNS +"⩷" => Ts.EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW +"⩸" => Ts.EQUIVALENT_WITH_FOUR_DOTS_ABOVE +"⩹" => Ts.LESS_THAN_WITH_CIRCLE_INSIDE +"⩺" => Ts.GREATER_THAN_WITH_CIRCLE_INSIDE +"⩻" => Ts.LESS_THAN_WITH_QUESTION_MARK_ABOVE +"⩼" => Ts.GREATER_THAN_WITH_QUESTION_MARK_ABOVE +"⩽" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO +"⩾" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO +"⩿" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE +"⪀" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE +"⪁" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE +"⪂" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE +"⪃" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT +"⪄" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT +"⪅" => Ts.LESS_THAN_OR_APPROXIMATE +"⪆" => Ts.GREATER_THAN_OR_APPROXIMATE +"⪇" => Ts.LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO +"⪈" => Ts.GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO +"⪉" => Ts.LESS_THAN_AND_NOT_APPROXIMATE +"⪊" => Ts.GREATER_THAN_AND_NOT_APPROXIMATE +"⪋" => Ts.LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN +"⪌" => Ts.GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN +"⪍" => Ts.LESS_THAN_ABOVE_SIMILAR_OR_EQUAL +"⪎" => Ts.GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL +"⪏" => Ts.LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN +"⪐" => Ts.GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN +"⪑" => Ts.LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL +"⪒" => Ts.GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL +"⪓" => Ts.LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL +"⪔" => Ts.GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL +"⪕" => Ts.SLANTED_EQUAL_TO_OR_LESS_THAN +"⪖" => Ts.SLANTED_EQUAL_TO_OR_GREATER_THAN +"⪗" => Ts.SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE +"⪘" => Ts.SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE +"⪙" => Ts.DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN +"⪚" => Ts.DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN +"⪛" => 
Ts.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN +"⪜" => Ts.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN +"⪝" => Ts.SIMILAR_OR_LESS_THAN +"⪞" => Ts.SIMILAR_OR_GREATER_THAN +"⪟" => Ts.SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN +"⪠" => Ts.SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN +"⪡" => Ts.DOUBLE_NESTED_LESS_THAN +"⪢" => Ts.DOUBLE_NESTED_GREATER_THAN +"⪣" => Ts.DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR +"⪤" => Ts.GREATER_THAN_OVERLAPPING_LESS_THAN +"⪥" => Ts.GREATER_THAN_BESIDE_LESS_THAN +"⪦" => Ts.LESS_THAN_CLOSED_BY_CURVE +"⪧" => Ts.GREATER_THAN_CLOSED_BY_CURVE +"⪨" => Ts.LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL +"⪩" => Ts.GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL +"⪪" => Ts.SMALLER_THAN +"⪫" => Ts.LARGER_THAN +"⪬" => Ts.SMALLER_THAN_OR_EQUAL_TO +"⪭" => Ts.LARGER_THAN_OR_EQUAL_TO +"⪮" => Ts.EQUALS_SIGN_WITH_BUMPY_ABOVE +"⪯" => Ts.PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN +"⪰" => Ts.SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN +"⪱" => Ts.PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO +"⪲" => Ts.SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO +"⪳" => Ts.PRECEDES_ABOVE_EQUALS_SIGN +"⪴" => Ts.SUCCEEDS_ABOVE_EQUALS_SIGN +"⪵" => Ts.PRECEDES_ABOVE_NOT_EQUAL_TO +"⪶" => Ts.SUCCEEDS_ABOVE_NOT_EQUAL_TO +"⪷" => Ts.PRECEDES_ABOVE_ALMOST_EQUAL_TO +"⪸" => Ts.SUCCEEDS_ABOVE_ALMOST_EQUAL_TO +"⪹" => Ts.PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO +"⪺" => Ts.SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO +"⪻" => Ts.DOUBLE_PRECEDES +"⪼" => Ts.DOUBLE_SUCCEEDS +"⪽" => Ts.SUBSET_WITH_DOT +"⪾" => Ts.SUPERSET_WITH_DOT +"⪿" => Ts.SUBSET_WITH_PLUS_SIGN_BELOW +"⫀" => Ts.SUPERSET_WITH_PLUS_SIGN_BELOW +"⫁" => Ts.SUBSET_WITH_MULTIPLICATION_SIGN_BELOW +"⫂" => Ts.SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW +"⫃" => Ts.SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE +"⫄" => Ts.SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE +"⫅" => Ts.SUBSET_OF_ABOVE_EQUALS_SIGN +"⫆" => Ts.SUPERSET_OF_ABOVE_EQUALS_SIGN +"⫇" => Ts.SUBSET_OF_ABOVE_TILDE_OPERATOR +"⫈" => Ts.SUPERSET_OF_ABOVE_TILDE_OPERATOR +"⫉" => Ts.SUBSET_OF_ABOVE_ALMOST_EQUAL_TO +"⫊" => 
Ts.SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO +"⫋" => Ts.SUBSET_OF_ABOVE_NOT_EQUAL_TO +"⫌" => Ts.SUPERSET_OF_ABOVE_NOT_EQUAL_TO +"⫍" => Ts.SQUARE_LEFT_OPEN_BOX_OPERATOR +"⫎" => Ts.SQUARE_RIGHT_OPEN_BOX_OPERATOR +"⫏" => Ts.CLOSED_SUBSET +"⫐" => Ts.CLOSED_SUPERSET +"⫑" => Ts.CLOSED_SUBSET_OR_EQUAL_TO +"⫒" => Ts.CLOSED_SUPERSET_OR_EQUAL_TO +"⫓" => Ts.SUBSET_ABOVE_SUPERSET +"⫔" => Ts.SUPERSET_ABOVE_SUBSET +"⫕" => Ts.SUBSET_ABOVE_SUBSET +"⫖" => Ts.SUPERSET_ABOVE_SUPERSET +"⫗" => Ts.SUPERSET_BESIDE_SUBSET +"⫘" => Ts.SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET +"⫙" => Ts.ELEMENT_OF_OPENING_DOWNWARDS +"⫷" => Ts.TRIPLE_NESTED_LESS_THAN +"⫸" => Ts.TRIPLE_NESTED_GREATER_THAN +"⫹" => Ts.DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO +"⫺" => Ts.DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO +"⊢" => Ts.RIGHT_TACK +"⊣" => Ts.LEFT_TACK +"⟂" => Ts.PERP +"END_COMPARISON" => Ts.end_comparison # Level 7 -const BEGIN_PIPE = @_K begin_pipe -const var"<|" = @_K LPIPE -const var"|>" = @_K RPIPE -const END_PIPE = @_K end_pipe +"BEGIN_PIPE" => Ts.begin_pipe +"<|" => Ts.LPIPE +"|>" => Ts.RPIPE +"END_PIPE" => Ts.end_pipe # Level 8 -const BEGIN_COLON = @_K begin_colon -const var":" = @_K COLON -const var".." = @_K DDOT -const var"…" = @_K LDOTS -const var"⁝" = @_K TRICOLON -const var"⋮" = @_K VDOTS -const var"⋱" = @_K DDOTS -const var"⋰" = @_K ADOTS -const var"⋯" = @_K CDOTS -const END_COLON = @_K end_colon +"BEGIN_COLON" => Ts.begin_colon +":" => Ts.COLON +".." 
=> Ts.DDOT +"…" => Ts.LDOTS +"⁝" => Ts.TRICOLON +"⋮" => Ts.VDOTS +"⋱" => Ts.DDOTS +"⋰" => Ts.ADOTS +"⋯" => Ts.CDOTS +"END_COLON" => Ts.end_colon # Level 9 -const BEGIN_PLUS = @_K begin_plus -const var"$" = @_K EX_OR -const var"+" = @_K PLUS -const var"-" = @_K MINUS -const var"++" = @_K PLUSPLUS -const var"⊕" = @_K CIRCLED_PLUS -const var"⊖" = @_K CIRCLED_MINUS -const var"⊞" = @_K SQUARED_PLUS -const var"⊟" = @_K SQUARED_MINUS -const var"|" = @_K OR -const var"∪" = @_K UNION -const var"∨" = @_K LOGICAL_OR -const var"⊔" = @_K SQUARE_CUP -const var"±" = @_K PLUS_MINUS_SIGN -const var"∓" = @_K MINUS_OR_PLUS_SIGN -const var"∔" = @_K DOT_PLUS -const var"∸" = @_K DOT_MINUS -const var"≂" = @_K MINUS_TILDE -const var"≏" = @_K DIFFERENCE_BETWEEN -const var"⊎" = @_K MULTISET_UNION -const var"⊻" = @_K XOR -const var"⊽" = @_K NOR -const var"⋎" = @_K CURLY_LOGICAL_OR -const var"⋓" = @_K DOUBLE_UNION -const var"⧺" = @_K DOUBLE_PLUS -const var"⧻" = @_K TRIPLE_PLUS -const var"⨈" = @_K TWO_LOGICAL_OR_OPERATOR -const var"⨢" = @_K PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE -const var"⨣" = @_K PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE -const var"⨤" = @_K PLUS_SIGN_WITH_TILDE_ABOVE -const var"⨥" = @_K PLUS_SIGN_WITH_DOT_BELOW -const var"⨦" = @_K PLUS_SIGN_WITH_TILDE_BELOW -const var"⨧" = @_K PLUS_SIGN_WITH_SUBSCRIPT_TWO -const var"⨨" = @_K PLUS_SIGN_WITH_BLACK_TRIANGLE -const var"⨩" = @_K MINUS_SIGN_WITH_COMMA_ABOVE -const var"⨪" = @_K MINUS_SIGN_WITH_DOT_BELOW -const var"⨫" = @_K MINUS_SIGN_WITH_FALLING_DOTS -const var"⨬" = @_K MINUS_SIGN_WITH_RISING_DOTS -const var"⨭" = @_K PLUS_SIGN_IN_LEFT_HALF_CIRCLE -const var"⨮" = @_K PLUS_SIGN_IN_RIGHT_HALF_CIRCLE -const var"⨹" = @_K PLUS_SIGN_IN_TRIANGLE -const var"⨺" = @_K MINUS_SIGN_IN_TRIANGLE -const var"⩁" = @_K UNION_WITH_MINUS_SIGN -const var"⩂" = @_K UNION_WITH_OVERBAR -const var"⩅" = @_K UNION_WITH_LOGICAL_OR -const var"⩊" = @_K UNION_BESIDE_AND_JOINED_WITH_UNION -const var"⩌" = @_K CLOSED_UNION_WITH_SERIFS -const var"⩏" = @_K DOUBLE_SQUARE_UNION 
-const var"⩐" = @_K CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT -const var"⩒" = @_K LOGICAL_OR_WITH_DOT_ABOVE -const var"⩔" = @_K DOUBLE_LOGICAL_OR -const var"⩖" = @_K TWO_INTERSECTING_LOGICAL_OR -const var"⩗" = @_K SLOPING_LARGE_OR -const var"⩛" = @_K LOGICAL_OR_WITH_MIDDLE_STEM -const var"⩝" = @_K LOGICAL_OR_WITH_HORIZONTAL_DASH -const var"⩡" = @_K SMALL_VEE_WITH_UNDERBAR -const var"⩢" = @_K LOGICAL_OR_WITH_DOUBLE_OVERBAR -const var"⩣" = @_K LOGICAL_OR_WITH_DOUBLE_UNDERBAR -const var"¦" = @_K BROKEN_BAR -const END_PLUS = @_K end_plus +"BEGIN_PLUS" => Ts.begin_plus +"\$" => Ts.EX_OR +"+" => Ts.PLUS +"-" => Ts.MINUS +"++" => Ts.PLUSPLUS +"⊕" => Ts.CIRCLED_PLUS +"⊖" => Ts.CIRCLED_MINUS +"⊞" => Ts.SQUARED_PLUS +"⊟" => Ts.SQUARED_MINUS +"|" => Ts.OR +"∪" => Ts.UNION +"∨" => Ts.LOGICAL_OR +"⊔" => Ts.SQUARE_CUP +"±" => Ts.PLUS_MINUS_SIGN +"∓" => Ts.MINUS_OR_PLUS_SIGN +"∔" => Ts.DOT_PLUS +"∸" => Ts.DOT_MINUS +"≂" => Ts.MINUS_TILDE +"≏" => Ts.DIFFERENCE_BETWEEN +"⊎" => Ts.MULTISET_UNION +"⊻" => Ts.XOR +"⊽" => Ts.NOR +"⋎" => Ts.CURLY_LOGICAL_OR +"⋓" => Ts.DOUBLE_UNION +"⧺" => Ts.DOUBLE_PLUS +"⧻" => Ts.TRIPLE_PLUS +"⨈" => Ts.TWO_LOGICAL_OR_OPERATOR +"⨢" => Ts.PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE +"⨣" => Ts.PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE +"⨤" => Ts.PLUS_SIGN_WITH_TILDE_ABOVE +"⨥" => Ts.PLUS_SIGN_WITH_DOT_BELOW +"⨦" => Ts.PLUS_SIGN_WITH_TILDE_BELOW +"⨧" => Ts.PLUS_SIGN_WITH_SUBSCRIPT_TWO +"⨨" => Ts.PLUS_SIGN_WITH_BLACK_TRIANGLE +"⨩" => Ts.MINUS_SIGN_WITH_COMMA_ABOVE +"⨪" => Ts.MINUS_SIGN_WITH_DOT_BELOW +"⨫" => Ts.MINUS_SIGN_WITH_FALLING_DOTS +"⨬" => Ts.MINUS_SIGN_WITH_RISING_DOTS +"⨭" => Ts.PLUS_SIGN_IN_LEFT_HALF_CIRCLE +"⨮" => Ts.PLUS_SIGN_IN_RIGHT_HALF_CIRCLE +"⨹" => Ts.PLUS_SIGN_IN_TRIANGLE +"⨺" => Ts.MINUS_SIGN_IN_TRIANGLE +"⩁" => Ts.UNION_WITH_MINUS_SIGN +"⩂" => Ts.UNION_WITH_OVERBAR +"⩅" => Ts.UNION_WITH_LOGICAL_OR +"⩊" => Ts.UNION_BESIDE_AND_JOINED_WITH_UNION +"⩌" => Ts.CLOSED_UNION_WITH_SERIFS +"⩏" => Ts.DOUBLE_SQUARE_UNION +"⩐" => 
Ts.CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT +"⩒" => Ts.LOGICAL_OR_WITH_DOT_ABOVE +"⩔" => Ts.DOUBLE_LOGICAL_OR +"⩖" => Ts.TWO_INTERSECTING_LOGICAL_OR +"⩗" => Ts.SLOPING_LARGE_OR +"⩛" => Ts.LOGICAL_OR_WITH_MIDDLE_STEM +"⩝" => Ts.LOGICAL_OR_WITH_HORIZONTAL_DASH +"⩡" => Ts.SMALL_VEE_WITH_UNDERBAR +"⩢" => Ts.LOGICAL_OR_WITH_DOUBLE_OVERBAR +"⩣" => Ts.LOGICAL_OR_WITH_DOUBLE_UNDERBAR +"¦" => Ts.BROKEN_BAR +"END_PLUS" => Ts.end_plus # Level 10 -const BEGIN_BITSHIFTS = @_K begin_bitshifts -const var"<<" = @_K LBITSHIFT -const var">>" = @_K RBITSHIFT -const var">>>" = @_K UNSIGNED_BITSHIFT -const END_BITSHIFTS = @_K end_bitshifts +"BEGIN_BITSHIFTS" => Ts.begin_bitshifts +"<<" => Ts.LBITSHIFT +">>" => Ts.RBITSHIFT +">>>" => Ts.UNSIGNED_BITSHIFT +"END_BITSHIFTS" => Ts.end_bitshifts # Level 11 -const BEGIN_TIMES = @_K begin_times -const var"*" = @_K STAR -const var"/" = @_K FWD_SLASH -const var"÷" = @_K DIVISION_SIGN -const var"%" = @_K REM -const var"⋅" = @_K UNICODE_DOT -const var"∘" = @_K RING_OPERATOR -const var"×" = @_K MULTIPLICATION_SIGN -const var"\\" = @_K BACKSLASH -const var"&" = @_K AND -const var"∩" = @_K INTERSECTION -const var"∧" = @_K LOGICAL_AND -const var"⊗" = @_K CIRCLED_TIMES -const var"⊘" = @_K CIRCLED_DIVISION_SLASH -const var"⊙" = @_K CIRCLED_DOT_OPERATOR -const var"⊚" = @_K CIRCLED_RING_OPERATOR -const var"⊛" = @_K CIRCLED_ASTERISK_OPERATOR -const var"⊠" = @_K SQUARED_TIMES -const var"⊡" = @_K SQUARED_DOT_OPERATOR -const var"⊓" = @_K SQUARE_CAP -const var"∗" = @_K ASTERISK_OPERATOR -const var"∙" = @_K BULLET_OPERATOR -const var"∤" = @_K DOES_NOT_DIVIDE -const var"⅋" = @_K TURNED_AMPERSAND -const var"≀" = @_K WREATH_PRODUCT -const var"⊼" = @_K NAND -const var"⋄" = @_K DIAMOND_OPERATOR -const var"⋆" = @_K STAR_OPERATOR -const var"⋇" = @_K DIVISION_TIMES -const var"⋉" = @_K LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT -const var"⋊" = @_K RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT -const var"⋋" = @_K LEFT_SEMIDIRECT_PRODUCT -const var"⋌" = @_K RIGHT_SEMIDIRECT_PRODUCT 
-const var"⋏" = @_K CURLY_LOGICAL_AND -const var"⋒" = @_K DOUBLE_INTERSECTION -const var"⟑" = @_K AND_WITH_DOT -const var"⦸" = @_K CIRCLED_REVERSE_SOLIDUS -const var"⦼" = @_K CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN -const var"⦾" = @_K CIRCLED_WHITE_BULLET -const var"⦿" = @_K CIRCLED_BULLET -const var"⧶" = @_K SOLIDUS_WITH_OVERBAR -const var"⧷" = @_K REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE -const var"⨇" = @_K TWO_LOGICAL_AND_OPERATOR -const var"⨰" = @_K MULTIPLICATION_SIGN_WITH_DOT_ABOVE -const var"⨱" = @_K MULTIPLICATION_SIGN_WITH_UNDERBAR -const var"⨲" = @_K SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED -const var"⨳" = @_K SMASH_PRODUCT -const var"⨴" = @_K MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE -const var"⨵" = @_K MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE -const var"⨶" = @_K CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT -const var"⨷" = @_K MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE -const var"⨸" = @_K CIRCLED_DIVISION_SIGN -const var"⨻" = @_K MULTIPLICATION_SIGN_IN_TRIANGLE -const var"⨼" = @_K INTERIOR_PRODUCT -const var"⨽" = @_K RIGHTHAND_INTERIOR_PRODUCT -const var"⩀" = @_K INTERSECTION_WITH_DOT -const var"⩃" = @_K INTERSECTION_WITH_OVERBAR -const var"⩄" = @_K INTERSECTION_WITH_LOGICAL_AND -const var"⩋" = @_K INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION -const var"⩍" = @_K CLOSED_INTERSECTION_WITH_SERIFS -const var"⩎" = @_K DOUBLE_SQUARE_INTERSECTION -const var"⩑" = @_K LOGICAL_AND_WITH_DOT_ABOVE -const var"⩓" = @_K DOUBLE_LOGICAL_AND -const var"⩕" = @_K TWO_INTERSECTING_LOGICAL_AND -const var"⩘" = @_K SLOPING_LARGE_AND -const var"⩚" = @_K LOGICAL_AND_WITH_MIDDLE_STEM -const var"⩜" = @_K LOGICAL_AND_WITH_HORIZONTAL_DASH -const var"⩞" = @_K LOGICAL_AND_WITH_DOUBLE_OVERBAR -const var"⩟" = @_K LOGICAL_AND_WITH_UNDERBAR -const var"⩠" = @_K LOGICAL_AND_WITH_DOUBLE_UNDERBAR -const var"⫛" = @_K TRANSVERSAL_INTERSECTION -const var"⊍" = @_K MULTISET_MULTIPLICATION -const var"▷" = @_K WHITE_RIGHT_POINTING_TRIANGLE -const var"⨝" = @_K JOIN -const var"⟕" = @_K LEFT_OUTER_JOIN 
-const var"⟖" = @_K RIGHT_OUTER_JOIN -const var"⟗" = @_K FULL_OUTER_JOIN -const var"⌿" = @_K NOT_SLASH -const var"⨟" = @_K BB_SEMI -const END_TIMES = @_K end_times +"BEGIN_TIMES" => Ts.begin_times +"*" => Ts.STAR +"/" => Ts.FWD_SLASH +"÷" => Ts.DIVISION_SIGN +"%" => Ts.REM +"⋅" => Ts.UNICODE_DOT +"∘" => Ts.RING_OPERATOR +"×" => Ts.MULTIPLICATION_SIGN +"\\" => Ts.BACKSLASH +"&" => Ts.AND +"∩" => Ts.INTERSECTION +"∧" => Ts.LOGICAL_AND +"⊗" => Ts.CIRCLED_TIMES +"⊘" => Ts.CIRCLED_DIVISION_SLASH +"⊙" => Ts.CIRCLED_DOT_OPERATOR +"⊚" => Ts.CIRCLED_RING_OPERATOR +"⊛" => Ts.CIRCLED_ASTERISK_OPERATOR +"⊠" => Ts.SQUARED_TIMES +"⊡" => Ts.SQUARED_DOT_OPERATOR +"⊓" => Ts.SQUARE_CAP +"∗" => Ts.ASTERISK_OPERATOR +"∙" => Ts.BULLET_OPERATOR +"∤" => Ts.DOES_NOT_DIVIDE +"⅋" => Ts.TURNED_AMPERSAND +"≀" => Ts.WREATH_PRODUCT +"⊼" => Ts.NAND +"⋄" => Ts.DIAMOND_OPERATOR +"⋆" => Ts.STAR_OPERATOR +"⋇" => Ts.DIVISION_TIMES +"⋉" => Ts.LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT +"⋊" => Ts.RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT +"⋋" => Ts.LEFT_SEMIDIRECT_PRODUCT +"⋌" => Ts.RIGHT_SEMIDIRECT_PRODUCT +"⋏" => Ts.CURLY_LOGICAL_AND +"⋒" => Ts.DOUBLE_INTERSECTION +"⟑" => Ts.AND_WITH_DOT +"⦸" => Ts.CIRCLED_REVERSE_SOLIDUS +"⦼" => Ts.CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN +"⦾" => Ts.CIRCLED_WHITE_BULLET +"⦿" => Ts.CIRCLED_BULLET +"⧶" => Ts.SOLIDUS_WITH_OVERBAR +"⧷" => Ts.REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE +"⨇" => Ts.TWO_LOGICAL_AND_OPERATOR +"⨰" => Ts.MULTIPLICATION_SIGN_WITH_DOT_ABOVE +"⨱" => Ts.MULTIPLICATION_SIGN_WITH_UNDERBAR +"⨲" => Ts.SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED +"⨳" => Ts.SMASH_PRODUCT +"⨴" => Ts.MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE +"⨵" => Ts.MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE +"⨶" => Ts.CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT +"⨷" => Ts.MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE +"⨸" => Ts.CIRCLED_DIVISION_SIGN +"⨻" => Ts.MULTIPLICATION_SIGN_IN_TRIANGLE +"⨼" => Ts.INTERIOR_PRODUCT +"⨽" => Ts.RIGHTHAND_INTERIOR_PRODUCT +"⩀" => Ts.INTERSECTION_WITH_DOT +"⩃" => 
Ts.INTERSECTION_WITH_OVERBAR +"⩄" => Ts.INTERSECTION_WITH_LOGICAL_AND +"⩋" => Ts.INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION +"⩍" => Ts.CLOSED_INTERSECTION_WITH_SERIFS +"⩎" => Ts.DOUBLE_SQUARE_INTERSECTION +"⩑" => Ts.LOGICAL_AND_WITH_DOT_ABOVE +"⩓" => Ts.DOUBLE_LOGICAL_AND +"⩕" => Ts.TWO_INTERSECTING_LOGICAL_AND +"⩘" => Ts.SLOPING_LARGE_AND +"⩚" => Ts.LOGICAL_AND_WITH_MIDDLE_STEM +"⩜" => Ts.LOGICAL_AND_WITH_HORIZONTAL_DASH +"⩞" => Ts.LOGICAL_AND_WITH_DOUBLE_OVERBAR +"⩟" => Ts.LOGICAL_AND_WITH_UNDERBAR +"⩠" => Ts.LOGICAL_AND_WITH_DOUBLE_UNDERBAR +"⫛" => Ts.TRANSVERSAL_INTERSECTION +"⊍" => Ts.MULTISET_MULTIPLICATION +"▷" => Ts.WHITE_RIGHT_POINTING_TRIANGLE +"⨝" => Ts.JOIN +"⟕" => Ts.LEFT_OUTER_JOIN +"⟖" => Ts.RIGHT_OUTER_JOIN +"⟗" => Ts.FULL_OUTER_JOIN +"⌿" => Ts.NOT_SLASH +"⨟" => Ts.BB_SEMI +"END_TIMES" => Ts.end_times # Level 12 -const BEGIN_RATIONAL = @_K begin_rational -const var"//" = @_K FWDFWD_SLASH -const END_RATIONAL = @_K end_rational +"BEGIN_RATIONAL" => Ts.begin_rational +"//" => Ts.FWDFWD_SLASH +"END_RATIONAL" => Ts.end_rational # Level 13 -const BEGIN_POWER = @_K begin_power -const var"^" = @_K CIRCUMFLEX_ACCENT -const var"↑" = @_K UPWARDS_ARROW -const var"↓" = @_K DOWNWARDS_ARROW -const var"⇵" = @_K DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW -const var"⟰" = @_K UPWARDS_QUADRUPLE_ARROW -const var"⟱" = @_K DOWNWARDS_QUADRUPLE_ARROW -const var"⤈" = @_K DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE -const var"⤉" = @_K UPWARDS_ARROW_WITH_HORIZONTAL_STROKE -const var"⤊" = @_K UPWARDS_TRIPLE_ARROW -const var"⤋" = @_K DOWNWARDS_TRIPLE_ARROW -const var"⤒" = @_K UPWARDS_ARROW_TO_BAR -const var"⤓" = @_K DOWNWARDS_ARROW_TO_BAR -const var"⥉" = @_K UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE -const var"⥌" = @_K UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON -const var"⥍" = @_K UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON -const var"⥏" = @_K UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON -const var"⥑" = @_K UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON -const var"⥔" = @_K 
UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR -const var"⥕" = @_K DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR -const var"⥘" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR -const var"⥙" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR -const var"⥜" = @_K UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR -const var"⥝" = @_K DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR -const var"⥠" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR -const var"⥡" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR -const var"⥣" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT -const var"⥥" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT -const var"⥮" = @_K UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT -const var"⥯" = @_K DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT -const var"↑" = @_K HALFWIDTH_UPWARDS_ARROW -const var"↓" = @_K HALFWIDTH_DOWNWARDS_ARROW -const END_POWER = @_K end_power +"BEGIN_POWER" => Ts.begin_power +"^" => Ts.CIRCUMFLEX_ACCENT +"↑" => Ts.UPWARDS_ARROW +"↓" => Ts.DOWNWARDS_ARROW +"⇵" => Ts.DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW +"⟰" => Ts.UPWARDS_QUADRUPLE_ARROW +"⟱" => Ts.DOWNWARDS_QUADRUPLE_ARROW +"⤈" => Ts.DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE +"⤉" => Ts.UPWARDS_ARROW_WITH_HORIZONTAL_STROKE +"⤊" => Ts.UPWARDS_TRIPLE_ARROW +"⤋" => Ts.DOWNWARDS_TRIPLE_ARROW +"⤒" => Ts.UPWARDS_ARROW_TO_BAR +"⤓" => Ts.DOWNWARDS_ARROW_TO_BAR +"⥉" => Ts.UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE +"⥌" => Ts.UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON +"⥍" => Ts.UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON +"⥏" => Ts.UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON +"⥑" => Ts.UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON +"⥔" => Ts.UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR +"⥕" => Ts.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR +"⥘" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR +"⥙" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR +"⥜" => Ts.UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR +"⥝" => Ts.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR +"⥠" => 
Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR +"⥡" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR +"⥣" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT +"⥥" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT +"⥮" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT +"⥯" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT +"↑" => Ts.HALFWIDTH_UPWARDS_ARROW +"↓" => Ts.HALFWIDTH_DOWNWARDS_ARROW +"END_POWER" => Ts.end_power # Level 14 -const BEGIN_DECL = @_K begin_decl -const var"::" = @_K DECLARATION -const END_DECL = @_K end_decl +"BEGIN_DECL" => Ts.begin_decl +"::" => Ts.DECLARATION +"END_DECL" => Ts.end_decl # Level 15 -const BEGIN_WHERE = @_K begin_where -const var"where" = @_K WHERE -const END_WHERE = @_K end_where +"BEGIN_WHERE" => Ts.begin_where +"where" => Ts.WHERE +"END_WHERE" => Ts.end_where # Level 16 -const BEGIN_DOT = @_K begin_dot -const var"." = @_K DOT -const END_DOT = @_K end_dot - -const var"!" = @_K NOT -const var"'" = @_K PRIME -const var".'" = @_K TRANSPOSE -const var"->" = @_K ANON_FUNC - -const BEGIN_UNICODE_OPS = @_K begin_unicode_ops -const var"¬" = @_K NOT_SIGN -const var"√" = @_K SQUARE_ROOT -const var"∛" = @_K CUBE_ROOT -const var"∜" = @_K QUAD_ROOT -const END_UNICODE_OPS = @_K end_unicode_ops - -const END_OPS = @_K end_ops - -# (Too?) cute synonyms -const var" " = @_K WHITESPACE -const var"\n" = @_K NEWLINE_WS - -const BEGIN_INVISIBLE_TOKENS = @_K begin_invisible_tokens -const TOMBSTONE = @_K TOMBSTONE -const var"core_@doc" = @_K CORE_AT_DOC -const var"core_@cmd" = @_K CORE_AT_CMD -const var"core_@int128_str" = @_K CORE_AT_INT128_STR -const var"core_@uint128_str" = @_K CORE_AT_UINT128_STR -const var"core_@big_str" = @_K CORE_AT_BIG_STR -const END_INVISIBLE_TOKENS = @_K end_invisible_tokens +"BEGIN_DOT" => Ts.begin_dot +"." => Ts.DOT +"END_DOT" => Ts.end_dot + +"!" 
=> Ts.NOT +"'" => Ts.PRIME +".'" => Ts.TRANSPOSE +"->" => Ts.ANON_FUNC + +"BEGIN_UNICODE_OPS" => Ts.begin_unicode_ops +"¬" => Ts.NOT_SIGN +"√" => Ts.SQUARE_ROOT +"∛" => Ts.CUBE_ROOT +"∜" => Ts.QUAD_ROOT +"END_UNICODE_OPS" => Ts.end_unicode_ops + +"END_OPS" => Ts.end_ops + +# Cute synonyms (too cute? +# " " => Ts.WHITESPACE +# "\n" => Ts.NEWLINE_WS + +"BEGIN_INVISIBLE_TOKENS" => Ts.begin_invisible_tokens +"TOMBSTONE" => Ts.TOMBSTONE +"core_@doc" => Ts.CORE_AT_DOC +"core_@cmd" => Ts.CORE_AT_CMD +"core_@int128_str" => Ts.CORE_AT_INT128_STR +"core_@uint128_str" => Ts.CORE_AT_UINT128_STR +"core_@big_str" => Ts.CORE_AT_BIG_STR +"END_INVISIBLE_TOKENS" => Ts.end_invisible_tokens # Our custom syntax tokens -const BEGIN_SYNTAX_KINDS = @_K begin_syntax_kinds -const block = @_K BLOCK -const call = @_K CALL -const comparison = @_K COMPARISON -const curly = @_K CURLY -const string = @_K STRING_INTERP -const macrocall = @_K MACROCALL -const toplevel = @_K TOPLEVEL -const tuple = @_K TUPLE -const ref = @_K REF -const vect = @_K VECT -const braces = @_K BRACES -const bracescat = @_K BRACESCAT -const hcat = @_K HCAT -const vcat = @_K VCAT -const ncat = @_K NCAT -const typed_hcat = @_K TYPED_HCAT -const typed_vcat = @_K TYPED_VCAT -const typed_ncat = @_K TYPED_NCAT -const generator = @_K GENERATOR -const flatten = @_K FLATTEN -const comprehension = @_K COMPREHENSION -const typed_comprehension = @_K TYPED_COMPREHENSION -const END_SYNTAX_KINDS = @_K end_syntax_kinds - -end # module Kinds +"BEGIN_SYNTAX_KINDS" => Ts.begin_syntax_kinds +"block" => Ts.BLOCK +"call" => Ts.CALL +"comparison" => Ts.COMPARISON +"curly" => Ts.CURLY +"string" => Ts.STRING_INTERP +"macrocall" => Ts.MACROCALL +"toplevel" => Ts.TOPLEVEL +"tuple" => Ts.TUPLE +"ref" => Ts.REF +"vect" => Ts.VECT +"braces" => Ts.BRACES +"bracescat" => Ts.BRACESCAT +"hcat" => Ts.HCAT +"vcat" => Ts.VCAT +"ncat" => Ts.NCAT +"typed_hcat" => Ts.TYPED_HCAT +"typed_vcat" => Ts.TYPED_VCAT +"typed_ncat" => Ts.TYPED_NCAT +"generator" => 
Ts.GENERATOR +"flatten" => Ts.FLATTEN +"comprehension" => Ts.COMPREHENSION +"typed_comprehension" => Ts.TYPED_COMPREHENSION +"END_SYNTAX_KINDS" => Ts.end_syntax_kinds +]) +end # Mapping from kinds to their unique string representation, if it exists -const _kind_to_str = +const _kind_to_str_unique = Dict{Kind,String}(k=>string(s) for (k,s) in TzTokens.UNICODE_OPS_REVERSE) for c in "([{}])@,;" - _kind_to_str[getfield(Kinds, Symbol(c))] = string(c) + _kind_to_str_unique[_str_to_kind[string(c)]] = string(c) end for kw in split("""abstract baremodule begin break catch const continue do else elseif end export finally for @@ -1019,5 +870,5 @@ for kw in split("""abstract baremodule begin break catch const vcat ncat typed_hcat typed_vcat typed_ncat generator flatten comprehension typed_comprehension """) - _kind_to_str[getfield(Kinds, Symbol(kw))] = kw + _kind_to_str_unique[_str_to_kind[kw]] = kw end diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl new file mode 100644 index 0000000000000..7346af828f56d --- /dev/null +++ b/JuliaSyntax/src/tokens.jl @@ -0,0 +1,107 @@ +using Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator + +include("token_kinds.jl") + +""" + K"s" + +The full kind of a string "s". For example, K")" is the kind of the +right parenthesis token. + +Naming rules: +* Kinds which correspond to exactly one textural form are represented with that + text. This includes keywords like K"for" and operators like K"*". +* Kinds which represent many textural forms have UpperCamelCase names. This + includes kinds like K"Identifier" and K"Comment". 
+* Kinds which exist merely as delimiters are all uppercase +""" +macro K_str(str) + get(_str_to_kind, str) do + error("unknown token kind K$(repr(str))") + end +end + +kind(k::Kind) = k +kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) + +# Predicates for operator precedence +is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" +is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" +is_prec_conditional(t) = K"BEGIN_CONDITIONAL" < kind(t) < K"END_CONDITIONAL" +is_prec_arrow(t) = K"BEGIN_ARROW" < kind(t) < K"END_ARROW" +is_prec_lazy_or(t) = K"BEGIN_LAZYOR" < kind(t) < K"END_LAZYOR" +is_prec_lazy_and(t) = K"BEGIN_LAZYAND" < kind(t) < K"END_LAZYAND" +is_prec_comparison(t) = K"BEGIN_COMPARISON" < kind(t) < K"END_COMPARISON" +is_prec_pipe(t) = K"BEGIN_PIPE" < kind(t) < K"END_PIPE" +is_prec_colon(t) = K"BEGIN_COLON" < kind(t) < K"END_COLON" +is_prec_plus(t) = K"BEGIN_PLUS" < kind(t) < K"END_PLUS" +is_prec_bitshift(t) = K"BEGIN_BITSHIFTS" < kind(t) < K"END_BITSHIFTS" +is_prec_times(t) = K"BEGIN_TIMES" < kind(t) < K"END_TIMES" +is_prec_rational(t) = K"BEGIN_RATIONAL" < kind(t) < K"END_RATIONAL" +is_prec_power(t) = K"BEGIN_POWER" < kind(t) < K"END_POWER" +is_prec_decl(t) = K"BEGIN_DECL" < kind(t) < K"END_DECL" +is_prec_where(t) = K"BEGIN_WHERE" < kind(t) < K"END_WHERE" +is_prec_dot(t) = K"BEGIN_DOT" < kind(t) < K"END_DOT" +is_prec_unicode_ops(t) = K"BEGIN_UNICODE_OPS" < kind(t) < K"END_UNICODE_OPS" + +is_prec_pipe_lt(t) = kind(t) == K"<|" +is_prec_pipe_gt(t) = kind(t) == K"|>" + +# Operators which are boty unary and binary +function is_both_unary_and_binary(t) + # TODO: Do we need to check dotop as well here? + kind(t) in (K"$", K"&", K"~", # <- dotop disallowed? 
+ K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed +end + +function is_number(t) + kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") +end + +function is_whitespace(t) + kind(t) in (K"Whitespace", K"NewlineWs") +end + +""" +Get the "binding power" (precedence level) of an operator kind +""" +function binding_power(k::Kind) + return k < K"END_ASSIGNMENTS" ? 1 : + k < K"END_CONDITIONAL" ? 2 : + k < K"END_ARROW" ? 3 : + k < K"END_LAZYOR" ? 4 : + k < K"END_LAZYAND" ? 5 : + k < K"END_COMPARISON" ? 6 : + k < K"END_PIPE" ? 7 : + k < K"END_COLON" ? 8 : + k < K"END_PLUS" ? 9 : + k < K"END_BITSHIFTS" ? 10 : + k < K"END_TIMES" ? 11 : + k < K"END_RATIONAL" ? 12 : + k < K"END_POWER" ? 13 : + k < K"END_DECL" ? 14 : + k < K"END_WHERE" ? 15 : + k < K"END_DOT" ? 16 : + k < K"END_OPS" ? 17 : # ?? unary ops + error("Not an operator") +end + +function _kind_str(k::Kind) + u = untokenize(k) + return !isnothing(u) ? u : + k in (K"Identifier", K"VarIdentifier") ? "Identifier" : + isliteral(k) ? "Literal" : + k == K"Comment" ? "Comment" : + k == K"Whitespace" ? "Whitespace" : + k == K"NewlineWs" ? "NewlineWs" : + lowercase(string(k)) +end + +""" +Return the string representation of a token kind, or `nothing` if the kind +represents a class of tokens like K"Identifier". 
+""" +function untokenize(k::Kind) + get(_kind_to_str_unique, k, nothing) +end + diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/test/syntax_interpolation.jl index 3c5658e5b79f1..0f9e30267366e 100644 --- a/JuliaSyntax/test/syntax_interpolation.jl +++ b/JuliaSyntax/test/syntax_interpolation.jl @@ -32,16 +32,16 @@ function at_show2(ex::SyntaxNode) end raw = N(K"block", T(K"quote", 5), - T(K"\n", 9), + T(K"NewlineWs", 9), N(K"=", N(K"Identifier", 5), - T(K" ", 1), + T(K"Whitespace", 1), T(K"=", 1), - T(K" ", 1), + T(K"Whitespace", 1), N(K"$", T(K"$", 1), N(K"Identifier", 2)), - T(K"\n", 9)), + T(K"NewlineWs", 9)), N(K"call", N(K"Identifier", 7), T(K"(", 1), @@ -49,15 +49,15 @@ function at_show2(ex::SyntaxNode) T(K"$", 1), N(K"Identifier", 4)), T(K",", 1), - T(K" ", 1), + T(K"Whitespace", 1), N(K"String", 5), T(K",", 1), - T(K" ", 1), + T(K"Whitespace", 1), N(K"Identifier", 5), T(K")", 1)), - T(K"\n", 9), + T(K"NewlineWs", 9), N(K"Identifier", 5), - T(K"\n", 5), + T(K"NewlineWs", 5), T(K"end", 3)) source = SourceFile(code, filename=@__FILE__) block = SyntaxNode(source, raw, source.line_starts[quote_begin]+4) @@ -81,9 +81,9 @@ code2 = "foo + 42" source2 = SourceFile(code2, filename="foo.jl") s2 = SyntaxNode(source2, NI(K"call", N(K"Identifier", 3), - T(K" ", 1), + T(K"Whitespace", 1), N(K"+", 1), - T(K" ", 1), + T(K"Whitespace", 1), N(K"Integer", 2))) # Calling at_show2, we see that the precise source information is preserved for diff --git a/JuliaSyntax/test/syntax_trees.jl b/JuliaSyntax/test/syntax_trees.jl index ed57bca4d85a5..3b6439b327faf 100644 --- a/JuliaSyntax/test/syntax_trees.jl +++ b/JuliaSyntax/test/syntax_trees.jl @@ -16,29 +16,29 @@ source = SourceFile(code, filename="none.jl") t = N(K"for", T(K"for", 3), - T(K" ", 1), + T(K"Whitespace", 1), N(K"=", N(K"Identifier", 1), - T(K" ", 1), + T(K"Whitespace", 1), T(K"=", 1), - T(K" ", 1), + T(K"Whitespace", 1), NI(K"call", N(K"Integer", 1), N(K":", 1), N(K"Integer", 2))), N(K"block", - 
T(K"\n", 5), + T(K"NewlineWs", 5), NI(K"call", N(K"Identifier", 1), - T(K" ", 1), + T(K"Whitespace", 1), N(K"+", 1), - T(K" ", 1), + T(K"Whitespace", 1), N(K"Integer", 1)), - T(K"\n", 5), + T(K"NewlineWs", 5), T(K"Comment", 4), - T(K"\n", 5), + T(K"NewlineWs", 5), N(K"Identifier", 1), - T(K"\n", 1)), + T(K"NewlineWs", 1)), T(K"end", 3)) # And the following AST From 5396c20f2f57b1da23a622b12e1ce065fa360790 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 12 Dec 2021 21:09:26 +1000 Subject: [PATCH 0240/1109] Enhance grammar of toy/test parser + work on real parse_atom --- JuliaSyntax/src/parse_stream.jl | 87 ++++++++++----- JuliaSyntax/src/parser.jl | 139 +++++++++++++++--------- JuliaSyntax/test/runtests.jl | 7 +- JuliaSyntax/test/simple_parser.jl | 171 ++++++++++++++++++++++-------- 4 files changed, 282 insertions(+), 122 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index c060e981093ea..01cb799d11afc 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -170,34 +170,55 @@ function peek(stream::ParseStream, n::Integer=1, skip_newlines=false) kind(peek_token(stream, n, skip_newlines)) end -""" - bump(stream [, flags=EMPTY_FLAGS]) - -Shift the current token into the output as a new text span with the given -`flags`. -""" -function bump(stream::ParseStream, flags=EMPTY_FLAGS, skip_newlines=false) - n = _lookahead_index(stream, 1, skip_newlines) +# Bump the next `n` tokens +# flags and new_kind are applied to any non-trivia tokens +function _bump_n(stream::ParseStream, n::Integer, flags, new_kind=K"Nothing") + if n <= 0 + return + end for i=1:n tok = stream.lookahead[i] k = kind(tok) if k == K"EndMarker" break end - is_skipped_ws = k ∈ (K"Whitespace", K"Comment") || - (k == K"NewlineWs" && skip_newlines) - f = is_skipped_ws ? 
TRIVIA_FLAG : flags - span = TaggedRange(SyntaxHead(kind(tok), f), first_byte(tok), last_byte(tok)) + is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") + f = is_trivia ? TRIVIA_FLAG : flags + k = (is_trivia || new_kind == K"Nothing") ? k : new_kind + span = TaggedRange(SyntaxHead(k, f), first_byte(tok), last_byte(tok)) push!(stream.spans, span) end Base._deletebeg!(stream.lookahead, n) stream.next_byte = last_byte(last(stream.spans)) + 1 # Defuse the time bomb stream.peek_count = 0 +end + +""" + bump(stream [, flags=EMPTY_FLAGS]) + +Shift the current token into the output as a new text span with the given +`flags`. +""" +function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, + error=nothing, new_kind=K"Nothing") + if !isnothing(error) + flags |= ERROR_FLAG + emit_diagnostic(stream, error=error) + end + _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, new_kind) # Return last token location in output if needed for set_flags! return lastindex(stream.spans) end +""" +Bump comments and whitespace tokens preceding the next token +""" +function bump_trivia(stream::ParseStream; skip_newlines=false) + _bump_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) + return lastindex(stream.spans) +end + function bump_invisible(stream::ParseStream, kind) emit(stream, position(stream), kind) return lastindex(stream.spans) @@ -264,12 +285,24 @@ end """ Emit a diagnostic at the position of the next token + +If `whitespace` is true, the diagnostic is positioned on the whitespace before +the next token. Otherwise it's positioned at the next token as returned by `peek()`. """ -function emit_diagnostic(stream::ParseStream, mark=nothing; error) - byte = first_byte(peek_token(stream)) - mark = isnothing(mark) ? 
byte : mark +function emit_diagnostic(stream::ParseStream, mark=nothing; error, whitespace=false) + i = _lookahead_index(stream, 1, true) + begin_tok_i = i + end_tok_i = i + if whitespace + # It's the whitespace which is the error. Find the range of the current + # whitespace. + begin_tok_i = 1 + end_tok_i = is_whitespace(stream.lookahead[i]) ? i : max(1, i-1) + end + mark = isnothing(mark) ? first_byte(stream.lookahead[begin_tok_i]) : mark + err_end = last_byte(stream.lookahead[end_tok_i]) # It's a bit weird to require supplying a SyntaxHead here... - text_span = TaggedRange(SyntaxHead(K"Error", EMPTY_FLAGS), mark, byte) + text_span = TaggedRange(SyntaxHead(K"Error", EMPTY_FLAGS), mark, err_end) push!(stream.diagnostics, Diagnostic(text_span, error)) end @@ -289,7 +322,7 @@ function _push_node!(stack, text_span::TaggedRange, children=nothing) end end -function to_raw_tree(st) +function to_raw_tree(st; wrap_toplevel_as_kind=nothing) stack = Vector{@NamedTuple{text_span::TaggedRange,node::GreenNode}}() for text_span in st.spans if kind(text_span) == K"TOMBSTONE" @@ -318,7 +351,15 @@ function to_raw_tree(st) _push_node!(stack, text_span, children) end # show(stdout, MIME"text/plain"(), stack[1].node) - return only(stack).node + if length(stack) == 1 + return only(stack).node + elseif !isnothing(wrap_toplevel_as_kind) + # Mostly for debugging + children = [x.node for x in stack] + return GreenNode(SyntaxHead(wrap_toplevel_as_kind, ERROR_FLAG), children...) + else + error("Found multiple nodes at top level") + end end function show_diagnostics(io::IO, stream::ParseStream, code) @@ -380,15 +421,13 @@ function peek_token(ps::ParseState, n=1; skip_newlines=nothing) peek_token(ps.stream, n, skip_nl) end -function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing) +function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) skip_nl = isnothing(skip_newlines) ? 
ps.whitespace_newline : skip_newlines - bump(ps.stream, flags, skip_nl) + bump(ps.stream, flags; skip_newlines=skip_nl, kws...) end -function bump_newlines(ps::ParseState) - while peek(ps) == K"NewlineWs" - bump(ps, TRIVIA_FLAG) - end +function bump_trivia(ps::ParseState; kws...) + bump_trivia(ps.stream; kws...) end """ diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a2c6258288225..e0a908de6beca 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -47,12 +47,20 @@ function is_initial_reserved_word(ps::ParseState, t) return is_iresword && !(k == K"begin" && ps.end_symbol) end +function is_syntactic_operator(t) + k = kind(t) + return k in (K"&&", K"||", K".", K"...", K"->") || + (is_prec_assignment(k) && k != K"~") +end + function is_syntactic_unary_op(t) kind(t) in (K"$", K"&", K"::") end -function has_whitespace_prefix(tok::SyntaxToken) - tok.had_whitespace +# flisp: invalid-identifier? +function is_valid_identifier(k) + # FIXME? flisp also had K"...." disallowed, whatever that's for! + !(is_syntactic_operator(k) || k in (K"?", K".'")) end #------------------------------------------------------------------------------- @@ -110,7 +118,7 @@ end # # flisp: (define (parse-Nary s down ops head closer? 
add-linenums) function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) - bump_newlines(ps) + bump_trivia(ps, skip_newlines=true) k = peek(ps) if k in closing_tokens return true @@ -755,7 +763,7 @@ end # flisp: disallow-space function disallow_space(ps, t) if t.had_whitespace - emit_diagnostic(ps, mark, "space disallowed before $t") + emit_diagnostic(ps, mark, error="space disallowed before $t", whitespace=true) end end @@ -770,6 +778,7 @@ end function parse_call_chain(ps::ParseState, mark, is_macrocall) bumpTODO(ps); return TODO("parse_call_chain") + #= while true t = peek_token(ps) k = kind(t) @@ -793,6 +802,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) break end end + =# end # flisp: (define (expect-end s word) @@ -1022,15 +1032,17 @@ function tuple_to_arglist(e) TODO("tuple_to_arglist unimplemented") end +# The initial ( has been bumped +# # flisp: (define (parse-paren s (checked #t)) (car (parse-paren- s checked))) -function parse_paren(ps::ParseState; checked=true) +function parse_paren(ps::ParseState, check_identifiers=true) TODO("parse_paren unimplemented") end # return (expr . arglist) where arglist is #t iff this isn't just a parenthesized expr # # flisp: (define (parse-paren- s checked) -function parse_paren_(ps::ParseState, checked) +function parse_paren_(ps::ParseState, check_identifiers) TODO("parse_paren_ unimplemented") end @@ -1174,62 +1186,83 @@ end # parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. 
# -# flisp: (define (parse-atom s (checked #t)) -function parse_atom(ps::ParseState; checked=true) - bumpTODO(ps) - #TODO("parse_atom unimplemented") - #= - tok = require_token(ps) - tok_kind = kind(tok) - # TODO: Reorder these to put most likely tokens first - if tok_kind == K":" # symbol/expression quote - take_token!(ps) - next = peek_token(ps) - if is_closing_token(ps, next) && (kind(next) != K"Keyword" || - has_whitespace_prefix(next)) - return GreenNode(tok) - elseif has_whitespace_prefix(next) - error("whitespace not allowed after \":\" used for quoting") - elseif kind(next) == K"NewlineWs" - error("newline not allowed after \":\" used for quoting") +# If `check_identifiers` is true, identifiers are disallowed from being one of +# the syntactic operators or closing tokens. +# +# flisp: parse-atom +function parse_atom(ps::ParseState, check_identifiers=true) + atom_mark = position(ps) + bump_trivia(ps, skip_newlines=true) + leading_kind = peek(ps) + # TODO: Reorder to put most likely tokens first + if leading_kind == K":" + # symbol/expression quote + t = peek_token(ps, 2) + k = kind(t) + if is_closing_token(ps, k) && (!iskeyword(k) || t.had_whitespace) + # : is a literal colon in some circumstances + # (:) ==> : + # begin : end ==> (block :) + bump(ps) # K":" + return + end + bump(ps, TRIVIA_FLAG) # K":" + # TODO: It's clunky to have to track flags here when we'll emit an + # error diagnostic. + flags = EMPTY_FLAGS + if t.had_whitespace + emit_diagnostic(ps, error="whitespace not allowed after `:` used for quoting", + whitespace=true) + flags = ERROR_FLAG + elseif k == K"NewlineWs" + emit_diagnostic(ps, error="newline not allowed after `:` used for quoting") + flags = ERROR_FLAG else # Being inside quote makes `end` non-special again. 
issue #27690 ps1 = ParseState(ps, end_symbol=false) - return GreenNode(K"quote", parse_atom(ps1, checked=false)) + parse_atom(ps1, false) + emit(ps1, atom_mark, K"quote", flags) end - elseif tok_kind == K"=" # misplaced = - error("unexpected `=`") - elseif tok_kind == K"Identifier" - if checked - TODO("Checked identifier names") + elseif leading_kind == K"=" + emit_diagnostic(ps, error="unexpected `=`") + bump(ps, ERROR_FLAG) + elseif leading_kind in (K"Identifier", K"VarIdentifier") + bump(ps) + elseif isoperator(leading_kind) || iskeyword(leading_kind) + # Operators and keywords are generally turned into identifiers if used + # as atoms. + if check_identifiers && (is_syntactic_operator(ps) || + is_closing_token(ps, leading_kind)) + bump(ps, error="Invalid identifier") end - take_token!(ps) - return GreenNode(tok) - elseif tok_kind == K"VarIdentifier" - take_token!(ps) - return GreenNode(tok) - elseif tok_kind == K"(" # parens or tuple - take_token!(ps) - return parse_paren(ps, checked) - elseif tok_kind == K"[" # cat expression - # NB: Avoid take_token! here? 
It's better to not consume tokens early - # take_token!(ps) - vex = parse_cat(ps, tok, K"]", ps.end_symbol) - elseif tok_kind == K"{" # cat expression + bump(ps, new_kind=K"Identifier") + elseif leading_kind == K"(" # parens or tuple + bump(ps, TRIVIA_FLAG) + parse_paren(ps, check_identifiers) + elseif leading_kind == K"[" # cat expression + TODO("parse_cat unimplemented") + parse_cat(ps, tok, K"]", ps.end_symbol) + elseif leading_kind == K"{" # cat expression take_token!(ps) TODO("""parse_cat(ps, K"}", )""") - elseif tok_kind == K"`" - TODO("(macrocall (core @cmd) ...)") - # return Expr(:macrocall, Expr(:core, Symbol("@cmd")), - elseif isliteral(tok_kind) - take_token!(ps) - return GreenNode(tok) - elseif is_closing_token(tok) - error("unexpected: $tok") + elseif leading_kind ∈ (K"String", K"TripleString") + parse_string_literal(ps) + elseif leading_kind == K"@" + bump(ps, TRIVIA_FLAG) + ps1 = ParseState(ps, space_sensitive=true) + parse_macro_name(ps) + TODO("parse macrocall") + elseif leading_kind in (K"Cmd", K"TripleCmd") + bump_invisible(ps, K"core_@cmd") + emit(ps, mark, K"macrocall") + elseif isliteral(leading_kind) + bump(ps) + elseif is_closing_token(ps, leading_kind) + # Leave closing token in place for other productions to + emit_diagnostic("Unexpected closing token") else - error("invalid syntax: `$tok`") + bump(ps, error="Invalid syntax") end - =# end # flisp: (define (valid-modref? 
e) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 855e8445ab4c5..41745ccb62d99 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -4,12 +4,15 @@ using Test using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, - raw_flags, TRIVIA_FLAG, INFIX_FLAG, + raw_flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, ERROR_FLAG, children, child, setchild!, SyntaxHead using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator using JuliaSyntax: highlight -using JuliaSyntax: ParseStream, bump, peek, emit +using JuliaSyntax: ParseStream, + peek, peek_token, + bump, bump_trivia, + emit, emit_diagnostic using JuliaSyntax: ParseState # Shortcuts for defining raw syntax nodes diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/test/simple_parser.jl index 5262ae0ff8aa7..19e160a2dbb22 100644 --- a/JuliaSyntax/test/simple_parser.jl +++ b/JuliaSyntax/test/simple_parser.jl @@ -1,70 +1,146 @@ -# Example parser for a very basic grammar -# -# This is simple but has some problems, most notably that expressions and terms -# aren't recursive so things like `a + b + c` can't be parsed! -# -# expression ::= -# term | term "+" term | term "-" term -# -# term ::= -# atom | atom "*" atom | atom "/" atom -# -# atom ::= -# literal | identifier | "(" expression ")" | "-" atom | "+" atom -# +# Example parser for a very basic language of expressions, calls and function +# definitions. 
+ +function parse_toplevel(st) + mark = position(st) + while true + bump_trivia(st, skip_newlines=true) + if peek(st) == K"EndMarker" + break + end + parse_statement(st) + end + emit(st, mark, K"toplevel") +end + +function parse_statement(st) + mark = position(st) + if peek(st) == K"function" + parse_function_def(st) + else + parse_assignment(st) + end +end + +function parse_function_def(st) + mark = position(st) + @assert peek(st) == K"function" + bump(st, TRIVIA_FLAG) + parse_call(st) + parse_block(st, K"end") + @assert peek(st) == K"end" + bump(st, TRIVIA_FLAG) + emit(st, mark, K"function") +end + +function parse_block(st, closing_kind) + mark = position(st) + while true + bump_trivia(st, skip_newlines=true) + if peek(st) == closing_kind + break + elseif peek(st) == K"EndMarker" + emit_diagnostic(st, error="Unexpecte end of input") + break + end + parse_assignment(st) + end + emit(st, mark, K"block") +end + +function parse_assignment(st) + mark = position(st) + parse_expression(st) + if peek(st) == K"=" + bump(st, TRIVIA_FLAG) + parse_expression(st) + emit(st, mark, K"=") + end +end + +function parse_expression(st) + mark = position(st) + parse_term(st) + while peek(st) in (K"+", K"-") + bump(st) + parse_term(st) + emit(st, mark, K"call", INFIX_FLAG) + end +end + +function parse_term(st) + mark = position(st) + parse_call(st) + while peek(st) in (K"*", K"/") + bump(st) + parse_call(st) + emit(st, mark, K"call", INFIX_FLAG) + end +end + +function parse_call(st) + mark = position(st) + parse_atom(st) + flags = EMPTY_FLAGS + if peek(st) == K"(" + bump(st, TRIVIA_FLAG) + need_comma = false + while true + k = peek(st) + if need_comma && k == K"," + bump(st, TRIVIA_FLAG) + k = peek(st) + need_comma = false + end + if k == K")" + bump(st, TRIVIA_FLAG) + break + elseif k == K"EndMarker" + emit_diagnostic(st, error="Unexpected end of input") + flags = ERROR_FLAG + break + elseif need_comma + emit_diagnostic(st, error="Expected a `,`") + flags = ERROR_FLAG + end + 
parse_expression(st) + need_comma = true + end + emit(st, mark, K"call", flags) + end +end function parse_atom(st) - p = position(st) + bump_trivia(st, skip_newlines=true) + mark = position(st) k = peek(st) if k == K"Identifier" || isliteral(k) bump(st) elseif k in (K"-", K"+") bump(st) parse_atom(st) - emit(st, p, K"call") + emit(st, mark, K"call") elseif k == K"(" bump(st, TRIVIA_FLAG) parse_expression(st) if peek(st) == K")" bump(st, TRIVIA_FLAG) - # emit(st, p, K"(") + # emit(st, mark, K"(") else - emit(st, p, K"(", + emit(st, mark, K"(", error="Expected `)` following expression") end else bump(st) - emit(st, p, K"Error", + emit(st, mark, K"Error", error="Expected literal, identifier or opening parenthesis") end end -function parse_term(st) - p = position(st) - parse_atom(st) - k = peek(st) - if k in (K"*", K"/") - bump(st) - parse_atom(st) - emit(st, p, K"call", INFIX_FLAG) - end -end - -function parse_expression(st) - p = position(st) - parse_term(st) - k = peek(st) - if k in (K"+", K"-") - bump(st) - parse_term(st) - emit(st, p, K"call", INFIX_FLAG) - end -end - function parse_and_show(production::Function, code) st = ParseStream(code) production(st) - t = JuliaSyntax.to_raw_tree(st) + t = JuliaSyntax.to_raw_tree(st, wrap_toplevel_as_kind=K"Error") show(stdout, MIME"text/plain"(), t, code, show_trivia=true) if !isempty(st.diagnostics) println() @@ -81,5 +157,14 @@ parse_and_show(parse_expression, "(x + a*y) * (b") println() println("Example good parse:") -parse_and_show(parse_expression, "(x + a*y) * b") +parse_and_show(parse_toplevel, + """ + function f(x, y) + z = (x + y) * 2 + z * z + end + + f(1,2) + """) + nothing From e88865b6b9d11677770b076d15ab308965f691dd Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 12 Dec 2021 21:10:44 +1000 Subject: [PATCH 0241/1109] devdocs: some disorganized musings on error recovery --- JuliaSyntax/README.md | 97 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git 
a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 8f914c97baa40..e98b5b199b58a 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -163,6 +163,103 @@ interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to `Core._expr(:call, :+, :y, x)`, but it could expand it to something like `Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? +## Error recovery + +Some disorganized musings about error recovery + +Different types of errors seem to occur... + +* Disallowed syntax (such as lack of spaces in conditional expressions) + where we can reasonably just continue parsing the production and emit the + node with an error flag which is otherwise fully formed. In some cases like + parsing infix expressions with a missing tail, emitting a zero width error + token can lead to a fully formed parse tree without the productions up the + stack needing to participate in recovery. +* A token which is disallowed in current context. Eg, `=` in parse_atom, or a + closing token inside an infix expression. Here we can emit a `K"Error"`, but + we can't descend further into the parse tree; we must pop several recursive + frames off. Seems tricky! + +A typical structure is as follows: + +```julia +function parse_foo(ps) + mark = position(ps) + parse_bar(ps) # What if this fails? + if peek(ps) == K"some-token" + bump(ps) + parse_baz(ps) # What if this fails? + emit(ps, mark, K"foo") + end +end +``` + +Emitting plain error tokens are good in unfinished infix expressions: + +```julia +begin + a = x + +end +``` + +The "missing end" problem is tricky, as the intermediate syntax is valid; the +problem is often only obvious until we get to EOF. + +Missing end +```julia +function f() + begin + a = 10 +end + +# <-- Indentation would be wrong if g() was an inner function of f. +function g() +end +``` + +It seems like ideal error recorvery would need to backtrack in this case. 
For +example: + +- Pop back to the frame which was parsing `f()` +- Backtrack through the parse events until we find a function with indentation + mismatched to the nesting of the parent. +- Reset ParseStream to a parsing checkpoint before `g()` was called +- Emit error and exit the function parsing `f()` +- Restart parsing +- Somehow make sure all of this can't result in infinite recursion 😅 + +For this kind of recovery it sure would be good if we could reify the program +stack into a parser state object... + +Missing commas or closing brackets in nested structures also present the +existing parser with a problem. + +```julia +f(a, + g(b, + c # -- missing comma? + d), + e) +``` + +Again the local indentation might tell a story + +```julia +f(a, + g(b, + c # -- missing closing `)` ? + d) +``` + +But not always! + +```julia +f(a, + g(b, + c # -- missing closing `)` ? + d) +``` + ## Fun research questions * Given the raw tree (the green tree, in Roslyn terminology) can we regress a From af757f339e8cd174cb510d61efeb750322e3258f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 13 Dec 2021 15:31:20 +1000 Subject: [PATCH 0242/1109] Consistent error reporting for raw->AST interface Errors are now reported by emitting a node of kind K"error" in the appropriate location and arranged so that the transformation from green tree to AST for a given node kind is always deterministic. This provides a much clearer contract between the parser and the downstream consumers of the green tree. Error nodes can encompass zero or several source code tokens, and be marked as trivia if they need to be ignored as part of the AST transformation. 
--- JuliaSyntax/README.md | 34 ++++++- JuliaSyntax/src/parse_stream.jl | 53 ++++++---- JuliaSyntax/src/parser.jl | 163 +++++++++++++++++------------- JuliaSyntax/src/syntax_tree.jl | 11 +- JuliaSyntax/src/token_kinds.jl | 5 +- JuliaSyntax/test/parser.jl | 20 +++- JuliaSyntax/test/runtests.jl | 4 +- JuliaSyntax/test/simple_parser.jl | 40 ++++---- 8 files changed, 209 insertions(+), 121 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index e98b5b199b58a..fe59e3d18c6b3 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -129,7 +129,7 @@ example with something like the normal Julia AST's iteration order. By pointing to green tree nodes, AST nodes become tracable back to the original source. -Unlike other languages, designing a new AST is tricky because the existing +Unlike most languages, designing a new AST is tricky because the existing `Expr` is a very public API used in every macro expansion. User-defined macro expansions interpose between the source text and lowering, and using `Expr` looses source information in many ways. @@ -163,7 +163,35 @@ interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to `Core._expr(:call, :+, :y, x)`, but it could expand it to something like `Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? -## Error recovery +## Parsing + +The goal of the parser is to produce well-formed heirarchical structure from +the source text. For interactive tools we need this to work even when the +source text contains errors, so it's the job of the parser to include the +recovery heuristics necessary to make this work. + +Concretely, the parser in `JuliaSyntax` should always produce a green tree +which is *well formed* in the sense that `GreenNode`s of a given `Kind` have +well-defined layout of children. This means the `GreenNode` to `SyntaxNode` +transformation is deterministic and tools can assume they're working with a +"mostly valid" AST. 
+ +What does "mostly valid" mean? We allow the tree to contain the following types +of error nodes: + +* Missing tokens or nodes may be **added** as placeholders when they're needed + to complete a piece of syntax. For example, we could parse `a + (b *` as + `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder. +* A sequence of unexpected tokens may be **removed** by collecting + them as children of an error node and treating them as syntax trivia during + AST construction. For example, `a + b end * c` could be parsed as the green + tree `(call-i a + b (error end * c))`, and turned into the AST `(call + a b)`. + +We want to encode both these cases in a way which is simplest for downstream +tools to use. This is an open question, but for now we use `K"error"` as the +token head, with the `TRIVIA_FLAG` set for unexpected syntax. + +### Error recovery Some disorganized musings about error recovery @@ -176,7 +204,7 @@ Different types of errors seem to occur... token can lead to a fully formed parse tree without the productions up the stack needing to participate in recovery. * A token which is disallowed in current context. Eg, `=` in parse_atom, or a - closing token inside an infix expression. Here we can emit a `K"Error"`, but + closing token inside an infix expression. Here we can emit a `K"error"`, but we can't descend further into the parse tree; we must pop several recursive frames off. Seems tricky! 
diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 01cb799d11afc..8fe7ae65355fe 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -69,9 +69,19 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, code) print(io, diagnostic.message, ":\n") p = first_byte(diagnostic.text_span) q = last_byte(diagnostic.text_span) - print(io, code[1:p-1]) + if !isvalid(code, q) + # Transform byte range into valid text range + q = prevind(code, q) + end + if q < p || (p == q && code[p] == '\n') + # An empty or invisible range! We expand it symmetrically to make it + # visible. + p = max(firstindex(code), prevind(code, p)) + q = min(lastindex(code), nextind(code, q)) + end + print(io, code[1:prevind(code, p)]) _printstyled(io, code[p:q]; color=(100,40,40)) - print(io, code[q+1:end], '\n') + print(io, code[nextind(code, q):end], '\n') end #------------------------------------------------------------------------------- @@ -202,11 +212,11 @@ Shift the current token into the output as a new text span with the given """ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, error=nothing, new_kind=K"Nothing") + emark = position(stream) + _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, new_kind) if !isnothing(error) - flags |= ERROR_FLAG - emit_diagnostic(stream, error=error) + emit(stream, emark, K"error", TRIVIA_FLAG, error=error) end - _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, new_kind) # Return last token location in output if needed for set_flags! 
return lastindex(stream.spans) end @@ -214,13 +224,18 @@ end """ Bump comments and whitespace tokens preceding the next token """ -function bump_trivia(stream::ParseStream; skip_newlines=false) +function bump_trivia(stream::ParseStream; skip_newlines=false, error=nothing) + emark = position(stream) _bump_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) + if !isnothing(error) + emit(stream, emark, K"error", TRIVIA_FLAG, error=error) + end return lastindex(stream.spans) end -function bump_invisible(stream::ParseStream, kind) - emit(stream, position(stream), kind) +function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; + error=nothing) + emit(stream, position(stream), kind, flags, error=error) return lastindex(stream.spans) end @@ -272,9 +287,6 @@ The `start_mark` of the span should be a previous return value of """ function emit(stream::ParseStream, start_mark::Integer, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - if !isnothing(error) - flags |= ERROR_FLAG - end text_span = TaggedRange(SyntaxHead(kind, flags), start_mark, stream.next_byte-1) if !isnothing(error) push!(stream.diagnostics, Diagnostic(text_span, error)) @@ -302,11 +314,10 @@ function emit_diagnostic(stream::ParseStream, mark=nothing; error, whitespace=fa mark = isnothing(mark) ? first_byte(stream.lookahead[begin_tok_i]) : mark err_end = last_byte(stream.lookahead[end_tok_i]) # It's a bit weird to require supplying a SyntaxHead here... 
- text_span = TaggedRange(SyntaxHead(K"Error", EMPTY_FLAGS), mark, err_end) + text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), mark, err_end) push!(stream.diagnostics, Diagnostic(text_span, error)) end - # Tree construction from the list of text spans held by ParseStream # # Note that this is largely independent of GreenNode, and could easily be @@ -356,7 +367,7 @@ function to_raw_tree(st; wrap_toplevel_as_kind=nothing) elseif !isnothing(wrap_toplevel_as_kind) # Mostly for debugging children = [x.node for x in stack] - return GreenNode(SyntaxHead(wrap_toplevel_as_kind, ERROR_FLAG), children...) + return GreenNode(SyntaxHead(wrap_toplevel_as_kind), children...) else error("Found multiple nodes at top level") end @@ -426,26 +437,24 @@ function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) bump(ps.stream, flags; skip_newlines=skip_nl, kws...) end -function bump_trivia(ps::ParseState; kws...) - bump_trivia(ps.stream; kws...) +function bump_trivia(ps::ParseState, args...; kws...) + bump_trivia(ps.stream, args...; kws...) end """ Bump a new zero-width "invisible" token at the current stream position. These -can be useful in several situations, for example, +can be useful in several situations. +When a token is implied but not present in the source text: * Implicit multiplication - the * is invisible `2x ==> (call 2 * x)` * Docstrings - the macro name is invisible `"doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1))` * Big integer literals - again, an invisible macro name `11111111111111111111 ==> (macrocall (core @int128_str) . 11111111111111111111)` - -By default if no `kind` is provided then the invisible token stays invisible -and will be discarded unless `reset_token!(kind=...)` is used. """ -function bump_invisible(ps::ParseState, kind=K"TOMBSTONE") - bump_invisible(ps.stream, kind) +function bump_invisible(ps::ParseState, args...; kws...) + bump_invisible(ps.stream, args...; kws...) 
end function reset_token!(ps::ParseState, args...; kws...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e0a908de6beca..da13d6dcb9b17 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -185,7 +185,7 @@ function parse_stmts(ps::ParseState) bump(ps) end if junk_mark != position(ps) - emit(ps, junk_mark, K"Error", + emit(ps, junk_mark, K"error", error="Extra tokens after end of expression") end if do_emit @@ -279,52 +279,50 @@ function parse_pair(ps::ParseState) parse_RtoL(ps, parse_cond, is_prec_pair, false, parse_pair) end +# Parse short form conditional expression +# a ? b : c ==> (if a b c) +# # flisp: (define (parse-cond s) function parse_cond(ps::ParseState) - cond_kind = K"if" mark = position(ps) parse_arrow(ps) t = peek_token(ps) if kind(t) != K"?" return end - cond_flags = EMPTY_FLAGS if !t.had_whitespace - # a? b : c - emit_diagnostic(ps, error="space required before `?` operator") - cond_flags |= ERROR_FLAG + # a? b : c => (if a (error-t) b c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required before `?` operator") end bump(ps, TRIVIA_FLAG) # ? - t = peek_token_or_emit_incomplete(ps, cond_kind, cond_flags, mark) - kind(t) == K"EndMarker" && return + t = peek_token(ps) if !t.had_whitespace # a ?b : c - emit_diagnostic(ps, error="space required after `?` operator") - cond_flags |= ERROR_FLAG + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required after `?` operator") end parse_eq_star(ParseState(ps, range_colon_enabled=false)) t = peek_token(ps) - if kind(t) != K":" - # a ? b: ==> (if-e a b) - emit(ps, mark, K"if", cond_flags, - error="colon expected in `?` expression") - return - end if !t.had_whitespace - # a ? b: c - emit_diagnostic(ps, error="space required before `:` in `?` expression") - cond_flags |= ERROR_FLAG + # a ? b: c ==> (if a [ ] [?] 
[ ] b (error-t) [:] [ ] c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required before `:` in `?` expression") + end + if kind(t) == K":" + bump(ps, TRIVIA_FLAG) + else + # a ? b c ==> (if a b (error) c) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression") end - bump(ps, TRIVIA_FLAG) # : - t = peek_token_or_emit_incomplete(ps, cond_kind, cond_flags, mark) - kind(t) == K"EndMarker" && return + t = peek_token(ps) if !t.had_whitespace - # a ? b :c - emit_diagnostic(ps, error="space required after `:` in `?` expression") - cond_flags |= ERROR_FLAG + # a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required after `:` in `?` expression") end parse_eq_star(ps) - emit(ps, mark, K"if", cond_flags) + emit(ps, mark, K"if") end # Parse arrows @@ -425,30 +423,46 @@ function parse_range(ps::ParseState) # [1 2:3 :a] ==> (vcat 1 (call-i 2 : 3) (quote a)) break end + t2 = peek_token(ps,2) + if kind(t2) in (K"<", K">") && !t2.had_whitespace + # Error heuristic: we found `:>` or `:<` which are invalid lookalikes + # for `<:` and `>:`. Attempt to recover by treating them as a + # comparison operator. + # a :> b ==> (call-i a (error : >) b) + bump_trivia(ps) + emark = position(ps) + bump(ps) # K":" + ks = untokenize(peek(ps)) + bump(ps) # K"<" or K">" + emit(ps, emark, K"error", + error="Invalid `:$ks` found, maybe replace with `$ks:`") + parse_expr(ps) + emit(ps, mark, K"call", INFIX_FLAG) + break + end n_colons += 1 bump(ps, n_colons == 1 ? 
EMPTY_FLAGS : TRIVIA_FLAG) - t2 = peek_token(ps) - if is_closing_token(ps, kind(t2)) - # 1: } ==> (call-i-e 1 :) - # 1:2: } ==> (call-i-e 1 : 2) - emit(ps, mark, K"call", INFIX_FLAG, - error="missing last argument in range expression") + t = peek_token(ps) + if is_closing_token(ps, kind(t)) + # 1: } ==> (call-i 1 : (error)) + # 1:2: } ==> (call-i 1 : 2 (error)) + bump_invisible(ps, K"error", + error="missing last argument in range expression") + emit(ps, mark, K"call", INFIX_FLAG) emit_diagnostic(ps, error="found unexpected closing token") return end - if t2.had_newline + if t.had_newline # Error message for people coming from python # === # 1: # 2 - # ==> (call-i-e 1 :) - emit(ps, mark, K"call", INFIX_FLAG|ERROR_FLAG) - emit_diagnostic(ps, error="line break after `:` in range expression") + # ==> (call-i 1 : (error)) + emit_diagnostic(ps, whitespace=true, + error="line break after `:` in range expression") + bump_invisible(ps, K"error") + emit(ps, mark, K"call", INFIX_FLAG) return - elseif kind(t2) in (K"<", K">") && !t2.had_whitespace - # :> and :< are not operators - ks = untokenize(kind(t2)) - emit_diagnostic(ps, error="Invalid `:$ks` found - did you mean `$ks:`?") end parse_expr(ps) if n_colons == 2 @@ -546,6 +560,9 @@ end function parse_unary_subtype(ps::ParseState) k = peek(ps, skip_newlines=true) if k == K"EndMarker" + # FIXME - should be in parse_atom!! + bump_invisible(ps, K"error", error="expected identifier") + return end parse_where(ps, parse_juxtapose) #TODO("parse_unary_subtype unimplemented") @@ -733,16 +750,16 @@ end # flisp: parse-def function parse_def(ps::ParseState, is_func, anon) mark = position(ps) - flags = EMPTY_FLAGS k = peek(ps) - parse_unary_prefix(ps) if (is_func && iskeyword(k)) || is_initial_reserved_word(ps, k) # Forbid things like - # function begin() end ==> (function-e begin (call)) - emit_diagnostic(ps, mark, - error="invalid $(is_func ? "function" : "macro") name") - # FIXME: Which node does this error go with? 
- flags |= ERROR_FLAGS + # function begin() end ==> (function (call (error begin))) + emark = position(ps) + bump(ps) + emit(ps, emark, K"error", + error="invalid $(is_func ? "function" : "macro") name") + else + parse_unary_prefix(ps) end parse_call_chain(ps, mark, false) if is_func && peek(ps) == K"::" @@ -1191,47 +1208,55 @@ end # # flisp: parse-atom function parse_atom(ps::ParseState, check_identifiers=true) - atom_mark = position(ps) bump_trivia(ps, skip_newlines=true) + atom_mark = position(ps) leading_kind = peek(ps) - # TODO: Reorder to put most likely tokens first + # TODO: Reorder to put most likely tokens first. This can be done because + # our tokens are richer in information than the flisp parser. if leading_kind == K":" # symbol/expression quote + # :foo => (quote foo) t = peek_token(ps, 2) k = kind(t) if is_closing_token(ps, k) && (!iskeyword(k) || t.had_whitespace) # : is a literal colon in some circumstances - # (:) ==> : - # begin : end ==> (block :) + # :) ==> : + # : end ==> : bump(ps) # K":" return end bump(ps, TRIVIA_FLAG) # K":" - # TODO: It's clunky to have to track flags here when we'll emit an - # error diagnostic. - flags = EMPTY_FLAGS if t.had_whitespace - emit_diagnostic(ps, error="whitespace not allowed after `:` used for quoting", - whitespace=true) - flags = ERROR_FLAG - elseif k == K"NewlineWs" - emit_diagnostic(ps, error="newline not allowed after `:` used for quoting") - flags = ERROR_FLAG + # : a ==> (quote (error-t) a)) + # === + # : + # a + # ==> (quote (error)) + bump_trivia(ps, skip_newlines=true, + error="whitespace not allowed after `:` used for quoting") + # Heuristic recovery + if kind(t) == K"NewlineWs" + bump_invisible(ps, K"error") + else + bump(ps) + end else # Being inside quote makes `end` non-special again. 
issue #27690 + # a[:(end)] ==> (ref a (quote (error-t end))) ps1 = ParseState(ps, end_symbol=false) parse_atom(ps1, false) - emit(ps1, atom_mark, K"quote", flags) end + emit(ps, atom_mark, K"quote") elseif leading_kind == K"=" - emit_diagnostic(ps, error="unexpected `=`") - bump(ps, ERROR_FLAG) + emark = position(ps) + bump(ps) + emit(ps, emark, K"error", TRIVIA_FLAG) elseif leading_kind in (K"Identifier", K"VarIdentifier") bump(ps) elseif isoperator(leading_kind) || iskeyword(leading_kind) # Operators and keywords are generally turned into identifiers if used # as atoms. - if check_identifiers && (is_syntactic_operator(ps) || + if check_identifiers && (is_syntactic_operator(leading_kind) || is_closing_token(ps, leading_kind)) bump(ps, error="Invalid identifier") end @@ -1243,7 +1268,6 @@ function parse_atom(ps::ParseState, check_identifiers=true) TODO("parse_cat unimplemented") parse_cat(ps, tok, K"]", ps.end_symbol) elseif leading_kind == K"{" # cat expression - take_token!(ps) TODO("""parse_cat(ps, K"}", )""") elseif leading_kind ∈ (K"String", K"TripleString") parse_string_literal(ps) @@ -1259,7 +1283,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps) elseif is_closing_token(ps, leading_kind) # Leave closing token in place for other productions to - emit_diagnostic("Unexpected closing token") + # recover with + emit_diagnostic(ps, error="Unexpected closing token") else bump(ps, error="Invalid syntax") end @@ -1307,10 +1332,10 @@ function parse_docstring(ps::ParseState, down=parse_eq) # TODO? This is not quite equivalent to the flisp parser which accepts # more than just a string. For example: #! ("doc") foo ==> (macrocall core_@doc "doc" foo) - # TODO: Also, all these TOMBSTONEs are inefficient. Perhaps we can improve - # things? + # TODO: Also, all these TOMBSTONEs seem kind of inefficient. Perhaps we can + # improve things? 
maybe_doc = peek(ps) in (K"String", K"TripleString") - atdoc_mark = bump_invisible(ps) + atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) if maybe_doc is_doc = true diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index b49a02c01dbcc..f6d03b8251015 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -7,7 +7,7 @@ const RawFlags = UInt32 EMPTY_FLAGS = 0x00000000 TRIVIA_FLAG = 0x00000001 INFIX_FLAG = 0x00000002 -ERROR_FLAG = 0x80000000 +# ERROR_FLAG = 0x80000000 struct SyntaxHead kind::Kind @@ -33,7 +33,7 @@ flags(node::GreenNode{SyntaxHead}) = head(node).flags istrivia(node::GreenNode{SyntaxHead}) = flags(node) & TRIVIA_FLAG != 0 isinfix(node::GreenNode{SyntaxHead}) = flags(node) & INFIX_FLAG != 0 -iserror(node::GreenNode{SyntaxHead}) = flags(node) & ERROR_FLAG != 0 +iserror(node::GreenNode{SyntaxHead}) = kind(node) == K"error" #------------------------------------------------------------------------------- # AST interface, built on top of raw tree @@ -70,8 +70,13 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val = Symbol(val_str) elseif k == K"core_@doc" val = GlobalRef(Core, :var"@doc") + elseif k == K"core_@cmd" + val = GlobalRef(Core, :var"@cmd") + elseif k in (K"error", K"Nothing") + val = nothing else - error("Can't parse literal of kind $k") + @error "Leaf node of kind $k unparsed" + val = nothing end return SyntaxNode(source, raw, position, nothing, :leaf, val) else diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 4af753bce456c..471f1b3daf750 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -3,7 +3,7 @@ const _str_to_kind = let Ts = TzTokens Dict([ "EndMarker" => Ts.ENDMARKER -"Error" => Ts.ERROR +"error" => Ts.ERROR "Comment" => Ts.COMMENT "Whitespace" => Ts.WHITESPACE "Identifier" => Ts.IDENTIFIER @@ -850,6 +850,7 @@ Dict([ "comprehension" => Ts.COMPREHENSION "typed_comprehension" => 
Ts.TYPED_COMPREHENSION "END_SYNTAX_KINDS" => Ts.end_syntax_kinds + ]) end @@ -869,6 +870,8 @@ for kw in split("""abstract baremodule begin break catch const toplevel tuple ref vect braces bracescat hcat vcat ncat typed_hcat typed_vcat typed_ncat generator flatten comprehension typed_comprehension + + error """) _kind_to_str_unique[_str_to_kind[kw]] = kw end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a009570dee9d8..4d6922180bdb3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,8 +1,8 @@ function test_parse(production, code) stream = ParseStream(code) production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.to_raw_tree(stream) - @test Text(sprint(JuliaSyntax.show_diagnostics, stream, code)) == Text("") + t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"error") + # @test Text(sprint(JuliaSyntax.show_diagnostics, stream, code)) == Text("") s = SyntaxNode(SourceFile(code), t) sprint(show, MIME("text/x.sexpression"), s) end @@ -23,6 +23,12 @@ tests = [ ], JuliaSyntax.parse_cond => [ "a ? b : c" => "(if :a :b :c)" + # Following are errors but should recover + "a? b : c" => "(if :a :b :c)" + "a ?b : c" => "(if :a :b :c)" + "a ? b: c" => "(if :a :b :c)" + "a ? b :c" => "(if :a :b :c)" + "a ? b c" => "(if :a :b :c)" #"a ?\nb : c" => "(if :a :b :c)" #"a ? b :\nc" => "(if :a :b :c)" ], @@ -54,6 +60,7 @@ tests = [ "1:2" => "(call :(:) 1 2)" "1:2:3" => "(call :(:) 1 2 3)" "a:b:c:d:e" => "(call :(:) (call :(:) :a :b :c) :d :e)" + "a :< b" => "(call (error :(:) :<) :a :b)" ], JuliaSyntax.parse_range => [ "a..b" => "(call :.. 
:a :b)" @@ -87,10 +94,19 @@ tests = [ #"&a" => "(& :a)" #"::a" => "(:: :a)" #"\$a" => "(\$ :a)" + #"\$\$a" => "(\$ (\$ :a))" ], JuliaSyntax.parse_docstring => [ "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" ], + JuliaSyntax.parse_atom => [ + ":foo" => "(quote :foo)" + # Literal colons + ":)" => ":(:)" + ": end" => ":(:)" + # Errors + ": foo" => "(quote :foo)" + ] ] @testset "$production" for (production, test_specs) in tests diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 41745ccb62d99..d756c6acf2f26 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -4,14 +4,14 @@ using Test using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, - raw_flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, ERROR_FLAG, + raw_flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild!, SyntaxHead using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator using JuliaSyntax: highlight using JuliaSyntax: ParseStream, peek, peek_token, - bump, bump_trivia, + bump, bump_trivia, bump_invisible, emit, emit_diagnostic using JuliaSyntax: ParseState diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/test/simple_parser.jl index 19e160a2dbb22..699b44f01a7fd 100644 --- a/JuliaSyntax/test/simple_parser.jl +++ b/JuliaSyntax/test/simple_parser.jl @@ -28,19 +28,17 @@ function parse_function_def(st) bump(st, TRIVIA_FLAG) parse_call(st) parse_block(st, K"end") - @assert peek(st) == K"end" - bump(st, TRIVIA_FLAG) emit(st, mark, K"function") end -function parse_block(st, closing_kind) - mark = position(st) +function parse_block(st, closing_kind, mark=position(st)) while true bump_trivia(st, skip_newlines=true) if peek(st) == closing_kind + bump(st, TRIVIA_FLAG) break elseif peek(st) == K"EndMarker" - emit_diagnostic(st, error="Unexpecte end of input") + emit_diagnostic(st, error="Unexpected end of input") break end parse_assignment(st) @@ -81,7 +79,6 @@ end function parse_call(st) mark = 
position(st) parse_atom(st) - flags = EMPTY_FLAGS if peek(st) == K"(" bump(st, TRIVIA_FLAG) need_comma = false @@ -97,16 +94,14 @@ function parse_call(st) break elseif k == K"EndMarker" emit_diagnostic(st, error="Unexpected end of input") - flags = ERROR_FLAG break elseif need_comma - emit_diagnostic(st, error="Expected a `,`") - flags = ERROR_FLAG + bump_invisible(st, K"error", TRIVIA_TOKEN, error="Expected a `,`") end parse_expression(st) need_comma = true end - emit(st, mark, K"call", flags) + emit(st, mark, K"call") end end @@ -127,12 +122,15 @@ function parse_atom(st) bump(st, TRIVIA_FLAG) # emit(st, mark, K"(") else - emit(st, mark, K"(", - error="Expected `)` following expression") + bump_invisible(st, K"error", TRIVIA_FLAG, + error="Expected `)` following expression") end + elseif k == K"begin" + bump(st, TRIVIA_FLAG) + parse_block(st, K"end", mark) else bump(st) - emit(st, mark, K"Error", + emit(st, mark, K"error", error="Expected literal, identifier or opening parenthesis") end end @@ -140,7 +138,7 @@ end function parse_and_show(production::Function, code) st = ParseStream(code) production(st) - t = JuliaSyntax.to_raw_tree(st, wrap_toplevel_as_kind=K"Error") + t = JuliaSyntax.to_raw_tree(st, wrap_toplevel_as_kind=K"error") show(stdout, MIME"text/plain"(), t, code, show_trivia=true) if !isempty(st.diagnostics) println() @@ -151,20 +149,24 @@ function parse_and_show(production::Function, code) t end -println() -println("Example diagnostics:") -parse_and_show(parse_expression, "(x + a*y) * (b") - println() println("Example good parse:") parse_and_show(parse_toplevel, """ function f(x, y) - z = (x + y) * 2 + z = x - y + begin + a + b + end z * z end f(1,2) """) +println() +println("Example diagnostics:") +parse_and_show(parse_expression, "(x + a*y) * (b") + nothing From 814085367c59f0755713fb6339c49b92436f34bc Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 15 Dec 2021 14:56:54 +1000 Subject: [PATCH 0243/1109] Porting of various parsing rules * 
Parentheses; various tuples, blocks and arglists * Justaposition (ow, my eyes!) * Space separated expressions for macros * Macros in parse_atom * Utils for conversion to Expr --- JuliaSyntax/src/green_tree.jl | 2 + JuliaSyntax/src/parse_stream.jl | 145 ++++++--- JuliaSyntax/src/parser.jl | 520 +++++++++++++++++++++++++------- JuliaSyntax/src/source_files.jl | 6 +- JuliaSyntax/src/syntax_tree.jl | 70 ++++- JuliaSyntax/src/token_kinds.jl | 9 +- JuliaSyntax/src/tokens.jl | 45 ++- JuliaSyntax/test/parser.jl | 78 ++++- 8 files changed, 678 insertions(+), 197 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 403efa27f9d6b..ea2b9c2eafa2b 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -63,6 +63,8 @@ span(node::GreenNode) = node.span head(node::GreenNode) = node.head # Predicates +# +# FIXME: All predicates should be consistently named, either with istrivia or is_trivia. istrivia(node::GreenNode) = istrivia(node.head) iserror(node::GreenNode) = iserror(node.head) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8fe7ae65355fe..053bfd10067a9 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -87,10 +87,10 @@ end #------------------------------------------------------------------------------- """ ParseStream provides an IO interface for the parser. 
It -- Wraps the lexer from Tokenize.jl with a short lookahead buffer +- Wraps the lexer with a lookahead buffer - Removes whitespace and comment tokens, shifting them into the output implicitly -This is simililar to rust-analyzer's +This is simililar in spirit to rust-analyzer's [TextTreeSink](https://github.com/rust-analyzer/rust-analyzer/blob/4691a0647b2c96cc475d8bbe7c31fe194d1443e7/crates/syntax/src/parsing/text_tree_sink.rs) """ mutable struct ParseStream @@ -118,6 +118,15 @@ function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) println(io, "ParseStream at position $(stream.next_byte)") end +function show_diagnostics(io::IO, stream::ParseStream, code) + for d in stream.diagnostics + show_diagnostic(io, d, code) + end +end + +#------------------------------------------------------------------------------- +# Stream input interface - the peek_* family of functions + # Buffer up until the next non-whitespace token. # This can buffer more than strictly necessary when newlines are significant, # but this is not a big problem. @@ -127,7 +136,6 @@ function _buffer_lookahead_tokens(stream::ParseStream) while true raw = Tokenize.Lexers.next_token(stream.lexer) k = TzTokens.exactkind(raw) - was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") was_newline = k == K"NewlineWs" had_whitespace |= was_whitespace @@ -180,6 +188,52 @@ function peek(stream::ParseStream, n::Integer=1, skip_newlines=false) kind(peek_token(stream, n, skip_newlines)) end +""" +Return true if the next token equals the string `str` + +This is a hack (ideally the tokenizer would provide tokens for any +identifiers which need special treatment) But occasionally the parser needs +access to interpret normal identifiers as contextural keywords or other special +syntactic constructs. 
+ +For example, the special parsing rules for `@doc` line contination :-/ +""" +function peek_equal_to(stream::ParseStream, str::String) + t = peek_token(stream) + if span(t) != ncodeunits(str) + return false + end + # Humongous but should-be-allocation-free hack: peek at the underlying data + # buffer. TODO: Attach the code string to the stream so we don't have to + # dig into the lexer? + buf = stream.lexer.io.data + cbuf = codeunits(str) + for i = 1:span(t) + if buf[first_byte(t) + i - 1] != cbuf[i] + return false + end + end + return true +end + +""" +Return the kind of the previous non-trivia span which was inserted. + +This is a bit hacky but can be handy on occasion. +""" +function peek_behind(stream::ParseStream) + for i = length(stream.spans):-1:1 + s = stream.spans[i] + if !istrivia(head(s)) + return kind(s) + end + end + return K"Nothing" +end + +#------------------------------------------------------------------------------- +# Stream output interface - the `bump_*` and `emit_*` family of functions + # Bump the next `n` tokens # flags and new_kind are applied to any non-trivia tokens function _bump_n(stream::ParseStream, n::Integer, flags, new_kind=K"Nothing") @@ -217,14 +271,16 @@ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, if !isnothing(error) emit(stream, emark, K"error", TRIVIA_FLAG, error=error) end - # Return last token location in output if needed for set_flags! + # Return last token location in output if needed for reset_token! return lastindex(stream.spans) end """ Bump comments and whitespace tokens preceding the next token + +**Skips newlines** by default. Set skip_newlines=false to avoid that. 
""" -function bump_trivia(stream::ParseStream; skip_newlines=false, error=nothing) +function bump_trivia(stream::ParseStream; skip_newlines=true, error=nothing) emark = position(stream) _bump_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) if !isnothing(error) @@ -233,6 +289,12 @@ function bump_trivia(stream::ParseStream; skip_newlines=false, error=nothing) return lastindex(stream.spans) end +""" +Bump an invisible zero-width token into the output + +This is useful when surrounding syntax implies the presence of a token. For +example, `2x` means `2*x` via the juxtoposition rules. +""" function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; error=nothing) emit(stream, position(stream), kind, flags, error=error) @@ -240,12 +302,11 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; end """ -Hack: Reset kind or flags of an existing token in the output stream +Reset kind or flags of an existing token in the output stream -This is necessary on some occasions when we don't know whether a token will -have TRIVIA_FLAG set until after consuming more input, or when we need to -insert a invisible token like core_@doc but aren't yet sure it'll be needed - -see bump_invisible() +This is a hack, but necessary on some occasions +* When some trailing syntax may change the kind or flags of the token +* When an invisible token might be required - see bump_invisible with K"TOMBSTONE" """ function reset_token!(stream::ParseStream, mark; kind=nothing, flags=nothing) @@ -256,34 +317,16 @@ function reset_token!(stream::ParseStream, mark; first_byte(text_span), last_byte(text_span)) end -#= -function accept(stream::ParseStream, k::Kind) - if peek(stream) != k - return false - else - bump(stream, TRIVIA_FLAG) - end -end -=# - -#= -function bump(stream::ParseStream, k::Kind, flags=EMPTY_FLAGS) - @assert peek(stream) == k - bump(stream, flags) -end -=# - function Base.position(stream::ParseStream) return stream.next_byte end """ - 
emit(stream, start_mark, kind, flags = EMPTY_FLAGS; error=nothing) + emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing) -Emit a new text span into the output which covers source bytes from -`start_mark` to the end of the most recent token which was `bump()`'ed. -The `start_mark` of the span should be a previous return value of -`position()`. +Emit a new text span into the output which covers source bytes from `mark` to +the end of the most recent token which was `bump()`'ed. The starting `mark` +should be a previous return value of `position()`. """ function emit(stream::ParseStream, start_mark::Integer, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) @@ -318,6 +361,8 @@ function emit_diagnostic(stream::ParseStream, mark=nothing; error, whitespace=fa push!(stream.diagnostics, Diagnostic(text_span, error)) end + +#------------------------------------------------------------------------------- # Tree construction from the list of text spans held by ParseStream # # Note that this is largely independent of GreenNode, and could easily be @@ -367,18 +412,12 @@ function to_raw_tree(st; wrap_toplevel_as_kind=nothing) elseif !isnothing(wrap_toplevel_as_kind) # Mostly for debugging children = [x.node for x in stack] - return GreenNode(SyntaxHead(wrap_toplevel_as_kind), children...) + return GreenNode(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), children...) else error("Found multiple nodes at top level") end end -function show_diagnostics(io::IO, stream::ParseStream, code) - for d in stream.diagnostics - show_diagnostic(io, d, code) - end -end - #------------------------------------------------------------------------------- """ ParseState carries parser context as we recursively descend into the parse @@ -422,6 +461,26 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, where_enabled === nothing ? 
ps.where_enabled : where_enabled) end +# Functions to change parse state + +function with_normal_context(ps::ParseState) + f(ParseState(ps, + range_colon_enabled=true, + space_sensitive=false, + where_enabled=false, + for_generator=false, + end_symbol=false, + whitespace_newline=false)) +end + +function with_space_sensitive(f::Function, ps::ParseState) + f(ParseState(ps, + space_sensitive=true, + whitespace_newline=false)) +end + +# Convenient wrappers for ParseStream + function peek(ps::ParseState, n=1; skip_newlines=nothing) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines peek(ps.stream, n, skip_nl) @@ -432,6 +491,14 @@ function peek_token(ps::ParseState, n=1; skip_newlines=nothing) peek_token(ps.stream, n, skip_nl) end +function peek_equal_to(ps::ParseState, args...) + peek_equal_to(ps.stream, args...) +end + +function peek_behind(ps::ParseState) + peek_behind(ps.stream) +end + function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines bump(ps.stream, flags; skip_newlines=skip_nl, kws...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index da13d6dcb9b17..8862e541a8f87 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1,18 +1,32 @@ #------------------------------------------------------------------------------- - # Parser Utils -# Like flisp: require-token +# Bump an expected closing token. If not found, discard unexpected tokens +# until we find it or another closing token. # -# * Skips newlines searching for the next token -# * Emits an error node if we've hit the end of the input -function peek_token_or_emit_incomplete(ps::ParseState, k, flags, mark) - t = peek_token(ps, skip_newlines=true) - if kind(t) == K"EndMarker" - emit(ps, mark, k, flags, - error="incomplete: premature end of input") +# Crude recovery heuristic: bump any tokens which aren't block or bracket +# closing tokens. 
+function bump_closing_token(ps, closing_kind) + bump_trivia(ps) + if peek(ps) == closing_kind + bump(ps, TRIVIA_FLAG) + return + end + # We didn't find the closing token. Read ahead in the stream + mark = position(ps) + while true + k = peek(ps) + if is_closing_token(ps, k) && !(k in (K",", K";")) + break + end + bump(ps) + end + # mark as trivia => ignore in AST. + emit(ps, mark, K"error", TRIVIA_FLAG, + error="Expected `$(untokenize(closing_kind))` but got unexpected tokens") + if peek(ps) == closing_kind + bump(ps, TRIVIA_FLAG) end - return t end function TODO(str) @@ -29,13 +43,18 @@ function bumpTODO(ps::ParseState) end end -function is_closing_token(ps::ParseState, t) - k = kind(t) +function is_closing_token(ps::ParseState, k) + k = kind(k) return k in (K"else", K"elseif", K"catch", K"finally", K",", K")", K"]", K"}", K";", K"EndMarker") || (k == K"end" && !ps.end_symbol) end +# Closing token which isn't a keyword +function is_non_keyword_closer(k) + kind(k) in (K",", K")", K"]", K"}", K";", K"EndMarker") +end + function is_initial_reserved_word(ps::ParseState, t) k = kind(t) is_iresword = k in ( @@ -47,6 +66,12 @@ function is_initial_reserved_word(ps::ParseState, t) return is_iresword && !(k == K"begin" && ps.end_symbol) end +function is_block_form(t) + kind(t) in (K"block", K"quote", K"if", K"for", K"while", + K"let", K"function", K"macro", K"abstract", + K"primitive", K"struct", K"try", K"module") +end + function is_syntactic_operator(t) k = kind(t) return k in (K"&&", K"||", K".", K"...", K"->") || @@ -66,11 +91,13 @@ end #------------------------------------------------------------------------------- # Parser # -# The definitions and top-level comments here were automatically generated to -# match the structure of Julia's official flisp-based parser. +# The definitions and top-level comments here were copied to match the +# structure of Julia's official flisp-based parser. 
# # This is to make both codebases mutually understandable and make porting # changes simple. +# +# The implementation using the ParseStream interface # parse left-to-right binary operator @@ -118,12 +145,12 @@ end # # flisp: (define (parse-Nary s down ops head closer? add-linenums) function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) - bump_trivia(ps, skip_newlines=true) + bump_trivia(ps) k = peek(ps) if k in closing_tokens return true end - # Skip leading operator + # Skip leading delimiter n_delims = 0 if k in delimiters bump(ps, TRIVIA_FLAG) @@ -161,8 +188,7 @@ end # ==> (block a b) # # flisp: (define (parse-block s (down parse-eq)) -function parse_block(ps::ParseState, down=parse_eq) - mark = position(ps) +function parse_block(ps::ParseState, down=parse_eq, mark=position(ps)) if parse_Nary(ps, down, (K"NewlineWs", K";"), (K"end", K"else", K"elseif", K"catch", K"finally")) emit(ps, mark, K"block") @@ -195,13 +221,13 @@ end # flisp: (define (parse-eq s) (parse-assignment s parse-comma)) function parse_eq(ps::ParseState) - parse_assignment(ps, parse_comma) + parse_assignment(ps, parse_comma, false) end # parse_eq_star is used where commas are special, for example in an argument list # # flisp: (define (parse-eq* s) -function parse_eq_star(ps::ParseState) +function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) if (isliteral(k) || k == K"Identifier") && k2 in (K",", K")", K"}", K"]") @@ -209,7 +235,7 @@ function parse_eq_star(ps::ParseState) # simple token followed by a common closing token bump(ps) else - parse_assignment(ps, parse_pair) + parse_assignment(ps, parse_pair, equals_is_kw) end end @@ -218,8 +244,10 @@ function is_eventually_call(ex) TODO("is_eventually_call unimplemented") end +# a = b ==> (= a b) +# # flisp: (define (parse-assignment s down) -function parse_assignment(ps::ParseState, down) +function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) mark = position(ps) down(ps) k = peek(ps) @@ 
-232,14 +260,16 @@ function parse_assignment(ps::ParseState, down) # Prefix operator ~x ? TODO("parse_assignment... ~ not implemented") else - parse_assignment(ps, down) # ~ is the only non-syntactic assignment-precedence operator. + # a ~ b ==> (call-i a ~ b) + parse_assignment(ps, down, equals_is_kw) emit(ps, mark, K"call", INFIX_FLAG) end else bump(ps, TRIVIA_FLAG) - parse_assignment(ps, down) - emit(ps, mark, k) + parse_assignment(ps, down, equals_is_kw) + result_k = (k == K"=" && equals_is_kw) ? K"kw" : k + emit(ps, mark, result_k) end end @@ -429,7 +459,7 @@ function parse_range(ps::ParseState) # for `<:` and `>:`. Attempt to recover by treating them as a # comparison operator. # a :> b ==> (call-i a (error : >) b) - bump_trivia(ps) + bump_trivia(ps, skip_newlines=false) emark = position(ps) bump(ps) # K":" ks = untokenize(peek(ps)) @@ -597,18 +627,62 @@ function parse_where(ps::ParseState, down) end end -# given an expression and the next token, is there a juxtaposition -# operator between them? +# given the previous expression kind and the next token, is there a +# juxtaposition operator between them? # -# flisp: (define (juxtapose? s expr t) -function is_juxtapose(s, expr, t) - TODO("is_juxtapose unimplemented") +# flisp: juxtapose? +function is_juxtapose(ps, prev_k, t) + k = kind(t) + + return !t.had_whitespace && + (is_number(prev_k) || + (!is_number(k) && # disallow "x.3" and "sqrt(2)2" + k != K"@" && # disallow "x@time" + !(is_block_form(prev_k) || + is_syntactic_unary_op(prev_k) || + is_initial_reserved_word(ps, prev_k) ))) && + (!isoperator(k) || is_radical_op(k)) && + !is_closing_token(ps, k) && + !is_initial_reserved_word(ps, k) end -# flisp: (define (parse-juxtapose s) +# Juxtoposition. Ugh! 
+# +# 2x ==> (call-i 2 * x) +# 2(x) ==> (call-i 2 * x) +# (2)(3)x ==> (call-i 2 * 3 x) +# (x-1)y ==> (call-i (- x 1) * y) +# +# flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) + mark = position(ps) parse_unary(ps) - #TODO("parse_juxtapose unimplemented") + n_terms = 1 + while true + prev_kind = peek_behind(ps) + t = peek_token(ps) + if !is_juxtapose(ps, prev_kind, t) + break + end + bump_invisible(ps, K"*") + if is_string(prev_kind) || is_string(t) + # issue #20575 + # + # "a""b" ==> (call-i "a" * (error) "b") + # "a"x ==> (call-i "a" * (error) x) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="cannot juxtapose string literal") + end + if is_radical_op(t) + parse_unary(ps) + else + parse_factor(ps) + end + n_terms += 1 + end + if n_terms > 1 + emit(ps, mark, K"call", INFIX_FLAG) + end end # flisp: (define (maybe-negate op num) @@ -640,7 +714,8 @@ end # # flisp: parse-factor function parse_factor(ps::ParseState) - TODO("parse_factor unimplemented") + # FIXME!! + bumpTODO(ps) ; return mark = position(ps) parse_unary_prefix(ps) parse_factor_with_initial_ex(ps, mark) @@ -702,7 +777,7 @@ function parse_call(ps::ParseState) parse_call_with_initial_ex(ps, mark) end -# flisp: (define (parse-call-with-initial-ex s ex tok) +# flisp: parse-call-with-initial-ex function parse_call_with_initial_ex(ps::ParseState, mark) k = peek(ps) if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") @@ -772,15 +847,10 @@ function parse_def(ps::ParseState, is_func, anon) end end -# flisp: (define (disallowed-space-error lno ex t) -function disallowed_space_error(lno, ex, t) - TODO("disallowed_space_error unimplemented") -end - # flisp: disallow-space -function disallow_space(ps, t) - if t.had_whitespace - emit_diagnostic(ps, mark, error="space disallowed before $t", whitespace=true) +function bump_disallowed_space(ps) + if peek_token(ps).had_whitespace + bump_trivia(ps, skip_newlines=false, error="whitespace is disallowed here") end end @@ 
-807,7 +877,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) break end if k == K"(" - disallow_space(ps, t) + bump_disallowed_space(ps, t) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") elseif k == K"[" @@ -893,7 +963,14 @@ end # flisp: (define (parse-macro-name s) function parse_macro_name(ps::ParseState) - TODO("parse_macro_name unimplemented") + bump_disallowed_space(ps) + with_space_sensitive(ps) do ps + if peek(ps) == K"." + bump(ps, new_kind=K"__dot__") + else + parse_atom(ps, false) + end + end end # flisp: (define (parse-atsym s) @@ -942,7 +1019,19 @@ end # flisp: (define (parse-space-separated-exprs s) function parse_space_separated_exprs(ps::ParseState) - TODO("parse_space_separated_exprs unimplemented") + with_space_sensitive(ps) do ps + n_sep = 0 + while true + k = peek(ps) + if is_closing_token(ps, k) || k == K"NewlineWs" || + (ps.for_generator && k == K"for") + break + end + parse_eq(ps) + n_sep += 1 + end + return n_sep + end end # flisp: (define (has-parameters? lst) @@ -959,17 +1048,72 @@ end # # flisp: (define (parse-call-arglist s closer) function parse_call_arglist(ps::ParseState, closer) - TODO("parse_call_arglist unimplemented") + parse_arglist(ParseState(ps, for_generator=true), closer) end -# handle function call argument list, or any comma-delimited list. -# . an extra comma at the end is allowed -# . expressions after a ; are enclosed in (parameters ...) -# . an expression followed by ... becomes (... x) +# Handle function call argument list, or any comma-delimited list. +# * an extra comma at the end is allowed +# * expressions after a ; are enclosed in (parameters ...) +# * an expression followed by ... becomes (... 
x) # -# flisp: (define (parse-arglist s closer (add-linenums #f)) -function parse_arglist(ps::ParseState, closer; add_linenums=false) - TODO("parse_arglist unimplemented") +# flisp: parse-arglist +function parse_arglist(ps0::ParseState, closing_kind, equals_is_kw=true) + ps = ParseState(ps0, range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + whitespace_newline=true) + params_mark = 0 + while true + bump_trivia(ps) + k = peek(ps) + if k == closing_kind + break + elseif k == K";" + # Start of "parameters" list + # a, b; c d) ==> a b (parameters c d) + if params_mark != 0 + # a, b; c d; e f) ==> a b (parameters c d (parameters e f)) + TODO("nested parameters") + end + params_mark = position(ps) + equals_is_kw = true + bump(ps, TRIVIA_FLAG) + bump_trivia(ps) + else + prefix_mark = position(ps) + parse_eq_star(ps, equals_is_kw) + t = peek_token(ps, skip_newlines=true) + k = kind(t) + bump_trivia(ps) # FIXME! Handle EndMarker in all such bump_trivia()'s + if k == K"," + bump(ps, TRIVIA_FLAG) + elseif k == K";" || k == closing_kind + # Handled above + continue + elseif k == K"for" + if !t.had_whitespace + bump_invisible(ps, K"error", + error="expected whitespace before for") + end + bump(ps, TRIVIA_FLAG) + parse_generator(ps, prefix_mark) + else + k_str = untokenize(k) + ck_str = untokenize(closing_kind) + if k in (K"]", K"}") + emit_diagnostic(ps, error="unexpected `$k_str` in argument list") + else + emit_diagnostic(ps, error="missing comma or $ck_str in argument list") + end + # Recovery done after loop + break + end + end + end + if params_mark != 0 + emit(ps, params_mark, K"parameters") + end + bump_closing_token(ps, closing_kind) end # flisp: (define (parse-vect s first closer) @@ -1013,54 +1157,181 @@ function parse_cat(ps::ParseState, closer, last_end_symbol) end end -# flisp: (define (kw-to-= e) (if (kwarg? 
e) (cons '= (cdr e)) e)) -function kw_to_equals(e) - TODO("kw_to_equals unimplemented") -end -# flisp: (define (=-to-kw e) (if (assignment? e) (cons 'kw (cdr e)) e)) -function equals_to_kw(e) - TODO("equals_to_kw unimplemented") +# flisp: parse-paren +function parse_paren(ps::ParseState, check_identifiers=true) + parse_paren_(ps, check_identifiers) end -# translate nested (parameters ...) expressions to a statement block if possible -# this allows us to first parse tuples using parse-arglist +# Parse un-prefixed parenthesized syntax. This is hard because parentheses are +# *very* overloaded! Possible forms: # -# flisp: (define (parameters-to-block e) -function parameters_to_block(e) - TODO("parameters_to_block unimplemented") -end - -# flisp: (define (rm-linenums e) -function rm_linenums(e) - TODO("rm_linenums unimplemented") -end - -# convert an arglist to a tuple or block expr -# leading-semi? means we saw (; ...) -# comma? means there was a comma after the first expression +# Parentheses used for grouping +# (a * b) ==> (call * a b) +# (a=1) ==> (= a 1) +# (x) ==> (x) # -# flisp: (define (arglist-to-tuple s leading-semi? comma? args . 
first) -function arglist_to_tuple(s, is_leading_semi, is_comma, args, _, first) - TODO("arglist_to_tuple unimplemented") -end - -# flisp: (define (tuple-to-arglist e) -function tuple_to_arglist(e) - TODO("tuple_to_arglist unimplemented") -end - -# The initial ( has been bumped +# Block syntax +# (a=1; b=2) ==> (block (= a 1) (= b 2)) +# (a=1;) ==> (block (= a 1)) +# (;;) ==> (block) # -# flisp: (define (parse-paren s (checked #t)) (car (parse-paren- s checked))) -function parse_paren(ps::ParseState, check_identifiers=true) - TODO("parse_paren unimplemented") +# Tuple syntax +# (a,) ==> (tuple a) +# (a,b) ==> (tuple a b) +# +# Named tuple syntax +# (a=1, b=2) ==> (tuple (= a 1) (= b 2)) +# (; a=1) ==> (tuple (parameters (kw a 1))) +# (;) ==> (tuple (parameters)) +# +# Generators +# (i for i in 1:10) +# +# Frankentuples and nested parameters +# (; a=1; b=2) ==> (tuple (parameters (kw a 1) (parameters (kw b 2)))) +# (a=1, b=2; c=3, d=4) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3) (parameters (kw d 4)))) +# +# The worst ambiguity is with messes like the following: +# (a;b=1;c;d) => a block with (= b 1) vs +# (a;b=1;c;d, e) => nested parameters with (kw b 1) +# +# This is such a mess (and furthermore, a largely invalid syntactic +# edgecase)... maybe we should just assume it's a block and deal with it via +# backtracking? +# +# To add to this, we've got to deal with prefixed parenthesized syntax... +# +# Return true iff this isn't just a parenthesized expr. 
+# +# flisp: parse-paren- +function parse_paren_(ps0::ParseState, check_identifiers) + ps = ParseState(ps0, range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + whitespace_newline=true) + mark = position(ps) + @assert peek(ps) == K"(" + bump(ps, TRIVIA_FLAG) # K"(" + after_paren_mark = position(ps) + k = peek(ps) + if k == K")" + # () ==> (tuple) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, K"tuple") + return true + elseif is_syntactic_operator(k) + # allow :(=) etc in unchecked contexts, eg quotes + # :(=) ==> (quote =) + if check_identifiers && !is_valid_identifier(k) + bump(ps, error="invalid identifier") + else + bump(ps) + end + bump_closing_token(ps, K")") + return false + elseif !check_identifiers && k == K"::" && peek(ps, 2, skip_newlines=true) == K")" + # allow :(::) as a special case + # :(::) ==> (quote ::) + bump(ps) + bump(ps, TRIVIA_FLAG, skip_newlines=true) + return false + elseif k == K";" + # Tuples and empty blocks + if peek(ps, 2) == K";" + # (;;) ==> (block) + parse_paren_block(ps) + bump_closing_token(ps, K")") + emit(ps, mark, K"block") + return false + end + # Named tuple + # (;) ==> (tuple (parameters)) + # (; a=1) ==> (tuple (parameters (kw a 1))) + parse_arglist(ps, K")") + # FIXME Nested parameters + # (; a=1; b=2) + emit(ps, mark, K"tuple") + return true + else + # Here we parse the first subexpression separately, so + # we can look for a comma to see if it's a tuple. + # This lets us distinguish (x) from (x,) + parse_eq_star(ps) + k = peek(ps) + if k == K")" + # value in parentheses + # (x) ==> x + bump(ps, TRIVIA_FLAG) + # FIXME: return true if it has the ... 
suffix :-/ + return false + elseif k == K"," + # Tuple syntax + # (x,) ==> (tuple x) + # (x,y) ==> (tuple x y) + # (x=1,y=2) ==> (tuple (= x 1) (y 2)) + # Frankentuple syntax + # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3))) + bump(ps, TRIVIA_FLAG) + parse_arglist(ps, K")", false) + emit(ps, mark, K"tuple") + return false + elseif k == K";" + # In normal valid Julia code this is block syntax + # (a;b;;c) ==> (block a b c) + parse_paren_block(ps) + bump_closing_token(ps, K")") + # TODO: + # consider `(x...; ` the start of an arglist, since it's not useful as a block + emit(ps, mark, K"block") + elseif k == K"for" + if !peek_token(ps).had_whitespace + bump_invisible(ps, K"error", + error="expected whitespace before for") + end + bump(ps, TRIVIA_FLAG) + parse_generator(ps, after_paren_mark) + bump_closing_token(ps, K")") + return false + else + bump_closing_token(ps, K")") + return true + end + end end -# return (expr . arglist) where arglist is #t iff this isn't just a parenthesized expr +# Parse (the remainder of) a parenthesized block +# (a;b;c) ==> (block a b c) +# (;;) ==> (block) # -# flisp: (define (parse-paren- s checked) -function parse_paren_(ps::ParseState, check_identifiers) - TODO("parse_paren_ unimplemented") +# Also parse nested parameters lists in the same way that the flisp parser +# does (TODO) +# (a=1; b=2; c=3,d=4) ==> (tuple (= a 1) (parameters (kw b 2) (parameters (kw c 3) (kw d 4)))) +function parse_paren_block(ps) + need_delim = true + while true + k = peek(ps) + if k == K";" + bump(ps, TRIVIA_FLAG) + need_delim = false + continue + elseif is_closing_token(ps, k) + break + elseif k == K"," + # (a;b;c,d) ==> (tuple a (parameters b (parameters c d))) + # + # It's not clear this nested representation was an + # intentional choice in the flisp parser. It seems kind of + # awful! + TODO("Backtrack and parse as a nested parameters block") + break + else + if need_delim + break # ) or error; we'll deal with it in the caller. 
+ end + parse_eq_star(ps) + need_delim = true + end + end end # flisp: (define (not-eof-for delim c) @@ -1208,7 +1479,7 @@ end # # flisp: parse-atom function parse_atom(ps::ParseState, check_identifiers=true) - bump_trivia(ps, skip_newlines=true) + bump_trivia(ps) atom_mark = position(ps) leading_kind = peek(ps) # TODO: Reorder to put most likely tokens first. This can be done because @@ -1232,8 +1503,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # : # a # ==> (quote (error)) - bump_trivia(ps, skip_newlines=true, - error="whitespace not allowed after `:` used for quoting") + bump_trivia(ps, error="whitespace not allowed after `:` used for quoting") # Heuristic recovery if kind(t) == K"NewlineWs" bump_invisible(ps, K"error") @@ -1243,16 +1513,21 @@ function parse_atom(ps::ParseState, check_identifiers=true) else # Being inside quote makes `end` non-special again. issue #27690 # a[:(end)] ==> (ref a (quote (error-t end))) - ps1 = ParseState(ps, end_symbol=false) - parse_atom(ps1, false) + parse_atom(ParseState(ps, end_symbol=false), false) end emit(ps, atom_mark, K"quote") elseif leading_kind == K"=" emark = position(ps) bump(ps) emit(ps, emark, K"error", TRIVIA_FLAG) - elseif leading_kind in (K"Identifier", K"VarIdentifier") + elseif leading_kind == K"Identifier" bump(ps) + elseif leading_kind == K"VarIdentifier" + bump(ps) + t = peek_token(ps) + if !t.had_whitespace && !(isoperator(kind(t)) || is_non_keyword_closer(t)) + bump(ps, error="suffix not allowed after var\"...\" syntax") + end elseif isoperator(leading_kind) || iskeyword(leading_kind) # Operators and keywords are generally turned into identifiers if used # as atoms. 
@@ -1262,28 +1537,53 @@ function parse_atom(ps::ParseState, check_identifiers=true) end bump(ps, new_kind=K"Identifier") elseif leading_kind == K"(" # parens or tuple - bump(ps, TRIVIA_FLAG) parse_paren(ps, check_identifiers) elseif leading_kind == K"[" # cat expression TODO("parse_cat unimplemented") parse_cat(ps, tok, K"]", ps.end_symbol) elseif leading_kind == K"{" # cat expression TODO("""parse_cat(ps, K"}", )""") - elseif leading_kind ∈ (K"String", K"TripleString") - parse_string_literal(ps) + elseif is_string(leading_kind) + bump(ps) + # FIXME parse_string_literal(ps) elseif leading_kind == K"@" bump(ps, TRIVIA_FLAG) - ps1 = ParseState(ps, space_sensitive=true) - parse_macro_name(ps) - TODO("parse macrocall") + with_space_sensitive(ps) do ps + is_doc_macro = peek_equal_to(ps, "doc") + # FIXME: The following syntactic oddity is also allowed 😱 + # @Mod.mac a b ==> (macrocall (. Mod mac) a b) + # see macroify-name + parse_macro_name(ps) + if peek_token(ps).had_whitespace + # Space separated macro arguments + # @mac a b ==> (macrocall mac a b) + n_args = parse_space_separated_exprs(ps) + if is_doc_macro && n_args == 1 + # Parse extended @doc args on next line + # + # @doc x\ny ==> (macrocall doc x y) + # @doc x y\nz ==> (macrocall doc x y) + # @doc x\nend ==> (macrocall doc x) + if peek(ps) == K"NewlineWs" && !is_closing_token(ps, peek(ps, 2)) + bump(ps) # newline + parse_eq(ps) + end + end + else + # Parenthesized macro arguments + # @mac(a,b) ==> (macrocall mac a b) + TODO("Parenthesized macro arguments") + end + emit(ps, atom_mark, K"macrocall") + end elseif leading_kind in (K"Cmd", K"TripleCmd") bump_invisible(ps, K"core_@cmd") - emit(ps, mark, K"macrocall") + emit(ps, atom_mark, K"macrocall") elseif isliteral(leading_kind) bump(ps) elseif is_closing_token(ps, leading_kind) # Leave closing token in place for other productions to - # recover with + # recover with (??) 
emit_diagnostic(ps, error="Unexpected closing token") else bump(ps, error="Invalid syntax") @@ -1326,6 +1626,8 @@ function is_doc_string_literal(s, e) end # Parse docstrings attached by a space or single newline +# "doc" foo ==> +# # flisp: (define (parse-docstring s production) function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) @@ -1334,7 +1636,7 @@ function parse_docstring(ps::ParseState, down=parse_eq) #! ("doc") foo ==> (macrocall core_@doc "doc" foo) # TODO: Also, all these TOMBSTONEs seem kind of inefficient. Perhaps we can # improve things? - maybe_doc = peek(ps) in (K"String", K"TripleString") + maybe_doc = is_string(peek(ps)) atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) if maybe_doc diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index e16d548202296..30bf3fff9fa7a 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -25,7 +25,7 @@ function SourceFile(code::AbstractString; filename=nothing) end # Get line number of the given byte within the code -function line_number(source::SourceFile, byte_index) +function source_line(source::SourceFile, byte_index) searchsortedlast(source.line_starts, byte_index) end @@ -43,6 +43,10 @@ function source_location(source::SourceFile, byte_index) line, column end +function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) + LineNumberNode(source_line(source, byte_index), source.filename) +end + function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) if !isnothing(source.filename) print(io, source.filename, '\n', diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index f6d03b8251015..1998a52dc3576 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -17,6 +17,10 @@ end kind(head::SyntaxHead) = head.kind flags(head::SyntaxHead) = head.flags +istrivia(head::SyntaxHead) = flags(head) & TRIVIA_FLAG != 0 +isinfix(head::SyntaxHead) = flags(head) & 
INFIX_FLAG != 0 +iserror(head::SyntaxHead) = kind(head) == K"error" + function Base.summary(head::SyntaxHead) _kind_str(kind(head)) end @@ -31,9 +35,7 @@ end kind(node::GreenNode{SyntaxHead}) = head(node).kind flags(node::GreenNode{SyntaxHead}) = head(node).flags -istrivia(node::GreenNode{SyntaxHead}) = flags(node) & TRIVIA_FLAG != 0 -isinfix(node::GreenNode{SyntaxHead}) = flags(node) & INFIX_FLAG != 0 -iserror(node::GreenNode{SyntaxHead}) = kind(node) == K"error" +isinfix(node) = isinfix(head(node)) #------------------------------------------------------------------------------- # AST interface, built on top of raw tree @@ -52,6 +54,11 @@ mutable struct SyntaxNode val::Any end +struct ErrorVal +end + +Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) + function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) if !haschildren(raw) # Leaf node @@ -64,18 +71,25 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val = Base.parse(Int, val_str) elseif k == K"Identifier" val = Symbol(val_str) + elseif k == K"VarIdentifier" + val = Symbol(val_str[5:end-1]) elseif k == K"String" val = unescape_string(source[position+1:position+span(raw)-2]) elseif isoperator(k) - val = Symbol(val_str) + val = isempty(val_range) ? 
+ Symbol(untokenize(k)) : # synthetic invisible tokens + Symbol(val_str) + @assert !isnothing(val) elseif k == K"core_@doc" val = GlobalRef(Core, :var"@doc") elseif k == K"core_@cmd" val = GlobalRef(Core, :var"@cmd") - elseif k in (K"error", K"Nothing") - val = nothing + elseif k == K"error" + val = ErrorVal() + elseif k == K"__dot__" + val = :__dot__ else - @error "Leaf node of kind $k unparsed" + @error "Leaf node of kind $k unknown to SyntaxNode" val = nothing end return SyntaxNode(source, raw, position, nothing, :leaf, val) @@ -107,16 +121,18 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end end -function interpolate_literal(node::SyntaxNode, val) - @assert node.head == :$ - SyntaxNode(node.source, node.raw, node.position, node.parent, :leaf, val) -end +head(node::SyntaxNode) = node.head haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () span(node::SyntaxNode) = span(node.raw) +function interpolate_literal(node::SyntaxNode, val) + @assert node.head == :$ + SyntaxNode(node.source, node.raw, node.position, node.parent, :leaf, val) +end + function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename #@info "" fname print_fname current_filename[] @@ -245,3 +261,35 @@ function highlight(code::String, node, path::Int...; color=(40,40,70)) _printstyled(stdout, code[p:q-1]; color) print(stdout, code[q:end]) end + + +#------------------------------------------------------------------------------- +# Conversion to Base.Expr + +function _macroify_name(name) + @assert name isa Symbol # fixme + Symbol('@', name) +end + +function _to_expr(node::SyntaxNode) + if haschildren(node) + args = Vector{Any}(undef, length(children(node))) + args = map!(_to_expr, args, children(node)) + # Convert elements + if head(node) == :macrocall + line_node = source_location(LineNumberNode, node.source, node.position) + args[1] = 
_macroify_name(args[1]) + insert!(args, 2, line_node) + elseif head(node) == :call || head(node) == :tuple + if length(args) > 1 && Meta.isexpr(args[end], :parameters) + pushfirst!(args, args[end]) + pop!(args) + end + end + Expr(head(node), args...) + else + node.val + end +end + +Base.Expr(node::SyntaxNode) = _to_expr(node) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 471f1b3daf750..0742650afd51c 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -812,10 +812,6 @@ Dict([ "END_OPS" => Ts.end_ops -# Cute synonyms (too cute? -# " " => Ts.WHITESPACE -# "\n" => Ts.NEWLINE_WS - "BEGIN_INVISIBLE_TOKENS" => Ts.begin_invisible_tokens "TOMBSTONE" => Ts.TOMBSTONE "core_@doc" => Ts.CORE_AT_DOC @@ -823,6 +819,7 @@ Dict([ "core_@int128_str" => Ts.CORE_AT_INT128_STR "core_@uint128_str" => Ts.CORE_AT_UINT128_STR "core_@big_str" => Ts.CORE_AT_BIG_STR +"__dot__" => Ts.__DOT__ "END_INVISIBLE_TOKENS" => Ts.end_invisible_tokens # Our custom syntax tokens @@ -833,6 +830,8 @@ Dict([ "curly" => Ts.CURLY "string" => Ts.STRING_INTERP "macrocall" => Ts.MACROCALL +"kw" => Ts.KW # the = in f(a=1) +"parameters" => Ts.PARAMETERS # the list after ; in f(; a=1) "toplevel" => Ts.TOPLEVEL "tuple" => Ts.TUPLE "ref" => Ts.REF @@ -866,7 +865,7 @@ for kw in split("""abstract baremodule begin break catch const macro module mutable new outer primitive quote return struct try type using while - block call comparison curly string macrocall + block call comparison curly string macrocall kw parameters toplevel tuple ref vect braces bracescat hcat vcat ncat typed_hcat typed_vcat typed_ncat generator flatten comprehension typed_comprehension diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 7346af828f56d..1fa882c8c709d 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -47,6 +47,19 @@ is_prec_unicode_ops(t) = K"BEGIN_UNICODE_OPS" < kind(t) < K"END_UNICODE_OPS" is_prec_pipe_lt(t) = kind(t) == 
K"<|" is_prec_pipe_gt(t) = kind(t) == K"|>" +#= +# Sholuld we optimize membership a bit by unrolling? +@generated function in(k::Kind, t::NTuple{N,Kind}) where {N} + ex = :(k === t[1]) + for i = 2:N + ex = :($ex || k === t[$i]) + end + quote + $ex + end +end +=# + # Operators which are boty unary and binary function is_both_unary_and_binary(t) # TODO: Do we need to check dotop as well here? @@ -58,32 +71,16 @@ function is_number(t) kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") end -function is_whitespace(t) - kind(t) in (K"Whitespace", K"NewlineWs") +function is_string(t) + kind(t) in (K"String", K"TripleString") end -""" -Get the "binding power" (precedence level) of an operator kind -""" -function binding_power(k::Kind) - return k < K"END_ASSIGNMENTS" ? 1 : - k < K"END_CONDITIONAL" ? 2 : - k < K"END_ARROW" ? 3 : - k < K"END_LAZYOR" ? 4 : - k < K"END_LAZYAND" ? 5 : - k < K"END_COMPARISON" ? 6 : - k < K"END_PIPE" ? 7 : - k < K"END_COLON" ? 8 : - k < K"END_PLUS" ? 9 : - k < K"END_BITSHIFTS" ? 10 : - k < K"END_TIMES" ? 11 : - k < K"END_RATIONAL" ? 12 : - k < K"END_POWER" ? 13 : - k < K"END_DECL" ? 14 : - k < K"END_WHERE" ? 15 : - k < K"END_DOT" ? 16 : - k < K"END_OPS" ? 17 : # ?? 
unary ops - error("Not an operator") +function is_radical_op(t) + kind(t) in (K"√", K"∛", K"∜") +end + +function is_whitespace(t) + kind(t) in (K"Whitespace", K"NewlineWs") end function _kind_str(k::Kind) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4d6922180bdb3..9ffda895057df 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,12 +1,40 @@ function test_parse(production, code) stream = ParseStream(code) production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"error") + t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") # @test Text(sprint(JuliaSyntax.show_diagnostics, stream, code)) == Text("") s = SyntaxNode(SourceFile(code), t) sprint(show, MIME("text/x.sexpression"), s) end +# Version of test_parse for interactive exploration +function itest_parse(production, code) + stream = ParseStream(code) + production(JuliaSyntax.ParseState(stream)) + t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") + s = SyntaxNode(SourceFile(code, filename="none"), t) + ex = Expr(s) + + println(stdout, "# Code:\n$code\n") + + println(stdout, "# Green tree:") + show(stdout, MIME"text/plain"(), t, code) + JuliaSyntax.show_diagnostics(stdout, stream, code) + + println(stdout, "\n# SyntaxNode:") + show(stdout, MIME"text/x.sexpression"(), s) + + println(stdout, "\n\n# Julia Expr:") + show(stdout, MIME"text/plain"(), ex) + + f_ex = Base.remove_linenums!(Meta.parse(code, raise=false)) + if ex != f_ex + printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) + show(stdout, MIME"text/plain"(), f_ex) + end + (code, stream, t, s, ex) +end + # TODO: # * Extract the following test cases from the source itself. 
# * Use only the green tree to generate the S-expressions @@ -86,6 +114,9 @@ tests = [ JuliaSyntax.parse_term => [ "a * b * c" => "(call :* :a :b :c)" ], + JuliaSyntax.parse_juxtapose => [ + "2x" => "(call :* 2 :x)" + ], JuliaSyntax.parse_decl => [ #"a::b" => "(:: a b)" #"a->b" => "(-> a b)" @@ -96,22 +127,53 @@ tests = [ #"\$a" => "(\$ :a)" #"\$\$a" => "(\$ (\$ :a))" ], - JuliaSyntax.parse_docstring => [ - "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" + JuliaSyntax.parse_paren => [ + # Parentheses used for grouping + # NB: The toplevel below is an artificial part of the test setup + "(a * b)" => "(toplevel (call :* :a :b))" + "(a=1)" => "(toplevel (= :a 1))" + "(x)" => "(toplevel :x)" + # Block syntax + "(a=1; b=2)" => "(block (= :a 1) (= :b 2))" + "(a=1;)" => "(block (= :a 1))" + "(;;)" => "(block )" + # Tuple syntax + "(a,)" => "(tuple :a)" + "(a,b)" => "(tuple :a :b)" + # Named tuple syntax + "(a=1, b=2)" => "(tuple (= :a 1) (= :b 2))" + "(; a=1)" => "(tuple (parameters (kw :a 1)))" + "(;)" => "(tuple (parameters ))" + # Franken tuple "syntax" + "(a=1, b=2; c=3)" => "(tuple (= :a 1) (= :b 2) (parameters (kw :c 3)))" + # "(a=1, b=2; c=3; d=4)" # BROKEN! ], JuliaSyntax.parse_atom => [ ":foo" => "(quote :foo)" # Literal colons ":)" => ":(:)" ": end" => ":(:)" + # Macros + "@foo x y" => "(macrocall :foo :x :y)" + "@foo x\ny" => "(macrocall :foo :x)" + # Doc macro parsing + "@doc x\ny" => "(macrocall :doc :x :y)" + "@doc x\nend" => "(macrocall :doc :x)" + "@doc x y\nz" => "(macrocall :doc :x :y)" + # __dot__ macro + "@. 
x y" => "(macrocall :__dot__ :x :y)" # Errors ": foo" => "(quote :foo)" - ] + ], + JuliaSyntax.parse_docstring => [ + "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" + ], ] -@testset "$production" for (production, test_specs) in tests - for (input,output) in test_specs - @test test_parse(production, input) == output +@testset "Inline test cases" begin + @testset "$production" for (production, test_specs) in tests + for (input,output) in test_specs + @test test_parse(production, input) == output + end end end - From ed743bb62b2f63d3b48be09a8e3394dbeba1de6f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 15 Dec 2021 19:20:43 +1000 Subject: [PATCH 0244/1109] Greatly improve generic parenthesis handling + other productions Implement parse_brackets which is a generic tool for parsing bracketed expressions containing commas and semicolons into tuples, blocks, and function argument lists. Use this to fix various problems in parse_paren. Also port new productions: * parse_unary * parse_unary_call --- JuliaSyntax/src/parse_stream.jl | 85 +++-- JuliaSyntax/src/parser.jl | 577 +++++++++++++++++++------------- JuliaSyntax/src/syntax_tree.jl | 29 +- JuliaSyntax/src/tokens.jl | 7 - JuliaSyntax/test/parser.jl | 77 +++-- JuliaSyntax/test/runtests.jl | 2 + 6 files changed, 486 insertions(+), 291 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 053bfd10067a9..b7a261cb08012 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -147,6 +147,10 @@ function _buffer_lookahead_tokens(stream::ParseStream) end end +# Find the index of the first nontrivia token in the lookahead buffer. +# +# TODO: Store this as part of _buffer_lookahead_tokens to avoid redoing this +# work all the time! 
function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) i = 1 while true @@ -302,21 +306,54 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; end """ -Reset kind or flags of an existing token in the output stream +Bump several tokens, gluing them together into a single token + +This is for use in special circumstances where the parser needs to resolve +lexing ambiguities. There's no special whitespace handling — bump any +whitespace if necessary with bump_trivia. +""" +function bump_glue(stream::ParseStream, kind, flags, num_tokens) + span = TaggedRange(SyntaxHead(kind, flags), + first_byte(stream.lookahead[1]), + last_byte(stream.lookahead[num_tokens])) + Base._deletebeg!(stream.lookahead, num_tokens) + push!(stream.spans, span) + return lastindex(stream.spans) +end + +""" +Bump a token, splitting it into two pieces. + +Wow, this is a hack! It helps resolves the occasional lexing ambiguities. For +example whether .+ should be a single token or a composite (. +) +""" +function bump_split(stream::ParseStream, num_bytes, kind1, flags1, kind2, flags2) + tok = popfirst!(stream.lookahead) + push!(stream.spans, TaggedRange(SyntaxHead(kind1, flags1), + first_byte(tok), first_byte(tok)+num_bytes-1)) + push!(stream.spans, TaggedRange(SyntaxHead(kind2, flags2), + first_byte(tok)+num_bytes, last_byte(tok))) + nothing +end + +""" +Reset kind or flags of an existing node in the output stream This is a hack, but necessary on some occasions * When some trailing syntax may change the kind or flags of the token * When an invisible token might be required - see bump_invisible with K"TOMBSTONE" """ -function reset_token!(stream::ParseStream, mark; - kind=nothing, flags=nothing) - text_span = stream.spans[mark] - k = isnothing(kind) ? (@__MODULE__).kind(text_span) : kind +function reset_node!(stream::ParseStream, omark; + kind=nothing, flags=nothing) + text_span = stream.spans[omark] + k = isnothing(kind) ? 
(@__MODULE__).kind(text_span) : kind f = isnothing(flags) ? (@__MODULE__).flags(text_span) : flags - stream.spans[mark] = TaggedRange(SyntaxHead(k, f), - first_byte(text_span), last_byte(text_span)) + stream.spans[omark] = TaggedRange(SyntaxHead(k, f), + first_byte(text_span), last_byte(text_span)) end +const NO_POSITION = 0 + function Base.position(stream::ParseStream) return stream.next_byte end @@ -335,7 +372,7 @@ function emit(stream::ParseStream, start_mark::Integer, kind::Kind, push!(stream.diagnostics, Diagnostic(text_span, error)) end push!(stream.spans, text_span) - return nothing + return lastindex(stream.spans) end """ @@ -344,7 +381,8 @@ Emit a diagnostic at the position of the next token If `whitespace` is true, the diagnostic is positioned on the whitespace before the next token. Otherwise it's positioned at the next token as returned by `peek()`. """ -function emit_diagnostic(stream::ParseStream, mark=nothing; error, whitespace=false) +function emit_diagnostic(stream::ParseStream, mark=nothing, end_mark=nothing; + error, whitespace=false) i = _lookahead_index(stream, 1, true) begin_tok_i = i end_tok_i = i @@ -355,10 +393,11 @@ function emit_diagnostic(stream::ParseStream, mark=nothing; error, whitespace=fa end_tok_i = is_whitespace(stream.lookahead[i]) ? i : max(1, i-1) end mark = isnothing(mark) ? first_byte(stream.lookahead[begin_tok_i]) : mark - err_end = last_byte(stream.lookahead[end_tok_i]) + end_mark = isnothing(end_mark) ? last_byte(stream.lookahead[end_tok_i]) : end_mark # It's a bit weird to require supplying a SyntaxHead here... - text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), mark, err_end) + text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), mark, end_mark) push!(stream.diagnostics, Diagnostic(text_span, error)) + return nothing end @@ -508,24 +547,20 @@ function bump_trivia(ps::ParseState, args...; kws...) bump_trivia(ps.stream, args...; kws...) 
end -""" -Bump a new zero-width "invisible" token at the current stream position. These -can be useful in several situations. - -When a token is implied but not present in the source text: -* Implicit multiplication - the * is invisible - `2x ==> (call 2 * x)` -* Docstrings - the macro name is invisible - `"doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1))` -* Big integer literals - again, an invisible macro name - `11111111111111111111 ==> (macrocall (core @int128_str) . 11111111111111111111)` -""" function bump_invisible(ps::ParseState, args...; kws...) bump_invisible(ps.stream, args...; kws...) end -function reset_token!(ps::ParseState, args...; kws...) - reset_token!(ps.stream, args...; kws...) +function bump_glue(ps::ParseState, args...; kws...) + bump_glue(ps.stream, args...; kws...) +end + +function bump_split(ps::ParseState, args...; kws...) + bump_split(ps.stream, args...; kws...) +end + +function reset_node!(ps::ParseState, args...; kws...) + reset_node!(ps.stream, args...; kws...) end function Base.position(ps::ParseState, args...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8862e541a8f87..8bb0295834058 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -43,6 +43,11 @@ function bumpTODO(ps::ParseState) end end +#------------------------------------------------------------------------------- +# Parsing-specific predicates on tokens/kinds +# +# All these take either a raw kind or a token. 
+ function is_closing_token(ps::ParseState, k) k = kind(k) return k in (K"else", K"elseif", K"catch", K"finally", @@ -55,8 +60,8 @@ function is_non_keyword_closer(k) kind(k) in (K",", K")", K"]", K"}", K";", K"EndMarker") end -function is_initial_reserved_word(ps::ParseState, t) - k = kind(t) +function is_initial_reserved_word(ps::ParseState, k) + k = kind(k) is_iresword = k in ( K"begin", K"while", K"if", K"for", K"try", K"return", K"break", K"continue", K"function", K"macro", K"quote", K"let", K"local", @@ -66,25 +71,55 @@ function is_initial_reserved_word(ps::ParseState, t) return is_iresword && !(k == K"begin" && ps.end_symbol) end -function is_block_form(t) - kind(t) in (K"block", K"quote", K"if", K"for", K"while", +function is_block_form(k) + kind(k) in (K"block", K"quote", K"if", K"for", K"while", K"let", K"function", K"macro", K"abstract", K"primitive", K"struct", K"try", K"module") end -function is_syntactic_operator(t) - k = kind(t) +function is_syntactic_operator(k) + k = kind(k) return k in (K"&&", K"||", K".", K"...", K"->") || (is_prec_assignment(k) && k != K"~") end -function is_syntactic_unary_op(t) - kind(t) in (K"$", K"&", K"::") +function is_syntactic_unary_op(k) + kind(k) in (K"$", K"&", K"::") +end + +function is_type_operator(k) + kind(k) in (K"<:", K">:") +end + +function is_unary_op(k) + kind(k) in ( + K"<:", K">:", # TODO: dotop disallowed ? + K"+", K"-", K"!", K"~", K"¬", K"√", K"∛", K"∜", K"⋆", K"±", K"∓" # dotop allowed + ) +end + +# Operators which are both unary and binary +function is_both_unary_and_binary(k) + # TODO: Do we need to check dotop as well here? + kind(k) in (K"$", K"&", K"~", # <- dotop disallowed? + K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed +end + +# operators handled by parse_unary at the start of an expression +function is_initial_operator(k) + k = kind(k) + # TODO(jb): ? 
should probably not be listed here except for the syntax hack in osutils.jl + isoperator(k) && + !(k in (K":", K"'", K".'", K"?")) && + !is_syntactic_unary_op(k) && + !is_syntactic_operator(k) end # flisp: invalid-identifier? function is_valid_identifier(k) - # FIXME? flisp also had K"...." disallowed, whatever that's for! + k = kind(k) + # TODO: flisp also had K"...." disallowed. But I don't know what that's + # for! Tokenize doesn't have an equivalent here. !(is_syntactic_operator(k) || k in (K"?", K".'")) end @@ -226,6 +261,9 @@ end # parse_eq_star is used where commas are special, for example in an argument list # +# If an `(= x y)` node was emitted, returns the position of that node in the +# output list so that it can be changed to `(kw x y)` later if necessary. +# # flisp: (define (parse-eq* s) function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) @@ -234,8 +272,9 @@ function parse_eq_star(ps::ParseState, equals_is_kw=false) # optimization: skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) + return NO_POSITION else - parse_assignment(ps, parse_pair, equals_is_kw) + return parse_assignment(ps, parse_pair, equals_is_kw) end end @@ -252,7 +291,7 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) down(ps) k = peek(ps) if !is_prec_assignment(k) - return + return NO_POSITION end if k == K"~" bump(ps) @@ -265,11 +304,13 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) parse_assignment(ps, down, equals_is_kw) emit(ps, mark, K"call", INFIX_FLAG) end + return NO_POSITION else bump(ps, TRIVIA_FLAG) parse_assignment(ps, down, equals_is_kw) result_k = (k == K"=" && equals_is_kw) ? K"kw" : k - emit(ps, mark, result_k) + equals_pos = emit(ps, mark, result_k) + return k == K"=" ? 
equals_pos : NO_POSITION end end @@ -397,11 +438,11 @@ function parse_comparison(ps::ParseState) parse_pipe_lt(ps) end if n_comparisons == 1 - if initial_kind in (K"<:", K">:") + if is_type_operator(initial_kind) # Type comparisons are syntactic # x <: y ==> (<: x y) # x >: y ==> (>: x y) - reset_token!(ps, op_pos, flags=TRIVIA_FLAG) + reset_node!(ps, op_pos, flags=TRIVIA_FLAG) emit(ps, mark, initial_kind) else emit(ps, mark, K"call", INFIX_FLAG) @@ -651,7 +692,7 @@ end # 2x ==> (call-i 2 * x) # 2(x) ==> (call-i 2 * x) # (2)(3)x ==> (call-i 2 * 3 x) -# (x-1)y ==> (call-i (- x 1) * y) +# (x-1)y ==> (call-i (call-i x - 1) * y) # # flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) @@ -664,12 +705,14 @@ function parse_juxtapose(ps::ParseState) if !is_juxtapose(ps, prev_kind, t) break end - bump_invisible(ps, K"*") + if n_terms == 1 + bump_invisible(ps, K"*") + end if is_string(prev_kind) || is_string(t) # issue #20575 # # "a""b" ==> (call-i "a" * (error) "b") - # "a"x ==> (call-i "a" * (error) x) + # "a"x ==> (call-i "a" * (error) x) bump_invisible(ps, K"error", TRIVIA_FLAG, error="cannot juxtapose string literal") end @@ -685,27 +728,149 @@ function parse_juxtapose(ps::ParseState) end end -# flisp: (define (maybe-negate op num) -function maybe_negate(op, num) - TODO("maybe_negate unimplemented") -end - -# operators handled by parse-unary at the start of an expression - +# Deal with numeric literal prefixes and unary calls +# # flisp: (define (parse-unary s) function parse_unary(ps::ParseState) - bumpTODO(ps) - #TODO("parse_unary unimplemented") + mark = position(ps) + bump_trivia(ps) + k = peek(ps) + if !is_initial_operator(k) + parse_factor(ps) + return + end + if k in (K"-", K"+") + t2 = peek_token(ps, 2) + if !t2.had_whitespace && kind(t2) in (K"Integer", K"Float") + k3 = peek(ps, 3) + if is_prec_power(k3) || k3 in (K"[", K"{") + # `[`, `{` (issue #18851) and `^` have higher precedence than + # unary negation + # -2^x ==> (call - (call-i 2 ^ x)) + # 
-2[1, 3] ==> (call - (ref 2 1 3)) + bump(ps) + parse_factor(ps) + emit(ps, mark, K"call") + else + # We have a signed numeric literal. Glue the operator to the + # next token to create a signed literal: + # +2 ==> +2 + # -2*x ==> (call-i -2 * x) + bump_glue(ps, kind(t2), EMPTY_FLAGS, 2) + end + return + end + end + parse_unary_call(ps) end -# flisp: (define (fix-syntactic-unary e) -function fix_syntactic_unary(e) - TODO("fix_syntactic_unary unimplemented") -end +# Parse calls to unary operators and prefix calls involving arbitrary operators +# with bracketed arglists (as opposed to infix notation) +# +# +a ==> (call + a) +# +(a,b) ==> (call + a b) +# +# flisp: parse-unary-call +function parse_unary_call(ps::ParseState) + mark = position(ps) + op_t = peek_token(ps) + op_k = kind(op_t) + op_node_kind = is_type_operator(op_k) ? op_k : K"call" + op_tok_flags = is_type_operator(op_t) ? TRIVIA_FLAG : EMPTY_FLAGS + t2 = peek_token(ps, 2) + k2 = kind(t2) + if is_closing_token(ps, k2) || k2 in (K"NewlineWs", K"=") + if is_dotted(op_t) + # standalone dotted operators are parsed as (|.| op) + # .+ ==> (. +) + bump_trivia(ps) + bump_split(ps, 1, + K".", TRIVIA_FLAG, + op_k, EMPTY_FLAGS) + emit(ps, mark, K".") + else + # return operator by itself, as in + # (+) ==> + + bump(ps) + end + elseif k2 == K"{" || (!is_unary_op(op_k) && k2 == K"(") + # this case is +{T}(x::T) = ... + parse_factor(ps) + elseif k2 == K"(" + # Cases like +(a,b) and +(a) + # + # Bump the operator + bump(ps, op_tok_flags) + + # Setup possible whitespace error between operator and ( + ws_mark = position(ps) + bump_trivia(ps) + ws_mark_end = position(ps) - 1 + ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE") + + mark_before_paren = position(ps) + bump(ps, TRIVIA_FLAG) # ( + # There's two tricky parts for unary-prefixed parenthesized expressions + # like `+(a,b)` + # + # 1. The ambiguity between a function call arglist or a block. 
The + # flisp parser resolves in favor of a block if there's no initial + # commas before semicolons: + # + # Function calls: + # +(a,b) ==> (call + a b) + # +(a=1,) ==> (call + (kw a 1)) + # + # Not function calls: + # +(a;b) ==> (call + (block a b)) + # +(a=1) ==> (call + (= a 1)) + # + # However this heuristic fails in some cases: + # +(a;b,c) ??> (call + (tuple a (parameters b c))) + # + # Here we use a simpler rule: if there were any commas, it was a + # function call. + is_call = false + is_block = false + parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs + is_call = had_commas + is_block = !is_call && num_semis > 0 + bump_closing_token(ps, K")") + return (needs_parameters=is_call, + eq_is_kw_before_semi=is_call, + eq_is_kw_after_semi=is_call) + end + + if is_call && t2.had_whitespace + reset_node!(ps, ws_error_pos, kind=K"error") + emit_diagnostic(ps, ws_mark, ws_mark_end, + error="whitespace not allowed between prefix function call and argument list") + end -# flisp: (define (parse-unary-call s op un spc) -function parse_unary_call(ps::ParseState, op, un, spc) - TODO("parse_unary_call unimplemented") + # 2. The precedence between unary + and any following infix ^ depends + # on whether the parens are a function call or not: + # + if is_call + # Prefix operator call + # +(a,b)^2 ==> (call-i (call + a b) ^ 2) + emit(ps, mark, op_node_kind) + parse_factor_with_initial_ex(ps, mark) + else + if is_block + emit(ps, mark_before_paren, K"block") + end + # Not a prefix operator call + # +(a)^2 ==> (call + (call-i ^ a 2)) + parse_factor_with_initial_ex(ps, mark_before_paren) + emit(ps, mark, op_node_kind) + end + elseif !is_unary_op(op_k) + emit_diagnostic(error="expected a unary operator") + else + bump(ps, op_tok_flags) + parse_unary(ps) + emit(ps, mark, op_node_kind) + end end # handle ^ and .^ @@ -714,8 +879,6 @@ end # # flisp: parse-factor function parse_factor(ps::ParseState) - # FIXME!! 
- bumpTODO(ps) ; return mark = position(ps) parse_unary_prefix(ps) parse_factor_with_initial_ex(ps, mark) @@ -723,9 +886,9 @@ end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) - TODO("parse_factor_with_initial_ex unimplemented") - parse_call_with_initial_ex(ps, mark) - parse_decl_with_initial_ex(ps, mark) + # FIXME + #parse_call_with_initial_ex(ps, mark) + #parse_decl_with_initial_ex(ps, mark) if is_prec_power(peek(ps)) bump(ps) parse_factor_after(ps) @@ -850,7 +1013,7 @@ end # flisp: disallow-space function bump_disallowed_space(ps) if peek_token(ps).had_whitespace - bump_trivia(ps, skip_newlines=false, error="whitespace is disallowed here") + bump_trivia(ps, skip_newlines=false, error="whitespace is not allowed here") end end @@ -1046,77 +1209,19 @@ end # like parse-arglist, but with `for` parsed as a generator # -# flisp: (define (parse-call-arglist s closer) +# flisp: parse-call-arglist function parse_call_arglist(ps::ParseState, closer) - parse_arglist(ParseState(ps, for_generator=true), closer) -end + ps = ParseState(ps, for_generator=true) -# Handle function call argument list, or any comma-delimited list. -# * an extra comma at the end is allowed -# * expressions after a ; are enclosed in (parameters ...) -# * an expression followed by ... becomes (... 
x) -# -# flisp: parse-arglist -function parse_arglist(ps0::ParseState, closing_kind, equals_is_kw=true) - ps = ParseState(ps0, range_colon_enabled=true, - space_sensitive=false, - where_enabled=true, - whitespace_newline=true) - params_mark = 0 - while true - bump_trivia(ps) - k = peek(ps) - if k == closing_kind - break - elseif k == K";" - # Start of "parameters" list - # a, b; c d) ==> a b (parameters c d) - if params_mark != 0 - # a, b; c d; e f) ==> a b (parameters c d (parameters e f)) - TODO("nested parameters") - end - params_mark = position(ps) - equals_is_kw = true - bump(ps, TRIVIA_FLAG) - bump_trivia(ps) - else - prefix_mark = position(ps) - parse_eq_star(ps, equals_is_kw) - t = peek_token(ps, skip_newlines=true) - k = kind(t) - bump_trivia(ps) # FIXME! Handle EndMarker in all such bump_trivia()'s - if k == K"," - bump(ps, TRIVIA_FLAG) - elseif k == K";" || k == closing_kind - # Handled above - continue - elseif k == K"for" - if !t.had_whitespace - bump_invisible(ps, K"error", - error="expected whitespace before for") - end - bump(ps, TRIVIA_FLAG) - parse_generator(ps, prefix_mark) - else - k_str = untokenize(k) - ck_str = untokenize(closing_kind) - if k in (K"]", K"}") - emit_diagnostic(ps, error="unexpected `$k_str` in argument list") - else - emit_diagnostic(ps, error="missing comma or $ck_str in argument list") - end - # Recovery done after loop - break - end - end - end - if params_mark != 0 - emit(ps, params_mark, K"parameters") + parse_brackets(ps, closer) do _, _, _ + bump_closing_token(ps, closer) + return (needs_parameters=true, + eq_is_kw_before_semi=true, + eq_is_kw_after_semi=true) end - bump_closing_token(ps, closing_kind) end -# flisp: (define (parse-vect s first closer) +# flisp: parse-vect function parse_vect(ps::ParseState, first, closer) TODO("parse_vect unimplemented") end @@ -1163,45 +1268,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) end # Parse un-prefixed parenthesized syntax. 
This is hard because parentheses are -# *very* overloaded! Possible forms: -# -# Parentheses used for grouping -# (a * b) ==> (call * a b) -# (a=1) ==> (= a 1) -# (x) ==> (x) -# -# Block syntax -# (a=1; b=2) ==> (block (= a 1) (= b 2)) -# (a=1;) ==> (block (= a 1)) -# (;;) ==> (block) -# -# Tuple syntax -# (a,) ==> (tuple a) -# (a,b) ==> (tuple a b) -# -# Named tuple syntax -# (a=1, b=2) ==> (tuple (= a 1) (= b 2)) -# (; a=1) ==> (tuple (parameters (kw a 1))) -# (;) ==> (tuple (parameters)) -# -# Generators -# (i for i in 1:10) -# -# Frankentuples and nested parameters -# (; a=1; b=2) ==> (tuple (parameters (kw a 1) (parameters (kw b 2)))) -# (a=1, b=2; c=3, d=4) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3) (parameters (kw d 4)))) -# -# The worst ambiguity is with messes like the following: -# (a;b=1;c;d) => a block with (= b 1) vs -# (a;b=1;c;d, e) => nested parameters with (kw b 1) -# -# This is such a mess (and furthermore, a largely invalid syntactic -# edgecase)... maybe we should just assume it's a block and deal with it via -# backtracking? -# -# To add to this, we've got to deal with prefixed parenthesized syntax... -# -# Return true iff this isn't just a parenthesized expr. +# *very* overloaded! 
# # flisp: parse-paren- function parse_paren_(ps0::ParseState, check_identifiers) @@ -1218,7 +1285,6 @@ function parse_paren_(ps0::ParseState, check_identifiers) # () ==> (tuple) bump(ps, TRIVIA_FLAG) emit(ps, mark, K"tuple") - return true elseif is_syntactic_operator(k) # allow :(=) etc in unchecked contexts, eg quotes # :(=) ==> (quote =) @@ -1228,108 +1294,163 @@ function parse_paren_(ps0::ParseState, check_identifiers) bump(ps) end bump_closing_token(ps, K")") - return false elseif !check_identifiers && k == K"::" && peek(ps, 2, skip_newlines=true) == K")" # allow :(::) as a special case # :(::) ==> (quote ::) bump(ps) bump(ps, TRIVIA_FLAG, skip_newlines=true) - return false - elseif k == K";" - # Tuples and empty blocks - if peek(ps, 2) == K";" - # (;;) ==> (block) - parse_paren_block(ps) + else + # Deal with all other cases of tuple or block syntax via the generic + # parse_brackets + initial_semi = peek(ps) == K";" + is_tuple = false + is_block = false + parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs + is_tuple = had_commas || + (initial_semi && (num_semis == 1 || num_subexprs > 0)) + is_block = num_semis > 0 bump_closing_token(ps, K")") - emit(ps, mark, K"block") - return false + return (needs_parameters=is_tuple, + eq_is_kw_before_semi=false, + eq_is_kw_after_semi=is_tuple) end - # Named tuple - # (;) ==> (tuple (parameters)) - # (; a=1) ==> (tuple (parameters (kw a 1))) - parse_arglist(ps, K")") - # FIXME Nested parameters - # (; a=1; b=2) - emit(ps, mark, K"tuple") - return true - else - # Here we parse the first subexpression separately, so - # we can look for a comma to see if it's a tuple. - # This lets us distinguish (x) from (x,) - parse_eq_star(ps) - k = peek(ps) - if k == K")" - # value in parentheses - # (x) ==> x - bump(ps, TRIVIA_FLAG) - # FIXME: return true if it has the ... 
suffix :-/ - return false - elseif k == K"," - # Tuple syntax - # (x,) ==> (tuple x) - # (x,y) ==> (tuple x y) - # (x=1,y=2) ==> (tuple (= x 1) (y 2)) - # Frankentuple syntax + if is_tuple + # Tuple syntax with commas + # (x,) ==> (tuple x) + # (x,y) ==> (tuple x y) + # (x=1, y=2) ==> (tuple (= x 1) (= y 2)) + # + # Named tuple with initial semicolon + # (;) ==> (tuple (parameters)) + # (; a=1) ==> (tuple (parameters (kw a 1))) + # + # Extra credit: nested parameters and frankentuples + # (; a=1; b=2) ==> (tuple (parameters (kw a 1) (parameters (kw b 2)))) + # (a; b; c,d) ==> (tuple a (parameters b (parameters c d))) # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3))) - bump(ps, TRIVIA_FLAG) - parse_arglist(ps, K")", false) emit(ps, mark, K"tuple") - return false - elseif k == K";" - # In normal valid Julia code this is block syntax - # (a;b;;c) ==> (block a b c) - parse_paren_block(ps) - bump_closing_token(ps, K")") - # TODO: - # consider `(x...; ` the start of an arglist, since it's not useful as a block + elseif is_block + # Blocks + # (;;) ==> (block) + # (a=1;) ==> (block (= a 1)) + # (a;b;;c) ==> (block a b c) + # (a=1; b=2) ==> (block (= a 1) (= b 2)) emit(ps, mark, K"block") - elseif k == K"for" - if !peek_token(ps).had_whitespace - bump_invisible(ps, K"error", - error="expected whitespace before for") - end - bump(ps, TRIVIA_FLAG) - parse_generator(ps, after_paren_mark) - bump_closing_token(ps, K")") - return false - else - bump_closing_token(ps, K")") - return true end end end -# Parse (the remainder of) a parenthesized block -# (a;b;c) ==> (block a b c) -# (;;) ==> (block) +# Handle bracketed syntax inside any of () [] or {} where there's a mixture +# of commas and semicolon delimiters. +# +# This is hard because there's various ambiguities depending on context. +# In general (X; Y) is difficult when X and Y are subexpressions possibly +# containing `,` and `=`. +# +# For example, (a=1; b=2) could be seen to parse four different ways! 
+# +# Function args: (kw a 1) (parameters (kw b 2)) +# Tuple-like: (= a 1) (parameters (kw b 2)) +# Block: (= a 1) (= b 2) +# [] vect-like: (= a 1) (parameters (= b 2)) +# +# Expressions (X; Y; Z) with more semicolons are also allowed by the flisp +# parser and generally parse as nested parameters blocks. This is invalid Julia +# syntax so the parse tree is pretty strange in these cases! Some macros +# probably use it though. Example: +# +# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (kw d 2) (parameters e (kw f 3)) +# +# Deciding which of these representations to use depends on both the prefix +# context and the contained expressions. To distinguish between blocks vs +# tuples we use the presence of `,` within the `;`-delimited sections: If +# there's commas, it's a tuple, otherwise a block. # -# Also parse nested parameters lists in the same way that the flisp parser -# does (TODO) -# (a=1; b=2; c=3,d=4) ==> (tuple (= a 1) (parameters (kw b 2) (parameters (kw c 3) (kw d 4)))) -function parse_paren_block(ps) - need_delim = true +function parse_brackets(after_parse::Function, + ps::ParseState, closing_kind) + ps = ParseState(ps, range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + whitespace_newline=true) + params_marks = Int[] + eq_positions = Int[] + last_eq_before_semi = 0 + num_subexprs = 0 + num_semis = 0 + had_commas = false while true + bump_trivia(ps) k = peek(ps) - if k == K";" - bump(ps, TRIVIA_FLAG) - need_delim = false - continue - elseif is_closing_token(ps, k) - break - elseif k == K"," - # (a;b;c,d) ==> (tuple a (parameters b (parameters c d))) - # - # It's not clear this nested representation was an - # intentional choice in the flisp parser. It seems kind of - # awful! 
- TODO("Backtrack and parse as a nested parameters block") + if k == closing_kind break + elseif k == K";" + # Start of "parameters" list + # a, b; c d ==> a b (parameters c d) + push!(params_marks, position(ps)) + if num_semis == 0 + last_eq_before_semi = length(eq_positions) + end + num_semis += 1 + bump(ps, TRIVIA_FLAG) + bump_trivia(ps) else - if need_delim - break # ) or error; we'll deal with it in the caller. + num_subexprs += 1 + mark = position(ps) + eq_pos = parse_eq_star(ps) + if eq_pos != NO_POSITION + push!(eq_positions, eq_pos) end - parse_eq_star(ps) - need_delim = true + t = peek_token(ps, skip_newlines=true) + k = kind(t) + bump_trivia(ps) + if k == K"," + had_commas = true + bump(ps, TRIVIA_FLAG) + elseif k == K";" || k == closing_kind + # Handled above + continue + elseif k == K"for" + # Generator syntax + # (i for i in 1:10) + if !t.had_whitespace + bump_invisible(ps, K"error", + error="expected whitespace before for") + end + bump(ps, TRIVIA_FLAG) + parse_generator(ps, mark) + else + k_str = untokenize(k) + ck_str = untokenize(closing_kind) + if is_closing_token(ps, k) + emit_diagnostic(ps, error="unexpected `$k_str` in bracketed list") + else + emit_diagnostic(ps, error="missing comma or $ck_str in bracketed list") + end + # Recovery done after loop + break + end + end + end + actions = after_parse(had_commas, num_semis, num_subexprs) + if num_semis == 0 + last_eq_before_semi = length(eq_positions) + end + # Turn any K"=" into K"kw" as necessary + if actions.eq_is_kw_before_semi + # f(a=1) ==> (call f (kw a 1)) + for i=1:last_eq_before_semi + reset_node!(ps, eq_positions[i], kind=K"kw") + end + end + if actions.eq_is_kw_after_semi + for i = last_eq_before_semi+1:length(eq_positions) + reset_node!(ps, eq_positions[i], kind=K"kw") + end + end + # Emit nested parameter nodes if necessary + if actions.needs_parameters + for mark in Iterators.reverse(params_marks) + emit(ps, mark, K"parameters") end end end @@ -1658,7 +1779,7 @@ function 
parse_docstring(ps::ParseState, down=parse_eq) end end if is_doc - reset_token!(ps, atdoc_mark, kind=K"core_@doc") + reset_node!(ps, atdoc_mark, kind=K"core_@doc") down(ps) emit(ps, mark, K"macrocall") end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 1998a52dc3576..09c4c0a5086e4 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -37,6 +37,10 @@ flags(node::GreenNode{SyntaxHead}) = head(node).flags isinfix(node) = isinfix(head(node)) +# Value of an error node with no children +struct ErrorVal +end + #------------------------------------------------------------------------------- # AST interface, built on top of raw tree @@ -54,9 +58,6 @@ mutable struct SyntaxNode val::Any end -struct ErrorVal -end - Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) @@ -69,6 +70,9 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In # strings. Maybe this is good. Maybe not. if k == K"Integer" val = Base.parse(Int, val_str) + elseif k == K"Float" + # FIXME: Other float types! + val = Base.parse(Float64, val_str) elseif k == K"Identifier" val = Symbol(val_str) elseif k == K"VarIdentifier" @@ -101,7 +105,8 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In cs = SyntaxNode[] pos = position for (i,rawchild) in enumerate(children(raw)) - if !istrivia(rawchild) + # FIXME: Allowing trivia iserror nodes here corrupts the tree layout. 
+ if !istrivia(rawchild) || iserror(rawchild) push!(cs, SyntaxNode(source, rawchild, pos)) end pos += rawchild.span @@ -121,6 +126,9 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end end +iserror(node::SyntaxNode) = iserror(node.raw) +istrivia(node::SyntaxNode) = istrivia(node.raw) + head(node::SyntaxNode) = node.head haschildren(node::SyntaxNode) = node.head !== :leaf @@ -159,7 +167,11 @@ end function _show_syntax_node_sexpr(io, node) if !haschildren(node) - print(io, repr(node.val)) + if iserror(node) + print(io, "(error)") + else + print(io, repr(node.val)) + end else print(io, "($(_kind_str(kind(node.raw))) ") first = true @@ -280,7 +292,12 @@ function _to_expr(node::SyntaxNode) line_node = source_location(LineNumberNode, node.source, node.position) args[1] = _macroify_name(args[1]) insert!(args, 2, line_node) - elseif head(node) == :call || head(node) == :tuple + elseif head(node) == :call + if length(args) > 1 && Meta.isexpr(args[end], :parameters) + insert!(args, 2, args[end]) + pop!(args) + end + elseif head(node) == :tuple || head(node) == :parameters if length(args) > 1 && Meta.isexpr(args[end], :parameters) pushfirst!(args, args[end]) pop!(args) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 1fa882c8c709d..4b9c3010aa7b6 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -60,13 +60,6 @@ is_prec_pipe_gt(t) = kind(t) == K"|>" end =# -# Operators which are boty unary and binary -function is_both_unary_and_binary(t) - # TODO: Do we need to check dotop as well here? - kind(t) in (K"$", K"&", K"~", # <- dotop disallowed? 
- K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed -end - function is_number(t) kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 9ffda895057df..3d9851ae6dc20 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -50,15 +50,16 @@ tests = [ "a;;;b;;" => "(toplevel :a :b)" ], JuliaSyntax.parse_cond => [ - "a ? b : c" => "(if :a :b :c)" + "a ? b : c" => "(if :a :b :c)" + "a ?\nb : c" => "(if :a :b :c)" + "a ? b :\nc" => "(if :a :b :c)" + "a ? b : c:d" => "(if :a :b (call :(:) :c :d))" # Following are errors but should recover - "a? b : c" => "(if :a :b :c)" - "a ?b : c" => "(if :a :b :c)" - "a ? b: c" => "(if :a :b :c)" - "a ? b :c" => "(if :a :b :c)" - "a ? b c" => "(if :a :b :c)" - #"a ?\nb : c" => "(if :a :b :c)" - #"a ? b :\nc" => "(if :a :b :c)" + "a? b : c" => "(if :a (error) :b :c)" + "a ?b : c" => "(if :a (error) :b :c)" + "a ? b: c" => "(if :a :b (error) :c)" + "a ? b :c" => "(if :a :b (error) :c)" + "a ? b c" => "(if :a :b (error) :c)" ], JuliaSyntax.parse_arrow => [ "x → y" => "(call :→ :x :y)" @@ -93,7 +94,6 @@ tests = [ JuliaSyntax.parse_range => [ "a..b" => "(call :.. :a :b)" "a … b" => "(call :… :a :b)" - # a ? b : c:d ==> (if a b (call-i c : d)) # [1 :a] ==> (vcat 1 (quote a)) # [1 2:3 :a] ==> (vcat 1 (call-i 2 : 3) (quote a)) "x..." => "(... 
:x)" @@ -113,13 +113,38 @@ tests = [ ], JuliaSyntax.parse_term => [ "a * b * c" => "(call :* :a :b :c)" + # For parse_unary + "-2*x" => "(call :* -2 :x)" ], JuliaSyntax.parse_juxtapose => [ - "2x" => "(call :* 2 :x)" + "2x" => "(call :* 2 :x)" + "2x" => "(call :* 2 :x)" + "2(x)" => "(call :* 2 :x)" + "(2)(3)x" => "(call :* 2 3 :x)" + "(x-1)y" => "(call :* (call :- :x 1) :y)" + # errors + "\"a\"\"b\"" => "(call :* \"a\" (error) \"b\")" + "\"a\"x" => "(call :* \"a\" (error) :x)" + ], + JuliaSyntax.parse_unary => [ + "+2" => "2" + "-2^x" => "(call :- (call :^ 2 :x))" + # -2[1, 3] ==> (call - (ref 2 1 3)) + ], + JuliaSyntax.parse_unary_call => [ + ".+" => "(. :+)" + "+)" => ":+" + # Function calls: + "+(a,b)" => "(call :+ :a :b)" + "+(a=1,)" => "(call :+ (kw :a 1))" + # Not function calls: + "+(a;b)" => "(call :+ (block :a :b))" + "+(a=1)" => "(call :+ (= :a 1))" + "+(a;b,c)" => "(call :+ :a (parameters :b :c))" ], JuliaSyntax.parse_decl => [ - #"a::b" => "(:: a b)" - #"a->b" => "(-> a b)" + #"a::b" => "(:: :a :b)" + #"a->b" => "(-> :a :b)" ], JuliaSyntax.parse_unary_prefix => [ #"&a" => "(& :a)" @@ -133,20 +158,22 @@ tests = [ "(a * b)" => "(toplevel (call :* :a :b))" "(a=1)" => "(toplevel (= :a 1))" "(x)" => "(toplevel :x)" + # Tuple syntax with commas + "(x,)" => "(tuple :x)" + "(x,y)" => "(tuple :x :y)" + "(x=1, y=2)" => "(tuple (= :x 1) (= :y 2))" + # Named tuples with initial semicolon + "(;)" => "(tuple (parameters ))" + "(; a=1)" => "(tuple (parameters (kw :a 1)))" + # Extra credit: nested parameters and frankentuples + "(; a=1; b=2)" => "(tuple (parameters (kw :a 1) (parameters (kw :b 2))))" + "(a; b; c,d)" => "(tuple :a (parameters :b (parameters :c :d)))" + "(a=1, b=2; c=3)" => "(tuple (= :a 1) (= :b 2) (parameters (kw :c 3)))" # Block syntax - "(a=1; b=2)" => "(block (= :a 1) (= :b 2))" - "(a=1;)" => "(block (= :a 1))" "(;;)" => "(block )" - # Tuple syntax - "(a,)" => "(tuple :a)" - "(a,b)" => "(tuple :a :b)" - # Named tuple syntax - "(a=1, b=2)" => "(tuple 
(= :a 1) (= :b 2))" - "(; a=1)" => "(tuple (parameters (kw :a 1)))" - "(;)" => "(tuple (parameters ))" - # Franken tuple "syntax" - "(a=1, b=2; c=3)" => "(tuple (= :a 1) (= :b 2) (parameters (kw :c 3)))" - # "(a=1, b=2; c=3; d=4)" # BROKEN! + "(a=1;)" => "(block (= :a 1))" + "(a;b;;c)" => "(block :a :b :c)" + "(a=1; b=2)" => "(block (= :a 1) (= :b 2))" ], JuliaSyntax.parse_atom => [ ":foo" => "(quote :foo)" @@ -163,7 +190,7 @@ tests = [ # __dot__ macro "@. x y" => "(macrocall :__dot__ :x :y)" # Errors - ": foo" => "(quote :foo)" + ": foo" => "(quote (error ) :foo)" ], JuliaSyntax.parse_docstring => [ "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index d756c6acf2f26..77cd998df0d81 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,6 +1,8 @@ using JuliaSyntax using Test +using Base.Meta: @dump + using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, From b3007010bfbe7af5b587885952e1b9b6493217a4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 17 Dec 2021 05:57:27 +1000 Subject: [PATCH 0245/1109] Some work on parse_call_chain and macro parsing --- JuliaSyntax/src/parser.jl | 137 +++++++++++++++++++++++++++++--------- 1 file changed, 106 insertions(+), 31 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8bb0295834058..142ebeba3d7b4 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -29,6 +29,13 @@ function bump_closing_token(ps, closing_kind) end end +# flisp: disallow-space +function bump_disallowed_space(ps) + if peek_token(ps).had_whitespace + bump_trivia(ps, skip_newlines=false, error="whitespace is not allowed here") + end +end + function TODO(str) error("TODO: $str") end @@ -108,10 +115,10 @@ end # operators handled by parse_unary at the start of an expression function is_initial_operator(k) k = kind(k) - # TODO(jb): ? 
should probably not be listed here except for the syntax hack in osutils.jl + # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl isoperator(k) && !(k in (K":", K"'", K".'", K"?")) && - !is_syntactic_unary_op(k) && + !is_syntactic_unary_op(k) && !is_syntactic_operator(k) end @@ -131,8 +138,6 @@ end # # This is to make both codebases mutually understandable and make porting # changes simple. -# -# The implementation using the ParseStream interface # parse left-to-right binary operator @@ -848,8 +853,7 @@ function parse_unary_call(ps::ParseState) end # 2. The precedence between unary + and any following infix ^ depends - # on whether the parens are a function call or not: - # + # on whether the parens are a function call or not if is_call # Prefix operator call # +(a,b)^2 ==> (call-i (call + a b) ^ 2) @@ -1010,13 +1014,6 @@ function parse_def(ps::ParseState, is_func, anon) end end -# flisp: disallow-space -function bump_disallowed_space(ps) - if peek_token(ps).had_whitespace - bump_trivia(ps, skip_newlines=false, error="whitespace is not allowed here") - end -end - # string macro suffix for given delimiter t # # flisp: (define (macsuffix t) @@ -1026,33 +1023,108 @@ end # flisp: (define (parse-call-chain s ex macrocall?) function parse_call_chain(ps::ParseState, mark, is_macrocall) - bumpTODO(ps); return - TODO("parse_call_chain") - #= while true t = peek_token(ps) k = kind(t) if (ps.space_sensitive && t.had_whitespace && - k in (K"(", K"[", K"{", K"'", K"\"", K"\\")) || - (is_number(k) && k == K"(") - # 2(...) is multiply, not call - # FIXME: Is this `break` correct ? + # TODO: Is `'` adjoint or Char here? + k in (K"(", K"[", K"{", K"\\", K"'", K"Char", K"String", K"TripleString")) || + (is_number(k) && k == K"(") # 2(...) 
is multiply, not call break end if k == K"(" - bump_disallowed_space(ps, t) + bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")") + # Parse `a=b` as `=` instead of `kw` in macrocall + # @foo(a=1, b=2) ==> (macrocall @foo (= a 1) (= b 2)) + parse_call_arglist(ps, K")", is_macrocall) + emit(ps, mark, is_macrocall ? K"macrocall" : K"call") + if peek(ps) == K"do" + bump(ps, TRIVIA_FLAG) + parse_do(ps) + emit(ps, mark, K"do") + end + if is_macrocall + break + end elseif k == K"[" + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + parse_cat(ParseState(ps, end_symbol=true), K"]") + if is_macrocall + emit(ps, mark, K"macrocall") + break + end + # ref is syntax, so we can distinguish + # a[i] = x from + # ref(a,i) = x + # + # TODO: Big list of rewrites + # + # vect -> ref + # hcat -> typed_hcat + # vcat -> typed_vcat + # comprehension -> typed_comprehension + # ncat -> typed_ncat elseif k == K"." + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"(" + # f.(a,b) => (. f (tuple a b)) + # keyword params always use kw here; different from normal tuple: + # f.(a=1) => (. f (tuple (kw a 1))) + bump_disallowed_space(ps) + m = position(ps) + bump(ps, TRIVIA_FLAG) + parse_call_arglist(ps, K")", is_macrocall) + emit(ps, m, K"tuple") + emit(ps, mark, K".") + elseif k == K":" + # f.:x ==> (. f (quote x)) + m = position(ps) + bump(ps, TRIVIA_FLAG) + bump_disallowed_space(ps) + parse_atom(ps, false) + emit(ps, m, K"quote") + emit(ps, mark, K".") + elseif k == K"$" + # f.$x ==> (. f (quote ($ x))) + # f.$(x+y) ==> (. f (quote ($ (call + x y)))) + m = position(ps) + bump(ps, TRIVIA_FLAG) + parse_atom(ps) + emit(ps, m, K"$") + emit(ps, m, K"quote") # TODO: why does flisp uses K"inert" here? + emit(ps, mark, K".") + elseif k == K"@" + # A macro call + # A.@x ==> (macrocall (. A x)) + # A.@x a ==> (macrocall (. A x) a) + # + # Somewhat strangely the `@` in macrocall is allowed in any position + # A.B.@x ==> (macrocall ((. 
A B) x)) + # A.@B.x ==> (macrocall ((. A B) x)) + is_macrocall = true + m = position(ps) + bump(ps, TRIVIA_FLAG) # @ + parse_atom(ps, false) + emit(ps, m, K"quote") + emit(ps, mark, K".") + else + # f.x ==> (. f x) + m = position(ps) + parse_atom(ps, false) + emit(ps, m, K"quote") + emit(ps, mark, K".") + end elseif k == K"'" elseif k == K"{" - elseif k in (K"\"", K"`") + elseif k in (K"String", K"TripleString", K"Cmd", K"TripleCmd") else break end end - =# end # flisp: (define (expect-end s word) @@ -1210,13 +1282,13 @@ end # like parse-arglist, but with `for` parsed as a generator # # flisp: parse-call-arglist -function parse_call_arglist(ps::ParseState, closer) +function parse_call_arglist(ps::ParseState, closer, is_macrocall) ps = ParseState(ps, for_generator=true) parse_brackets(ps, closer) do _, _, _ bump_closing_token(ps, closer) return (needs_parameters=true, - eq_is_kw_before_semi=true, + eq_is_kw_before_semi=!is_macrocall, eq_is_kw_after_semi=true) end end @@ -1667,11 +1739,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif is_string(leading_kind) bump(ps) # FIXME parse_string_literal(ps) - elseif leading_kind == K"@" + elseif leading_kind == K"@" # macro call bump(ps, TRIVIA_FLAG) with_space_sensitive(ps) do ps is_doc_macro = peek_equal_to(ps, "doc") - # FIXME: The following syntactic oddity is also allowed 😱 # @Mod.mac a b ==> (macrocall (. 
Mod mac) a b) # see macroify-name parse_macro_name(ps) @@ -1690,22 +1761,26 @@ function parse_atom(ps::ParseState, check_identifiers=true) parse_eq(ps) end end + emit(ps, atom_mark, K"macrocall") else # Parenthesized macro arguments # @mac(a,b) ==> (macrocall mac a b) - TODO("Parenthesized macro arguments") + parse_call_chain(ps, atom_mark, true) end - emit(ps, atom_mark, K"macrocall") end elseif leading_kind in (K"Cmd", K"TripleCmd") bump_invisible(ps, K"core_@cmd") + bump(ps) emit(ps, atom_mark, K"macrocall") elseif isliteral(leading_kind) bump(ps) elseif is_closing_token(ps, leading_kind) # Leave closing token in place for other productions to # recover with (??) - emit_diagnostic(ps, error="Unexpected closing token") + msg = leading_kind == K"EndMarker" ? + "premature end of input" : + "unexpected closing token" + emit_diagnostic(ps, error=msg) else bump(ps, error="Invalid syntax") end From f9182be7a034954078f63f348dee8cf79098bc81 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 18 Dec 2021 05:32:28 +1000 Subject: [PATCH 0246/1109] More fixes for parse_call_chain * More robust processing of macro module path * Parsing of string macros * Parsing of curlies * ParseStreamPosition type for representing both the read and write heads of the stream. --- JuliaSyntax/README.md | 36 +++- JuliaSyntax/src/parse_stream.jl | 60 ++++--- JuliaSyntax/src/parser.jl | 287 +++++++++++++++++++++++--------- JuliaSyntax/src/syntax_tree.jl | 36 +++- JuliaSyntax/src/token_kinds.jl | 9 +- JuliaSyntax/test/parser.jl | 75 +++++++-- 6 files changed, 372 insertions(+), 131 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index fe59e3d18c6b3..cecc556823e73 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -290,9 +290,13 @@ f(a, ## Fun research questions -* Given the raw tree (the green tree, in Roslyn terminology) can we regress a - model of indentiation? 
Such that formatting rules for new code is defined - implicitly by a software project's existing style? +* Given source and syntax tree, can we regress/learn a generative model of + indentiation from the syntax tree? Source formatting involves a big pile of + heuristics to get something which "looks nice"... and ML systems have become + very good at heuristics. Also, we've got huge piles of traininig data — just + choose some high quality, tastefully hand-formatted libraries. + +* Similarly, can we learn fast and reasonably accurate recovery heuristics? # Resources @@ -390,3 +394,29 @@ Some resources: - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) - Some discussion of error recovery +## Flisp parser oddities and bugs + +```julia +# Operator prefix call syntax doesn't work in some cases (tuple is produced) ++(a;b,c) + +# Inconsistent parsing of tuple keyword args inside vs outside of dot calls +(a=1,) # (tuple (= a 1)) +f.(a=1) # (tuple (kw a 1)) + +# Mixutres of , and ; in calls give nested parameter AST which parses strangely +# and is kind-of-horrible to use. +# (tuple (parameters (parameters e f) c d) a b) +(a,b; c,d; e,f) + +# Misplaced @ in macro module paths is parsed but produces odd AST +# (macrocall (. A (quote (. B @x)))) +# Should be rejected, or produce (macrocall (. (. A (quote B)) (quote @x))) +A.@B.x + +# Lookup for macro module path allows bizarre syntax and stateful semantics! +b() = rand() > 0.5 ? 
Base : Core +b().@info "hi" +``` + +Many inconsistencies between `kw` and `=` diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index b7a261cb08012..08848b0e98e40 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -84,6 +84,13 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, code) print(io, code[nextind(code, q):end], '\n') end +struct ParseStreamPosition + input_byte::Int # Index of next byte in input + output_index::Int # Index of last span in output +end + +const NO_POSITION = ParseStreamPosition(0,0) + #------------------------------------------------------------------------------- """ ParseStream provides an IO interface for the parser. It @@ -223,7 +230,7 @@ end """ Return the kind of the previous non-trivia span which was inserted. -This is a bit hacky but can be handy on occasion. +Looking backward is a bit hacky but can be handy on occasion. """ function peek_behind(stream::ParseStream) for i = length(stream.spans):-1:1 @@ -237,6 +244,8 @@ end #------------------------------------------------------------------------------- # Stream output interface - the `bump_*` and `emit_*` family of functions +# +# Though note bump() really does both input and output # Bump the next `n` tokens # flags and new_kind are applied to any non-trivia tokens @@ -263,10 +272,10 @@ function _bump_n(stream::ParseStream, n::Integer, flags, new_kind=K"Nothing") end """ - bump(stream [, flags=EMPTY_FLAGS]) + bump(stream [, flags=EMPTY_FLAGS]; + skip_newlines=false, error, new_kind) -Shift the current token into the output as a new text span with the given -`flags`. +Shift the current token from the input to the output, adding the given flags. 
""" function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, error=nothing, new_kind=K"Nothing") @@ -275,8 +284,8 @@ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, if !isnothing(error) emit(stream, emark, K"error", TRIVIA_FLAG, error=error) end - # Return last token location in output if needed for reset_token! - return lastindex(stream.spans) + # Return last token location in output if needed for reset_node! + return position(stream) end """ @@ -290,7 +299,7 @@ function bump_trivia(stream::ParseStream; skip_newlines=true, error=nothing) if !isnothing(error) emit(stream, emark, K"error", TRIVIA_FLAG, error=error) end - return lastindex(stream.spans) + return position(stream) end """ @@ -302,7 +311,7 @@ example, `2x` means `2*x` via the juxtoposition rules. function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; error=nothing) emit(stream, position(stream), kind, flags, error=error) - return lastindex(stream.spans) + return position(stream) end """ @@ -318,7 +327,7 @@ function bump_glue(stream::ParseStream, kind, flags, num_tokens) last_byte(stream.lookahead[num_tokens])) Base._deletebeg!(stream.lookahead, num_tokens) push!(stream.spans, span) - return lastindex(stream.spans) + return position(stream) end """ @@ -333,7 +342,7 @@ function bump_split(stream::ParseStream, num_bytes, kind1, flags1, kind2, flags2 first_byte(tok), first_byte(tok)+num_bytes-1)) push!(stream.spans, TaggedRange(SyntaxHead(kind2, flags2), first_byte(tok)+num_bytes, last_byte(tok))) - nothing + nothing # position(stream) is ambiguous here, as it involves two spans end """ @@ -343,19 +352,17 @@ This is a hack, but necessary on some occasions * When some trailing syntax may change the kind or flags of the token * When an invisible token might be required - see bump_invisible with K"TOMBSTONE" """ -function reset_node!(stream::ParseStream, omark; +function reset_node!(stream::ParseStream, mark::ParseStreamPosition; kind=nothing, 
flags=nothing) - text_span = stream.spans[omark] + text_span = stream.spans[mark.output_index] k = isnothing(kind) ? (@__MODULE__).kind(text_span) : kind f = isnothing(flags) ? (@__MODULE__).flags(text_span) : flags - stream.spans[omark] = TaggedRange(SyntaxHead(k, f), - first_byte(text_span), last_byte(text_span)) + stream.spans[mark.output_index] = + TaggedRange(SyntaxHead(k, f), first_byte(text_span), last_byte(text_span)) end -const NO_POSITION = 0 - function Base.position(stream::ParseStream) - return stream.next_byte + ParseStreamPosition(stream.next_byte, lastindex(stream.spans)) end """ @@ -365,14 +372,14 @@ Emit a new text span into the output which covers source bytes from `mark` to the end of the most recent token which was `bump()`'ed. The starting `mark` should be a previous return value of `position()`. """ -function emit(stream::ParseStream, start_mark::Integer, kind::Kind, +function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - text_span = TaggedRange(SyntaxHead(kind, flags), start_mark, stream.next_byte-1) + text_span = TaggedRange(SyntaxHead(kind, flags), mark.input_byte, stream.next_byte-1) if !isnothing(error) push!(stream.diagnostics, Diagnostic(text_span, error)) end push!(stream.spans, text_span) - return lastindex(stream.spans) + return position(stream) end """ @@ -380,6 +387,8 @@ Emit a diagnostic at the position of the next token If `whitespace` is true, the diagnostic is positioned on the whitespace before the next token. Otherwise it's positioned at the next token as returned by `peek()`. + +FIXME: Rename? This doesn't emit normal tokens into the output event list! """ function emit_diagnostic(stream::ParseStream, mark=nothing, end_mark=nothing; error, whitespace=false) @@ -392,14 +401,19 @@ function emit_diagnostic(stream::ParseStream, mark=nothing, end_mark=nothing; begin_tok_i = 1 end_tok_i = is_whitespace(stream.lookahead[i]) ? 
i : max(1, i-1) end - mark = isnothing(mark) ? first_byte(stream.lookahead[begin_tok_i]) : mark - end_mark = isnothing(end_mark) ? last_byte(stream.lookahead[end_tok_i]) : end_mark + first_byte = isnothing(mark) ? + first_byte(stream.lookahead[begin_tok_i]) : mark.input_byte + last_byte = isnothing(end_mark) ? + last_byte(stream.lookahead[end_tok_i]) : end_mark.input_byte # It's a bit weird to require supplying a SyntaxHead here... - text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), mark, end_mark) + text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), first_byte, last_byte) push!(stream.diagnostics, Diagnostic(text_span, error)) return nothing end +function emit_diagnostic(stream::ParseStream, r::NTuple{2,ParseStreamPosition}; kws...) + emit_diagnostic(stream, first(r), last(r); kws...) +end #------------------------------------------------------------------------------- # Tree construction from the list of text spans held by ParseStream diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 142ebeba3d7b4..46a623cbe0c5d 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -55,6 +55,12 @@ end # # All these take either a raw kind or a token. +function is_identifier(k) + # FIXME: use is_identifier instead of K"Identifier" and add + # other virtual identifiers like K"core_@doc" etc? 
+ k in (K"Identifier", K"__dot__") +end + function is_closing_token(ps::ParseState, k) k = kind(k) return k in (K"else", K"elseif", K"catch", K"finally", @@ -810,7 +816,7 @@ function parse_unary_call(ps::ParseState) # Setup possible whitespace error between operator and ( ws_mark = position(ps) bump_trivia(ps) - ws_mark_end = position(ps) - 1 + ws_mark_end = position(ps) # FIXME - 1 ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE") mark_before_paren = position(ps) @@ -950,7 +956,7 @@ function parse_call_with_initial_ex(ps::ParseState, mark) if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") parse_resword(ps, mark) else - parse_call_chain(ps, mark, false) + parse_call_chain(ps, mark) end end @@ -1003,7 +1009,7 @@ function parse_def(ps::ParseState, is_func, anon) else parse_unary_prefix(ps) end - parse_call_chain(ps, mark, false) + parse_call_chain(ps, mark) if is_func && peek(ps) == K"::" bump(ps, TRIVIA_FLAG) parse_call(ps) @@ -1014,16 +1020,24 @@ function parse_def(ps::ParseState, is_func, anon) end end -# string macro suffix for given delimiter t -# -# flisp: (define (macsuffix t) -function macsuffix(t) - TODO("macsuffix unimplemented") +# Emit an error if the call chain syntax is not a valid module reference +function emit_modref_error(ps, mark) + emit(ps, mark, K"error", error="not a valid module reference") end +# Parses a chain of sufficies at function call precedence, leftmost binding +# tightest. +# f(a,b) ==> (call f a b) +# f(a).g(b) ==> (call (. (call f a) (quote g)) b) +# # flisp: (define (parse-call-chain s ex macrocall?) 
-function parse_call_chain(ps::ParseState, mark, is_macrocall) +function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro=false) + # source range of the @-prefixed part of a macro + macro_atname_range = nothing + is_valid_modref = peek_behind(ps) in (K"__dot__", K"Identifier") + strmacro_name_position = position(ps) # same token as peek_behind while true + this_iter_valid_modref = false t = peek_token(ps) k = kind(t) if (ps.space_sensitive && t.had_whitespace && @@ -1033,13 +1047,20 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) break end if k == K"(" + if is_macrocall && !is_valid_modref + # a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y) + emit_modref_error(ps, mark) + end + # f(a,b) ==> (call f a b) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - # Parse `a=b` as `=` instead of `kw` in macrocall - # @foo(a=1, b=2) ==> (macrocall @foo (= a 1) (= b 2)) + # Keyword arguments depends on call vs macrocall + # foo(a=1) ==> (call foo (kw a 1)) + # @foo(a=1) ==> (macrocall foo (= a 1)) parse_call_arglist(ps, K")", is_macrocall) emit(ps, mark, is_macrocall ? K"macrocall" : K"call") if peek(ps) == K"do" + # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) bump(ps, TRIVIA_FLAG) parse_do(ps) emit(ps, mark, K"do") @@ -1047,7 +1068,42 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) if is_macrocall break end + elseif is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) + if is_macrocall && !is_valid_modref + # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) + emit_modref_error(ps, mark) + end + with_space_sensitive(ps) do ps + # Space separated macro arguments + # @foo a b ==> (macrocall foo a b) + # A.@foo a b ==> (macrocall (. A (quote foo)) a b) + # @A.foo a b ==> (macrocall (. 
A (quote foo)) a b) + n_args = parse_space_separated_exprs(ps) + if is_doc_macro && n_args == 1 + # Parse extended @doc args on next line + # @doc x\ny ==> (macrocall doc x y) + # A.@doc x\ny ==> (macrocall (. A (quote doc)) doc x y) + # @A.doc x\ny ==> (macrocall (. A (quote doc)) doc x y) + # @doc x y\nz ==> (macrocall doc x y) + # + # Excluded cases + # @doc x\n\ny ==> (macrocall doc x) + # @doc x\nend ==> (macrocall doc x) + k2 = peek(ps, 2) + if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) && + k2 != K"NewlineWs" + bump(ps) # newline + parse_eq(ps) + end + end + emit(ps, mark, K"macrocall") + end + break elseif k == K"[" + if is_macrocall && !is_valid_modref + # a().@x[1] ==> FIXME + emit_modref_error(ps, mark) + end bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_cat(ParseState(ps, end_symbol=true), K"]") @@ -1059,7 +1115,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) # a[i] = x from # ref(a,i) = x # - # TODO: Big list of rewrites + # FIXME: Big list of rewrites # # vect -> ref # hcat -> typed_hcat @@ -1068,12 +1124,37 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) # ncat -> typed_ncat elseif k == K"." bump_disallowed_space(ps) - bump(ps, TRIVIA_FLAG) + if peek(ps, 2) == K"'" + emark = position(ps) + bump(ps) + bump(ps) + "f.'" => "f (error . ')" + emit(ps, emark, K"error", TRIVIA_FLAG, + error="the .' operator is discontinued") + is_valid_modref = false + continue + end + if !isnothing(macro_atname_range) + # Allow `@` in macrocall only in first and last position + # A.B.@x ==> (macrocall (. (. A (quote B)) (quote x))) + # @A.B.x ==> (macrocall (. (. A (quote B)) (quote x))) + # A.@B.x ==> (macrocall (. (. A (error) B) (quote x))) + emit_diagnostic(ps, macro_atname_range, + error="`@` must appear on first or last macro name component") + bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") + else + bump(ps, TRIVIA_FLAG) + end k = peek(ps) if k == K"(" - # f.(a,b) => (. 
f (tuple a b)) - # keyword params always use kw here; different from normal tuple: - # f.(a=1) => (. f (tuple (kw a 1))) + if is_macrocall + bump_invisible(ps, K"error") + emit_diagnostic(ps, mark, + error="dot call syntax not supported for macros") + end + # Keyword params always use kw inside tuple in dot calls + # f.(a,b) ==> (. f (tuple a b)) + # f.(a=1) ==> (. f (tuple (kw a 1))) bump_disallowed_space(ps) m = position(ps) bump(ps, TRIVIA_FLAG) @@ -1081,7 +1162,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) emit(ps, m, K"tuple") emit(ps, mark, K".") elseif k == K":" - # f.:x ==> (. f (quote x)) + # A.:+ ==> (. A (quote +)) m = position(ps) bump(ps, TRIVIA_FLAG) bump_disallowed_space(ps) @@ -1095,48 +1176,106 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall) bump(ps, TRIVIA_FLAG) parse_atom(ps) emit(ps, m, K"$") - emit(ps, m, K"quote") # TODO: why does flisp uses K"inert" here? + emit(ps, m, K"quote") emit(ps, mark, K".") + # Syntax extension: We could allow interpolations like A.$B.@C + # to parse in the module reference path. But disallow this for + # now for simplicity and for compatibility with the flisp parser. elseif k == K"@" - # A macro call + # A macro call after some prefix A has been consumed # A.@x ==> (macrocall (. A x)) # A.@x a ==> (macrocall (. A x) a) - # - # Somewhat strangely the `@` in macrocall is allowed in any position - # A.B.@x ==> (macrocall ((. A B) x)) - # A.@B.x ==> (macrocall ((. A B) x)) - is_macrocall = true m = position(ps) - bump(ps, TRIVIA_FLAG) # @ - parse_atom(ps, false) + if is_macrocall + # @A.B.@x a ==> (macrocall (. A x) a) + bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path") + else + bump(ps, TRIVIA_FLAG) + is_macrocall = true + end + is_doc_macro = parse_macro_name(ps) + macro_atname_range = (m, position(ps)) emit(ps, m, K"quote") emit(ps, mark, K".") + this_iter_valid_modref = true else - # f.x ==> (. f x) + # Field/property syntax + # f.x.y ==> (. (. 
f (quote x)) (quote y)) m = position(ps) + if is_macrocall + is_doc_macro = peek_equal_to(ps, "doc") + end parse_atom(ps, false) + strmacro_name_position = position(ps) emit(ps, m, K"quote") emit(ps, mark, K".") + this_iter_valid_modref = true end elseif k == K"'" + if !is_suffixed(t) + # f' ==> (' f) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, k) + else + # f'ᵀ ==> (call 'ᵀ f) + bump(ps) + emit(ps, mark, K"call", INFIX_FLAG) + end elseif k == K"{" - elseif k in (K"String", K"TripleString", K"Cmd", K"TripleCmd") + # Type parameter curlies and macro calls + if is_macrocall && !is_valid_modref + # a().@x{y} ==> (macrocall (error (. (call a) (quote x))) (braces y)) + emit_modref_error(ps, mark) + end + m = position(ps) + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + parse_call_arglist(ps, K"}", is_macrocall) + if is_macrocall + # @S{a,b} ==> (macrocall S (braces a b)) + emit(ps, m, K"braces") + emit(ps, mark, K"macrocall") + # Extension + #if ps.julia_version < v"1.5" + # emit(ps, mark, K"error", + # error="", min_version=v"1.5") + #end + break + else + # S{a,b} ==> (curly S a b) + emit(ps, mark, K"curly") + end + elseif k in (K"String", K"TripleString", K"Cmd", K"TripleCmd") && + !t.had_whitespace && is_valid_modref + # Custom string and command literals + # x"str" ==> (macrocall x_str "str") + # x`str` ==> (macrocall x_cmd "str") + + # Use a special token kind for string and cmd macro names so the + # names can be expanded later as necessary. + reset_node!(ps, strmacro_name_position, + kind = is_string(k) ? K"StringMacroName" : K"CmdMacroName") + bump(ps) + t = peek_token(ps) + k = kind(t) + if !t.had_whitespace && (k == K"Identifier" || iskeyword(k) || is_number(k)) + # Macro sufficies can include keywords and numbers + # x"s"y ==> (macrocall x_str "s" "y") + # x"s"end ==> (macrocall x_str "s" "end") + # x"s"2 ==> (macrocall x_str "s" 2) + # x"s"10.0 ==> (macrocall x_str "s" 10.0) + suffix_kind = (k == K"Identifier" || iskeyword(k)) ? 
+ K"UnquotedString" : k + bump(ps, new_kind=suffix_kind) + end + emit(ps, mark, K"macrocall") else break end + is_valid_modref &= this_iter_valid_modref end end -# flisp: (define (expect-end s word) -function expect_end(s, word) - TODO("expect_end unimplemented") -end - -# flisp: (define (expect-end-error t word) -function expect_end_error(t, word) - TODO("expect_end_error unimplemented") -end - # flisp: (define (parse-subtype-spec s) function parse_subtype_spec(ps::ParseState) TODO("parse_subtype_spec unimplemented") @@ -1198,14 +1337,18 @@ end # flisp: (define (parse-macro-name s) function parse_macro_name(ps::ParseState) + is_doc_macro = false bump_disallowed_space(ps) with_space_sensitive(ps) do ps if peek(ps) == K"." bump(ps, new_kind=K"__dot__") else + # The doc in @doc is a contextural keyword + is_doc_macro = peek_equal_to(ps, "doc") parse_atom(ps, false) end end + return is_doc_macro end # flisp: (define (parse-atsym s) @@ -1444,8 +1587,8 @@ function parse_brackets(after_parse::Function, space_sensitive=false, where_enabled=true, whitespace_newline=true) - params_marks = Int[] - eq_positions = Int[] + params_marks = ParseStreamPosition[] + eq_positions = ParseStreamPosition[] last_eq_before_semi = 0 num_subexprs = 0 num_semis = 0 @@ -1704,15 +1847,17 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps) end else - # Being inside quote makes `end` non-special again. 
issue #27690 + # Being inside quote makes keywords into identifiers at at the + # first level of nesting + # :end ==> (quote end) + # :(end) ==> (quote (error end)) + # Being inside quote makes end non-special again (issue #27690) # a[:(end)] ==> (ref a (quote (error-t end))) parse_atom(ParseState(ps, end_symbol=false), false) end emit(ps, atom_mark, K"quote") elseif leading_kind == K"=" - emark = position(ps) - bump(ps) - emit(ps, emark, K"error", TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG, error="unexpected `=`") elseif leading_kind == K"Identifier" bump(ps) elseif leading_kind == K"VarIdentifier" @@ -1721,14 +1866,22 @@ function parse_atom(ps::ParseState, check_identifiers=true) if !t.had_whitespace && !(isoperator(kind(t)) || is_non_keyword_closer(t)) bump(ps, error="suffix not allowed after var\"...\" syntax") end - elseif isoperator(leading_kind) || iskeyword(leading_kind) + elseif isoperator(leading_kind) # Operators and keywords are generally turned into identifiers if used # as atoms. - if check_identifiers && (is_syntactic_operator(leading_kind) || - is_closing_token(ps, leading_kind)) - bump(ps, error="Invalid identifier") + if check_identifiers && is_syntactic_operator(leading_kind) + bump(ps, error="invalid identifier") + else + bump(ps) + end + elseif iskeyword(leading_kind) + if check_identifiers && is_closing_token(ps, leading_kind) + # :(end) ==> (quote (error end)) + bump(ps, error="invalid identifier") + else + # :end ==> (quote end) + bump(ps, new_kind=K"Identifier") end - bump(ps, new_kind=K"Identifier") elseif leading_kind == K"(" # parens or tuple parse_paren(ps, check_identifiers) elseif leading_kind == K"[" # cat expression @@ -1741,33 +1894,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) # FIXME parse_string_literal(ps) elseif leading_kind == K"@" # macro call bump(ps, TRIVIA_FLAG) - with_space_sensitive(ps) do ps - is_doc_macro = peek_equal_to(ps, "doc") - # @Mod.mac a b ==> (macrocall (. 
Mod mac) a b) - # see macroify-name - parse_macro_name(ps) - if peek_token(ps).had_whitespace - # Space separated macro arguments - # @mac a b ==> (macrocall mac a b) - n_args = parse_space_separated_exprs(ps) - if is_doc_macro && n_args == 1 - # Parse extended @doc args on next line - # - # @doc x\ny ==> (macrocall doc x y) - # @doc x y\nz ==> (macrocall doc x y) - # @doc x\nend ==> (macrocall doc x) - if peek(ps) == K"NewlineWs" && !is_closing_token(ps, peek(ps, 2)) - bump(ps) # newline - parse_eq(ps) - end - end - emit(ps, atom_mark, K"macrocall") - else - # Parenthesized macro arguments - # @mac(a,b) ==> (macrocall mac a b) - parse_call_chain(ps, atom_mark, true) - end - end + is_doc_macro = parse_macro_name(ps) + parse_call_chain(ps, atom_mark, true, is_doc_macro) elseif leading_kind in (K"Cmd", K"TripleCmd") bump_invisible(ps, K"core_@cmd") bump(ps) @@ -1782,15 +1910,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) "unexpected closing token" emit_diagnostic(ps, error=msg) else - bump(ps, error="Invalid syntax") + bump(ps, error="invalid syntax atom") end end -# flisp: (define (valid-modref? e) -function is_valid_modref(e) - TODO("is_valid_modref unimplemented") -end - # flisp: (define (macroify-name e . 
suffixes) function macroify_name(e, _, suffixes) TODO("macroify_name unimplemented") diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 09c4c0a5086e4..ed706e56dfbe3 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -77,8 +77,15 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val = Symbol(val_str) elseif k == K"VarIdentifier" val = Symbol(val_str[5:end-1]) - elseif k == K"String" + elseif iskeyword(k) + # This only happens nodes nested inside errors + val = Symbol(val_str) + elseif k in (K"String", K"Cmd") val = unescape_string(source[position+1:position+span(raw)-2]) + elseif k in (K"TripleString", K"TripleCmd") + val = unescape_string(source[position+3:position+span(raw)-4]) + elseif k == K"UnquotedString" + val = String(val_str) elseif isoperator(k) val = isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens @@ -92,6 +99,11 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val = ErrorVal() elseif k == K"__dot__" val = :__dot__ + elseif k == K"StringMacroName" + val = Symbol(val_str*"_str") + elseif k == K"CmdMacroName" + val = Symbol(val_str*"_cmd") + @info "hi" val else @error "Leaf node of kind $k unknown to SyntaxNode" val = nothing @@ -143,7 +155,6 @@ end function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename - #@info "" fname print_fname current_filename[] line, col = source_location(node.source, node.position) posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node)-1,6))│" nodestr = !haschildren(node) ? 
@@ -173,10 +184,10 @@ function _show_syntax_node_sexpr(io, node) print(io, repr(node.val)) end else - print(io, "($(_kind_str(kind(node.raw))) ") + print(io, "(", _kind_str(kind(node.raw))) first = true for n in children(node) - first || print(io, ' ') + print(io, ' ') _show_syntax_node_sexpr(io, n) first = false end @@ -279,8 +290,15 @@ end # Conversion to Base.Expr function _macroify_name(name) - @assert name isa Symbol # fixme - Symbol('@', name) + if name isa Symbol + Symbol('@', name) + else + if Meta.isexpr(name, :.) && name.args[2] isa QuoteNode + Expr(:., name.args[1], QuoteNode(_macroify_name(name.args[2].value))) + else + name + end + end end function _to_expr(node::SyntaxNode) @@ -303,7 +321,11 @@ function _to_expr(node::SyntaxNode) pop!(args) end end - Expr(head(node), args...) + if head(node) == :quote + QuoteNode(only(args)) + else + Expr(head(node), args...) + end else node.val end diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 0742650afd51c..dd3e5f83a91d7 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -812,15 +812,18 @@ Dict([ "END_OPS" => Ts.end_ops -"BEGIN_INVISIBLE_TOKENS" => Ts.begin_invisible_tokens +"BEGIN_PARSER_TOKENS" => Ts.begin_parser_tokens "TOMBSTONE" => Ts.TOMBSTONE "core_@doc" => Ts.CORE_AT_DOC "core_@cmd" => Ts.CORE_AT_CMD "core_@int128_str" => Ts.CORE_AT_INT128_STR "core_@uint128_str" => Ts.CORE_AT_UINT128_STR "core_@big_str" => Ts.CORE_AT_BIG_STR -"__dot__" => Ts.__DOT__ -"END_INVISIBLE_TOKENS" => Ts.end_invisible_tokens +"__dot__" => Ts.DOT_MACRO_NAME +"StringMacroName" => Ts.STRING_MACRO_NAME +"CmdMacroName" => Ts.CMD_MACRO_NAME +"UnquotedString" => Ts.UNQUOTED_STRING +"END_PARSER_TOKENS" => Ts.end_parser_tokens # Our custom syntax tokens "BEGIN_SYNTAX_KINDS" => Ts.begin_syntax_kinds diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 3d9851ae6dc20..96cd293dafcaa 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl 
@@ -12,8 +12,6 @@ function itest_parse(production, code) stream = ParseStream(code) production(JuliaSyntax.ParseState(stream)) t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") - s = SyntaxNode(SourceFile(code, filename="none"), t) - ex = Expr(s) println(stdout, "# Code:\n$code\n") @@ -21,9 +19,11 @@ function itest_parse(production, code) show(stdout, MIME"text/plain"(), t, code) JuliaSyntax.show_diagnostics(stdout, stream, code) + s = SyntaxNode(SourceFile(code, filename="none"), t) println(stdout, "\n# SyntaxNode:") show(stdout, MIME"text/x.sexpression"(), s) + ex = Expr(s) println(stdout, "\n\n# Julia Expr:") show(stdout, MIME"text/plain"(), ex) @@ -152,6 +152,56 @@ tests = [ #"\$a" => "(\$ :a)" #"\$\$a" => "(\$ (\$ :a))" ], + JuliaSyntax.parse_call => [ + "f(a,b)" => "(call :f :a :b)" + "f(a).g(b)" => "(call (. (call :f :a) (quote :g)) :b)" + # Keyword arguments depend on call vs macrocall + "foo(a=1)" => "(call :foo (kw :a 1))" + "@foo(a=1)" => "(macrocall :foo (= :a 1))" + # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) + "@foo a b" => "(macrocall :foo :a :b)" + "A.@foo a b" => "(macrocall (. :A (quote :foo)) :a :b)" + "@A.foo a b" => "(macrocall (. :A (quote :foo)) :a :b)" + # Special @doc parsing rules + "@doc x\ny" => "(macrocall :doc :x :y)" + "A.@doc x\ny" => "(macrocall (. :A (quote :doc)) :x :y)" + "@A.doc x\ny" => "(macrocall (. :A (quote :doc)) :x :y)" + "@doc x y\nz" => "(macrocall :doc :x :y)" + "@doc x\n\ny" => "(macrocall :doc :x)" + "@doc x\nend" => "(macrocall :doc :x)" + # Allow `@` in macrocall only in first and last position + "A.B.@x" => "(macrocall (. (. :A (quote :B)) (quote :x)))" + "@A.B.x" => "(macrocall (. (. :A (quote :B)) (quote :x)))" + "A.@B.x" => "(macrocall (. (. :A (quote :B)) (error) (quote :x)))" + "a().@x(y)" => "(macrocall (error (. (call :a) (quote :x))) :y)" + "a().@x y" => "(macrocall (error (. (call :a) (quote :x))) :y)" + "a().@x{y}" => "(macrocall (error (. 
(call :a) (quote :x))) (braces :y))" + # Keyword params always use kw inside tuple in dot calls + "f.(a,b)" => "(. :f (tuple :a :b))" + "f.(a=1)" => "(. :f (tuple (kw :a 1)))" + # Other dotted syntax + "A.:+" => "(. :A (quote :+))" + "f.\$x" => "(. :f (quote (\$ :x)))" + "f.\$(x+y)" => "(. :f (quote (\$ (call :+ :x :y))))" + # .' discontinued + "f.'" => "(toplevel :f (error :. Symbol(\"'\")))" + # Field/property syntax + "f.x.y" => "(. (. :f (quote :x)) (quote :y))" + # Adjoint + "f'" => "(' :f)" + "f'ᵀ" => "(call Symbol(\"'ᵀ\") :f)" + # Curly calls + "@S{a,b}" => "(macrocall :S (braces :a :b))" + "S{a,b}" => "(curly :S :a :b)" + # String macros + """x"str\"""" => """(macrocall :x_str "str")""" + """x`str`""" => """(macrocall :x_cmd "str")""" + # Macro sufficies can include keywords and numbers + "x\"s\"y" => """(macrocall :x_str "s" "y")""" + "x\"s\"end" => """(macrocall :x_str "s" "end")""" + "x\"s\"2" => """(macrocall :x_str "s" 2)""" + "x\"s\"10.0" => """(macrocall :x_str "s" 10.0)""" + ], JuliaSyntax.parse_paren => [ # Parentheses used for grouping # NB: The toplevel below is an artificial part of the test setup @@ -163,14 +213,14 @@ tests = [ "(x,y)" => "(tuple :x :y)" "(x=1, y=2)" => "(tuple (= :x 1) (= :y 2))" # Named tuples with initial semicolon - "(;)" => "(tuple (parameters ))" + "(;)" => "(tuple (parameters))" "(; a=1)" => "(tuple (parameters (kw :a 1)))" # Extra credit: nested parameters and frankentuples "(; a=1; b=2)" => "(tuple (parameters (kw :a 1) (parameters (kw :b 2))))" "(a; b; c,d)" => "(tuple :a (parameters :b (parameters :c :d)))" "(a=1, b=2; c=3)" => "(tuple (= :a 1) (= :b 2) (parameters (kw :c 3)))" # Block syntax - "(;;)" => "(block )" + "(;;)" => "(block)" "(a=1;)" => "(block (= :a 1))" "(a;b;;c)" => "(block :a :b :c)" "(a=1; b=2)" => "(block (= :a 1) (= :b 2))" @@ -180,17 +230,16 @@ tests = [ # Literal colons ":)" => ":(:)" ": end" => ":(:)" - # Macros - "@foo x y" => "(macrocall :foo :x :y)" - "@foo x\ny" => "(macrocall :foo :x)" - # 
Doc macro parsing - "@doc x\ny" => "(macrocall :doc :x :y)" - "@doc x\nend" => "(macrocall :doc :x)" - "@doc x y\nz" => "(macrocall :doc :x :y)" + # Special symbols quoted + ":end" => "(quote :end)" + ":(end)" => "(quote (error :end))" + ":<:" => "(quote :<:)" + # Macro names can be keywords + "@end x" => "(macrocall :end :x)" # __dot__ macro "@. x y" => "(macrocall :__dot__ :x :y)" # Errors - ": foo" => "(quote (error ) :foo)" + ": foo" => "(quote (error) :foo)" ], JuliaSyntax.parse_docstring => [ "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" @@ -199,7 +248,7 @@ tests = [ @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests - for (input,output) in test_specs + @testset "$input" for (input,output) in test_specs @test test_parse(production, input) == output end end From be46fc20f5b806745f637da83aa1855a505b0024 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 18 Dec 2021 05:58:03 +1000 Subject: [PATCH 0247/1109] Implement parse_do + more tests / test cleanup --- JuliaSyntax/src/parse_stream.jl | 16 +++++------ JuliaSyntax/src/parser.jl | 47 ++++++++++++++++++++++++--------- JuliaSyntax/src/syntax_tree.jl | 42 +++++++++++++++-------------- JuliaSyntax/src/tokens.jl | 5 ++++ JuliaSyntax/test/parser.jl | 43 +++++++++++++++++++----------- 5 files changed, 97 insertions(+), 56 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 08848b0e98e40..633f6f67717dc 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -516,14 +516,14 @@ end # Functions to change parse state -function with_normal_context(ps::ParseState) - f(ParseState(ps, - range_colon_enabled=true, - space_sensitive=false, - where_enabled=false, - for_generator=false, - end_symbol=false, - whitespace_newline=false)) +function normal_context(ps::ParseState) + ParseState(ps, + range_colon_enabled=true, + space_sensitive=false, + where_enabled=false, + for_generator=false, + 
end_symbol=false, + whitespace_newline=false) end function with_space_sensitive(f::Function, ps::ParseState) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 46a623cbe0c5d..ae22485d82620 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -235,10 +235,9 @@ end # # flisp: (define (parse-block s (down parse-eq)) function parse_block(ps::ParseState, down=parse_eq, mark=position(ps)) - if parse_Nary(ps, down, (K"NewlineWs", K";"), + parse_Nary(ps, down, (K"NewlineWs", K";"), (K"end", K"else", K"elseif", K"catch", K"finally")) - emit(ps, mark, K"block") - end + emit(ps, mark, K"block") end # ";" at the top level produces a sequence of top level expressions @@ -973,17 +972,16 @@ function parse_unary_prefix(ps::ParseState) if is_syntactic_unary_op(k) k2 = peek(ps, 2) if k in (K"&", K"$") && (is_closing_token(ps, k2) || k2 == K"NewlineWs") - # (&) ==> (&) - # === - # x = $ - # ==> (= x &) + # &) ==> & + # $\n ==> $ bump(ps) else bump(ps, TRIVIA_FLAG) if k in (K"&", K"::") parse_where(ps, parse_call) else - # $$$a ==> ($ ($ ($ a))) + # $a ==> ($ a) + # $$a ==> ($ ($ a)) parse_unary_prefix(ps) end emit(ps, mark, k) @@ -1320,9 +1318,24 @@ function parse_resword(ps::ParseState, word) TODO("parse_resword unimplemented") end -# flisp: (define (parse-do s) +# +# flisp: parse-do function parse_do(ps::ParseState) - TODO("parse_do unimplemented") + ps = normal_context(ps) + mark = position(ps) + if peek(ps) in (K"NewlineWs", K";") + # f() do\nend ==> (do (call f) (-> (tuple) (block))) + # f() do ; body end ==> (do (call f) (-> (tuple) (block body))) + # this trivia needs to go into the tuple due to the way position() + # works. 
+ bump(ps, TRIVIA_FLAG) + else + # f() do x, y\n body end ==> (do (call f) (-> (tuple x y) (block body))) + parse_comma_separated(ps, parse_range) + end + emit(ps, mark, K"tuple") + parse_block(ps) + emit(ps, mark, K"->") end # flisp: (define (macrocall-to-atsym e) @@ -1373,9 +1386,16 @@ end # parse comma-separated assignments, like "i=1:n,j=1:m,..." # -# flisp: (define (parse-comma-separated s what) -function parse_comma_separated(ps::ParseState, what) - TODO("parse_comma_separated unimplemented") +# flisp: parse-comma-separated +function parse_comma_separated(ps::ParseState, down) + while true + down(ps) + if peek(ps) == K"," + bump(ps, TRIVIA_FLAG) + else + break + end + end end # flisp: (define (parse-comma-separated-assignments s) @@ -1581,6 +1601,7 @@ end # tuples we use the presence of `,` within the `;`-delimited sections: If # there's commas, it's a tuple, otherwise a block. # +# flisp: parts of parse-paren- and parse-arglist function parse_brackets(after_parse::Function, ps::ParseState, closing_kind) ps = ParseState(ps, range_colon_enabled=true, diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index ed706e56dfbe3..34bdb621e107f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -61,49 +61,49 @@ end Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) - if !haschildren(raw) + if !haschildren(raw) && !is_syntax_kind(raw) # Leaf node k = kind(raw) val_range = position:position + span(raw) - 1 val_str = source[val_range] # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. - if k == K"Integer" - val = Base.parse(Int, val_str) + val = if k == K"Integer" + Base.parse(Int, val_str) elseif k == K"Float" # FIXME: Other float types! 
- val = Base.parse(Float64, val_str) + Base.parse(Float64, val_str) elseif k == K"Identifier" - val = Symbol(val_str) + Symbol(val_str) elseif k == K"VarIdentifier" - val = Symbol(val_str[5:end-1]) + Symbol(val_str[5:end-1]) elseif iskeyword(k) # This only happens nodes nested inside errors - val = Symbol(val_str) + Symbol(val_str) elseif k in (K"String", K"Cmd") - val = unescape_string(source[position+1:position+span(raw)-2]) + unescape_string(source[position+1:position+span(raw)-2]) elseif k in (K"TripleString", K"TripleCmd") - val = unescape_string(source[position+3:position+span(raw)-4]) + unescape_string(source[position+3:position+span(raw)-4]) elseif k == K"UnquotedString" - val = String(val_str) + String(val_str) elseif isoperator(k) - val = isempty(val_range) ? + isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens Symbol(val_str) - @assert !isnothing(val) elseif k == K"core_@doc" - val = GlobalRef(Core, :var"@doc") + GlobalRef(Core, :var"@doc") elseif k == K"core_@cmd" - val = GlobalRef(Core, :var"@cmd") + GlobalRef(Core, :var"@cmd") elseif k == K"error" - val = ErrorVal() + ErrorVal() elseif k == K"__dot__" - val = :__dot__ + :__dot__ elseif k == K"StringMacroName" - val = Symbol(val_str*"_str") + Symbol(val_str*"_str") elseif k == K"CmdMacroName" - val = Symbol(val_str*"_cmd") - @info "hi" val + Symbol(val_str*"_cmd") + elseif is_syntax_kind(raw) + nothing else @error "Leaf node of kind $k unknown to SyntaxNode" val = nothing @@ -111,7 +111,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In return SyntaxNode(source, raw, position, nothing, :leaf, val) else k = kind(raw) - str = untokenize(k) + str = k == K"Nothing" ? "Nothing" : untokenize(k) head = !isnothing(str) ? 
Symbol(str) : error("Can't untokenize head of kind $k") cs = SyntaxNode[] @@ -142,6 +142,8 @@ iserror(node::SyntaxNode) = iserror(node.raw) istrivia(node::SyntaxNode) = istrivia(node.raw) head(node::SyntaxNode) = node.head +kind(node::SyntaxNode) = kind(node.raw) +flags(node::SyntaxNode) = kind(node.raw) haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 4b9c3010aa7b6..303609db1f5e7 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -60,6 +60,11 @@ is_prec_pipe_gt(t) = kind(t) == K"|>" end =# +function is_syntax_kind(t) + k = kind(t) + K"BEGIN_SYNTAX_KINDS" < k < K"END_SYNTAX_KINDS" +end + function is_number(t) kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 96cd293dafcaa..b69cce11cf714 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,10 +1,14 @@ function test_parse(production, code) stream = ParseStream(code) production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") - # @test Text(sprint(JuliaSyntax.show_diagnostics, stream, code)) == Text("") - s = SyntaxNode(SourceFile(code), t) - sprint(show, MIME("text/x.sexpression"), s) + t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"Nothing") + source = SourceFile(code) + s = SyntaxNode(source, t) + if JuliaSyntax.kind(s) == K"Nothing" + join([sprint(show, MIME("text/x.sexpression"), c) for c in children(s)], ' ') + else + sprint(show, MIME("text/x.sexpression"), s) + end end # Version of test_parse for interactive exploration @@ -25,7 +29,8 @@ function itest_parse(production, code) ex = Expr(s) println(stdout, "\n\n# Julia Expr:") - show(stdout, MIME"text/plain"(), ex) + dump(ex) + #show(stdout, MIME"text/plain"(), ex) f_ex = 
Base.remove_linenums!(Meta.parse(code, raise=false)) if ex != f_ex @@ -143,18 +148,26 @@ tests = [ "+(a;b,c)" => "(call :+ :a (parameters :b :c))" ], JuliaSyntax.parse_decl => [ - #"a::b" => "(:: :a :b)" - #"a->b" => "(-> :a :b)" + "a::b" => "(:: :a :b)" + "a->b" => "(-> :a :b)" + "a::b->c" => "(-> (:: :a :b) :c)" ], JuliaSyntax.parse_unary_prefix => [ - #"&a" => "(& :a)" - #"::a" => "(:: :a)" - #"\$a" => "(\$ :a)" - #"\$\$a" => "(\$ (\$ :a))" + "&)" => ":&" + "\$\n" => ":\$" + "&a" => "(& :a)" + "::a" => "(:: :a)" + "\$a" => "(\$ :a)" + "\$\$a" => "(\$ (\$ :a))" ], JuliaSyntax.parse_call => [ "f(a,b)" => "(call :f :a :b)" "f(a).g(b)" => "(call (. (call :f :a) (quote :g)) :b)" + # do + "f() do x, y\n body end" => "(do (call :f) (-> (tuple :x :y) (block :body)))" + "f() do\nend" => "(do (call :f) (-> (tuple) (block)))" + "f() do ; body end" => "(do (call :f) (-> (tuple) (block :body)))" + "f(x) do y,z body end" => "(do (call :f :x) (-> (tuple :y :z) (block :body)))" # Keyword arguments depend on call vs macrocall "foo(a=1)" => "(call :foo (kw :a 1))" "@foo(a=1)" => "(macrocall :foo (= :a 1))" @@ -184,7 +197,7 @@ tests = [ "f.\$x" => "(. :f (quote (\$ :x)))" "f.\$(x+y)" => "(. :f (quote (\$ (call :+ :x :y))))" # .' discontinued - "f.'" => "(toplevel :f (error :. Symbol(\"'\")))" + "f.'" => ":f (error :. Symbol(\"'\"))" # Field/property syntax "f.x.y" => "(. (. 
:f (quote :x)) (quote :y))" # Adjoint @@ -205,9 +218,9 @@ tests = [ JuliaSyntax.parse_paren => [ # Parentheses used for grouping # NB: The toplevel below is an artificial part of the test setup - "(a * b)" => "(toplevel (call :* :a :b))" - "(a=1)" => "(toplevel (= :a 1))" - "(x)" => "(toplevel :x)" + "(a * b)" => "(call :* :a :b)" + "(a=1)" => "(= :a 1)" + "(x)" => ":x" # Tuple syntax with commas "(x,)" => "(tuple :x)" "(x,y)" => "(tuple :x :y)" From f35c55fd4a2bef63ab07bc8922d4d0dd846fc365 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 18 Dec 2021 17:22:43 +1000 Subject: [PATCH 0248/1109] Some progress on parse_resword Parse block / quote / while / for / let --- JuliaSyntax/src/parse_stream.jl | 12 +- JuliaSyntax/src/parser.jl | 273 +++++++++++++++++++++++++++---- JuliaSyntax/src/syntax_tree.jl | 3 +- JuliaSyntax/src/token_kinds.jl | 3 +- JuliaSyntax/src/tokens.jl | 7 +- JuliaSyntax/test/parse_stream.jl | 4 +- JuliaSyntax/test/parser.jl | 50 +++++- JuliaSyntax/test/runtests.jl | 9 +- 8 files changed, 307 insertions(+), 54 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 633f6f67717dc..a1ce8c644bb31 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -248,8 +248,8 @@ end # Though note bump() really does both input and output # Bump the next `n` tokens -# flags and new_kind are applied to any non-trivia tokens -function _bump_n(stream::ParseStream, n::Integer, flags, new_kind=K"Nothing") +# flags and remap_kind are applied to any non-trivia tokens +function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") if n <= 0 return end @@ -261,7 +261,7 @@ function _bump_n(stream::ParseStream, n::Integer, flags, new_kind=K"Nothing") end is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags - k = (is_trivia || new_kind == K"Nothing") ? k : new_kind + k = (is_trivia || remap_kind == K"Nothing") ? 
k : remap_kind span = TaggedRange(SyntaxHead(k, f), first_byte(tok), last_byte(tok)) push!(stream.spans, span) end @@ -273,14 +273,14 @@ end """ bump(stream [, flags=EMPTY_FLAGS]; - skip_newlines=false, error, new_kind) + skip_newlines=false, error, remap_kind) Shift the current token from the input to the output, adding the given flags. """ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, - error=nothing, new_kind=K"Nothing") + error=nothing, remap_kind=K"Nothing") emark = position(stream) - _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, new_kind) + _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) if !isnothing(error) emit(stream, emark, K"error", TRIVIA_FLAG, error=error) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ae22485d82620..97ceeaab9fd59 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -7,6 +7,7 @@ # Crude recovery heuristic: bump any tokens which aren't block or bracket # closing tokens. function bump_closing_token(ps, closing_kind) + # TODO: Refactor with recover() ? bump_trivia(ps) if peek(ps) == closing_kind bump(ps, TRIVIA_FLAG) @@ -29,6 +30,24 @@ function bump_closing_token(ps, closing_kind) end end +# Read tokens until we find an expected closing token. 
+# Bump the big pile of resulting tokens as a single nontrivia error token +function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; error="unexpected tokens") + mark = position(ps) + while true + k = peek(ps) + if k == K"EndMarker" + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="premature end of input") + break + elseif is_closer(ps, k) + break + end + bump(ps) + end + emit(ps, mark, K"error", flags, error=error) +end + # flisp: disallow-space function bump_disallowed_space(ps) if peek_token(ps).had_whitespace @@ -234,12 +253,18 @@ end # ==> (block a b) # # flisp: (define (parse-block s (down parse-eq)) -function parse_block(ps::ParseState, down=parse_eq, mark=position(ps)) - parse_Nary(ps, down, (K"NewlineWs", K";"), - (K"end", K"else", K"elseif", K"catch", K"finally")) +function parse_block(ps::ParseState, down=parse_eq, mark=position(ps), + consume_end=false) + parse_block_inner(ps::ParseState, down) emit(ps, mark, K"block") end +# Parse a block, but leave emitting the block up to the caller. +function parse_block_inner(ps::ParseState, down) + parse_Nary(ps, down, (K"NewlineWs", K";"), + (K"end", K"else", K"elseif", K"catch", K"finally")) +end + # ";" at the top level produces a sequence of top level expressions # # a;b;c ==> (toplevel a b c) @@ -641,12 +666,29 @@ end function parse_unary_subtype(ps::ParseState) k = peek(ps, skip_newlines=true) if k == K"EndMarker" - # FIXME - should be in parse_atom!! 
- bump_invisible(ps, K"error", error="expected identifier") + parse_atom(ps) return + elseif k in (K"<:", K">:") + # FIXME add test cases + k2 = peek(ps, 2) + if is_closing_token(k2) || k2 in (K"NewlineWs", K"=") + # return operator by itself, as in (<:) + bump(ps) + return + end + if k2 in (K"{", K"(") + # parse <:{T}(x::T) or <:(x::T) like other unary operators + parse_where(ps, parse_juxtapose) + else + TODO("parse_unary_subtype") + parse_where(ps, parse_juxtapose) + if peek_behind(ps) == K"tuple" + # Argh + end + end + else + parse_where(ps, parse_juxtapose) end - parse_where(ps, parse_juxtapose) - #TODO("parse_unary_subtype unimplemented") end # flisp: parse-where-chain @@ -895,9 +937,8 @@ end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) - # FIXME - #parse_call_with_initial_ex(ps, mark) - #parse_decl_with_initial_ex(ps, mark) + parse_call_with_initial_ex(ps, mark) + parse_decl_with_initial_ex(ps, mark) if is_prec_power(peek(ps)) bump(ps) parse_factor_after(ps) @@ -945,15 +986,20 @@ end # flisp: parse-call function parse_call(ps::ParseState) mark = position(ps) - parse_unary_prefix(ps) - parse_call_with_initial_ex(ps, mark) + k = peek(ps) + if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") + parse_resword(ps) + else + parse_unary_prefix(ps) + parse_call_with_initial_ex(ps, mark) + end end # flisp: parse-call-with-initial-ex function parse_call_with_initial_ex(ps::ParseState, mark) k = peek(ps) if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") - parse_resword(ps, mark) + parse_resword(ps) else parse_call_chain(ps, mark) end @@ -987,6 +1033,7 @@ function parse_unary_prefix(ps::ParseState) emit(ps, mark, k) end else + # Here's where things go wrong. 
parse_atom(ps) end end @@ -1168,13 +1215,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro emit(ps, m, K"quote") emit(ps, mark, K".") elseif k == K"$" - # f.$x ==> (. f (quote ($ x))) - # f.$(x+y) ==> (. f (quote ($ (call + x y)))) + # f.$x ==> (. f (inert ($ x))) + # f.$(x+y) ==> (. f (inert ($ (call + x y)))) m = position(ps) bump(ps, TRIVIA_FLAG) parse_atom(ps) emit(ps, m, K"$") - emit(ps, m, K"quote") + emit(ps, m, K"inert") emit(ps, mark, K".") # Syntax extension: We could allow interpolations like A.$B.@C # to parse in the module reference path. But disallow this for @@ -1264,7 +1311,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro # x"s"10.0 ==> (macrocall x_str "s" 10.0) suffix_kind = (k == K"Identifier" || iskeyword(k)) ? K"UnquotedString" : k - bump(ps, new_kind=suffix_kind) + bump(ps, remap_kind=suffix_kind) end emit(ps, mark, K"macrocall") else @@ -1299,8 +1346,8 @@ function rewrap_where(x, w) TODO("rewrap_where unimplemented") end -# flisp: (define (parse-struct-def s mut? 
word) -function parse_struct_def(ps::ParseState, is_mut, word) +# flisp: parse-struct-def +function parse_struct_def(ps::ParseState, mark, is_mut) TODO("parse_struct_def unimplemented") end @@ -1313,9 +1360,127 @@ end # parse expressions or blocks introduced by syntactic reserved words # -# flisp: (define (parse-resword s word) -function parse_resword(ps::ParseState, word) - TODO("parse_resword unimplemented") +# flisp: parse-resword +function parse_resword(ps::ParseState) + mark = position(ps) + ps = normal_context(ps) + word = peek(ps) + if word in (K"begin", K"quote") + # begin end ==> (block) + # begin a ; b end ==> (block a b) + # begin\na\nb\nend ==> (block a b) + bump(ps, TRIVIA_FLAG) + parse_block_inner(ps, parse_docstring) + bump_closing_token(ps, K"end") + emit(ps, mark, K"block") + if word == K"quote" + # quote end ==> (quote (block)) + # quote body end ==> (quote (block body)) + emit(ps, mark, K"quote") + end + elseif word == K"while" + # while cond body end ==> (while cond (block body)) + # === + # while x < y + # a + # b + # end + # ==> (while (call < x y) (block a b)) + bump(ps, TRIVIA_FLAG) + parse_cond(ps) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"while") + elseif word == K"for" + # for x in xs end ==> (for (= x xs) (block)) + # === + # for x in xs, y in ys + # a + # b + # end + # ==> (for (block (= x xs) (= y ys)) (block a b)) + bump(ps, TRIVIA_FLAG) + m = position(ps) + n_subexprs = parse_comma_separated(ps, parse_iteration_spec) + if n_subexprs > 1 + emit(ps, m, K"block") + end + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"for") + elseif word == K"let" + bump(ps, TRIVIA_FLAG) + if peek(ps) ∉ (K"NewlineWs", K";") + # let x=1\n end ==> (let (= x 1) (block)) + m = position(ps) + n_subexprs = parse_comma_separated(ps, parse_eq_star) + kb = peek_behind(ps) + # AST wart: This ugly logic seems unfortunate. Why not always emit a block? 
+ # let x=1 ; end ==> (let (= x 1) (block)) + # let x::1 ; end ==> (let (:: x 1) (block)) + # let x ; end ==> (let x (block)) + if n_subexprs > 1 || !(kb in (K"Identifier", K"=", K"::")) + # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + # let x+=1 ; end ==> (let (block (+= x 1)) (block)) + emit(ps, m, K"block") + end + else + # let end ==> (let (block) (block)) + # let ; end ==> (let (block) (block)) + # let ; body end ==> (let (block) (block body)) + bump_invisible(ps, K"block") + end + k = peek(ps) + if k in (K"NewlineWs", K";") + bump(ps, TRIVIA_FLAG) + elseif k == K"end" + else + recover((ps,k)->(is_closing_token(ps,k) || k == K"NewlineWs"), + ps, TRIVIA_FLAG, + error="let variables should end in `;` or newline") + end + # let\na\nb\nend ==> (let (block) (block a b)) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"let") + elseif word in (K"if", K"elseif") + TODO("parse_resword") + elseif word in (K"global", K"local") + TODO("parse_resword") + elseif word == K"const" + TODO("parse_resword") + elseif word in (K"function", K"macro") + TODO("parse_resword") + elseif word == K"abstract" + TODO("parse_resword") + elseif word == K"struct" + parse_struct_def(ps, mark, false) + elseif word == K"mutable" + if peek(ps) != K"struct" + bump(ps, remap_kind=K"Identifier") + parse_call_chain(ps, mark) + else + parse_struct_def(ps, mark, true) + end + elseif word == K"primitive" + TODO("parse_resword") + elseif word == K"try" + TODO("parse_resword") + elseif word == K"return" + TODO("parse_resword") + elseif word in (K"break", K"continue") + TODO("parse_resword") + elseif word in (K"module", K"baremodule") + TODO("parse_resword") + elseif word == K"export" + TODO("parse_resword") + elseif word in (K"import", K"using") + TODO("parse_resword") + elseif word == K"do" + TODO("parse_resword") + else + bump(ps, TRIVIA_FLAG, error="unhandled reserved word") + end end # @@ -1338,11 +1503,6 @@ function parse_do(ps::ParseState) emit(ps, mark, K"->") 
end -# flisp: (define (macrocall-to-atsym e) -function macrocall_to_atsym(e) - TODO("macrocall_to_atsym unimplemented") -end - # flisp: (define (parse-imports s word) function parse_imports(ps::ParseState, word) TODO("parse_imports unimplemented") @@ -1354,7 +1514,7 @@ function parse_macro_name(ps::ParseState) bump_disallowed_space(ps) with_space_sensitive(ps) do ps if peek(ps) == K"." - bump(ps, new_kind=K"__dot__") + bump(ps, remap_kind=K"__dot__") else # The doc in @doc is a contextural keyword is_doc_macro = peek_equal_to(ps, "doc") @@ -1388,14 +1548,17 @@ end # # flisp: parse-comma-separated function parse_comma_separated(ps::ParseState, down) + n_subexprs = 0 while true down(ps) + n_subexprs += 1 if peek(ps) == K"," bump(ps, TRIVIA_FLAG) else break end end + return n_subexprs end # flisp: (define (parse-comma-separated-assignments s) @@ -1403,16 +1566,56 @@ function parse_comma_separated_assignments(ps::ParseState) TODO("parse_comma_separated_assignments unimplemented") end -# as above, but allows both "i=r" and "i in r" +# FIXME(sschaub): for backwards compatibility, allows newline before =/in/∈ +# in generator expressions. 
See issue #37393 +function peek_skip_newline_in_gen(ps::ParseState, n=1) + k = peek(ps, n) + if ps.for_generator && k == K"NewlineWs" + k = peek(ps, n+1) + end + return k +end + +# parse comma-separated "assignment" but allowing `in` and `∈` as assignment operators +# +# i = rhs ==> (= i rhs) +# i in rhs ==> (= i rhs) +# i ∈ rhs ==> (= i rhs) +# +# i = 1:10 ==> (= i (call : 1 10)) +# (i,j) in iter ==> (= (tuple i j) iter) # # flisp: (define (parse-iteration-spec s) function parse_iteration_spec(ps::ParseState) - TODO("parse_iteration_spec unimplemented") + mark = position(ps) + k = peek(ps) + # Handle `outer` contextual keyword + is_outer_kw = k == K"outer" && !(peek_skip_newline_in_gen(ps, 2) in (K"=", K"in", K"∈")) + if is_outer_kw + # outer i = rhs ==> (= (outer i) rhs) + bump(ps, TRIVIA_FLAG) + end + with_space_sensitive(parse_pipe_lt, ps) + if is_outer_kw + emit(ps, mark, K"outer") + end + if peek_skip_newline_in_gen(ps) in (K"=", K"in", K"∈") + bump(ps, TRIVIA_FLAG) + parse_pipe_lt(ps) + else + # Recovery heuristic + recover(ps, error="invalid iteration spec: expected one of `=` `in` or `∈`") do ps, k + k in (K",", K"NewlineWs") || is_closing_token(ps, k) + end + # TODO: or try parse_pipe_lt ??? + end + emit(ps, mark, K"=") end -# flisp: (define (parse-comma-separated-iters s) +# flisp: parse-comma-separated-iters function parse_comma_separated_iters(ps::ParseState) - TODO("parse_comma_separated_iters unimplemented") + # FIXME REmove? 
+ parse_comma_separated(ps, parse_iteration_spec) end # flisp: (define (parse-space-separated-exprs s) @@ -1901,7 +2104,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, error="invalid identifier") else # :end ==> (quote end) - bump(ps, new_kind=K"Identifier") + bump(ps, remap_kind=K"Identifier") end elseif leading_kind == K"(" # parens or tuple parse_paren(ps, check_identifiers) @@ -2038,11 +2241,11 @@ function parse_all(stream::ParseStream) end end emit(ps, mark, K"toplevel") - return ps.stream + return stream end function parse_all(code, args...) stream = ParseStream(code) - return parse_all(ParseState(stream), args...) + return parse_all(stream, args...) end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 34bdb621e107f..56401f727b25d 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -323,7 +323,8 @@ function _to_expr(node::SyntaxNode) pop!(args) end end - if head(node) == :quote + if head(node) == :inert || (head(node) == :quote && + length(args) == 1 && !(only(args) isa Expr)) QuoteNode(only(args)) else Expr(head(node), args...) 
diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index dd3e5f83a91d7..d17a2d7a88c60 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -831,6 +831,7 @@ Dict([ "call" => Ts.CALL "comparison" => Ts.COMPARISON "curly" => Ts.CURLY +"inert" => Ts.INERT "string" => Ts.STRING_INTERP "macrocall" => Ts.MACROCALL "kw" => Ts.KW # the = in f(a=1) @@ -868,7 +869,7 @@ for kw in split("""abstract baremodule begin break catch const macro module mutable new outer primitive quote return struct try type using while - block call comparison curly string macrocall kw parameters + block call comparison curly string inert macrocall kw parameters toplevel tuple ref vect braces bracescat hcat vcat ncat typed_hcat typed_vcat typed_ncat generator flatten comprehension typed_comprehension diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 303609db1f5e7..dd248100f5537 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -61,8 +61,7 @@ end =# function is_syntax_kind(t) - k = kind(t) - K"BEGIN_SYNTAX_KINDS" < k < K"END_SYNTAX_KINDS" + K"BEGIN_SYNTAX_KINDS" < kind(t) < K"END_SYNTAX_KINDS" end function is_number(t) @@ -73,6 +72,10 @@ function is_string(t) kind(t) in (K"String", K"TripleString") end +function is_assignment_op(t) + K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" +end + function is_radical_op(t) kind(t) in (K"√", K"∛", K"∜") end diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 30b46d5aafd14..24fb10365231a 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -68,12 +68,14 @@ st = ParseStream(code) emit(st, p1, K"toplevel") end -t = JuliaSyntax.to_raw_tree(st) +@test JuliaSyntax.to_raw_tree(st) isa JuliaSyntax.GreenNode # ## Input code +#= println("-----------------------") print(code) println() # ## Output tree show(stdout, MIME"text/plain"(), t, code, show_trivia=true) +=# diff --git 
a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b69cce11cf714..88f91d29ed6f6 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -29,11 +29,13 @@ function itest_parse(production, code) ex = Expr(s) println(stdout, "\n\n# Julia Expr:") - dump(ex) - #show(stdout, MIME"text/plain"(), ex) + show(stdout, MIME"text/plain"(), ex) f_ex = Base.remove_linenums!(Meta.parse(code, raise=false)) if ex != f_ex + println(stdout, "\n\n# AST dump") + dump(ex) + printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) show(stdout, MIME"text/plain"(), f_ex) end @@ -194,8 +196,8 @@ tests = [ "f.(a=1)" => "(. :f (tuple (kw :a 1)))" # Other dotted syntax "A.:+" => "(. :A (quote :+))" - "f.\$x" => "(. :f (quote (\$ :x)))" - "f.\$(x+y)" => "(. :f (quote (\$ (call :+ :x :y))))" + "f.\$x" => "(. :f (inert (\$ :x)))" + "f.\$(x+y)" => "(. :f (inert (\$ (call :+ :x :y))))" # .' discontinued "f.'" => ":f (error :. Symbol(\"'\"))" # Field/property syntax @@ -215,6 +217,46 @@ tests = [ "x\"s\"2" => """(macrocall :x_str "s" 2)""" "x\"s\"10.0" => """(macrocall :x_str "s" 10.0)""" ], + JuliaSyntax.parse_resword => [ + # block + "begin end" => "(block)" + "begin a ; b end" => "(block :a :b)" + "begin\na\nb\nend" => "(block :a :b)" + # quote + "quote end" => "(quote (block))" + "quote body end" => "(quote (block :body))" + # while + "while cond body end" => "(while :cond (block :body))" + """ + while x < y + a + b + end""" => "(while (call :< :x :y) (block :a :b))" + # for + "for x in xs end" => "(for (= :x :xs) (block))" + """ + for x in xs, y in ys + a + b + end""" => "(for (block (= :x :xs) (= :y :ys)) (block :a :b))" + # let + "let x=1\n end" => "(let (= :x 1) (block))" + "let x ; end" => "(let :x (block))" + "let x=1 ; end" => "(let (= :x 1) (block))" + "let x::1 ; end" => "(let (:: :x 1) (block))" + "let x=1,y=2 end" => "(let (block (= :x 1) (= :y 2)) (block))" + "let x+=1 ; end" => "(let (block (+= :x 1)) (block))" + "let ; end" => "(let (block) 
(block))" + "let ; body end" => "(let (block) (block :body))" + "let\na\nb\nend" => "(let (block) (block :a :b))" + ], + JuliaSyntax.parse_iteration_spec => [ + "i = rhs" => "(= :i :rhs)" + "i in rhs" => "(= :i :rhs)" + "i ∈ rhs" => "(= :i :rhs)" + "i = 1:10" => "(= :i (call :(:) 1 10))" + "(i,j) in iter" => "(= (tuple :i :j) :iter)" + ], JuliaSyntax.parse_paren => [ # Parentheses used for grouping # NB: The toplevel below is an artificial part of the test setup diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 77cd998df0d81..deb5a70dd6993 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -27,9 +27,10 @@ N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, raw_flags()), args...) # Non-trivia, infix form NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, raw_flags(infix=true)), args...) - -include("syntax_trees.jl") -include("syntax_interpolation.jl") include("parse_stream.jl") -include("simple_parser.jl") include("parser.jl") + +# Prototypes +#include("syntax_trees.jl") +#include("syntax_interpolation.jl") +#include("simple_parser.jl") From e43027ddd1b9d742f731fbb4ec15dd0c8362373c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 19 Dec 2021 09:37:01 +1000 Subject: [PATCH 0249/1109] Detect reserved words earlier to prevent parsing them as atoms The flisp code is happy to call parse-atom on reserved words in parse-unary-prefix and then detect the reserved word later. However this is very awkward with ParseStream, so avoid this by detecting reserved words up front inside parse_factor and parse_call. 
--- JuliaSyntax/README.md | 81 +++++++++++++++++++++++++++++++++------ JuliaSyntax/src/parser.jl | 54 ++++++++++++++++++-------- 2 files changed, 106 insertions(+), 29 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index cecc556823e73..ce2cc6ee115f7 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -394,29 +394,86 @@ Some resources: - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) - Some discussion of error recovery -## Flisp parser oddities and bugs +# Parser devdocs + +# Differences from the flisp parser + +## Make parsing decisions earlier + +The flisp-based parser has many places where it parses an expression and then +optionally rearranges the resulting AST, modifying heads of expressions etc. + +This parser tries hard to avoid that pattern becase +* It works poorly when the parser is emitting a stream of node spans rather + than eagerly creating a tree data structure. +* It's confusing to re-make parsing decisions + +Often the information required to avoid postprocessing the parse tree is +available early with a bit of restructuring and we make use of this wherever +possible. + +## Function names + +Large structural changes were generally avoided while porting. In particular, +nearly all function names for parsing productions are the same with `-` +replaced by `_` and predicates prefixed by `is_`. + +* `parse-arglist` and a parts of `parse-paren-` have been combined into a + general function `parse_brackets`. This function deals with all the odd + corner cases of how the AST is emitted when mixing `,` and `;` within + parentheses. 
In particular regard to: + - Determining whether `;` are block syntax separators or keyword parameters + - Determining whether to emit `parameter` sections based on context + - Emitting key-value pairs either as `kw` or `=` depending on context + +* The way that `parse-resword` is entered has been rearranged to avoid parsing + reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we + detect reserved words and enter `parse_resword` earlier. + +## Flisp parser bugs ```julia # Operator prefix call syntax doesn't work in some cases (tuple is produced) +(a;b,c) -# Inconsistent parsing of tuple keyword args inside vs outside of dot calls -(a=1,) # (tuple (= a 1)) -f.(a=1) # (tuple (kw a 1)) - -# Mixutres of , and ; in calls give nested parameter AST which parses strangely -# and is kind-of-horrible to use. -# (tuple (parameters (parameters e f) c d) a b) -(a,b; c,d; e,f) - # Misplaced @ in macro module paths is parsed but produces odd AST # (macrocall (. A (quote (. B @x)))) # Should be rejected, or produce (macrocall (. (. A (quote B)) (quote @x))) A.@B.x -# Lookup for macro module path allows bizarre syntax and stateful semantics! +# Macro module paths allow calls which gives weird stateful semantics! b() = rand() > 0.5 ? Base : Core b().@info "hi" ``` -Many inconsistencies between `kw` and `=` +## Parsing oddities and warts + +* There's many inconsistencies between how `kw` and `=` are used when parsing + `key=val` pairs inside parentheses. + +* Inconsistent parsing of tuple keyword args inside vs outside of dot calls + ```julia + (a=1,) # (tuple (= a 1)) + f.(a=1) # (tuple (kw a 1)) + ``` + +* Mixutres of `,` and `;` in calls give nested parameter AST which parses + strangely, and is kind-of-horrible to use. 
+ ```julia + # (tuple (parameters (parameters e f) c d) a b) + (a,b; c,d; e,f) + ``` + +* `let` bindings might be stored in a block, or they might not be, depending on + context: + ``` + # Not in a block + let x=1 ; end ==> (let (= x 1) (block)) + let x::1 ; end ==> (let (:: x 1) (block)) + let x ; end ==> (let x (block)) + + # In a block + let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + let x+=1 ; end ==> (let (block (+= x 1)) (block)) + ``` + diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 97ceeaab9fd59..6beeeef63123a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -103,6 +103,22 @@ function is_initial_reserved_word(ps::ParseState, k) return is_iresword && !(k == K"begin" && ps.end_symbol) end +# Return true if the next word (or word pair) is reserved, introducing a +# syntactic structure. +function peek_initial_reserved_words(ps::ParseState) + k = peek(ps) + if is_initial_reserved_word(ps, k) + return true + elseif k in (K"mutable", K"primitive", K"abstract") + k2 = peek(ps,2) + return (k == K"mutable" && k2 == K"struct") || + (k == K"primitive" && k2 == K"type") || + (k == K"abstract" && k2 == K"type") + else + return false + end +end + function is_block_form(k) kind(k) in (K"block", K"quote", K"if", K"for", K"while", K"let", K"function", K"macro", K"abstract", @@ -930,9 +946,13 @@ end # # flisp: parse-factor function parse_factor(ps::ParseState) - mark = position(ps) - parse_unary_prefix(ps) - parse_factor_with_initial_ex(ps, mark) + if peek_initial_reserved_words(ps) + parse_resword(ps) + else + mark = position(ps) + parse_unary_prefix(ps) + parse_factor_with_initial_ex(ps, mark) + end end # flisp: parse-factor-with-initial-ex @@ -985,11 +1005,10 @@ end # # flisp: parse-call function parse_call(ps::ParseState) - mark = position(ps) - k = peek(ps) - if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") + if peek_initial_reserved_words(ps) parse_resword(ps) else + mark 
= position(ps) parse_unary_prefix(ps) parse_call_with_initial_ex(ps, mark) end @@ -997,12 +1016,8 @@ end # flisp: parse-call-with-initial-ex function parse_call_with_initial_ex(ps::ParseState, mark) - k = peek(ps) - if is_initial_reserved_word(ps, k) || k in (K"mutable", K"primitive", K"abstract") - parse_resword(ps) - else - parse_call_chain(ps, mark) - end + # FIXME: Remove parse_call_with_initial_ex which is redundant now? + parse_call_chain(ps, mark) end # parse syntactic unary operators @@ -1019,21 +1034,22 @@ function parse_unary_prefix(ps::ParseState) k2 = peek(ps, 2) if k in (K"&", K"$") && (is_closing_token(ps, k2) || k2 == K"NewlineWs") # &) ==> & - # $\n ==> $ + # $\n ==> $ bump(ps) else bump(ps, TRIVIA_FLAG) if k in (K"&", K"::") + # &a ==> (& a) parse_where(ps, parse_call) else # $a ==> ($ a) # $$a ==> ($ ($ a)) + # $&a ==> ($ (& a)) parse_unary_prefix(ps) end emit(ps, mark, k) end else - # Here's where things go wrong. parse_atom(ps) end end @@ -1077,6 +1093,11 @@ end # # flisp: (define (parse-call-chain s ex macrocall?) function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro=false) + if is_number(peek_behind(ps)) && peek(ps) == K"(" + # juxtaposition with numbers is multiply, not call + # 2(x) ==> (* 2 x) + return + end # source range of the @-prefixed part of a macro macro_atname_range = nothing is_valid_modref = peek_behind(ps) in (K"__dot__", K"Identifier") @@ -1087,8 +1108,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro k = kind(t) if (ps.space_sensitive && t.had_whitespace && # TODO: Is `'` adjoint or Char here? - k in (K"(", K"[", K"{", K"\\", K"'", K"Char", K"String", K"TripleString")) || - (is_number(k) && k == K"(") # 2(...) 
is multiply, not call + k in (K"(", K"[", K"{", K"\\", K"'", K"Char", K"String", K"TripleString")) break end if k == K"(" @@ -1362,8 +1382,8 @@ end # # flisp: parse-resword function parse_resword(ps::ParseState) - mark = position(ps) ps = normal_context(ps) + mark = position(ps) word = peek(ps) if word in (K"begin", K"quote") # begin end ==> (block) From 4360ffa2d0b63c44f5508e5c3745b5255bc85c45 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 20 Dec 2021 06:50:35 +1000 Subject: [PATCH 0250/1109] Parsing of const, local, global --- JuliaSyntax/README.md | 7 +++ JuliaSyntax/src/parse_stream.jl | 2 + JuliaSyntax/src/parser.jl | 91 +++++++++++++++++++++++++++------ JuliaSyntax/src/tokens.jl | 4 -- JuliaSyntax/test/parser.jl | 14 +++++ 5 files changed, 99 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index ce2cc6ee115f7..919942ab056ac 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -444,6 +444,10 @@ A.@B.x # Macro module paths allow calls which gives weird stateful semantics! b() = rand() > 0.5 ? Base : Core b().@info "hi" + +# `const` and `global` allow chained assignment, but the right hand side is not +# constant. `a` const here but not `b`. +const a = b = 1 ``` ## Parsing oddities and warts @@ -477,3 +481,6 @@ b().@info "hi" let x+=1 ; end ==> (let (block (+= x 1)) (block)) ``` +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))` + This is pretty weird from a concrete syntax point of view! 
+ diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index a1ce8c644bb31..340a7eb7121c6 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -37,6 +37,8 @@ is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok) Base.:(~)(tok::SyntaxToken, k::Kind) = kind(tok) == k Base.:(~)(k::Kind, tok::SyntaxToken) = kind(tok) == k +Base.:(==)(tok::SyntaxToken, k::Kind) = (kind(tok) == k && !is_decorated(tok)) + #------------------------------------------------------------------------------- # Range in the source text which will become a node in the tree. Can be either diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 6beeeef63123a..7ed8618b035f5 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -32,8 +32,7 @@ end # Read tokens until we find an expected closing token. # Bump the big pile of resulting tokens as a single nontrivia error token -function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; error="unexpected tokens") - mark = position(ps) +function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps), error="unexpected tokens") while true k = peek(ps) if k == K"EndMarker" @@ -357,6 +356,9 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) end return NO_POSITION else + # a += b ==> (+= a b) + # FIXME: + # a .= b ==> (.= a b) bump(ps, TRIVIA_FLAG) parse_assignment(ps, down, equals_is_kw) result_k = (k == K"=" && equals_is_kw) ? 
K"kw" : k @@ -365,31 +367,29 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) end end -# parse-comma is needed for commas outside parens, for example a = b,c +# parse_comma is needed for commas outside parens, for example a = b,c # # flisp: (define (parse-comma s) -function parse_comma(ps::ParseState) +function parse_comma(ps::ParseState, do_emit=true) mark = position(ps) n_commas = 0 parse_pair(ps) - first = true while true if peek(ps) != K"," - if !first || n_commas > 0 + if do_emit && n_commas >= 1 # FIXME: is use of n_commas correct here? flisp comments say: # () => (tuple) # (ex2 ex1) => (tuple ex1 ex2) # (ex1,) => (tuple ex1) emit(ps, mark, K"tuple") end - return + return n_commas end - first = false bump(ps, TRIVIA_FLAG) n_commas += 1 - if peek(ps) == K"=" - # Test: - # x, = ... + if peek_token(ps) == K"=" + # Allow trailing comma before `=` + # x, = xs ==> (tuple x) continue end parse_pair(ps) @@ -1454,6 +1454,7 @@ function parse_resword(ps::ParseState) if k in (K"NewlineWs", K";") bump(ps, TRIVIA_FLAG) elseif k == K"end" + # pass else recover((ps,k)->(is_closing_token(ps,k) || k == K"NewlineWs"), ps, TRIVIA_FLAG, @@ -1465,10 +1466,8 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"let") elseif word in (K"if", K"elseif") TODO("parse_resword") - elseif word in (K"global", K"local") - TODO("parse_resword") - elseif word == K"const" - TODO("parse_resword") + elseif word in (K"const", K"global", K"local") + parse_const_local_global(ps) elseif word in (K"function", K"macro") TODO("parse_resword") elseif word == K"abstract" @@ -1503,6 +1502,68 @@ function parse_resword(ps::ParseState) end end +function parse_const_local_global(ps) + mark = position(ps) + scope_mark = mark + has_const = false + scope_k = K"Nothing" + k = peek(ps) + if k in (K"global", K"local") + # global x = 1 ==> (global (= x 1)) + # local x = 1 ==> (local (= x 1)) + scope_k = k + bump(ps, TRIVIA_FLAG) + if peek(ps) == K"const" + # global const x = 1 ==> (const 
(global (= x 1))) + # local const x = 1 ==> (const (local (= x 1))) + has_const = true + bump(ps, TRIVIA_FLAG) + end + else + has_const = true + # const x = 1 ==> (const (= x 1)) + # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k in (K"global", K"local") + # const global x = 1 ==> (const (global (= x 1))) + # const local x = 1 ==> (const (local (= x 1))) + scope_k = k + scope_mark = position(ps) + bump(ps, TRIVIA_FLAG) + end + end + # Like parse_eq, but specialized for error recovery: + beforevar_mark = position(ps) + n_commas = parse_comma(ps, false) + t = peek_token(ps) + if is_prec_assignment(t) && !is_decorated(t) + if n_commas >= 1 + emit(ps, beforevar_mark, K"tuple") + end + bump(ps, TRIVIA_FLAG) + parse_comma(ps) + emit(ps, beforevar_mark, K"=") + elseif has_const + # const x ==> (const (error x)) + # Recovery heuristic + recover(ps, mark=beforevar_mark, + error="Expected assignment after `const`") do ps, k + k == K"NewlineWs" || (k != K"," && is_closing_token(ps, k)) + end + else + # global x ==> (global x) + # local x ==> (local x) + # global x,y ==> (global x y) + end + if scope_k != K"Nothing" + emit(ps, scope_mark, scope_k) + end + if has_const + emit(ps, mark, K"const") + end +end + # # flisp: parse-do function parse_do(ps::ParseState) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index dd248100f5537..07fcd49930489 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -72,10 +72,6 @@ function is_string(t) kind(t) in (K"String", K"TripleString") end -function is_assignment_op(t) - K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" -end - function is_radical_op(t) kind(t) in (K"√", K"∛", K"∜") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 88f91d29ed6f6..34019ffdc3fdd 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -250,6 +250,20 @@ tests = [ "let ; body end" => "(let (block) (block :body))" 
"let\na\nb\nend" => "(let (block) (block :a :b))" ], + JuliaSyntax.parse_const_local_global => [ + "global x = 1" => "(global (= :x 1))" + "local x = 1" => "(local (= :x 1))" + "global const x = 1" => "(const (global (= :x 1)))" + "local const x = 1" => "(const (local (= :x 1)))" + "const x = 1" => "(const (= :x 1))" + "const x,y = 1,2" => "(const (= (tuple :x :y) (tuple 1 2)))" + "const global x = 1" => "(const (global (= :x 1)))" + "const local x = 1" => "(const (local (= :x 1)))" + "global x" => "(global :x)" + "local x" => "(local :x)" + "global x,y" => "(global :x :y)" + "const x" => "(const (error :x (error)))" + ], JuliaSyntax.parse_iteration_spec => [ "i = rhs" => "(= :i :rhs)" "i in rhs" => "(= :i :rhs)" From 621e89e4b10f8c7c3f7b163297702898397bf7e4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 20 Dec 2021 07:13:32 +1000 Subject: [PATCH 0251/1109] Parse `return` --- JuliaSyntax/src/parser.jl | 13 ++++++++++++- JuliaSyntax/src/syntax_tree.jl | 2 ++ JuliaSyntax/src/token_kinds.jl | 3 ++- JuliaSyntax/test/parser.jl | 5 +++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7ed8618b035f5..3b6875fe5b314 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1486,7 +1486,18 @@ function parse_resword(ps::ParseState) elseif word == K"try" TODO("parse_resword") elseif word == K"return" - TODO("parse_resword") + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"NewlineWs" || is_closing_token(ps, k) + # return\nx ==> (return nothing) + # return) ==> (return nothing) + bump_invisible(ps, K"NothingLiteral") + else + # return x ==> (return x) + # return x,y ==> (return (tuple x y)) + parse_eq(ps) + end + emit(ps, mark, K"return") elseif word in (K"break", K"continue") TODO("parse_resword") elseif word in (K"module", K"baremodule") diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 56401f727b25d..b2b57bcbc233b 100644 --- 
a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -94,6 +94,8 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In GlobalRef(Core, :var"@doc") elseif k == K"core_@cmd" GlobalRef(Core, :var"@cmd") + elseif k == K"NothingLiteral" + nothing elseif k == K"error" ErrorVal() elseif k == K"__dot__" diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index d17a2d7a88c60..fda70be7f6822 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -823,7 +823,8 @@ Dict([ "StringMacroName" => Ts.STRING_MACRO_NAME "CmdMacroName" => Ts.CMD_MACRO_NAME "UnquotedString" => Ts.UNQUOTED_STRING -"END_PARSER_TOKENS" => Ts.end_parser_tokens +"NothingLiteral" => Ts.NOTHING_LITERAL +"END_PARSER_TOKENS" => Ts.end_parser_tokens # Our custom syntax tokens "BEGIN_SYNTAX_KINDS" => Ts.begin_syntax_kinds diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 34019ffdc3fdd..0a57c64772b8c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -249,6 +249,11 @@ tests = [ "let ; end" => "(let (block) (block))" "let ; body end" => "(let (block) (block :body))" "let\na\nb\nend" => "(let (block) (block :a :b))" + # return + "return\nx" => "(return nothing)" + "return)" => "(return nothing)" + "return x" => "(return :x)" + "return x,y" => "(return (tuple :x :y))" ], JuliaSyntax.parse_const_local_global => [ "global x = 1" => "(global (= :x 1))" From 160f9169326e48100d554ad5f16ce9de9f264c25 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 20 Dec 2021 21:37:35 +1000 Subject: [PATCH 0252/1109] Parse if-elseif-else and functions/macro definitions --- JuliaSyntax/README.md | 24 +++-- JuliaSyntax/src/parse_stream.jl | 6 +- JuliaSyntax/src/parser.jl | 182 ++++++++++++++++++++++++-------- JuliaSyntax/src/syntax_tree.jl | 43 ++++++++ JuliaSyntax/src/utils.jl | 4 +- JuliaSyntax/test/parser.jl | 51 ++++++++- 6 files changed, 256 insertions(+), 54 deletions(-) diff 
--git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 919942ab056ac..edebbe713d789 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -450,23 +450,34 @@ b().@info "hi" const a = b = 1 ``` +* `macro (x) end` is allowed but is invalid and could easily be detected in the + parser. + ## Parsing oddities and warts -* There's many inconsistencies between how `kw` and `=` are used when parsing - `key=val` pairs inside parentheses. +There's many apparent inconsistencies between how `kw` and `=` are used when +parsing `key=val` pairs inside parentheses. * Inconsistent parsing of tuple keyword args inside vs outside of dot calls ```julia (a=1,) # (tuple (= a 1)) f.(a=1) # (tuple (kw a 1)) ``` - -* Mixutres of `,` and `;` in calls give nested parameter AST which parses +* Mixtures of `,` and `;` in calls give nested parameter AST which parses strangely, and is kind-of-horrible to use. ```julia # (tuple (parameters (parameters e f) c d) a b) (a,b; c,d; e,f) ``` +* Long-form anonymous functions have argument lists which are parsedj + as tuples. But the flisp parser doesn't pass the context that they're + function argument lists and needs some ugly disambiguation code. This also + leads to more inconsistency in the use of `kw` for keywords. + +Other oddities: + +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))` + Somewhat useful for the AST, but pretty weird from a concrete syntax point of view. * `let` bindings might be stored in a block, or they might not be, depending on context: @@ -481,6 +492,7 @@ const a = b = 1 let x+=1 ; end ==> (let (block (+= x 1)) (block)) ``` -* `global const x=1` is normalized by the parser into `(const (global (= x 1)))` - This is pretty weird from a concrete syntax point of view! +* `elseif` condition is in a block but not `if` condition. Presumably because + of the need to add a line number node. 
+ `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 340a7eb7121c6..aaebfdf62a0e4 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -403,12 +403,12 @@ function emit_diagnostic(stream::ParseStream, mark=nothing, end_mark=nothing; begin_tok_i = 1 end_tok_i = is_whitespace(stream.lookahead[i]) ? i : max(1, i-1) end - first_byte = isnothing(mark) ? + fbyte = isnothing(mark) ? first_byte(stream.lookahead[begin_tok_i]) : mark.input_byte - last_byte = isnothing(end_mark) ? + lbyte = isnothing(end_mark) ? last_byte(stream.lookahead[end_tok_i]) : end_mark.input_byte # It's a bit weird to require supplying a SyntaxHead here... - text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), first_byte, last_byte) + text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), fbyte, lbyte) push!(stream.diagnostics, Diagnostic(text_span, error)) return nothing end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 3b6875fe5b314..eb1f521e64044 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -86,6 +86,10 @@ function is_closing_token(ps::ParseState, k) K"EndMarker") || (k == K"end" && !ps.end_symbol) end +function is_closer_or_newline(ps::ParseState, k) + is_closing_token(ps,k) || k == K"NewlineWs" +end + # Closing token which isn't a keyword function is_non_keyword_closer(k) kind(k) in (K",", K")", K"]", K"}", K";", K"EndMarker") @@ -1054,33 +1058,6 @@ function parse_unary_prefix(ps::ParseState) end end -# Parse function and macro signatures -# -# flisp: parse-def -function parse_def(ps::ParseState, is_func, anon) - mark = position(ps) - k = peek(ps) - if (is_func && iskeyword(k)) || is_initial_reserved_word(ps, k) - # Forbid things like - # function begin() end ==> (function (call (error begin))) - emark = position(ps) - bump(ps) - emit(ps, emark, K"error", - error="invalid $(is_func ? 
"function" : "macro") name") - else - parse_unary_prefix(ps) - end - parse_call_chain(ps, mark) - if is_func && peek(ps) == K"::" - bump(ps, TRIVIA_FLAG) - parse_call(ps) - emit(ps, mark, K"::") - end - if peek(ps) == K"where" - parse_where_chain(ps, mark) - end -end - # Emit an error if the call chain syntax is not a valid module reference function emit_modref_error(ps, mark) emit(ps, mark, K"error", error="not a valid module reference") @@ -1435,7 +1412,7 @@ function parse_resword(ps::ParseState) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_eq_star) kb = peek_behind(ps) - # AST wart: This ugly logic seems unfortunate. Why not always emit a block? + # Wart: This ugly logic seems unfortunate. Why not always emit a block? # let x=1 ; end ==> (let (= x 1) (block)) # let x::1 ; end ==> (let (:: x 1) (block)) # let x ; end ==> (let x (block)) @@ -1456,20 +1433,19 @@ function parse_resword(ps::ParseState) elseif k == K"end" # pass else - recover((ps,k)->(is_closing_token(ps,k) || k == K"NewlineWs"), - ps, TRIVIA_FLAG, + recover(is_closer_or_newline, ps, TRIVIA_FLAG, error="let variables should end in `;` or newline") end # let\na\nb\nend ==> (let (block) (block a b)) parse_block(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"let") - elseif word in (K"if", K"elseif") - TODO("parse_resword") + elseif word == K"if" + parse_if_elseif(ps) elseif word in (K"const", K"global", K"local") parse_const_local_global(ps) elseif word in (K"function", K"macro") - TODO("parse_resword") + parse_function(ps) elseif word == K"abstract" TODO("parse_resword") elseif word == K"struct" @@ -1499,7 +1475,12 @@ function parse_resword(ps::ParseState) end emit(ps, mark, K"return") elseif word in (K"break", K"continue") - TODO("parse_resword") + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if !(k in (K"NewlineWs", K";", K")", K":", K"EndMarker") || (k == K"end" && !ps.end_symbol)) + recover(is_closer_or_newline, ps, TRIVIA_FLAG, + error="unexpected token after $(untokenize(word))") 
+ end elseif word in (K"module", K"baremodule") TODO("parse_resword") elseif word == K"export" @@ -1513,6 +1494,60 @@ function parse_resword(ps::ParseState) end end +# Parse if-elseif-else-end expressions +# +# if a xx elseif b yy else zz end ==> (if a (block xx) (elseif (block b) (block yy) (block zz))) +function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) + mark = position(ps) + word = peek(ps) + if is_elseif_whitespace_err + # Only get here on recovery from error case - pretend we're parsing elseif. + word = K"elseif" + else + bump(ps, TRIVIA_FLAG) + end + cond_mark = position(ps) + if peek(ps) in (K"NewlineWs", K"end") + # if end ==> (if (error) (block)) + # if \n end ==> (if (error) (block)) + bump_trivia(ps, error="missing condition in `$(untokenize(word))`") + else + # if a end ==> (if a (block)) + # if a xx end ==> (if a (block xx)) + parse_cond(ps) + end + if is_elseif + # Wart: `elseif` condition is in a block but not `if` condition + emit(ps, cond_mark, K"block") + end + # if a \n\n xx \n\n end ==> (if a (block xx)) + parse_block(ps) + bump_trivia(ps) + k = peek(ps) + if k == K"elseif" + # if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy))) + parse_if_elseif(ps, true) + elseif k == K"else" + emark = position(ps) + bump(ps, TRIVIA_FLAG) + if peek(ps) == K"if" + # User wrote `else if` by mistake ? 
+ # if a xx else if b yy end ==> (if a (block xx) (else (error if) (block b) (block yy))) + bump(ps, TRIVIA_FLAG) + emit(ps, emark, K"error", TRIVIA_FLAG, + error="use `elseif` instead of `else if`") + parse_if_elseif(ps, true, true) + else + # if a xx else yy end ==> (if a (block xx) (block yy)) + parse_block(ps) + end + end + if !is_elseif + bump_closing_token(ps, K"end") + end + emit(ps, mark, word) +end + function parse_const_local_global(ps) mark = position(ps) scope_mark = mark @@ -1575,6 +1610,73 @@ function parse_const_local_global(ps) end end +# Parse function and macro definitions +function parse_function(ps::ParseState) + mark = position(ps) + word = peek(ps) + is_func = word == K"function" + bump(ps, TRIVIA_FLAG) + bump_trivia(ps) + + def_mark = position(ps) + k = peek(ps) + if k == K"(" + # Wart: flisp parser parses anon function arguments as tuples, roughly + # like `parse_paren(ps)`, but the code to disambiguate those cases + # is kind of awful. + # + # It seems much more consistent to treat them as function argument lists: + # function (x,y) end ==> (function (tuple x y) (block)) + # function (x=1) end ==> (function (tuple (kw x 1)) (block)) + # function (;x=1) end ==> (function (tuple (parameters (kw x 1))) (block)) + bump(ps, TRIVIA_FLAG) + parse_call_arglist(ps, K")", false) + emit(ps, def_mark, K"tuple") + # function (x) body end ==> (function (tuple x) (block body)) + # + # Wart: flisp parser allows the following but it's invalid syntax in lowering + # macro (x) end !=> (macro (tuple x) (block)) + # Fix is simple: + if !is_func + # macro (x) end ==> (macro (error (tuple x)) (block)) + emit(ps, def_mark, K"error", error="Expected macro name") + end + else + if iskeyword(k) + # Forbid things like + # function begin() end ==> (function (call (error begin)) (block)) + # macro begin() end ==> (macro (call (error begin)) (block)) + bump(ps, error="invalid $(untokenize(word)) name") + parse_call_chain(ps, def_mark) + else + # function f() end ==> 
(function (call f) (block)) + # function \n f() end ==> (function (call f) (block)) + parse_unary_prefix(ps) + end + parse_call_chain(ps, def_mark) + end + if is_func && peek(ps) == K"::" + # Return type + # function f()::T end ==> (function (:: (call f) T) (block)) + # function f()::g(T) end ==> (function (:: (call f) (call g T)) (block)) + bump(ps, TRIVIA_FLAG) + parse_call(ps) + emit(ps, def_mark, K"::") + end + if peek(ps) == K"where" + # function f() where {T} end ==> (function (where (call f) T) (block)) + # function f() where T end ==> (function (where (call f) T) (block)) + parse_where_chain(ps, def_mark) + end + + # The function body + # function f() \n a \n b end ==> (function (call f) (block a b)) + # function f() end ==> (function (call f) (block)) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, word) +end + # # flisp: parse-do function parse_do(ps::ParseState) @@ -1592,6 +1694,7 @@ function parse_do(ps::ParseState) end emit(ps, mark, K"tuple") parse_block(ps) + bump_closing_token(ps, K"end") emit(ps, mark, K"->") end @@ -1792,17 +1895,12 @@ function parse_cat(ps::ParseState, closer, last_end_symbol) end end -# flisp: parse-paren -function parse_paren(ps::ParseState, check_identifiers=true) - parse_paren_(ps, check_identifiers) -end - # Parse un-prefixed parenthesized syntax. This is hard because parentheses are # *very* overloaded! 
# -# flisp: parse-paren- -function parse_paren_(ps0::ParseState, check_identifiers) - ps = ParseState(ps0, range_colon_enabled=true, +# flisp: parse-paren / parse-paren- +function parse_paren(ps::ParseState, check_identifiers=true) + ps = ParseState(ps, range_colon_enabled=true, space_sensitive=false, where_enabled=true, whitespace_newline=true) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index b2b57bcbc233b..9b9876140716d 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -69,6 +69,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. val = if k == K"Integer" + # FIXME: this doesn't work with _'s as in 1_000_000 Base.parse(Int, val_str) elseif k == K"Float" # FIXME: Other float types! @@ -337,3 +338,45 @@ function _to_expr(node::SyntaxNode) end Base.Expr(node::SyntaxNode) = _to_expr(node) + + +#------------------------------------------------------------------------------- + +""" + parse_all(Expr, code::AbstractString; filename="none") + +Parse the given code and convert to a standard Expr +""" +function parse_all(::Type{Expr}, code::AbstractString; filename="none") + source_file = SourceFile(code, filename=filename) + + stream = ParseStream(code) + parse_all(stream) + + if !isempty(stream.diagnostics) + buf = IOBuffer() + show_diagnostics(IOContext(buf, stdout), stream, code) + @error Text(String(take!(buf))) + end + + green_tree = to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") + + tree = SyntaxNode(source_file, green_tree) + + # convert to Julia expr + ex = Expr(tree) + + flisp_ex = flisp_parse_all(code) + if ex != flisp_ex && !(!isempty(flisp_ex.args) && + Meta.isexpr(flisp_ex.args[end], :error)) + @error "Mismatch with Meta.parse()" ex flisp_ex + end + ex +end + + +function flisp_parse_all(code) + flisp_ex = 
Base.remove_linenums!(Meta.parseall(code)) + filter!(x->!(x isa LineNumberNode), flisp_ex.args) + flisp_ex +end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 1458cab19b768..43d1b2320a7c5 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -10,8 +10,8 @@ function _printstyled(io::IO, text; color) colreset = "\e[0;0m" first = true for linepart in split(text, '\n') - first || print('\n') - print(colcode, linepart, colreset) + first || print(io, '\n') + print(io, colcode, linepart, colreset) first = false end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 0a57c64772b8c..a0493cc4cce0c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -255,6 +255,17 @@ tests = [ "return x" => "(return :x)" "return x,y" => "(return (tuple :x :y))" ], + JuliaSyntax.parse_if_elseif => [ + "if a xx elseif b yy else zz end" => "(if :a (block :xx) (elseif (block :b) (block :yy) (block :zz)))" + "if end" => "(if (error) (block))" + "if \n end" => "(if (error) (block))" + "if a end" => "(if :a (block))" + "if a xx end" => "(if :a (block :xx))" + "if a \n\n xx \n\n end" => "(if :a (block :xx))" + "if a xx elseif b yy end" => "(if :a (block :xx) (elseif (block :b) (block :yy)))" + "if a xx else if b yy end" => "(if :a (block :xx) (error) (elseif (block :b) (block :yy)))" + "if a xx else yy end" => "(if :a (block :xx) (block :yy))" + ], JuliaSyntax.parse_const_local_global => [ "global x = 1" => "(global (= :x 1))" "local x = 1" => "(local (= :x 1))" @@ -269,6 +280,21 @@ tests = [ "global x,y" => "(global :x :y)" "const x" => "(const (error :x (error)))" ], + JuliaSyntax.parse_function => [ + "function (x) body end" => "(function (tuple :x) (block :body))" + "macro (x) end" => "(macro (error (tuple :x)) (block))" + "function (x,y) end" => "(function (tuple :x :y) (block))" + "function (x=1) end" => "(function (tuple (kw :x 1)) (block))" + "function (;x=1) end" => "(function (tuple (parameters (kw 
:x 1))) (block))" + "function begin() end" => "(function (call (error :begin)) (block))" + "macro begin() end" => "(macro (call (error :begin)) (block))" + "function f() end" => "(function (call :f) (block))" + "function \n f() end" => "(function (call :f) (block))" + "function f()::T end" => "(function (:: (call :f) :T) (block))" + "function f()::g(T) end" => "(function (:: (call :f) (call :g :T)) (block))" + "function f() \n a \n b end" => "(function (call :f) (block :a :b))" + "function f() end" => "(function (call :f) (block))" + ], JuliaSyntax.parse_iteration_spec => [ "i = rhs" => "(= :i :rhs)" "i in rhs" => "(= :i :rhs)" @@ -322,8 +348,31 @@ tests = [ @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests - @testset "$input" for (input,output) in test_specs + @testset "$(repr(input))" for (input,output) in test_specs @test test_parse(production, input) == output end end end + +@testset "Larger code chunks" begin + # Something ever-so-slightly nontrivial for fun - + # the sum of the even Fibonacci numbers < 4_000_000 + # https://projecteuler.net/problem=2 + code = """ + let + s = 0 + f1 = 1 + f2 = 2 + while f1 < 4000000 + # println(f1) + if f1 % 2 == 0 + s += f1 + end + f1, f2 = f2, f1+f2 + end + s + end + """ + ex = JuliaSyntax.parse_all(Expr, code) + @test ex == JuliaSyntax.flisp_parse_all(code) +end From b6613d5d8910b454d01360ffc58a23e1aff763d3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 20 Dec 2021 21:38:33 +1000 Subject: [PATCH 0253/1109] Parse abstract type definitions --- JuliaSyntax/README.md | 53 +++++++++++++++++-------------- JuliaSyntax/src/parser.jl | 65 +++++++++++++++----------------------- JuliaSyntax/test/parser.jl | 8 ++++- 3 files changed, 62 insertions(+), 64 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index edebbe713d789..924f2e0a4be58 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -432,26 +432,32 @@ replaced by `_` and predicates prefixed 
by `is_`. ## Flisp parser bugs -```julia -# Operator prefix call syntax doesn't work in some cases (tuple is produced) -+(a;b,c) - -# Misplaced @ in macro module paths is parsed but produces odd AST -# (macrocall (. A (quote (. B @x)))) -# Should be rejected, or produce (macrocall (. (. A (quote B)) (quote @x))) -A.@B.x +Some things seem to be bugs: -# Macro module paths allow calls which gives weird stateful semantics! -b() = rand() > 0.5 ? Base : Core -b().@info "hi" +* Macro module paths allow calls which gives weird stateful semantics! + ``` + b() = rand() > 0.5 ? Base : Core + b().@info "hi" + ``` +* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd + broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should + probably be rejected. +* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where + parameters are separated by commas. A tuple is produced instead. +* `const` and `global` allow chained assignment, but the right hand side is not + constant. `a` const here but not `b`. + ``` + const a = b = 1 + ``` -# `const` and `global` allow chained assignment, but the right hand side is not -# constant. `a` const here but not `b`. -const a = b = 1 -``` +There's various allowed syntaxes which are fairly easily detected in the +parser, but which will be rejected later during lowering. To allow building +DSLs this is fine and good but some such allowed syntaxes don't seem very +useful even for DSLs: -* `macro (x) end` is allowed but is invalid and could easily be detected in the - parser. +* `macro (x) end` is allowed but there are no anonymous macros. +* `abstract type A < B end` and other subtypes comparisons are allowed, but + only `A <: B` makes sense. ## Parsing oddities and warts @@ -476,13 +482,14 @@ parsing `key=val` pairs inside parentheses. 
Other oddities: -* `global const x=1` is normalized by the parser into `(const (global (= x 1)))` - Somewhat useful for the AST, but pretty weird from a concrete syntax point of view. +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. + I suppose this is somewhat useful for AST consumers, but it seems a bit weird + and unnecessary. * `let` bindings might be stored in a block, or they might not be, depending on - context: + special cases: ``` - # Not in a block + # Special cases not in a block let x=1 ; end ==> (let (= x 1) (block)) let x::1 ; end ==> (let (:: x 1) (block)) let x ; end ==> (let x (block)) @@ -492,7 +499,7 @@ Other oddities: let x+=1 ; end ==> (let (block (+= x 1)) (block)) ``` -* `elseif` condition is in a block but not `if` condition. Presumably because - of the need to add a line number node. +* The `elseif` condition is always in a block but not `if` condition. + Presumably because of the need to add a line number node in the flisp parser `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index eb1f521e64044..1e5e74066d358 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -936,7 +936,7 @@ function parse_unary_call(ps::ParseState) emit(ps, mark, op_node_kind) end elseif !is_unary_op(op_k) - emit_diagnostic(error="expected a unary operator") + emit_diagnostic(ps, error="expected a unary operator") else bump(ps, op_tok_flags) parse_unary(ps) @@ -1318,29 +1318,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro end end -# flisp: (define (parse-subtype-spec s) +# flisp: parse-subtype-spec function parse_subtype_spec(ps::ParseState) - TODO("parse_subtype_spec unimplemented") -end - -# flisp: (define (valid-func-sig? paren sig) -function is_valid_func_sig(paren, sig) - TODO("is_valid_func_sig unimplemented") -end - -# flisp: (define (valid-1arg-func-sig? 
sig) -function is_valid_1arg_func_sig(sig) - TODO("is_valid_1arg_func_sig unimplemented") -end - -# flisp: (define (unwrap-where x) -function unwrap_where(x) - TODO("unwrap_where unimplemented") -end - -# flisp: (define (rewrap-where x w) -function rewrap_where(x, w) - TODO("rewrap_where unimplemented") + # Wart: why isn't the flisp parser more strict here? + # <: is the only operator which isn't a syntax error, but parse_comparison + # allows all sorts of things. + parse_comparison(ps) end # flisp: parse-struct-def @@ -1348,14 +1331,11 @@ function parse_struct_def(ps::ParseState, mark, is_mut) TODO("parse_struct_def unimplemented") end -# consume any number of line endings from a token stream +# parse expressions or blocks introduced by syntactic reserved words. # -# flisp: (define (take-lineendings s) -function take_lineendings(s) - TODO("take_lineendings unimplemented") -end - -# parse expressions or blocks introduced by syntactic reserved words +# The caller should use peek_initial_reserved_words to determine whether +# to call parse_resword, or whether contextural keywords like `mutable` are +# simple identifiers. 
# # flisp: parse-resword function parse_resword(ps::ParseState) @@ -1447,16 +1427,21 @@ function parse_resword(ps::ParseState) elseif word in (K"function", K"macro") parse_function(ps) elseif word == K"abstract" - TODO("parse_resword") - elseif word == K"struct" - parse_struct_def(ps, mark, false) - elseif word == K"mutable" - if peek(ps) != K"struct" - bump(ps, remap_kind=K"Identifier") - parse_call_chain(ps, mark) - else - parse_struct_def(ps, mark, true) - end + # Abstract type definitions + # abstract type A end ==> (abstract A) + # abstract type \n\n A \n\n end ==> (abstract A) + # abstract type A <: B end ==> (abstract (<: A B)) + # abstract type A <: B{T,S} end ==> (abstract (<: A (curly B T S))) + # Oddities allowed by parser + # abstract type A < B end ==> (abstract (call < A B)) + bump(ps, TRIVIA_FLAG) + @assert peek(ps) == K"type" + bump(ps, TRIVIA_FLAG) + parse_subtype_spec(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"abstract") + elseif word in (K"struct", K"mutable") + parse_struct_def(ps, mark) elseif word == K"primitive" TODO("parse_resword") elseif word == K"try" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a0493cc4cce0c..7901d79a6b240 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -38,8 +38,8 @@ function itest_parse(production, code) printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) show(stdout, MIME"text/plain"(), f_ex) + return (code, stream, t, s, ex) end - (code, stream, t, s, ex) end # TODO: @@ -249,6 +249,12 @@ tests = [ "let ; end" => "(let (block) (block))" "let ; body end" => "(let (block) (block :body))" "let\na\nb\nend" => "(let (block) (block :a :b))" + # abstract type + "abstract type A end" => "(abstract :A)" + "abstract type \n\n A \n\n end" => "(abstract :A)" + "abstract type A <: B end" => "(abstract (<: :A :B))" + "abstract type A <: B{T,S} end" => "(abstract (<: :A (curly :B :T :S)))" + "abstract type A < B end" => "(abstract (call :< :A :B))" # 
return "return\nx" => "(return nothing)" "return)" => "(return nothing)" From 5ca8a02f57e8d2c8a8d7d5abad631628d8dd8f12 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 21 Dec 2021 06:31:59 +1000 Subject: [PATCH 0254/1109] Parse module, struct, primitive type --- JuliaSyntax/src/parser.jl | 101 ++++++++++++++++++++++++++++----- JuliaSyntax/src/syntax_tree.jl | 4 ++ JuliaSyntax/test/parser.jl | 14 +++++ 3 files changed, 105 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 1e5e74066d358..37e527afc74c9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -106,13 +106,22 @@ function is_initial_reserved_word(ps::ParseState, k) return is_iresword && !(k == K"begin" && ps.end_symbol) end +function is_contextural_keyword(k) + kind(k) ∈ (K"mutable", K"primitive", K"abstract") +end + +function is_reserved_word(k) + k = kind(k) + iskeyword(k) && !is_contextural_keyword(k) +end + # Return true if the next word (or word pair) is reserved, introducing a # syntactic structure. function peek_initial_reserved_words(ps::ParseState) k = peek(ps) if is_initial_reserved_word(ps, k) return true - elseif k in (K"mutable", K"primitive", K"abstract") + elseif is_contextural_keyword(k) k2 = peek(ps,2) return (k == K"mutable" && k2 == K"struct") || (k == K"primitive" && k2 == K"type") || @@ -691,7 +700,7 @@ function parse_unary_subtype(ps::ParseState) elseif k in (K"<:", K">:") # FIXME add test cases k2 = peek(ps, 2) - if is_closing_token(k2) || k2 in (K"NewlineWs", K"=") + if is_closing_token(ps, k2) || k2 in (K"NewlineWs", K"=") # return operator by itself, as in (<:) bump(ps) return @@ -1320,15 +1329,41 @@ end # flisp: parse-subtype-spec function parse_subtype_spec(ps::ParseState) - # Wart: why isn't the flisp parser more strict here? - # <: is the only operator which isn't a syntax error, but parse_comparison - # allows all sorts of things. 
- parse_comparison(ps) + k = peek(ps) + if is_reserved_word(k) + # Recovery + # struct try end ==> (struct false (error try) (block)) + bump(ps, error="Invalid type name `$(untokenize(k))`") + m = position(ps) + if is_prec_comparison(peek(ps)) + bump(ps) + parse_pipe_lt(ps) + emit(ps, m, K"error", TRIVIA_FLAG) + end + else + # Wart: why isn't the flisp parser more strict here? + # <: is the only operator which isn't a syntax error, but + # parse_subtype_spec allows all sorts of things. + parse_comparison(ps) + end end +# Parse struct definitions. The caller must arrange for the next tokens to be +# `struct` or `mutable struct`. +# # flisp: parse-struct-def -function parse_struct_def(ps::ParseState, mark, is_mut) - TODO("parse_struct_def unimplemented") +function parse_struct_def(ps::ParseState) + mark = position(ps) + is_mutable = peek(ps) == K"mutable" + if is_mutable + bump(ps, TRIVIA_FLAG) + end + @assert peek(ps) == K"struct" + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if is_reserved_word(k) + bump(ps, error="Invalid type name `$(untokenize(k))`") + end end # parse expressions or blocks introduced by syntactic reserved words. 
@@ -1441,11 +1476,32 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"abstract") elseif word in (K"struct", K"mutable") - parse_struct_def(ps, mark) + # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (:: a X))) + if word == K"mutable" + # mutable struct A end ==> (struct true A (block)) + bump(ps, remap_kind=K"true") + else + # struct A end ==> (struct false A (block)) + bump_invisible(ps, K"false") + end + @assert peek(ps) == K"struct" + bump(ps, TRIVIA_FLAG) + parse_subtype_spec(ps) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"struct") elseif word == K"primitive" - TODO("parse_resword") + # primitive type A 32 end ==> (primitive A 32) + # primitive type A <: B \n 8 \n end ==> (primitive (<: A B) 8) + bump(ps, TRIVIA_FLAG) + @assert peek(ps) == K"type" + bump(ps, TRIVIA_FLAG) + parse_subtype_spec(ps) + parse_cond(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"primitive") elseif word == K"try" - TODO("parse_resword") + parse_try(ps) elseif word == K"return" bump(ps, TRIVIA_FLAG) k = peek(ps) @@ -1467,15 +1523,28 @@ function parse_resword(ps::ParseState) error="unexpected token after $(untokenize(word))") end elseif word in (K"module", K"baremodule") - TODO("parse_resword") + # module A end ==> (module true A (block)) + # baremodule A end ==> (module false A (block)) + bump(ps, remap_kind= (word == K"module") ? 
K"true" : K"false") + if is_reserved_word(peek(ps)) + # module do \n end ==> (module true (error do) (block)) + bump(ps, error="Invalid module name") + else + # module $A end ==> (module true ($ A) (block)) + parse_unary_prefix(ps) + end + # module A \n a \n b \n end ==> (module true A (block a b)) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"module") elseif word == K"export" TODO("parse_resword") elseif word in (K"import", K"using") TODO("parse_resword") elseif word == K"do" - TODO("parse_resword") + bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") else - bump(ps, TRIVIA_FLAG, error="unhandled reserved word") + error("unhandled reserved word") end end @@ -1662,6 +1731,10 @@ function parse_function(ps::ParseState) emit(ps, mark, word) end +function parse_try(ps) + TODO("parse_try") + mark = position(ps) +end # # flisp: parse-do function parse_do(ps::ParseState) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9b9876140716d..fcdb3495db8a5 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -74,6 +74,10 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"Float" # FIXME: Other float types! 
Base.parse(Float64, val_str) + elseif k == K"true" + true + elseif k == K"false" + false elseif k == K"Identifier" Symbol(val_str) elseif k == K"VarIdentifier" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 7901d79a6b240..cac45ad45e38f 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -255,11 +255,25 @@ tests = [ "abstract type A <: B end" => "(abstract (<: :A :B))" "abstract type A <: B{T,S} end" => "(abstract (<: :A (curly :B :T :S)))" "abstract type A < B end" => "(abstract (call :< :A :B))" + # primitive type + "primitive type A 32 end" => "(primitive :A 32)" + "primitive type A <: B \n 8 \n end" => "(primitive (<: :A :B) 8)" + # struct + "struct A <: B \n a::X \n end" => "(struct false (<: :A :B) (block (:: :a :X)))" + "mutable struct A end" => "(struct true :A (block))" + "struct A end" => "(struct false :A (block))" + "struct try end" => "(struct false (error :try) (block))" # return "return\nx" => "(return nothing)" "return)" => "(return nothing)" "return x" => "(return :x)" "return x,y" => "(return (tuple :x :y))" + # module + "module A end" => "(module true :A (block))" + "baremodule A end" => "(module false :A (block))" + "module do \n end" => "(module true (error :do) (block))" + "module \$A end" => "(module true (\$ :A) (block))" + "module A \n a \n b \n end" => "(module true :A (block :a :b))" ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if :a (block :xx) (elseif (block :b) (block :yy) (block :zz)))" From 62f02a0e835e92334181bc29e2efe2e06e46fa17 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 21 Dec 2021 20:27:07 +1000 Subject: [PATCH 0255/1109] Rewrite how macro names are handled + parse export Macro names are now represented with a special token kind so they can be locally mapped into Julia Exprs rather than having the `@` added by using the context of the containing expression. 
Modify parse_call_chain to use this, and make use of it to implement parsing of export lists. --- JuliaSyntax/README.md | 2 + JuliaSyntax/src/parse_stream.jl | 67 ++++++++---- JuliaSyntax/src/parser.jl | 184 ++++++++++++++++++++------------ JuliaSyntax/src/syntax_tree.jl | 33 +++--- JuliaSyntax/src/token_kinds.jl | 30 ++++-- JuliaSyntax/src/tokens.jl | 17 +-- JuliaSyntax/test/parser.jl | 65 ++++++----- 7 files changed, 242 insertions(+), 156 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 924f2e0a4be58..2a807bf88a149 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -459,6 +459,8 @@ useful even for DSLs: * `abstract type A < B end` and other subtypes comparisons are allowed, but only `A <: B` makes sense. +* `export a, \n $b` is rejected, but `export a, \n b` parses fine. + ## Parsing oddities and warts There's many apparent inconsistencies between how `kw` and `=` are used when diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index aaebfdf62a0e4..69a83cf1819de 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -201,8 +201,22 @@ function peek(stream::ParseStream, n::Integer=1, skip_newlines=false) kind(peek_token(stream, n, skip_newlines)) end +function _peek_equal_to(stream, first_byte, len, str) + # Humongous but should-be-allocation-free hack: peek at the underlying data + # buffer. TODO: Attach the code string to the stream so we don't have to + # dig into the lexer? + buf = stream.lexer.io.data + cbuf = codeunits(str) + for i = 1:len + if buf[first_byte + i - 1] != cbuf[i] + return false + end + end + return true +end + """ -Return true if the next token equals the string `str` +Return true if the token equals the string `str` This is a hack (ideally the tokenizer would provide tokens for any identifiers which need special treatment) But occasionally the parser needs @@ -211,39 +225,44 @@ syntactic constructs. 
For example, the special parsing rules for `@doc` line contination :-/ """ -function peek_equal_to(stream::ParseStream, str::String) +function peek_equal_to(stream::ParseStream, pos::ParseStreamPosition, str::String) t = peek_token(stream) if span(t) != ncodeunits(str) return false end - # Humongous but should-be-allocation-free hack: peek at the underlying data - # buffer. TODO: Attach the code string to the stream so we don't have to - # dig into the lexer? - buf = stream.lexer.io.data - cbuf = codeunits(str) - for i = 1:span(t) - if buf[first_byte(t) + i - 1] != cbuf[i] - return false - end - end - return true + return _peek_equal_to(stream, first_byte(t), span(t), str) +end + +function peek_behind_str(stream::ParseStream, pos::ParseStreamPosition, str::String) + s = stream.spans[pos.output_index] + return _peek_equal_to(stream, first_byte(s), span(s), str) end """ -Return the kind of the previous non-trivia span which was inserted. +Return the kind of span which was previously inserted into the output, +defaulting to the most previous nontrivia node. -Looking backward is a bit hacky but can be handy on occasion. +Retroactively inspecting/modifying the parser's output can be confusing, so +using this function should be avoided where possible. 
""" -function peek_behind(stream::ParseStream) - for i = length(stream.spans):-1:1 - s = stream.spans[i] - if !istrivia(head(s)) - return kind(s) +function peek_behind(stream::ParseStream; skip_trivia::Bool=true) + if skip_trivia + for i = length(stream.spans):-1:1 + s = stream.spans[i] + if !istrivia(head(s)) + return kind(s) + end end + elseif !isempty(stream.spans) + return kind(last(stream.spans)) end return K"Nothing" end +function peek_behind(stream::ParseStream, pos::ParseStreamPosition) + return kind(stream.spans[pos.output_index]) +end + #------------------------------------------------------------------------------- # Stream output interface - the `bump_*` and `emit_*` family of functions # @@ -550,8 +569,12 @@ function peek_equal_to(ps::ParseState, args...) peek_equal_to(ps.stream, args...) end -function peek_behind(ps::ParseState) - peek_behind(ps.stream) +function peek_behind_str(ps::ParseState, args...) + peek_behind_str(ps.stream, args...) +end + +function peek_behind(ps::ParseState, args...) + peek_behind(ps.stream, args...) end function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 37e527afc74c9..a038c4a6d46b6 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -58,27 +58,11 @@ function TODO(str) error("TODO: $str") end -# Placeholder - bump an identifier or literal in place of a production we -# haven't implemented yet. -function bumpTODO(ps::ParseState) - if peek(ps) == K"Identifier" || isliteral(peek(ps)) - bump(ps) - else - error("bumpTODO - got unexpected $(peek(ps))") - end -end - #------------------------------------------------------------------------------- # Parsing-specific predicates on tokens/kinds # # All these take either a raw kind or a token. -function is_identifier(k) - # FIXME: use is_identifier instead of K"Identifier" and add - # other virtual identifiers like K"core_@doc" etc? 
- k in (K"Identifier", K"__dot__") -end - function is_closing_token(ps::ParseState, k) k = kind(k) return k in (K"else", K"elseif", K"catch", K"finally", @@ -331,7 +315,7 @@ end function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) - if (isliteral(k) || k == K"Identifier") && k2 in (K",", K")", K"}", K"]") + if (isliteral(k) || is_identifier(k)) && k2 in (K",", K")", K"}", K"]") # optimization: skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) @@ -1022,6 +1006,8 @@ function parse_call(ps::ParseState) parse_resword(ps) else mark = position(ps) + # f(x) ==> (call f x) + # $f(x) ==> (call ($ f) x) parse_unary_prefix(ps) parse_call_with_initial_ex(ps, mark) end @@ -1067,18 +1053,65 @@ function parse_unary_prefix(ps::ParseState) end end +# Parse a symbol or interpolation syntax (a restricted version of +# parse_unary_prefix) +function parse_identifier_or_interpolate(ps::ParseState, outermost=true) + mark = position(ps) + if peek(ps) == K"$" + bump(ps, TRIVIA_FLAG) + # $a ==> ($ a) + # $$a ==> ($ ($ a)) + parse_identifier_or_interpolate(ps, false) + emit(ps, mark, K"$") + else + parse_atom(ps) + if outermost && !is_identifier(peek_behind(ps)) + @info "" peek_behind(ps) + emit(ps, mark, K"error", + error="Expected identifier or interpolation syntax") + end + end +end + +function parse_export_symbol(ps::ParseState) + bump_trivia(ps) + if peek(ps) == K"@" + # export @a ==> (export @a) + # export a, \n @b ==> (export a @b) + bump(ps, TRIVIA_FLAG) + parse_macro_name(ps, remap_kind=true) + else + # export a ==> (export a) + # export \n a ==> (export a) + # export $a, $(a*b) ==> (export ($ a) ($ (call * a b))) + parse_identifier_or_interpolate(ps) + end +end + # Emit an error if the call chain syntax is not a valid module reference function emit_modref_error(ps, mark) emit(ps, mark, K"error", error="not a valid module reference") end +function finish_macroname(ps, mark, 
is_valid_modref, macro_name_position, + name_kind=nothing) + if is_valid_modref + if isnothing(name_kind) + name_kind = macro_name_kind(peek_behind(ps, macro_name_position)) + end + reset_node!(ps, macro_name_position, kind = name_kind) + else + emit(ps, mark, K"error", error="not a valid module reference") + end +end + # Parses a chain of sufficies at function call precedence, leftmost binding # tightest. # f(a,b) ==> (call f a b) # f(a).g(b) ==> (call (. (call f a) (quote g)) b) # # flisp: (define (parse-call-chain s ex macrocall?) -function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro=false) +function parse_call_chain(ps::ParseState, mark, is_macrocall=false) if is_number(peek_behind(ps)) && peek(ps) == K"(" # juxtaposition with numbers is multiply, not call # 2(x) ==> (* 2 x) @@ -1086,8 +1119,11 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro end # source range of the @-prefixed part of a macro macro_atname_range = nothing - is_valid_modref = peek_behind(ps) in (K"__dot__", K"Identifier") - strmacro_name_position = position(ps) # same token as peek_behind + kb = peek_behind(ps) + is_valid_modref = is_identifier(kb) || kb == K"." + # We record the last component of chains of dot-separated identifiers so we + # know which identifier was the macro name. + macro_name_position = position(ps) # points to same output span as peek_behind while true this_iter_valid_modref = false t = peek_token(ps) @@ -1098,16 +1134,16 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro break end if k == K"(" - if is_macrocall && !is_valid_modref + if is_macrocall # a().@x(y) ==> (macrocall (error (. 
(call a) (quote x))) y) - emit_modref_error(ps, mark) + finish_macroname(ps, mark, is_valid_modref, macro_name_position) end # f(a,b) ==> (call f a b) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) # Keyword arguments depends on call vs macrocall # foo(a=1) ==> (call foo (kw a 1)) - # @foo(a=1) ==> (macrocall foo (= a 1)) + # @foo(a=1) ==> (macrocall @foo (= a 1)) parse_call_arglist(ps, K")", is_macrocall) emit(ps, mark, is_macrocall ? K"macrocall" : K"call") if peek(ps) == K"do" @@ -1120,26 +1156,25 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro break end elseif is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) - if is_macrocall && !is_valid_modref - # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) - emit_modref_error(ps, mark) - end + # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) + finish_macroname(ps, mark, is_valid_modref, macro_name_position) with_space_sensitive(ps) do ps # Space separated macro arguments - # @foo a b ==> (macrocall foo a b) - # A.@foo a b ==> (macrocall (. A (quote foo)) a b) - # @A.foo a b ==> (macrocall (. A (quote foo)) a b) + # @foo a b ==> (macrocall @foo a b) + # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) + # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) n_args = parse_space_separated_exprs(ps) + is_doc_macro = peek_behind_str(ps, macro_name_position, "doc") if is_doc_macro && n_args == 1 # Parse extended @doc args on next line - # @doc x\ny ==> (macrocall doc x y) - # A.@doc x\ny ==> (macrocall (. A (quote doc)) doc x y) - # @A.doc x\ny ==> (macrocall (. A (quote doc)) doc x y) - # @doc x y\nz ==> (macrocall doc x y) + # @doc x\ny ==> (macrocall @doc x y) + # A.@doc x\ny ==> (macrocall (. A (quote @doc)) doc x y) + # @A.doc x\ny ==> (macrocall (. 
A (quote @doc)) doc x y) + # @doc x y\nz ==> (macrocall @doc x y) # # Excluded cases - # @doc x\n\ny ==> (macrocall doc x) - # @doc x\nend ==> (macrocall doc x) + # @doc x\n\ny ==> (macrocall @doc x) + # @doc x\nend ==> (macrocall @doc x) k2 = peek(ps, 2) if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) && k2 != K"NewlineWs" @@ -1151,9 +1186,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro end break elseif k == K"[" - if is_macrocall && !is_valid_modref + if is_macrocall # a().@x[1] ==> FIXME - emit_modref_error(ps, mark) + finish_macroname(ps, mark, is_valid_modref, macro_name_position) end bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) @@ -1187,9 +1222,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro end if !isnothing(macro_atname_range) # Allow `@` in macrocall only in first and last position - # A.B.@x ==> (macrocall (. (. A (quote B)) (quote x))) - # @A.B.x ==> (macrocall (. (. A (quote B)) (quote x))) - # A.@B.x ==> (macrocall (. (. A (error) B) (quote x))) + # A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x))) + # @A.B.x ==> (macrocall (. (. A (quote B)) (quote @x))) + # A.@B.x ==> (macrocall (. (. A (error) B) (quote @x))) emit_diagnostic(ps, macro_atname_range, error="`@` must appear on first or last macro name component") bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") @@ -1234,18 +1269,19 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro # now for simplicity and for compatibility with the flisp parser. elseif k == K"@" # A macro call after some prefix A has been consumed - # A.@x ==> (macrocall (. A x)) - # A.@x a ==> (macrocall (. A x) a) + # A.@x ==> (macrocall (. A (quote @x))) + # A.@x a ==> (macrocall (. A (quote @x)) a) m = position(ps) if is_macrocall - # @A.B.@x a ==> (macrocall (. A x) a) + # @A.B.@x a ==> (macrocall (error (. 
A (quote x))) a) bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path") else bump(ps, TRIVIA_FLAG) is_macrocall = true end - is_doc_macro = parse_macro_name(ps) - macro_atname_range = (m, position(ps)) + parse_macro_name(ps) + macro_name_position = position(ps) + macro_atname_range = (m, macro_name_position) emit(ps, m, K"quote") emit(ps, mark, K".") this_iter_valid_modref = true @@ -1253,11 +1289,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro # Field/property syntax # f.x.y ==> (. (. f (quote x)) (quote y)) m = position(ps) - if is_macrocall - is_doc_macro = peek_equal_to(ps, "doc") - end parse_atom(ps, false) - strmacro_name_position = position(ps) + macro_name_position = position(ps) emit(ps, m, K"quote") emit(ps, mark, K".") this_iter_valid_modref = true @@ -1274,9 +1307,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro end elseif k == K"{" # Type parameter curlies and macro calls - if is_macrocall && !is_valid_modref + if is_macrocall # a().@x{y} ==> (macrocall (error (. (call a) (quote x))) (braces y)) - emit_modref_error(ps, mark) + finish_macroname(ps, mark, is_valid_modref, macro_name_position) end m = position(ps) bump_disallowed_space(ps) @@ -1304,8 +1337,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false, is_doc_macro # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. - reset_node!(ps, strmacro_name_position, - kind = is_string(k) ? K"StringMacroName" : K"CmdMacroName") + mackind = is_string(k) ? 
K"StringMacroName" : K"CmdMacroName" + finish_macroname(ps, mark, is_valid_modref, macro_name_position, mackind) bump(ps) t = peek_token(ps) k = kind(t) @@ -1538,7 +1571,11 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"module") elseif word == K"export" - TODO("parse_resword") + # export a + # export a, b, + bump(ps, TRIVIA_FLAG) + parse_comma_separated(ps, parse_export_symbol) + emit(ps, mark, K"export") elseif word in (K"import", K"using") TODO("parse_resword") elseif word == K"do" @@ -1705,6 +1742,7 @@ function parse_function(ps::ParseState) else # function f() end ==> (function (call f) (block)) # function \n f() end ==> (function (call f) (block)) + # function $f() end ==> (function (call ($ f)) (block)) parse_unary_prefix(ps) end parse_call_chain(ps, def_mark) @@ -1761,20 +1799,28 @@ function parse_imports(ps::ParseState, word) TODO("parse_imports unimplemented") end -# flisp: (define (parse-macro-name s) -function parse_macro_name(ps::ParseState) - is_doc_macro = false +function macro_name_kind(k) + return k == K"Identifier" ? K"MacroName" : + k == K"." ? K"@." : + k == K"VarIdentifier" ? K"VarMacroName" : + error("Unrecognized source kind for macro name $k") +end + +# If remap_kind is false, the kind will be remapped by parse_call_chain after +# it discovers the macro name component of the module path. +# +# flisp: parse-macro-name +function parse_macro_name(ps::ParseState; remap_kind=false) bump_disallowed_space(ps) - with_space_sensitive(ps) do ps - if peek(ps) == K"." - bump(ps, remap_kind=K"__dot__") - else - # The doc in @doc is a contextural keyword - is_doc_macro = peek_equal_to(ps, "doc") - parse_atom(ps, false) - end + if peek(ps) == K"." + # @. 
y ==> (macrocall (quote @__dot__) y) + bump(ps) + else + parse_atom(ps, false) + end + if remap_kind + reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps))) end - return is_doc_macro end # flisp: (define (parse-atsym s) @@ -2366,8 +2412,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) # FIXME parse_string_literal(ps) elseif leading_kind == K"@" # macro call bump(ps, TRIVIA_FLAG) - is_doc_macro = parse_macro_name(ps) - parse_call_chain(ps, atom_mark, true, is_doc_macro) + parse_macro_name(ps) + parse_call_chain(ps, atom_mark, true) elseif leading_kind in (K"Cmd", K"TripleCmd") bump_invisible(ps, K"core_@cmd") bump(ps) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index fcdb3495db8a5..8fd6b62967387 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -95,20 +95,24 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens Symbol(val_str) - elseif k == K"core_@doc" - GlobalRef(Core, :var"@doc") - elseif k == K"core_@cmd" - GlobalRef(Core, :var"@cmd") elseif k == K"NothingLiteral" nothing elseif k == K"error" ErrorVal() - elseif k == K"__dot__" - :__dot__ + elseif k == K"@." + :var"@__dot__" + elseif k == K"MacroName" + Symbol("@$val_str") + elseif k == K"VarMacroName" + Symbol("@$(val_str[5:end-1])") elseif k == K"StringMacroName" - Symbol(val_str*"_str") + Symbol("@$(val_str)_str") elseif k == K"CmdMacroName" - Symbol(val_str*"_cmd") + Symbol("@$(val_str)_cmd") + elseif k == K"core_@doc" + GlobalRef(Core, :var"@doc") + elseif k == K"core_@cmd" + GlobalRef(Core, :var"@cmd") elseif is_syntax_kind(raw) nothing else @@ -298,18 +302,6 @@ end #------------------------------------------------------------------------------- # Conversion to Base.Expr -function _macroify_name(name) - if name isa Symbol - Symbol('@', name) - else - if Meta.isexpr(name, :.) 
&& name.args[2] isa QuoteNode - Expr(:., name.args[1], QuoteNode(_macroify_name(name.args[2].value))) - else - name - end - end -end - function _to_expr(node::SyntaxNode) if haschildren(node) args = Vector{Any}(undef, length(children(node))) @@ -317,7 +309,6 @@ function _to_expr(node::SyntaxNode) # Convert elements if head(node) == :macrocall line_node = source_location(LineNumberNode, node.source, node.position) - args[1] = _macroify_name(args[1]) insert!(args, 2, line_node) elseif head(node) == :call if length(args) > 1 && Meta.isexpr(args[end], :parameters) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index fda70be7f6822..432eba4b6f8d5 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -813,17 +813,27 @@ Dict([ "END_OPS" => Ts.end_ops "BEGIN_PARSER_TOKENS" => Ts.begin_parser_tokens + "TOMBSTONE" => Ts.TOMBSTONE -"core_@doc" => Ts.CORE_AT_DOC -"core_@cmd" => Ts.CORE_AT_CMD -"core_@int128_str" => Ts.CORE_AT_INT128_STR -"core_@uint128_str" => Ts.CORE_AT_UINT128_STR -"core_@big_str" => Ts.CORE_AT_BIG_STR -"__dot__" => Ts.DOT_MACRO_NAME -"StringMacroName" => Ts.STRING_MACRO_NAME -"CmdMacroName" => Ts.CMD_MACRO_NAME -"UnquotedString" => Ts.UNQUOTED_STRING "NothingLiteral" => Ts.NOTHING_LITERAL +"UnquotedString" => Ts.UNQUOTED_STRING + +# Macro names are modelled as a special kind of identifier because the +# @ may not be attached to the macro name in the source (or may not be +# associated with a token at all in the case of implied macro calls +# like CORE_DOC_MACRO_NAME) +"BEGIN_MACRO_NAMES" => Ts.begin_macro_names +"MacroName" => Ts.MACRO_NAME # A macro name identifier +"@." => Ts.DOT_MACRO_NAME # The macro name of @. +"VarMacroName" => Ts.VAR_MACRO_NAME # @var"..." 
+"StringMacroName" => Ts.STRING_MACRO_NAME # macname"some_str" +"CmdMacroName" => Ts.CMD_MACRO_NAME # macname`some_str` +"core_@doc" => Ts.CORE_DOC_MACRO_NAME # Core.@doc +"core_@cmd" => Ts.CORE_CMD_MACRO_NAME # Core.@cmd +"core_@int128_str" => Ts.CORE_INT128_STR_MACRO_NAME # Core.@int128_str +"core_@uint128_str" => Ts.CORE_UINT128_STR_MACRO_NAME # Core.@uint128_str +"core_@big_str" => Ts.CORE_BIG_STR_MACRO_NAME # Core.@big_str +"END_MACRO_NAMES" => Ts.end_macro_names "END_PARSER_TOKENS" => Ts.end_parser_tokens # Our custom syntax tokens @@ -879,3 +889,5 @@ for kw in split("""abstract baremodule begin break catch const """) _kind_to_str_unique[_str_to_kind[kw]] = kw end + +const _kind_to_str = Dict(s=>k for (k,s) in _str_to_kind) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 07fcd49930489..404494747db86 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -64,6 +64,14 @@ function is_syntax_kind(t) K"BEGIN_SYNTAX_KINDS" < kind(t) < K"END_SYNTAX_KINDS" end +function is_identifier(k) + kind(k) in (K"Identifier", K"VarIdentifier") +end + +function is_macro_name(k) + K"BEGIN_MACRO_NAMES" < kind(k) < K"END_MACRO_NAMES" +end + function is_number(t) kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") end @@ -81,14 +89,7 @@ function is_whitespace(t) end function _kind_str(k::Kind) - u = untokenize(k) - return !isnothing(u) ? u : - k in (K"Identifier", K"VarIdentifier") ? "Identifier" : - isliteral(k) ? "Literal" : - k == K"Comment" ? "Comment" : - k == K"Whitespace" ? "Whitespace" : - k == K"NewlineWs" ? 
"NewlineWs" : - lowercase(string(k)) + _kind_to_str[k] end """ diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index cac45ad45e38f..8c47edc2d00f8 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -163,7 +163,9 @@ tests = [ "\$\$a" => "(\$ (\$ :a))" ], JuliaSyntax.parse_call => [ - "f(a,b)" => "(call :f :a :b)" + "f(x)" => "(call :f :x)" + "\$f(x)" => "(call (\$ :f) :x)" + "f(a,b)" => "(call :f :a :b)" "f(a).g(b)" => "(call (. (call :f :a) (quote :g)) :b)" # do "f() do x, y\n body end" => "(do (call :f) (-> (tuple :x :y) (block :body)))" @@ -172,25 +174,26 @@ tests = [ "f(x) do y,z body end" => "(do (call :f :x) (-> (tuple :y :z) (block :body)))" # Keyword arguments depend on call vs macrocall "foo(a=1)" => "(call :foo (kw :a 1))" - "@foo(a=1)" => "(macrocall :foo (= :a 1))" + "@foo(a=1)" => """(macrocall Symbol("@foo") (= :a 1))""" # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) - "@foo a b" => "(macrocall :foo :a :b)" - "A.@foo a b" => "(macrocall (. :A (quote :foo)) :a :b)" - "@A.foo a b" => "(macrocall (. :A (quote :foo)) :a :b)" + "@foo a b" => """(macrocall Symbol("@foo") :a :b)""" + "A.@foo a b" => """(macrocall (. :A (quote Symbol("@foo"))) :a :b)""" + "@A.foo a b" => """(macrocall (. :A (quote Symbol("@foo"))) :a :b)""" # Special @doc parsing rules - "@doc x\ny" => "(macrocall :doc :x :y)" - "A.@doc x\ny" => "(macrocall (. :A (quote :doc)) :x :y)" - "@A.doc x\ny" => "(macrocall (. :A (quote :doc)) :x :y)" - "@doc x y\nz" => "(macrocall :doc :x :y)" - "@doc x\n\ny" => "(macrocall :doc :x)" - "@doc x\nend" => "(macrocall :doc :x)" + "@doc x\ny" => """(macrocall Symbol("@doc") :x :y)""" + "A.@doc x\ny" => """(macrocall (. :A (quote Symbol("@doc"))) :x :y)""" + "@A.doc x\ny" => """(macrocall (. 
:A (quote Symbol("@doc"))) :x :y)""" + "@doc x y\nz" => """(macrocall Symbol("@doc") :x :y)""" + "@doc x\n\ny" => """(macrocall Symbol("@doc") :x)""" + "@doc x\nend" => """(macrocall Symbol("@doc") :x)""" # Allow `@` in macrocall only in first and last position - "A.B.@x" => "(macrocall (. (. :A (quote :B)) (quote :x)))" - "@A.B.x" => "(macrocall (. (. :A (quote :B)) (quote :x)))" - "A.@B.x" => "(macrocall (. (. :A (quote :B)) (error) (quote :x)))" - "a().@x(y)" => "(macrocall (error (. (call :a) (quote :x))) :y)" - "a().@x y" => "(macrocall (error (. (call :a) (quote :x))) :y)" - "a().@x{y}" => "(macrocall (error (. (call :a) (quote :x))) (braces :y))" + "A.B.@x" => """(macrocall (. (. :A (quote :B)) (quote Symbol("@x"))))""" + "@A.B.x" => """(macrocall (. (. :A (quote :B)) (quote Symbol("@x"))))""" + "A.@B.x" => """(macrocall (. (. :A (quote :B)) (error) (quote Symbol("@x"))))""" + "A.@. y" => """(macrocall (. :A (quote Symbol("@__dot__"))) :y)""" + "a().@x(y)" => """(macrocall (error (. (call :a) (quote :x))) :y)""" + "a().@x y" => """(macrocall (error (. (call :a) (quote :x))) :y)""" + "a().@x{y}" => """(macrocall (error (. (call :a) (quote :x))) (braces :y))""" # Keyword params always use kw inside tuple in dot calls "f.(a,b)" => "(. :f (tuple :a :b))" "f.(a=1)" => "(. 
:f (tuple (kw :a 1)))" @@ -206,16 +209,16 @@ tests = [ "f'" => "(' :f)" "f'ᵀ" => "(call Symbol(\"'ᵀ\") :f)" # Curly calls - "@S{a,b}" => "(macrocall :S (braces :a :b))" + "@S{a,b}" => """(macrocall Symbol("@S") (braces :a :b))""" "S{a,b}" => "(curly :S :a :b)" # String macros - """x"str\"""" => """(macrocall :x_str "str")""" - """x`str`""" => """(macrocall :x_cmd "str")""" + """x"str\"""" => """(macrocall Symbol("@x_str") "str")""" + """x`str`""" => """(macrocall Symbol("@x_cmd") "str")""" # Macro sufficies can include keywords and numbers - "x\"s\"y" => """(macrocall :x_str "s" "y")""" - "x\"s\"end" => """(macrocall :x_str "s" "end")""" - "x\"s\"2" => """(macrocall :x_str "s" 2)""" - "x\"s\"10.0" => """(macrocall :x_str "s" 10.0)""" + "x\"s\"y" => """(macrocall Symbol("@x_str") "s" "y")""" + "x\"s\"end" => """(macrocall Symbol("@x_str") "s" "end")""" + "x\"s\"2" => """(macrocall Symbol("@x_str") "s" 2)""" + "x\"s\"10.0" => """(macrocall Symbol("@x_str") "s" 10.0)""" ], JuliaSyntax.parse_resword => [ # block @@ -268,12 +271,19 @@ tests = [ "return)" => "(return nothing)" "return x" => "(return :x)" "return x,y" => "(return (tuple :x :y))" - # module + # module/baremodule "module A end" => "(module true :A (block))" "baremodule A end" => "(module false :A (block))" "module do \n end" => "(module true (error :do) (block))" "module \$A end" => "(module true (\$ :A) (block))" "module A \n a \n b \n end" => "(module true :A (block :a :b))" + # export + "export @a" => "(export Symbol(\"@a\"))" + "export a, \n @b" => "(export :a Symbol(\"@b\"))" + "export a" => "(export :a)" + "export \n a" => "(export :a)" + "export \$a, \$(a*b)" => "(export (\$ :a) (\$ (call :* :a :b)))" + # import ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if :a (block :xx) (elseif (block :b) (block :yy) (block :zz)))" @@ -310,6 +320,7 @@ tests = [ "macro begin() end" => "(macro (call (error :begin)) (block))" "function f() end" => "(function (call :f) (block))" 
"function \n f() end" => "(function (call :f) (block))" + "function \$f() end" => "(function (call (\$ :f)) (block))" "function f()::T end" => "(function (:: (call :f) :T) (block))" "function f()::g(T) end" => "(function (:: (call :f) (call :g :T)) (block))" "function f() \n a \n b end" => "(function (call :f) (block :a :b))" @@ -355,9 +366,9 @@ tests = [ ":(end)" => "(quote (error :end))" ":<:" => "(quote :<:)" # Macro names can be keywords - "@end x" => "(macrocall :end :x)" + "@end x" => """(macrocall Symbol("@end") :x)""" # __dot__ macro - "@. x y" => "(macrocall :__dot__ :x :y)" + "@. x y" => """(macrocall Symbol("@__dot__") :x :y)""" # Errors ": foo" => "(quote (error) :foo)" ], From 87b5cd9c7dd67584f6bc4ca052888e91791959f1 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 23 Dec 2021 20:20:49 +1000 Subject: [PATCH 0256/1109] Implement parse_try and some clean up to ParseStream Implement parse_try and set things up so the new else syntax from Julia 1.8 is tested independently from VERSION. Several cleanups to ParseStream: * Fix an ambiguitiy in the way that invisible nodes overlap * Some renaming of variables and fields to align with the newer TaggedRange name * Clean up diagnostics system a little; implement diagnostic warnings * Clean up build_tree (previously to_raw_tree) to make it not depend on GreenNode. --- JuliaSyntax/README.md | 3 + JuliaSyntax/src/parse_stream.jl | 279 +++++++++++++++++------------- JuliaSyntax/src/parser.jl | 82 ++++++++- JuliaSyntax/src/syntax_tree.jl | 47 ++++- JuliaSyntax/test/parse_stream.jl | 2 +- JuliaSyntax/test/parser.jl | 38 +++- JuliaSyntax/test/simple_parser.jl | 2 +- 7 files changed, 313 insertions(+), 140 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 2a807bf88a149..3e924360c4428 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -461,6 +461,9 @@ useful even for DSLs: * `export a, \n $b` is rejected, but `export a, \n b` parses fine. 
+* In try-catch-finally, the `finally` clause is allowed before the `catch`, but + always executes afterward. (Presumably was this a mistake? It seems pretty awful!) + ## Parsing oddities and warts There's many apparent inconsistencies between how `kw` and `=` are used when diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 69a83cf1819de..706c72e9b9b66 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1,17 +1,7 @@ #------------------------------------------------------------------------------- """ -`SyntaxToken` covers a contiguous range of the source text which contains a -token *relevant for parsing*. Syntax trivia (comments and whitespace) is dealt -with separately, though `SyntaxToken` does include some minimal information -about whether these were present. - -This does not include tokens include -* Whitespace -* Comments - -Note that "triviality" of tokens is context-dependent in general. For example, -the parentheses in `(1+2)*3` are important for parsing but are irrelevant after -the abstract syntax tree is constructed. +`SyntaxToken` is a token covering a contiguous byte range in the input text. +Information about leading whitespace tokens is added for use by the parser. """ struct SyntaxToken raw::RawToken @@ -41,36 +31,46 @@ Base.:(==)(tok::SyntaxToken, k::Kind) = (kind(tok) == k && !is_decorated(tok)) #------------------------------------------------------------------------------- -# Range in the source text which will become a node in the tree. Can be either -# a token (leaf node of the tree) or an interior node, depending on how nodes -# overlap. -struct TaggedRange - head::SyntaxHead - first_byte::Int - last_byte::Int -end +""" +Range in the source text which will become a node in the tree. Can be either a +token (leaf node of the tree) or an interior node, depending on how the +start_mark compares to previous nodes. 
-function TaggedRange(raw::RawToken, flags::RawFlags) - TaggedRange(SyntaxHead(raw.kind, flags), raw.startbyte + 1, raw.endbyte + 1) +TODO: Optimize this data structure? It's very large at the moment. +""" +struct TaggedRange + head::SyntaxHead # Kind,flags + first_byte::Int # First byte in the input text + last_byte::Int # Last byte in the input text + start_mark::Int # Index of first emitted range which this range covers end -head(text_span::TaggedRange) = text_span.head -kind(text_span::TaggedRange) = kind(text_span.head) -flags(text_span::TaggedRange) = flags(text_span.head) -first_byte(text_span::TaggedRange) = text_span.first_byte -last_byte(text_span::TaggedRange) = text_span.last_byte -span(text_span::TaggedRange) = last_byte(text_span) - first_byte(text_span) + 1 +head(range::TaggedRange) = range.head +kind(range::TaggedRange) = kind(range.head) +flags(range::TaggedRange) = flags(range.head) +first_byte(range::TaggedRange) = range.first_byte +last_byte(range::TaggedRange) = range.last_byte +span(range::TaggedRange) = last_byte(range) - first_byte(range) + 1 struct Diagnostic - text_span::TaggedRange + first_byte::Int + last_byte::Int + level::Symbol message::String end +first_byte(d::Diagnostic) = d.first_byte +last_byte(d::Diagnostic) = d.last_byte + function show_diagnostic(io::IO, diagnostic::Diagnostic, code) - printstyled(io, "Error: ", color=:light_red) + col,prefix = diagnostic.level == :error ? (:light_red, "Error") : + diagnostic.level == :warning ? (:light_yellow, "Warning") : + diagnostic.level == :note ? 
(:light_blue, "Note") : + (:normal, "Info") + printstyled(io, "$prefix: ", color=col) print(io, diagnostic.message, ":\n") - p = first_byte(diagnostic.text_span) - q = last_byte(diagnostic.text_span) + p = first_byte(diagnostic) + q = last_byte(diagnostic) if !isvalid(code, q) # Transform byte range into valid text range q = prevind(code, q) @@ -105,7 +105,7 @@ This is simililar in spirit to rust-analyzer's mutable struct ParseStream lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} lookahead::Vector{SyntaxToken} - spans::Vector{TaggedRange} + ranges::Vector{TaggedRange} diagnostics::Vector{Diagnostic} # First byte of next token next_byte::Int @@ -180,25 +180,28 @@ function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) end """ - peek_token(stream [, n=1]) + peek(stream [, n=1]) -Look ahead in the stream `n` tokens, returning a SyntaxToken +Look ahead in the stream `n` tokens, returning the token kind. Comments and +non-newline whitespace are skipped automatically. Whitespace containing a +single newline is returned as kind `K"NewlineWs"` unless `skip_newlines` is +true. """ -function peek_token(stream::ParseStream, n::Integer=1, skip_newlines=false) - stream.peek_count += 1 - if stream.peek_count > 100_000 - error("The parser seems stuck at byte $(position(stream))") - end - stream.lookahead[_lookahead_index(stream, n, skip_newlines)] +function peek(stream::ParseStream, n::Integer=1; skip_newlines::Bool=false) + kind(peek_token(stream, n; skip_newlines)) end """ peek_token(stream [, n=1]) -Look ahead in the stream `n` tokens, returning a Kind +Like `peek`, but return the full token information rather than just the kind. 
""" -function peek(stream::ParseStream, n::Integer=1, skip_newlines=false) - kind(peek_token(stream, n, skip_newlines)) +function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false) + stream.peek_count += 1 + if stream.peek_count > 100_000 + error("The parser seems stuck at byte $(position(stream))") + end + stream.lookahead[_lookahead_index(stream, n, skip_newlines)] end function _peek_equal_to(stream, first_byte, len, str) @@ -216,25 +219,14 @@ function _peek_equal_to(stream, first_byte, len, str) end """ -Return true if the token equals the string `str` - -This is a hack (ideally the tokenizer would provide tokens for any -identifiers which need special treatment) But occasionally the parser needs -access to interpret normal identifiers as contextural keywords or other special -syntactic constructs. +Return true if the node already emitted at `pos` covers the string `str` -For example, the special parsing rules for `@doc` line contination :-/ +This is a hack for edge cases where the parser needs access to interpret normal +identifiers as contextural keywords. For example, the special parsing rules for +`@doc` line contination :-( """ -function peek_equal_to(stream::ParseStream, pos::ParseStreamPosition, str::String) - t = peek_token(stream) - if span(t) != ncodeunits(str) - return false - end - return _peek_equal_to(stream, first_byte(t), span(t), str) -end - function peek_behind_str(stream::ParseStream, pos::ParseStreamPosition, str::String) - s = stream.spans[pos.output_index] + s = stream.ranges[pos.output_index] return _peek_equal_to(stream, first_byte(s), span(s), str) end @@ -247,20 +239,20 @@ using this function should be avoided where possible. 
""" function peek_behind(stream::ParseStream; skip_trivia::Bool=true) if skip_trivia - for i = length(stream.spans):-1:1 - s = stream.spans[i] + for i = length(stream.ranges):-1:1 + s = stream.ranges[i] if !istrivia(head(s)) return kind(s) end end - elseif !isempty(stream.spans) - return kind(last(stream.spans)) + elseif !isempty(stream.ranges) + return kind(last(stream.ranges)) end return K"Nothing" end function peek_behind(stream::ParseStream, pos::ParseStreamPosition) - return kind(stream.spans[pos.output_index]) + return kind(stream.ranges[pos.output_index]) end #------------------------------------------------------------------------------- @@ -283,11 +275,12 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags k = (is_trivia || remap_kind == K"Nothing") ? k : remap_kind - span = TaggedRange(SyntaxHead(k, f), first_byte(tok), last_byte(tok)) - push!(stream.spans, span) + span = TaggedRange(SyntaxHead(k, f), first_byte(tok), + last_byte(tok), lastindex(stream.ranges)+1) + push!(stream.ranges, span) end Base._deletebeg!(stream.lookahead, n) - stream.next_byte = last_byte(last(stream.spans)) + 1 + stream.next_byte = last_byte(last(stream.ranges)) + 1 # Defuse the time bomb stream.peek_count = 0 end @@ -345,9 +338,10 @@ whitespace if necessary with bump_trivia. function bump_glue(stream::ParseStream, kind, flags, num_tokens) span = TaggedRange(SyntaxHead(kind, flags), first_byte(stream.lookahead[1]), - last_byte(stream.lookahead[num_tokens])) + last_byte(stream.lookahead[num_tokens]), + lastindex(stream.ranges) + 1) Base._deletebeg!(stream.lookahead, num_tokens) - push!(stream.spans, span) + push!(stream.ranges, span) return position(stream) end @@ -359,31 +353,37 @@ example whether .+ should be a single token or a composite (. 
+) """ function bump_split(stream::ParseStream, num_bytes, kind1, flags1, kind2, flags2) tok = popfirst!(stream.lookahead) - push!(stream.spans, TaggedRange(SyntaxHead(kind1, flags1), - first_byte(tok), first_byte(tok)+num_bytes-1)) - push!(stream.spans, TaggedRange(SyntaxHead(kind2, flags2), - first_byte(tok)+num_bytes, last_byte(tok))) - nothing # position(stream) is ambiguous here, as it involves two spans + push!(stream.ranges, TaggedRange(SyntaxHead(kind1, flags1), + first_byte(tok), first_byte(tok)+num_bytes-1, + lastindex(stream.ranges) + 1)) + push!(stream.ranges, TaggedRange(SyntaxHead(kind2, flags2), + first_byte(tok)+num_bytes, last_byte(tok), + lastindex(stream.ranges) + 1)) + # Returning position(stream) like the other bump* methods would be + # ambiguous here; return nothing instead. + nothing end """ Reset kind or flags of an existing node in the output stream -This is a hack, but necessary on some occasions -* When some trailing syntax may change the kind or flags of the token -* When an invisible token might be required - see bump_invisible with K"TOMBSTONE" +This is a hack, but in some limited occasions the trailing syntax may change +the kind or flags of a token in a way which would require unbounded lookahead +in a recursive descent parser. Modifying the output with reset_node! is useful +in those cases. """ function reset_node!(stream::ParseStream, mark::ParseStreamPosition; kind=nothing, flags=nothing) - text_span = stream.spans[mark.output_index] - k = isnothing(kind) ? (@__MODULE__).kind(text_span) : kind - f = isnothing(flags) ? (@__MODULE__).flags(text_span) : flags - stream.spans[mark.output_index] = - TaggedRange(SyntaxHead(k, f), first_byte(text_span), last_byte(text_span)) + range = stream.ranges[mark.output_index] + k = isnothing(kind) ? (@__MODULE__).kind(range) : kind + f = isnothing(flags) ? 
(@__MODULE__).flags(range) : flags + stream.ranges[mark.output_index] = + TaggedRange(SyntaxHead(k, f), first_byte(range), last_byte(range), + range.start_mark) end function Base.position(stream::ParseStream) - ParseStreamPosition(stream.next_byte, lastindex(stream.spans)) + ParseStreamPosition(stream.next_byte, lastindex(stream.ranges)) end """ @@ -395,14 +395,25 @@ should be a previous return value of `position()`. """ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - text_span = TaggedRange(SyntaxHead(kind, flags), mark.input_byte, stream.next_byte-1) + range = TaggedRange(SyntaxHead(kind, flags), mark.input_byte, + stream.next_byte-1, mark.output_index+1) if !isnothing(error) - push!(stream.diagnostics, Diagnostic(text_span, error)) + _emit_diagnostic(stream, first_byte(range), last_byte(range), error=error) end - push!(stream.spans, text_span) + push!(stream.ranges, range) return position(stream) end +function _emit_diagnostic(stream::ParseStream, fbyte, lbyte; + error=nothing, warning=nothing) + message = !isnothing(error) ? error : + !isnothing(warning) ? warning : + error("No message in diagnostic") + level = !isnothing(error) ? :error : :warning + push!(stream.diagnostics, Diagnostic(fbyte, lbyte, level, message)) + return nothing +end + """ Emit a diagnostic at the position of the next token @@ -411,8 +422,7 @@ the next token. Otherwise it's positioned at the next token as returned by `peek FIXME: Rename? This doesn't emit normal tokens into the output event list! """ -function emit_diagnostic(stream::ParseStream, mark=nothing, end_mark=nothing; - error, whitespace=false) +function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) i = _lookahead_index(stream, 1, true) begin_tok_i = i end_tok_i = i @@ -422,63 +432,92 @@ function emit_diagnostic(stream::ParseStream, mark=nothing, end_mark=nothing; begin_tok_i = 1 end_tok_i = is_whitespace(stream.lookahead[i]) ? 
i : max(1, i-1) end - fbyte = isnothing(mark) ? - first_byte(stream.lookahead[begin_tok_i]) : mark.input_byte - lbyte = isnothing(end_mark) ? - last_byte(stream.lookahead[end_tok_i]) : end_mark.input_byte - # It's a bit weird to require supplying a SyntaxHead here... - text_span = TaggedRange(SyntaxHead(K"error", EMPTY_FLAGS), fbyte, lbyte) - push!(stream.diagnostics, Diagnostic(text_span, error)) + fbyte = first_byte(stream.lookahead[begin_tok_i]) + lbyte = last_byte(stream.lookahead[end_tok_i]) + _emit_diagnostic(stream, fbyte, lbyte; kws...) return nothing end +function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...) + _emit_diagnostic(stream, mark.input_byte, stream.next_byte-1; kws...) +end + function emit_diagnostic(stream::ParseStream, r::NTuple{2,ParseStreamPosition}; kws...) emit_diagnostic(stream, first(r), last(r); kws...) end +function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, + end_mark::ParseStreamPosition; kws...) + _emit_diagnostic(stream, mark.input_byte, end_mark.input_byte-1; kws...) +end + #------------------------------------------------------------------------------- -# Tree construction from the list of text spans held by ParseStream +# Tree construction from the list of text ranges held by ParseStream # # Note that this is largely independent of GreenNode, and could easily be # made completely independent with a tree builder interface. -function _push_node!(stack, text_span::TaggedRange, children=nothing) - if isnothing(children) - node = GreenNode(head(text_span), span(text_span)) - push!(stack, (text_span=text_span, node=node)) - else - node = GreenNode(head(text_span), span(text_span), children) - push!(stack, (text_span=text_span, node=node)) - end -end +""" + build_tree(::Type{NodeType}, stream::ParseStream; + wrap_toplevel_as_kind=nothing) + +Construct a tree with `NodeType` nodes from a ParseStream using depth-first +traversal. 
`NodeType` must have the constructors -function to_raw_tree(st; wrap_toplevel_as_kind=nothing) - stack = Vector{@NamedTuple{text_span::TaggedRange,node::GreenNode}}() - for text_span in st.spans - if kind(text_span) == K"TOMBSTONE" + NodeType(head::SyntaxHead, span::Integer) + NodeType(head::SyntaxHead, span::Integer, children::Vector{NodeType}) + +A single node which covers the input is expected, but if the ParseStream has +multiple nodes at the top level, `wrap_toplevel_as_kind` may be used to wrap +them in a single node. + +The tree here is constructed depth-first, but it would also be possible to use +a bottom-up tree builder interface similar to rust-analyzer. (In that case we'd +traverse the list of ranges backward rather than forward.) +""" +function build_tree(::Type{NodeType}, stream::ParseStream; + wrap_toplevel_as_kind=nothing) where NodeType + stack = Vector{@NamedTuple{range::TaggedRange, node::NodeType}}() + for (span_index, range) in enumerate(stream.ranges) + if kind(range) == K"TOMBSTONE" # Ignore invisible tokens which were created but never finalized. # See bump_invisible() continue end - if isempty(stack) || first_byte(text_span) > last_byte(stack[end].text_span) + if isempty(stack) || range.start_mark > stack[end].range.start_mark # A leaf node (span covering a single token): # [a][b][stack[end]] - # [text_span] - _push_node!(stack, text_span) + # [range] + node = NodeType(head(range), span(range)) + push!(stack, (range=range, node=node)) continue end # An interior node, span covering multiple tokens: # # [a][b][stack[end]] - # [ text_span] + # [ range] + # + # We use start_mark rather than first_byte to determine node overlap. + # This solve the following ambiguity between invisible nodes 1 and 2: + # + # [a][b]|[...] + # |--- invisible node 1 + # `--- invisible node 2 + # + # Does node 2 contain node 1? Using start_mark, we can distinguish the + # cases: + # + # [a][b][2][1] [a][b][2]... 
+ # [ 1] j = length(stack) - while j > 1 && first_byte(text_span) <= first_byte(stack[j-1].text_span) + while j > 1 && range.start_mark < stack[j].range.start_mark j -= 1 end children = [stack[k].node for k = j:length(stack)] resize!(stack, j-1) - _push_node!(stack, text_span, children) + node = NodeType(head(range), span(range), children) + push!(stack, (range=range, node=node)) end # show(stdout, MIME"text/plain"(), stack[1].node) if length(stack) == 1 @@ -557,16 +596,12 @@ end function peek(ps::ParseState, n=1; skip_newlines=nothing) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines - peek(ps.stream, n, skip_nl) + peek(ps.stream, n; skip_newlines=skip_nl) end function peek_token(ps::ParseState, n=1; skip_newlines=nothing) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines - peek_token(ps.stream, n, skip_nl) -end - -function peek_equal_to(ps::ParseState, args...) - peek_equal_to(ps.stream, args...) + peek_token(ps.stream, n, skip_newlines=skip_nl) end function peek_behind_str(ps::ParseState, args...) 
diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a038c4a6d46b6..12b429d0d04ed 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1769,11 +1769,89 @@ function parse_function(ps::ParseState) emit(ps, mark, word) end +# Parse a try block +# +# try \n x \n catch e \n y \n finally \n z end ==> (try (block x) e (block y) false (block z)) +#v1.8: try \n x \n catch e \n y \n else z finally \n w end ==> (try (block x) e (block y) (block z) (block w)) +# +# flisp: embedded in parse_resword function parse_try(ps) - TODO("parse_try") mark = position(ps) + bump(ps, TRIVIA_FLAG) + parse_block(ps) + has_catch = false + has_else = false + has_finally = false + bump_trivia(ps) + flags = EMPTY_FLAGS + bump_trivia(ps) + if peek(ps) == K"catch" + has_catch = true + parse_catch(ps) + else + bump_invisible(ps, K"false") + bump_invisible(ps, K"false") + end + bump_trivia(ps) + if peek(ps) == K"else" + # catch-else syntax: https://github.com/JuliaLang/julia/pull/42211 + # + #v1.8: try catch ; else end ==> (try (block) false (block) (block) false) + has_else = true + else_mark = position(ps) + bump(ps, TRIVIA_FLAG) + parse_block(ps) + if !has_catch + #v1.8: try else end ==> (try (block) false false (error (block)) false) + emit(ps, else_mark, K"error", error="Expected `catch` before `else`") + end + if ps.julia_version < v"1.8" + #v1.7: try catch ; else end ==> (try (block) false (block) (error (block)) false) + emit(ps, else_mark, K"error", + error="`else` in `try` requires at least Julia 1.8") + end + else + bump_invisible(ps, K"false") + end + bump_trivia(ps) + if peek(ps) == K"finally" + # try x finally y end ==> (try (block x) false false false (block y)) + has_finally = true + bump(ps, TRIVIA_FLAG) + parse_block(ps) + else + bump_invisible(ps, K"false") + end + # Wart: the flisp parser allows finally before catch, the *opposite* order + # in which these blocks execute. 
+ bump_trivia(ps) + if !has_catch && peek(ps) == K"catch" + # try x finally y catch e z end ==> (try (block x) false false false (block y) e (block z)) + flags |= TRY_CATCH_AFTER_FINALLY_FLAG + m = position(ps) + parse_catch(ps) + emit_diagnostic(ps, m, position(ps), + warning="`catch` after `finally` will execute out of order") + end + bump_closing_token(ps, K"end") + emit(ps, mark, K"try", flags) end -# + +function parse_catch(ps::ParseState) + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k in (K";", K"NewlineWs") || is_closing_token(ps, k) + # try x catch ; y end ==> (try (block x) false (block y) false false) + # try x catch \n y end ==> (try (block x) false (block y) false false) + bump_invisible(ps, K"false") + bump(ps, TRIVIA_FLAG) + else + # try x catch e y end ==> (try (block x) e (block y) false false) + parse_identifier_or_interpolate(ps) + end + parse_block(ps) +end + # flisp: parse-do function parse_do(ps::ParseState) ps = normal_context(ps) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 8fd6b62967387..06cf497677c4e 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -4,9 +4,13 @@ #------------------------------------------------------------------------------- const RawFlags = UInt32 -EMPTY_FLAGS = 0x00000000 -TRIVIA_FLAG = 0x00000001 -INFIX_FLAG = 0x00000002 +EMPTY_FLAGS = RawFlags(0) +TRIVIA_FLAG = RawFlags(1<<0) +# The following flags are head-specific and could probably be allowed to cover +# the same bits +INFIX_FLAG = RawFlags(1<<1) +# try-finally-catch +TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<2) # ERROR_FLAG = 0x80000000 struct SyntaxHead @@ -17,8 +21,10 @@ end kind(head::SyntaxHead) = head.kind flags(head::SyntaxHead) = head.flags -istrivia(head::SyntaxHead) = flags(head) & TRIVIA_FLAG != 0 -isinfix(head::SyntaxHead) = flags(head) & INFIX_FLAG != 0 +istrivia(head::SyntaxHead) = hasflags(head, TRIVIA_FLAG) +isinfix(head::SyntaxHead) = hasflags(head, INFIX_FLAG) 
+hasflags(head::SyntaxHead, flags_) = (flags(head) & flags_) == flags_ + iserror(head::SyntaxHead) = kind(head) == K"error" function Base.summary(head::SyntaxHead) @@ -151,6 +157,7 @@ end iserror(node::SyntaxNode) = iserror(node.raw) istrivia(node::SyntaxNode) = istrivia(node.raw) +hasflags(node::SyntaxNode, f) = hasflags(head(node.raw), f) head(node::SyntaxNode) = node.head kind(node::SyntaxNode) = kind(node.raw) @@ -311,15 +318,43 @@ function _to_expr(node::SyntaxNode) line_node = source_location(LineNumberNode, node.source, node.position) insert!(args, 2, line_node) elseif head(node) == :call + # Move parameters block to args[2] if length(args) > 1 && Meta.isexpr(args[end], :parameters) insert!(args, 2, args[end]) pop!(args) end elseif head(node) == :tuple || head(node) == :parameters + # Move parameters blocks to args[1] if length(args) > 1 && Meta.isexpr(args[end], :parameters) pushfirst!(args, args[end]) pop!(args) end + elseif head(node) == :try + # Try children in source order: + # try_block catch_var catch_block else_block finally_block + # Expr ordering: + # try_block catch_var catch_block [finally_block] [else_block] + catch_ = nothing + if hasflags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + catch_ = pop!(args) + catch_var = pop!(args) + end + finally_ = pop!(args) + else_ = pop!(args) + if hasflags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + pop!(args) + pop!(args) + push!(args, catch_var) + push!(args, catch_) + end + # At this poin args is + # [try_block catch_var catch_block] + if finally_ !== false + push!(args, finally_) + end + if else_ !== false + push!(args, else_) + end end if head(node) == :inert || (head(node) == :quote && length(args) == 1 && !(only(args) isa Expr)) @@ -354,7 +389,7 @@ function parse_all(::Type{Expr}, code::AbstractString; filename="none") @error Text(String(take!(buf))) end - green_tree = to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") + green_tree = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") tree = 
SyntaxNode(source_file, green_tree) diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 24fb10365231a..0b7590c7eb691 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -68,7 +68,7 @@ st = ParseStream(code) emit(st, p1, K"toplevel") end -@test JuliaSyntax.to_raw_tree(st) isa JuliaSyntax.GreenNode +@test JuliaSyntax.build_tree(GreenNode, st) isa JuliaSyntax.GreenNode # ## Input code #= diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8c47edc2d00f8..5ccff9e995110 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,7 +1,7 @@ -function test_parse(production, code) +function test_parse(production, code; v=v"1.6") stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"Nothing") + production(JuliaSyntax.ParseState(stream; julia_version=v)) + t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"Nothing") source = SourceFile(code) s = SyntaxNode(source, t) if JuliaSyntax.kind(s) == K"Nothing" @@ -12,10 +12,10 @@ function test_parse(production, code) end # Version of test_parse for interactive exploration -function itest_parse(production, code) +function itest_parse(production, code, julia_version::VersionNumber) stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.to_raw_tree(stream, wrap_toplevel_as_kind=K"toplevel") + production(JuliaSyntax.ParseState(stream; julia_version)) + t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") println(stdout, "# Code:\n$code\n") @@ -38,8 +38,9 @@ function itest_parse(production, code) printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) show(stdout, MIME"text/plain"(), f_ex) - return (code, stream, t, s, ex) + # return (code, stream, t, s, ex) end + nothing end # TODO: @@ -326,6 +327,22 @@ tests = [ "function f() \n a \n b end" => "(function (call 
:f) (block :a :b))" "function f() end" => "(function (call :f) (block))" ], + JuliaSyntax.parse_try => [ + "try \n x \n catch e \n y \n finally \n z end" => + "(try (block :x) :e (block :y) false (block :z))" + ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") => + "(try (block :x) :e (block :y) (block :z) (block :w))" + "try x catch ; y end" => "(try (block :x) false (block :y) false false)" + "try x catch \n y end" => "(try (block :x) false (block :y) false false)" + "try x catch e y end" => "(try (block :x) :e (block :y) false false)" + "try x finally y end" => "(try (block :x) false false false (block :y))" + # v1.8 only + ((v=v"1.8",), "try catch ; else end") => "(try (block) false (block) (block) false)" + ((v=v"1.8",), "try else end") => "(try (block) false false (error (block)) false)" + ((v=v"1.7",), "try catch ; else end") => "(try (block) false (block) (error (block)) false)" + # finally before catch :-( + "try x finally y catch e z end" => "(try (block :x) false false false (block :y) :e (block :z))" + ], JuliaSyntax.parse_iteration_spec => [ "i = rhs" => "(= :i :rhs)" "i in rhs" => "(= :i :rhs)" @@ -380,7 +397,12 @@ tests = [ @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests @testset "$(repr(input))" for (input,output) in test_specs - @test test_parse(production, input) == output + if !(input isa AbstractString) + opts,input = input + else + opts = (;) + end + @test test_parse(production, input; opts...) 
== output end end end diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/test/simple_parser.jl index 699b44f01a7fd..9ca3f8c9b02d7 100644 --- a/JuliaSyntax/test/simple_parser.jl +++ b/JuliaSyntax/test/simple_parser.jl @@ -138,7 +138,7 @@ end function parse_and_show(production::Function, code) st = ParseStream(code) production(st) - t = JuliaSyntax.to_raw_tree(st, wrap_toplevel_as_kind=K"error") + t = JuliaSyntax.build_tree(GreenNode, st, wrap_toplevel_as_kind=K"error") show(stdout, MIME"text/plain"(), t, code, show_trivia=true) if !isempty(st.diagnostics) println() From 644aafd3fa0c1ff4e1b6b6e3f679e259bf37fbb7 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 23 Dec 2021 20:28:33 +1000 Subject: [PATCH 0257/1109] Partially implement parse_cat + some cleanup * Implement the parse_vect part of parse_cat so we can now index into arrays. * Also implement parse_generator * Clean up references to flisp function names * Various small cleanups and fixes to make the code clean from JET optimization warnings. --- JuliaSyntax/README.md | 39 +++- JuliaSyntax/src/JuliaSyntax.jl | 2 + JuliaSyntax/src/parse_stream.jl | 4 +- JuliaSyntax/src/parser.jl | 365 ++++++++++++++++---------------- JuliaSyntax/src/syntax_tree.jl | 25 ++- JuliaSyntax/src/token_kinds.jl | 6 +- JuliaSyntax/test/parser.jl | 30 ++- 7 files changed, 277 insertions(+), 194 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 3e924360c4428..00d85fa471599 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -464,7 +464,10 @@ useful even for DSLs: * In try-catch-finally, the `finally` clause is allowed before the `catch`, but always executes afterward. (Presumably was this a mistake? It seems pretty awful!) 
-## Parsing oddities and warts +* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is + parsed as `Expr(:vect)` + +## Parsing / AST oddities and warts There's many apparent inconsistencies between how `kw` and `=` are used when parsing `key=val` pairs inside parentheses. @@ -508,3 +511,37 @@ Other oddities: Presumably because of the need to add a line number node in the flisp parser `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` + +Flattened generators are hard because the Julia AST doesn't respect a key +rule we normally expect: that the children of an AST node are a contiguous +range in the source text. This is because the `for`s in +`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop as + +``` +for x in xs + for y in ys + push!(xy, collection) +``` + +and the standard Julia AST is like this: + +``` +(flatten + (generator + (generator + xy + (= y ys)) + (= x xs)) +``` + +however, note that if this tree were flattened, the order of tokens would be +`(xy) (y in ys) (x in xs)` which is *not* the source order. So in this case +our tree needs to deviate from the Julia AST. The natural representation seems +to be to flatten the generators: + +``` +(flatten + xy + (= x xs) + (= y ys)) +``` diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 1baf970536cff..602cd610d5fb8 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -17,4 +17,6 @@ include("parse_stream.jl") include("parser.jl") +# include("hooks.jl") + end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 706c72e9b9b66..8d4989c397dce 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -199,7 +199,7 @@ Like `peek`, but return the full token information rather than just the kind. 
function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false) stream.peek_count += 1 if stream.peek_count > 100_000 - error("The parser seems stuck at byte $(position(stream))") + error("The parser seems stuck at byte $(stream.next_byte)") end stream.lookahead[_lookahead_index(stream, n, skip_newlines)] end @@ -292,7 +292,7 @@ end Shift the current token from the input to the output, adding the given flags. """ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, - error=nothing, remap_kind=K"Nothing") + error=nothing, remap_kind::Kind=K"Nothing") emark = position(stream) _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) if !isnothing(error) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 12b429d0d04ed..abed62a928efa 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -220,7 +220,7 @@ end # Returns true if the block was nontrivial and a node needs to be emitted by # the caller. # -# flisp: (define (parse-Nary s down ops head closer? 
add-linenums) +# flisp: parse-Nary function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) bump_trivia(ps) k = peek(ps) @@ -264,7 +264,7 @@ end # end # ==> (block a b) # -# flisp: (define (parse-block s (down parse-eq)) +# flisp: parse-block function parse_block(ps::ParseState, down=parse_eq, mark=position(ps), consume_end=false) parse_block_inner(ps::ParseState, down) @@ -282,7 +282,7 @@ end # a;b;c ==> (toplevel a b c) # a;;;b;; ==> (toplevel a b) # -# flisp: (define (parse-stmts s) +# flisp: parse-stmts function parse_stmts(ps::ParseState) mark = position(ps) do_emit = parse_Nary(ps, parse_docstring, (K";",), (K"NewlineWs",)) @@ -301,7 +301,7 @@ function parse_stmts(ps::ParseState) end end -# flisp: (define (parse-eq s) (parse-assignment s parse-comma)) +# flisp: parse-eq function parse_eq(ps::ParseState) parse_assignment(ps, parse_comma, false) end @@ -311,7 +311,7 @@ end # If an `(= x y)` node was emitted, returns the position of that node in the # output list so that it can be changed to `(kw x y)` later if necessary. # -# flisp: (define (parse-eq* s) +# flisp: parse-eq* function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) @@ -325,14 +325,14 @@ function parse_eq_star(ps::ParseState, equals_is_kw=false) end end -# flisp: (define (eventually-call? ex) +# flisp: eventually-call? 
function is_eventually_call(ex) TODO("is_eventually_call unimplemented") end # a = b ==> (= a b) # -# flisp: (define (parse-assignment s down) +# flisp: parse-assignment function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) mark = position(ps) down(ps) @@ -366,7 +366,7 @@ end # parse_comma is needed for commas outside parens, for example a = b,c # -# flisp: (define (parse-comma s) +# flisp: parse-comma function parse_comma(ps::ParseState, do_emit=true) mark = position(ps) n_commas = 0 @@ -393,7 +393,7 @@ function parse_comma(ps::ParseState, do_emit=true) end end -# flisp: (define (parse-pair s) (parse-RtoL s parse-cond is-prec-pair? #f parse-pair)) +# flisp: parse-pair function parse_pair(ps::ParseState) parse_RtoL(ps, parse_cond, is_prec_pair, false, parse_pair) end @@ -401,7 +401,7 @@ end # Parse short form conditional expression # a ? b : c ==> (if a b c) # -# flisp: (define (parse-cond s) +# flisp: parse-cond function parse_cond(ps::ParseState) mark = position(ps) parse_arrow(ps) @@ -478,7 +478,7 @@ function parse_comparison(ps::ParseState) mark = position(ps) parse_pipe_lt(ps) n_comparisons = 0 - op_pos = 0 + op_pos = NO_POSITION initial_kind = peek(ps) while is_prec_comparison(peek(ps)) n_comparisons += 1 @@ -605,7 +605,7 @@ end # parse left to right chains of a given binary operator # -# flisp: (define (parse-chain s down op) +# flisp: parse-chain function parse_chain(ps::ParseState, down, op_kind) while (t = peek_token(ps); kind(t) == op_kind) if ps.space_sensitive && t.had_whitespace && @@ -722,7 +722,7 @@ function parse_where_chain(ps0::ParseState, mark) end end -# flisp: (define (parse-where s down) +# flisp: parse-where function parse_where(ps::ParseState, down) # `where` needs to be below unary for the following to work # +(x::T,y::T) where {T} = x @@ -795,7 +795,7 @@ end # Deal with numeric literal prefixes and unary calls # -# flisp: (define (parse-unary s) +# flisp: parse-unary function parse_unary(ps::ParseState) mark = position(ps) 
bump_trivia(ps) @@ -890,11 +890,12 @@ function parse_unary_call(ps::ParseState) # +(a;b) ==> (call + (block a b)) # +(a=1) ==> (call + (= a 1)) # - # However this heuristic fails in some cases: - # +(a;b,c) ??> (call + (tuple a (parameters b c))) + # But this heuristic fails in some cases so here we use a simpler rule: + # if there were any commas, it was a function call. Then we also parse + # things like the following in a useful way: + # + # +(a;b,c) ==> (call + (tuple a (parameters b c))) # - # Here we use a simpler rule: if there were any commas, it was a - # function call. is_call = false is_block = false parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs @@ -954,7 +955,7 @@ end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) - parse_call_with_initial_ex(ps, mark) + parse_call_chain(ps, mark) parse_decl_with_initial_ex(ps, mark) if is_prec_power(peek(ps)) bump(ps) @@ -1009,16 +1010,10 @@ function parse_call(ps::ParseState) # f(x) ==> (call f x) # $f(x) ==> (call ($ f) x) parse_unary_prefix(ps) - parse_call_with_initial_ex(ps, mark) + parse_call_chain(ps, mark) end end -# flisp: parse-call-with-initial-ex -function parse_call_with_initial_ex(ps::ParseState, mark) - # FIXME: Remove parse_call_with_initial_ex which is redundant now? - parse_call_chain(ps, mark) -end - # parse syntactic unary operators # # &a ==> (& a) @@ -1110,7 +1105,7 @@ end # f(a,b) ==> (call f a b) # f(a).g(b) ==> (call (. (call f a) (quote g)) b) # -# flisp: (define (parse-call-chain s ex macrocall?) 
+# flisp: parse-call-chain, parse-call-with-initial-ex function parse_call_chain(ps::ParseState, mark, is_macrocall=false) if is_number(peek_behind(ps)) && peek(ps) == K"(" # juxtaposition with numbers is multiply, not call @@ -1192,22 +1187,22 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_cat(ParseState(ps, end_symbol=true), K"]") + ckind = parse_cat(ParseState(ps, end_symbol=true), K"]", ps.end_symbol) + # a[i] ==> (ref a i) + # a[i,j] ==> (ref a i j) + # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) + # TODO: other test cases + out_kind = ckind == K"vect" ? K"ref" : + ckind == K"hcat" ? K"typed_hcat" : + ckind == K"vcat" ? K"typed_vcat" : + ckind == K"comprehension" ? K"typed_comprehension" : + ckind == K"ncat" ? K"typed_ncat" : + error("Unrecognized kind in parse_cat") + emit(ps, mark, out_kind) if is_macrocall emit(ps, mark, K"macrocall") break end - # ref is syntax, so we can distinguish - # a[i] = x from - # ref(a,i) = x - # - # FIXME: Big list of rewrites - # - # vect -> ref - # hcat -> typed_hcat - # vcat -> typed_vcat - # comprehension -> typed_comprehension - # ncat -> typed_ncat elseif k == K"." 
bump_disallowed_space(ps) if peek(ps, 2) == K"'" @@ -1872,7 +1867,7 @@ function parse_do(ps::ParseState) emit(ps, mark, K"->") end -# flisp: (define (parse-imports s word) +# flisp: parse-imports function parse_imports(ps::ParseState, word) TODO("parse_imports unimplemented") end @@ -1901,22 +1896,22 @@ function parse_macro_name(ps::ParseState; remap_kind=false) end end -# flisp: (define (parse-atsym s) +# flisp: parse-atsym function parse_atsym(ps::ParseState) TODO("parse_atsym unimplemented") end -# flisp: (define (parse-import-dots s) +# flisp: parse-import-dots function parse_import_dots(ps::ParseState) TODO("parse_import_dots unimplemented") end -# flisp: (define (parse-import-path s word) +# flisp: parse-import-path function parse_import_path(ps::ParseState, word) TODO("parse_import_path unimplemented") end -# flisp: (define (parse-import s word from) +# flisp: parse-import function parse_import(ps::ParseState, word, from) TODO("parse_import unimplemented") end @@ -1938,7 +1933,7 @@ function parse_comma_separated(ps::ParseState, down) return n_subexprs end -# flisp: (define (parse-comma-separated-assignments s) +# flisp: parse-comma-separated-assignments function parse_comma_separated_assignments(ps::ParseState) TODO("parse_comma_separated_assignments unimplemented") end @@ -1962,7 +1957,7 @@ end # i = 1:10 ==> (= i (call : 1 10)) # (i,j) in iter ==> (= (tuple i j) iter) # -# flisp: (define (parse-iteration-spec s) +# flisp: parse-iteration-spec function parse_iteration_spec(ps::ParseState) mark = position(ps) k = peek(ps) @@ -1995,7 +1990,7 @@ function parse_comma_separated_iters(ps::ParseState) parse_comma_separated(ps, parse_iteration_spec) end -# flisp: (define (parse-space-separated-exprs s) +# flisp: parse-space-separated-exprs function parse_space_separated_exprs(ps::ParseState) with_space_sensitive(ps) do ps n_sep = 0 @@ -2012,16 +2007,6 @@ function parse_space_separated_exprs(ps::ParseState) end end -# flisp: (define (has-parameters? 
lst) -function is_has_parameters(lst) - TODO("is_has_parameters unimplemented") -end - -# flisp: (define (to-kws lst) -function to_kws(lst) - TODO("to_kws unimplemented") -end - # like parse-arglist, but with `for` parsed as a generator # # flisp: parse-call-arglist @@ -2036,44 +2021,119 @@ function parse_call_arglist(ps::ParseState, closer, is_macrocall) end end +# Parse the suffix of comma-separated array expressions such as +# [x, suffix]. Consumes `closer`, but does not emit the AST node for the +# surrounding brackets. +# # flisp: parse-vect -function parse_vect(ps::ParseState, first, closer) - TODO("parse_vect unimplemented") +function parse_vect(ps::ParseState, closer) + # [x, y] ==> (vect x y) + # [x, y] ==> (vect x y) + # [x,y ; z] ==> (vect x y (parameters z)) + # [x=1, y=2] ==> (vect (= x 1) (= y 2)) + # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) + parse_brackets(ps, closer) do _, _, _ + bump_closing_token(ps, closer) + return (needs_parameters=true, + eq_is_kw_before_semi=false, + eq_is_kw_after_semi=false) + end + return K"vect" end -# flisp: (define (parse-generator s first) -function parse_generator(ps::ParseState, first) - TODO("parse_generator unimplemented") +# Flattened generators are hard because the Julia AST doesn't respect a key +# rule we normally expect: that the children of an AST node are a contiguous +# range in the source text. This is because the `for`s in +# `[xy for x in xs for y in ys]` are parsed in the normal order of a for as +# +# (flatten +# (generator +# (generator +# xy +# y in ys) +# x in xs)) +# +# A reasonable way to deal with this is to emit only the flatten: +# +# (flatten xy (= x xs) (= y ys)) +# +# then reconstruct the nested generators when converting to Expr. 
+# +# flisp: parse-generator +function parse_generator(ps::ParseState, mark, flatten=false) + # (x for x in xs) ==> (generator x (= x xs)) + t = peek_token(ps) + if !t.had_whitespace + # [(x)for x in xs] ==> (comprehension (generator x (error) (= x xs))) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="Expected space before `for` in generator") + end + @assert kind(t) == K"for" + bump(ps, TRIVIA_FLAG) + filter_mark = position(ps) + parse_comma_separated_iters(ps) + if peek(ps) == K"if" + bump(ps, TRIVIA_FLAG) + parse_cond(ps) + emit(ps, filter_mark, K"filter") + end + t = peek_token(ps) + if kind(t) == K"for" + # [xy for x in xs for y in ys] ==> (comprehension (flatten xy (= x xs) (= y ys))) + parse_generator(ps, mark, true) + emit(ps, mark, K"flatten") + elseif !flatten + emit(ps, mark, K"generator") + end end -# flisp: (define (parse-comprehension s first closer) -function parse_comprehension(ps::ParseState, first, closer) - TODO("parse_comprehension unimplemented") +# flisp: parse-comprehension +function parse_comprehension(ps::ParseState, mark, closer) + ps = ParseState(ps, whitespace_newline=true, + space_sensitive=false) + parse_generator(ps, mark) + bump_closing_token(ps, closer) + return K"comprehension" end -# flisp: (define (parse-array s first closer gotnewline last-end-symbol) -function parse_array(ps::ParseState, first, closer, gotnewline, last_end_symbol) +# flisp: parse-array +function parse_array(ps::ParseState, closer, gotnewline, end_is_symbol) TODO("parse_array unimplemented") end -# flisp: (define (expect-space-before s t) -function expect_space_before(s, t) - TODO("expect_space_before unimplemented") -end - -# Parse syntax inside of `[]` or `{}` +# Parse array concatenation/construction/indexing syntax inside of `[]` or `{}`. 
# -# flisp: (define (parse-cat s closer last-end-symbol) -function parse_cat(ps::ParseState, closer, last_end_symbol) - TODO("parse_cat unimplemented") - ps = ParseState(ps0, range_colon_enabled=true, +# flisp: parse-cat +function parse_cat(ps::ParseState, closer, end_is_symbol) + ps = ParseState(ps, range_colon_enabled=true, space_sensitive=true, where_enabled=true, whitespace_newline=false, for_generator=true) - if require_token(ps) == closer - take_token!(ps) - return + k = peek(ps, skip_newlines=true) + if k == closer + # [] ==> (vect) + return parse_vect(ps, closer) + end + mark = position(ps) + parse_eq_star(ps) + k = peek(ps, skip_newlines=true) + if k in (K",", closer) + if k == K"," + # [x,] ==> (vect x) + bump(ps, TRIVIA_FLAG) + end + # [x] ==> (vect x) + # [x \n ] ==> (vect x) + parse_vect(ps, closer) + elseif k == K"for" + # [x for x in xs] ==> (comprehension (generator x (= x xs))) + # [x \n\n for x in xs] ==> (comprehension (generator x (= x xs))) + parse_comprehension(ps, mark, closer) + else + # [x y] ==> (hcat x y) + # and other forms; See parse_array. + parse_array(ps, closer, false, end_is_symbol) end end @@ -2116,6 +2176,10 @@ function parse_paren(ps::ParseState, check_identifiers=true) is_tuple = false is_block = false parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs + # Parentheses used for grouping + # (a * b) ==> (call-i * a b) + # (a=1) ==> (= a 1) + # (x) ==> x is_tuple = had_commas || (initial_semi && (num_semis == 1 || num_subexprs > 0)) is_block = num_semis > 0 @@ -2153,9 +2217,9 @@ end # Handle bracketed syntax inside any of () [] or {} where there's a mixture # of commas and semicolon delimiters. # -# This is hard because there's various ambiguities depending on context. -# In general (X; Y) is difficult when X and Y are subexpressions possibly -# containing `,` and `=`. +# For parentheses this is hard because there's various ambiguities depending on +# context. 
In general (X; Y) is difficult when X and Y are subexpressions +# possibly containing `,` and `=`. # # For example, (a=1; b=2) could be seen to parse four different ways! # @@ -2169,7 +2233,7 @@ end # syntax so the parse tree is pretty strange in these cases! Some macros # probably use it though. Example: # -# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (kw d 2) (parameters e (kw f 3)) +# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (kw d 2) (parameters e (kw f 3)))) # # Deciding which of these representations to use depends on both the prefix # context and the contained expressions. To distinguish between blocks vs @@ -2195,7 +2259,7 @@ function parse_brackets(after_parse::Function, if k == closing_kind break elseif k == K";" - # Start of "parameters" list + # Start of parameters list # a, b; c d ==> a b (parameters c d) push!(params_marks, position(ps)) if num_semis == 0 @@ -2222,12 +2286,6 @@ function parse_brackets(after_parse::Function, continue elseif k == K"for" # Generator syntax - # (i for i in 1:10) - if !t.had_whitespace - bump_invisible(ps, K"error", - error="expected whitespace before for") - end - bump(ps, TRIVIA_FLAG) parse_generator(ps, mark) else k_str = untokenize(k) @@ -2266,74 +2324,49 @@ function parse_brackets(after_parse::Function, end end -# flisp: (define (not-eof-for delim c) -function not_eof_for(delim, c) - TODO("not_eof_for unimplemented") -end - -# flisp: (define (take-char p) -function take_char(p) - TODO("take_char unimplemented") -end - -# map the first element of lst -# -# flisp: (define (map-first f lst) -function map_first(f, lst) - TODO("map_first unimplemented") -end - -# map the elements of lst where (pred index) is true -# e.g., (map-at odd? 
(lambda (x) 0) '(a b c d)) -> '(a 0 c 0) -# -# flisp: (define (map-at pred f lst) -function map_at(pred, f, lst) - TODO("map_at unimplemented") -end - -# flisp: (define (parse-raw-literal s delim) +# flisp: parse-raw-literal function parse_raw_literal(ps::ParseState, delim) TODO("parse_raw_literal unimplemented") end -# flisp: (define (unescape-parsed-string-literal strs) +# flisp: unescape-parsed-string-literal function unescape_parsed_string_literal(strs) TODO("unescape_parsed_string_literal unimplemented") end -# flisp: (define (strip-escaped-newline s raw) +# flisp: strip-escaped-newline function strip_escaped_newline(s, raw) TODO("strip_escaped_newline unimplemented") end # remove `\` followed by a newline # -# flisp: (define (strip-escaped-newline- s) +# flisp: strip-escaped-newline- function strip_escaped_newline_(s) TODO("strip_escaped_newline_ unimplemented") end -# flisp: (define (parse-string-literal s delim raw) +# flisp: parse-string-literal function parse_string_literal(ps::ParseState, delim, raw) TODO("parse_string_literal unimplemented") end -# flisp: (define (strip-leading-newline s) +# flisp: strip-leading-newline function strip_leading_newline(s) TODO("strip_leading_newline unimplemented") end -# flisp: (define (dedent-triplequoted-string lst) +# flisp: dedent-triplequoted-string function dedent_triplequoted_string(lst) TODO("dedent_triplequoted_string unimplemented") end -# flisp: (define (triplequoted-string-indentation lst) +# flisp: triplequoted-string-indentation function triplequoted_string_indentation(lst) TODO("triplequoted_string_indentation unimplemented") end -# flisp: (define (triplequoted-string-indentation- s) +# flisp: triplequoted-string-indentation- function triplequoted_string_indentation_(s) TODO("triplequoted_string_indentation_ unimplemented") end @@ -2341,46 +2374,46 @@ end # return the longest common prefix of the elements of l # e.g., (longest-common-prefix ((1 2) (1 4))) -> (1) # -# flisp: (define (longest-common-prefix 
l) +# flisp: longest-common-prefix function longest_common_prefix(l) TODO("longest_common_prefix unimplemented") end # return the longest common prefix of lists a & b # -# flisp: (define (longest-common-prefix2 a b) +# flisp: longest-common-prefix2 function longest_common_prefix2(a, b) TODO("longest_common_prefix2 unimplemented") end -# flisp: (define (longest-common-prefix2- a b p) +# flisp: longest-common-prefix2- function longest_common_prefix2_(a, b, p) TODO("longest_common_prefix2_ unimplemented") end -# flisp: (define (string-split s sep) +# flisp: string-split function string_split(s, sep) TODO("string_split unimplemented") end -# flisp: (define (string-split- s sep start splits) +# flisp: string-split- function string_split_(s, sep, start, splits) TODO("string_split_ unimplemented") end # replace all occurrences of a in s with b # -# flisp: (define (string-replace s a b) +# flisp: string-replace function string_replace(s, a, b) TODO("string_replace unimplemented") end -# flisp: (define (ends-interpolated-atom? c) +# flisp: ends-interpolated-atom? 
function is_ends_interpolated_atom(c) TODO("is_ends_interpolated_atom unimplemented") end -# flisp: (define (parse-interpolate s) +# flisp: parse-interpolate function parse_interpolate(ps::ParseState) TODO("parse_interpolate unimplemented") end @@ -2389,17 +2422,12 @@ end # when raw is #t, unescape only \\ and delimiter # otherwise do full unescaping, and parse interpolations too # -# flisp: (define (parse-string-literal- n p s delim raw) +# flisp: parse-string-literal- function parse_string_literal_(n, p, s, delim, raw) TODO("parse_string_literal_ unimplemented") end -# flisp: (define (not-eof-1 c) -function not_eof_1(c) - TODO("not_eof_1 unimplemented") -end - -# flisp: (define (unescape-string s) +# flisp: unescape-string function unescape_string_(s) TODO("unescape_string_ unimplemented") end @@ -2412,10 +2440,9 @@ end # flisp: parse-atom function parse_atom(ps::ParseState, check_identifiers=true) bump_trivia(ps) - atom_mark = position(ps) + mark = position(ps) leading_kind = peek(ps) - # TODO: Reorder to put most likely tokens first. This can be done because - # our tokens are richer in information than the flisp parser. + # TODO: Reorder to put most likely tokens first? 
if leading_kind == K":" # symbol/expression quote # :foo => (quote foo) @@ -2451,7 +2478,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # a[:(end)] ==> (ref a (quote (error-t end))) parse_atom(ParseState(ps, end_symbol=false), false) end - emit(ps, atom_mark, K"quote") + emit(ps, mark, K"quote") elseif leading_kind == K"=" bump(ps, TRIVIA_FLAG, error="unexpected `=`") elseif leading_kind == K"Identifier" @@ -2481,21 +2508,29 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif leading_kind == K"(" # parens or tuple parse_paren(ps, check_identifiers) elseif leading_kind == K"[" # cat expression - TODO("parse_cat unimplemented") - parse_cat(ps, tok, K"]", ps.end_symbol) + bump(ps, TRIVIA_FLAG) + ckind = parse_cat(ps, K"]", ps.end_symbol) + emit(ps, mark, ckind) elseif leading_kind == K"{" # cat expression - TODO("""parse_cat(ps, K"}", )""") + bump(ps, TRIVIA_FLAG) + ckind = parse_cat(ps, K"}", ps.end_symbol) + if ckind == K"hcat" + # {x y} ==> (bracescat (row x y)) + emit(ps, K"row", mark) + end + out_kind = ckind in (K"vect", K"comprehension") ? K"braces" : K"bracescat" + emit(ps, mark, out_kind) elseif is_string(leading_kind) bump(ps) # FIXME parse_string_literal(ps) elseif leading_kind == K"@" # macro call bump(ps, TRIVIA_FLAG) parse_macro_name(ps) - parse_call_chain(ps, atom_mark, true) + parse_call_chain(ps, mark, true) elseif leading_kind in (K"Cmd", K"TripleCmd") bump_invisible(ps, K"core_@cmd") bump(ps) - emit(ps, atom_mark, K"macrocall") + emit(ps, mark, K"macrocall") elseif isliteral(leading_kind) bump(ps) elseif is_closing_token(ps, leading_kind) @@ -2510,40 +2545,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) end end -# flisp: (define (macroify-name e . 
suffixes) -function macroify_name(e, _, suffixes) - TODO("macroify_name unimplemented") -end - -# flisp: (define (macroify-call s call startloc) -function macroify_call(s, call, startloc) - TODO("macroify_call unimplemented") -end - -# flisp: (define (called-macro-name e) -function called_macro_name(e) - TODO("called_macro_name unimplemented") -end - -# flisp: (define (maybe-docstring s e) -function maybe_docstring(s, e) - TODO("maybe_docstring unimplemented") -end - -# flisp: (define (simple-string-literal? e) (string? e)) -function is_simple_string_literal(e) - TODO("is_simple_string_literal unimplemented") -end - -# flisp: (define (doc-string-literal? s e) -function is_doc_string_literal(s, e) - TODO("is_doc_string_literal unimplemented") -end - # Parse docstrings attached by a space or single newline # "doc" foo ==> # -# flisp: (define (parse-docstring s production) +# flisp: parse-docstring function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) # TODO? This is not quite equivalent to the flisp parser which accepts diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 06cf497677c4e..572441b3d1130 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -4,13 +4,13 @@ #------------------------------------------------------------------------------- const RawFlags = UInt32 -EMPTY_FLAGS = RawFlags(0) -TRIVIA_FLAG = RawFlags(1<<0) +const EMPTY_FLAGS = RawFlags(0) +const TRIVIA_FLAG = RawFlags(1<<0) # The following flags are head-specific and could probably be allowed to cover # the same bits -INFIX_FLAG = RawFlags(1<<1) +const INFIX_FLAG = RawFlags(1<<1) # try-finally-catch -TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<2) +const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<2) # ERROR_FLAG = 0x80000000 struct SyntaxHead @@ -317,13 +317,13 @@ function _to_expr(node::SyntaxNode) if head(node) == :macrocall line_node = source_location(LineNumberNode, node.source, node.position) insert!(args, 2, 
line_node) - elseif head(node) == :call + elseif head(node) in (:call, :ref) # Move parameters block to args[2] if length(args) > 1 && Meta.isexpr(args[end], :parameters) insert!(args, 2, args[end]) pop!(args) end - elseif head(node) == :tuple || head(node) == :parameters + elseif head(node) in (:tuple, :parameters, :vect) # Move parameters blocks to args[1] if length(args) > 1 && Meta.isexpr(args[end], :parameters) pushfirst!(args, args[end]) @@ -355,6 +355,18 @@ function _to_expr(node::SyntaxNode) if else_ !== false push!(args, else_) end + elseif head(node) == :filter + pushfirst!(args, last(args)) + pop!(args) + elseif head(node) == :flatten + # The order of nodes inside the generators in Julia's flatten AST + # is noncontiguous in the source text, so need to reconstruct + # Julia's AST here from our alternative `flatten` expression. + gen = Expr(:generator, args[1], args[end]) + for i in length(args)-1:-1:2 + gen = Expr(:generator, gen, args[i]) + end + args = [gen] end if head(node) == :inert || (head(node) == :quote && length(args) == 1 && !(only(args) isa Expr)) @@ -404,7 +416,6 @@ function parse_all(::Type{Expr}, code::AbstractString; filename="none") ex end - function flisp_parse_all(code) flisp_ex = Base.remove_linenums!(Meta.parseall(code)) filter!(x->!(x isa LineNumberNode), flisp_ex.args) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 432eba4b6f8d5..8c548535c8ae7 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -859,7 +859,9 @@ Dict([ "typed_hcat" => Ts.TYPED_HCAT "typed_vcat" => Ts.TYPED_VCAT "typed_ncat" => Ts.TYPED_NCAT +"row" => Ts.ROW "generator" => Ts.GENERATOR +"filter" => Ts.FILTER "flatten" => Ts.FLATTEN "comprehension" => Ts.COMPREHENSION "typed_comprehension" => Ts.TYPED_COMPREHENSION @@ -882,8 +884,8 @@ for kw in split("""abstract baremodule begin break catch const block call comparison curly string inert macrocall kw parameters toplevel tuple ref vect braces bracescat 
hcat - vcat ncat typed_hcat typed_vcat typed_ncat generator - flatten comprehension typed_comprehension + vcat ncat typed_hcat typed_vcat typed_ncat row generator + filter flatten comprehension typed_comprehension error """) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 5ccff9e995110..266ac8a3516c3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -12,7 +12,7 @@ function test_parse(production, code; v=v"1.6") end # Version of test_parse for interactive exploration -function itest_parse(production, code, julia_version::VersionNumber) +function itest_parse(production, code, julia_version::VersionNumber=v"1.6") stream = ParseStream(code) production(JuliaSyntax.ParseState(stream; julia_version)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") @@ -195,6 +195,10 @@ tests = [ "a().@x(y)" => """(macrocall (error (. (call :a) (quote :x))) :y)""" "a().@x y" => """(macrocall (error (. (call :a) (quote :x))) :y)""" "a().@x{y}" => """(macrocall (error (. (call :a) (quote :x))) (braces :y))""" + # array indexing, typed comprehension, etc + "a[i]" => "(ref :a :i)" + "a[i,j]" => "(ref :a :i :j)" + "T[x for x in xs]" => "(typed_comprehension :T (generator :x (= :x :xs)))" # Keyword params always use kw inside tuple in dot calls "f.(a,b)" => "(. :f (tuple :a :b))" "f.(a=1)" => "(. 
:f (tuple (kw :a 1)))" @@ -352,11 +356,11 @@ tests = [ ], JuliaSyntax.parse_paren => [ # Parentheses used for grouping - # NB: The toplevel below is an artificial part of the test setup "(a * b)" => "(call :* :a :b)" "(a=1)" => "(= :a 1)" "(x)" => ":x" # Tuple syntax with commas + "()" => "(tuple)" "(x,)" => "(tuple :x)" "(x,y)" => "(tuple :x :y)" "(x=1, y=2)" => "(tuple (= :x 1) (= :y 2))" @@ -372,6 +376,8 @@ tests = [ "(a=1;)" => "(block (= :a 1))" "(a;b;;c)" => "(block :a :b :c)" "(a=1; b=2)" => "(block (= :a 1) (= :b 2))" + # Generators + "(x for x in xs)" => "(generator :x (= :x :xs))" ], JuliaSyntax.parse_atom => [ ":foo" => "(quote :foo)" @@ -386,6 +392,26 @@ tests = [ "@end x" => """(macrocall Symbol("@end") :x)""" # __dot__ macro "@. x y" => """(macrocall Symbol("@__dot__") :x :y)""" + # parse_cat + "[]" => "(vect)" + "[x,]" => "(vect :x)" + "[x]" => "(vect :x)" + "[x \n ]" => "(vect :x)" + "[x \n\n ]" => "(vect :x)" + # parse_comprehension / parse_generator + "[x for x in xs]" => "(comprehension (generator :x (= :x :xs)))" + "[x \n\n for x in xs]" => "(comprehension (generator :x (= :x :xs)))" + "[(x)for x in xs]" => "(comprehension (generator :x (error) (= :x :xs)))" + "[xy for x in xs for y in ys]" => "(comprehension (flatten :xy (= :x :xs) (= :y :ys)))" + # parse_vect + "[x, y]" => "(vect :x :y)" + "[x, y]" => "(vect :x :y)" + "[x,y ; z]" => "(vect :x :y (parameters :z))" + "[x=1, y=2]" => "(vect (= :x 1) (= :y 2))" + "[x=1, ; y=2]" => "(vect (= :x 1) (parameters (= :y 2)))" + # parse_paren + ":(=)" => "(quote :(=))" + ":(::)" => "(quote :(::))" # Errors ": foo" => "(quote (error) :foo)" ], From 40f56a2ddbd6a341576523cdefe1a366d5fe2103 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 24 Dec 2021 12:22:32 +1000 Subject: [PATCH 0258/1109] Try fixing CI: Add Manifest to point to Tokenize branch --- JuliaSyntax/Manifest.toml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 JuliaSyntax/Manifest.toml diff --git 
a/JuliaSyntax/Manifest.toml b/JuliaSyntax/Manifest.toml new file mode 100644 index 0000000000000..decda6f1b942d --- /dev/null +++ b/JuliaSyntax/Manifest.toml @@ -0,0 +1,8 @@ +# This file is machine-generated - editing it directly is not advised + +[[Tokenize]] +git-tree-sha1 = "433847efd9ac3d09deef4b136f0c435f22b967d9" +repo-rev = "cjf/julia-syntax-hacks" +repo-url = "https://github.com/c42f/Tokenize.jl.git" +uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" +version = "0.5.21" From 32d666628800e404f41117dd445414062c711bfb Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 29 Dec 2021 07:22:44 +1000 Subject: [PATCH 0259/1109] Parse array concatenation syntax Parse vcat/hcat/ncat. Gosh this was tricky to get working and it's still not 100% right as trailing separator trivia can be attributed to the wrong span in the output. --- JuliaSyntax/README.md | 101 ++++++++------- JuliaSyntax/src/parser.jl | 219 +++++++++++++++++++++++++++++---- JuliaSyntax/src/syntax_tree.jl | 23 +++- JuliaSyntax/src/token_kinds.jl | 3 +- JuliaSyntax/test/parser.jl | 25 ++++ 5 files changed, 293 insertions(+), 78 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 00d85fa471599..2dcf7b9c4c483 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -432,7 +432,7 @@ replaced by `_` and predicates prefixed by `is_`. ## Flisp parser bugs -Some things seem to be bugs: +Here's some behaviors which seem to be bugs: * Macro module paths allow calls which gives weird stateful semantics! ``` @@ -449,25 +449,31 @@ Some things seem to be bugs: ``` const a = b = 1 ``` +* Parsing the `ncat` array concatenation syntax within braces gives + strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as + `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy + to how `{a b}` produces `(bracescat (row a b))`. +* `export a, \n $b` is rejected, but `export a, \n b` parses fine. 
+* In try-catch-finally, the `finally` clause is allowed before the `catch`, but + always executes afterward. (Presumably was this a mistake? It seems pretty awful!) +* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is + parsed as `Expr(:vect)` + +## Parsing / AST oddities and warts + +### Questionable allowed forms There's various allowed syntaxes which are fairly easily detected in the parser, but which will be rejected later during lowering. To allow building DSLs this is fine and good but some such allowed syntaxes don't seem very -useful even for DSLs: +useful, even for DSLs: * `macro (x) end` is allowed but there are no anonymous macros. * `abstract type A < B end` and other subtypes comparisons are allowed, but only `A <: B` makes sense. +* `x where {S T}` produces `(where x (bracescat (row S T)))` -* `export a, \n $b` is rejected, but `export a, \n b` parses fine. - -* In try-catch-finally, the `finally` clause is allowed before the `catch`, but - always executes afterward. (Presumably was this a mistake? It seems pretty awful!) - -* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is - parsed as `Expr(:vect)` - -## Parsing / AST oddities and warts +### `kw` and `=` inconsistencies There's many apparent inconsistencies between how `kw` and `=` are used when parsing `key=val` pairs inside parentheses. @@ -483,34 +489,12 @@ parsing `key=val` pairs inside parentheses. # (tuple (parameters (parameters e f) c d) a b) (a,b; c,d; e,f) ``` -* Long-form anonymous functions have argument lists which are parsedj - as tuples. But the flisp parser doesn't pass the context that they're - function argument lists and needs some ugly disambiguation code. This also - leads to more inconsistency in the use of `kw` for keywords. - -Other oddities: - -* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. 
- I suppose this is somewhat useful for AST consumers, but it seems a bit weird - and unnecessary. - -* `let` bindings might be stored in a block, or they might not be, depending on - special cases: - ``` - # Special cases not in a block - let x=1 ; end ==> (let (= x 1) (block)) - let x::1 ; end ==> (let (:: x 1) (block)) - let x ; end ==> (let x (block)) - - # In a block - let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) - let x+=1 ; end ==> (let (block (+= x 1)) (block)) - ``` +* Long-form anonymous functions have argument lists which are parsed + as tuples rather than argument lists. This leads to more inconsistency in the + use of `kw` for keywords. -* The `elseif` condition is always in a block but not `if` condition. - Presumably because of the need to add a line number node in the flisp parser - `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` +### Flattened generators Flattened generators are hard because the Julia AST doesn't respect a key rule we normally expect: that the children of an AST node are a contiguous @@ -519,19 +503,19 @@ range in the source text. This is because the `for`s in ``` for x in xs - for y in ys - push!(xy, collection) +for y in ys + push!(xy, collection) ``` and the standard Julia AST is like this: ``` (flatten - (generator - (generator - xy - (= y ys)) - (= x xs)) +(generator +(generator + xy + (= y ys)) +(= x xs)) ``` however, note that if this tree were flattened, the order of tokens would be @@ -541,7 +525,30 @@ to be to flatten the generators: ``` (flatten - xy - (= x xs) - (= y ys)) +xy +(= x xs) +(= y ys)) ``` + +### Other oddities + +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. + I suppose this is somewhat useful for AST consumers, but it seems a bit weird + and unnecessary. 
+ +* `let` bindings might be stored in a block, or they might not be, depending on + special cases: + ``` + # Special cases not in a block + let x=1 ; end ==> (let (= x 1) (block)) + let x::1 ; end ==> (let (:: x 1) (block)) + let x ; end ==> (let x (block)) + + # In a block + let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + let x+=1 ; end ==> (let (block (+= x 1)) (block)) + ``` + +* The `elseif` condition is always in a block but not the `if` condition. + Presumably because of the need to add a line number node in the flisp parser + `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index abed62a928efa..13b1e323913bc 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -712,8 +712,13 @@ function parse_where_chain(ps0::ParseState, mark) k = peek(ps) if k == K"{" # x where {T,S} ==> (where x T S) - TODO("bracescat, braces etc allowed here??") - parse_cat(ps, K"}", ps.end_symbol) + ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) + if ckind != K"vect" + # Various nonsensical forms permitted here + # x where {T S} ==> (where x (bracescat (row T S))) + # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) + emit_braces(ps, mark, ckind, cflags) + end emit(ps, mark, K"where") else parse_comparison(ps) @@ -1061,7 +1066,6 @@ function parse_identifier_or_interpolate(ps::ParseState, outermost=true) else parse_atom(ps) if outermost && !is_identifier(peek_behind(ps)) - @info "" peek_behind(ps) emit(ps, mark, K"error", error="Expected identifier or interpolation syntax") end @@ -1187,18 +1191,19 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - ckind = parse_cat(ParseState(ps, end_symbol=true), K"]", ps.end_symbol) + ckind, cflags = parse_cat(ParseState(ps, end_symbol=true), + K"]", ps.end_symbol) # a[i] ==> (ref a i) # a[i,j] ==> (ref a i j) # T[x for x in 
xs] ==> (typed_comprehension T (generator x (= x xs))) # TODO: other test cases - out_kind = ckind == K"vect" ? K"ref" : - ckind == K"hcat" ? K"typed_hcat" : - ckind == K"vcat" ? K"typed_vcat" : - ckind == K"comprehension" ? K"typed_comprehension" : - ckind == K"ncat" ? K"typed_ncat" : - error("Unrecognized kind in parse_cat") - emit(ps, mark, out_kind) + outk = ckind == K"vect" ? K"ref" : + ckind == K"hcat" ? K"typed_hcat" : + ckind == K"vcat" ? K"typed_vcat" : + ckind == K"comprehension" ? K"typed_comprehension" : + ckind == K"ncat" ? K"typed_ncat" : + error("Unrecognized kind in parse_cat") + emit(ps, mark, outk, cflags) if is_macrocall emit(ps, mark, K"macrocall") break @@ -1572,7 +1577,7 @@ function parse_resword(ps::ParseState) parse_comma_separated(ps, parse_export_symbol) emit(ps, mark, K"export") elseif word in (K"import", K"using") - TODO("parse_resword") + TODO("parse_resword - $word") elseif word == K"do" bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") else @@ -2038,7 +2043,7 @@ function parse_vect(ps::ParseState, closer) eq_is_kw_before_semi=false, eq_is_kw_after_semi=false) end - return K"vect" + return (K"vect", EMPTY_FLAGS) end # Flattened generators are hard because the Julia AST doesn't respect a key @@ -2093,12 +2098,165 @@ function parse_comprehension(ps::ParseState, mark, closer) space_sensitive=false) parse_generator(ps, mark) bump_closing_token(ps, closer) - return K"comprehension" + return (K"comprehension", EMPTY_FLAGS) end +# Parse array concatenation syntax with multiple semicolons +# +# Normal matrix construction syntax +# [x y ; z w] ==> (vcat (row x y) (row z w)) +# [x y ; z w ; a b] ==> (vcat (row x y) (row z w) (row a b)) +# [x ; y ; z] ==> (vcat x y z) +# [x;] ==> (vcat x) +# [x y] ==> (hcat x y) +# +# Mismatched rows +# [x y ; z] ==> (vcat (row x y) z) +# +# Double semicolon with spaces allowed (only) for line continuation +# [x y ;;\n z w] ==> (hcat x y z w) +# [x y ;; z w] ==> (hcat x y (error) z w) +# +# Single elements in 
rows +# [x ; y ;; z ] ==> (ncat 2 (nrow 1 x y) z) +# [x y ;;; z ] ==> (ncat 3 (row x y) z) +# +# Higher dimensional ncat +# Row major +# [x y ; z w ;;; a b ; c d] ==> +# (ncat 3 (nrow 1 (row x y) (row z w)) (nrow 1 (row a b) (row c d))) +# Column major +# [x ; y ;; z ; w ;;; a ; b ;; c ; d] ==> +# (ncat 3 (nrow 2 (nrow 1 x y) (nrow 1 z w)) (nrow 2 (nrow 1 a b) (nrow 1 c d))) +# # flisp: parse-array -function parse_array(ps::ParseState, closer, gotnewline, end_is_symbol) - TODO("parse_array unimplemented") +function parse_array(ps::ParseState, mark, closer, end_is_symbol) + ps = ParseState(ps, end_symbol=end_is_symbol) + + # Outer array parsing loop - parse chain of separators with descending + # precedence such as + # [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e) + # + # Ascending and equal precedence is handled by parse_array_inner. + # + # This is a variant of a Pratt parser, but we have a separate outer loop + # because there's no minimum precedence/binding power - you can always get + # a lower binding power by adding more semicolons. + # + # For an excellent overview of Pratt parsing, see + # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html + (dim, binding_power) = parse_array_separator(ps) + while true + (next_dim, next_bp) = parse_array_inner(ps, binding_power) + if next_bp == typemin(Int) + break + end + if binding_power == 0 + emit(ps, mark, K"row") + else + emit(ps, mark, K"nrow", numeric_flags(dim)) + end + dim = next_dim + binding_power = next_bp + end + bump_closing_token(ps, closer) + return binding_power == -1 ? (K"vcat", EMPTY_FLAGS) : + binding_power == 0 ? (K"hcat", EMPTY_FLAGS) : + (K"ncat", numeric_flags(dim)) +end + +# Parse equal and ascending precedence chains of array concatenation operators +# (semicolons, newlines and whitespace). Invariants: +# +# * The caller must have already consumed +# - The left hand side +# - The concatenation operator, providing the current binding_power. 
+# So eg, we're here in the input stream +# | +# [a ;; b ; c ] +# [a ;; ] +# +# * The caller must call emit() to delimit the AST node for this binding power. +# +function parse_array_inner(ps, binding_power) + mark = NO_POSITION + dim = -1 + bp = binding_power + while true + if bp < binding_power + return (dim, bp) + end + # Allow trailing separators + # [a ;] ==> (vcat a) + # [a ; b;;] ==> (ncat-2 (nrow-1 a b)) + if is_closing_token(ps, peek(ps)) + return (typemin(Int), typemin(Int)) + end + if bp == binding_power + # Parse one expression + mark = position(ps) + parse_eq_star(ps) + (next_dim, next_bp) = parse_array_separator(ps) + else # bp > binding_power + # Recurse to parse a separator with greater binding power. Eg: + # [a ;; b ; c ] + # | ^------ the next input is here + # '---------- the mark is here + (next_dim, next_bp) = parse_array_inner(ps, bp) + if bp == 0 + emit(ps, mark, K"row") + else + emit(ps, mark, K"nrow", numeric_flags(dim)) + end + end + dim, bp = next_dim, next_bp + end +end + +# Parse a separator in an array concatenation +# +# Here we aim to identify: +# * Dimension on which the next separator acts +# * Binding power (precedence) of the separator, where whitespace binds +# tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding +# power of 0 for whitespace and negative numbers for other separators. +function parse_array_separator(ps) + t = peek_token(ps) + k = kind(t) + if k == K";" + n_semis = 1 + while true + bump(ps, TRIVIA_FLAG) + t = peek_token(ps) + if kind(t) != K";" || t.had_whitespace + break + end + n_semis += 1 + end + # FIXME - following is ncat, not line continuation + # [a ;; \n c] + if n_semis == 2 && peek(ps) == K"NewlineWs" + # Line continuation + # [a b ;; \n \n c] + # TODO: Should this only consume a single newline? 
+ while peek(ps) == K"NewlineWs" + bump(ps, TRIVIA_FLAG) + end + return (2, 0) + else + return (n_semis, -n_semis) + end + elseif k == K"NewlineWs" + bump_trivia(ps) + # Newlines separate the first dimension + return (1, -1) + else + if t.had_whitespace && !is_closing_token(ps, k) + return (2, 0) + else + return (typemin(Int), typemin(Int)) + end + end end # Parse array concatenation/construction/indexing syntax inside of `[]` or `{}`. @@ -2123,7 +2281,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x,] ==> (vect x) bump(ps, TRIVIA_FLAG) end - # [x] ==> (vect x) + # [x] ==> (vect x) # [x \n ] ==> (vect x) parse_vect(ps, closer) elseif k == K"for" @@ -2133,7 +2291,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) else # [x y] ==> (hcat x y) # and other forms; See parse_array. - parse_array(ps, closer, false, end_is_symbol) + parse_array(ps, mark, closer, end_is_symbol) end end @@ -2509,17 +2667,12 @@ function parse_atom(ps::ParseState, check_identifiers=true) parse_paren(ps, check_identifiers) elseif leading_kind == K"[" # cat expression bump(ps, TRIVIA_FLAG) - ckind = parse_cat(ps, K"]", ps.end_symbol) - emit(ps, mark, ckind) + ckind, cflags = parse_cat(ps, K"]", ps.end_symbol) + emit(ps, mark, ckind, cflags) elseif leading_kind == K"{" # cat expression bump(ps, TRIVIA_FLAG) - ckind = parse_cat(ps, K"}", ps.end_symbol) - if ckind == K"hcat" - # {x y} ==> (bracescat (row x y)) - emit(ps, K"row", mark) - end - out_kind = ckind in (K"vect", K"comprehension") ? 
K"braces" : K"bracescat" - emit(ps, mark, out_kind) + ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) + emit_braces(ps, mark, ckind, cflags) elseif is_string(leading_kind) bump(ps) # FIXME parse_string_literal(ps) @@ -2545,6 +2698,18 @@ function parse_atom(ps::ParseState, check_identifiers=true) end end +function emit_braces(ps, mark, ckind, cflags) + if ckind == K"hcat" + # {x y} ==> (bracescat (row x y)) + emit(ps, K"row", mark, cflags) + elseif ckind == K"ncat" + # {x ;;; y} ==> (bracescat (nrow-3 x y)) + emit(ps, K"nrow", mark, cflags) + end + outk = ckind in (K"vect", K"comprehension") ? K"braces" : K"bracescat" + emit(ps, mark, outk) +end + # Parse docstrings attached by a space or single newline # "doc" foo ==> # diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 572441b3d1130..3719ba666404b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -11,7 +11,9 @@ const TRIVIA_FLAG = RawFlags(1<<0) const INFIX_FLAG = RawFlags(1<<1) # try-finally-catch const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<2) -# ERROR_FLAG = 0x80000000 +# Flags holding the dimension of an nrow or other UInt8 not held in the source +const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) +# Todo ERROR_FLAG = 0x80000000 ? struct SyntaxHead kind::Kind @@ -38,6 +40,14 @@ function raw_flags(; trivia::Bool=false, infix::Bool=false) return flags::RawFlags end +function numeric_flags(n::Integer) + RawFlags(UInt8(n)) << 8 +end + +function extract_numeric_flags(f::RawFlags) + Int((f >> 8) % UInt8) +end + kind(node::GreenNode{SyntaxHead}) = head(node).kind flags(node::GreenNode{SyntaxHead}) = head(node).flags @@ -84,6 +94,9 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In true elseif k == K"false" false + elseif k == K"Char" + # FIXME: Escape sequences... 
+ unescape_string(val_str)[2] elseif k == K"Identifier" Symbol(val_str) elseif k == K"VarIdentifier" @@ -161,7 +174,7 @@ hasflags(node::SyntaxNode, f) = hasflags(head(node.raw), f) head(node::SyntaxNode) = node.head kind(node::SyntaxNode) = kind(node.raw) -flags(node::SyntaxNode) = kind(node.raw) +flags(node::SyntaxNode) = flags(node.raw) haschildren(node::SyntaxNode) = node.head !== :leaf children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () @@ -347,7 +360,7 @@ function _to_expr(node::SyntaxNode) push!(args, catch_var) push!(args, catch_) end - # At this poin args is + # At this point args is # [try_block catch_var catch_block] if finally_ !== false push!(args, finally_) @@ -367,6 +380,10 @@ function _to_expr(node::SyntaxNode) gen = Expr(:generator, gen, args[i]) end args = [gen] + elseif head(node) in (:nrow, :ncat) + # For lack of a better place, the dimension argument to nrow/ncat + # is stored in the flags + pushfirst!(args, extract_numeric_flags(flags(node))) end if head(node) == :inert || (head(node) == :quote && length(args) == 1 && !(only(args) isa Expr)) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 8c548535c8ae7..c7e2a33658f2a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -860,6 +860,7 @@ Dict([ "typed_vcat" => Ts.TYPED_VCAT "typed_ncat" => Ts.TYPED_NCAT "row" => Ts.ROW +"nrow" => Ts.NROW "generator" => Ts.GENERATOR "filter" => Ts.FILTER "flatten" => Ts.FLATTEN @@ -884,7 +885,7 @@ for kw in split("""abstract baremodule begin break catch const block call comparison curly string inert macrocall kw parameters toplevel tuple ref vect braces bracescat hcat - vcat ncat typed_hcat typed_vcat typed_ncat row generator + vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator filter flatten comprehension typed_comprehension error diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 266ac8a3516c3..ad710aaff1236 100644 --- 
a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -415,6 +415,31 @@ tests = [ # Errors ": foo" => "(quote (error) :foo)" ], + JuliaSyntax.parse_atom => [ + # parse_array + # Normal matrix construction syntax + "[x y ; z w]" => "(vcat (row :x :y) (row :z :w))" + "[x y ; z w ; a b]" => "(vcat (row :x :y) (row :z :w) (row :a :b))" + "[x ; y ; z]" => "(vcat :x :y :z)" + "[x;]" => "(vcat :x)" + "[x y]" => "(hcat :x :y)" + # Mismatched rows + "[x y ; z]" => "(vcat (row :x :y) :z)" + # Double semicolon with spaces allowed (only) for line continuation + "[x y ;;\n z w]" => "(hcat :x :y :z :w)" + # "[x y ;; z w]" => "(hcat x y (error) z w)" # FIXME + # FIXME: S-expr printing issues with ncat + # # Single elements in rows + # "[x ; y ;; z ]" => "(ncat 2 (nrow 1 :x :y) :z)" + # "[x y ;;; z ]" => "(ncat 3 (row :x :y) :z)" + # # Higher dimensional ncat + # # Row major + # "[x y ; z w ;;; a b ; c d]" => + # "(ncat 3 (nrow 1 (row :x :y) (row :z :w)) (nrow 1 (row :a :b) (row :c :d)))" + # # Column major + # "[x ; y ;; z ; w ;;; a ; b ;; c ; d]" => + # "(ncat 3 (nrow 2 (nrow 1 :x :y) (nrow 1 :z :w)) (nrow 2 (nrow 1 :a :b) (nrow 1 :c :d)))" + ], JuliaSyntax.parse_docstring => [ "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" ], From 903bc7a8e5ffc7e81b6ec94a25d2e8c4d25e1a40 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 4 Dec 2021 09:05:11 +1000 Subject: [PATCH 0260/1109] Use NEWLINE_WS kind for newline-containig whitespace Newlines are syntactically significant in some circumstances, so something like this is necessary for parsing. An alternative implementation would be to use a flag, similar to dotop. 
--- JuliaSyntax/src/lexer.jl | 23 +++++++++++++++++------ JuliaSyntax/src/token.jl | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index 667a818279ee6..f5e755a16aa52 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -252,7 +252,7 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) suffix = false if kind in (Tokens.ERROR, Tokens.STRING, Tokens.TRIPLE_STRING, Tokens.CMD, Tokens.TRIPLE_CMD) str = String(l.io.data[(l.token_startpos + 1):position(l)]) - elseif (kind == Tokens.IDENTIFIER || kind == Tokens.VAR_IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE) + elseif (kind == Tokens.IDENTIFIER || kind == Tokens.VAR_IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE || kind == Tokens.NEWLINE_WS) str = String(take!(l.charstore)) elseif optakessuffix(kind) str = "" @@ -313,7 +313,7 @@ function next_token(l::Lexer, start = true) if eof(c) return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) - return lex_whitespace(l) + return lex_whitespace(l, c) elseif c == '[' return emit(l, Tokens.LSQUARE) elseif c == ']' @@ -392,11 +392,22 @@ function next_token(l::Lexer, start = true) end -# Lex whitespace, a whitespace char has been consumed -function lex_whitespace(l::Lexer) +# Lex whitespace, a whitespace char `c` has been consumed +function lex_whitespace(l::Lexer, c) readon(l) - accept_batch(l, iswhitespace) - return emit(l, Tokens.WHITESPACE) + k = Tokens.WHITESPACE + while true + if c == '\n' + k = Tokens.NEWLINE_WS + end + pc = peekchar(l) + # stop on non whitespace and limit to a single newline in a token + if !iswhitespace(pc) || (k == Tokens.NEWLINE_WS && pc == '\n') + break + end + c = readchar(l) + end + return emit(l, k) end function lex_comment(l::Lexer, doemit=true) diff --git a/JuliaSyntax/src/token.jl b/JuliaSyntax/src/token.jl index 55abb84e35c3a..0f39b6208aab2 100644 
--- a/JuliaSyntax/src/token.jl +++ b/JuliaSyntax/src/token.jl @@ -102,7 +102,7 @@ endpos(t::AbstractToken) = t.endpos startbyte(t::AbstractToken) = t.startbyte endbyte(t::AbstractToken) = t.endbyte function untokenize(t::Token) - if t.kind == IDENTIFIER || t.kind == VAR_IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == ERROR + if t.kind == IDENTIFIER || t.kind == VAR_IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == NEWLINE_WS || t.kind == ERROR return t.val elseif iskeyword(t.kind) return lowercase(string(t.kind)) From 1da60830a7123ed2cee19f218198c80c36e1143e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 9 Dec 2021 19:44:53 +1000 Subject: [PATCH 0261/1109] Fix comments showing shapes of LPIPE and RPIPE --- JuliaSyntax/src/token_kinds.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 2cafbdcb7f571..d1762d472080c 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -573,8 +573,8 @@ # Level 7 begin_pipe, - LPIPE, # |> - RPIPE, # <| + LPIPE, # <| + RPIPE, # |> end_pipe, # Level 8 From 4137141a03dded2ac88daf69bd4b14c8798d4cbb Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 9 Dec 2021 22:13:05 +1000 Subject: [PATCH 0262/1109] Remove importall as a keyword This keyword hasn't existed since before Julia 1.0 https://github.com/JuliaLang/julia/issues/22789 --- JuliaSyntax/src/lexer.jl | 3 +-- JuliaSyntax/src/token_kinds.jl | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/JuliaSyntax/src/lexer.jl b/JuliaSyntax/src/lexer.jl index f5e755a16aa52..46f4de1320db2 100644 --- a/JuliaSyntax/src/lexer.jl +++ b/JuliaSyntax/src/lexer.jl @@ -13,7 +13,7 @@ import ..Tokens: AbstractToken, Token, RawToken, Kind, TokenError, UNICODE_OPS, import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, 
EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, - IMPORT, IMPORTALL, MACRO, MODULE, OUTER, QUOTE, RETURN, TRUE, TRY, TYPE, USING, WHILE, ISA, IN, + IMPORT, MACRO, MODULE, OUTER, QUOTE, RETURN, TRUE, TRY, TYPE, USING, WHILE, ISA, IN, MUTABLE, PRIMITIVE, STRUCT, WHERE @@ -1090,7 +1090,6 @@ Tokens.FUNCTION, Tokens.GLOBAL, Tokens.IF, Tokens.IMPORT, -Tokens.IMPORTALL, Tokens.LET, Tokens.LOCAL, Tokens.MACRO, diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index d1762d472080c..f28ac267ae44c 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -29,7 +29,6 @@ GLOBAL, IF, IMPORT, - IMPORTALL, LET, LOCAL, MACRO, From 4d3e7a674f94ccf81c56d14242f23a61f060b351 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 29 Nov 2021 14:44:42 +1000 Subject: [PATCH 0263/1109] Add syntax nonterminal kinds These are various nonterminals which have surface syntax, but where the surface syntax itself doesn't include the nonterminal name. These were what I could find looking through `expand-table` in julia-syntax.scm, but perhaps there's others. Also add various kinds for special token types which must be inferred by the parser and not the tokenizer. In particular for invisible tokens and macro names. --- JuliaSyntax/src/token_kinds.jl | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index f28ac267ae44c..24d5aeebf6ea7 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -806,6 +806,83 @@ QUAD_ROOT, # ∜ end_unicode_ops, end_ops, + + # Kinds emitted by the parser. There's two types of these: + # 1. Implied tokens which have a position but might have zero width in the + # source text. 
+ # + # In some cases we want to generate parse tree nodes in a standard form, + # but some of the leaf tokens are implied rather than existing in the + # source text, or the lexed tokens need to be re-kinded to represent + # special forms which only the parser can infer. These are "parser tokens". + # + # Some examples: + # + # Docstrings - the macro name is invisible + # "doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1)) + # + # String macros - the macro name does not appear in the source text, so we + # need a special kind of token to imply it. + # + # In these cases, we use some special kinds which can be emitted as zero + # width tokens to keep the parse tree more uniform. + begin_parser_tokens, + TOMBSTONE, # Empty placeholder for kind to be filled later + NOTHING_LITERAL, # A literal Julia `nothing` in the AST + UNQUOTED_STRING, # An unquoted range of the source as a string + + # Macro names are modelled as a special kind of identifier because the + # @ may not be attached to the macro name in the source (or may not be + # associated with a token at all in the case of implied macro calls + # like CORE_DOC_MACRO_NAME) + begin_macro_names, + MACRO_NAME, # A macro name identifier + VAR_MACRO_NAME, # @var"..." + STRING_MACRO_NAME, # macname"some_str" + CMD_MACRO_NAME, # macname`some_str` + DOT_MACRO_NAME, # The macro name of @. + CORE_DOC_MACRO_NAME, # Core.@doc + CORE_CMD_MACRO_NAME, # Core.@cmd + CORE_INT128_STR_MACRO_NAME, # Core.@int128_str + CORE_UINT128_STR_MACRO_NAME, # Core.@uint128_str + CORE_BIG_STR_MACRO_NAME, # Core.@big_str + end_macro_names, + end_parser_tokens, + + # 2. Nonterminals which are exposed in the AST, but where the surface + # syntax doesn't have a token corresponding to the node type. 
+ begin_syntax_kinds, + BLOCK, + CALL, + COMPARISON, + CURLY, + INERT, # QuoteNode; not quasiquote + STRING_INTERP, # "a $x" + TOPLEVEL, + TUPLE, + REF, + VECT, + MACROCALL, + KW, # the = in f(a=1) + PARAMETERS, # the list after ; in f(; a=1) + # Concatenation syntax + BRACES, + BRACESCAT, + HCAT, + VCAT, + NCAT, + TYPED_HCAT, + TYPED_VCAT, + TYPED_NCAT, + ROW, + NROW, + # Comprehensions + GENERATOR, + FILTER, + FLATTEN, + COMPREHENSION, + TYPED_COMPREHENSION, + end_syntax_kinds, ) From f9d8a5a7752b035b8446951dc2cff5d79b7cbd4b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 06:33:55 +1000 Subject: [PATCH 0264/1109] Use a vendored copy of Tokenize.jl We need several significant changes to Tokenize, so bringing that library in here for now. --- JuliaSyntax/Project.toml | 4 ---- JuliaSyntax/src/JuliaSyntax.jl | 6 ++++-- JuliaSyntax/src/tokens.jl | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index ec34ef35c06ac..51b734c9e130e 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -3,11 +3,7 @@ uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] version = "0.1.0" -[deps] -Tokenize = "0796e94c-ce3b-5d07-9a54-7f471281c624" - [compat] -Tokenize = "0.5" julia = "1.4" [extras] diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 602cd610d5fb8..505e8adfd0feb 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,7 +1,9 @@ module JuliaSyntax -import Tokenize -using Tokenize.Tokens: RawToken +# Use a git subtree for a modified version of Tokenize.jl, as we need several +# significant changes +include("../Tokenize/src/Tokenize.jl") +using .Tokenize.Tokens: RawToken const TzTokens = Tokenize.Tokens include("utils.jl") diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 404494747db86..b21a10dd8d411 100644 --- a/JuliaSyntax/src/tokens.jl +++ 
b/JuliaSyntax/src/tokens.jl @@ -1,4 +1,4 @@ -using Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator +using .Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator include("token_kinds.jl") From ed4afb991616690b468d9876778f611d8f1986a0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 09:21:43 +1000 Subject: [PATCH 0265/1109] Fixes + tests for parsing unary ~ and dotted assignments --- JuliaSyntax/src/parser.jl | 46 +++++++++++++++++++--------------- JuliaSyntax/src/syntax_tree.jl | 31 +++++++++++++++-------- JuliaSyntax/src/token_kinds.jl | 2 +- JuliaSyntax/test/parser.jl | 11 ++++++++ 4 files changed, 59 insertions(+), 31 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 13b1e323913bc..422715ed6ac41 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -301,6 +301,14 @@ function parse_stmts(ps::ParseState) end end +# Parse assignments with comma separated lists on each side +# a = b ==> (= a b) +# a .= b ==> (.= a b) +# a += b ==> (+= a b) +# a .+= b ==> (.+= a b) +# a, b = c, d ==> (= (tuple a b) (tuple c d)) +# x, = xs ==> (= (tuple x) xs) +# # flisp: parse-eq function parse_eq(ps::ParseState) parse_assignment(ps, parse_comma, false) @@ -325,42 +333,40 @@ function parse_eq_star(ps::ParseState, equals_is_kw=false) end end -# flisp: eventually-call? -function is_eventually_call(ex) - TODO("is_eventually_call unimplemented") -end - # a = b ==> (= a b) # # flisp: parse-assignment function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) mark = position(ps) down(ps) - k = peek(ps) + t = peek_token(ps) + k = kind(t) if !is_prec_assignment(k) return NO_POSITION end if k == K"~" + if ps.space_sensitive && !peek_token(ps, 2).had_whitespace + # Unary ~ in space sensitive context is not assignment precedence + # [a ~b] ==> (hcat a (call ~ b)) + return NO_POSITION + end + # ~ is the only non-syntactic assignment-precedence operator. 
+ # a ~ b ==> (call-i a ~ b) + # [a ~ b c] ==> (hcat (call-i a ~ b) c) bump(ps) - if ps.space_sensitive # && ... - # Prefix operator ~x ? - TODO("parse_assignment... ~ not implemented") - else - # ~ is the only non-syntactic assignment-precedence operator. - # a ~ b ==> (call-i a ~ b) - parse_assignment(ps, down, equals_is_kw) - emit(ps, mark, K"call", INFIX_FLAG) - end + parse_assignment(ps, down, equals_is_kw) + emit(ps, mark, K"call", INFIX_FLAG) return NO_POSITION else # a += b ==> (+= a b) - # FIXME: - # a .= b ==> (.= a b) bump(ps, TRIVIA_FLAG) parse_assignment(ps, down, equals_is_kw) - result_k = (k == K"=" && equals_is_kw) ? K"kw" : k - equals_pos = emit(ps, mark, result_k) - return k == K"=" ? equals_pos : NO_POSITION + plain_eq = (k == K"=" && !is_dotted(t)) + result_k = + equals_pos = + emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, + is_dotted(t) ? DOTOP_FLAG : EMPTY_FLAGS) + return plain_eq ? equals_pos : NO_POSITION end end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 3719ba666404b..9f49cc31ec94b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -6,11 +6,13 @@ const RawFlags = UInt32 const EMPTY_FLAGS = RawFlags(0) const TRIVIA_FLAG = RawFlags(1<<0) -# The following flags are head-specific and could probably be allowed to cover -# the same bits +# Some of the following flags are head-specific and could probably be allowed +# to cover the same bits... const INFIX_FLAG = RawFlags(1<<1) +# Record whether syntactic operators were dotted +const DOTOP_FLAG = RawFlags(1<<2) # try-finally-catch -const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<2) +const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<3) # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) # Todo ERROR_FLAG = 0x80000000 ? 
@@ -22,17 +24,27 @@ end kind(head::SyntaxHead) = head.kind flags(head::SyntaxHead) = head.flags +hasflags(head::SyntaxHead, flags_) = (flags(head) & flags_) == flags_ istrivia(head::SyntaxHead) = hasflags(head, TRIVIA_FLAG) isinfix(head::SyntaxHead) = hasflags(head, INFIX_FLAG) -hasflags(head::SyntaxHead, flags_) = (flags(head) & flags_) == flags_ iserror(head::SyntaxHead) = kind(head) == K"error" +is_dotted(head::SyntaxHead) = hasflags(head, DOTOP_FLAG) + function Base.summary(head::SyntaxHead) _kind_str(kind(head)) end +function untokenize(head::SyntaxHead) + str = untokenize(kind(head)) + if is_dotted(head) + str = "."*str + end + str +end + function raw_flags(; trivia::Bool=false, infix::Bool=false) flags = RawFlags(0) trivia && (flags |= TRIVIA_FLAG) @@ -140,10 +152,9 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end return SyntaxNode(source, raw, position, nothing, :leaf, val) else - k = kind(raw) - str = k == K"Nothing" ? "Nothing" : untokenize(k) - head = !isnothing(str) ? Symbol(str) : - error("Can't untokenize head of kind $k") + str = untokenize(head(raw)) + headsym = !isnothing(str) ? 
Symbol(str) : + error("Can't untokenize head of kind $(kind(raw))") cs = SyntaxNode[] pos = position for (i,rawchild) in enumerate(children(raw)) @@ -160,7 +171,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In if isinfix(raw) cs[2], cs[1] = cs[1], cs[2] end - node = SyntaxNode(source, raw, position, nothing, head, cs) + node = SyntaxNode(source, raw, position, nothing, headsym, cs) for c in cs c.parent = node end @@ -217,7 +228,7 @@ function _show_syntax_node_sexpr(io, node) print(io, repr(node.val)) end else - print(io, "(", _kind_str(kind(node.raw))) + print(io, "(", untokenize(head(node.raw))) first = true for n in children(node) print(io, ' ') diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index c7e2a33658f2a..e5e13a747449a 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -888,7 +888,7 @@ for kw in split("""abstract baremodule begin break catch const vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator filter flatten comprehension typed_comprehension - error + error Nothing """) _kind_to_str_unique[_str_to_kind[kw]] = kw end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index ad710aaff1236..b8bc67189e086 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -57,6 +57,17 @@ tests = [ "a;b;c" => "(toplevel :a :b :c)" "a;;;b;;" => "(toplevel :a :b)" ], + JuliaSyntax.parse_eq => [ + # parse_assignment + "a = b" => "(= :a :b)" + "a .= b" => "(.= :a :b)" + "a += b" => "(+= :a :b)" + "a .+= b" => "(.+= :a :b)" + "a, b = c, d" => "(= (tuple :a :b) (tuple :c :d))" + "x, = xs" => "(= (tuple :x) :xs)" + "[a ~b]" => "(hcat :a (call :~ :b))" + "[a ~ b c]" => "(hcat (call :~ :a :b) :c)" + ], JuliaSyntax.parse_cond => [ "a ? 
b : c" => "(if :a :b :c)" "a ?\nb : c" => "(if :a :b :c)" From 36fba921a44e271f202c43152d1bc92ca3d0f09c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 13:20:57 +1000 Subject: [PATCH 0266/1109] Clean up of SyntaxNode and sexpression printing/tests * Make short-form SyntaxNode printing use bare Symbols so that tests are much nicer to look at. Clean up all tests as needed. * Don't reorder SyntaxNode args; keep them in the same order as the green tree. All reordering is now postponed until conversion to Expr. * Always use an underscore in predicate names for consistency. --- JuliaSyntax/src/green_tree.jl | 14 +- JuliaSyntax/src/parse_stream.jl | 9 +- JuliaSyntax/src/parser.jl | 277 +++++++------- JuliaSyntax/src/syntax_tree.jl | 246 +++++++------ JuliaSyntax/src/tokens.jl | 7 +- JuliaSyntax/test/parser.jl | 574 +++++++++++++++--------------- JuliaSyntax/test/runtests.jl | 12 +- JuliaSyntax/test/simple_parser.jl | 2 +- 8 files changed, 574 insertions(+), 567 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index ea2b9c2eafa2b..5252b6daf3ed9 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -63,16 +63,14 @@ span(node::GreenNode) = node.span head(node::GreenNode) = node.head # Predicates -# -# FIXME: All predicates should be consistently named, either with istrivia or is_trivia. 
-istrivia(node::GreenNode) = istrivia(node.head) -iserror(node::GreenNode) = iserror(node.head) +is_trivia(node::GreenNode) = is_trivia(node.head) +is_error(node::GreenNode) = is_error(node.head) Base.summary(node::GreenNode) = summary(node.head) # Pretty printing function _show_green_node(io, node, indent, pos, str, show_trivia) - if !show_trivia && istrivia(node) + if !show_trivia && is_trivia(node) return end posstr = "$(lpad(pos, 6)):$(rpad(pos+span(node)-1, 6)) │" @@ -82,17 +80,17 @@ function _show_green_node(io, node, indent, pos, str, show_trivia) else line = string(posstr, indent, '[', summary(node), "]") end - if !istrivia(node) && is_leaf + if !is_trivia(node) && is_leaf line = rpad(line, 40) * "✔" end - if iserror(node) + if is_error(node) line = rpad(line, 41) * "✘" end if is_leaf && !isnothing(str) line = string(rpad(line, 43), ' ', repr(str[pos:prevind(str, pos + span(node))])) end line = line*"\n" - if iserror(node) + if is_error(node) printstyled(io, line, color=:light_red) else print(io, line) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8d4989c397dce..d7603cd76fefe 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -241,7 +241,7 @@ function peek_behind(stream::ParseStream; skip_trivia::Bool=true) if skip_trivia for i = length(stream.ranges):-1:1 s = stream.ranges[i] - if !istrivia(head(s)) + if !is_trivia(head(s)) return kind(s) end end @@ -296,7 +296,7 @@ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, emark = position(stream) _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) if !isnothing(error) - emit(stream, emark, K"error", TRIVIA_FLAG, error=error) + emit(stream, emark, K"error", flags, error=error) end # Return last token location in output if needed for reset_node! return position(stream) @@ -307,11 +307,12 @@ Bump comments and whitespace tokens preceding the next token **Skips newlines** by default. 
Set skip_newlines=false to avoid that. """ -function bump_trivia(stream::ParseStream; skip_newlines=true, error=nothing) +function bump_trivia(stream::ParseStream, flags=EMPTY_FLAGS; + skip_newlines=true, error=nothing) emark = position(stream) _bump_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) if !isnothing(error) - emit(stream, emark, K"error", TRIVIA_FLAG, error=error) + emit(stream, emark, K"error", flags, error=error) end return position(stream) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 422715ed6ac41..0d4e22985ad15 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -96,7 +96,7 @@ end function is_reserved_word(k) k = kind(k) - iskeyword(k) && !is_contextural_keyword(k) + is_keyword(k) && !is_contextural_keyword(k) end # Return true if the next word (or word pair) is reserved, introducing a @@ -153,7 +153,7 @@ end function is_initial_operator(k) k = kind(k) # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl - isoperator(k) && + is_operator(k) && !(k in (K":", K"'", K".'", K"?")) && !is_syntactic_unary_op(k) && !is_syntactic_operator(k) @@ -323,7 +323,7 @@ end function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) - if (isliteral(k) || is_identifier(k)) && k2 in (K",", K")", K"}", K"]") + if (is_literal(k) || is_identifier(k)) && k2 in (K",", K")", K"}", K"]") # optimization: skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) @@ -351,7 +351,7 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) return NO_POSITION end # ~ is the only non-syntactic assignment-precedence operator. 
- # a ~ b ==> (call-i a ~ b) + # a ~ b ==> (call-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) bump(ps) parse_assignment(ps, down, equals_is_kw) @@ -474,15 +474,20 @@ function parse_and(ps::ParseState) parse_RtoL(ps, parse_comparison, is_prec_lazy_and, true, parse_and) end -# Parse comparison chains like -# x > y ==> (call-i x > y) -# x < y < z ==> (comparison x < y < z) -# x == y < z ==> (comparison x == y < z) +# Parse binary comparisons and comparison chains # # flisp: parse-comparison -function parse_comparison(ps::ParseState) +function parse_comparison(ps::ParseState, subtype_comparison=false) mark = position(ps) - parse_pipe_lt(ps) + if subtype_comparison && is_reserved_word(peek(ps)) + # Recovery + # struct try end ==> (struct false (error try) (block)) + name = untokenize(peek(ps)) + bump(ps) + emit(ps, mark, K"error", error="Invalid type name `$name`") + else + parse_pipe_lt(ps) + end n_comparisons = 0 op_pos = NO_POSITION initial_kind = peek(ps) @@ -499,9 +504,14 @@ function parse_comparison(ps::ParseState) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) emit(ps, mark, initial_kind) else + # Normal binary comparisons + # x < y ==> (call-i x < y) emit(ps, mark, K"call", INFIX_FLAG) end elseif n_comparisons > 1 + # Comparison chains + # x < y < z ==> (comparison x < y < z) + # x == y < z ==> (comparison x == y < z) emit(ps, mark, K"comparison") end end @@ -544,8 +554,8 @@ function parse_range(ps::ParseState) peek_token(ps).had_whitespace && !peek_token(ps, 2).had_whitespace # Tricky cases in space sensitive mode - # [1 :a] ==> (vcat 1 (quote a)) - # [1 2:3 :a] ==> (vcat 1 (call-i 2 : 3) (quote a)) + # [1 :a] ==> (hcat 1 (quote a)) + # [1 2:3 :a] ==> (hcat 1 (call-i 2 : 3) (quote a)) break end t2 = peek_token(ps,2) @@ -579,10 +589,7 @@ function parse_range(ps::ParseState) end if t.had_newline # Error message for people coming from python - # === - # 1: - # 2 - # ==> (call-i 1 : (error)) + # 1:\n2 ==> (call-i 1 : (error)) emit_diagnostic(ps, whitespace=true, 
error="line break after `:` in range expression") bump_invisible(ps, K"error") @@ -609,20 +616,19 @@ function parse_range(ps::ParseState) end end -# parse left to right chains of a given binary operator +# a - b - c ==> (call-i (call-i a - b) - c) +# a + b + c ==> (call-i a + b c) # -# flisp: parse-chain -function parse_chain(ps::ParseState, down, op_kind) - while (t = peek_token(ps); kind(t) == op_kind) - if ps.space_sensitive && t.had_whitespace && - is_both_unary_and_binary(kind(t)) && - !peek_token(ps, 2).had_whitespace - # [x +y] ==> (hcat x (call + y)) - break - end - bump(ps, TRIVIA_FLAG) - down(ps) - end +# flisp: parse-expr +function parse_expr(ps::ParseState) + parse_with_chains(ps, parse_term, is_prec_plus, (K"+", K"++")) +end + +# a * b * c ==> (call-i a * b c) +# +# flisp: parse-term +function parse_term(ps::ParseState) + parse_with_chains(ps, parse_rational, is_prec_times, (K"*",)) end # Parse left to right, combining any of `chain_ops` into one call @@ -637,9 +643,9 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) !peek_token(ps, 2).had_whitespace # The following is two elements of a hcat # [x+y +z] ==> (hcat (call-i x + y) (call + z)) - # Conversely - # [x+y+z] ==> (hcat (call-i x + y z)) - # [x+y + z] ==> (hcat (call-i x + y z)) + # Conversely the following are infix calls + # [x+y+z] ==> (vect (call-i x + y z)) + # [x+y + z] ==> (vect (call-i x + y z)) break end bump(ps) @@ -654,19 +660,20 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) end end -# a - b - c ==> (call-i (call-i a - b) - c) -# a + b + c ==> (call-i a + b c) -# -# flisp: parse-expr -function parse_expr(ps::ParseState) - parse_with_chains(ps, parse_term, is_prec_plus, (K"+", K"++")) -end - -# a * b * c ==> (call-i a * b c) +# parse left to right chains of a given binary operator # -# flisp: parse-term -function parse_term(ps::ParseState) - parse_with_chains(ps, parse_rational, is_prec_times, (K"*",)) +# flisp: parse-chain +function 
parse_chain(ps::ParseState, down, op_kind) + while (t = peek_token(ps); kind(t) == op_kind) + if ps.space_sensitive && t.had_whitespace && + is_both_unary_and_binary(kind(t)) && + !peek_token(ps, 2).had_whitespace + # [x +y] ==> (hcat x (call + y)) + break + end + bump(ps, TRIVIA_FLAG) + down(ps) + end end # flisp: parse-rational @@ -758,7 +765,7 @@ function is_juxtapose(ps, prev_k, t) !(is_block_form(prev_k) || is_syntactic_unary_op(prev_k) || is_initial_reserved_word(ps, prev_k) ))) && - (!isoperator(k) || is_radical_op(k)) && + (!is_operator(k) || is_radical_op(k)) && !is_closing_token(ps, k) && !is_initial_reserved_word(ps, k) end @@ -858,7 +865,10 @@ function parse_unary_call(ps::ParseState) if is_closing_token(ps, k2) || k2 in (K"NewlineWs", K"=") if is_dotted(op_t) # standalone dotted operators are parsed as (|.| op) - # .+ ==> (. +) + # .+ ==> (. +) + # .+\n ==> (. +) + # .+ = ==> (. +) + # .+) ==> (. +) bump_trivia(ps) bump_split(ps, 1, K".", TRIVIA_FLAG, @@ -866,16 +876,24 @@ function parse_unary_call(ps::ParseState) emit(ps, mark, K".") else # return operator by itself, as in - # (+) ==> + + # +) ==> + bump(ps) end elseif k2 == K"{" || (!is_unary_op(op_k) && k2 == K"(") - # this case is +{T}(x::T) = ... + # Call with type parameters or non-unary prefix call + # +{T}(x::T) ==> (call (curly + T) (:: x T)) + # *(x) ==> (call * x) parse_factor(ps) elseif k2 == K"(" - # Cases like +(a,b) and +(a) + # Cases like +(a;b) are ambiguous: are they prefix calls to + with b as + # a keyword argument, or is `a;b` a block? We resolve this with a + # simple heuristic: if there were any commas, it was a function call. 
+ # # - # Bump the operator + # (The flisp parser only considers commas before `;` and thus gets this + # last case wrong) + # + bump(ps, op_tok_flags) # Setup possible whitespace error between operator and ( @@ -886,27 +904,6 @@ function parse_unary_call(ps::ParseState) mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( - # There's two tricky parts for unary-prefixed parenthesized expressions - # like `+(a,b)` - # - # 1. The ambiguity between a function call arglist or a block. The - # flisp parser resolves in favor of a block if there's no initial - # commas before semicolons: - # - # Function calls: - # +(a,b) ==> (call + a b) - # +(a=1,) ==> (call + (kw a 1)) - # - # Not function calls: - # +(a;b) ==> (call + (block a b)) - # +(a=1) ==> (call + (= a 1)) - # - # But this heuristic fails in some cases so here we use a simpler rule: - # if there were any commas, it was a function call. Then we also parse - # things like the following in a useful way: - # - # +(a;b,c) ==> (call + (tuple a (parameters b c))) - # is_call = false is_block = false parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs @@ -918,31 +915,42 @@ function parse_unary_call(ps::ParseState) eq_is_kw_after_semi=is_call) end - if is_call && t2.had_whitespace - reset_node!(ps, ws_error_pos, kind=K"error") - emit_diagnostic(ps, ws_mark, ws_mark_end, - error="whitespace not allowed between prefix function call and argument list") - end - - # 2. 
The precedence between unary + and any following infix ^ depends - # on whether the parens are a function call or not + # The precedence between unary + and any following infix ^ depends on + # whether the parens are a function call or not if is_call - # Prefix operator call + if t2.had_whitespace + # Whitespace not allowed before prefix function call bracket + # + (a,b) ==> (call + (error) a b) + reset_node!(ps, ws_error_pos, kind=K"error") + emit_diagnostic(ps, ws_mark, ws_mark_end, + error="whitespace not allowed between prefix function call and argument list") + end + # Prefix function calls for operators which are both binary and unary + # +(a,b) ==> (call + a b) + # +(a=1,) ==> (call + (kw a 1)) + # +(a;b,c) ==> (call + a (parameters b c)) + # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) emit(ps, mark, op_node_kind) parse_factor_with_initial_ex(ps, mark) else + # Unary function calls with brackets as grouping, not an arglist if is_block + # +(a;b) ==> (call + (block a b)) emit(ps, mark_before_paren, K"block") end - # Not a prefix operator call - # +(a)^2 ==> (call + (call-i ^ a 2)) + # Not a prefix operator call but a block; `=` is not `kw` + # +(a=1) ==> (call + (= a 1)) + # Unary operators have lower precedence than ^ + # +(a)^2 ==> (call + (call-i a ^ 2)) parse_factor_with_initial_ex(ps, mark_before_paren) emit(ps, mark, op_node_kind) end elseif !is_unary_op(op_k) emit_diagnostic(ps, error="expected a unary operator") else + # Normal unary calls + # +x ==> (call + x) bump(ps, op_tok_flags) parse_unary(ps) emit(ps, mark, op_node_kind) @@ -1000,10 +1008,9 @@ function parse_decl_with_initial_ex(ps::ParseState, mark) emit(ps, mark, K"::") end if peek(ps) == K"->" + # -> is unusual: it binds tightly on the left and loosely on the right. # a::b->c ==> (-> (:: a b) c) bump(ps, TRIVIA_FLAG) - # -> is unusual: it binds tightly on the left and - # loosely on the right. 
parse_eq_star(ps) emit(ps, mark, K"->") end @@ -1220,9 +1227,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emark = position(ps) bump(ps) bump(ps) - "f.'" => "f (error . ')" + "f.'" => "f (error-t . ')" emit(ps, emark, K"error", TRIVIA_FLAG, - error="the .' operator is discontinued") + error="the .' operator for transpose is discontinued") is_valid_modref = false continue end @@ -1230,7 +1237,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Allow `@` in macrocall only in first and last position # A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x))) # @A.B.x ==> (macrocall (. (. A (quote B)) (quote @x))) - # A.@B.x ==> (macrocall (. (. A (error) B) (quote @x))) + # A.@B.x ==> (macrocall (. (. A (error-t) B) (quote @x))) emit_diagnostic(ps, macro_atname_range, error="`@` must appear on first or last macro name component") bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") @@ -1348,13 +1355,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump(ps) t = peek_token(ps) k = kind(t) - if !t.had_whitespace && (k == K"Identifier" || iskeyword(k) || is_number(k)) + if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_number(k)) # Macro sufficies can include keywords and numbers # x"s"y ==> (macrocall x_str "s" "y") # x"s"end ==> (macrocall x_str "s" "end") # x"s"2 ==> (macrocall x_str "s" 2) # x"s"10.0 ==> (macrocall x_str "s" 10.0) - suffix_kind = (k == K"Identifier" || iskeyword(k)) ? + suffix_kind = (k == K"Identifier" || is_keyword(k)) ? 
K"UnquotedString" : k bump(ps, remap_kind=suffix_kind) end @@ -1366,43 +1373,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end end +# Parse the `A<:B` part of type definitions like `struct A<:B end` +# # flisp: parse-subtype-spec function parse_subtype_spec(ps::ParseState) - k = peek(ps) - if is_reserved_word(k) - # Recovery - # struct try end ==> (struct false (error try) (block)) - bump(ps, error="Invalid type name `$(untokenize(k))`") - m = position(ps) - if is_prec_comparison(peek(ps)) - bump(ps) - parse_pipe_lt(ps) - emit(ps, m, K"error", TRIVIA_FLAG) - end - else - # Wart: why isn't the flisp parser more strict here? - # <: is the only operator which isn't a syntax error, but - # parse_subtype_spec allows all sorts of things. - parse_comparison(ps) - end -end - -# Parse struct definitions. The caller must arrange for the next tokens to be -# `struct` or `mutable struct`. -# -# flisp: parse-struct-def -function parse_struct_def(ps::ParseState) - mark = position(ps) - is_mutable = peek(ps) == K"mutable" - if is_mutable - bump(ps, TRIVIA_FLAG) - end - @assert peek(ps) == K"struct" - bump(ps, TRIVIA_FLAG) - k = peek(ps) - if is_reserved_word(k) - bump(ps, error="Invalid type name `$(untokenize(k))`") - end + # Wart: why isn't the flisp parser more strict here? + # <: is the only operator which isn't a syntax error, but + # parse_comparison allows all sorts of things. + parse_comparison(ps, true) end # parse expressions or blocks introduced by syntactic reserved words. 
@@ -1431,12 +1409,7 @@ function parse_resword(ps::ParseState) end elseif word == K"while" # while cond body end ==> (while cond (block body)) - # === - # while x < y - # a - # b - # end - # ==> (while (call < x y) (block a b)) + # while x < y \n a \n b \n end ==> (while (call-i x < y) (block a b)) bump(ps, TRIVIA_FLAG) parse_cond(ps) parse_block(ps) @@ -1444,12 +1417,7 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"while") elseif word == K"for" # for x in xs end ==> (for (= x xs) (block)) - # === - # for x in xs, y in ys - # a - # b - # end - # ==> (for (block (= x xs) (= y ys)) (block a b)) + # for x in xs, y in ys \n a \n end ==> (for (block (= x xs) (= y ys)) (block a)) bump(ps, TRIVIA_FLAG) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_iteration_spec) @@ -1507,7 +1475,7 @@ function parse_resword(ps::ParseState) # abstract type A <: B end ==> (abstract (<: A B)) # abstract type A <: B{T,S} end ==> (abstract (<: A (curly B T S))) # Oddities allowed by parser - # abstract type A < B end ==> (abstract (call < A B)) + # abstract type A < B end ==> (abstract (call-i A < B)) bump(ps, TRIVIA_FLAG) @assert peek(ps) == K"type" bump(ps, TRIVIA_FLAG) @@ -1628,8 +1596,8 @@ function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) emark = position(ps) bump(ps, TRIVIA_FLAG) if peek(ps) == K"if" - # User wrote `else if` by mistake ? - # if a xx else if b yy end ==> (if a (block xx) (else (error if) (block b) (block yy))) + # Recovery: User wrote `else if` by mistake ? 
+ # if a xx else if b yy end ==> (if a (block xx) (error-t) (elseif (block b) (block yy))) bump(ps, TRIVIA_FLAG) emit(ps, emark, K"error", TRIVIA_FLAG, error="use `elseif` instead of `else if`") @@ -1739,7 +1707,7 @@ function parse_function(ps::ParseState) emit(ps, def_mark, K"error", error="Expected macro name") end else - if iskeyword(k) + if is_keyword(k) # Forbid things like # function begin() end ==> (function (call (error begin)) (block)) # macro begin() end ==> (macro (call (error begin)) (block)) @@ -1832,7 +1800,7 @@ function parse_try(ps) # in which these blocks execute. bump_trivia(ps) if !has_catch && peek(ps) == K"catch" - # try x finally y catch e z end ==> (try (block x) false false false (block y) e (block z)) + # try x finally y catch e z end ==> (try-f (block x) false false false (block y) e (block z)) flags |= TRY_CATCH_AFTER_FINALLY_FLAG m = position(ps) parse_catch(ps) @@ -2124,16 +2092,16 @@ end # [x y ;; z w] ==> (hcat x y (error) z w) # # Single elements in rows -# [x ; y ;; z ] ==> (ncat 2 (nrow 1 x y) z) -# [x y ;;; z ] ==> (ncat 3 (row x y) z) +# [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z) +# [x y ;;; z ] ==> (ncat-3 (row x y) z) # # Higher dimensional ncat # Row major # [x y ; z w ;;; a b ; c d] ==> -# (ncat 3 (nrow 1 (row x y) (row z w)) (nrow 1 (row a b) (row c d))) +# (ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d))) # Column major # [x ; y ;; z ; w ;;; a ; b ;; c ; d] ==> -# (ncat 3 (nrow 2 (nrow 1 x y) (nrow 1 z w)) (nrow 2 (nrow 1 a b) (nrow 1 c d))) +# (ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d))) # # flisp: parse-array function parse_array(ps::ParseState, mark, closer, end_is_symbol) @@ -2160,7 +2128,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) if binding_power == 0 emit(ps, mark, K"row") else - emit(ps, mark, K"nrow", numeric_flags(dim)) + emit(ps, mark, K"nrow", set_numeric_flags(dim)) end dim = next_dim binding_power = next_bp @@ -2168,7 +2136,7 @@ 
function parse_array(ps::ParseState, mark, closer, end_is_symbol) bump_closing_token(ps, closer) return binding_power == -1 ? (K"vcat", EMPTY_FLAGS) : binding_power == 0 ? (K"hcat", EMPTY_FLAGS) : - (K"ncat", numeric_flags(dim)) + (K"ncat", set_numeric_flags(dim)) end # Parse equal and ascending precedence chains of array concatenation operators @@ -2212,7 +2180,7 @@ function parse_array_inner(ps, binding_power) if bp == 0 emit(ps, mark, K"row") else - emit(ps, mark, K"nrow", numeric_flags(dim)) + emit(ps, mark, K"nrow", set_numeric_flags(dim)) end end dim, bp = next_dim, next_bp @@ -2612,7 +2580,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # :foo => (quote foo) t = peek_token(ps, 2) k = kind(t) - if is_closing_token(ps, k) && (!iskeyword(k) || t.had_whitespace) + if is_closing_token(ps, k) && (!is_keyword(k) || t.had_whitespace) # : is a literal colon in some circumstances # :) ==> : # : end ==> : @@ -2626,7 +2594,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) # : # a # ==> (quote (error)) - bump_trivia(ps, error="whitespace not allowed after `:` used for quoting") + bump_trivia(ps, TRIVIA_FLAG, + error="whitespace not allowed after `:` used for quoting") # Heuristic recovery if kind(t) == K"NewlineWs" bump_invisible(ps, K"error") @@ -2650,10 +2619,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif leading_kind == K"VarIdentifier" bump(ps) t = peek_token(ps) - if !t.had_whitespace && !(isoperator(kind(t)) || is_non_keyword_closer(t)) + if !t.had_whitespace && !(is_operator(kind(t)) || is_non_keyword_closer(t)) bump(ps, error="suffix not allowed after var\"...\" syntax") end - elseif isoperator(leading_kind) + elseif is_operator(leading_kind) # Operators and keywords are generally turned into identifiers if used # as atoms. 
if check_identifiers && is_syntactic_operator(leading_kind) @@ -2661,7 +2630,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) else bump(ps) end - elseif iskeyword(leading_kind) + elseif is_keyword(leading_kind) if check_identifiers && is_closing_token(ps, leading_kind) # :(end) ==> (quote (error end)) bump(ps, error="invalid identifier") @@ -2690,7 +2659,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump_invisible(ps, K"core_@cmd") bump(ps) emit(ps, mark, K"macrocall") - elseif isliteral(leading_kind) + elseif is_literal(leading_kind) bump(ps) elseif is_closing_token(ps, leading_kind) # Leave closing token in place for other productions to diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9f49cc31ec94b..2caf71c35f11e 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -2,7 +2,8 @@ # Syntax tree types #------------------------------------------------------------------------------- - +# Flags hold auxilary information about tokens/nonterminals which the Kind +# doesn't capture in a nice way. const RawFlags = UInt32 const EMPTY_FLAGS = RawFlags(0) const TRIVIA_FLAG = RawFlags(1<<0) @@ -17,6 +18,37 @@ const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<3) const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) # Todo ERROR_FLAG = 0x80000000 ? +function set_numeric_flags(n::Integer) + f = RawFlags((n << 8) & NUMERIC_FLAGS) + if numeric_flags(f) != n + error("Numeric flags unable to hold large integer $n") + end + f +end + +function numeric_flags(f::RawFlags) + Int((f >> 8) % UInt8) +end + +# Return true if any of `test_flags` are set +has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 + +# Function for combining flags. (Do we want this?) 
+function flags(; trivia::Bool=false, + infix::Bool=false, + dotop::Bool=false, + try_catch_after_finally::Bool=false, + numeric::Int=0) + flags = RawFlags(0) + trivia && (flags |= TRIVIA_FLAG) + infix && (flags |= INFIX_FLAG) + dotop && (flags |= DOTOP_FLAG) + try_catch_after_finally && (flags |= TRY_CATCH_AFTER_FINALLY_FLAG) + numeric != 0 && (flags |= set_numeric_flags(numeric)) + return flags::RawFlags +end + +#------------------------------------------------------------------------------- struct SyntaxHead kind::Kind flags::RawFlags @@ -24,46 +56,38 @@ end kind(head::SyntaxHead) = head.kind flags(head::SyntaxHead) = head.flags -hasflags(head::SyntaxHead, flags_) = (flags(head) & flags_) == flags_ - -istrivia(head::SyntaxHead) = hasflags(head, TRIVIA_FLAG) -isinfix(head::SyntaxHead) = hasflags(head, INFIX_FLAG) - -iserror(head::SyntaxHead) = kind(head) == K"error" +has_flags(head::SyntaxHead, test_flags) = has_flags(flags(head), test_flags) -is_dotted(head::SyntaxHead) = hasflags(head, DOTOP_FLAG) +is_trivia(head::SyntaxHead) = has_flags(head, TRIVIA_FLAG) +is_infix(head::SyntaxHead) = has_flags(head, INFIX_FLAG) +is_dotted(head::SyntaxHead) = has_flags(head, DOTOP_FLAG) +numeric_flags(head::SyntaxHead) = numeric_flags(flags(head)) +is_error(head::SyntaxHead) = kind(head) == K"error" function Base.summary(head::SyntaxHead) _kind_str(kind(head)) end -function untokenize(head::SyntaxHead) +function untokenize(head::SyntaxHead; include_flag_suff=true) str = untokenize(kind(head)) if is_dotted(head) str = "."*str end + if include_flag_suff && flags(head) ∉ (EMPTY_FLAGS, DOTOP_FLAG) + str = str*"-" + is_trivia(head) && (str = str*"t") + is_infix(head) && (str = str*"i") + has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f") + n = numeric_flags(head) + n != 0 && (str = str*string(n)) + end str end -function raw_flags(; trivia::Bool=false, infix::Bool=false) - flags = RawFlags(0) - trivia && (flags |= TRIVIA_FLAG) - infix && (flags |= INFIX_FLAG) - return 
flags::RawFlags -end - -function numeric_flags(n::Integer) - RawFlags(UInt8(n)) << 8 -end - -function extract_numeric_flags(f::RawFlags) - Int((f >> 8) % UInt8) -end - kind(node::GreenNode{SyntaxHead}) = head(node).kind flags(node::GreenNode{SyntaxHead}) = head(node).flags -isinfix(node) = isinfix(head(node)) +is_infix(node) = is_infix(head(node)) # Value of an error node with no children struct ErrorVal @@ -113,8 +137,8 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In Symbol(val_str) elseif k == K"VarIdentifier" Symbol(val_str[5:end-1]) - elseif iskeyword(k) - # This only happens nodes nested inside errors + elseif is_keyword(k) + # This should only happen for tokens nested inside errors Symbol(val_str) elseif k in (K"String", K"Cmd") unescape_string(source[position+1:position+span(raw)-2]) @@ -122,7 +146,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In unescape_string(source[position+3:position+span(raw)-4]) elseif k == K"UnquotedString" String(val_str) - elseif isoperator(k) + elseif is_operator(k) isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens Symbol(val_str) @@ -152,25 +176,18 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end return SyntaxNode(source, raw, position, nothing, :leaf, val) else - str = untokenize(head(raw)) + str = untokenize(head(raw), include_flag_suff=false) headsym = !isnothing(str) ? Symbol(str) : error("Can't untokenize head of kind $(kind(raw))") cs = SyntaxNode[] pos = position for (i,rawchild) in enumerate(children(raw)) - # FIXME: Allowing trivia iserror nodes here corrupts the tree layout. - if !istrivia(rawchild) || iserror(rawchild) + # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. 
+ if !is_trivia(rawchild) || is_error(rawchild) push!(cs, SyntaxNode(source, rawchild, pos)) end pos += rawchild.span end - # Julia's standard `Expr` ASTs have children stored in a canonical - # order which is not always source order. - # - # Swizzle the children here as necessary to get the canonical order. - if isinfix(raw) - cs[2], cs[1] = cs[1], cs[2] - end node = SyntaxNode(source, raw, position, nothing, headsym, cs) for c in cs c.parent = node @@ -179,9 +196,9 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end end -iserror(node::SyntaxNode) = iserror(node.raw) -istrivia(node::SyntaxNode) = istrivia(node.raw) -hasflags(node::SyntaxNode, f) = hasflags(head(node.raw), f) +is_error(node::SyntaxNode) = is_error(node.raw) +is_trivia(node::SyntaxNode) = is_trivia(node.raw) +has_flags(node::SyntaxNode, f) = has_flags(head(node.raw), f) head(node::SyntaxNode) = node.head kind(node::SyntaxNode) = kind(node.raw) @@ -201,9 +218,9 @@ function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename line, col = source_location(node.source, node.position) posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node)-1,6))│" - nodestr = !haschildren(node) ? - repr(node.val) : - "[$(_kind_str(kind(node.raw)))]" + nodestr = haschildren(node) ? "[$(untokenize(head(node.raw)))]" : + node.val isa Symbol ? string(node.val) : + repr(node.val) treestr = string(indent, nodestr) # Add filename if it's changed from the previous node if fname != current_filename[] @@ -222,10 +239,10 @@ end function _show_syntax_node_sexpr(io, node) if !haschildren(node) - if iserror(node) + if is_error(node) print(io, "(error)") else - print(io, repr(node.val)) + print(io, node.val isa Symbol ? 
string(node.val) : repr(node.val)) end else print(io, "(", untokenize(head(node.raw))) @@ -334,76 +351,81 @@ end # Conversion to Base.Expr function _to_expr(node::SyntaxNode) - if haschildren(node) - args = Vector{Any}(undef, length(children(node))) - args = map!(_to_expr, args, children(node)) - # Convert elements - if head(node) == :macrocall - line_node = source_location(LineNumberNode, node.source, node.position) - insert!(args, 2, line_node) - elseif head(node) in (:call, :ref) - # Move parameters block to args[2] - if length(args) > 1 && Meta.isexpr(args[end], :parameters) - insert!(args, 2, args[end]) - pop!(args) - end - elseif head(node) in (:tuple, :parameters, :vect) - # Move parameters blocks to args[1] - if length(args) > 1 && Meta.isexpr(args[end], :parameters) - pushfirst!(args, args[end]) - pop!(args) - end - elseif head(node) == :try - # Try children in source order: - # try_block catch_var catch_block else_block finally_block - # Expr ordering: - # try_block catch_var catch_block [finally_block] [else_block] - catch_ = nothing - if hasflags(node, TRY_CATCH_AFTER_FINALLY_FLAG) - catch_ = pop!(args) - catch_var = pop!(args) - end - finally_ = pop!(args) - else_ = pop!(args) - if hasflags(node, TRY_CATCH_AFTER_FINALLY_FLAG) - pop!(args) - pop!(args) - push!(args, catch_var) - push!(args, catch_) - end - # At this point args is - # [try_block catch_var catch_block] - if finally_ !== false - push!(args, finally_) - end - if else_ !== false - push!(args, else_) - end - elseif head(node) == :filter - pushfirst!(args, last(args)) + if !haschildren(node) + return node.val + end + args = Vector{Any}(undef, length(children(node))) + args = map!(_to_expr, args, children(node)) + # Julia's standard `Expr` ASTs have children stored in a canonical + # order which is often not always source order. We permute the children + # here as necessary to get the canonical order. 
+ if is_infix(node.raw) + args[2], args[1] = args[1], args[2] + end + # Convert elements + if head(node) == :macrocall + line_node = source_location(LineNumberNode, node.source, node.position) + insert!(args, 2, line_node) + elseif head(node) in (:call, :ref) + # Move parameters block to args[2] + if length(args) > 1 && Meta.isexpr(args[end], :parameters) + insert!(args, 2, args[end]) pop!(args) - elseif head(node) == :flatten - # The order of nodes inside the generators in Julia's flatten AST - # is noncontiguous in the source text, so need to reconstruct - # Julia's AST here from our alternative `flatten` expression. - gen = Expr(:generator, args[1], args[end]) - for i in length(args)-1:-1:2 - gen = Expr(:generator, gen, args[i]) - end - args = [gen] - elseif head(node) in (:nrow, :ncat) - # For lack of a better place, the dimension argument to nrow/ncat - # is stored in the flags - pushfirst!(args, extract_numeric_flags(flags(node))) end - if head(node) == :inert || (head(node) == :quote && - length(args) == 1 && !(only(args) isa Expr)) - QuoteNode(only(args)) - else - Expr(head(node), args...) 
+ elseif head(node) in (:tuple, :parameters, :vect) + # Move parameters blocks to args[1] + if length(args) > 1 && Meta.isexpr(args[end], :parameters) + pushfirst!(args, args[end]) + pop!(args) + end + elseif head(node) == :try + # Try children in source order: + # try_block catch_var catch_block else_block finally_block + # Expr ordering: + # try_block catch_var catch_block [finally_block] [else_block] + catch_ = nothing + if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + catch_ = pop!(args) + catch_var = pop!(args) + end + finally_ = pop!(args) + else_ = pop!(args) + if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + pop!(args) + pop!(args) + push!(args, catch_var) + push!(args, catch_) + end + # At this point args is + # [try_block catch_var catch_block] + if finally_ !== false + push!(args, finally_) end + if else_ !== false + push!(args, else_) + end + elseif head(node) == :filter + pushfirst!(args, last(args)) + pop!(args) + elseif head(node) == :flatten + # The order of nodes inside the generators in Julia's flatten AST + # is noncontiguous in the source text, so need to reconstruct + # Julia's AST here from our alternative `flatten` expression. + gen = Expr(:generator, args[1], args[end]) + for i in length(args)-1:-1:2 + gen = Expr(:generator, gen, args[i]) + end + args = [gen] + elseif head(node) in (:nrow, :ncat) + # For lack of a better place, the dimension argument to nrow/ncat + # is stored in the flags + pushfirst!(args, numeric_flags(flags(node))) + end + if head(node) == :inert || (head(node) == :quote && + length(args) == 1 && !(only(args) isa Expr)) + return QuoteNode(only(args)) else - node.val + return Expr(head(node), args...) 
end end diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index b21a10dd8d411..23d825064c42a 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -1,4 +1,4 @@ -using .Tokenize.Tokens: Kind, isliteral, iskeyword, isoperator +using .Tokenize.Tokens: Kind include("token_kinds.jl") @@ -24,6 +24,11 @@ end kind(k::Kind) = k kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) +# Some renaming for consistency +is_literal(k::Kind) = TzTokens.isliteral(k) +is_keyword(k::Kind) = TzTokens.iskeyword(k) +is_operator(k::Kind) = TzTokens.isoperator(k) + # Predicates for operator precedence is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b8bc67189e086..9e288309a25e7 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -49,410 +49,422 @@ end # (add flag annotations to heads) tests = [ JuliaSyntax.parse_block => [ - "a;b;c" => "(block :a :b :c)" - "a;;;b;;" => "(block :a :b)" - "a\nb" => "(block :a :b)" + "a;b;c" => "(block a b c)" + "a;;;b;;" => "(block a b)" + "a\nb" => "(block a b)" ], JuliaSyntax.parse_stmts => [ - "a;b;c" => "(toplevel :a :b :c)" - "a;;;b;;" => "(toplevel :a :b)" + "a;b;c" => "(toplevel a b c)" + "a;;;b;;" => "(toplevel a b)" ], JuliaSyntax.parse_eq => [ # parse_assignment - "a = b" => "(= :a :b)" - "a .= b" => "(.= :a :b)" - "a += b" => "(+= :a :b)" - "a .+= b" => "(.+= :a :b)" - "a, b = c, d" => "(= (tuple :a :b) (tuple :c :d))" - "x, = xs" => "(= (tuple :x) :xs)" - "[a ~b]" => "(hcat :a (call :~ :b))" - "[a ~ b c]" => "(hcat (call :~ :a :b) :c)" + "a = b" => "(= a b)" + "a .= b" => "(.= a b)" + "a += b" => "(+= a b)" + "a .+= b" => "(.+= a b)" + "a, b = c, d" => "(= (tuple a b) (tuple c d))" + "x, = xs" => "(= (tuple x) xs)" + "[a ~b]" => "(hcat a (call ~ b))" + "[a ~ b c]" => "(hcat (call-i a ~ b) c)" ], JuliaSyntax.parse_cond => 
[ - "a ? b : c" => "(if :a :b :c)" - "a ?\nb : c" => "(if :a :b :c)" - "a ? b :\nc" => "(if :a :b :c)" - "a ? b : c:d" => "(if :a :b (call :(:) :c :d))" + "a ? b : c" => "(if a b c)" + "a ?\nb : c" => "(if a b c)" + "a ? b :\nc" => "(if a b c)" + "a ? b : c:d" => "(if a b (call-i c : d))" # Following are errors but should recover - "a? b : c" => "(if :a (error) :b :c)" - "a ?b : c" => "(if :a (error) :b :c)" - "a ? b: c" => "(if :a :b (error) :c)" - "a ? b :c" => "(if :a :b (error) :c)" - "a ? b c" => "(if :a :b (error) :c)" + "a? b : c" => "(if a (error) b c)" + "a ?b : c" => "(if a (error) b c)" + "a ? b: c" => "(if a b (error) c)" + "a ? b :c" => "(if a b (error) c)" + "a ? b c" => "(if a b (error) c)" ], JuliaSyntax.parse_arrow => [ - "x → y" => "(call :→ :x :y)" - "x <--> y" => "(call :<--> :x :y)" - "x --> y" => "(--> :x :y)" + "x → y" => "(call-i x → y)" + "x <--> y" => "(call-i x <--> y)" + "x --> y" => "(--> x y)" ], JuliaSyntax.parse_or => [ - "x || y || z" => "(|| :x (|| :y :z))" + "x || y || z" => "(|| x (|| y z))" ], JuliaSyntax.parse_and => [ - "x && y && z" => "(&& :x (&& :y :z))" + "x && y && z" => "(&& x (&& y z))" ], JuliaSyntax.parse_comparison => [ - "x > y" => "(call :> :x :y)" - "x < y < z" => "(comparison :x :< :y :< :z)" - "x == y < z" => "(comparison :x :(==) :y :< :z)" - "x <: y" => "(<: :x :y)" - "x >: y" => "(>: :x :y)" + # Type comparisons are syntactic + "x <: y" => "(<: x y)" + "x >: y" => "(>: x y)" + # Normal binary comparisons + "x < y" => "(call-i x < y)" + # Comparison chains + "x < y < z" => "(comparison x < y < z)" + "x == y < z" => "(comparison x == y < z)" ], JuliaSyntax.parse_pipe_lt => [ - "x <| y <| z" => "(call :<| :x (call :<| :y :z))" + "x <| y <| z" => "(call-i x <| (call-i y <| z))" ], JuliaSyntax.parse_pipe_gt => [ - "x |> y |> z" => "(call :|> (call :|> :x :y) :z)" + "x |> y |> z" => "(call-i (call-i x |> y) |> z)" ], JuliaSyntax.parse_range => [ - "1:2" => "(call :(:) 1 2)" - "1:2:3" => "(call :(:) 1 2 3)" - 
"a:b:c:d:e" => "(call :(:) (call :(:) :a :b :c) :d :e)" - "a :< b" => "(call (error :(:) :<) :a :b)" + "1:2" => "(call-i 1 : 2)" + "1:2:3" => "(call-i 1 : 2 3)" + "a:b:c:d:e" => "(call-i (call-i a : b c) : d e)" + "a :< b" => "(call-i a (error : <) b)" ], JuliaSyntax.parse_range => [ - "a..b" => "(call :.. :a :b)" - "a … b" => "(call :… :a :b)" - # [1 :a] ==> (vcat 1 (quote a)) - # [1 2:3 :a] ==> (vcat 1 (call-i 2 : 3) (quote a)) - "x..." => "(... :x)" - "x:y..." => "(... (call :(:) :x :y))" - "x..y..." => "(... (call :.. :x :y))" + "a..b" => "(call-i a .. b)" + "a … b" => "(call-i a … b)" + "[1 :a]" => "(hcat 1 (quote a))" + "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote a))" + "x..." => "(... x)" + "x:y..." => "(... (call-i x : y))" + "x..y..." => "(... (call-i x .. y))" ], JuliaSyntax.parse_expr => [ - # "[x +y]" ==> "(hcat x (call + y))" - # [x+y +z] ==> (hcat (call-i x + y) (call + z)) - # Conversely - # [x+y+z] ==> (hcat (call-i x + y z)) - # [x+y + z] ==> (hcat (call-i x + y z)) - "a - b - c" => "(call :- (call :- :a :b) :c)" - "a + b + c" => "(call :+ :a :b :c)" - "a +₁ b +₁ c" => "(call :+₁ (call :+₁ :a :b) :c)" - "a .+ b .+ c" => "(call :.+ (call :.+ :a :b) :c)" + "a - b - c" => "(call-i (call-i a - b) - c)" + "a + b + c" => "(call-i a + b c)" + # parse_with_chains: + # The following is two elements of a hcat + "[x +y]" => "(hcat x (call + y))" + "[x+y +z]" => "(hcat (call-i x + y) (call + z))" + # Conversely the following are infix calls + "[x+y+z]" => "(vect (call-i x + y z))" + "[x+y + z]" => "(vect (call-i x + y z))" + # Dotted and normal operators + "a +₁ b +₁ c" => "(call-i (call-i a +₁ b) +₁ c)" + "a .+ b .+ c" => "(call-i (call-i a .+ b) .+ c)" ], JuliaSyntax.parse_term => [ - "a * b * c" => "(call :* :a :b :c)" - # For parse_unary - "-2*x" => "(call :* -2 :x)" + "a * b * c" => "(call-i a * b c)" + # parse_unary + "-2*x" => "(call-i -2 * x)" ], JuliaSyntax.parse_juxtapose => [ - "2x" => "(call :* 2 :x)" - "2x" => "(call :* 2 :x)" - "2(x)" => 
"(call :* 2 :x)" - "(2)(3)x" => "(call :* 2 3 :x)" - "(x-1)y" => "(call :* (call :- :x 1) :y)" + "2x" => "(call-i 2 * x)" + "2x" => "(call-i 2 * x)" + "2(x)" => "(call-i 2 * x)" + "(2)(3)x" => "(call-i 2 * 3 x)" + "(x-1)y" => "(call-i (call-i x - 1) * y)" # errors - "\"a\"\"b\"" => "(call :* \"a\" (error) \"b\")" - "\"a\"x" => "(call :* \"a\" (error) :x)" + "\"a\"\"b\"" => "(call-i \"a\" * (error) \"b\")" + "\"a\"x" => "(call-i \"a\" * (error) x)" ], JuliaSyntax.parse_unary => [ - "+2" => "2" - "-2^x" => "(call :- (call :^ 2 :x))" - # -2[1, 3] ==> (call - (ref 2 1 3)) + "+2" => "2" + "-2^x" => "(call - (call-i 2 ^ x))" + "-2[1, 3]" => "(call - (ref 2 1 3))" ], JuliaSyntax.parse_unary_call => [ - ".+" => "(. :+)" - "+)" => ":+" - # Function calls: - "+(a,b)" => "(call :+ :a :b)" - "+(a=1,)" => "(call :+ (kw :a 1))" - # Not function calls: - "+(a;b)" => "(call :+ (block :a :b))" - "+(a=1)" => "(call :+ (= :a 1))" - "+(a;b,c)" => "(call :+ :a (parameters :b :c))" + # Standalone dotted operators are parsed as (|.| op) + ".+" => "(. +)" + ".+\n" => "(. +)" + ".+ =" => "(. +)" + ".+)" => "(. 
+)" + "+)" => "+" + # Call with type parameters or non-unary prefix call + "+{T}(x::T)" => "(call (curly + T) (:: x T))" + "*(x)" => "(call * x)" + # Prefix function calls for operators which are both binary and unary + "+(a,b)" => "(call + a b)" + "+(a=1,)" => "(call + (kw a 1))" + "+(a;b,c)" => "(call + a (parameters b c))" + # Whitespace not allowed before prefix function call bracket + "+ (a,b)" => "(call + (error) a b)" + # Prefix calls have higher precedence than ^ + "+(a,b)^2" => "(call-i (call + a b) ^ 2)" + # Unary function calls with brackets as grouping, not an arglist + "+(a;b)" => "(call + (block a b))" + "+(a=1)" => "(call + (= a 1))" + # Unary operators have lower precedence than ^ + "+(a)^2" => "(call + (call-i a ^ 2))" + # Normal unary calls (see parse_unary) + "+x" => "(call + x)" ], JuliaSyntax.parse_decl => [ - "a::b" => "(:: :a :b)" - "a->b" => "(-> :a :b)" - "a::b->c" => "(-> (:: :a :b) :c)" + "a::b" => "(:: a b)" + "a->b" => "(-> a b)" + "a::b->c" => "(-> (:: a b) c)" ], JuliaSyntax.parse_unary_prefix => [ - "&)" => ":&" - "\$\n" => ":\$" - "&a" => "(& :a)" - "::a" => "(:: :a)" - "\$a" => "(\$ :a)" - "\$\$a" => "(\$ (\$ :a))" + "&)" => "&" + "\$\n" => "\$" + "&a" => "(& a)" + "::a" => "(:: a)" + "\$a" => "(\$ a)" + "\$\$a" => "(\$ (\$ a))" ], JuliaSyntax.parse_call => [ - "f(x)" => "(call :f :x)" - "\$f(x)" => "(call (\$ :f) :x)" - "f(a,b)" => "(call :f :a :b)" - "f(a).g(b)" => "(call (. (call :f :a) (quote :g)) :b)" + "f(x)" => "(call f x)" + "\$f(x)" => "(call (\$ f) x)" + "f(a,b)" => "(call f a b)" + "f(a).g(b)" => "(call (. 
(call f a) (quote g)) b)" # do - "f() do x, y\n body end" => "(do (call :f) (-> (tuple :x :y) (block :body)))" - "f() do\nend" => "(do (call :f) (-> (tuple) (block)))" - "f() do ; body end" => "(do (call :f) (-> (tuple) (block :body)))" - "f(x) do y,z body end" => "(do (call :f :x) (-> (tuple :y :z) (block :body)))" + "f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))" + "f() do\nend" => "(do (call f) (-> (tuple) (block)))" + "f() do ; body end" => "(do (call f) (-> (tuple) (block body)))" + "f(x) do y,z body end" => "(do (call f x) (-> (tuple y z) (block body)))" # Keyword arguments depend on call vs macrocall - "foo(a=1)" => "(call :foo (kw :a 1))" - "@foo(a=1)" => """(macrocall Symbol("@foo") (= :a 1))""" - # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) - "@foo a b" => """(macrocall Symbol("@foo") :a :b)""" - "A.@foo a b" => """(macrocall (. :A (quote Symbol("@foo"))) :a :b)""" - "@A.foo a b" => """(macrocall (. :A (quote Symbol("@foo"))) :a :b)""" + "foo(a=1)" => "(call foo (kw a 1))" + "@foo(a=1)" => """(macrocall @foo (= a 1))""" + # f(x) do y body end ==> (do (call f x) (-> (tuple y) (block body))) + "@foo a b" => """(macrocall @foo a b)""" + "A.@foo a b" => """(macrocall (. A (quote @foo)) a b)""" + "@A.foo a b" => """(macrocall (. A (quote @foo)) a b)""" # Special @doc parsing rules - "@doc x\ny" => """(macrocall Symbol("@doc") :x :y)""" - "A.@doc x\ny" => """(macrocall (. :A (quote Symbol("@doc"))) :x :y)""" - "@A.doc x\ny" => """(macrocall (. :A (quote Symbol("@doc"))) :x :y)""" - "@doc x y\nz" => """(macrocall Symbol("@doc") :x :y)""" - "@doc x\n\ny" => """(macrocall Symbol("@doc") :x)""" - "@doc x\nend" => """(macrocall Symbol("@doc") :x)""" + "@doc x\ny" => """(macrocall @doc x y)""" + "A.@doc x\ny" => """(macrocall (. A (quote @doc)) x y)""" + "@A.doc x\ny" => """(macrocall (. 
A (quote @doc)) x y)""" + "@doc x y\nz" => """(macrocall @doc x y)""" + "@doc x\n\ny" => """(macrocall @doc x)""" + "@doc x\nend" => """(macrocall @doc x)""" + # .' discontinued + "f.'" => "f (error-t . ')" # Allow `@` in macrocall only in first and last position - "A.B.@x" => """(macrocall (. (. :A (quote :B)) (quote Symbol("@x"))))""" - "@A.B.x" => """(macrocall (. (. :A (quote :B)) (quote Symbol("@x"))))""" - "A.@B.x" => """(macrocall (. (. :A (quote :B)) (error) (quote Symbol("@x"))))""" - "A.@. y" => """(macrocall (. :A (quote Symbol("@__dot__"))) :y)""" - "a().@x(y)" => """(macrocall (error (. (call :a) (quote :x))) :y)""" - "a().@x y" => """(macrocall (error (. (call :a) (quote :x))) :y)""" - "a().@x{y}" => """(macrocall (error (. (call :a) (quote :x))) (braces :y))""" + "A.B.@x" => """(macrocall (. (. A (quote B)) (quote @x)))""" + "@A.B.x" => """(macrocall (. (. A (quote B)) (quote @x)))""" + "A.@B.x" => """(macrocall (. (. A (quote B)) (error-t) (quote @x)))""" + "A.@. y" => """(macrocall (. A (quote @__dot__)) y)""" + "a().@x(y)" => """(macrocall (error (. (call a) (quote x))) y)""" + "a().@x y" => """(macrocall (error (. (call a) (quote x))) y)""" + "a().@x{y}" => """(macrocall (error (. (call a) (quote x))) (braces y))""" # array indexing, typed comprehension, etc - "a[i]" => "(ref :a :i)" - "a[i,j]" => "(ref :a :i :j)" - "T[x for x in xs]" => "(typed_comprehension :T (generator :x (= :x :xs)))" + "a[i]" => "(ref a i)" + "a[i,j]" => "(ref a i j)" + "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" # Keyword params always use kw inside tuple in dot calls - "f.(a,b)" => "(. :f (tuple :a :b))" - "f.(a=1)" => "(. :f (tuple (kw :a 1)))" + "f.(a,b)" => "(. f (tuple a b))" + "f.(a=1)" => "(. f (tuple (kw a 1)))" # Other dotted syntax - "A.:+" => "(. :A (quote :+))" - "f.\$x" => "(. :f (inert (\$ :x)))" - "f.\$(x+y)" => "(. :f (inert (\$ (call :+ :x :y))))" - # .' discontinued - "f.'" => ":f (error :. Symbol(\"'\"))" + "A.:+" => "(. 
A (quote +))" + "f.\$x" => "(. f (inert (\$ x)))" + "f.\$(x+y)" => "(. f (inert (\$ (call-i x + y))))" # Field/property syntax - "f.x.y" => "(. (. :f (quote :x)) (quote :y))" + "f.x.y" => "(. (. f (quote x)) (quote y))" # Adjoint - "f'" => "(' :f)" - "f'ᵀ" => "(call Symbol(\"'ᵀ\") :f)" + "f'" => "(' f)" + "f'ᵀ" => "(call-i f 'ᵀ)" # Curly calls - "@S{a,b}" => """(macrocall Symbol("@S") (braces :a :b))""" - "S{a,b}" => "(curly :S :a :b)" + "@S{a,b}" => """(macrocall @S (braces a b))""" + "S{a,b}" => "(curly S a b)" # String macros - """x"str\"""" => """(macrocall Symbol("@x_str") "str")""" - """x`str`""" => """(macrocall Symbol("@x_cmd") "str")""" + """x"str\"""" => """(macrocall @x_str "str")""" + """x`str`""" => """(macrocall @x_cmd "str")""" # Macro sufficies can include keywords and numbers - "x\"s\"y" => """(macrocall Symbol("@x_str") "s" "y")""" - "x\"s\"end" => """(macrocall Symbol("@x_str") "s" "end")""" - "x\"s\"2" => """(macrocall Symbol("@x_str") "s" 2)""" - "x\"s\"10.0" => """(macrocall Symbol("@x_str") "s" 10.0)""" + "x\"s\"y" => """(macrocall @x_str "s" "y")""" + "x\"s\"end" => """(macrocall @x_str "s" "end")""" + "x\"s\"2" => """(macrocall @x_str "s" 2)""" + "x\"s\"10.0" => """(macrocall @x_str "s" 10.0)""" ], JuliaSyntax.parse_resword => [ # block "begin end" => "(block)" - "begin a ; b end" => "(block :a :b)" - "begin\na\nb\nend" => "(block :a :b)" + "begin a ; b end" => "(block a b)" + "begin\na\nb\nend" => "(block a b)" # quote "quote end" => "(quote (block))" - "quote body end" => "(quote (block :body))" + "quote body end" => "(quote (block body))" # while - "while cond body end" => "(while :cond (block :body))" - """ - while x < y - a - b - end""" => "(while (call :< :x :y) (block :a :b))" + "while cond body end" => "(while cond (block body))" + "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" # for - "for x in xs end" => "(for (= :x :xs) (block))" - """ - for x in xs, y in ys - a - b - end""" => "(for (block (= :x :xs) (= 
:y :ys)) (block :a :b))" + "for x in xs end" => "(for (= x xs) (block))" + "for x in xs, y in ys \n a \n end" => "(for (block (= x xs) (= y ys)) (block a))" # let - "let x=1\n end" => "(let (= :x 1) (block))" - "let x ; end" => "(let :x (block))" - "let x=1 ; end" => "(let (= :x 1) (block))" - "let x::1 ; end" => "(let (:: :x 1) (block))" - "let x=1,y=2 end" => "(let (block (= :x 1) (= :y 2)) (block))" - "let x+=1 ; end" => "(let (block (+= :x 1)) (block))" + "let x=1\n end" => "(let (= x 1) (block))" + "let x ; end" => "(let x (block))" + "let x=1 ; end" => "(let (= x 1) (block))" + "let x::1 ; end" => "(let (:: x 1) (block))" + "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" + "let x+=1 ; end" => "(let (block (+= x 1)) (block))" "let ; end" => "(let (block) (block))" - "let ; body end" => "(let (block) (block :body))" - "let\na\nb\nend" => "(let (block) (block :a :b))" + "let ; body end" => "(let (block) (block body))" + "let\na\nb\nend" => "(let (block) (block a b))" # abstract type - "abstract type A end" => "(abstract :A)" - "abstract type \n\n A \n\n end" => "(abstract :A)" - "abstract type A <: B end" => "(abstract (<: :A :B))" - "abstract type A <: B{T,S} end" => "(abstract (<: :A (curly :B :T :S)))" - "abstract type A < B end" => "(abstract (call :< :A :B))" + "abstract type A end" => "(abstract A)" + "abstract type \n\n A \n\n end" => "(abstract A)" + "abstract type A <: B end" => "(abstract (<: A B))" + "abstract type A <: B{T,S} end" => "(abstract (<: A (curly B T S)))" + "abstract type A < B end" => "(abstract (call-i A < B))" # primitive type - "primitive type A 32 end" => "(primitive :A 32)" - "primitive type A <: B \n 8 \n end" => "(primitive (<: :A :B) 8)" + "primitive type A 32 end" => "(primitive A 32)" + "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct - "struct A <: B \n a::X \n end" => "(struct false (<: :A :B) (block (:: :a :X)))" - "mutable struct A end" => "(struct true :A (block))" - "struct A end" => 
"(struct false :A (block))" - "struct try end" => "(struct false (error :try) (block))" + "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (:: a X)))" + "mutable struct A end" => "(struct true A (block))" + "struct A end" => "(struct false A (block))" + "struct try end" => "(struct false (error try) (block))" # return "return\nx" => "(return nothing)" "return)" => "(return nothing)" - "return x" => "(return :x)" - "return x,y" => "(return (tuple :x :y))" + "return x" => "(return x)" + "return x,y" => "(return (tuple x y))" # module/baremodule - "module A end" => "(module true :A (block))" - "baremodule A end" => "(module false :A (block))" - "module do \n end" => "(module true (error :do) (block))" - "module \$A end" => "(module true (\$ :A) (block))" - "module A \n a \n b \n end" => "(module true :A (block :a :b))" + "module A end" => "(module true A (block))" + "baremodule A end" => "(module false A (block))" + "module do \n end" => "(module true (error do) (block))" + "module \$A end" => "(module true (\$ A) (block))" + "module A \n a \n b \n end" => "(module true A (block a b))" # export - "export @a" => "(export Symbol(\"@a\"))" - "export a, \n @b" => "(export :a Symbol(\"@b\"))" - "export a" => "(export :a)" - "export \n a" => "(export :a)" - "export \$a, \$(a*b)" => "(export (\$ :a) (\$ (call :* :a :b)))" + "export @a" => "(export @a)" + "export a, \n @b" => "(export a @b)" + "export a" => "(export a)" + "export \n a" => "(export a)" + "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" # import ], JuliaSyntax.parse_if_elseif => [ - "if a xx elseif b yy else zz end" => "(if :a (block :xx) (elseif (block :b) (block :yy) (block :zz)))" + "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif (block b) (block yy) (block zz)))" "if end" => "(if (error) (block))" "if \n end" => "(if (error) (block))" - "if a end" => "(if :a (block))" - "if a xx end" => "(if :a (block :xx))" - "if a \n\n xx \n\n end" => "(if :a (block :xx))" - 
"if a xx elseif b yy end" => "(if :a (block :xx) (elseif (block :b) (block :yy)))" - "if a xx else if b yy end" => "(if :a (block :xx) (error) (elseif (block :b) (block :yy)))" - "if a xx else yy end" => "(if :a (block :xx) (block :yy))" + "if a end" => "(if a (block))" + "if a xx end" => "(if a (block xx))" + "if a \n\n xx \n\n end" => "(if a (block xx))" + "if a xx elseif b yy end" => "(if a (block xx) (elseif (block b) (block yy)))" + "if a xx else if b yy end" => "(if a (block xx) (error-t) (elseif (block b) (block yy)))" + "if a xx else yy end" => "(if a (block xx) (block yy))" ], JuliaSyntax.parse_const_local_global => [ - "global x = 1" => "(global (= :x 1))" - "local x = 1" => "(local (= :x 1))" - "global const x = 1" => "(const (global (= :x 1)))" - "local const x = 1" => "(const (local (= :x 1)))" - "const x = 1" => "(const (= :x 1))" - "const x,y = 1,2" => "(const (= (tuple :x :y) (tuple 1 2)))" - "const global x = 1" => "(const (global (= :x 1)))" - "const local x = 1" => "(const (local (= :x 1)))" - "global x" => "(global :x)" - "local x" => "(local :x)" - "global x,y" => "(global :x :y)" - "const x" => "(const (error :x (error)))" + "global x = 1" => "(global (= x 1))" + "local x = 1" => "(local (= x 1))" + "global const x = 1" => "(const (global (= x 1)))" + "local const x = 1" => "(const (local (= x 1)))" + "const x = 1" => "(const (= x 1))" + "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" + "const global x = 1" => "(const (global (= x 1)))" + "const local x = 1" => "(const (local (= x 1)))" + "global x" => "(global x)" + "local x" => "(local x)" + "global x,y" => "(global x y)" + "const x" => "(const (error x (error)))" ], JuliaSyntax.parse_function => [ - "function (x) body end" => "(function (tuple :x) (block :body))" - "macro (x) end" => "(macro (error (tuple :x)) (block))" - "function (x,y) end" => "(function (tuple :x :y) (block))" - "function (x=1) end" => "(function (tuple (kw :x 1)) (block))" - "function (;x=1) end" => 
"(function (tuple (parameters (kw :x 1))) (block))" - "function begin() end" => "(function (call (error :begin)) (block))" - "macro begin() end" => "(macro (call (error :begin)) (block))" - "function f() end" => "(function (call :f) (block))" - "function \n f() end" => "(function (call :f) (block))" - "function \$f() end" => "(function (call (\$ :f)) (block))" - "function f()::T end" => "(function (:: (call :f) :T) (block))" - "function f()::g(T) end" => "(function (:: (call :f) (call :g :T)) (block))" - "function f() \n a \n b end" => "(function (call :f) (block :a :b))" - "function f() end" => "(function (call :f) (block))" + "function (x) body end" => "(function (tuple x) (block body))" + "macro (x) end" => "(macro (error (tuple x)) (block))" + "function (x,y) end" => "(function (tuple x y) (block))" + "function (x=1) end" => "(function (tuple (kw x 1)) (block))" + "function (;x=1) end" => "(function (tuple (parameters (kw x 1))) (block))" + "function begin() end" => "(function (call (error begin)) (block))" + "macro begin() end" => "(macro (call (error begin)) (block))" + "function f() end" => "(function (call f) (block))" + "function \n f() end" => "(function (call f) (block))" + "function \$f() end" => "(function (call (\$ f)) (block))" + "function f()::T end" => "(function (:: (call f) T) (block))" + "function f()::g(T) end" => "(function (:: (call f) (call g T)) (block))" + "function f() \n a \n b end" => "(function (call f) (block a b))" + "function f() end" => "(function (call f) (block))" ], JuliaSyntax.parse_try => [ "try \n x \n catch e \n y \n finally \n z end" => - "(try (block :x) :e (block :y) false (block :z))" + "(try (block x) e (block y) false (block z))" ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") => - "(try (block :x) :e (block :y) (block :z) (block :w))" - "try x catch ; y end" => "(try (block :x) false (block :y) false false)" - "try x catch \n y end" => "(try (block :x) false (block :y) false false)" - "try x catch 
e y end" => "(try (block :x) :e (block :y) false false)" - "try x finally y end" => "(try (block :x) false false false (block :y))" + "(try (block x) e (block y) (block z) (block w))" + "try x catch ; y end" => "(try (block x) false (block y) false false)" + "try x catch \n y end" => "(try (block x) false (block y) false false)" + "try x catch e y end" => "(try (block x) e (block y) false false)" + "try x finally y end" => "(try (block x) false false false (block y))" # v1.8 only ((v=v"1.8",), "try catch ; else end") => "(try (block) false (block) (block) false)" ((v=v"1.8",), "try else end") => "(try (block) false false (error (block)) false)" ((v=v"1.7",), "try catch ; else end") => "(try (block) false (block) (error (block)) false)" # finally before catch :-( - "try x finally y catch e z end" => "(try (block :x) false false false (block :y) :e (block :z))" + "try x finally y catch e z end" => "(try-f (block x) false false false (block y) e (block z))" ], JuliaSyntax.parse_iteration_spec => [ - "i = rhs" => "(= :i :rhs)" - "i in rhs" => "(= :i :rhs)" - "i ∈ rhs" => "(= :i :rhs)" - "i = 1:10" => "(= :i (call :(:) 1 10))" - "(i,j) in iter" => "(= (tuple :i :j) :iter)" + "i = rhs" => "(= i rhs)" + "i in rhs" => "(= i rhs)" + "i ∈ rhs" => "(= i rhs)" + "i = 1:10" => "(= i (call-i 1 : 10))" + "(i,j) in iter" => "(= (tuple i j) iter)" ], JuliaSyntax.parse_paren => [ # Parentheses used for grouping - "(a * b)" => "(call :* :a :b)" - "(a=1)" => "(= :a 1)" - "(x)" => ":x" + "(a * b)" => "(call-i a * b)" + "(a=1)" => "(= a 1)" + "(x)" => "x" # Tuple syntax with commas "()" => "(tuple)" - "(x,)" => "(tuple :x)" - "(x,y)" => "(tuple :x :y)" - "(x=1, y=2)" => "(tuple (= :x 1) (= :y 2))" + "(x,)" => "(tuple x)" + "(x,y)" => "(tuple x y)" + "(x=1, y=2)" => "(tuple (= x 1) (= y 2))" # Named tuples with initial semicolon "(;)" => "(tuple (parameters))" - "(; a=1)" => "(tuple (parameters (kw :a 1)))" + "(; a=1)" => "(tuple (parameters (kw a 1)))" # Extra credit: nested parameters 
and frankentuples - "(; a=1; b=2)" => "(tuple (parameters (kw :a 1) (parameters (kw :b 2))))" - "(a; b; c,d)" => "(tuple :a (parameters :b (parameters :c :d)))" - "(a=1, b=2; c=3)" => "(tuple (= :a 1) (= :b 2) (parameters (kw :c 3)))" + "(; a=1; b=2)" => "(tuple (parameters (kw a 1) (parameters (kw b 2))))" + "(a; b; c,d)" => "(tuple a (parameters b (parameters c d)))" + "(a=1, b=2; c=3)" => "(tuple (= a 1) (= b 2) (parameters (kw c 3)))" # Block syntax "(;;)" => "(block)" - "(a=1;)" => "(block (= :a 1))" - "(a;b;;c)" => "(block :a :b :c)" - "(a=1; b=2)" => "(block (= :a 1) (= :b 2))" + "(a=1;)" => "(block (= a 1))" + "(a;b;;c)" => "(block a b c)" + "(a=1; b=2)" => "(block (= a 1) (= b 2))" # Generators - "(x for x in xs)" => "(generator :x (= :x :xs))" + "(x for x in xs)" => "(generator x (= x xs))" ], JuliaSyntax.parse_atom => [ - ":foo" => "(quote :foo)" + ":foo" => "(quote foo)" # Literal colons - ":)" => ":(:)" - ": end" => ":(:)" + ":)" => ":" + ": end" => ":" # Special symbols quoted - ":end" => "(quote :end)" - ":(end)" => "(quote (error :end))" - ":<:" => "(quote :<:)" + ":end" => "(quote end)" + ":(end)" => "(quote (error end))" + ":<:" => "(quote <:)" # Macro names can be keywords - "@end x" => """(macrocall Symbol("@end") :x)""" + "@end x" => """(macrocall @end x)""" # __dot__ macro - "@. x y" => """(macrocall Symbol("@__dot__") :x :y)""" + "@. 
x y" => """(macrocall @__dot__ x y)""" # parse_cat "[]" => "(vect)" - "[x,]" => "(vect :x)" - "[x]" => "(vect :x)" - "[x \n ]" => "(vect :x)" - "[x \n\n ]" => "(vect :x)" + "[x,]" => "(vect x)" + "[x]" => "(vect x)" + "[x \n ]" => "(vect x)" + "[x \n\n ]" => "(vect x)" # parse_comprehension / parse_generator - "[x for x in xs]" => "(comprehension (generator :x (= :x :xs)))" - "[x \n\n for x in xs]" => "(comprehension (generator :x (= :x :xs)))" - "[(x)for x in xs]" => "(comprehension (generator :x (error) (= :x :xs)))" - "[xy for x in xs for y in ys]" => "(comprehension (flatten :xy (= :x :xs) (= :y :ys)))" + "[x for x in xs]" => "(comprehension (generator x (= x xs)))" + "[x \n\n for x in xs]" => "(comprehension (generator x (= x xs)))" + "[(x)for x in xs]" => "(comprehension (generator x (error) (= x xs)))" + "[xy for x in xs for y in ys]" => "(comprehension (flatten xy (= x xs) (= y ys)))" # parse_vect - "[x, y]" => "(vect :x :y)" - "[x, y]" => "(vect :x :y)" - "[x,y ; z]" => "(vect :x :y (parameters :z))" - "[x=1, y=2]" => "(vect (= :x 1) (= :y 2))" - "[x=1, ; y=2]" => "(vect (= :x 1) (parameters (= :y 2)))" + "[x, y]" => "(vect x y)" + "[x, y]" => "(vect x y)" + "[x,y ; z]" => "(vect x y (parameters z))" + "[x=1, y=2]" => "(vect (= x 1) (= y 2))" + "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" # parse_paren - ":(=)" => "(quote :(=))" - ":(::)" => "(quote :(::))" + ":(=)" => "(quote =)" + ":(::)" => "(quote ::)" # Errors - ": foo" => "(quote (error) :foo)" + ": foo" => "(quote (error-t) foo)" ], JuliaSyntax.parse_atom => [ # parse_array # Normal matrix construction syntax - "[x y ; z w]" => "(vcat (row :x :y) (row :z :w))" - "[x y ; z w ; a b]" => "(vcat (row :x :y) (row :z :w) (row :a :b))" - "[x ; y ; z]" => "(vcat :x :y :z)" - "[x;]" => "(vcat :x)" - "[x y]" => "(hcat :x :y)" + "[x y ; z w]" => "(vcat (row x y) (row z w))" + "[x y ; z w ; a b]" => "(vcat (row x y) (row z w) (row a b))" + "[x ; y ; z]" => "(vcat x y z)" + "[x;]" => "(vcat x)" + "[x 
y]" => "(hcat x y)" # Mismatched rows - "[x y ; z]" => "(vcat (row :x :y) :z)" + "[x y ; z]" => "(vcat (row x y) z)" # Double semicolon with spaces allowed (only) for line continuation - "[x y ;;\n z w]" => "(hcat :x :y :z :w)" + "[x y ;;\n z w]" => "(hcat x y z w)" # "[x y ;; z w]" => "(hcat x y (error) z w)" # FIXME - # FIXME: S-expr printing issues with ncat - # # Single elements in rows - # "[x ; y ;; z ]" => "(ncat 2 (nrow 1 :x :y) :z)" - # "[x y ;;; z ]" => "(ncat 3 (row :x :y) :z)" - # # Higher dimensional ncat - # # Row major - # "[x y ; z w ;;; a b ; c d]" => - # "(ncat 3 (nrow 1 (row :x :y) (row :z :w)) (nrow 1 (row :a :b) (row :c :d)))" - # # Column major - # "[x ; y ;; z ; w ;;; a ; b ;; c ; d]" => - # "(ncat 3 (nrow 2 (nrow 1 :x :y) (nrow 1 :z :w)) (nrow 2 (nrow 1 :a :b) (nrow 1 :c :d)))" + # Single elements in rows + "[x ; y ;; z ]" => "(ncat-2 (nrow-1 x y) z)" + "[x y ;;; z ]" => "(ncat-3 (row x y) z)" + # Higher dimensional ncat + # Row major + "[x y ; z w ;;; a b ; c d]" => + "(ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d)))" + # Column major + "[x ; y ;; z ; w ;;; a ; b ;; c ; d]" => + "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" ], JuliaSyntax.parse_docstring => [ - "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" :foo)" + "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" foo)" ], ] diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index deb5a70dd6993..19ac153b4d39d 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -6,10 +6,10 @@ using Base.Meta: @dump using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, - raw_flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, + flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild!, SyntaxHead -using JuliaSyntax: Kind, @K_str, isliteral, iskeyword, isoperator +using JuliaSyntax: Kind, @K_str, is_literal, is_keyword, is_operator using JuliaSyntax: highlight using 
JuliaSyntax: ParseStream, peek, peek_token, @@ -20,12 +20,12 @@ using JuliaSyntax: ParseState # Shortcuts for defining raw syntax nodes # Trivia nodes -T(k, s) = GreenNode(SyntaxHead(k, raw_flags(trivia=true)), s, ) +T(k, s) = GreenNode(SyntaxHead(k, flags(trivia=true)), s, ) # Non-trivia nodes -N(k, s) = GreenNode(SyntaxHead(k, raw_flags()), s) -N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, raw_flags()), args...) +N(k, s) = GreenNode(SyntaxHead(k, flags()), s) +N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags()), args...) # Non-trivia, infix form -NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, raw_flags(infix=true)), args...) +NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags(infix=true)), args...) include("parse_stream.jl") include("parser.jl") diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/test/simple_parser.jl index 9ca3f8c9b02d7..9e8c517b6d0c8 100644 --- a/JuliaSyntax/test/simple_parser.jl +++ b/JuliaSyntax/test/simple_parser.jl @@ -109,7 +109,7 @@ function parse_atom(st) bump_trivia(st, skip_newlines=true) mark = position(st) k = peek(st) - if k == K"Identifier" || isliteral(k) + if k == K"Identifier" || is_literal(k) bump(st) elseif k in (K"-", K"+") bump(st) From 2d414d4e1c6f93ec3429931275df41b75b2b1ec0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 13:29:36 +1000 Subject: [PATCH 0267/1109] Hooks to connect the JuliaSyntax parser to the Julia runtime --- JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/hooks.jl | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 JuliaSyntax/src/hooks.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 505e8adfd0feb..ff9829a951a18 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -19,6 +19,6 @@ include("parse_stream.jl") include("parser.jl") -# include("hooks.jl") +include("hooks.jl") end diff --git a/JuliaSyntax/src/hooks.jl 
b/JuliaSyntax/src/hooks.jl new file mode 100644 index 0000000000000..6451c269e0579 --- /dev/null +++ b/JuliaSyntax/src/hooks.jl @@ -0,0 +1,75 @@ +# Error type for displaying errors in the Julia REPL +struct ParseError <: Exception + code::String + stream::ParseStream +end + +function Base.showerror(io::IO, err::ParseError, bt; backtrace=false) + show_diagnostics(io, err.stream, err.code) +end +Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) + +function Base.showerror(io::IO, err::ParseError) + show_diagnostics(io, err.stream, err.code) +end + +# Adaptor for the API/ABI expected by the Julia runtime code. +function core_parser_hook(code, filename, offset, options) + try + if code isa Core.SimpleVector # May be passed in from C entry points + (ptr,len) = code + code = String(unsafe_wrap(Array, ptr, len)) + end + + code = code[offset+1:end] # FIXME!! + + stream = ParseStream(code) + if options === :atom + parse_atom(ParseState(stream)) + elseif options === :statement + parse_stmts(ParseState(stream)) + elseif options === :all + parse_all(stream) + end + + if !isempty(stream.diagnostics) + ex = Expr(:error, ParseError(code, stream)) + else + green_tree = build_tree(GreenNode, stream) + src = SourceFile(code; filename) + tree = SyntaxNode(src, green_tree) + ex = Expr(tree) + end + pos = offset + stream.next_byte-1 + + # Rewrap result in an svec for use by the C code + return Core.svec(ex, pos) + catch exc + @error("JuliaSyntax parser failed — falling back to flisp!", + exception=(exc,catch_backtrace()), + offset=offset, + code=code) + end + return Core.Compiler.fl_parse(code, filename, offset, options) +end + +""" +Connect the JuliaSyntax parser to the Julia runtime so that it replaces the +flisp parser for all parsing work. + +That is, JuliaSyntax will be used for `include()` `Meta.parse()`, the REPL, etc. +""" +function enable_in_core!() + # TODO: Use invoke_in_world to freeze the world age at the time this was enabled. 
+ Base.eval(Core, :(_parse = $core_parser_hook)) + nothing +end + +""" +Revert to the flisp parser for all parsing work. +""" +function disable_in_core!() + Base.eval(Core, :(_parse = Core.Compiler.fl_parse)) + nothing +end + From 13eaa23f1e9e4e7780ca25260cae285d3de6860b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 13:40:22 +1000 Subject: [PATCH 0268/1109] Fix typed_ncat Expr conversion --- JuliaSyntax/src/syntax_tree.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 2caf71c35f11e..5f3556cb1c228 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -420,6 +420,8 @@ function _to_expr(node::SyntaxNode) # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags pushfirst!(args, numeric_flags(flags(node))) + elseif head(node) == :typed_ncat + insert!(args, 2, numeric_flags(flags(node))) end if head(node) == :inert || (head(node) == :quote && length(args) == 1 && !(only(args) isa Expr)) From 6d491f2d786c8c7a9927312e22b7fa810cfe41f8 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 21:44:51 +1000 Subject: [PATCH 0269/1109] Tokenize: Add as keyword for import/using --- JuliaSyntax/Tokenize/src/lexer.jl | 1 + JuliaSyntax/Tokenize/src/token_kinds.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 46f4de1320db2..aba943a5c82f2 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -1073,6 +1073,7 @@ end kws = [ Tokens.ABSTRACT, +Tokens.AS, Tokens.BAREMODULE, Tokens.BEGIN, Tokens.BREAK, diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 24d5aeebf6ea7..f9efb4c312d88 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -12,6 +12,7 @@ begin_keywords, KEYWORD, # general ABSTRACT, + AS, BAREMODULE, 
BEGIN, BREAK, From 31204ae4859c9288feaf562a970f31d7a6bd2140 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 30 Dec 2021 22:21:17 +1000 Subject: [PATCH 0270/1109] parse_imports: import / using statements --- JuliaSyntax/README.md | 6 + JuliaSyntax/src/parse_stream.jl | 22 ++-- JuliaSyntax/src/parser.jl | 191 ++++++++++++++++++++++++-------- JuliaSyntax/src/token_kinds.jl | 3 +- JuliaSyntax/test/parser.jl | 34 ++++++ 5 files changed, 199 insertions(+), 57 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 2dcf7b9c4c483..d057f545675f6 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -552,3 +552,9 @@ xy * The `elseif` condition is always in a block but not the `if` condition. Presumably because of the need to add a line number node in the flisp parser `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` + +* `import . .A` is allowed, and parsed the same as `import ..A` + +* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` + can't be a normal identifier. + diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index d7603cd76fefe..0290ee0811462 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -347,19 +347,23 @@ function bump_glue(stream::ParseStream, kind, flags, num_tokens) end """ -Bump a token, splitting it into two pieces. +Bump a token, splitting it into several pieces Wow, this is a hack! It helps resolves the occasional lexing ambiguities. For -example whether .+ should be a single token or a composite (. +) +example +* Whether .+ should be a single token or a composite (. +) +* Whether ... is splatting (most of the time) or three . tokens in import paths """ -function bump_split(stream::ParseStream, num_bytes, kind1, flags1, kind2, flags2) +function bump_split(stream::ParseStream, split_spec...) 
tok = popfirst!(stream.lookahead) - push!(stream.ranges, TaggedRange(SyntaxHead(kind1, flags1), - first_byte(tok), first_byte(tok)+num_bytes-1, - lastindex(stream.ranges) + 1)) - push!(stream.ranges, TaggedRange(SyntaxHead(kind2, flags2), - first_byte(tok)+num_bytes, last_byte(tok), - lastindex(stream.ranges) + 1)) + fbyte = first_byte(tok) + for (i, (nbyte, kind, flags)) in enumerate(split_spec) + lbyte = i == length(split_spec) ? last_byte(tok) : fbyte + nbyte - 1 + push!(stream.ranges, TaggedRange(SyntaxHead(kind, flags), + fbyte, lbyte, + lastindex(stream.ranges) + 1)) + fbyte += nbyte + end # Returning position(stream) like the other bump* methods would be # ambiguous here; return nothing instead. nothing diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0d4e22985ad15..5377ca9116592 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -870,9 +870,7 @@ function parse_unary_call(ps::ParseState) # .+ = ==> (. +) # .+) ==> (. +) bump_trivia(ps) - bump_split(ps, 1, - K".", TRIVIA_FLAG, - op_k, EMPTY_FLAGS) + bump_split(ps, (1, K".", TRIVIA_FLAG), (0, op_k, EMPTY_FLAGS)) emit(ps, mark, K".") else # return operator by itself, as in @@ -1085,21 +1083,6 @@ function parse_identifier_or_interpolate(ps::ParseState, outermost=true) end end -function parse_export_symbol(ps::ParseState) - bump_trivia(ps) - if peek(ps) == K"@" - # export @a ==> (export @a) - # export a, \n @b ==> (export a @b) - bump(ps, TRIVIA_FLAG) - parse_macro_name(ps, remap_kind=true) - else - # export a ==> (export a) - # export \n a ==> (export a) - # export $a, $(a*b) ==> (export ($ a) ($ (call * a b))) - parse_identifier_or_interpolate(ps) - end -end - # Emit an error if the call chain syntax is not a valid module reference function emit_modref_error(ps, mark) emit(ps, mark, K"error", error="not a valid module reference") @@ -1548,10 +1531,10 @@ function parse_resword(ps::ParseState) # export a # export a, b, bump(ps, TRIVIA_FLAG) - 
parse_comma_separated(ps, parse_export_symbol) + parse_comma_separated(ps, parse_atsym) emit(ps, mark, K"export") elseif word in (K"import", K"using") - TODO("parse_resword - $word") + parse_imports(ps) elseif word == K"do" bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") else @@ -1846,11 +1829,6 @@ function parse_do(ps::ParseState) emit(ps, mark, K"->") end -# flisp: parse-imports -function parse_imports(ps::ParseState, word) - TODO("parse_imports unimplemented") -end - function macro_name_kind(k) return k == K"Identifier" ? K"MacroName" : k == K"." ? K"@." : @@ -1875,24 +1853,154 @@ function parse_macro_name(ps::ParseState; remap_kind=false) end end +# Parse an identifier, interpolation of @-prefixed symbol +# # flisp: parse-atsym function parse_atsym(ps::ParseState) - TODO("parse_atsym unimplemented") + bump_trivia(ps) + if peek(ps) == K"@" + # export @a ==> (export @a) + # export a, \n @b ==> (export a @b) + bump(ps, TRIVIA_FLAG) + parse_macro_name(ps, remap_kind=true) + else + # export a ==> (export a) + # export \n a ==> (export a) + # export $a, $(a*b) ==> (export ($ a) ($ (call * a b))) + parse_identifier_or_interpolate(ps) + end end -# flisp: parse-import-dots -function parse_import_dots(ps::ParseState) - TODO("parse_import_dots unimplemented") +# Parse import and using syntax +# +# flisp: parse-imports +function parse_imports(ps::ParseState) + mark = position(ps) + word = peek(ps) + @assert word in (K"import", K"using") + bump(ps, TRIVIA_FLAG) + emark = position(ps) + initial_as = parse_import(ps, word, false) + t = peek_token(ps) + k = kind(t) + has_import_prefix = false # true if we have `prefix:` in `import prefix: stuff` + has_comma = false + if k == K":" && !t.had_whitespace + bump(ps, TRIVIA_FLAG) + has_import_prefix = true + if initial_as + # import A as B: x ==> (import (: (error (as (. A) B)) (. 
x))) + emit(ps, emark, K"error", error="`as` before `:` in import/using") + end + elseif k == K"," + bump(ps, TRIVIA_FLAG) + has_comma = true + end + if has_import_prefix || has_comma + # import x, y ==> (import (. x) (. y)) + # import A: x, y ==> (import (: (. A) (. x) (. y))) + parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix)) + if peek(ps) == K":" + # Error recovery + # import A: x, B: y ==> (import (: (. A) (. x) (. B) (error-t (. y)))) + emark = position(ps) + bump(ps, TRIVIA_FLAG) + parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix)) + emit(ps, emark, K"error", TRIVIA_FLAG, + error="`:` can only be used when importing a single module. Split imports into multiple lines") + end + end + if has_import_prefix + # import A: x ==> (import (: (. A) (. x))) + emit(ps, mark, K":") + end + # using A ==> (using (. A)) + # import A ==> (import (. A)) + emit(ps, mark, word) end -# flisp: parse-import-path -function parse_import_path(ps::ParseState, word) - TODO("parse_import_path unimplemented") +# Parse individual module path and renaming with `as` +# +# flisp: parse-import +function parse_import(ps::ParseState, word, has_import_prefix) + mark = position(ps) + parse_import_path(ps) + # import A: x, y ==> (import (: (. A) (. x) (. y))) + if peek(ps) == K"as" + # import A as B ==> (import (as (. A) B)) + # import A: x as y ==> (import (: (. A) (as (. x) y))) + # using A: x as y ==> (using (: (. A) (as (. x) y))) + bump(ps, TRIVIA_FLAG) + parse_atsym(ps) + emit(ps, mark, K"as") + if ps.julia_version < v"1.6" + #v1.5: import A as B ==> (import (error (as (. A) B))) + emit(ps, mark, K"error", + error="`import` with renaming using `as` requires at least Julia 1.6") + elseif word == K"using" && !has_import_prefix + # using A as B ==> (using (error (as (. A) B))) + # using A, B as C ==> (using (. A) (error (as (. 
B) C))) + emit(ps, mark, K"error", + error="`using` with `as` renaming requires a `:` and context module") + end + return true + else + return false + end end -# flisp: parse-import -function parse_import(ps::ParseState, word, from) - TODO("parse_import unimplemented") +# flisp: parse-import-path +function parse_import_path(ps::ParseState) + mark = position(ps) + bump_trivia(ps) + # The tokenizer produces conjoined dotted tokens .. and ... + # When parsing import we must split these into single dots + # import .A ==> (import (. . A)) + # import ..A ==> (import (. . . A)) + # import ...A ==> (import (. . . . A)) + # import ....A ==> (import (. . . . . A)) + # Dots with spaces are allowed (a misfeature?) + # import . .A ==> (import (. . . A)) + while true + k = peek(ps) + if k == K"." + bump(ps) + elseif k == K".." + bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) + elseif k == K"..." + bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) + else + break + end + end + # import @x ==> (import (. @x)) + # import $A ==> (import (. ($ A))) + parse_atsym(ps) + while true + k = peek(ps) + if k == K"." + # import A.B ==> (import (. A B)) + # import $A.@x ==> (import (. ($ A) @x)) + # import A.B.C ==> (import (. A B C)) + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + parse_atsym(ps) + elseif k in (K"NewlineWs", K";", K",", K":", K"EndMarker") + # import A; B ==> (import (. A)) + break + elseif k == K".." + # Nonsensical?? + # import A.. ==> (import (. A .)) + bump_split(ps, (1,K".",TRIVIA_FLAG), (1,K".",EMPTY_FLAGS)) + elseif k == K"..." + # Import the .. operator + # import A... ==> (import (. A ..)) + bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) + else + break + end + end + emit(ps, mark, K".") end # parse comma-separated assignments, like "i=1:n,j=1:m,..." 
@@ -1912,11 +2020,6 @@ function parse_comma_separated(ps::ParseState, down) return n_subexprs end -# flisp: parse-comma-separated-assignments -function parse_comma_separated_assignments(ps::ParseState) - TODO("parse_comma_separated_assignments unimplemented") -end - # FIXME(sschaub): for backwards compatibility, allows newline before =/in/∈ # in generator expressions. See issue #37393 function peek_skip_newline_in_gen(ps::ParseState, n=1) @@ -1963,12 +2066,6 @@ function parse_iteration_spec(ps::ParseState) emit(ps, mark, K"=") end -# flisp: parse-comma-separated-iters -function parse_comma_separated_iters(ps::ParseState) - # FIXME REmove? - parse_comma_separated(ps, parse_iteration_spec) -end - # flisp: parse-space-separated-exprs function parse_space_separated_exprs(ps::ParseState) with_space_sensitive(ps) do ps @@ -2050,7 +2147,7 @@ function parse_generator(ps::ParseState, mark, flatten=false) @assert kind(t) == K"for" bump(ps, TRIVIA_FLAG) filter_mark = position(ps) - parse_comma_separated_iters(ps) + parse_comma_separated(ps, parse_iteration_spec) if peek(ps) == K"if" bump(ps, TRIVIA_FLAG) parse_cond(ps) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index e5e13a747449a..9c660e1e101eb 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -15,6 +15,7 @@ Dict([ "BEGIN_KEYWORDS" => Ts.begin_keywords "Keyword" => Ts.KEYWORD "abstract" => Ts.ABSTRACT +"as" => Ts.AS "baremodule" => Ts.BAREMODULE "begin" => Ts.BEGIN "break" => Ts.BREAK @@ -877,7 +878,7 @@ const _kind_to_str_unique = for c in "([{}])@,;" _kind_to_str_unique[_str_to_kind[string(c)]] = string(c) end -for kw in split("""abstract baremodule begin break catch const +for kw in split("""as abstract baremodule begin break catch const continue do else elseif end export finally for function global if import let local macro module mutable new outer primitive quote diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 
9e288309a25e7..399a80ef326d4 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -371,6 +371,40 @@ tests = [ # finally before catch :-( "try x finally y catch e z end" => "(try-f (block x) false false false (block y) e (block z))" ], + JuliaSyntax.parse_imports => [ + "import A as B: x" => "(import (: (error (as (. A) B)) (. x)))" + "import x, y" => "(import (. x) (. y))" + "import A: x, y" => "(import (: (. A) (. x) (. y)))" + "import A: x, B: y" => "(import (: (. A) (. x) (. B) (error-t (. y))))" + "import A: x" => "(import (: (. A) (. x)))" + "using A" => "(using (. A))" + "import A" => "(import (. A))" + # parse_import + "import A: x, y" => "(import (: (. A) (. x) (. y)))" + "import A as B" => "(import (as (. A) B))" + "import A: x as y" => "(import (: (. A) (as (. x) y)))" + "using A: x as y" => "(using (: (. A) (as (. x) y)))" + ((v=v"1.5",), "import A as B") => "(import (error (as (. A) B)))" + "using A as B" => "(using (error (as (. A) B)))" + "using A, B as C" => "(using (. A) (error (as (. B) C)))" + # parse_import_path + # When parsing import we must split these into single dots + "import .A" => "(import (. . A))" + "import ..A" => "(import (. . . A))" + "import ...A" => "(import (. . . . A))" + "import ....A" => "(import (. . . . . A))" + # Dots with spaces are allowed (a misfeature?) + "import . .A" => "(import (. . . A))" + # Expressions allowed in import paths + "import @x" => "(import (. @x))" + "import \$A" => "(import (. (\$ A)))" + "import \$A.@x" => "(import (. (\$ A) @x))" + "import A.B" => "(import (. A B))" + "import A.B.C" => "(import (. A B C))" + "import A; B" => "(import (. A))" + "import A.." => "(import (. A .))" + "import A..." => "(import (. 
A ..))" + ], JuliaSyntax.parse_iteration_spec => [ "i = rhs" => "(= i rhs)" "i in rhs" => "(= i rhs)" From e352bb169610c5fc959a02cb0e15372a6dcc3ad9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 31 Dec 2021 06:19:08 +1000 Subject: [PATCH 0271/1109] Ensure bump_glue / bump_split update the next byte index. This bug caused the output stream to get out of sync with the text, with all sorts of messy consequences. --- JuliaSyntax/src/parse_stream.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 0290ee0811462..8b6b69cb9c280 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -343,6 +343,8 @@ function bump_glue(stream::ParseStream, kind, flags, num_tokens) lastindex(stream.ranges) + 1) Base._deletebeg!(stream.lookahead, num_tokens) push!(stream.ranges, span) + stream.next_byte = last_byte(last(stream.ranges)) + 1 + stream.peek_count = 0 return position(stream) end @@ -364,6 +366,8 @@ function bump_split(stream::ParseStream, split_spec...) lastindex(stream.ranges) + 1)) fbyte += nbyte end + stream.next_byte = last_byte(last(stream.ranges)) + 1 + stream.peek_count = 0 # Returning position(stream) like the other bump* methods would be # ambiguous here; return nothing instead. nothing From c1864b15f45e45f8c8c0bc58d5e04f4639c71107 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 1 Jan 2022 08:59:34 +1000 Subject: [PATCH 0272/1109] Tokenize: Lex raw strings --- JuliaSyntax/Tokenize/src/lexer.jl | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index aba943a5c82f2..7feaff4b5f7d6 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -804,7 +804,7 @@ end # Lex var"..." identifiers. 
# The prefix `var"` has been consumed function lex_var(l::Lexer) - if read_string(l, Tokens.STRING) + if read_raw_string(l, Tokens.STRING) return emit(l, Tokens.VAR_IDENTIFIER) else return emit_error(l, Tokens.EOF_VAR) @@ -828,8 +828,37 @@ function string_terminated(l, kind::Tokens.Kind) return false end +# Read a raw string for use with custom string macros +# +# Raw strings treat all characters as literals with the exception that the +# closing quotes can be escaped with an odd number of \ characters. +function read_raw_string(l::Lexer, kind::Tokens.Kind) + delim = kind == Tokens.STRING || kind == Tokens.TRIPLE_STRING ? '"' : '`' + while true + c = readchar(l) + if c == '\\' + n = 0 + while c == '\\' + n += 1 + c = readchar(l) + end + if c == delim && !iseven(n) + c = readchar(l) + end + end + if string_terminated(l, kind) + return true + elseif eof(c) + return false + end + end +end + # We just consumed a ", """, `, or ``` function read_string(l::Lexer, kind::Tokens.Kind) + if l.last_token == Tokens.IDENTIFIER || l.last_token == Tokens.KEYWORD + return read_raw_string(l, kind) + end while true c = readchar(l) if c == '\\' From fafeecb08de81040649e05373584a5ce4af8df7a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 2 Jan 2022 21:20:17 +1000 Subject: [PATCH 0273/1109] Adapt Tokenize tests to run from JuliaSyntax test suite --- JuliaSyntax/Tokenize/test/lexer.jl | 6 +++--- JuliaSyntax/Tokenize/test/runtests.jl | 5 +++-- JuliaSyntax/test/runtests.jl | 4 ++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 957cad69ee9f3..4a2d929526667 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -1,5 +1,5 @@ -using Tokenize -using Tokenize.Lexers +using JuliaSyntax.Tokenize +using JuliaSyntax.Tokenize.Lexers using Test const T = Tokenize.Tokens @@ -485,7 +485,7 @@ end @testset "dotted and suffixed operators" begin -ops = 
collect(values(Main.Tokenize.Tokens.UNICODE_OPS_REVERSE)) +ops = collect(values(Tokenize.Tokens.UNICODE_OPS_REVERSE)) for op in ops op in (:isa, :in, :where, Symbol('\''), :?, :(:)) && continue diff --git a/JuliaSyntax/Tokenize/test/runtests.jl b/JuliaSyntax/Tokenize/test/runtests.jl index 77fc777d005a0..9c8267640f97a 100644 --- a/JuliaSyntax/Tokenize/test/runtests.jl +++ b/JuliaSyntax/Tokenize/test/runtests.jl @@ -1,8 +1,9 @@ using Test, Printf -import Tokenize +import JuliaSyntax.Tokenize -include("lex_yourself.jl") +# Takes 10s to run +# include("lex_yourself.jl") @testset "lexer" begin include("lexer.jl") end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 19ac153b4d39d..0a76994b32567 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -27,6 +27,10 @@ N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags()), args...) # Non-trivia, infix form NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags(infix=true)), args...) +module TokenizeTests + include("../Tokenize/test/runtests.jl") +end + include("parse_stream.jl") include("parser.jl") From 57bb9a6c9fc423362f2d38ae5b92e7aa75eafe5e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 2 Jan 2022 21:51:28 +1000 Subject: [PATCH 0274/1109] Tokenize: Rework string interpolations to emit source tokens This change completely reworks how tokenization of strings happens so that expressions within string interpolations are correctly tokenized as source tokens rather than being part of the surrounding string. Quotes and backticks are now emitted separately from the string that they delimit - * " and """ are DQUOTE and TRIPLE_DQUOTE * ` and ``` are BACKTICK and TRIPLE_BACKTICK * The enclosed string fragment is STRING (regardless of the delimiters that it's enclosed within) Raw strings (those within custom string macros or backticks) are parsed according to the Julia parser's rules. 
--- JuliaSyntax/Tokenize/src/_precompile.jl | 2 - JuliaSyntax/Tokenize/src/lexer.jl | 297 ++++++++++++--------- JuliaSyntax/Tokenize/src/token.jl | 18 +- JuliaSyntax/Tokenize/src/token_kinds.jl | 8 +- JuliaSyntax/Tokenize/test/lexer.jl | 329 ++++++++++++++---------- 5 files changed, 395 insertions(+), 259 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/_precompile.jl b/JuliaSyntax/Tokenize/src/_precompile.jl index f5797cf011efa..fe4e7721d5669 100644 --- a/JuliaSyntax/Tokenize/src/_precompile.jl +++ b/JuliaSyntax/Tokenize/src/_precompile.jl @@ -68,7 +68,6 @@ function _precompile_() precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Bool)) - precompile(Tokenize.Lexers.lex_cmd, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) @@ -76,7 +75,6 @@ function _precompile_() precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.read_string, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Tokenize.Tokens.Kind,)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 
1}},Tokenize.Tokens.Token}, Char,)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,)) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 7feaff4b5f7d6..a7843afca4d1a 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -24,6 +24,21 @@ export tokenize @inline isoctal(c::Char) = '0' ≤ c ≤ '7' @inline iswhitespace(c::Char) = Base.isspace(c) +struct StringState + triplestr::Bool + raw::Bool + delim::Char + paren_depth::Int +end + +""" +`Lexer` reads from an input stream and emits a single token each time +`next_token` is called. + +Ideally a lexer is stateless but some state is needed here for: +* Disambiguating cases like x' (adjoint) vs 'x' (character literal) +* Tokenizing code within string interpolations +""" mutable struct Lexer{IO_t <: IO, T <: AbstractToken} io::IO_t io_startpos::Int @@ -37,9 +52,10 @@ mutable struct Lexer{IO_t <: IO, T <: AbstractToken} current_pos::Int last_token::Tokens.Kind + string_states::Vector{StringState} charstore::IOBuffer - chars::Tuple{Char,Char,Char} - charspos::Tuple{Int,Int,Int} + chars::Tuple{Char,Char,Char,Char} + charspos::Tuple{Int,Int,Int,Int} doread::Bool dotop::Bool end @@ -50,18 +66,27 @@ function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} if eof(io) c2, p2 = EOF_CHAR, p1 c3, p3 = EOF_CHAR, p1 + c4, p4 = EOF_CHAR, p1 else c2 = read(io, Char) p2 = position(io) if eof(io) c3, p3 = EOF_CHAR, p1 + c4, p4 = EOF_CHAR, p1 else c3 = read(io, Char) p3 = position(io) + if eof(io) + c4, p4 = EOF_CHAR, p1 + else + c4 = read(io, Char) + p4 = position(io) + end end - end - Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, IOBuffer(), (c1,c2,c3), (p1,p2,p3), false, false) + Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), + Tokens.ERROR, Vector{StringState}(), IOBuffer(), + (c1,c2,c3,c4), (p1,p2,p3,p4), 
false, false) end Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) @@ -144,6 +169,13 @@ Returns the next two characters without changing the lexer's state. """ dpeekchar(l::Lexer) = l.chars[2], l.chars[3] +""" +peekchar3(l::Lexer) + +Returns the next three characters without changing the lexer's state. +""" +peekchar3(l::Lexer) = l.chars[2], l.chars[3], l.chars[4] + """ position(l::Lexer) @@ -181,8 +213,8 @@ function readchar end function readchar(l::Lexer{I}) where {I <: IO} c = readchar(l.io) - l.chars = (l.chars[2], l.chars[3], c) - l.charspos = (l.charspos[2], l.charspos[3], position(l.io)) + l.chars = (l.chars[2], l.chars[3], l.chars[4], c) + l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io)) if l.doread write(l.charstore, l.chars[1]) end @@ -248,7 +280,7 @@ end Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. """ -function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t +function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR, triplestr::Bool=false) where IO_t suffix = false if kind in (Tokens.ERROR, Tokens.STRING, Tokens.TRIPLE_STRING, Tokens.CMD, Tokens.TRIPLE_CMD) str = String(l.io.data[(l.token_startpos + 1):position(l)]) @@ -266,14 +298,14 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, - str, err, l.dotop, suffix) + str, err, l.dotop, suffix, triplestr) l.dotop = false l.last_token = kind readoff(l) return tok end -function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t +function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR, triplestr::Bool=false) where IO_t suffix = false if optakessuffix(kind) while isopsuffix(peekchar(l)) @@ -284,7 +316,7 @@ function 
emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E tok = RawToken(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err, l.dotop, suffix) + startpos(l), position(l) - 1, err, l.dotop, suffix, triplestr) l.dotop = false l.last_token = kind @@ -309,7 +341,14 @@ Returns the next `Token`. """ function next_token(l::Lexer, start = true) start && start_token!(l) - c = readchar(l) + if !isempty(l.string_states) + lex_string_chunk(l) + else + _next_token(l, readchar(l)) + end +end + +function _next_token(l::Lexer, c) if eof(c) return emit(l, Tokens.ENDMARKER) elseif iswhitespace(c) @@ -379,9 +418,9 @@ function next_token(l::Lexer, start = true) elseif c == '-' return lex_minus(l); elseif c == '`' - return lex_cmd(l); + return lex_backtick(l); elseif is_identifier_start_char(c) - return lex_identifier(l, c) + return lex_identifier(l, c, true) elseif isdigit(c) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR @@ -391,6 +430,81 @@ function next_token(l::Lexer, start = true) end end +# We're inside a string; possibly reading the string characters, or maybe in +# Julia code within an interpolation. +function lex_string_chunk(l) + state = last(l.string_states) + if state.paren_depth > 0 + # Read normal Julia code inside an interpolation but track nesting of + # parentheses. 
+ c = readchar(l) + if c == '(' + l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, + state.paren_depth + 1) + return emit(l, Tokens.LPAREN) + elseif c == ')' + l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, + state.paren_depth - 1) + return emit(l, Tokens.RPAREN) + else + return _next_token(l, c) + end + elseif l.last_token == Tokens.EX_OR + pc = peekchar(l) + # Interpolated symbol or expression + if pc == '(' + readchar(l) + l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, + state.paren_depth + 1) + return emit(l, Tokens.LPAREN) + elseif is_identifier_start_char(pc) + return lex_identifier(l, readchar(l), false) + else + # Getting here is a syntax error - fall through to reading string + # characters and let the parser deal with it. + end + end + pc = peekchar(l) + if eof(pc) + return emit(l, Tokens.ENDMARKER) + elseif !state.raw && pc == '$' + # Start interpolation + readchar(l) + return emit(l, Tokens.EX_OR) + elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) + # Terminate string + pop!(l.string_states) + readchar(l) + if state.triplestr + readchar(l); readchar(l) + return emit(l, state.delim == '"' ? + Tokens.TRIPLE_DQUOTE : Tokens.TRIPLE_BACKTICK) + else + return emit(l, state.delim == '"' ? Tokens.DQUOTE : Tokens.BACKTICK) + end + end + readon(l) + # Read a chunk of string characters + if state.raw + read_raw_string(l, state.delim, state.triplestr) + else + while true + pc = peekchar(l) + if pc == '$' || eof(pc) + break + elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) + break + end + c = readchar(l) + if c == '\\' + c = readchar(l) + eof(c) && break + continue + end + end + end + return emit(l, Tokens.STRING, Tokens.NO_ERR, state.triplestr) +end # Lex whitespace, a whitespace char `c` has been consumed function lex_whitespace(l::Lexer, c) @@ -780,60 +894,57 @@ end # Parse a token starting with a quote. 
# A '"' has been consumed -function lex_quote(l::Lexer, doemit=true) - readon(l) - if accept(l, '"') # "" - if accept(l, '"') # """ - if read_string(l, Tokens.TRIPLE_STRING) - return doemit ? emit(l, Tokens.TRIPLE_STRING) : EMPTY_TOKEN(token_type(l)) - else - return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN(token_type(l)) - end - else # empty string - return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN(token_type(l)) - end - else # "?, ? != '"' - if read_string(l, Tokens.STRING) - return doemit ? emit(l, Tokens.STRING) : EMPTY_TOKEN(token_type(l)) - else - return doemit ? emit_error(l, Tokens.EOF_STRING) : EMPTY_TOKEN(token_type(l)) - end +function lex_quote(l::Lexer) + raw = l.last_token == Tokens.IDENTIFIER || l.last_token == Tokens.KEYWORD + pc, dpc = dpeekchar(l) + triplestr = pc == '"' && dpc == '"' + push!(l.string_states, StringState(triplestr, raw, '"', 0)) + if triplestr + readchar(l) + readchar(l) + emit(l, Tokens.TRIPLE_DQUOTE) + else + emit(l, Tokens.DQUOTE) end end # Lex var"..." identifiers. 
# The prefix `var"` has been consumed function lex_var(l::Lexer) - if read_raw_string(l, Tokens.STRING) + read_raw_string(l, '"', false) + if readchar(l) == '"' return emit(l, Tokens.VAR_IDENTIFIER) else return emit_error(l, Tokens.EOF_VAR) end end -function string_terminated(l, kind::Tokens.Kind) - if kind == Tokens.STRING && l.chars[1] == '"' - return true - elseif kind == Tokens.TRIPLE_STRING && l.chars[1] == l.chars[2] == l.chars[3] == '"' - readchar(l) - readchar(l) - return true - elseif kind == Tokens.CMD && l.chars[1] == '`' - return true - elseif kind == Tokens.TRIPLE_CMD && l.chars[1] == l.chars[2] == l.chars[3] == '`' +function string_terminates(l, delim::Char, triplestr::Bool) + if triplestr + c1, c2, c3 = peekchar3(l) + c1 === delim && c2 === delim && c3 === delim + else + peekchar(l) === delim + end +end + +function terminate_string(l, delim::Char, triplestr::Bool) + # @assert string_terminates(l, delim, triplestr) + readchar(l) + if triplestr readchar(l) readchar(l) - return true + return delim == '"' ? Tokens.TRIPLE_DQUOTE : Tokens.TRIPLE_BACKTICK + else + return delim == '"' ? Tokens.DQUOTE : Tokens.BACKTICK end - return false end # Read a raw string for use with custom string macros # # Raw strings treat all characters as literals with the exception that the # closing quotes can be escaped with an odd number of \ characters. -function read_raw_string(l::Lexer, kind::Tokens.Kind) - delim = kind == Tokens.STRING || kind == Tokens.TRIPLE_STRING ? 
'"' : '`' +function read_raw_string(l::Lexer, delim::Char, triplestr::Bool) while true c = readchar(l) if c == '\\' @@ -846,62 +957,8 @@ function read_raw_string(l::Lexer, kind::Tokens.Kind) c = readchar(l) end end - if string_terminated(l, kind) - return true - elseif eof(c) - return false - end - end -end - -# We just consumed a ", """, `, or ``` -function read_string(l::Lexer, kind::Tokens.Kind) - if l.last_token == Tokens.IDENTIFIER || l.last_token == Tokens.KEYWORD - return read_raw_string(l, kind) - end - while true - c = readchar(l) - if c == '\\' - eof(readchar(l)) && return false - continue - end - if string_terminated(l, kind) - return true - elseif eof(c) - return false - end - if c == '$' - c = readchar(l) - if string_terminated(l, kind) - return true - elseif eof(c) - return false - elseif c == '(' - o = 1 - last_token = l.last_token - token_start_row = l.token_start_row - token_start_col = l.token_start_col - token_startpos = l.token_startpos - while o > 0 - t = next_token(l) - - if Tokens.kind(t) == Tokens.ENDMARKER - l.last_token = last_token - l.token_start_row = token_start_row - l.token_start_col = token_start_col - l.token_startpos = token_startpos - return false - elseif Tokens.kind(t) == Tokens.LPAREN - o += 1 - elseif Tokens.kind(t) == Tokens.RPAREN - o -= 1 - end - end - l.last_token = last_token - l.token_start_row = token_start_row - l.token_start_col = token_start_col - l.token_startpos = token_startpos - end + if string_terminates(l, delim, triplestr) || eof(c) + return end end end @@ -943,7 +1000,7 @@ function lex_dot(l::Lexer) pc, dpc = dpeekchar(l) if dotop1(pc) l.dotop = true - return next_token(l, false) + return _next_token(l, readchar(l)) elseif pc =='+' l.dotop = true readchar(l) @@ -1033,29 +1090,24 @@ function lex_dot(l::Lexer) end # A ` has been consumed -function lex_cmd(l::Lexer, doemit=true) - readon(l) - if accept(l, '`') # - if accept(l, '`') # """ - if read_string(l, Tokens.TRIPLE_CMD) - return doemit ? 
emit(l, Tokens.TRIPLE_CMD) : EMPTY_TOKEN(token_type(l)) - else - return doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN(token_type(l)) - end - else # empty cmd - return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) - end +function lex_backtick(l::Lexer) + pc, dpc = dpeekchar(l) + triplestr = pc == '`' && dpc == '`' + # Backticks always contain raw strings only. See discussion on bug + # https://github.com/JuliaLang/julia/issues/3150 + raw = true + push!(l.string_states, StringState(triplestr, raw, '`', 0)) + if triplestr + readchar(l) + readchar(l) + emit(l, Tokens.TRIPLE_BACKTICK) else - if read_string(l, Tokens.CMD) - return doemit ? emit(l, Tokens.CMD) : EMPTY_TOKEN(token_type(l)) - else - return doemit ? emit_error(l, Tokens.EOF_CMD) : EMPTY_TOKEN(token_type(l)) - end + emit(l, Tokens.BACKTICK) end end const MAX_KW_LENGTH = 10 -function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} +function lex_identifier(l::Lexer{IO_t,T}, c, allow_var) where {IO_t,T} if T == Token readon(l) end @@ -1074,7 +1126,8 @@ function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} if n > MAX_KW_LENGTH emit(l, IDENTIFIER) else - if h == var_kw_hash && accept(l, '"') + # FIXME: var"" not allowed in strings + if allow_var && h == var_kw_hash && accept(l, '"') return lex_var(l) else return emit(l, get(kw_hash, h, IDENTIFIER)) diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index 0f39b6208aab2..f6e2fdd6220e6 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -61,12 +61,13 @@ struct Token <: AbstractToken token_error::TokenError dotop::Bool suffix::Bool + triplestr::Bool end function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int, val::String) -Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false, false) +Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false, false, false) end 
-Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false) +Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false, false) struct RawToken <: AbstractToken kind::Kind @@ -78,12 +79,13 @@ struct RawToken <: AbstractToken token_error::TokenError dotop::Bool suffix::Bool + triplestr::Bool end function RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int) -RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false) +RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false, false) end -RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false) +RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false, false) const _EMPTY_TOKEN = Token() @@ -125,6 +127,14 @@ function untokenize(t::Token) return "]" elseif t.kind == RBRACE return "}" + elseif t.kind == DQUOTE + return "\"" + elseif t.kind == TRIPLE_DQUOTE + return "\"\"\"" + elseif t.kind == BACKTICK + return "`" + elseif t.kind == TRIPLE_BACKTICK + return "```" elseif t.kind == AT_SIGN return "@" elseif t.kind == COMMA diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index f9efb4c312d88..f68ed02547b50 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -63,7 +63,8 @@ HEX_INT, # 0x0 OCT_INT, # 0o0 FLOAT, # 3.5, 3.7e+3 - STRING, # "foo" + STRING, # "foo" (without the " delimiters) + # TODO: Remove this and TRIPLE_CMD; use flag? 
TRIPLE_STRING, # """ foo \n """ CHAR, # 'a' CMD, # `cmd ...` @@ -78,6 +79,10 @@ RBRACE, # } LPAREN, # ( RPAREN, # ) + DQUOTE, # " (double quote) + TRIPLE_DQUOTE, # """ + BACKTICK, # ` + TRIPLE_BACKTICK, # ``` end_delimiters, begin_ops, @@ -830,6 +835,7 @@ begin_parser_tokens, TOMBSTONE, # Empty placeholder for kind to be filled later NOTHING_LITERAL, # A literal Julia `nothing` in the AST + # FIXME: Remove UNQUOTED_STRING, # An unquoted range of the source as a string # Macro names are modelled as a special kind of identifier because the diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 4a2d929526667..46eb47227edf9 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -77,49 +77,49 @@ end # testset T.OP,T.IDENTIFIER,T.RBRACE,T.LPAREN,T.IDENTIFIER,T.OP, T.LBRACE,T.IDENTIFIER,T.RBRACE,T.OP,T.INTEGER,T.RPAREN, - T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.LPAREN, + T.NEWLINE_WS,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.LPAREN, T.IDENTIFIER,T.OP,T.IDENTIFIER,T.COMMA,T.WHITESPACE, T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.SEMICOLON, - T.WHITESPACE,T.KEYWORD, + T.NEWLINE_WS,T.KEYWORD, - T.WHITESPACE,T.KEYWORD, - T.WHITESPACE,T.IDENTIFIER, - T.WHITESPACE,T.KEYWORD, - T.WHITESPACE,T.IDENTIFIER, - T.WHITESPACE,T.KEYWORD, + T.NEWLINE_WS,T.KEYWORD, + T.NEWLINE_WS,T.IDENTIFIER, + T.NEWLINE_WS,T.KEYWORD, + T.NEWLINE_WS,T.IDENTIFIER, + T.NEWLINE_WS,T.KEYWORD, - T.WHITESPACE,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, + T.NEWLINE_WS,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, T.OP,T.IDENTIFIER, - T.WHITESPACE,T.IDENTIFIER,T.LSQUARE,T.LSQUARE,T.INTEGER,T.WHITESPACE, + T.NEWLINE_WS,T.IDENTIFIER,T.LSQUARE,T.LSQUARE,T.INTEGER,T.WHITESPACE, T.INTEGER,T.WHITESPACE,T.INTEGER,T.RSQUARE,T.RSQUARE, - T.WHITESPACE,T.LSQUARE,T.INTEGER,T.OP,T.INTEGER,T.COMMA,T.INTEGER, + T.NEWLINE_WS,T.LSQUARE,T.INTEGER,T.OP,T.INTEGER,T.COMMA,T.INTEGER, T.SEMICOLON,T.INTEGER,T.COMMA,T.INTEGER,T.RSQUARE, - 
T.WHITESPACE,T.STRING,T.SEMICOLON,T.WHITESPACE,T.CHAR, + T.NEWLINE_WS,T.DQUOTE,T.STRING,T.DQUOTE,T.SEMICOLON,T.WHITESPACE,T.CHAR, - T.WHITESPACE,T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.OP, + T.NEWLINE_WS,T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.OP, T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN, - T.WHITESPACE,T.COMMENT, + T.NEWLINE_WS,T.COMMENT, - T.WHITESPACE,T.COMMENT, + T.NEWLINE_WS,T.COMMENT, - T.WHITESPACE,T.INTEGER,T.OP,T.INTEGER, + T.NEWLINE_WS,T.INTEGER,T.OP,T.INTEGER, - T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.IDENTIFIER,T.OP, + T.NEWLINE_WS,T.IDENTIFIER,T.OP,T.OP,T.IDENTIFIER,T.OP, - T.WHITESPACE,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, + T.NEWLINE_WS,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, - T.WHITESPACE,T.CMD, + T.NEWLINE_WS,T.BACKTICK,T.STRING,T.BACKTICK, - T.WHITESPACE,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, + T.NEWLINE_WS,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, - T.WHITESPACE,T.LBRACE,T.RBRACE, + T.NEWLINE_WS,T.LBRACE,T.RBRACE, - T.WHITESPACE,T.ERROR,T.ENDMARKER] + T.NEWLINE_WS,T.ERROR,T.ENDMARKER] for (i, n) in enumerate(tokenize(str)) @test Tokens.kind(n) == kinds[i] @@ -213,9 +213,9 @@ end 1 """)) - kinds = [T.COMMENT, T.WHITESPACE, - T.TRIPLE_STRING, T.WHITESPACE, - T.INTEGER, T.WHITESPACE, + kinds = [T.COMMENT, T.NEWLINE_WS, + T.TRIPLE_DQUOTE, T.STRING, T.TRIPLE_DQUOTE, T.NEWLINE_WS, + T.INTEGER, T.NEWLINE_WS, T.ENDMARKER] @test T.kind.(toks) == kinds end @@ -244,6 +244,7 @@ end @testset "keywords" begin for kw in ["function", "abstract", + "as", "baremodule", "begin", "break", @@ -264,7 +265,6 @@ end "local", "if", "import", - "importall", "macro", "module", "mutable", @@ -290,9 +290,6 @@ end @test tok("#= #= =#", 1).kind == T.ERROR @test tok("'dsadsa", 1).kind == T.ERROR @test tok("aa **", 3).kind == T.ERROR - @test tok("aa \" ", 3).kind == T.ERROR - @test tok("aa \"\"\" \"dsad\" \"\"",3).kind == T.ERROR - end @testset "xor_eq" begin @@ -305,33 +302,111 @@ end @testset "show" 
begin io = IOBuffer() - show(io, collect(tokenize("\"abc\nd\"ef"))[1]) - @test String(take!(io)) == "1,1-2,2 STRING \"\\\"abc\\nd\\\"\"" + show(io, collect(tokenize("\"abc\nd\"ef"))[2]) + @test String(take!(io)) == "1,2-2,1 STRING \"abc\\nd\"" end +~(tok::T.AbstractToken, t::Tuple) = tok.kind == t[1] && untokenize(tok) == t[2] + +@testset "raw strings" begin + ts = collect(tokenize(raw""" str"x $ \ y" """)) + @test ts[1] ~ (T.WHITESPACE , " " ) + @test ts[2] ~ (T.IDENTIFIER , "str" ) + @test ts[3] ~ (T.DQUOTE , "\"" ) + @test ts[4] ~ (T.STRING , "x \$ \\ y") + @test ts[5] ~ (T.DQUOTE , "\"" ) + @test ts[6] ~ (T.WHITESPACE , " " ) + @test ts[7] ~ (T.ENDMARKER , "" ) + + ts = collect(tokenize(raw"""`x $ \ y`""")) + @test ts[1] ~ (T.BACKTICK , "`" ) + @test ts[2] ~ (T.STRING , "x \$ \\ y" ) + @test ts[3] ~ (T.BACKTICK , "`" ) + @test ts[4] ~ (T.ENDMARKER , "" ) +end @testset "interpolation" begin - str = """"str: \$(g("str: \$(h("str"))"))" """ - ts = collect(tokenize(str)) - @test length(ts)==3 - @test ts[1].kind == Tokens.STRING - @test ts[1].val == strip(str) - ts = collect(tokenize("""\"\$\"""")) - @test ts[1].kind == Tokens.STRING - - # issue 73: - t_err = tok("\"\$(fdsf\"") - @test t_err.kind == Tokens.ERROR - @test t_err.token_error == Tokens.EOF_STRING - @test Tokenize.Tokens.startpos(t_err) == (1,1) - @test Tokenize.Tokens.endpos(t_err) == (1,8) - - # issue 178: - str = """"\$uₕx \$(uₕx - ux)" """ - ts = collect(tokenize(str)) - @test length(ts)==3 - @test ts[1].kind == Tokens.STRING - @test ts[1].val == strip(str) + @testset "basic interpolation" begin + ts = collect(tokenize("\"\$x \$y\"")) + @test ts[1] ~ (T.DQUOTE , "\"") + @test ts[2] ~ (T.EX_OR , "\$") + @test ts[3] ~ (T.IDENTIFIER , "x" ) + @test ts[4] ~ (T.STRING , " " ) + @test ts[5] ~ (T.EX_OR , "\$") + @test ts[6] ~ (T.IDENTIFIER , "y" ) + @test ts[7] ~ (T.DQUOTE , "\"") + @test ts[8] ~ (T.ENDMARKER , "" ) + end + + @testset "nested interpolation" begin + str = """"str: \$(g("str: \$(h("str"))"))" 
""" + ts = collect(tokenize(str)) + @test length(ts) == 23 + @test ts[1] ~ (T.DQUOTE , "\"" ) + @test ts[2] ~ (T.STRING , "str: ") + @test ts[3] ~ (T.EX_OR , "\$" ) + @test ts[4] ~ (T.LPAREN , "(" ) + @test ts[5] ~ (T.IDENTIFIER, "g" ) + @test ts[6] ~ (T.LPAREN , "(" ) + @test ts[7] ~ (T.DQUOTE , "\"" ) + @test ts[8] ~ (T.STRING , "str: ") + @test ts[9] ~ (T.EX_OR , "\$" ) + @test ts[10] ~ (T.LPAREN , "(" ) + @test ts[11] ~ (T.IDENTIFIER, "h" ) + @test ts[12] ~ (T.LPAREN , "(" ) + @test ts[13] ~ (T.DQUOTE , "\"" ) + @test ts[14] ~ (T.STRING , "str" ) + @test ts[15] ~ (T.DQUOTE , "\"" ) + @test ts[16] ~ (T.RPAREN , ")" ) + @test ts[17] ~ (T.RPAREN , ")" ) + @test ts[18] ~ (T.DQUOTE , "\"" ) + @test ts[19] ~ (T.RPAREN , ")" ) + @test ts[20] ~ (T.RPAREN , ")" ) + @test ts[21] ~ (T.DQUOTE , "\"" ) + @test ts[22] ~ (T.WHITESPACE, " " ) + @test ts[23] ~ (T.ENDMARKER , "" ) + end + + @testset "duplicate \$ in interpolation" begin + ts = collect(tokenize("\"\$\$\"")) + @test ts[1] ~ (T.DQUOTE , "\"") + @test ts[2] ~ (T.EX_OR , "\$") + @test ts[3] ~ (T.EX_OR , "\$") + @test ts[4] ~ (T.DQUOTE , "\"") + @test ts[5] ~ (T.ENDMARKER , "" ) + end + + @testset "Unmatched parens in interpolation" begin + # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73 + ts = collect(tokenize("\"\$(fdsf\"")) + @test ts[1] ~ (T.DQUOTE , "\"" ) + @test ts[2] ~ (T.EX_OR , "\$" ) + @test ts[3] ~ (T.LPAREN , "(" ) + @test ts[4] ~ (T.IDENTIFIER , "fdsf" ) + @test ts[5] ~ (T.DQUOTE , "\"" ) + @test ts[6] ~ (T.ENDMARKER , "" ) + end + + @testset "Unicode in interpolation" begin + # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178 + ts = collect(tokenize(""" "\$uₕx \$(uₕx - ux)" """)) + @test ts[ 1] ~ (T.WHITESPACE , " " ) + @test ts[ 2] ~ (T.DQUOTE , "\"" ) + @test ts[ 3] ~ (T.EX_OR , "\$" ) + @test ts[ 4] ~ (T.IDENTIFIER , "uₕx" ) + @test ts[ 5] ~ (T.STRING , " " ) + @test ts[ 6] ~ (T.EX_OR , "\$" ) + @test ts[ 7] ~ (T.LPAREN , "(" ) + @test ts[ 8] ~ (T.IDENTIFIER , "uₕx" ) + 
@test ts[ 9] ~ (T.WHITESPACE , " " ) + @test ts[10] ~ (T.MINUS , "-" ) + @test ts[11] ~ (T.WHITESPACE , " " ) + @test ts[12] ~ (T.IDENTIFIER , "ux" ) + @test ts[13] ~ (T.RPAREN , ")" ) + @test ts[14] ~ (T.DQUOTE , "\"" ) + @test ts[15] ~ (T.WHITESPACE , " " ) + @test ts[16] ~ (T.ENDMARKER , "" ) + end end @testset "inferred" begin @@ -446,11 +521,21 @@ end end @testset "CMDs" begin - @test tok("`cmd`").kind == T.CMD - @test tok("```cmd```", 1).kind == T.TRIPLE_CMD - @test tok("```cmd```", 2).kind == T.ENDMARKER - @test tok("```cmd````cmd`", 1).kind == T.TRIPLE_CMD - @test tok("```cmd````cmd`", 2).kind == T.CMD + @test tok("`cmd`",1).kind == T.BACKTICK + @test tok("`cmd`",2).kind == T.STRING + @test tok("`cmd`",3).kind == T.BACKTICK + @test tok("`cmd`",4).kind == T.ENDMARKER + @test tok("```cmd```", 1).kind == T.TRIPLE_BACKTICK + @test tok("```cmd```", 2).kind == T.STRING + @test tok("```cmd```", 3).kind == T.TRIPLE_BACKTICK + @test tok("```cmd```", 4).kind == T.ENDMARKER + @test tok("```cmd````cmd`", 1).kind == T.TRIPLE_BACKTICK + @test tok("```cmd````cmd`", 2).kind == T.STRING + @test tok("```cmd````cmd`", 3).kind == T.TRIPLE_BACKTICK + @test tok("```cmd````cmd`", 4).kind == T.BACKTICK + @test tok("```cmd````cmd`", 5).kind == T.STRING + @test tok("```cmd````cmd`", 6).kind == T.BACKTICK + @test tok("```cmd````cmd`", 7).kind == T.ENDMARKER end @testset "where" begin @@ -463,24 +548,12 @@ end @test length(collect(tokenize(io))) == 4 end -@testset "complicated interpolations" begin - @test length(collect(tokenize("\"\$(())\""))) == 2 - @test length(collect(tokenize("\"\$(#=inline ) comment=#\"\")\""))) == 2 - @test length(collect(tokenize("\"\$(string(`inline ')' cmd`)\"\")\""))) == 2 - # These would require special interpolation support in the parse (Base issue #3150). 
- # If that gets implemented, thses should all be adjust to `== 2` - @test length(collect(tokenize("`\$((``))`"))) == 2 - @test length(collect(tokenize("`\$(#=inline ) comment=#``)`"))) == 2 - @test length(collect(tokenize("`\$(\"inline ) string\"*string(``))`"))) == 2 -end - - @testset "hex/bin/octal errors" begin -@test tok("0x").kind == T.ERROR -@test tok("0b").kind == T.ERROR -@test tok("0o").kind == T.ERROR -@test tok("0x 2", 1).kind == T.ERROR -@test tok("0x.1p1").kind == T.FLOAT + @test tok("0x").kind == T.ERROR + @test tok("0b").kind == T.ERROR + @test tok("0o").kind == T.ERROR + @test tok("0x 2", 1).kind == T.ERROR + @test tok("0x.1p1").kind == T.FLOAT end @@ -563,16 +636,6 @@ end @test tok("1.?").kind == Tokens.ERROR end -@testset "interpolation of char within string" begin - s = "\"\$('\"')\"" - @test collect(tokenize(s))[1].kind == Tokenize.Tokens.STRING -end - -@testset "interpolation of prime within string" begin - s = "\"\$(a')\"" - @test collect(tokenize(s))[1].kind == Tokenize.Tokens.STRING -end - @testset "comments" begin s = "#=# text=#" @test length(collect(tokenize(s, Tokens.RawToken))) == 2 @@ -624,53 +687,59 @@ end @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) end -@testset "simple_hash" begin - is_kw(x) = uppercase(x) in ( - "ABSTRACT", - "BAREMODULE", - "BEGIN", - "BREAK", - "CATCH", - "CONST", - "CONTINUE", - "DO", - "ELSE", - "ELSEIF", - "END", - "EXPORT", - "FINALLY", - "FOR", - "FUNCTION", - "GLOBAL", - "IF", - "IMPORT", - "IMPORTALL", - "LET", - "LOCAL", - "MACRO", - "MODULE", - "MUTABLE", - "OUTER", - "PRIMITIVE", - "QUOTE", - "RETURN", - "STRUCT", - "TRY", - "TYPE", - "USING", - "WHILE", - "IN", - "ISA", - "WHERE", - "TRUE", - "FALSE", - ) - for len in 1:5 - for cs in Iterators.product(['a':'z' for _ in 1:len]...) 
- str = String([cs...]) - is_kw(str) && continue - - @test Tokenize.Lexers.simple_hash(str) ∉ keys(Tokenize.Lexers.kw_hash) +const all_kws = Set(["abstract", + "as", + "baremodule", + "begin", + "break", + "catch", + "const", + "continue", + "do", + "else", + "elseif", + "end", + "export", + "finally", + "for", + "function", + "global", + "if", + "import", + "let", + "local", + "macro", + "module", + "mutable", + "outer", + "primitive", + "quote", + "return", + "struct", + "try", + "type", + "using", + "while", + "in", + "isa", + "where", + "true", + "false", +]) + +function check_kw_hashes(iter) + for cs in iter + str = String([cs...]) + if Lexers.simple_hash(str) in keys(Tokenize.Lexers.kw_hash) + @test str in all_kws end end end + +@testset "simple_hash" begin + @test length(all_kws) == length(Tokenize.Lexers.kw_hash) + + @testset "Length $len keywords" for len in 1:5 + check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...)) + end +end From d617ea70c9ceb8b65f4d7617c1d415463ea631da Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 3 Jan 2022 15:47:24 +1000 Subject: [PATCH 0275/1109] Tokenize: add test case for var"" syntax in string interpolations --- JuliaSyntax/Tokenize/test/lexer.jl | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 46eb47227edf9..d49fe6949c5b1 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -326,7 +326,7 @@ end end @testset "interpolation" begin - @testset "basic interpolation" begin + @testset "basic" begin ts = collect(tokenize("\"\$x \$y\"")) @test ts[1] ~ (T.DQUOTE , "\"") @test ts[2] ~ (T.EX_OR , "\$") @@ -338,7 +338,7 @@ end @test ts[8] ~ (T.ENDMARKER , "" ) end - @testset "nested interpolation" begin + @testset "nested" begin str = """"str: \$(g("str: \$(h("str"))"))" """ ts = collect(tokenize(str)) @test length(ts) == 23 @@ -367,7 
+367,7 @@ end @test ts[23] ~ (T.ENDMARKER , "" ) end - @testset "duplicate \$ in interpolation" begin + @testset "duplicate \$" begin ts = collect(tokenize("\"\$\$\"")) @test ts[1] ~ (T.DQUOTE , "\"") @test ts[2] ~ (T.EX_OR , "\$") @@ -376,7 +376,7 @@ end @test ts[5] ~ (T.ENDMARKER , "" ) end - @testset "Unmatched parens in interpolation" begin + @testset "Unmatched parens" begin # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73 ts = collect(tokenize("\"\$(fdsf\"")) @test ts[1] ~ (T.DQUOTE , "\"" ) @@ -387,7 +387,7 @@ end @test ts[6] ~ (T.ENDMARKER , "" ) end - @testset "Unicode in interpolation" begin + @testset "Unicode" begin # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178 ts = collect(tokenize(""" "\$uₕx \$(uₕx - ux)" """)) @test ts[ 1] ~ (T.WHITESPACE , " " ) @@ -407,6 +407,21 @@ end @test ts[15] ~ (T.WHITESPACE , " " ) @test ts[16] ~ (T.ENDMARKER , "" ) end + + @testset "var\"...\" disabled in interpolations" begin + ts = collect(tokenize(""" "\$var"x" " """)) + @test ts[ 1] ~ (T.WHITESPACE , " " ) + @test ts[ 2] ~ (T.DQUOTE , "\"" ) + @test ts[ 3] ~ (T.EX_OR , "\$" ) + @test ts[ 4] ~ (T.IDENTIFIER , "var" ) + @test ts[ 5] ~ (T.DQUOTE , "\"" ) + @test ts[ 6] ~ (T.IDENTIFIER , "x" ) + @test ts[ 7] ~ (T.DQUOTE , "\"" ) + @test ts[ 8] ~ (T.STRING , " " ) + @test ts[ 9] ~ (T.DQUOTE , "\"" ) + @test ts[10] ~ (T.WHITESPACE , " " ) + @test ts[11] ~ (T.ENDMARKER , "" ) + end end @testset "inferred" begin From 74821fafbd9aae95f22f22027dffd61582e2930d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 08:59:33 +1000 Subject: [PATCH 0276/1109] Tokenize: disallow ambiguous string interpolation identifiers This is done in the lexer rather than the parser as the next character is readily available. 
--- JuliaSyntax/Tokenize/src/lexer.jl | 10 ++++++++-- JuliaSyntax/Tokenize/src/token.jl | 2 ++ JuliaSyntax/Tokenize/src/utilities.jl | 26 ++++++++++++++++++++++++++ JuliaSyntax/Tokenize/test/lexer.jl | 8 ++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index a7843afca4d1a..a59443138e80c 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -449,7 +449,9 @@ function lex_string_chunk(l) else return _next_token(l, c) end - elseif l.last_token == Tokens.EX_OR + end + pc = peekchar(l) + if l.last_token == Tokens.EX_OR pc = peekchar(l) # Interpolated symbol or expression if pc == '(' @@ -463,8 +465,12 @@ function lex_string_chunk(l) # Getting here is a syntax error - fall through to reading string # characters and let the parser deal with it. end + elseif l.last_token == Tokens.IDENTIFIER && + !(eof(pc) || is_operator_start_char(pc) || is_never_id_char(pc)) + # Only allow certain characters after interpolated vars + # https://github.com/JuliaLang/julia/pull/25234 + return emit_error(l, Tokens.INVALID_INTERPOLATION_TERMINATOR) end - pc = peekchar(l) if eof(pc) return emit(l, Tokens.ENDMARKER) elseif !state.raw && pc == '$' diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index f6e2fdd6220e6..05c57f8ef828c 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -33,6 +33,7 @@ _add_kws() EOF_VAR, INVALID_NUMERIC_CONSTANT, INVALID_OPERATOR, + INVALID_INTERPOLATION_TERMINATOR, UNKNOWN, ) @@ -45,6 +46,7 @@ TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( EOF_VAR => "unterminated var\"...\" identifier", INVALID_NUMERIC_CONSTANT => "invalid numeric constant", INVALID_OPERATOR => "invalid operator", + INVALID_INTERPOLATION_TERMINATOR => "interpolated variable ends with invalid character; use `\$(...)` instead", UNKNOWN => "unknown", ) diff --git 
a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/Tokenize/src/utilities.jl index 98afe0c714d36..2ad25090c787c 100644 --- a/JuliaSyntax/Tokenize/src/utilities.jl +++ b/JuliaSyntax/Tokenize/src/utilities.jl @@ -118,6 +118,32 @@ function is_identifier_start_char(c::Char) return Base.is_id_start_char(c) end +# Chars that we will never allow to be part of a valid non-operator identifier +function is_never_id_char(ch::Char) + cat = Unicode.category_code(ch) + c = UInt32(ch) + return ( + # spaces and control characters: + (cat >= Unicode.UTF8PROC_CATEGORY_ZS && cat <= Unicode.UTF8PROC_CATEGORY_CS) || + + # ASCII and Latin1 non-connector punctuation + (c < 0xff && + cat >= Unicode.UTF8PROC_CATEGORY_PD && cat <= Unicode.UTF8PROC_CATEGORY_PO) || + + c == UInt32('`') || + + # mathematical brackets + (c >= 0x27e6 && c <= 0x27ef) || + # angle, corner, and lenticular brackets + (c >= 0x3008 && c <= 0x3011) || + # tortoise shell, square, and more lenticular brackets + (c >= 0x3014 && c <= 0x301b) || + # fullwidth parens + (c == 0xff08 || c == 0xff09) || + # fullwidth square brackets + (c == 0xff3b || c == 0xff3d) + ) +end function peekchar(io::Base.GenericIOBuffer) if !io.readable || io.ptr > io.size diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index d49fe6949c5b1..4f49e1f46ceb6 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -422,6 +422,14 @@ end @test ts[10] ~ (T.WHITESPACE , " " ) @test ts[11] ~ (T.ENDMARKER , "" ) end + + @testset "invalid chars after identifier" begin + ts = collect(tokenize(""" "\$x෴" """)) + @test ts[4] ~ (T.IDENTIFIER , "x" ) + @test ts[5] ~ (T.ERROR , "" ) + @test ts[6] ~ (T.STRING , "෴" ) + @test ts[5].token_error == Tokens.INVALID_INTERPOLATION_TERMINATOR + end end @testset "inferred" begin From 810a6bdd851d93ed092dcefda81d7066c5de1c68 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 09:49:48 +1000 Subject: [PATCH 0277/1109] Parse string 
interpolations and literals --- JuliaSyntax/README.md | 3 + JuliaSyntax/src/parser.jl | 210 +++++++++++++++------------------ JuliaSyntax/src/syntax_tree.jl | 6 +- JuliaSyntax/src/token_kinds.jl | 51 ++++---- JuliaSyntax/src/tokens.jl | 4 +- JuliaSyntax/test/parser.jl | 19 ++- 6 files changed, 143 insertions(+), 150 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index d057f545675f6..84a1737e0508b 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -558,3 +558,6 @@ xy * `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` can't be a normal identifier. +* When lexing raw strings, more than two backslashes are treated strangely at + the end of the string: `raw"\\\\ "` contains four backslashes, whereas + `raw"\\\\"` contains only two. diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5377ca9116592..31b6342f8f888 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -50,7 +50,8 @@ end # flisp: disallow-space function bump_disallowed_space(ps) if peek_token(ps).had_whitespace - bump_trivia(ps, skip_newlines=false, error="whitespace is not allowed here") + bump_trivia(ps, TRIVIA_FLAG, skip_newlines=false, + error="whitespace is not allowed here") end end @@ -791,7 +792,7 @@ function parse_juxtapose(ps::ParseState) if n_terms == 1 bump_invisible(ps, K"*") end - if is_string(prev_kind) || is_string(t) + if prev_kind == K"String" || is_string_delim(t) # issue #20575 # # "a""b" ==> (call-i "a" * (error) "b") @@ -1124,8 +1125,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) t = peek_token(ps) k = kind(t) if (ps.space_sensitive && t.had_whitespace && - # TODO: Is `'` adjoint or Char here? 
- k in (K"(", K"[", K"{", K"\\", K"'", K"Char", K"String", K"TripleString")) + k in (K"(", K"[", K"{", K"\\", K"'", K"Char", K"\"", K"\"\"\"", K"`", K"```")) + # [f (x)] ==> (hcat f x) break end if k == K"(" @@ -1134,6 +1135,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) finish_macroname(ps, mark, is_valid_modref, macro_name_position) end # f(a,b) ==> (call f a b) + # f (a) ==> (call f (error-t) a b) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) # Keyword arguments depends on call vs macrocall @@ -1185,6 +1187,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # a().@x[1] ==> FIXME finish_macroname(ps, mark, is_valid_modref, macro_name_position) end + # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ParseState(ps, end_symbol=true), @@ -1205,12 +1208,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) break end elseif k == K"." + # x .y ==> (. x (error-t) (quote y)) bump_disallowed_space(ps) if peek(ps, 2) == K"'" + # f.' => f (error-t . ') emark = position(ps) bump(ps) bump(ps) - "f.'" => "f (error-t . ')" emit(ps, emark, K"error", TRIVIA_FLAG, error="the .' operator for transpose is discontinued") is_valid_modref = false @@ -1237,6 +1241,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Keyword params always use kw inside tuple in dot calls # f.(a,b) ==> (. f (tuple a b)) # f.(a=1) ==> (. f (tuple (kw a 1))) + # f. (x) ==> (. f (error-t) (tuple x)) bump_disallowed_space(ps) m = position(ps) bump(ps, TRIVIA_FLAG) @@ -1245,6 +1250,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K".") elseif k == K":" # A.:+ ==> (. A (quote +)) + # A.: + ==> (. 
A (error-t) (quote +)) m = position(ps) bump(ps, TRIVIA_FLAG) bump_disallowed_space(ps) @@ -1308,6 +1314,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) finish_macroname(ps, mark, is_valid_modref, macro_name_position) end m = position(ps) + # S {a} ==> (curly S (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K"}", is_macrocall) @@ -1325,7 +1332,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # S{a,b} ==> (curly S a b) emit(ps, mark, K"curly") end - elseif k in (K"String", K"TripleString", K"Cmd", K"TripleCmd") && + elseif k in (K"\"", K"\"\"\"", K"`", K"```") && !t.had_whitespace && is_valid_modref # Custom string and command literals # x"str" ==> (macrocall x_str "str") @@ -1333,9 +1340,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. - mackind = is_string(k) ? K"StringMacroName" : K"CmdMacroName" - finish_macroname(ps, mark, is_valid_modref, macro_name_position, mackind) - bump(ps) + outk = is_string_delim(k) ? 
K"StringMacroName" : K"CmdMacroName" + finish_macroname(ps, mark, is_valid_modref, macro_name_position, outk) + parse_raw_string(ps) t = peek_token(ps) k = kind(t) if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_number(k)) @@ -2553,112 +2560,80 @@ function parse_brackets(after_parse::Function, end end -# flisp: parse-raw-literal -function parse_raw_literal(ps::ParseState, delim) - TODO("parse_raw_literal unimplemented") -end - -# flisp: unescape-parsed-string-literal -function unescape_parsed_string_literal(strs) - TODO("unescape_parsed_string_literal unimplemented") -end - -# flisp: strip-escaped-newline -function strip_escaped_newline(s, raw) - TODO("strip_escaped_newline unimplemented") -end - -# remove `\` followed by a newline -# -# flisp: strip-escaped-newline- -function strip_escaped_newline_(s) - TODO("strip_escaped_newline_ unimplemented") -end - -# flisp: parse-string-literal -function parse_string_literal(ps::ParseState, delim, raw) - TODO("parse_string_literal unimplemented") -end - -# flisp: strip-leading-newline -function strip_leading_newline(s) - TODO("strip_leading_newline unimplemented") -end - -# flisp: dedent-triplequoted-string -function dedent_triplequoted_string(lst) - TODO("dedent_triplequoted_string unimplemented") -end - -# flisp: triplequoted-string-indentation -function triplequoted_string_indentation(lst) - TODO("triplequoted_string_indentation unimplemented") -end - -# flisp: triplequoted-string-indentation- -function triplequoted_string_indentation_(s) - TODO("triplequoted_string_indentation_ unimplemented") -end - -# return the longest common prefix of the elements of l -# e.g., (longest-common-prefix ((1 2) (1 4))) -> (1) -# -# flisp: longest-common-prefix -function longest_common_prefix(l) - TODO("longest_common_prefix unimplemented") -end - -# return the longest common prefix of lists a & b -# -# flisp: longest-common-prefix2 -function longest_common_prefix2(a, b) - TODO("longest_common_prefix2 unimplemented") 
-end - -# flisp: longest-common-prefix2- -function longest_common_prefix2_(a, b, p) - TODO("longest_common_prefix2_ unimplemented") -end - -# flisp: string-split -function string_split(s, sep) - TODO("string_split unimplemented") -end - -# flisp: string-split- -function string_split_(s, sep, start, splits) - TODO("string_split_ unimplemented") -end - -# replace all occurrences of a in s with b -# -# flisp: string-replace -function string_replace(s, a, b) - TODO("string_replace unimplemented") -end - -# flisp: ends-interpolated-atom? -function is_ends_interpolated_atom(c) - TODO("is_ends_interpolated_atom unimplemented") -end - -# flisp: parse-interpolate -function parse_interpolate(ps::ParseState) - TODO("parse_interpolate unimplemented") -end - -# raw = raw string literal -# when raw is #t, unescape only \\ and delimiter -# otherwise do full unescaping, and parse interpolations too +# Parse a string, possibly with embedded interpolations # -# flisp: parse-string-literal- -function parse_string_literal_(n, p, s, delim, raw) - TODO("parse_string_literal_ unimplemented") +# flisp: parse-string-literal-, parse-interpolate +function parse_string(ps::ParseState) + mark = position(ps) + closer = peek(ps) + bump(ps, TRIVIA_FLAG) + n_components = 0 + while true + k = peek(ps) + if k == K"$" + n_components += 1 + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"(" + # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") + m = position(ps) + parse_atom(ps) + if ps.julia_version >= v"1.6" && peek_behind(ps) == K"String" + # Wrap interpolated literal strings in (string) so we can + # distinguish them from the surrounding text (issue #38501) + #v1.6: "hi$("ho")" ==> (string "hi" (string "ho")) + emit(ps, m, K"string") + end + elseif is_identifier(k) + # "a $foo b" ==> (string "a " foo " b") + bump(ps) + else + # It should be impossible for the lexer to get us into this state. 
+ bump_invisible(ps, K"error", + error="Identifier or parenthesized expression expected after \$ in string") + end + elseif k == K"String" + bump(ps) + elseif k == closer + if n_components == 0 + # "" ==> "" + bump_invisible(ps, K"String") + end + bump(ps, TRIVIA_FLAG) + break + else + # Recovery + # "str ==> "str" (error-t) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="Unterminated string literal") + break + end + n_components += 1 + end + if n_components > 1 + # "$x$y$z" ==> (string x y z) + # "$(x)" ==> (string x) + # "$x" ==> (string x) + emit(ps, mark, K"string") + else + # "str" ==> "str" + end end -# flisp: unescape-string -function unescape_string_(s) - TODO("unescape_string_ unimplemented") +function parse_raw_string(ps::ParseState) + emark = position(ps) + delim_k = peek(ps) + bump(ps, TRIVIA_FLAG) + if peek(ps) == K"String" + bump(ps) + else + bump_invisible(ps, K"String") + end + if peek(ps) == delim_k + bump(ps, TRIVIA_FLAG) + else + # Recovery + bump_invisible(ps, K"error", error="Unterminated string literal") + end end # parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. @@ -2745,16 +2720,15 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) emit_braces(ps, mark, ckind, cflags) - elseif is_string(leading_kind) - bump(ps) - # FIXME parse_string_literal(ps) + elseif is_string_delim(leading_kind) + parse_string(ps) elseif leading_kind == K"@" # macro call bump(ps, TRIVIA_FLAG) parse_macro_name(ps) parse_call_chain(ps, mark, true) - elseif leading_kind in (K"Cmd", K"TripleCmd") + elseif leading_kind in (K"`", K"```") bump_invisible(ps, K"core_@cmd") - bump(ps) + parse_raw_string(ps) emit(ps, mark, K"macrocall") elseif is_literal(leading_kind) bump(ps) @@ -2793,7 +2767,7 @@ function parse_docstring(ps::ParseState, down=parse_eq) #! ("doc") foo ==> (macrocall core_@doc "doc" foo) # TODO: Also, all these TOMBSTONEs seem kind of inefficient. 
Perhaps we can # improve things? - maybe_doc = is_string(peek(ps)) + maybe_doc = is_string_delim(peek(ps)) atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) if maybe_doc diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 5f3556cb1c228..8f0fe1373b598 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -140,10 +140,8 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif is_keyword(k) # This should only happen for tokens nested inside errors Symbol(val_str) - elseif k in (K"String", K"Cmd") - unescape_string(source[position+1:position+span(raw)-2]) - elseif k in (K"TripleString", K"TripleCmd") - unescape_string(source[position+3:position+span(raw)-4]) + elseif k == K"String" + unescape_string(val_str) elseif k == K"UnquotedString" String(val_str) elseif is_operator(k) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 9c660e1e101eb..9fa7319627fd0 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -67,21 +67,22 @@ Dict([ "OctInt" => Ts.OCT_INT "Float" => Ts.FLOAT "String" => Ts.STRING -"TripleString" => Ts.TRIPLE_STRING "Char" => Ts.CHAR -"Cmd" => Ts.CMD -"TripleCmd" => Ts.TRIPLE_CMD "true" => Ts.TRUE "false" => Ts.FALSE "END_LITERAL" => Ts.end_literal "BEGIN_DELIMITERS" => Ts.begin_delimiters -"[" => Ts.LSQUARE -"]" => Ts.RSQUARE -"{" => Ts.LBRACE -"}" => Ts.RBRACE -"(" => Ts.LPAREN -")" => Ts.RPAREN +"[" => Ts.LSQUARE +"]" => Ts.RSQUARE +"{" => Ts.LBRACE +"}" => Ts.RBRACE +"(" => Ts.LPAREN +")" => Ts.RPAREN +"\"" => Ts.DQUOTE +"\"\"\"" => Ts.TRIPLE_DQUOTE +"`" => Ts.BACKTICK +"```" => Ts.TRIPLE_BACKTICK "END_DELIMITERS" => Ts.end_delimiters "BEGIN_OPS" => Ts.begin_ops @@ -875,22 +876,22 @@ end # Mapping from kinds to their unique string representation, if it exists const _kind_to_str_unique = Dict{Kind,String}(k=>string(s) for (k,s) in TzTokens.UNICODE_OPS_REVERSE) -for c in "([{}])@,;" - 
_kind_to_str_unique[_str_to_kind[string(c)]] = string(c) -end -for kw in split("""as abstract baremodule begin break catch const - continue do else elseif end export finally for - function global if import let local - macro module mutable new outer primitive quote - return struct try type using while - - block call comparison curly string inert macrocall kw parameters - toplevel tuple ref vect braces bracescat hcat - vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator - filter flatten comprehension typed_comprehension - - error Nothing - """) +for kw in split(""" + ( [ { } ] ) @ , ; " \"\"\" ` ``` + + as abstract baremodule begin break catch const + continue do else elseif end export finally for + function global if import let local + macro module mutable new outer primitive quote + return struct try type using while + + block call comparison curly string inert macrocall kw parameters + toplevel tuple ref vect braces bracescat hcat + vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator + filter flatten comprehension typed_comprehension + + error Nothing + """) _kind_to_str_unique[_str_to_kind[kw]] = kw end diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 23d825064c42a..3b9b4a29bbadf 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -81,8 +81,8 @@ function is_number(t) kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") end -function is_string(t) - kind(t) in (K"String", K"TripleString") +function is_string_delim(t) + kind(t) in (K"\"", K"\"\"\"") end function is_radical_op(t) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 399a80ef326d4..8bec4df2c3016 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -199,6 +199,7 @@ tests = [ "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" "f(a,b)" => "(call f a b)" + "f (a)" => "(call f (error-t) a)" "f(a).g(b)" => "(call (. 
(call f a) (quote g)) b)" # do "f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))" @@ -231,23 +232,28 @@ tests = [ "a().@x{y}" => """(macrocall (error (. (call a) (quote x))) (braces y))""" # array indexing, typed comprehension, etc "a[i]" => "(ref a i)" + "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" # Keyword params always use kw inside tuple in dot calls "f.(a,b)" => "(. f (tuple a b))" "f.(a=1)" => "(. f (tuple (kw a 1)))" + "f. (x)" => "(. f (error-t) (tuple x))" # Other dotted syntax "A.:+" => "(. A (quote +))" + "A.: +" => "(. A (quote (error-t) +))" "f.\$x" => "(. f (inert (\$ x)))" "f.\$(x+y)" => "(. f (inert (\$ (call-i x + y))))" # Field/property syntax "f.x.y" => "(. (. f (quote x)) (quote y))" + "x .y" => "(. x (error-t) (quote y))" # Adjoint "f'" => "(' f)" "f'ᵀ" => "(call-i f 'ᵀ)" # Curly calls "@S{a,b}" => """(macrocall @S (braces a b))""" "S{a,b}" => "(curly S a b)" + "S {a}" => "(curly S (error-t) a)" # String macros """x"str\"""" => """(macrocall @x_str "str")""" """x`str`""" => """(macrocall @x_cmd "str")""" @@ -474,7 +480,7 @@ tests = [ ": foo" => "(quote (error-t) foo)" ], JuliaSyntax.parse_atom => [ - # parse_array + # Actually parse_array # Normal matrix construction syntax "[x y ; z w]" => "(vcat (row x y) (row z w))" "[x y ; z w ; a b]" => "(vcat (row x y) (row z w) (row a b))" @@ -497,6 +503,17 @@ tests = [ "[x ; y ;; z ; w ;;; a ; b ;; c ; d]" => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" ], + JuliaSyntax.parse_string => [ + "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" + "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" + ((v=v"1.5",), "\"hi\$(\"ho\")\"") => "(string \"hi\" \"ho\")" + "\"a \$foo b\"" => "(string \"a \" foo \" b\")" + "\"\"" => "\"\"" + "\"\$x\$y\$z\"" => "(string x y z)" + "\"\$(x)\"" => "(string x)" + "\"\$x\"" => "(string x)" + "\"str\"" => "\"str\"" + 
], JuliaSyntax.parse_docstring => [ "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" foo)" ], From 457a42c39985d70aa9696dd180b9572e2aa020d0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 12:02:36 +1000 Subject: [PATCH 0278/1109] Tokenize: Fix parsing of backslashes in raw strings --- JuliaSyntax/Tokenize/src/lexer.jl | 21 ++++++++++++--------- JuliaSyntax/Tokenize/test/lexer.jl | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index a59443138e80c..9453d76ebe34b 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -918,7 +918,7 @@ end # The prefix `var"` has been consumed function lex_var(l::Lexer) read_raw_string(l, '"', false) - if readchar(l) == '"' + if accept(l, '"') return emit(l, Tokens.VAR_IDENTIFIER) else return emit_error(l, Tokens.EOF_VAR) @@ -952,20 +952,23 @@ end # closing quotes can be escaped with an odd number of \ characters. 
function read_raw_string(l::Lexer, delim::Char, triplestr::Bool) while true + if string_terminates(l, delim, triplestr) || eof(peekchar(l)) + return + end c = readchar(l) if c == '\\' - n = 0 - while c == '\\' + n = 1 + while true + readchar(l) n += 1 - c = readchar(l) + if peekchar(l) != '\\' + break + end end - if c == delim && !iseven(n) - c = readchar(l) + if peekchar(l) == delim && !iseven(n) + readchar(l) end end - if string_terminates(l, delim, triplestr) || eof(c) - return - end end end diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 4f49e1f46ceb6..b3daef8c8f72a 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -323,6 +323,22 @@ end @test ts[2] ~ (T.STRING , "x \$ \\ y" ) @test ts[3] ~ (T.BACKTICK , "`" ) @test ts[4] ~ (T.ENDMARKER , "" ) + + # str"\\" + ts = collect(tokenize("str\"\\\\\"")) + @test ts[1] ~ (T.IDENTIFIER , "str" ) + @test ts[2] ~ (T.DQUOTE , "\"" ) + @test ts[3] ~ (T.STRING , "\\\\" ) + @test ts[4] ~ (T.DQUOTE , "\"" ) + @test ts[5] ~ (T.ENDMARKER , "" ) + + # str"\\\"" + ts = collect(tokenize("str\"\\\\\\\"\"")) + @test ts[1] ~ (T.IDENTIFIER , "str" ) + @test ts[2] ~ (T.DQUOTE , "\"" ) + @test ts[3] ~ (T.STRING , "\\\\\\\"" ) + @test ts[4] ~ (T.DQUOTE , "\"" ) + @test ts[5] ~ (T.ENDMARKER , "" ) end @testset "interpolation" begin From f9540e3fcebaec96b8b7a5808f5d62bb30e2b0f1 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 12:28:09 +1000 Subject: [PATCH 0279/1109] Better printing of diagnostics for errors with large spans --- JuliaSyntax/src/parse_stream.jl | 53 ++++++++++++++++++++++++++------- JuliaSyntax/src/source_files.jl | 12 ++++++++ 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8b6b69cb9c280..3fab4528c7bea 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -62,28 +62,55 @@ end first_byte(d::Diagnostic) = 
d.first_byte last_byte(d::Diagnostic) = d.last_byte -function show_diagnostic(io::IO, diagnostic::Diagnostic, code) +function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) col,prefix = diagnostic.level == :error ? (:light_red, "Error") : diagnostic.level == :warning ? (:light_yellow, "Warning") : diagnostic.level == :note ? (:light_blue, "Note") : (:normal, "Info") printstyled(io, "$prefix: ", color=col) print(io, diagnostic.message, ":\n") + p = first_byte(diagnostic) q = last_byte(diagnostic) - if !isvalid(code, q) - # Transform byte range into valid text range - q = prevind(code, q) - end + code = source.code if q < p || (p == q && code[p] == '\n') # An empty or invisible range! We expand it symmetrically to make it # visible. p = max(firstindex(code), prevind(code, p)) q = min(lastindex(code), nextind(code, q)) end - print(io, code[1:prevind(code, p)]) - _printstyled(io, code[p:q]; color=(100,40,40)) - print(io, code[nextind(code, q):end], '\n') + + # p and q mark the start and end of the diagnostic range. For context, + # buffer these out to the surrouding lines. + a,b = source_line_range(source, p, context_lines_before=2, context_lines_after=1) + c,d = source_line_range(source, q, context_lines_before=1, context_lines_after=2) + + hicol = (100,40,40) + + print(io, source[a:prevind(code, p)]) + # There's two situations, either + if b >= c + # The diagnostic range is compact and we show the whole thing + # a............... + # .....p...q...... + # ...............b + + _printstyled(io, source[p:q]; color=hicol) + else + # Or large and we trucate the code to show only the region around the + # start and end of the error. + # a............... + # .....p.......... + # ...............b + # (snip) + # c............... + # .....q.......... 
+ # ...............d + _printstyled(io, source[p:b]; color=hicol) + println(io, "…") + _printstyled(io, source[c:q]; color=hicol) + end + print(io, source[nextind(code,q):d]) end struct ParseStreamPosition @@ -127,12 +154,18 @@ function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) println(io, "ParseStream at position $(stream.next_byte)") end -function show_diagnostics(io::IO, stream::ParseStream, code) +function show_diagnostics(io::IO, stream::ParseStream, code::SourceFile) for d in stream.diagnostics show_diagnostic(io, d, code) end end +function show_diagnostics(io::IO, stream::ParseStream, code) + if !isempty(stream.diagnostics) + show_diagnostics(io, stream, SourceFile(code)) + end +end + #------------------------------------------------------------------------------- # Stream input interface - the peek_* family of functions @@ -266,7 +299,7 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") if n <= 0 return end - for i=1:n + for i = 1:n tok = stream.lookahead[i] k = kind(tok) if k == K"EndMarker" diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 30bf3fff9fa7a..71fc4bb6a2721 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -43,6 +43,18 @@ function source_location(source::SourceFile, byte_index) line, column end +""" +Get byte range of the source line at byte_index, buffered by +`context_lines_before` and `context_lines_after` before and after. 
+""" +function source_line_range(source::SourceFile, byte_index; + context_lines_before=0, context_lines_after=0) + line = searchsortedlast(source.line_starts, byte_index) + fbyte = source.line_starts[max(line-context_lines_before, 1)] + lbyte = source.line_starts[min(line+1+context_lines_after, end)] - 1 + fbyte,lbyte +end + function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) LineNumberNode(source_line(source, byte_index), source.filename) end From 178bf6b9b50d83204de576c00b8a319815315e9d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 12:28:54 +1000 Subject: [PATCH 0280/1109] Fix blocks in rhs of `->` syntax, fix break/continue trivia --- JuliaSyntax/src/parser.jl | 17 +++++++++++------ JuliaSyntax/src/syntax_tree.jl | 12 +++++++++--- JuliaSyntax/src/tokens.jl | 4 ++++ JuliaSyntax/test/parser.jl | 17 ++++++++++------- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 31b6342f8f888..a893cfd98f65a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -482,7 +482,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) mark = position(ps) if subtype_comparison && is_reserved_word(peek(ps)) # Recovery - # struct try end ==> (struct false (error try) (block)) + # struct try end ==> (struct false (error (try)) (block)) name = untokenize(peek(ps)) bump(ps) emit(ps, mark, K"error", error="Invalid type name `$name`") @@ -1007,10 +1007,13 @@ function parse_decl_with_initial_ex(ps::ParseState, mark) emit(ps, mark, K"::") end if peek(ps) == K"->" + # x -> y ==> (-> x (block y)) + # a::b->c ==> (-> (:: a b) (block c)) # -> is unusual: it binds tightly on the left and loosely on the right. 
- # a::b->c ==> (-> (:: a b) c) bump(ps, TRIVIA_FLAG) + m = position(ps) parse_eq_star(ps) + emit(ps, m, K"block") emit(ps, mark, K"->") end end @@ -1513,7 +1516,9 @@ function parse_resword(ps::ParseState) end emit(ps, mark, K"return") elseif word in (K"break", K"continue") - bump(ps, TRIVIA_FLAG) + # break ==> (break) + # continue ==> (continue) + bump(ps) k = peek(ps) if !(k in (K"NewlineWs", K";", K")", K":", K"EndMarker") || (k == K"end" && !ps.end_symbol)) recover(is_closer_or_newline, ps, TRIVIA_FLAG, @@ -1699,8 +1704,8 @@ function parse_function(ps::ParseState) else if is_keyword(k) # Forbid things like - # function begin() end ==> (function (call (error begin)) (block)) - # macro begin() end ==> (macro (call (error begin)) (block)) + # function begin() end ==> (function (call (error (begin))) (block)) + # macro begin() end ==> (macro (call (error (begin))) (block)) bump(ps, error="invalid $(untokenize(word)) name") parse_call_chain(ps, def_mark) else @@ -2678,7 +2683,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # Being inside quote makes keywords into identifiers at at the # first level of nesting # :end ==> (quote end) - # :(end) ==> (quote (error end)) + # :(end) ==> (quote (error (end))) # Being inside quote makes end non-special again (issue #27690) # a[:(end)] ==> (ref a (quote (error-t end))) parse_atom(ParseState(ps, end_symbol=false), false) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 8f0fe1373b598..4ddac843a84b6 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -112,8 +112,14 @@ end Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) +function unescape_julia_string(str, triplequote=false) + # FIXME: do this properly + str = replace(str, "\\\$" => '$') + unescape_string(str) +end + function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) - if !haschildren(raw) && !is_syntax_kind(raw) + if !haschildren(raw) 
&& !(is_syntax_kind(raw) || is_keyword(raw)) # Leaf node k = kind(raw) val_range = position:position + span(raw) - 1 @@ -132,7 +138,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In false elseif k == K"Char" # FIXME: Escape sequences... - unescape_string(val_str)[2] + unescape_julia_string(val_str)[2] elseif k == K"Identifier" Symbol(val_str) elseif k == K"VarIdentifier" @@ -141,7 +147,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In # This should only happen for tokens nested inside errors Symbol(val_str) elseif k == K"String" - unescape_string(val_str) + unescape_julia_string(val_str) elseif k == K"UnquotedString" String(val_str) elseif is_operator(k) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 3b9b4a29bbadf..4baa6a886abfd 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -29,6 +29,10 @@ is_literal(k::Kind) = TzTokens.isliteral(k) is_keyword(k::Kind) = TzTokens.iskeyword(k) is_operator(k::Kind) = TzTokens.isoperator(k) +is_literal(k) = is_literal(kind(k)) +is_keyword(k) = is_keyword(kind(k)) +is_operator(k) = is_operator(kind(k)) + # Predicates for operator precedence is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8bec4df2c3016..634b15bf4e159 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -184,8 +184,8 @@ tests = [ ], JuliaSyntax.parse_decl => [ "a::b" => "(:: a b)" - "a->b" => "(-> a b)" - "a::b->c" => "(-> (:: a b) c)" + "a->b" => "(-> a (block b))" + "a::b->c" => "(-> (:: a b) (block c))" ], JuliaSyntax.parse_unary_prefix => [ "&)" => "&" @@ -300,16 +300,19 @@ tests = [ "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (:: a X)))" "mutable struct A end" => "(struct true A (block))" "struct A end" => "(struct false A (block))" 
- "struct try end" => "(struct false (error try) (block))" + "struct try end" => "(struct false (error (try)) (block))" # return "return\nx" => "(return nothing)" "return)" => "(return nothing)" "return x" => "(return x)" "return x,y" => "(return (tuple x y))" + # break/continue + "break" => "(break)" + "continue" => "(continue)" # module/baremodule "module A end" => "(module true A (block))" "baremodule A end" => "(module false A (block))" - "module do \n end" => "(module true (error do) (block))" + "module do \n end" => "(module true (error (do)) (block))" "module \$A end" => "(module true (\$ A) (block))" "module A \n a \n b \n end" => "(module true A (block a b))" # export @@ -351,8 +354,8 @@ tests = [ "function (x,y) end" => "(function (tuple x y) (block))" "function (x=1) end" => "(function (tuple (kw x 1)) (block))" "function (;x=1) end" => "(function (tuple (parameters (kw x 1))) (block))" - "function begin() end" => "(function (call (error begin)) (block))" - "macro begin() end" => "(macro (call (error begin)) (block))" + "function begin() end" => "(function (call (error (begin))) (block))" + "macro begin() end" => "(macro (call (error (begin))) (block))" "function f() end" => "(function (call f) (block))" "function \n f() end" => "(function (call f) (block))" "function \$f() end" => "(function (call (\$ f)) (block))" @@ -450,7 +453,7 @@ tests = [ ": end" => ":" # Special symbols quoted ":end" => "(quote end)" - ":(end)" => "(quote (error end))" + ":(end)" => "(quote (error (end)))" ":<:" => "(quote <:)" # Macro names can be keywords "@end x" => """(macrocall @end x)""" From 64a7d0058c4001320fc367449bf9b535e9060925 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 14:36:34 +1000 Subject: [PATCH 0281/1109] =?UTF-8?q?Various=20fixes;=20JuliaSyntax=20now?= =?UTF-8?q?=20parses=20its=20own=20source=20code=20=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix parsing of where with 
curlies; add test cases. * Don't emit blocks for right hand side of short form functions or anonymous functions as these are unnecessary. Instead, leave these to the Expr conversion stage where line numbers need to be added. * Very rough triple quoted string handling and hexint parsing --- JuliaSyntax/src/parse_stream.jl | 10 ++-- JuliaSyntax/src/parser.jl | 19 +++--- JuliaSyntax/src/syntax_tree.jl | 102 +++++++++++++++++++++++++++----- JuliaSyntax/test/parser.jl | 22 +++++-- 4 files changed, 121 insertions(+), 32 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 3fab4528c7bea..e90935f70b09c 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -307,10 +307,12 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") end is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags + tok.raw.dotop && (f |= DOTOP_FLAG) + tok.raw.triplestr && (f |= TRIPLE_STRING_FLAG) k = (is_trivia || remap_kind == K"Nothing") ? 
k : remap_kind - span = TaggedRange(SyntaxHead(k, f), first_byte(tok), - last_byte(tok), lastindex(stream.ranges)+1) - push!(stream.ranges, span) + range = TaggedRange(SyntaxHead(k, f), first_byte(tok), + last_byte(tok), lastindex(stream.ranges)+1) + push!(stream.ranges, range) end Base._deletebeg!(stream.lookahead, n) stream.next_byte = last_byte(last(stream.ranges)) + 1 @@ -600,7 +602,7 @@ end # Normal context function ParseState(stream::ParseStream; julia_version=VERSION) - ParseState(stream, julia_version, true, false, true, false, false, false) + ParseState(stream, julia_version, true, false, false, false, false, true) end function ParseState(ps::ParseState; range_colon_enabled=nothing, diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a893cfd98f65a..ab9ccc8bdec55 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -725,16 +725,20 @@ function parse_where_chain(ps0::ParseState, mark) bump(ps, TRIVIA_FLAG) # where k = peek(ps) if k == K"{" + m = position(ps) + bump(ps, TRIVIA_FLAG) # x where {T,S} ==> (where x T S) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) if ckind != K"vect" # Various nonsensical forms permitted here # x where {T S} ==> (where x (bracescat (row T S))) # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) - emit_braces(ps, mark, ckind, cflags) + emit_braces(ps, m, ckind, cflags) end emit(ps, mark, K"where") else + # x where T ==> (where x T) + # x where T<:S ==> (where x (<: T S)) parse_comparison(ps) emit(ps, mark, K"where") end @@ -1007,13 +1011,11 @@ function parse_decl_with_initial_ex(ps::ParseState, mark) emit(ps, mark, K"::") end if peek(ps) == K"->" - # x -> y ==> (-> x (block y)) - # a::b->c ==> (-> (:: a b) (block c)) - # -> is unusual: it binds tightly on the left and loosely on the right. + # x -> y ==> (-> x y) + # a::b->c ==> (-> (:: a b) c) bump(ps, TRIVIA_FLAG) - m = position(ps) + # -> is unusual: it binds tightly on the left and loosely on the right. 
parse_eq_star(ps) - emit(ps, m, K"block") emit(ps, mark, K"->") end end @@ -2343,6 +2345,7 @@ function parse_array_separator(ps) end # Parse array concatenation/construction/indexing syntax inside of `[]` or `{}`. +# The opening bracket has been consumed. # # flisp: parse-cat function parse_cat(ps::ParseState, closer, end_is_symbol) @@ -2752,10 +2755,10 @@ end function emit_braces(ps, mark, ckind, cflags) if ckind == K"hcat" # {x y} ==> (bracescat (row x y)) - emit(ps, K"row", mark, cflags) + emit(ps, mark, K"row", cflags) elseif ckind == K"ncat" # {x ;;; y} ==> (bracescat (nrow-3 x y)) - emit(ps, K"nrow", mark, cflags) + emit(ps, mark, K"nrow", cflags) end outk = ckind in (K"vect", K"comprehension") ? K"braces" : K"bracescat" emit(ps, mark, outk) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 4ddac843a84b6..b253a0b9bd3e6 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -12,8 +12,10 @@ const TRIVIA_FLAG = RawFlags(1<<0) const INFIX_FLAG = RawFlags(1<<1) # Record whether syntactic operators were dotted const DOTOP_FLAG = RawFlags(1<<2) +# Set when kind == K"String" was triple-delimited as with """ or ``` +const TRIPLE_STRING_FLAG = RawFlags(1<<3) # try-finally-catch -const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<3) +const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<4) # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) # Todo ERROR_FLAG = 0x80000000 ? @@ -115,7 +117,52 @@ Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) function unescape_julia_string(str, triplequote=false) # FIXME: do this properly str = replace(str, "\\\$" => '$') - unescape_string(str) + str = unescape_string(str) + if triplequote + # FIXME: All sorts of rules need to be added here, and the separate + # fragments need to be aware of each other. 
+ if startswith(str, '\n') + str = str[2:end] + end + lines = split(str, '\n') + indent = typemax(Int) + for line in lines + if isempty(line) + continue + end + j = findfirst(!=(' '), line) + indent = min(indent, j == nothing ? length(line) : j-1) + end + if isempty(last(lines)) + indent = 0 + end + str = join((l[indent+1:end] for l in lines), '\n') + end + str +end + +function julia_string_to_number(T, str, kind) + # FIXME: do this properly! + if kind == K"Integer" + str = replace(str, '_'=>"") + end + x = Base.parse(T, str) + if kind == K"HexInt" + if length(str) <= 4 + x = UInt8(x) + elseif length(str) <= 6 + x = UInt16(x) + elseif length(str) <= 10 + x = UInt32(x) + elseif length(str) <= 18 + x = UInt64(x) + elseif length(str) <= 34 + x = UInt128(x) + else + TODO("BigInt") + end + end + x end function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) @@ -126,12 +173,11 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val_str = source[val_range] # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. - val = if k == K"Integer" - # FIXME: this doesn't work with _'s as in 1_000_000 - Base.parse(Int, val_str) + val = if k in (K"Integer", K"BinInt", K"OctInt", K"HexInt") + julia_string_to_number(Int, val_str, k) elseif k == K"Float" # FIXME: Other float types! 
- Base.parse(Float64, val_str) + julia_string_to_number(Float64, val_str, k) elseif k == K"true" true elseif k == K"false" @@ -147,7 +193,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In # This should only happen for tokens nested inside errors Symbol(val_str) elseif k == K"String" - unescape_julia_string(val_str) + unescape_julia_string(val_str, has_flags(head(raw), TRIPLE_STRING_FLAG)) elseif k == K"UnquotedString" String(val_str) elseif is_operator(k) @@ -354,6 +400,11 @@ end #------------------------------------------------------------------------------- # Conversion to Base.Expr +function is_eventually_call(ex) + return Meta.isexpr(ex, :call) || (Meta.isexpr(ex, (:where, :(::))) && + is_eventually_call(ex.args[1])) +end + function _to_expr(node::SyntaxNode) if !haschildren(node) return node.val @@ -366,10 +417,10 @@ function _to_expr(node::SyntaxNode) if is_infix(node.raw) args[2], args[1] = args[1], args[2] end + loc = source_location(LineNumberNode, node.source, node.position) # Convert elements if head(node) == :macrocall - line_node = source_location(LineNumberNode, node.source, node.position) - insert!(args, 2, line_node) + insert!(args, 2, loc) elseif head(node) in (:call, :ref) # Move parameters block to args[2] if length(args) > 1 && Meta.isexpr(args[end], :parameters) @@ -426,6 +477,22 @@ function _to_expr(node::SyntaxNode) pushfirst!(args, numeric_flags(flags(node))) elseif head(node) == :typed_ncat insert!(args, 2, numeric_flags(flags(node))) + elseif head(node) == :(=) + if is_eventually_call(args[1]) + if Meta.isexpr(args[2], :block) + pushfirst!(args[2].args, loc) + else + # Add block for short form function locations + args[2] = Expr(:block, loc, args[2]) + end + end + elseif head(node) == :(->) + if Meta.isexpr(args[2], :block) + pushfirst!(args[2].args, loc) + else + # Add block for source locations + args[2] = Expr(:block, loc, args[2]) + end end if head(node) == :inert || (head(node) == :quote && 
length(args) == 1 && !(only(args) isa Expr)) @@ -464,16 +531,23 @@ function parse_all(::Type{Expr}, code::AbstractString; filename="none") # convert to Julia expr ex = Expr(tree) - flisp_ex = flisp_parse_all(code) - if ex != flisp_ex && !(!isempty(flisp_ex.args) && + # TODO: Don't remove line nums; try to get them consistent with Base. + flisp_ex = remove_linenums!(flisp_parse_all(code)) + if remove_linenums!(deepcopy(ex)) != flisp_ex && !(!isempty(flisp_ex.args) && Meta.isexpr(flisp_ex.args[end], :error)) @error "Mismatch with Meta.parse()" ex flisp_ex end ex end +function remove_linenums!(ex) + ex = Base.remove_linenums!(ex) + if Meta.isexpr(ex, :toplevel) + filter!(x->!(x isa LineNumberNode), ex.args) + end + ex +end + function flisp_parse_all(code) - flisp_ex = Base.remove_linenums!(Meta.parseall(code)) - filter!(x->!(x isa LineNumberNode), flisp_ex.args) - flisp_ex + Meta.parseall(code) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 634b15bf4e159..ae0b799909aa1 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -31,8 +31,8 @@ function itest_parse(production, code, julia_version::VersionNumber=v"1.6") println(stdout, "\n\n# Julia Expr:") show(stdout, MIME"text/plain"(), ex) - f_ex = Base.remove_linenums!(Meta.parse(code, raise=false)) - if ex != f_ex + f_ex = JuliaSyntax.remove_linenums!(Meta.parse(code, raise=false)) + if JuliaSyntax.remove_linenums!(ex) != f_ex println(stdout, "\n\n# AST dump") dump(ex) @@ -183,9 +183,16 @@ tests = [ "+x" => "(call + x)" ], JuliaSyntax.parse_decl => [ - "a::b" => "(:: a b)" - "a->b" => "(-> a (block b))" - "a::b->c" => "(-> (:: a b) (block c))" + "a::b" => "(:: a b)" + "a->b" => "(-> a b)" + "a::b->c" => "(-> (:: a b) c)" + ], + JuliaSyntax.parse_unary_subtype => [ # Really for parse_where + "x where {T,S}" => "(where x T S)" + "x where {T S}" => "(where x (bracescat (row T S)))" + "x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))" + "x where T" => 
"(where x T)" + "x where T<:S" => "(where x (<: T S))" ], JuliaSyntax.parse_unary_prefix => [ "&)" => "&" @@ -479,6 +486,9 @@ tests = [ # parse_paren ":(=)" => "(quote =)" ":(::)" => "(quote ::)" + # braces + "{x y}" => "(bracescat (row x y))" + "{x ;;; y}" => "(bracescat (nrow-3 x y))" # Errors ": foo" => "(quote (error-t) foo)" ], @@ -555,5 +565,5 @@ end end """ ex = JuliaSyntax.parse_all(Expr, code) - @test ex == JuliaSyntax.flisp_parse_all(code) + @test ex == JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) end From ff3d67796ea50d590cd9bebe7d0ff9b5123c941b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 15:01:18 +1000 Subject: [PATCH 0282/1109] Fix project/manifest --- JuliaSyntax/Manifest.toml | 8 -------- JuliaSyntax/Project.toml | 3 ++- JuliaSyntax/README.md | 3 +++ JuliaSyntax/src/syntax_tree.jl | 12 ++++++++++-- 4 files changed, 15 insertions(+), 11 deletions(-) delete mode 100644 JuliaSyntax/Manifest.toml diff --git a/JuliaSyntax/Manifest.toml b/JuliaSyntax/Manifest.toml deleted file mode 100644 index decda6f1b942d..0000000000000 --- a/JuliaSyntax/Manifest.toml +++ /dev/null @@ -1,8 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[Tokenize]] -git-tree-sha1 = "433847efd9ac3d09deef4b136f0c435f22b967d9" -repo-rev = "cjf/julia-syntax-hacks" -repo-url = "https://github.com/c42f/Tokenize.jl.git" -uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.21" diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 51b734c9e130e..1232839630e8d 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -7,7 +7,8 @@ version = "0.1.0" julia = "1.4" [extras] +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "Printf"] diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 84a1737e0508b..b9aca9c519a24 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -394,6 
+394,9 @@ Some resources: - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) - Some discussion of error recovery +* Some notes about stateful lexers for parsing shell-like string interpolations: + http://www.oilshell.org/blog/2017/12/17.html + # Parser devdocs # Differences from the flisp parser diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index b253a0b9bd3e6..9a9c04c9391ac 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -165,6 +165,15 @@ function julia_string_to_number(T, str, kind) x end +function unescape_julia_char(str) + # FIXME: Do this properly! + if str == "\\'" + '\'' + else + unescape_julia_string(str)[1] + end +end + function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) # Leaf node @@ -183,8 +192,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"false" false elseif k == K"Char" - # FIXME: Escape sequences... 
- unescape_julia_string(val_str)[2] + unescape_julia_char(val_str[2:end-1]) elseif k == K"Identifier" Symbol(val_str) elseif k == K"VarIdentifier" From cc43c3f54c10dc0f9cc15724cff723ddb4d93553 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 15:18:00 +1000 Subject: [PATCH 0283/1109] Make tests pass on Julia 1.4 --- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 4 ++-- JuliaSyntax/src/source_files.jl | 2 +- JuliaSyntax/src/syntax_tree.jl | 8 ++++++-- JuliaSyntax/test/parser.jl | 4 ++-- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 6451c269e0579..2db2bfb750c01 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -36,7 +36,7 @@ function core_parser_hook(code, filename, offset, options) ex = Expr(:error, ParseError(code, stream)) else green_tree = build_tree(GreenNode, stream) - src = SourceFile(code; filename) + src = SourceFile(code; filename=filename) tree = SyntaxNode(src, green_tree) ex = Expr(tree) end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index e90935f70b09c..2365b367d1e91 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -221,7 +221,7 @@ single newline is returned as kind `K"NewlineWs"` unless `skip_newlines` is true. """ function peek(stream::ParseStream, n::Integer=1; skip_newlines::Bool=false) - kind(peek_token(stream, n; skip_newlines)) + kind(peek_token(stream, n; skip_newlines=skip_newlines)) end """ @@ -521,7 +521,7 @@ traverse the list of ranges backward rather than forward.) 
""" function build_tree(::Type{NodeType}, stream::ParseStream; wrap_toplevel_as_kind=nothing) where NodeType - stack = Vector{@NamedTuple{range::TaggedRange, node::NodeType}}() + stack = Vector{NamedTuple{(:range,:node),Tuple{TaggedRange,NodeType}}}() for (span_index, range) in enumerate(stream.ranges) if kind(range) == K"TOMBSTONE" # Ignore invisible tokens which were created but never finalized. diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 71fc4bb6a2721..3aa16eee10936 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -74,7 +74,7 @@ function Base.getindex(source::SourceFile, rng::AbstractRange) # out of the valid unicode check. The SubString{String} inner constructor # has some @boundscheck, but using @inbounds depends on inlining choices.) j = prevind(source.code, last(rng)+1) - @view source.code[i:j] + VERSION >= v"1.6" ? @view(source.code[i:j]) : source.code[i:j] end function Base.getindex(source::SourceFile, i::Int) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9a9c04c9391ac..f1bce1676b965 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -400,7 +400,7 @@ function highlight(code::String, node, path::Int...; color=(40,40,70)) node, p, span = child_position_span(node, path...) 
q = p + span print(stdout, code[1:p-1]) - _printstyled(stdout, code[p:q-1]; color) + _printstyled(stdout, code[p:q-1]; color=color) print(stdout, code[q:end]) end @@ -557,5 +557,9 @@ function remove_linenums!(ex) end function flisp_parse_all(code) - Meta.parseall(code) + if VERSION >= v"1.6" + Meta.parseall(code) + else + Base.parse_input_line(code) + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index ae0b799909aa1..b00df3b8fdaaa 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -14,7 +14,7 @@ end # Version of test_parse for interactive exploration function itest_parse(production, code, julia_version::VersionNumber=v"1.6") stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream; julia_version)) + production(JuliaSyntax.ParseState(stream; julia_version=julia_version)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") println(stdout, "# Code:\n$code\n") @@ -538,7 +538,7 @@ tests = [ if !(input isa AbstractString) opts,input = input else - opts = (;) + opts = NamedTuple() end @test test_parse(production, input; opts...) == output end From b5e493ec349a55fc39f174db835870dc8e324091 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 4 Jan 2022 23:26:18 +1000 Subject: [PATCH 0284/1109] KSet macro - lighter syntax for sets of kinds This macro makes sets of kinds easier to read. We could also consider optimizing the set membership queries using numerical values of the included kinds, if necessary. 
--- JuliaSyntax/src/parser.jl | 134 ++++++++++++++++++-------------------- JuliaSyntax/src/tokens.jl | 16 +++++ 2 files changed, 80 insertions(+), 70 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ab9ccc8bdec55..68681aa6009b9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -17,7 +17,7 @@ function bump_closing_token(ps, closing_kind) mark = position(ps) while true k = peek(ps) - if is_closing_token(ps, k) && !(k in (K",", K";")) + if is_closing_token(ps, k) && !(k in KSet`, ;`) break end bump(ps) @@ -66,9 +66,8 @@ end function is_closing_token(ps::ParseState, k) k = kind(k) - return k in (K"else", K"elseif", K"catch", K"finally", - K",", K")", K"]", K"}", K";", - K"EndMarker") || (k == K"end" && !ps.end_symbol) + return k in KSet`else elseif catch finally , ) ] } ; EndMarker` || + (k == K"end" && !ps.end_symbol) end function is_closer_or_newline(ps::ParseState, k) @@ -77,22 +76,20 @@ end # Closing token which isn't a keyword function is_non_keyword_closer(k) - kind(k) in (K",", K")", K"]", K"}", K";", K"EndMarker") + kind(k) in KSet`, ) ] } ; EndMarker` end function is_initial_reserved_word(ps::ParseState, k) k = kind(k) - is_iresword = k in ( - K"begin", K"while", K"if", K"for", K"try", K"return", K"break", - K"continue", K"function", K"macro", K"quote", K"let", K"local", - K"global", K"const", K"do", K"struct", K"module", K"baremodule", - K"using", K"import", K"export") + is_iresword = k in KSet`begin while if for try return break continue function + macro quote let local global const do struct module + baremodule using import export` # `begin` means firstindex(a) inside a[...] 
return is_iresword && !(k == K"begin" && ps.end_symbol) end function is_contextural_keyword(k) - kind(k) ∈ (K"mutable", K"primitive", K"abstract") + kind(k) ∈ KSet`mutable primitive abstract` end function is_reserved_word(k) @@ -117,46 +114,44 @@ function peek_initial_reserved_words(ps::ParseState) end function is_block_form(k) - kind(k) in (K"block", K"quote", K"if", K"for", K"while", - K"let", K"function", K"macro", K"abstract", - K"primitive", K"struct", K"try", K"module") + kind(k) in KSet`block quote if for while let function macro + abstract primitive struct try module` end function is_syntactic_operator(k) k = kind(k) - return k in (K"&&", K"||", K".", K"...", K"->") || - (is_prec_assignment(k) && k != K"~") + return k in KSet`&& || . ... ->` || (is_prec_assignment(k) && k != K"~") end function is_syntactic_unary_op(k) - kind(k) in (K"$", K"&", K"::") + kind(k) in KSet`$ & ::` end function is_type_operator(k) - kind(k) in (K"<:", K">:") + kind(k) in KSet`<: >:` end function is_unary_op(k) - kind(k) in ( - K"<:", K">:", # TODO: dotop disallowed ? - K"+", K"-", K"!", K"~", K"¬", K"√", K"∛", K"∜", K"⋆", K"±", K"∓" # dotop allowed - ) + k = kind(k) + k in KSet`<: >:` || # TODO: dotop disallowed ? + k in KSet`+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓` # dotop allowed end # Operators which are both unary and binary function is_both_unary_and_binary(k) + k = kind(k) # TODO: Do we need to check dotop as well here? - kind(k) in (K"$", K"&", K"~", # <- dotop disallowed? - K"+", K"-", K"⋆", K"±", K"∓") # dotop allowed + k in KSet`$ & ~` || # dotop disallowed? + k in KSet`+ - ⋆ ± ∓` # dotop allowed end # operators handled by parse_unary at the start of an expression function is_initial_operator(k) k = kind(k) # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl - is_operator(k) && - !(k in (K":", K"'", K".'", K"?")) && - !is_syntactic_unary_op(k) && + is_operator(k) && + !(k in KSet`: ' .' 
?`) && + !is_syntactic_unary_op(k) && !is_syntactic_operator(k) end @@ -165,7 +160,7 @@ function is_valid_identifier(k) k = kind(k) # TODO: flisp also had K"...." disallowed. But I don't know what that's # for! Tokenize doesn't have an equivalent here. - !(is_syntactic_operator(k) || k in (K"?", K".'")) + !(is_syntactic_operator(k) || k in KSet`? .'`) end #------------------------------------------------------------------------------- @@ -274,8 +269,7 @@ end # Parse a block, but leave emitting the block up to the caller. function parse_block_inner(ps::ParseState, down) - parse_Nary(ps, down, (K"NewlineWs", K";"), - (K"end", K"else", K"elseif", K"catch", K"finally")) + parse_Nary(ps, down, KSet`NewlineWs ;`, KSet`end else elseif catch finally`) end # ";" at the top level produces a sequence of top level expressions @@ -289,7 +283,7 @@ function parse_stmts(ps::ParseState) do_emit = parse_Nary(ps, parse_docstring, (K";",), (K"NewlineWs",)) # check for unparsed junk after an expression junk_mark = position(ps) - while peek(ps) ∉ (K"EndMarker", K"NewlineWs") + while peek(ps) ∉ KSet`EndMarker NewlineWs` # Error recovery bump(ps) end @@ -324,7 +318,7 @@ end function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) - if (is_literal(k) || is_identifier(k)) && k2 in (K",", K")", K"}", K"]") + if (is_literal(k) || is_identifier(k)) && k2 in KSet`, ) } ]` # optimization: skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) @@ -560,7 +554,7 @@ function parse_range(ps::ParseState) break end t2 = peek_token(ps,2) - if kind(t2) in (K"<", K">") && !t2.had_whitespace + if kind(t2) in KSet`< >` && !t2.had_whitespace # Error heuristic: we found `:>` or `:<` which are invalid lookalikes # for `<:` and `>:`. Attempt to recover by treating them as a # comparison operator. 
@@ -622,14 +616,14 @@ end # # flisp: parse-expr function parse_expr(ps::ParseState) - parse_with_chains(ps, parse_term, is_prec_plus, (K"+", K"++")) + parse_with_chains(ps, parse_term, is_prec_plus, KSet`+ ++`) end # a * b * c ==> (call-i a * b c) # # flisp: parse-term function parse_term(ps::ParseState) - parse_with_chains(ps, parse_rational, is_prec_times, (K"*",)) + parse_with_chains(ps, parse_rational, is_prec_times, KSet`*`) end # Parse left to right, combining any of `chain_ops` into one call @@ -695,15 +689,15 @@ function parse_unary_subtype(ps::ParseState) if k == K"EndMarker" parse_atom(ps) return - elseif k in (K"<:", K">:") + elseif k in KSet`<: >:` # FIXME add test cases k2 = peek(ps, 2) - if is_closing_token(ps, k2) || k2 in (K"NewlineWs", K"=") + if is_closing_token(ps, k2) || k2 in KSet`NewlineWs =` # return operator by itself, as in (<:) bump(ps) return end - if k2 in (K"{", K"(") + if k2 in KSet`{ (` # parse <:{T}(x::T) or <:(x::T) like other unary operators parse_where(ps, parse_juxtapose) else @@ -827,11 +821,11 @@ function parse_unary(ps::ParseState) parse_factor(ps) return end - if k in (K"-", K"+") + if k in KSet`- +` t2 = peek_token(ps, 2) - if !t2.had_whitespace && kind(t2) in (K"Integer", K"Float") + if !t2.had_whitespace && kind(t2) in KSet`Integer Float` k3 = peek(ps, 3) - if is_prec_power(k3) || k3 in (K"[", K"{") + if is_prec_power(k3) || k3 in KSet`[ {` # `[`, `{` (issue #18851) and `^` have higher precedence than # unary negation # -2^x ==> (call - (call-i 2 ^ x)) @@ -867,7 +861,7 @@ function parse_unary_call(ps::ParseState) op_tok_flags = is_type_operator(op_t) ? TRIVIA_FLAG : EMPTY_FLAGS t2 = peek_token(ps, 2) k2 = kind(t2) - if is_closing_token(ps, k2) || k2 in (K"NewlineWs", K"=") + if is_closing_token(ps, k2) || k2 in KSet`NewlineWs =` if is_dotted(op_t) # standalone dotted operators are parsed as (|.| op) # .+ ==> (. 
+) @@ -1048,13 +1042,13 @@ function parse_unary_prefix(ps::ParseState) k = peek(ps) if is_syntactic_unary_op(k) k2 = peek(ps, 2) - if k in (K"&", K"$") && (is_closing_token(ps, k2) || k2 == K"NewlineWs") + if k in KSet`& $` && (is_closing_token(ps, k2) || k2 == K"NewlineWs") # &) ==> & # $\n ==> $ bump(ps) else bump(ps, TRIVIA_FLAG) - if k in (K"&", K"::") + if k in KSet`& ::` # &a ==> (& a) parse_where(ps, parse_call) else @@ -1130,7 +1124,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) t = peek_token(ps) k = kind(t) if (ps.space_sensitive && t.had_whitespace && - k in (K"(", K"[", K"{", K"\\", K"'", K"Char", K"\"", K"\"\"\"", K"`", K"```")) + k in KSet`( [ { \ ' Char " """ \` \`\`\``) # [f (x)] ==> (hcat f x) break end @@ -1337,7 +1331,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # S{a,b} ==> (curly S a b) emit(ps, mark, K"curly") end - elseif k in (K"\"", K"\"\"\"", K"`", K"```") && + elseif k in KSet` " """ \` \`\`\` ` && !t.had_whitespace && is_valid_modref # Custom string and command literals # x"str" ==> (macrocall x_str "str") @@ -1389,7 +1383,7 @@ function parse_resword(ps::ParseState) ps = normal_context(ps) mark = position(ps) word = peek(ps) - if word in (K"begin", K"quote") + if word in KSet`begin quote` # begin end ==> (block) # begin a ; b end ==> (block a b) # begin\na\nb\nend ==> (block a b) @@ -1424,7 +1418,7 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"for") elseif word == K"let" bump(ps, TRIVIA_FLAG) - if peek(ps) ∉ (K"NewlineWs", K";") + if peek(ps) ∉ KSet`NewlineWs ;` # let x=1\n end ==> (let (= x 1) (block)) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_eq_star) @@ -1433,7 +1427,7 @@ function parse_resword(ps::ParseState) # let x=1 ; end ==> (let (= x 1) (block)) # let x::1 ; end ==> (let (:: x 1) (block)) # let x ; end ==> (let x (block)) - if n_subexprs > 1 || !(kb in (K"Identifier", K"=", K"::")) + if n_subexprs > 1 || !(kb in KSet`Identifier = ::`) # let 
x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) # let x+=1 ; end ==> (let (block (+= x 1)) (block)) emit(ps, m, K"block") @@ -1445,7 +1439,7 @@ function parse_resword(ps::ParseState) bump_invisible(ps, K"block") end k = peek(ps) - if k in (K"NewlineWs", K";") + if k in KSet`NewlineWs ;` bump(ps, TRIVIA_FLAG) elseif k == K"end" # pass @@ -1459,9 +1453,9 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"let") elseif word == K"if" parse_if_elseif(ps) - elseif word in (K"const", K"global", K"local") + elseif word in KSet`const global local` parse_const_local_global(ps) - elseif word in (K"function", K"macro") + elseif word in KSet`function macro` parse_function(ps) elseif word == K"abstract" # Abstract type definitions @@ -1477,7 +1471,7 @@ function parse_resword(ps::ParseState) parse_subtype_spec(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"abstract") - elseif word in (K"struct", K"mutable") + elseif word in KSet`struct mutable` # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (:: a X))) if word == K"mutable" # mutable struct A end ==> (struct true A (block)) @@ -1517,16 +1511,16 @@ function parse_resword(ps::ParseState) parse_eq(ps) end emit(ps, mark, K"return") - elseif word in (K"break", K"continue") + elseif word in KSet`break continue` # break ==> (break) # continue ==> (continue) bump(ps) k = peek(ps) - if !(k in (K"NewlineWs", K";", K")", K":", K"EndMarker") || (k == K"end" && !ps.end_symbol)) + if !(k in KSet`NewlineWs ; ) : EndMarker` || (k == K"end" && !ps.end_symbol)) recover(is_closer_or_newline, ps, TRIVIA_FLAG, error="unexpected token after $(untokenize(word))") end - elseif word in (K"module", K"baremodule") + elseif word in KSet`module baremodule` # module A end ==> (module true A (block)) # baremodule A end ==> (module false A (block)) bump(ps, remap_kind= (word == K"module") ? 
K"true" : K"false") @@ -1547,7 +1541,7 @@ function parse_resword(ps::ParseState) bump(ps, TRIVIA_FLAG) parse_comma_separated(ps, parse_atsym) emit(ps, mark, K"export") - elseif word in (K"import", K"using") + elseif word in KSet`import using` parse_imports(ps) elseif word == K"do" bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") @@ -1569,7 +1563,7 @@ function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) bump(ps, TRIVIA_FLAG) end cond_mark = position(ps) - if peek(ps) in (K"NewlineWs", K"end") + if peek(ps) in KSet`NewlineWs end` # if end ==> (if (error) (block)) # if \n end ==> (if (error) (block)) bump_trivia(ps, error="missing condition in `$(untokenize(word))`") @@ -1616,7 +1610,7 @@ function parse_const_local_global(ps) has_const = false scope_k = K"Nothing" k = peek(ps) - if k in (K"global", K"local") + if k in KSet`global local` # global x = 1 ==> (global (= x 1)) # local x = 1 ==> (local (= x 1)) scope_k = k @@ -1633,7 +1627,7 @@ function parse_const_local_global(ps) # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) bump(ps, TRIVIA_FLAG) k = peek(ps) - if k in (K"global", K"local") + if k in KSet`global local` # const global x = 1 ==> (const (global (= x 1))) # const local x = 1 ==> (const (local (= x 1))) scope_k = k @@ -1811,7 +1805,7 @@ end function parse_catch(ps::ParseState) bump(ps, TRIVIA_FLAG) k = peek(ps) - if k in (K";", K"NewlineWs") || is_closing_token(ps, k) + if k in KSet`NewlineWs ;` || is_closing_token(ps, k) # try x catch ; y end ==> (try (block x) false (block y) false false) # try x catch \n y end ==> (try (block x) false (block y) false false) bump_invisible(ps, K"false") @@ -1827,7 +1821,7 @@ end function parse_do(ps::ParseState) ps = normal_context(ps) mark = position(ps) - if peek(ps) in (K"NewlineWs", K";") + if peek(ps) in KSet`NewlineWs ;` # f() do\nend ==> (do (call f) (-> (tuple) (block))) # f() do ; body end ==> (do (call f) (-> (tuple) (block body))) # this trivia needs to go into the tuple due 
to the way position() @@ -1891,7 +1885,7 @@ end function parse_imports(ps::ParseState) mark = position(ps) word = peek(ps) - @assert word in (K"import", K"using") + @assert word in KSet`import using` bump(ps, TRIVIA_FLAG) emark = position(ps) initial_as = parse_import(ps, word, false) @@ -1999,7 +1993,7 @@ function parse_import_path(ps::ParseState) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_atsym(ps) - elseif k in (K"NewlineWs", K";", K",", K":", K"EndMarker") + elseif k in KSet`NewlineWs ; , : EndMarker` # import A; B ==> (import (. A)) break elseif k == K".." @@ -2058,7 +2052,7 @@ function parse_iteration_spec(ps::ParseState) mark = position(ps) k = peek(ps) # Handle `outer` contextual keyword - is_outer_kw = k == K"outer" && !(peek_skip_newline_in_gen(ps, 2) in (K"=", K"in", K"∈")) + is_outer_kw = k == K"outer" && !(peek_skip_newline_in_gen(ps, 2) in KSet`= in ∈`) if is_outer_kw # outer i = rhs ==> (= (outer i) rhs) bump(ps, TRIVIA_FLAG) @@ -2067,13 +2061,13 @@ function parse_iteration_spec(ps::ParseState) if is_outer_kw emit(ps, mark, K"outer") end - if peek_skip_newline_in_gen(ps) in (K"=", K"in", K"∈") + if peek_skip_newline_in_gen(ps) in KSet`= in ∈` bump(ps, TRIVIA_FLAG) parse_pipe_lt(ps) else # Recovery heuristic recover(ps, error="invalid iteration spec: expected one of `=` `in` or `∈`") do ps, k - k in (K",", K"NewlineWs") || is_closing_token(ps, k) + k in KSet`, NewlineWs` || is_closing_token(ps, k) end # TODO: or try parse_pipe_lt ??? 
end @@ -2362,7 +2356,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) mark = position(ps) parse_eq_star(ps) k = peek(ps, skip_newlines=true) - if k in (K",", closer) + if k == K"," || k == closer if k == K"," # [x,] ==> (vect x) bump(ps, TRIVIA_FLAG) @@ -2734,7 +2728,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) parse_call_chain(ps, mark, true) - elseif leading_kind in (K"`", K"```") + elseif leading_kind in KSet`\` \`\`\`` bump_invisible(ps, K"core_@cmd") parse_raw_string(ps) emit(ps, mark, K"macrocall") @@ -2760,7 +2754,7 @@ function emit_braces(ps, mark, ckind, cflags) # {x ;;; y} ==> (bracescat (nrow-3 x y)) emit(ps, mark, K"nrow", cflags) end - outk = ckind in (K"vect", K"comprehension") ? K"braces" : K"bracescat" + outk = ckind in KSet`vect comprehension` ? K"braces" : K"bracescat" emit(ps, mark, outk) end diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 4baa6a886abfd..3616d76cf6147 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -21,6 +21,22 @@ macro K_str(str) end end +""" +A set of kinds which can be used with the `in` operator. For example + + k in KSet`+ - *` +""" +macro KSet_cmd(str) + kinds = [get(_str_to_kind, s) do + error("unknown token kind K$(repr(str))") + end + for s in split(str)] + + quote + ($(kinds...),) + end +end + kind(k::Kind) = k kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) From f55304cd4cf594094f5cd38a7da46a0a2e76c3f5 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 5 Jan 2022 15:18:15 +1000 Subject: [PATCH 0285/1109] Tokenize: emit all strings as STRING or CMD Emit strings as either STRING or CMD. This distinction is required downstream from the lexer when processing the escape sequences in raw strings. (Surprisingly no other cases need this.) Remove TRIPLE_STRING and TRIPLE_CMD, as that information is now available in the delimiters which are emitted as separate tokens. 
Remove the triplestr flag from the token structs, as that turned out not to be very useful to the parser. --- JuliaSyntax/Tokenize/src/lexer.jl | 12 ++++++------ JuliaSyntax/Tokenize/src/token.jl | 14 ++++---------- JuliaSyntax/Tokenize/src/token_kinds.jl | 7 +------ JuliaSyntax/Tokenize/test/lexer.jl | 12 ++++++------ 4 files changed, 17 insertions(+), 28 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 9453d76ebe34b..ea3b81f6cb5c5 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -280,9 +280,9 @@ end Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. """ -function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR, triplestr::Bool=false) where IO_t +function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t suffix = false - if kind in (Tokens.ERROR, Tokens.STRING, Tokens.TRIPLE_STRING, Tokens.CMD, Tokens.TRIPLE_CMD) + if kind in (Tokens.ERROR, Tokens.STRING, Tokens.CMD) str = String(l.io.data[(l.token_startpos + 1):position(l)]) elseif (kind == Tokens.IDENTIFIER || kind == Tokens.VAR_IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE || kind == Tokens.NEWLINE_WS) str = String(take!(l.charstore)) @@ -298,14 +298,14 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR, tok = Token(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), startpos(l), position(l) - 1, - str, err, l.dotop, suffix, triplestr) + str, err, l.dotop, suffix) l.dotop = false l.last_token = kind readoff(l) return tok end -function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR, triplestr::Bool=false) where IO_t +function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t suffix = false if optakessuffix(kind) while isopsuffix(peekchar(l)) @@ -316,7 +316,7 @@ function 
emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E tok = RawToken(kind, (l.token_start_row, l.token_start_col), (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err, l.dotop, suffix, triplestr) + startpos(l), position(l) - 1, err, l.dotop, suffix) l.dotop = false l.last_token = kind @@ -509,7 +509,7 @@ function lex_string_chunk(l) end end end - return emit(l, Tokens.STRING, Tokens.NO_ERR, state.triplestr) + return emit(l, state.delim == '"' ? Tokens.STRING : Tokens.CMD) end # Lex whitespace, a whitespace char `c` has been consumed diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index 05c57f8ef828c..8d6893a0c8fb0 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -27,9 +27,7 @@ _add_kws() @enum(TokenError, NO_ERR, EOF_MULTICOMMENT, - EOF_STRING, EOF_CHAR, - EOF_CMD, EOF_VAR, INVALID_NUMERIC_CONSTANT, INVALID_OPERATOR, @@ -40,9 +38,7 @@ _add_kws() # Error kind => description TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", - EOF_STRING => "unterminated string literal", EOF_CHAR => "unterminated character literal", - EOF_CMD => "unterminated cmd literal", EOF_VAR => "unterminated var\"...\" identifier", INVALID_NUMERIC_CONSTANT => "invalid numeric constant", INVALID_OPERATOR => "invalid operator", @@ -63,13 +59,12 @@ struct Token <: AbstractToken token_error::TokenError dotop::Bool suffix::Bool - triplestr::Bool end function Token(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int, val::String) -Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false, false, false) +Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false, false) end -Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false, false) +Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false) struct RawToken <: AbstractToken kind::Kind @@ -81,13 +76,12 @@ struct RawToken <: AbstractToken token_error::TokenError dotop::Bool suffix::Bool - triplestr::Bool end function RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, startbyte::Int, endbyte::Int) -RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false, false) +RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false) end -RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false, false) +RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false) const _EMPTY_TOKEN = Token() diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index f68ed02547b50..30c8724a7a78d 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -64,11 +64,8 @@ OCT_INT, # 0o0 FLOAT, # 3.5, 3.7e+3 STRING, # "foo" (without the " delimiters) - # TODO: Remove this and TRIPLE_CMD; use flag? 
- TRIPLE_STRING, # """ foo \n """ CHAR, # 'a' - CMD, # `cmd ...` - TRIPLE_CMD, # ```cmd ...``` + CMD, # `cmd ...` (without delimiters) TRUE, FALSE, end_literal, @@ -835,8 +832,6 @@ begin_parser_tokens, TOMBSTONE, # Empty placeholder for kind to be filled later NOTHING_LITERAL, # A literal Julia `nothing` in the AST - # FIXME: Remove - UNQUOTED_STRING, # An unquoted range of the source as a string # Macro names are modelled as a special kind of identifier because the # @ may not be attached to the macro name in the source (or may not be diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index b3daef8c8f72a..c885e99feaecf 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -113,7 +113,7 @@ end # testset T.NEWLINE_WS,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, - T.NEWLINE_WS,T.BACKTICK,T.STRING,T.BACKTICK, + T.NEWLINE_WS,T.BACKTICK,T.CMD,T.BACKTICK, T.NEWLINE_WS,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, @@ -320,7 +320,7 @@ end ts = collect(tokenize(raw"""`x $ \ y`""")) @test ts[1] ~ (T.BACKTICK , "`" ) - @test ts[2] ~ (T.STRING , "x \$ \\ y" ) + @test ts[2] ~ (T.CMD , "x \$ \\ y" ) @test ts[3] ~ (T.BACKTICK , "`" ) @test ts[4] ~ (T.ENDMARKER , "" ) @@ -561,18 +561,18 @@ end @testset "CMDs" begin @test tok("`cmd`",1).kind == T.BACKTICK - @test tok("`cmd`",2).kind == T.STRING + @test tok("`cmd`",2).kind == T.CMD @test tok("`cmd`",3).kind == T.BACKTICK @test tok("`cmd`",4).kind == T.ENDMARKER @test tok("```cmd```", 1).kind == T.TRIPLE_BACKTICK - @test tok("```cmd```", 2).kind == T.STRING + @test tok("```cmd```", 2).kind == T.CMD @test tok("```cmd```", 3).kind == T.TRIPLE_BACKTICK @test tok("```cmd```", 4).kind == T.ENDMARKER @test tok("```cmd````cmd`", 1).kind == T.TRIPLE_BACKTICK - @test tok("```cmd````cmd`", 2).kind == T.STRING + @test tok("```cmd````cmd`", 2).kind == T.CMD @test tok("```cmd````cmd`", 3).kind == T.TRIPLE_BACKTICK @test tok("```cmd````cmd`", 4).kind == T.BACKTICK 
- @test tok("```cmd````cmd`", 5).kind == T.STRING + @test tok("```cmd````cmd`", 5).kind == T.CMD @test tok("```cmd````cmd`", 6).kind == T.BACKTICK @test tok("```cmd````cmd`", 7).kind == T.ENDMARKER end From 838b8cf93125e6837cf9b345fefc3b91393e9f17 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 20:38:37 +1000 Subject: [PATCH 0286/1109] String escape processing and triple quoted dedenting * String unescaping * Raw string unescaping * De-indentation of triple quoted strings --- JuliaSyntax/README.md | 3 + JuliaSyntax/src/JuliaSyntax.jl | 1 + JuliaSyntax/src/parse_stream.jl | 15 +- JuliaSyntax/src/parser.jl | 61 ++++--- JuliaSyntax/src/syntax_tree.jl | 113 +++++-------- JuliaSyntax/src/token_kinds.jl | 2 +- JuliaSyntax/src/value_parsing.jl | 268 ++++++++++++++++++++++++++++++ JuliaSyntax/test/parser.jl | 3 +- JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/value_parsing.jl | 124 ++++++++++++++ 10 files changed, 491 insertions(+), 100 deletions(-) create mode 100644 JuliaSyntax/src/value_parsing.jl create mode 100644 JuliaSyntax/test/value_parsing.jl diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index b9aca9c519a24..017bd7a6229f4 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -461,6 +461,9 @@ Here's some behaviors which seem to be bugs: always executes afterward. (Presumably was this a mistake? It seems pretty awful!) * When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is parsed as `Expr(:vect)` +* `f(x for x in in xs)` is accepted, and parsed very strangely. +* Octal escape sequences saturate rather than being reported as errors. Eg, + `"\777"` results in `"\xff"`. 
## Parsing / AST oddities and warts diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index ff9829a951a18..b8866ce430152 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -18,6 +18,7 @@ include("syntax_tree.jl") include("parse_stream.jl") include("parser.jl") +include("value_parsing.jl") include("hooks.jl") diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 2365b367d1e91..41e062d1d2bd5 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -271,17 +271,21 @@ Retroactively inspecting/modifying the parser's output can be confusing, so using this function should be avoided where possible. """ function peek_behind(stream::ParseStream; skip_trivia::Bool=true) + kind(peek_token_behind(stream; skip_trivia=skip_trivia)) +end + +function peek_token_behind(stream::ParseStream; skip_trivia::Bool=true) if skip_trivia for i = length(stream.ranges):-1:1 s = stream.ranges[i] if !is_trivia(head(s)) - return kind(s) + return head(s) end end elseif !isempty(stream.ranges) - return kind(last(stream.ranges)) + return head(last(stream.ranges)) end - return K"Nothing" + return SyntaxHead(K"Nothing", EMPTY_FLAGS) end function peek_behind(stream::ParseStream, pos::ParseStreamPosition) @@ -308,7 +312,6 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags tok.raw.dotop && (f |= DOTOP_FLAG) - tok.raw.triplestr && (f |= TRIPLE_STRING_FLAG) k = (is_trivia || remap_kind == K"Nothing") ? k : remap_kind range = TaggedRange(SyntaxHead(k, f), first_byte(tok), last_byte(tok), lastindex(stream.ranges)+1) @@ -656,6 +659,10 @@ function peek_behind(ps::ParseState, args...) peek_behind(ps.stream, args...) end +function peek_token_behind(ps::ParseState, args...; kws...) + peek_token_behind(ps.stream, args...; kws...) 
+end + function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines bump(ps.stream, flags; skip_newlines=skip_nl, kws...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 68681aa6009b9..94fbc22030fe3 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -59,6 +59,10 @@ function TODO(str) error("TODO: $str") end +@noinline function internal_error(strs...) + error("Internal error: ", strs...) +end + #------------------------------------------------------------------------------- # Parsing-specific predicates on tokens/kinds # @@ -1200,7 +1204,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind == K"vcat" ? K"typed_vcat" : ckind == K"comprehension" ? K"typed_comprehension" : ckind == K"ncat" ? K"typed_ncat" : - error("Unrecognized kind in parse_cat") + internal_error("unrecognized kind in parse_cat", ckind) emit(ps, mark, outk, cflags) if is_macrocall emit(ps, mark, K"macrocall") @@ -1350,8 +1354,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # x"s"end ==> (macrocall x_str "s" "end") # x"s"2 ==> (macrocall x_str "s" 2) # x"s"10.0 ==> (macrocall x_str "s" 10.0) - suffix_kind = (k == K"Identifier" || is_keyword(k)) ? - K"UnquotedString" : k + suffix_kind = (k == K"Identifier" || is_keyword(k)) ? K"String" : k bump(ps, remap_kind=suffix_kind) end emit(ps, mark, K"macrocall") @@ -1546,7 +1549,7 @@ function parse_resword(ps::ParseState) elseif word == K"do" bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") else - error("unhandled reserved word") + internal_error("unhandled reserved word ", word) end end @@ -1841,7 +1844,7 @@ function macro_name_kind(k) return k == K"Identifier" ? K"MacroName" : k == K"." ? K"@." : k == K"VarIdentifier" ? 
K"VarMacroName" : - error("Unrecognized source kind for macro name $k") + internal_error("unrecognized source kind for macro name ", k) end # If remap_kind is false, the kind will be remapped by parse_call_chain after @@ -2562,12 +2565,13 @@ function parse_brackets(after_parse::Function, end end -# Parse a string, possibly with embedded interpolations +# Parse a string and any embedded interpolations # # flisp: parse-string-literal-, parse-interpolate function parse_string(ps::ParseState) mark = position(ps) - closer = peek(ps) + delim_k = peek(ps) + str_flags = delim_k == K"\"" ? EMPTY_FLAGS : TRIPLE_STRING_FLAG bump(ps, TRIVIA_FLAG) n_components = 0 while true @@ -2580,26 +2584,30 @@ function parse_string(ps::ParseState) # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") m = position(ps) parse_atom(ps) - if ps.julia_version >= v"1.6" && peek_behind(ps) == K"String" - # Wrap interpolated literal strings in (string) so we can - # distinguish them from the surrounding text (issue #38501) - #v1.6: "hi$("ho")" ==> (string "hi" (string "ho")) - emit(ps, m, K"string") + if ps.julia_version >= v"1.6" + head = peek_token_behind(ps) + if kind(head) == K"String" + # Wrap interpolated literal strings in (string) so we can + # distinguish them from the surrounding text (issue #38501) + # "hi$("ho")" ==> (string "hi" (string "ho")) + # "hi$("""ho""")" ==> (string "hi" (string-s "ho")) + #v1.5: "hi$("ho")" ==> (string "hi" "ho") + emit(ps, m, K"string", flags(head)) + end end elseif is_identifier(k) # "a $foo b" ==> (string "a " foo " b") bump(ps) else - # It should be impossible for the lexer to get us into this state. 
bump_invisible(ps, K"error", error="Identifier or parenthesized expression expected after \$ in string") end elseif k == K"String" - bump(ps) - elseif k == closer + bump(ps, str_flags) + elseif k == delim_k if n_components == 0 # "" ==> "" - bump_invisible(ps, K"String") + bump_invisible(ps, K"String", str_flags) end bump(ps, TRIVIA_FLAG) break @@ -2612,11 +2620,13 @@ function parse_string(ps::ParseState) n_components += 1 end if n_components > 1 - # "$x$y$z" ==> (string x y z) - # "$(x)" ==> (string x) - # "$x" ==> (string x) - emit(ps, mark, K"string") + # "$x$y$z" ==> (string x y z) + # "$(x)" ==> (string x) + # "$x" ==> (string x) + # """$x""" ==> (string-s x) + emit(ps, mark, K"string", str_flags) else + # Strings with no interpolations # "str" ==> "str" end end @@ -2625,10 +2635,15 @@ function parse_raw_string(ps::ParseState) emark = position(ps) delim_k = peek(ps) bump(ps, TRIVIA_FLAG) - if peek(ps) == K"String" - bump(ps) + flags = RAW_STRING_FLAG | (delim_k in KSet`""" \`\`\`` ? + TRIPLE_STRING_FLAG : EMPTY_FLAGS) + if peek(ps) in KSet`String CmdString` + bump(ps, flags) else - bump_invisible(ps, K"String") + outk = delim_k in KSet`" """` ? K"String" : + delim_k == KSet`\` \`\`\`` ? 
K"CmdString" : + internal_error("unexpected delimiter ", delim_k) + bump_invisible(ps, outk, flags) end if peek(ps) == delim_k bump(ps, TRIVIA_FLAG) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index f1bce1676b965..bb6921de6394b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -14,8 +14,10 @@ const INFIX_FLAG = RawFlags(1<<1) const DOTOP_FLAG = RawFlags(1<<2) # Set when kind == K"String" was triple-delimited as with """ or ``` const TRIPLE_STRING_FLAG = RawFlags(1<<3) +# Set when the string is "raw" and needs minimal unescaping +const RAW_STRING_FLAG = RawFlags(1<<4) # try-finally-catch -const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<4) +const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) # Todo ERROR_FLAG = 0x80000000 ? @@ -79,6 +81,8 @@ function untokenize(head::SyntaxHead; include_flag_suff=true) str = str*"-" is_trivia(head) && (str = str*"t") is_infix(head) && (str = str*"i") + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"r") has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f") n = numeric_flags(head) n != 0 && (str = str*string(n)) @@ -114,66 +118,6 @@ end Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) -function unescape_julia_string(str, triplequote=false) - # FIXME: do this properly - str = replace(str, "\\\$" => '$') - str = unescape_string(str) - if triplequote - # FIXME: All sorts of rules need to be added here, and the separate - # fragments need to be aware of each other. - if startswith(str, '\n') - str = str[2:end] - end - lines = split(str, '\n') - indent = typemax(Int) - for line in lines - if isempty(line) - continue - end - j = findfirst(!=(' '), line) - indent = min(indent, j == nothing ? 
length(line) : j-1) - end - if isempty(last(lines)) - indent = 0 - end - str = join((l[indent+1:end] for l in lines), '\n') - end - str -end - -function julia_string_to_number(T, str, kind) - # FIXME: do this properly! - if kind == K"Integer" - str = replace(str, '_'=>"") - end - x = Base.parse(T, str) - if kind == K"HexInt" - if length(str) <= 4 - x = UInt8(x) - elseif length(str) <= 6 - x = UInt16(x) - elseif length(str) <= 10 - x = UInt32(x) - elseif length(str) <= 18 - x = UInt64(x) - elseif length(str) <= 34 - x = UInt128(x) - else - TODO("BigInt") - end - end - x -end - -function unescape_julia_char(str) - # FIXME: Do this properly! - if str == "\\'" - '\'' - else - unescape_julia_string(str)[1] - end -end - function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) # Leaf node @@ -192,7 +136,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"false" false elseif k == K"Char" - unescape_julia_char(val_str[2:end-1]) + unescape_julia_string(val_str, false, false)[2] elseif k == K"Identifier" Symbol(val_str) elseif k == K"VarIdentifier" @@ -200,10 +144,12 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif is_keyword(k) # This should only happen for tokens nested inside errors Symbol(val_str) - elseif k == K"String" - unescape_julia_string(val_str, has_flags(head(raw), TRIPLE_STRING_FLAG)) - elseif k == K"UnquotedString" - String(val_str) + elseif k in KSet`String CmdString` + is_cmd = k == K"CmdString" + is_raw = has_flags(head(raw), RAW_STRING_FLAG) + has_flags(head(raw), TRIPLE_STRING_FLAG) ? + process_triple_strings!([val_str], is_raw)[1] : + unescape_julia_string(val_str, is_cmd, is_raw) elseif is_operator(k) isempty(val_range) ? 
Symbol(untokenize(k)) : # synthetic invisible tokens @@ -239,12 +185,37 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In error("Can't untokenize head of kind $(kind(raw))") cs = SyntaxNode[] pos = position - for (i,rawchild) in enumerate(children(raw)) - # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. - if !is_trivia(rawchild) || is_error(rawchild) - push!(cs, SyntaxNode(source, rawchild, pos)) + if kind(raw) == K"string" && has_flags(head(raw), TRIPLE_STRING_FLAG) + # Triple quoted strings need special processing of sibling String literals + strs = SubString[] + str_nodes = SyntaxNode[] + for (i,rawchild) in enumerate(children(raw)) + if !is_trivia(rawchild) || is_error(rawchild) + if kind(rawchild) == K"String" + val_range = pos:pos + span(rawchild) - 1 + push!(strs, source[val_range]) + n = SyntaxNode(source, rawchild, pos, nothing, :leaf, nothing) + push!(cs, n) + push!(str_nodes, n) + else + push!(cs, SyntaxNode(source, rawchild, pos)) + end + end + pos += rawchild.span + end + is_raw = has_flags(head(raw), RAW_STRING_FLAG) + process_triple_strings!(strs, is_raw) + for (s,n) in zip(strs, str_nodes) + n.val = s + end + else + for (i,rawchild) in enumerate(children(raw)) + # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. 
+ if !is_trivia(rawchild) || is_error(rawchild) + push!(cs, SyntaxNode(source, rawchild, pos)) + end + pos += rawchild.span end - pos += rawchild.span end node = SyntaxNode(source, raw, position, nothing, headsym, cs) for c in cs diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 9fa7319627fd0..963545fb881c3 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -68,6 +68,7 @@ Dict([ "Float" => Ts.FLOAT "String" => Ts.STRING "Char" => Ts.CHAR +"CmdString" => Ts.CMD "true" => Ts.TRUE "false" => Ts.FALSE "END_LITERAL" => Ts.end_literal @@ -818,7 +819,6 @@ Dict([ "TOMBSTONE" => Ts.TOMBSTONE "NothingLiteral" => Ts.NOTHING_LITERAL -"UnquotedString" => Ts.UNQUOTED_STRING # Macro names are modelled as a special kind of identifier because the # @ may not be attached to the macro name in the source (or may not be diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl new file mode 100644 index 0000000000000..0f29244cd5a2c --- /dev/null +++ b/JuliaSyntax/src/value_parsing.jl @@ -0,0 +1,268 @@ +#------------------------------------------------------------------------------- +# This file contains utility functions for converting undecorated source +# strings into Julia values. For example, string->number, string unescaping, etc. + +""" +Convert a Julia source code string into a number. +""" +function julia_string_to_number(T, str::AbstractString, kind) + # Fix this up... it's barely functional. 
+ if kind == K"Integer" + str = replace(str, '_'=>"") + end + x = Base.parse(T, str) + if kind == K"HexInt" + if length(str) <= 4 + x = UInt8(x) + elseif length(str) <= 6 + x = UInt16(x) + elseif length(str) <= 10 + x = UInt32(x) + elseif length(str) <= 18 + x = UInt64(x) + elseif length(str) <= 34 + x = UInt128(x) + else + TODO("BigInt") + end + end + x +end + +""" +Process Julia source code escape sequences for raw strings +""" +function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent::Integer) + delim = is_cmd ? '`' : '"' + i = firstindex(str) + lastidx = lastindex(str) + if i <= lastidx && str[i] != '\n' + i += dedent + end + while i <= lastidx + c = str[i] + if c != '\\' + if c == '\r' + # convert literal \r and \r\n in strings to \n (issue #11988) + if i+1 <= lastidx && str[i+1] == '\n' + i += 1 + end + c = '\n' + end + write(io, c) + if c == '\n' && i+1 <= lastidx && str[i+1] != '\n' + i += dedent + end + i = nextind(str, i) + continue + end + # Process \ escape sequences + j = i + while str[j] == '\\' && j <= lastidx + j += 1 + end + ndelim = j - i + if j <= lastidx && str[j] == delim + # Escaping a delimiter + ndelim = div(ndelim,2) + end + for k = 1:ndelim + write(io, '\\') + end + i = j + if i <= lastidx + write(io, str[i]) + end + i = nextind(str, i) + end +end + +""" +Process Julia source code escape sequences for non-raw strings. +`str` should be passed without delimiting quotes. +""" +function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer) + i = firstindex(str) + dedent + lastidx = lastindex(str) + while i <= lastidx + c = str[i] + if c != '\\' + if c == '\r' + # convert literal \r and \r\n in strings to \n (issue #11988) + if i+1 <= lastidx && str[i+1] == '\n' + i += 1 + end + c = '\n' + end + write(io, c) + if c == '\n' && i+1 <= lastidx && str[i+1] != '\n' + i += dedent + end + i = nextind(str, i) + continue + end + # Process \ escape sequences. 
See also Base.unescape_string which some + # of this code derives from (but which disallows \` \' \$) + i += 1 + if i > lastidx + break + end + c = str[i] + if c == 'x' || c == 'u' || c == 'U' + n = k = 0 + m = c == 'x' ? 2 : + c == 'u' ? 4 : 8 + while (k += 1) <= m && i+1 <= lastidx + nc = str[i+1] + n = '0' <= nc <= '9' ? n<<4 + (nc-'0') : + 'a' <= nc <= 'f' ? n<<4 + (nc-'a'+10) : + 'A' <= nc <= 'F' ? n<<4 + (nc-'A'+10) : break + i += 1 + end + if k == 1 || n > 0x10ffff + u = m == 4 ? 'u' : 'U' + throw(ArgumentError("invalid $(m == 2 ? "hex (\\x)" : + "unicode (\\$u)") escape sequence")) + end + if m == 2 # \x escape sequence + write(io, UInt8(n)) + else + print(io, Char(n)) + end + elseif '0' <= c <= '7' + k = 1 + n = c-'0' + while (k += 1) <= 3 && i+1 <= lastidx + c = str[i+1] + n = ('0' <= c <= '7') ? n<<3 + c-'0' : break + i += 1 + end + if n > 255 + throw(ArgumentError("octal escape sequence out of range")) + end + write(io, UInt8(n)) + elseif c == '\n' || c == '\r' + # Remove \n \r and \r\n newlines following \ + if c == '\r' && i < lastidx && str[i+1] == '\n' + i += 1 + end + else + u = # C escapes + c == 'n' ? '\n' : + c == 't' ? '\t' : + c == 'r' ? '\r' : + c == 'e' ? '\e' : + c == 'b' ? '\b' : + c == 'f' ? '\f' : + c == 'v' ? '\v' : + c == 'a' ? '\a' : + # Literal escapes allowed in Julia source + c == '\\' ? '\\' : + c == '\'' ? '\'' : + c == '"' ? '"' : + c == '$' ? '$' : + c == '`' ? '`' : + throw(ArgumentError("Invalid escape sequence \\$c")) + write(io, u) + end + i = nextind(str, i) + end +end + +function unescape_julia_string(str::AbstractString, is_cmd::Bool, + is_raw::Bool, dedent::Integer=0) + io = IOBuffer() + if is_raw + unescape_raw_string(io, str, is_cmd, dedent) + else + unescape_julia_string(io, str, dedent) + end + String(take!(io)) +end + +# Compute length of longest common prefix of spaces and tabs, in characters +# +# This runs *before* normalization of newlines so that unescaping/normalization +# can happen in a single pass. 
+# +# TODO: Should we do triplequoted string splitting as part of the main parser? +# It would be conceptually clean if the trivial whitespace was emitted as +# syntax trivia. +# +# flisp: triplequoted-string-indentation- +function triplequoted_string_indentation(strs) + if isempty(strs) + return 0 + end + if last(last(strs)) in ('\n', '\r') + return 0 + end + refstr = SubString(strs[1], 1, 0) + reflen = -1 + for str in strs + i = 1 + lastidx = lastindex(str) + while i <= lastidx + c = str[i] + if i == 1 || c == '\n' || c == '\r' + while i <= lastidx + c = str[i] + (c == '\n' || c == '\r') || break + i += 1 + end + if i <= lastidx + # At this point we've found the start of a nonempty line. + if reflen < 0 + # Find indentation we'll use as a reference + j = i-1 + while j+1 <= lastidx + c = str[j+1] + (c == ' ' || c == '\t') || break + j += 1 + end + refstr = SubString(str, i, j) + reflen = j - i + 1 + if j > i + i = j + end + else + # Matching indentation with reference, shortening + # length if necessary. 
+ j = i-1 + while j+1 <= lastidx && j-i+2 <= reflen + if str[j+1] != refstr[j-i+2] + break + end + j += 1 + end + if j-i+1 < reflen + reflen = j-i+1 + end + if j > i + i = j + end + end + end + end + i <= lastidx || break + i = nextind(str, i) + end + end + reflen +end + +function process_triple_strings!(strs, is_raw) + if isempty(strs) + return strs + end + dedent = triplequoted_string_indentation(strs) + for i = 1:length(strs) + if i == 1 && strs[1][1] == '\n' + strs[i] = unescape_julia_string(SubString(strs[i], 2), false, is_raw, dedent) + else + strs[i] = unescape_julia_string(strs[i], false, is_raw, dedent) + end + end + strs +end + diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b00df3b8fdaaa..8ae8fb908d54f 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -518,7 +518,8 @@ tests = [ ], JuliaSyntax.parse_string => [ "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" - "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" + "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" + "\"hi\$(\"\"\"ho\"\"\")\"" => "(string \"hi\" (string-s \"ho\"))" ((v=v"1.5",), "\"hi\$(\"ho\")\"") => "(string \"hi\" \"ho\")" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\"" => "\"\"" diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 0a76994b32567..75a920c684cbe 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -33,6 +33,7 @@ end include("parse_stream.jl") include("parser.jl") +include("value_parsing.jl") # Prototypes #include("syntax_trees.jl") diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl new file mode 100644 index 0000000000000..afe611c272087 --- /dev/null +++ b/JuliaSyntax/test/value_parsing.jl @@ -0,0 +1,124 @@ +using JuliaSyntax: triplequoted_string_indentation, + unescape_julia_string, + process_triple_strings! 
+ +@testset "String unescaping" begin + unesc(str) = unescape_julia_string(str, false, false) + # Allowed escapes of delimiters and dollar sign + @test only(unesc("\\\\")) == '\\' + @test only(unesc("\\\"")) == '"' + @test only(unesc("\\\$")) == '$' + @test only(unesc("\\'")) == '\'' + @test only(unesc("\\`")) == '`' + + # Newline normalization + @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd" + + # Removal of backslash-escaped newlines + @test unesc("a\\\nb") == "ab" + @test unesc("a\\\rb") == "ab" + @test unesc("a\\\r\nb") == "ab" + @test unesc("a\\\n") == "a" + @test unesc("a\\\r") == "a" + @test unesc("a\\\r\n") == "a" + + # Invalid escapes + @test_throws ArgumentError unesc("\\.") + @test_throws ArgumentError unesc("\\z") + + # Standard C escape sequences + @test codeunits(unesc("\\n\\t\\r\\e\\b\\f\\v\\a")) == + UInt8[0x0a, 0x09, 0x0d, 0x1b, 0x08, 0x0c, 0x0b, 0x07] + + # Hex and unicode escapes; \x \u and \U + @test unesc("x\\x61x") == "xax" + @test unesc("x\\u03b1x") == "xαx" + @test unesc("x\\U001F604x") == "x😄x" + # Maximum unicode code point + @test unesc("x\\U10ffffx") == "x\U10ffffx" + @test_throws ArgumentError unesc("x\\U110000x") + + # variable-length octal + @test unesc("x\\7x") == "x\ax" + @test unesc("x\\77x") == "x?x" + @test unesc("x\\141x") == "xax" + @test unesc("x\\377x") == "x\xffx" + @test_throws ArgumentError unesc("x\\400x") +end + +@testset "Raw string unescaping" begin + # " delimited + # x\"x ==> x"x + @test unescape_julia_string("x\\\"x", false, true) == "x\"x" + # x\`x ==> x\`x + @test unescape_julia_string("x\\`x", false, true) == "x\\`x" + # x\\\"x ==> x\"x + @test unescape_julia_string("x\\\\\\\"x", false, true) == "x\\\"x" + # x\\\`x ==> x\\\`x + @test unescape_julia_string("x\\\\\\`x", false, true) == "x\\\\\\`x" + # '\\ ' ==> '\\ ' + @test unescape_julia_string("\\\\ ", false, true) == "\\\\ " + + # ` delimited + # x\"x ==> x\"x + @test unescape_julia_string("x\\\"x", true, true) == "x\\\"x" + # x\`x ==> x`x + @test 
unescape_julia_string("x\\`x", true, true) == "x`x" + # x\\\"x ==> x\"x + @test unescape_julia_string("x\\\\\\\"x", true, true) == "x\\\\\\\"x" + # x\\\`x ==> x\`x + @test unescape_julia_string("x\\\\\\`x", true, true) == "x\\`x" + # '\\ ' ==> '\\ ' + @test unescape_julia_string("\\\\ ", true, true) == "\\\\ " +end + +@testset "Triple quoted string indentation" begin + @test triplequoted_string_indentation([]) == 0 + + # Spaces or tabs + @test triplequoted_string_indentation([" "]) == 2 + @test triplequoted_string_indentation(["\t "]) == 2 + @test triplequoted_string_indentation([" \t"]) == 2 + @test triplequoted_string_indentation(["\t\t"]) == 2 + + # Various newlines; empty lines ignored + @test triplequoted_string_indentation([" \n\n x"]) == 2 + @test triplequoted_string_indentation([" \n\r x"]) == 2 + @test triplequoted_string_indentation([" \r\n x"]) == 2 + @test triplequoted_string_indentation([" \r\r x"]) == 2 + @test triplequoted_string_indentation(["\n\r\r\n"]) == 0 + # Empty newline at the end not ignored + @test triplequoted_string_indentation([" \n"]) == 0 + @test triplequoted_string_indentation([" \r"]) == 0 + @test triplequoted_string_indentation([" \n\n"]) == 0 + @test triplequoted_string_indentation([" ", " \n"]) == 0 + + # Finds the minimum common prefix + @test triplequoted_string_indentation([" ", " "]) == 2 + @test triplequoted_string_indentation([" ", " "]) == 1 + @test triplequoted_string_indentation([" ", " "]) == 1 + @test triplequoted_string_indentation([" ", " "]) == 1 + @test triplequoted_string_indentation([" \t", " "]) == 1 + @test triplequoted_string_indentation([" ", " \t"]) == 1 + @test triplequoted_string_indentation([" \t", " \t"]) == 2 + @test triplequoted_string_indentation(["\t ", "\t "]) == 2 + @test triplequoted_string_indentation([" \n "]) == 2 + @test triplequoted_string_indentation([" \n "]) == 1 + @test triplequoted_string_indentation([" \n "]) == 1 + @test triplequoted_string_indentation(["\n \n \n "]) == 1 + @test 
triplequoted_string_indentation([" \n \n "]) == 1 + + # Cases of no indentation + @test triplequoted_string_indentation(["hi"]) == 0 + @test triplequoted_string_indentation(["x\ny", "z"]) == 0 +end + +@testset "Triple quoted string deindentation" begin + @test process_triple_strings!([" x", " y"], false) == ["x", "y"] + @test process_triple_strings!([" x", "y"], false) == [" x", "y"] + @test process_triple_strings!(["\n x", " y"], false) == ["x", "y"] + @test process_triple_strings!([" x", " y\n"], false) == [" x", " y\n"] + @test process_triple_strings!([" \tx", " \ty"], false) == ["x", "y"] + @test process_triple_strings!([" \tx", " y"], false) == ["\tx", " y"] +end + From 0c41c464df91412cf0272c42af32da50325cd99c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 21:04:21 +1000 Subject: [PATCH 0287/1109] Expand parse_all to producing GreenNode + SyntaxNode This allows any of the tree types to be conveniently produced using the parse_all interface. --- JuliaSyntax/src/parse_stream.jl | 11 +++++++---- JuliaSyntax/src/parser.jl | 2 -- JuliaSyntax/src/syntax_tree.jl | 19 ++++++++++++------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 41e062d1d2bd5..2555a6d9daf3a 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -500,9 +500,6 @@ end #------------------------------------------------------------------------------- # Tree construction from the list of text ranges held by ParseStream -# -# Note that this is largely independent of GreenNode, and could easily be -# made completely independent with a tree builder interface. """ build_tree(::Type{NodeType}, stream::ParseStream; @@ -572,12 +569,18 @@ function build_tree(::Type{NodeType}, stream::ParseStream; elseif !isnothing(wrap_toplevel_as_kind) # Mostly for debugging children = [x.node for x in stack] - return GreenNode(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), children...) 
+ return NodeType(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), children...) else error("Found multiple nodes at top level") end end +function parse_all(::Type{GreenNode}, code) + stream = parse_all(code) + build_tree(GreenNode, stream) +end + + #------------------------------------------------------------------------------- """ ParseState carries parser context as we recursively descend into the parse diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 94fbc22030fe3..58ce2bccbbee4 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2824,8 +2824,6 @@ Parse a sequence of top level statements. `input` may be a `ParseStream` or other input source which will be passed to the `ParseStream` constructor. The `ParseStream` is returned. - -flisp: parse-all """ function parse_all(stream::ParseStream) ps = ParseState(stream) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index bb6921de6394b..e015fc2dce5e6 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -486,12 +486,7 @@ Base.Expr(node::SyntaxNode) = _to_expr(node) #------------------------------------------------------------------------------- -""" - parse_all(Expr, code::AbstractString; filename="none") - -Parse the given code and convert to a standard Expr -""" -function parse_all(::Type{Expr}, code::AbstractString; filename="none") +function parse_all(::Type{SyntaxNode}, code::AbstractString; filename="none") source_file = SourceFile(code, filename=filename) stream = ParseStream(code) @@ -505,7 +500,17 @@ function parse_all(::Type{Expr}, code::AbstractString; filename="none") green_tree = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") - tree = SyntaxNode(source_file, green_tree) + SyntaxNode(source_file, green_tree) +end + + +""" + parse_all(Expr, code::AbstractString; filename="none") + +Parse the given code and convert to a standard Expr +""" +function parse_all(::Type{Expr}, 
code::AbstractString; filename="none") + tree = parse_all(SyntaxNode, code; filename=filename) # convert to Julia expr ex = Expr(tree) From 15652b1ace69275bd98b7af1e36f4d6e065413b9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 21:18:01 +1000 Subject: [PATCH 0288/1109] Enable self-parsing test! --- JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/self_parse.jl | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 JuliaSyntax/test/self_parse.jl diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 75a920c684cbe..4a8a20d695c23 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -34,6 +34,7 @@ end include("parse_stream.jl") include("parser.jl") include("value_parsing.jl") +include("self_parse.jl") # Prototypes #include("syntax_trees.jl") diff --git a/JuliaSyntax/test/self_parse.jl b/JuliaSyntax/test/self_parse.jl new file mode 100644 index 0000000000000..2121e673fedb8 --- /dev/null +++ b/JuliaSyntax/test/self_parse.jl @@ -0,0 +1,9 @@ + +@testset "JuliaSyntax self-parsing" begin + srcdir = joinpath(@__DIR__, "..", "src") + @testset "Parse $(joinpath("src",f))" for f in readdir(srcdir) + code = read(joinpath(srcdir, f), String) + @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + end +end From 092636eef436d5d077d3a41789416029b3c2c28f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 21:50:19 +1000 Subject: [PATCH 0289/1109] Drop 1.4 from CI for now Probably no point keeping this working at this early stage. 
--- JuliaSyntax/.github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index b01591dd39feb..d70120fcf3d4a 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -10,7 +10,6 @@ jobs: fail-fast: false matrix: version: - - '1.4' - '1.6' - 'nightly' os: From 2d69c8ee37ca3eed6ed3f0dc8251a244456d02b5 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 22:27:03 +1000 Subject: [PATCH 0290/1109] Rearrange start of README Start to clean things up to give a better overview. --- JuliaSyntax/README.md | 128 +++++++++++++++++++++++++++++------------- 1 file changed, 89 insertions(+), 39 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 017bd7a6229f4..bdc5e9e5378d6 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -2,20 +2,96 @@ [![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions) -Yet another Julia frontend, written in Julia. +A Julia frontend, written in Julia. -Goals: -* Parse Julia code with precise source mapping -* Avoid worrying about how much work this will be 😅 +## Goals -Nice to have: +* Lossless parsing of Julia code with precise source mapping +* Production quality error recovery, reporting and unit testing. +* Parser structure comprehensible to people who know Julia's + flisp-based parser. Replace the flisp frontend once bootstrapping can be + solved. * Speedy enough for interactive editing -* Production quality error recovery and reporting -* "Compilation as an API" to support all sorts of tooling -* Make the code easy to maintain in parallel with Julia's flisp frontend -* Go further than parsing - macro expansion, syntax desugaring and scope analysis +* "Compilation as an API" to support all sorts of tooling. 
Not just parsing but + the whole compiler frontend: + - Parsing + - Macro expansion + - Syntax desugaring + - Scope analysis +* Try not to worry about how much work this will be 😅 + +## Parser overview + +The parsing technology is intentionally simple: it's a recursive descent parser +which closely follows the high level structure of the flisp reference parser. +This gives a lot of flexibility for the hard part: designing the data +structures and APIs for parsing. It also reduces porting bugs and is a natural +fit because the language was designed around the constraints of this kind of +parser. + +The main parser innovation is the `ParseStream` interface which provides a +stream-like I/O interface for writing the parser: +* The parser consumes a flat list of tokens as *input* +* It produces a flat list of text spans as *output* +* Diagnostics are emitted as separate text spans + +Notably, the parser does not depend on or produce any concrete tree data +structure as part of the parsing phase but the output spans can be +post-processed into various tree data structures as required. This is very +similar to the design of rust-analyzer, though our output format is simpler. + +## Lossless syntax trees + +Our goal is to losslessly represent the source text with a tree; this may be +called a "lossless syntax tree". (We avoid the term "concrete syntax tree" +because this has traditionally been a different concept — a parse tree of the +full formal grammar for a language, including any grammar hacks required to +solve ambiguities, etc. We don't need such a formal grammar as we're writing +the parser by hand.) + +Structurally, the output of a `ParseStream`-based parser can most naturally be +assembled into a "green tree" in Roslyn (C# compiler) terminology. 
The most +basic properties of a green tree are: +* Every node spans a complete and contiguous range of source code bytes +* Child nodes are in the order of the source text + +Additionally, green trees are usually designed so that +* Nodes are immutable, do not point to their parents and don't know their + absolute position in the source. This means they can be cached and reused + when building the tree. +* Nodes are homogenously typed at the language level so they can be efficiently + stored and accessed, with the node type held as a "syntax kind" enumeration. + +## Representing erroneous source code -## Design +The goal of the parser is to produce well-formed heirarchical structure from +the source text. For interactive tools we need this to work even when the +source text contains errors; it's the job of the parser to include the recovery +heuristics to make this work. + +Concretely, the parser in `JuliaSyntax` should always produce a green tree +which is *well formed* in the sense that `GreenNode`s of a given `Kind` have +well-defined layout of children. This means the `GreenNode` to `SyntaxNode` +transformation is deterministic and tools can assume they're working with a +"mostly valid" AST. + +What does "mostly valid" mean? We allow the tree to contain the following types +of error nodes: + +* Missing tokens or nodes may be **added** as placeholders when they're needed + to complete a piece of syntax. For example, we could parse `a + (b *` as + `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder error node. +* A sequence of unexpected tokens may be **removed** by collecting + them as children of an error node and treating them as syntax trivia during + AST construction. For example, `a + b end * c` could be parsed as the green + tree `(call-i a + b (error-t end * c))`, and turned into the AST `(call + a b)`. + +We want to encode both these cases in a way which is simplest for downstream +tools to use. 
This is an open question, but for now we use `K"error"` as the +node head, with the `TRIVIA_FLAG` set for unexpected syntax. + + +# Prototyping approach The tree datastructure design here is hard: @@ -59,7 +135,7 @@ Let's tackle it by prototyping several important work flows: ### Raw syntax tree / Green tree -Raw syntax tree (RST, or "Green tree" in the terminology from Roslyn) +Raw syntax tree (or "Green tree" in the terminology from Roslyn) We want GreenNode to be * *structurally minimal* — For efficiency and generality @@ -115,8 +191,8 @@ L - literal - "end" ``` -Call represents a challange for the AST vs RST in terms of node placement / -iteration for infix operators vs normal prefix function calls. +Call represents a challange for the AST vs Green tree in terms of node +placement / iteration for infix operators vs normal prefix function calls. - The normal problem of `a + 1` vs `+(a, 1)` - Or worse, `a + 1 + 2` vs `+(a, 1, 2)` @@ -165,32 +241,6 @@ interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to ## Parsing -The goal of the parser is to produce well-formed heirarchical structure from -the source text. For interactive tools we need this to work even when the -source text contains errors, so it's the job of the parser to include the -recovery heuristics necessary to make this work. - -Concretely, the parser in `JuliaSyntax` should always produce a green tree -which is *well formed* in the sense that `GreenNode`s of a given `Kind` have -well-defined layout of children. This means the `GreenNode` to `SyntaxNode` -transformation is deterministic and tools can assume they're working with a -"mostly valid" AST. - -What does "mostly valid" mean? We allow the tree to contain the following types -of error nodes: - -* Missing tokens or nodes may be **added** as placeholders when they're needed - to complete a piece of syntax. For example, we could parse `a + (b *` as - `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder. 
-* A sequence of unexpected tokens may be **removed** by collecting - them as children of an error node and treating them as syntax trivia during - AST construction. For example, `a + b end * c` could be parsed as the green - tree `(call-i a + b (error end * c))`, and turned into the AST `(call + a b)`. - -We want to encode both these cases in a way which is simplest for downstream -tools to use. This is an open question, but for now we use `K"error"` as the -token head, with the `TRIVIA_FLAG` set for unexpected syntax. - ### Error recovery Some disorganized musings about error recovery From 5a499b0be7172921675fa6e0fdda43c1829d18a0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 22:53:17 +1000 Subject: [PATCH 0291/1109] Add self-parsing of test code as an additional test --- JuliaSyntax/test/self_parse.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/JuliaSyntax/test/self_parse.jl b/JuliaSyntax/test/self_parse.jl index 2121e673fedb8..267b0cb5adeec 100644 --- a/JuliaSyntax/test/self_parse.jl +++ b/JuliaSyntax/test/self_parse.jl @@ -7,3 +7,12 @@ JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) end end + +@testset "JuliaSyntax self-parsing tests" begin + testdir = @__DIR__ + @testset "Parse $(joinpath("test",f))" for f in readdir(testdir) + code = read(joinpath(testdir, f), String) + @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + end +end From 15c216d3fb6428341267b45bf449cc2b82e56c25 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 6 Jan 2022 23:02:39 +1000 Subject: [PATCH 0292/1109] Fix diagnostic printing on the last line of the source --- JuliaSyntax/src/parse_stream.jl | 3 +-- JuliaSyntax/src/parser.jl | 24 ++++++++++++++---------- JuliaSyntax/src/source_files.jl | 4 ++++ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 
2555a6d9daf3a..11ffd627a7527 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -84,7 +84,7 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) # buffer these out to the surrouding lines. a,b = source_line_range(source, p, context_lines_before=2, context_lines_after=1) c,d = source_line_range(source, q, context_lines_before=1, context_lines_after=2) - + hicol = (100,40,40) print(io, source[a:prevind(code, p)]) @@ -94,7 +94,6 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) # a............... # .....p...q...... # ...............b - _printstyled(io, source[p:q]; color=hicol) else # Or large and we trucate the code to show only the region around the diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 58ce2bccbbee4..61c7f4784ea5d 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2817,16 +2817,7 @@ end #------------------------------------------------------------------------------- # Parser entry points -""" - parse_all(input) - -Parse a sequence of top level statements. - -`input` may be a `ParseStream` or other input source which will be passed to -the `ParseStream` constructor. The `ParseStream` is returned. -""" -function parse_all(stream::ParseStream) - ps = ParseState(stream) +function parse_all(ps::ParseState) mark = position(ps) while true if peek(ps, skip_newlines=true) == K"EndMarker" @@ -2844,6 +2835,19 @@ function parse_all(stream::ParseStream) end end emit(ps, mark, K"toplevel") + nothing +end + +""" + parse_all(input) + +Parse a sequence of top level statements. + +`input` may be a `ParseStream` or other input source which will be passed to +the `ParseStream` constructor. The `ParseStream` is returned. 
+""" +function parse_all(stream::ParseStream) + parse_all(ParseState(stream)) return stream end diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 3aa16eee10936..e75e94b732370 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -19,8 +19,12 @@ function SourceFile(code::AbstractString; filename=nothing) line_starts = Int[1] for i in eachindex(code) # The line is considered to start after the `\n` + # FIXME: \r and \n\r code[i] == '\n' && push!(line_starts, i+1) end + if last(code) != '\n' + push!(line_starts, lastindex(code)+1) + end SourceFile(code, filename, line_starts) end From 02e54408f132980335b38de83740c22c08e0f27c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 7 Jan 2022 08:30:22 +1000 Subject: [PATCH 0293/1109] Fix parse_unary_subtype, allow initial delimiter in parse_block --- JuliaSyntax/src/parse_stream.jl | 1 + JuliaSyntax/src/parser.jl | 55 ++++++++++++++++----------------- JuliaSyntax/test/parser.jl | 13 ++++++-- JuliaSyntax/test/self_parse.jl | 34 +++++++++++++------- 4 files changed, 62 insertions(+), 41 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 11ffd627a7527..eb7605199d5dd 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -110,6 +110,7 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) _printstyled(io, source[c:q]; color=hicol) end print(io, source[nextind(code,q):d]) + println(io) end struct ParseStreamPosition diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 61c7f4784ea5d..3094b273ffe9c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -227,12 +227,12 @@ function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) if k in closing_tokens return true end - # Skip leading delimiter n_delims = 0 if k in delimiters - bump(ps, TRIVIA_FLAG) - n_delims += 1 + # allow leading delimiters + # ; a ==> (block 
a) else + # a ; b ==> (block a b) down(ps) end while peek(ps) in delimiters @@ -255,14 +255,11 @@ end # Parse a newline or semicolon-delimited list of expressions. # Repeated delimiters are allowed but ignored -# (a;b;c) ==> (block a b c) -# (a;;;b;;) ==> (block a b) -# === -# begin -# a -# b -# end -# ==> (block a b) +# a;b;c ==> (block a b c) +# a;;;b;; ==> (block a b) +# ;a ==> (block a) +# \n a ==> (block a) +# a \n b ==> (block a b) # # flisp: parse-block function parse_block(ps::ParseState, down=parse_eq, mark=position(ps), @@ -361,10 +358,8 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) bump(ps, TRIVIA_FLAG) parse_assignment(ps, down, equals_is_kw) plain_eq = (k == K"=" && !is_dotted(t)) - result_k = - equals_pos = - emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, - is_dotted(t) ? DOTOP_FLAG : EMPTY_FLAGS) + equals_pos = emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, + is_dotted(t) ? DOTOP_FLAG : EMPTY_FLAGS) return plain_eq ? equals_pos : NO_POSITION end end @@ -690,26 +685,28 @@ end # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) k = peek(ps, skip_newlines=true) - if k == K"EndMarker" - parse_atom(ps) - return - elseif k in KSet`<: >:` - # FIXME add test cases + if k in KSet`<: >:` k2 = peek(ps, 2) if is_closing_token(ps, k2) || k2 in KSet`NewlineWs =` - # return operator by itself, as in (<:) + # return operator by itself + # <: ) ==> <: + # <: \n ==> <: + # <: = ==> <: bump(ps) - return - end - if k2 in KSet`{ (` + elseif k2 in KSet`{ (` # parse <:{T}(x::T) or <:(x::T) like other unary operators + # <:{T}(x::T) ==> (call (curly <: T) (:: x T)) + # <:(x::T) ==> (<: (:: x T)) parse_where(ps, parse_juxtapose) else - TODO("parse_unary_subtype") + # <: A where B ==> (<: (where A B)) + mark = position(ps) + bump(ps, TRIVIA_FLAG) parse_where(ps, parse_juxtapose) if peek_behind(ps) == K"tuple" - # Argh + TODO("Can this even happen?") end + emit(ps, mark, k) end else parse_where(ps, parse_juxtapose) @@ 
-1627,7 +1624,6 @@ function parse_const_local_global(ps) else has_const = true # const x = 1 ==> (const (= x 1)) - # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) bump(ps, TRIVIA_FLAG) k = peek(ps) if k in KSet`global local` @@ -1638,12 +1634,13 @@ function parse_const_local_global(ps) bump(ps, TRIVIA_FLAG) end end - # Like parse_eq, but specialized for error recovery: + # Like parse_eq/parse_assignment, but specialized in case we need error recovery beforevar_mark = position(ps) n_commas = parse_comma(ps, false) t = peek_token(ps) if is_prec_assignment(t) && !is_decorated(t) if n_commas >= 1 + # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) emit(ps, beforevar_mark, K"tuple") end bump(ps, TRIVIA_FLAG) @@ -2302,6 +2299,8 @@ end # * Binding power (precedence) of the separator, where whitespace binds # tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding # power of 0 for whitespace and negative numbers for other separators. +# +# FIXME: Error messages for mixed spaces and ;; delimiters function parse_array_separator(ps) t = peek_token(ps) k = kind(t) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8ae8fb908d54f..bca4f9bd38be8 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -51,6 +51,8 @@ tests = [ JuliaSyntax.parse_block => [ "a;b;c" => "(block a b c)" "a;;;b;;" => "(block a b)" + ";a" => "(block a)" + "\n a" => "(block a)" "a\nb" => "(block a b)" ], JuliaSyntax.parse_stmts => [ @@ -187,7 +189,14 @@ tests = [ "a->b" => "(-> a b)" "a::b->c" => "(-> (:: a b) c)" ], - JuliaSyntax.parse_unary_subtype => [ # Really for parse_where + JuliaSyntax.parse_unary_subtype => [ + "<: )" => "<:" + "<: \n" => "<:" + "<: =" => "<:" + "<:{T}(x::T)" => "(call (curly <: T) (:: x T))" + "<:(x::T)" => "(<: (:: x T))" + "<: A where B" => "(<: (where A B))" + # Really for parse_where "x where {T,S}" => "(where x T S)" "x where {T S}" => "(where x (bracescat (row T S)))" "x where {y for y in ys}" => 
"(where x (braces (generator y (= y ys))))" @@ -347,12 +356,12 @@ tests = [ "global const x = 1" => "(const (global (= x 1)))" "local const x = 1" => "(const (local (= x 1)))" "const x = 1" => "(const (= x 1))" - "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" "const global x = 1" => "(const (global (= x 1)))" "const local x = 1" => "(const (local (= x 1)))" "global x" => "(global x)" "local x" => "(local x)" "global x,y" => "(global x y)" + "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" "const x" => "(const (error x (error)))" ], JuliaSyntax.parse_function => [ diff --git a/JuliaSyntax/test/self_parse.jl b/JuliaSyntax/test/self_parse.jl index 267b0cb5adeec..83ab4af38c592 100644 --- a/JuliaSyntax/test/self_parse.jl +++ b/JuliaSyntax/test/self_parse.jl @@ -1,18 +1,30 @@ +function test_parse_file(root_path, path) + fullpath = joinpath(root_path, path) + if endswith(path, ".jl") && isfile(fullpath) + @testset "Parse $path" begin + code = read(fullpath, String) + @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + end + end +end @testset "JuliaSyntax self-parsing" begin - srcdir = joinpath(@__DIR__, "..", "src") - @testset "Parse $(joinpath("src",f))" for f in readdir(srcdir) - code = read(joinpath(srcdir, f), String) - @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == - JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + pkgdir = joinpath(@__DIR__, "..") + for f in readdir(joinpath(pkgdir, "src")) + test_parse_file(pkgdir, joinpath("src",f)) + end + + for f in readdir(joinpath(pkgdir, "test")) + test_parse_file(pkgdir, joinpath("test",f)) end end -@testset "JuliaSyntax self-parsing tests" begin - testdir = @__DIR__ - @testset "Parse $(joinpath("test",f))" for f in readdir(testdir) - code = read(joinpath(testdir, f), String) - @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == - 
JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) +#= +@testset "JuliaSyntax Base parsing" begin + basedir = "/home/chris/dev/julia/base" + for f in readdir(joinpath(basedir)) + test_parse_file(basedir, f) end end +=# From c68dc81a532fe9918952b8cf76bc605ef39ea8a5 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 7 Jan 2022 13:49:10 +1000 Subject: [PATCH 0294/1109] Use SyntaxHead for head of SyntaxNode It seems that SyntaxNode shouldn't simply be a proxy for Expr, but rather a more convenient interface over the green tree. (Perhaps it makes sense to have yet-another tree type as the API-compatible proxy for Expr?) --- JuliaSyntax/src/syntax_tree.jl | 53 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index e015fc2dce5e6..409a9e35a7e06 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -112,7 +112,7 @@ mutable struct SyntaxNode raw::GreenNode{SyntaxHead} position::Int parent::Union{Nothing,SyntaxNode} - head::Symbol + is_leaf::Bool val::Any end @@ -178,11 +178,8 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In @error "Leaf node of kind $k unknown to SyntaxNode" val = nothing end - return SyntaxNode(source, raw, position, nothing, :leaf, val) + return SyntaxNode(source, raw, position, nothing, true, val) else - str = untokenize(head(raw), include_flag_suff=false) - headsym = !isnothing(str) ? 
Symbol(str) : - error("Can't untokenize head of kind $(kind(raw))") cs = SyntaxNode[] pos = position if kind(raw) == K"string" && has_flags(head(raw), TRIPLE_STRING_FLAG) @@ -194,7 +191,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In if kind(rawchild) == K"String" val_range = pos:pos + span(rawchild) - 1 push!(strs, source[val_range]) - n = SyntaxNode(source, rawchild, pos, nothing, :leaf, nothing) + n = SyntaxNode(source, rawchild, pos, nothing, true, nothing) push!(cs, n) push!(str_nodes, n) else @@ -217,7 +214,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In pos += rawchild.span end end - node = SyntaxNode(source, raw, position, nothing, headsym, cs) + node = SyntaxNode(source, raw, position, nothing, false, cs) for c in cs c.parent = node end @@ -227,27 +224,27 @@ end is_error(node::SyntaxNode) = is_error(node.raw) is_trivia(node::SyntaxNode) = is_trivia(node.raw) -has_flags(node::SyntaxNode, f) = has_flags(head(node.raw), f) +has_flags(node::SyntaxNode, f) = has_flags(head(node), f) -head(node::SyntaxNode) = node.head +head(node::SyntaxNode) = head(node.raw) kind(node::SyntaxNode) = kind(node.raw) flags(node::SyntaxNode) = flags(node.raw) -haschildren(node::SyntaxNode) = node.head !== :leaf +haschildren(node::SyntaxNode) = !node.is_leaf children(node::SyntaxNode) = haschildren(node) ? 
node.val::Vector{SyntaxNode} : () span(node::SyntaxNode) = span(node.raw) function interpolate_literal(node::SyntaxNode, val) - @assert node.head == :$ - SyntaxNode(node.source, node.raw, node.position, node.parent, :leaf, val) + @assert kind(node) == K"$" + SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) end function _show_syntax_node(io, current_filename, node, indent) fname = node.source.filename line, col = source_location(node.source, node.position) posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node)-1,6))│" - nodestr = haschildren(node) ? "[$(untokenize(head(node.raw)))]" : + nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : node.val isa Symbol ? string(node.val) : repr(node.val) treestr = string(indent, nodestr) @@ -274,7 +271,7 @@ function _show_syntax_node_sexpr(io, node) print(io, node.val isa Symbol ? string(node.val) : repr(node.val)) end else - print(io, "(", untokenize(head(node.raw))) + print(io, "(", untokenize(head(node))) first = true for n in children(node) print(io, ' ') @@ -397,22 +394,26 @@ function _to_expr(node::SyntaxNode) args[2], args[1] = args[1], args[2] end loc = source_location(LineNumberNode, node.source, node.position) + + headstr = untokenize(head(node), include_flag_suff=false) + headsym = !isnothing(headstr) ? 
Symbol(headstr) : + error("Can't untokenize head of kind $(kind(node))") # Convert elements - if head(node) == :macrocall + if headsym == :macrocall insert!(args, 2, loc) - elseif head(node) in (:call, :ref) + elseif headsym in (:call, :ref) # Move parameters block to args[2] if length(args) > 1 && Meta.isexpr(args[end], :parameters) insert!(args, 2, args[end]) pop!(args) end - elseif head(node) in (:tuple, :parameters, :vect) + elseif headsym in (:tuple, :parameters, :vect) # Move parameters blocks to args[1] if length(args) > 1 && Meta.isexpr(args[end], :parameters) pushfirst!(args, args[end]) pop!(args) end - elseif head(node) == :try + elseif headsym == :try # Try children in source order: # try_block catch_var catch_block else_block finally_block # Expr ordering: @@ -438,10 +439,10 @@ function _to_expr(node::SyntaxNode) if else_ !== false push!(args, else_) end - elseif head(node) == :filter + elseif headsym == :filter pushfirst!(args, last(args)) pop!(args) - elseif head(node) == :flatten + elseif headsym == :flatten # The order of nodes inside the generators in Julia's flatten AST # is noncontiguous in the source text, so need to reconstruct # Julia's AST here from our alternative `flatten` expression. 
@@ -450,13 +451,13 @@ function _to_expr(node::SyntaxNode) gen = Expr(:generator, gen, args[i]) end args = [gen] - elseif head(node) in (:nrow, :ncat) + elseif headsym in (:nrow, :ncat) # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags pushfirst!(args, numeric_flags(flags(node))) - elseif head(node) == :typed_ncat + elseif headsym == :typed_ncat insert!(args, 2, numeric_flags(flags(node))) - elseif head(node) == :(=) + elseif headsym == :(=) if is_eventually_call(args[1]) if Meta.isexpr(args[2], :block) pushfirst!(args[2].args, loc) @@ -465,7 +466,7 @@ function _to_expr(node::SyntaxNode) args[2] = Expr(:block, loc, args[2]) end end - elseif head(node) == :(->) + elseif headsym == :(->) if Meta.isexpr(args[2], :block) pushfirst!(args[2].args, loc) else @@ -473,11 +474,11 @@ function _to_expr(node::SyntaxNode) args[2] = Expr(:block, loc, args[2]) end end - if head(node) == :inert || (head(node) == :quote && + if headsym == :inert || (headsym == :quote && length(args) == 1 && !(only(args) isa Expr)) return QuoteNode(only(args)) else - return Expr(head(node), args...) + return Expr(headsym, args...) 
end end From 32bd53e81a6e6a3317a02ecb70abc4e00c7a880d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 7 Jan 2022 21:28:48 +1000 Subject: [PATCH 0295/1109] Fix parsing of macrocall followed by a space and tuple For example `@eval (x,y)` --- JuliaSyntax/src/parser.jl | 58 +++++++++++++++++++++----------------- JuliaSyntax/test/parser.jl | 42 ++++++++++++++------------- 2 files changed, 54 insertions(+), 46 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 3094b273ffe9c..2fc483d5a15a3 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -758,6 +758,9 @@ end function is_juxtapose(ps, prev_k, t) k = kind(t) + # FIXME: + # https://github.com/JuliaLang/julia/issues/16356 + # https://github.com/JuliaLang/julia/commit/e3eacbb4a4479a6df4f588089490aeefc6e8cad8 return !t.had_whitespace && (is_number(prev_k) || (!is_number(k) && # disallow "x.3" and "sqrt(2)2" @@ -1129,35 +1132,15 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # [f (x)] ==> (hcat f x) break end - if k == K"(" - if is_macrocall - # a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y) - finish_macroname(ps, mark, is_valid_modref, macro_name_position) - end - # f(a,b) ==> (call f a b) - # f (a) ==> (call f (error-t) a b) - bump_disallowed_space(ps) - bump(ps, TRIVIA_FLAG) - # Keyword arguments depends on call vs macrocall - # foo(a=1) ==> (call foo (kw a 1)) - # @foo(a=1) ==> (macrocall @foo (= a 1)) - parse_call_arglist(ps, K")", is_macrocall) - emit(ps, mark, is_macrocall ? K"macrocall" : K"call") - if peek(ps) == K"do" - # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) - bump(ps, TRIVIA_FLAG) - parse_do(ps) - emit(ps, mark, K"do") - end - if is_macrocall - break - end - elseif is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) - # a().@x y ==> (macrocall (error (. 
(call a) (quote x))) y) + if is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) + # Macro calls with space-separated arguments + # @foo a b ==> (macrocall @foo a b) + # @foo (x) ==> (macrocall @foo x) + # @foo (x,y) ==> (macrocall @foo (tuple x y)) + # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) finish_macroname(ps, mark, is_valid_modref, macro_name_position) with_space_sensitive(ps) do ps # Space separated macro arguments - # @foo a b ==> (macrocall @foo a b) # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) n_args = parse_space_separated_exprs(ps) @@ -1182,6 +1165,29 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"macrocall") end break + elseif k == K"(" + if is_macrocall + # a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y) + finish_macroname(ps, mark, is_valid_modref, macro_name_position) + end + # f(a,b) ==> (call f a b) + # f (a) ==> (call f (error-t) a b) + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + # Keyword arguments depends on call vs macrocall + # foo(a=1) ==> (call foo (kw a 1)) + # @foo(a=1) ==> (macrocall @foo (= a 1)) + parse_call_arglist(ps, K")", is_macrocall) + emit(ps, mark, is_macrocall ? 
K"macrocall" : K"call") + if peek(ps) == K"do" + # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) + bump(ps, TRIVIA_FLAG) + parse_do(ps) + emit(ps, mark, K"do") + end + if is_macrocall + break + end elseif k == K"[" if is_macrocall # a().@x[1] ==> FIXME diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index bca4f9bd38be8..4d0520929a593 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -224,28 +224,30 @@ tests = [ "f(x) do y,z body end" => "(do (call f x) (-> (tuple y z) (block body)))" # Keyword arguments depend on call vs macrocall "foo(a=1)" => "(call foo (kw a 1))" - "@foo(a=1)" => """(macrocall @foo (= a 1))""" + "@foo(a=1)" => "(macrocall @foo (= a 1))" # f(x) do y body end ==> (do (call f x) (-> (tuple y) (block body))) - "@foo a b" => """(macrocall @foo a b)""" - "A.@foo a b" => """(macrocall (. A (quote @foo)) a b)""" - "@A.foo a b" => """(macrocall (. A (quote @foo)) a b)""" + "@foo a b" => "(macrocall @foo a b)" + "@foo (x)" => "(macrocall @foo x)" + "@foo (x,y)" => "(macrocall @foo (tuple x y))" + "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" + "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" # Special @doc parsing rules - "@doc x\ny" => """(macrocall @doc x y)""" - "A.@doc x\ny" => """(macrocall (. A (quote @doc)) x y)""" - "@A.doc x\ny" => """(macrocall (. A (quote @doc)) x y)""" - "@doc x y\nz" => """(macrocall @doc x y)""" - "@doc x\n\ny" => """(macrocall @doc x)""" - "@doc x\nend" => """(macrocall @doc x)""" + "@doc x\ny" => "(macrocall @doc x y)" + "A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)" + "@A.doc x\ny" => "(macrocall (. A (quote @doc)) x y)" + "@doc x y\nz" => "(macrocall @doc x y)" + "@doc x\n\ny" => "(macrocall @doc x)" + "@doc x\nend" => "(macrocall @doc x)" # .' discontinued "f.'" => "f (error-t . ')" # Allow `@` in macrocall only in first and last position - "A.B.@x" => """(macrocall (. (. A (quote B)) (quote @x)))""" - "@A.B.x" => """(macrocall (. 
(. A (quote B)) (quote @x)))""" - "A.@B.x" => """(macrocall (. (. A (quote B)) (error-t) (quote @x)))""" - "A.@. y" => """(macrocall (. A (quote @__dot__)) y)""" - "a().@x(y)" => """(macrocall (error (. (call a) (quote x))) y)""" - "a().@x y" => """(macrocall (error (. (call a) (quote x))) y)""" - "a().@x{y}" => """(macrocall (error (. (call a) (quote x))) (braces y))""" + "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" + "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" + "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" + "A.@. y" => "(macrocall (. A (quote @__dot__)) y)" + "a().@x(y)" => "(macrocall (error (. (call a) (quote x))) y)" + "a().@x y" => "(macrocall (error (. (call a) (quote x))) y)" + "a().@x{y}" => "(macrocall (error (. (call a) (quote x))) (braces y))" # array indexing, typed comprehension, etc "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" @@ -267,12 +269,12 @@ tests = [ "f'" => "(' f)" "f'ᵀ" => "(call-i f 'ᵀ)" # Curly calls - "@S{a,b}" => """(macrocall @S (braces a b))""" + "@S{a,b}" => "(macrocall @S (braces a b))" "S{a,b}" => "(curly S a b)" "S {a}" => "(curly S (error-t) a)" # String macros - """x"str\"""" => """(macrocall @x_str "str")""" - """x`str`""" => """(macrocall @x_cmd "str")""" + "x\"str\"" => """(macrocall @x_str "str")""" + "x`str`" => """(macrocall @x_cmd "str")""" # Macro sufficies can include keywords and numbers "x\"s\"y" => """(macrocall @x_str "s" "y")""" "x\"s\"end" => """(macrocall @x_str "s" "end")""" From 4b28343e5c33bd2de7788006707d1d0ea1daf98f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 7 Jan 2022 21:30:02 +1000 Subject: [PATCH 0296/1109] Rough parsing of numerical values from strings --- JuliaSyntax/src/syntax_tree.jl | 7 +--- JuliaSyntax/src/value_parsing.jl | 70 ++++++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 409a9e35a7e06..f20150729bb3e 100644 --- 
a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -126,11 +126,8 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val_str = source[val_range] # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. - val = if k in (K"Integer", K"BinInt", K"OctInt", K"HexInt") - julia_string_to_number(Int, val_str, k) - elseif k == K"Float" - # FIXME: Other float types! - julia_string_to_number(Float64, val_str, k) + val = if k in KSet`Integer Float BinInt OctInt HexInt` + julia_string_to_number(val_str, k) elseif k == K"true" true elseif k == K"false" diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 0f29244cd5a2c..186fc35c08808 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -5,28 +5,62 @@ """ Convert a Julia source code string into a number. """ -function julia_string_to_number(T, str::AbstractString, kind) - # Fix this up... it's barely functional. +function julia_string_to_number(str::AbstractString, kind) + str = replace(str, '_'=>"") if kind == K"Integer" - str = replace(str, '_'=>"") - end - x = Base.parse(T, str) - if kind == K"HexInt" - if length(str) <= 4 - x = UInt8(x) - elseif length(str) <= 6 - x = UInt16(x) - elseif length(str) <= 10 - x = UInt32(x) - elseif length(str) <= 18 - x = UInt64(x) - elseif length(str) <= 34 - x = UInt128(x) + x = Base.tryparse(Int, str) + if Int === Int32 && isnothing(x) + x = Base.tryparse(Int64, str) + end + if isnothing(x) + # TODO: flisp parses BigInt and Int128 as string macros rather than + # literals. Is this necessary or can we get away with using values + # here? + x = Base.tryparse(Int128, str) + if isnothing(x) + x = Base.parse(BigInt, str) + end + end + return x + elseif kind == K"Float" + if 'f' in str + # This is kind of awful. Should we have a separate Float32 literal + # type produced by the lexer? The `f` suffix is nonstandard after all. 
+ return Base.parse(Float32, replace(str, 'f'=>'e')) + else + return Base.parse(Float64, str) + end + elseif kind == K"HexInt" + ndigits = length(str)-2 + return ndigits <= 2 ? Base.parse(UInt8, str) : + ndigits <= 4 ? Base.parse(UInt16, str) : + ndigits <= 8 ? Base.parse(UInt32, str) : + ndigits <= 16 ? Base.parse(UInt64, str) : + ndigits <= 32 ? Base.parse(UInt128, str) : + Base.parse(BigInt, str) + elseif kind == K"BinInt" + ndigits = length(str)-2 + return ndigits <= 8 ? Base.parse(UInt8, str) : + ndigits <= 16 ? Base.parse(UInt16, str) : + ndigits <= 32 ? Base.parse(UInt32, str) : + ndigits <= 64 ? Base.parse(UInt64, str) : + ndigits <= 128 ? Base.parse(UInt128, str) : + Base.parse(BigInt, str) + elseif kind == K"OctInt" + x = Base.tryparse(UInt64, str) + if isnothing(x) + x = Base.tryparse(UInt128, str) + if isnothing(x) + x = Base.parse(BigInt, str) + end else - TODO("BigInt") + x = x <= typemax(UInt8) ? UInt8(x) : + x <= typemax(UInt16) ? UInt16(x) : + x <= typemax(UInt32) ? UInt32(x) : + x end + return x end - x end """ From e3aa4e73397a580b7eaf0af88752a539668df85a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 8 Jan 2022 14:33:23 +1000 Subject: [PATCH 0297/1109] Disallow hex/bin/oct floats before juxtaposition --- JuliaSyntax/src/parser.jl | 6 +++--- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2fc483d5a15a3..d6b1770f1b185 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -758,9 +758,6 @@ end function is_juxtapose(ps, prev_k, t) k = kind(t) - # FIXME: - # https://github.com/JuliaLang/julia/issues/16356 - # https://github.com/JuliaLang/julia/commit/e3eacbb4a4479a6df4f588089490aeefc6e8cad8 return !t.had_whitespace && (is_number(prev_k) || (!is_number(k) && # disallow "x.3" and "sqrt(2)2" @@ -768,6 +765,9 @@ function is_juxtapose(ps, prev_k, t) !(is_block_form(prev_k) || is_syntactic_unary_op(prev_k) || 
is_initial_reserved_word(ps, prev_k) ))) && + # https://github.com/JuliaLang/julia/issues/16356 + # 0xenomorph ==> 0x0e + !(prev_k in KSet`BinInt HexInt OctInt` && k in KSet`Identifier Keyword`) && (!is_operator(k) || is_radical_op(k)) && !is_closing_token(ps, k) && !is_initial_reserved_word(ps, k) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4d0520929a593..d291cf3a92854 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -149,6 +149,7 @@ tests = [ "2(x)" => "(call-i 2 * x)" "(2)(3)x" => "(call-i 2 * 3 x)" "(x-1)y" => "(call-i (call-i x - 1) * y)" + "0xenomorph" => "0x0e" # ie, not juxtoposition # errors "\"a\"\"b\"" => "(call-i \"a\" * (error) \"b\")" "\"a\"x" => "(call-i \"a\" * (error) x)" From da1792085ecc3ca56e75d5344474f431d5dcf253 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 8 Jan 2022 14:41:35 +1000 Subject: [PATCH 0298/1109] Fix backslash escaping of newlines and following indentation --- JuliaSyntax/src/value_parsing.jl | 11 +++++++---- JuliaSyntax/test/runtests.jl | 11 +++++++++++ JuliaSyntax/test/self_parse.jl | 19 ------------------- JuliaSyntax/test/value_parsing.jl | 4 +++- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 186fc35c08808..e38ba259b1a76 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -2,6 +2,8 @@ # This file contains utility functions for converting undecorated source # strings into Julia values. For example, string->number, string unescaping, etc. +is_indentation(c) = c == ' ' || c == '\t' + """ Convert a Julia source code string into a number. 
""" @@ -176,10 +178,13 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer) end write(io, UInt8(n)) elseif c == '\n' || c == '\r' - # Remove \n \r and \r\n newlines following \ + # Remove \n \r and \r\n newlines + indentation following \ if c == '\r' && i < lastidx && str[i+1] == '\n' i += 1 end + while i < lastidx && is_indentation(str[i+1]) + i += 1 + end else u = # C escapes c == 'n' ? '\n' : @@ -249,9 +254,7 @@ function triplequoted_string_indentation(strs) if reflen < 0 # Find indentation we'll use as a reference j = i-1 - while j+1 <= lastidx - c = str[j+1] - (c == ' ' || c == '\t') || break + while j < lastidx && is_indentation(str[j+1]) j += 1 end refstr = SubString(str, i, j) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 4a8a20d695c23..f505cd9063340 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -17,6 +17,17 @@ using JuliaSyntax: ParseStream, emit, emit_diagnostic using JuliaSyntax: ParseState +function test_parse_file(root_path, path) + fullpath = joinpath(root_path, path) + if endswith(path, ".jl") && isfile(fullpath) + @testset "Parse $path" begin + code = read(fullpath, String) + @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + end + end +end + # Shortcuts for defining raw syntax nodes # Trivia nodes diff --git a/JuliaSyntax/test/self_parse.jl b/JuliaSyntax/test/self_parse.jl index 83ab4af38c592..25fe88e49edf8 100644 --- a/JuliaSyntax/test/self_parse.jl +++ b/JuliaSyntax/test/self_parse.jl @@ -1,14 +1,3 @@ -function test_parse_file(root_path, path) - fullpath = joinpath(root_path, path) - if endswith(path, ".jl") && isfile(fullpath) - @testset "Parse $path" begin - code = read(fullpath, String) - @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == - JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) - end - end -end - @testset 
"JuliaSyntax self-parsing" begin pkgdir = joinpath(@__DIR__, "..") for f in readdir(joinpath(pkgdir, "src")) @@ -20,11 +9,3 @@ end end end -#= -@testset "JuliaSyntax Base parsing" begin - basedir = "/home/chris/dev/julia/base" - for f in readdir(joinpath(basedir)) - test_parse_file(basedir, f) - end -end -=# diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index afe611c272087..07cd3bc88cf1d 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -14,10 +14,12 @@ using JuliaSyntax: triplequoted_string_indentation, # Newline normalization @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd" - # Removal of backslash-escaped newlines + # Removal of backslash-escaped newlines & indentation @test unesc("a\\\nb") == "ab" @test unesc("a\\\rb") == "ab" @test unesc("a\\\r\nb") == "ab" + @test unesc("a\\\n b") == "ab" + @test unesc("a\\\r\n \tb") == "ab" @test unesc("a\\\n") == "a" @test unesc("a\\\r") == "a" @test unesc("a\\\r\n") == "a" From 8bf56ab83494d33061be720b755211118b8c3732 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 8 Jan 2022 15:05:21 +1000 Subject: [PATCH 0299/1109] Fix parsing of functions definition without method --- JuliaSyntax/src/parser.jl | 12 ++++++++++-- JuliaSyntax/test/parser.jl | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d6b1770f1b185..61febdaa6ee90 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1709,12 +1709,20 @@ function parse_function(ps::ParseState) # function begin() end ==> (function (call (error (begin))) (block)) # macro begin() end ==> (macro (call (error (begin))) (block)) bump(ps, error="invalid $(untokenize(word)) name") - parse_call_chain(ps, def_mark) else # function f() end ==> (function (call f) (block)) # function \n f() end ==> (function (call f) (block)) # function $f() end ==> (function (call ($ f)) (block)) - parse_unary_prefix(ps) + 
parse_identifier_or_interpolate(ps) + end + if peek(ps, skip_newlines=true) == K"end" + # Function definition with no methods + # function f end ==> (function f) + # function f \n\n end ==> (function f) + # function $f end ==> (function ($ f)) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, word) + return end parse_call_chain(ps, def_mark) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d291cf3a92854..a2ccd7abad88e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -378,6 +378,9 @@ tests = [ "function f() end" => "(function (call f) (block))" "function \n f() end" => "(function (call f) (block))" "function \$f() end" => "(function (call (\$ f)) (block))" + "function f end" => "(function f)" + "function f \n\n end" => "(function f)" + "function \$f end" => "(function (\$ f))" "function f()::T end" => "(function (:: (call f) T) (block))" "function f()::g(T) end" => "(function (:: (call f) (call g T)) (block))" "function f() \n a \n b end" => "(function (call f) (block a b))" From 348718033daa1dec52d1c7b160db3093f0513013 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 8 Jan 2022 15:28:39 +1000 Subject: [PATCH 0300/1109] Fix parsing of docstrings within module blocks --- JuliaSyntax/src/parser.jl | 6 ++++-- JuliaSyntax/test/parser.jl | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 61febdaa6ee90..41e4e564a56a1 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -277,6 +277,7 @@ end # # a;b;c ==> (toplevel a b c) # a;;;b;; ==> (toplevel a b) +# "x" a ; "y" b ==> (toplevel (macrocall core_@doc "x" a) (macrocall core_@doc "y" b)) # # flisp: parse-stmts function parse_stmts(ps::ParseState) @@ -1538,7 +1539,8 @@ function parse_resword(ps::ParseState) parse_unary_prefix(ps) end # module A \n a \n b \n end ==> (module true A (block a b)) - parse_block(ps) + # module A \n "x"\na \n end ==> (module true A (block 
(core_@doc "x" a))) + parse_block(ps, parse_docstring) bump_closing_token(ps, K"end") emit(ps, mark, K"module") elseif word == K"export" @@ -2787,7 +2789,7 @@ function emit_braces(ps, mark, ckind, cflags) end # Parse docstrings attached by a space or single newline -# "doc" foo ==> +# "doc" foo ==> (macrocall core_@doc "doc" foo) # # flisp: parse-docstring function parse_docstring(ps::ParseState, down=parse_eq) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a2ccd7abad88e..605df49280b69 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -58,6 +58,8 @@ tests = [ JuliaSyntax.parse_stmts => [ "a;b;c" => "(toplevel a b c)" "a;;;b;;" => "(toplevel a b)" + """ "x" a ; "y" b """ => + """(toplevel (macrocall :(Core.var"@doc") "x" a) (macrocall :(Core.var"@doc") "y" b))""" ], JuliaSyntax.parse_eq => [ # parse_assignment @@ -334,6 +336,7 @@ tests = [ "module do \n end" => "(module true (error (do)) (block))" "module \$A end" => "(module true (\$ A) (block))" "module A \n a \n b \n end" => "(module true A (block a b))" + """module A \n "x"\na\n end""" => """(module true A (block (macrocall :(Core.var"@doc") "x" a)))""" # export "export @a" => "(export @a)" "export a, \n @b" => "(export a @b)" @@ -544,7 +547,7 @@ tests = [ "\"str\"" => "\"str\"" ], JuliaSyntax.parse_docstring => [ - "\"doc\" foo" => "(macrocall :(Core.var\"@doc\") \"doc\" foo)" + """ "doc" foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" ], ] From 35eacfe852a6763b74d8cdff37f8b595a81954ea Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 8 Jan 2022 15:49:24 +1000 Subject: [PATCH 0301/1109] Fix where_enabled=true in normal parsing context --- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/parser.jl | 2 ++ JuliaSyntax/test/parser.jl | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index eb7605199d5dd..8491d5c355668 100644 --- 
a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -630,7 +630,7 @@ function normal_context(ps::ParseState) ParseState(ps, range_colon_enabled=true, space_sensitive=false, - where_enabled=false, + where_enabled=true, for_generator=false, end_symbol=false, whitespace_newline=false) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 41e4e564a56a1..718797aec6c52 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1387,6 +1387,8 @@ end # # flisp: parse-resword function parse_resword(ps::ParseState) + # In normal_context + # begin f() where T = x end ==> (block (= (where (call f) T) x)) ps = normal_context(ps) mark = position(ps) word = peek(ps) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 605df49280b69..1a8ed6b088864 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -285,6 +285,8 @@ tests = [ "x\"s\"10.0" => """(macrocall @x_str "s" 10.0)""" ], JuliaSyntax.parse_resword => [ + # In normal_context + "begin f() where T = x end" => "(block (= (where (call f) T) x))" # block "begin end" => "(block)" "begin a ; b end" => "(block a b)" @@ -343,7 +345,6 @@ tests = [ "export a" => "(export a)" "export \n a" => "(export a)" "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" - # import ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif (block b) (block yy) (block zz)))" From 7df7df0c3204ee0b52edd08e0ae1d32146f19d1b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 11:42:45 +1000 Subject: [PATCH 0302/1109] Fix: allow operators in parse_identifier_or_interpolate --- JuliaSyntax/src/parser.jl | 22 +++++++++++++++------- JuliaSyntax/test/parser.jl | 6 ++++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 718797aec6c52..acb9e2e79a2a6 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ 
-1081,9 +1081,12 @@ function parse_identifier_or_interpolate(ps::ParseState, outermost=true) emit(ps, mark, K"$") else parse_atom(ps) - if outermost && !is_identifier(peek_behind(ps)) - emit(ps, mark, K"error", - error="Expected identifier or interpolation syntax") + if outermost + kb = peek_behind(ps) + if !(is_identifier(kb) || is_operator(kb)) + emit(ps, mark, K"error", + error="Expected identifier or interpolation syntax") + end end end end @@ -1546,8 +1549,12 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"module") elseif word == K"export" - # export a - # export a, b, + # export a ==> (export a) + # export @a ==> (export @a) + # export a, \n @b ==> (export a @b) + # export +, == ==> (export + ==) + # export \n a ==> (export a) + # export \$a, \$(a*b) ==> (export (\$ a) (\$ (call-i a * b))) bump(ps, TRIVIA_FLAG) parse_comma_separated(ps, parse_atsym) emit(ps, mark, K"export") @@ -1923,8 +1930,9 @@ function parse_imports(ps::ParseState) has_comma = true end if has_import_prefix || has_comma - # import x, y ==> (import (. x) (. y)) - # import A: x, y ==> (import (: (. A) (. x) (. y))) + # import A, y ==> (import (. A) (. y)) + # import A: x, y ==> (import (: (. A) (. x) (. y))) + # import A: +, == ==> (import (: (. A) (. +) (. 
==))) parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix)) if peek(ps) == K":" # Error recovery diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1a8ed6b088864..08ab08e430b0a 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -340,9 +340,10 @@ tests = [ "module A \n a \n b \n end" => "(module true A (block a b))" """module A \n "x"\na\n end""" => """(module true A (block (macrocall :(Core.var"@doc") "x" a)))""" # export + "export a" => "(export a)" "export @a" => "(export @a)" "export a, \n @b" => "(export a @b)" - "export a" => "(export a)" + "export +, ==" => "(export + ==)" "export \n a" => "(export a)" "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" ], @@ -408,7 +409,8 @@ tests = [ ], JuliaSyntax.parse_imports => [ "import A as B: x" => "(import (: (error (as (. A) B)) (. x)))" - "import x, y" => "(import (. x) (. y))" + "import A, y" => "(import (. A) (. y))" + "import A: +, ==" => "(import (: (. A) (. +) (. ==)))" "import A: x, y" => "(import (: (. A) (. x) (. y)))" "import A: x, B: y" => "(import (: (. A) (. x) (. B) (error-t (. y))))" "import A: x" => "(import (: (. A) (. x)))" From b37a6f7c35dfb6b863d7dd27e03050253d77a77a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 15:33:29 +1000 Subject: [PATCH 0303/1109] Notes kinds/sum types vs the Julia type system --- JuliaSyntax/README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index bdc5e9e5378d6..d30a25308ac9b 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -62,6 +62,36 @@ Additionally, green trees are usually designed so that * Nodes are homogenously typed at the language level so they can be efficiently stored and accessed, with the node type held as a "syntax kind" enumeration. 
+## Syntax Kinds and sum types + +We generally track the type of syntax nodes with a syntax "kind", stored +explicitly in each node an integer tag. This effectively makes the node type a +[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system +sense, but with the type tracked explicitly outside of Julia's type system. + +Managing the type explicitly brings a few benefits: +* Code and data structures for manipulating syntax nodes is always concretely + typed from the point of view of the compiler. +* We control the data layout, and can pack the kind very efficiently into very + few bits (along with any flags, as desired). +* Predicates such as `is_operator` can be extremely efficient, given that we + know the meaning of the kind's bits. +* The kind can be applied to several different tree data structures, or + manipulated by itself as needed. +* We can generate very efficient pattern matching code. + +There's arguably a few downsides: +* Normal Julia dispatch can't express dispatch over syntax kind. Luckily, + a pattern matching macro can provide a very elegant way of expressing such + algorithms over a non-extensible set of kinds, so this is not a big problem. +* Different node kinds could come with different data fields, but a syntax + tree must have generic fields to cater for all kinds. (Consider as an analogy + the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic + `head` and `args` fields.) This could be a disadvantage for code which + processes one specific kind, but for generic code processing many kinds, + having a generic but *concrete* data layout should bring a performance + advantage. 
+ ## Representing erroneous source code The goal of the parser is to produce well-formed heirarchical structure from From db36b48e9fd05b5e71549a20ca5c495fa66738eb Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 15:31:53 +1000 Subject: [PATCH 0304/1109] Remove unneded generic kinds and fix associated bugs It turns out that things like K"Keyword" are very easy to accidentally misuse. --- JuliaSyntax/src/parser.jl | 2 +- JuliaSyntax/src/token_kinds.jl | 8 +------- JuliaSyntax/src/tokens.jl | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index acb9e2e79a2a6..f37582639214d 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -768,7 +768,7 @@ function is_juxtapose(ps, prev_k, t) is_initial_reserved_word(ps, prev_k) ))) && # https://github.com/JuliaLang/julia/issues/16356 # 0xenomorph ==> 0x0e - !(prev_k in KSet`BinInt HexInt OctInt` && k in KSet`Identifier Keyword`) && + !(prev_k in KSet`BinInt HexInt OctInt` && (is_identifier(k) || is_keyword(k))) && (!is_operator(k) || is_radical_op(k)) && !is_closing_token(ps, k) && !is_initial_reserved_word(ps, k) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 963545fb881c3..dac0e4614b663 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -13,7 +13,6 @@ Dict([ ";" => Ts.SEMICOLON "BEGIN_KEYWORDS" => Ts.begin_keywords -"Keyword" => Ts.KEYWORD "abstract" => Ts.ABSTRACT "as" => Ts.AS "baremodule" => Ts.BAREMODULE @@ -50,17 +49,13 @@ Dict([ "while" => Ts.WHILE "END_KEYWORDS" => Ts.end_keywords +# FIXME: Define precisely what Nothing means; integrate better with other tokens. 
"BEGIN_CSTPARSER" => Ts.begin_cstparser -"InvisibleBrackets" => Ts.INVISIBLE_BRACKETS "Nothing" => Ts.NOTHING -"Ws" => Ts.WS -"SemicolonWs" => Ts.SEMICOLON_WS "NewlineWs" => Ts.NEWLINE_WS -"EmptyWs" => Ts.EMPTY_WS "END_CSTPARSER" => Ts.end_cstparser "BEGIN_LITERAL" => Ts.begin_literal -"Literal" => Ts.LITERAL "Integer" => Ts.INTEGER "BinInt" => Ts.BIN_INT "HexInt" => Ts.HEX_INT @@ -87,7 +82,6 @@ Dict([ "END_DELIMITERS" => Ts.end_delimiters "BEGIN_OPS" => Ts.begin_ops -"OP" => Ts.OP "..." => Ts.DDDOT # Level 1 diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 3616d76cf6147..5ce2057021cbf 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -28,7 +28,7 @@ A set of kinds which can be used with the `in` operator. For example """ macro KSet_cmd(str) kinds = [get(_str_to_kind, s) do - error("unknown token kind K$(repr(str))") + error("unknown token kind KSet`$(repr(str)[2:end-1])`") end for s in split(str)] From 1588db8868c54b490e76e0737bfd35b2491be635 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 17:55:57 +1000 Subject: [PATCH 0305/1109] Fix triplequoted string indentation again The previous version turned out to be quite broken - initial whitespace is never regarded as indentation in any triple quoted string chunk, as it's always preceded in the source code by a visible token of some kind; either a """ delimiter or $() interpolation. 
--- JuliaSyntax/src/value_parsing.jl | 90 ++++++++++++++-------- JuliaSyntax/test/value_parsing.jl | 120 +++++++++++++++++++----------- 2 files changed, 135 insertions(+), 75 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index e38ba259b1a76..cc38a6f5b89b4 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -68,13 +68,10 @@ end """ Process Julia source code escape sequences for raw strings """ -function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent::Integer) +function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent::Integer, skip_initial_newline::Bool) delim = is_cmd ? '`' : '"' i = firstindex(str) lastidx = lastindex(str) - if i <= lastidx && str[i] != '\n' - i += dedent - end while i <= lastidx c = str[i] if c != '\\' @@ -85,9 +82,17 @@ function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent:: end c = '\n' end - write(io, c) - if c == '\n' && i+1 <= lastidx && str[i+1] != '\n' - i += dedent + if c == '\n' + if skip_initial_newline + skip_initial_newline = false + else + write(io, c) + end + if i+1 <= lastidx && str[i+1] != '\n' && str[i+1] != '\r' + i += dedent + end + else + write(io, c) end i = nextind(str, i) continue @@ -117,8 +122,8 @@ end Process Julia source code escape sequences for non-raw strings. `str` should be passed without delimiting quotes. 
""" -function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer) - i = firstindex(str) + dedent +function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer, skip_initial_newline::Bool) + i = firstindex(str) lastidx = lastindex(str) while i <= lastidx c = str[i] @@ -130,9 +135,17 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer) end c = '\n' end - write(io, c) - if c == '\n' && i+1 <= lastidx && str[i+1] != '\n' - i += dedent + if c == '\n' + if skip_initial_newline + skip_initial_newline = false + else + write(io, c) + end + if i+1 <= lastidx && str[i+1] != '\n' && str[i+1] != '\r' + i += dedent + end + else + write(io, c) end i = nextind(str, i) continue @@ -209,33 +222,36 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer) end function unescape_julia_string(str::AbstractString, is_cmd::Bool, - is_raw::Bool, dedent::Integer=0) + is_raw::Bool, dedent::Integer=0, + skip_initial_newline=false) io = IOBuffer() if is_raw - unescape_raw_string(io, str, is_cmd, dedent) + unescape_raw_string(io, str, is_cmd, dedent, skip_initial_newline) else - unescape_julia_string(io, str, dedent) + unescape_julia_string(io, str, dedent, skip_initial_newline) end String(take!(io)) end -# Compute length of longest common prefix of spaces and tabs, in characters +# Compute length of longest common prefix of mixed spaces and tabs, in +# characters (/bytes). # -# This runs *before* normalization of newlines so that unescaping/normalization -# can happen in a single pass. +# Initial whitespace is never regarded as indentation in any triple quoted +# string chunk, as it's always preceded in the source code by a visible token +# of some kind; either a """ delimiter or $() interpolation. +# +# This pass runs *before* normalization of newlines so that +# unescaping/normalization can happen in a single pass. # # TODO: Should we do triplequoted string splitting as part of the main parser? 
# It would be conceptually clean if the trivial whitespace was emitted as # syntax trivia. # # flisp: triplequoted-string-indentation- -function triplequoted_string_indentation(strs) +function triplequoted_string_indentation(strs, is_raw) if isempty(strs) return 0 end - if last(last(strs)) in ('\n', '\r') - return 0 - end refstr = SubString(strs[1], 1, 0) reflen = -1 for str in strs @@ -243,7 +259,21 @@ function triplequoted_string_indentation(strs) lastidx = lastindex(str) while i <= lastidx c = str[i] - if i == 1 || c == '\n' || c == '\r' + if c == '\\' && !is_raw + # Escaped newlines stop indentation detection for the current + # line but do not start detection of indentation on the next + # line + if i+1 <= lastidx + if str[i+1] == '\n' + i += 1 + elseif str[i+1] == '\r' + i += 1 + if i+1 <= lastidx && str[i+1] == '\n' + i += 1 + end + end + end + elseif c == '\n' || c == '\r' while i <= lastidx c = str[i] (c == '\n' || c == '\r') || break @@ -279,26 +309,26 @@ function triplequoted_string_indentation(strs) i = j end end + else + # A newline directly before the end of the string means a + # delimiter was in column zero, implying zero indentation. 
+ reflen = 0 end end i <= lastidx || break i = nextind(str, i) end end - reflen + max(reflen, 0) end function process_triple_strings!(strs, is_raw) if isempty(strs) return strs end - dedent = triplequoted_string_indentation(strs) + dedent = triplequoted_string_indentation(strs, is_raw) for i = 1:length(strs) - if i == 1 && strs[1][1] == '\n' - strs[i] = unescape_julia_string(SubString(strs[i], 2), false, is_raw, dedent) - else - strs[i] = unescape_julia_string(strs[i], false, is_raw, dedent) - end + strs[i] = unescape_julia_string(strs[i], false, is_raw, dedent, i==1) end strs end diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 07cd3bc88cf1d..c02298ee6f68f 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -75,52 +75,82 @@ end end @testset "Triple quoted string indentation" begin - @test triplequoted_string_indentation([]) == 0 - - # Spaces or tabs - @test triplequoted_string_indentation([" "]) == 2 - @test triplequoted_string_indentation(["\t "]) == 2 - @test triplequoted_string_indentation([" \t"]) == 2 - @test triplequoted_string_indentation(["\t\t"]) == 2 - - # Various newlines; empty lines ignored - @test triplequoted_string_indentation([" \n\n x"]) == 2 - @test triplequoted_string_indentation([" \n\r x"]) == 2 - @test triplequoted_string_indentation([" \r\n x"]) == 2 - @test triplequoted_string_indentation([" \r\r x"]) == 2 - @test triplequoted_string_indentation(["\n\r\r\n"]) == 0 - # Empty newline at the end not ignored - @test triplequoted_string_indentation([" \n"]) == 0 - @test triplequoted_string_indentation([" \r"]) == 0 - @test triplequoted_string_indentation([" \n\n"]) == 0 - @test triplequoted_string_indentation([" ", " \n"]) == 0 - - # Finds the minimum common prefix - @test triplequoted_string_indentation([" ", " "]) == 2 - @test triplequoted_string_indentation([" ", " "]) == 1 - @test triplequoted_string_indentation([" ", " "]) == 1 - @test 
triplequoted_string_indentation([" ", " "]) == 1 - @test triplequoted_string_indentation([" \t", " "]) == 1 - @test triplequoted_string_indentation([" ", " \t"]) == 1 - @test triplequoted_string_indentation([" \t", " \t"]) == 2 - @test triplequoted_string_indentation(["\t ", "\t "]) == 2 - @test triplequoted_string_indentation([" \n "]) == 2 - @test triplequoted_string_indentation([" \n "]) == 1 - @test triplequoted_string_indentation([" \n "]) == 1 - @test triplequoted_string_indentation(["\n \n \n "]) == 1 - @test triplequoted_string_indentation([" \n \n "]) == 1 - - # Cases of no indentation - @test triplequoted_string_indentation(["hi"]) == 0 - @test triplequoted_string_indentation(["x\ny", "z"]) == 0 + # Alias for non-raw triple str indentation + triplestr_indent(str) = triplequoted_string_indentation(str, false) + + @test triplestr_indent([]) == 0 + + # Spaces or tabs acceptable + @test triplestr_indent(["\n "]) == 2 + @test triplestr_indent(["\n\t "]) == 2 + @test triplestr_indent(["\n \t"]) == 2 + @test triplestr_indent(["\n\t\t"]) == 2 + + # Start of the string is not indentation, as it's always preceded by a + # delimiter in the source + @test triplestr_indent([" "]) == 0 + @test triplestr_indent([" ", " "]) == 0 + + # Various newlines are allowed. 
empty lines are ignored + @test triplestr_indent(["\n\n x"]) == 2 + @test triplestr_indent(["\n\r x"]) == 2 + @test triplestr_indent(["\r\n x"]) == 2 + @test triplestr_indent(["\r\r x"]) == 2 + @test triplestr_indent(["\n\r\r\n"]) == 0 + + # Empty line at the end of any chunk implies the next source line started + # with a delimiter, yielding zero indentation + @test triplestr_indent([" \n"]) == 0 + @test triplestr_indent([" \r"]) == 0 + @test triplestr_indent([" \n\n"]) == 0 + @test triplestr_indent([" ", " \n"]) == 0 + @test triplestr_indent([" \n", " "]) == 0 + + # Find the minimum common prefix in one or several chunks + @test triplestr_indent(["\n ", "\n "]) == 2 + @test triplestr_indent(["\n ", "\n "]) == 1 + @test triplestr_indent(["\n ", "\n "]) == 1 + @test triplestr_indent(["\n ", "\n "]) == 1 + @test triplestr_indent(["\n \t", "\n "]) == 1 + @test triplestr_indent(["\n ", "\n \t"]) == 1 + @test triplestr_indent(["\n \t", "\n \t"]) == 2 + @test triplestr_indent(["\n\t ", "\n\t "]) == 2 + @test triplestr_indent(["\n \n "]) == 2 + @test triplestr_indent(["\n \n "]) == 1 + @test triplestr_indent(["\n \n "]) == 1 + # Increasing widths + @test triplestr_indent(["\n\n \n \n "]) == 1 + # Decreasing widths + @test triplestr_indent(["\n \n \n "]) == 1 + + # Some cases of no indentation + @test triplestr_indent(["hi"]) == 0 + @test triplestr_indent(["x\ny", "z"]) == 0 + + # Escaped newlines + @test triplestr_indent(["\\\n "]) == 0 + @test triplestr_indent(["\\\r "]) == 0 + @test triplestr_indent(["\\\r\n "]) == 0 + @test triplestr_indent(["\\\r\n "]) == 0 + @test triplestr_indent(["\n \\\n "]) == 2 + @test triplestr_indent(["\n \\\n "]) == 1 + + # Raw strings don't have escaped newline processing + @test triplequoted_string_indentation(["\n \\\n "], true) == 1 + @test triplequoted_string_indentation(["\n \\\n "], true) == 1 end @testset "Triple quoted string deindentation" begin - @test process_triple_strings!([" x", " y"], false) == ["x", "y"] - @test 
process_triple_strings!([" x", "y"], false) == [" x", "y"] - @test process_triple_strings!(["\n x", " y"], false) == ["x", "y"] - @test process_triple_strings!([" x", " y\n"], false) == [" x", " y\n"] - @test process_triple_strings!([" \tx", " \ty"], false) == ["x", "y"] - @test process_triple_strings!([" \tx", " y"], false) == ["\tx", " y"] + # Various combinations of dedent + leading newline stripping + @test process_triple_strings!(["\n x", "\n y"], false) == ["x", "\ny"] + @test process_triple_strings!(["\n\tx", "\n\ty"], false) == ["x", "\ny"] + @test process_triple_strings!(["\r x", "\r y"], false) == ["x", "\ny"] + @test process_triple_strings!(["\r x\r y"], false) == ["x\ny"] + @test process_triple_strings!(["\r x\r\r y"], false) == ["x\n\ny"] + @test process_triple_strings!(["\n \t x", "\n \t y"], false) == ["x", "\ny"] + # Cases of no dedent + newline normalization + @test process_triple_strings!(["\n x", "\ny"], false) == [" x", "\ny"] + @test process_triple_strings!(["\nx", "\n y"], false) == ["x", "\n y"] + @test process_triple_strings!(["\n y\n"], false) == [" y\n"] + @test process_triple_strings!(["\n y\r"], false) == [" y\n"] end - From c1601de4185d992a9e89685d8cbd01546d9ab6a0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 18:15:09 +1000 Subject: [PATCH 0306/1109] Add test file for parsing Base Not yet connected, as it causes a few failures still. 
--- JuliaSyntax/test/parse_base.jl | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 JuliaSyntax/test/parse_base.jl diff --git a/JuliaSyntax/test/parse_base.jl b/JuliaSyntax/test/parse_base.jl new file mode 100644 index 0000000000000..5597585165c7a --- /dev/null +++ b/JuliaSyntax/test/parse_base.jl @@ -0,0 +1,7 @@ + +@testset "JuliaSyntax Base parsing" begin + basedir = "/home/chris/dev/julia/base" + for f in readdir(joinpath(basedir)) + test_parse_file(basedir, f) + end +end From d1f423f516494b81619f4a3196d55c5a5ccb86dc Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 18:37:47 +1000 Subject: [PATCH 0307/1109] Fix parse_docstring to allow interpolated docstrings --- JuliaSyntax/src/parser.jl | 19 ++++++++----------- JuliaSyntax/test/parser.jl | 7 ++++++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f37582639214d..4c258a6e905b1 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2804,31 +2804,28 @@ end # flisp: parse-docstring function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) - # TODO? This is not quite equivalent to the flisp parser which accepts - # more than just a string. For example: - #! ("doc") foo ==> (macrocall core_@doc "doc" foo) - # TODO: Also, all these TOMBSTONEs seem kind of inefficient. Perhaps we can - # improve things? 
- maybe_doc = is_string_delim(peek(ps)) atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) - if maybe_doc + if peek_behind(ps) in KSet`String string` is_doc = true k = peek(ps) if is_closing_token(ps, k) + # "notdoc" ] ==> "notdoc" is_doc = false elseif k == K"NewlineWs" k2 = peek(ps, 2) if is_closing_token(ps, k2) || k2 == K"NewlineWs" + # "notdoc" \n] ==> "notdoc" + # "notdoc" \n\n foo ==> "notdoc" is_doc = false else # Allow a single newline - # === - # "doc" - # foo - # ==> (macrocall core_@doc "doc" foo) + # "doc" \n foo ==> (macrocall core_@doc "doc" foo) bump(ps, TRIVIA_FLAG) # NewlineWs end + else + # "doc" foo ==> (macrocall core_@doc "doc" foo) + # "doc $x" foo ==> (macrocall core_@doc (string "doc " x) foo) end if is_doc reset_node!(ps, atdoc_mark, kind=K"core_@doc") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 08ab08e430b0a..29104d8ea66f3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -550,7 +550,12 @@ tests = [ "\"str\"" => "\"str\"" ], JuliaSyntax.parse_docstring => [ - """ "doc" foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" + """ "notdoc" ] """ => "\"notdoc\"" + """ "notdoc" \n] """ => "\"notdoc\"" + """ "notdoc" \n\n foo """ => "\"notdoc\"" + """ "doc" \n foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" + """ "doc" foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" + """ "doc \$x" foo """ => """(macrocall :(Core.var"@doc") (string "doc " x) foo)""" ], ] From bf171bc60414e3c00cec4377c77f7ace6cb49992 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 18:45:03 +1000 Subject: [PATCH 0308/1109] Fix triple quoted string initial newline stripping --- JuliaSyntax/src/value_parsing.jl | 8 ++------ JuliaSyntax/test/value_parsing.jl | 30 ++++++++++++++++++------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index cc38a6f5b89b4..d14e27a88aacb 100644 --- 
a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -83,9 +83,7 @@ function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent:: c = '\n' end if c == '\n' - if skip_initial_newline - skip_initial_newline = false - else + if i > 1 || !skip_initial_newline write(io, c) end if i+1 <= lastidx && str[i+1] != '\n' && str[i+1] != '\r' @@ -136,9 +134,7 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer, ski c = '\n' end if c == '\n' - if skip_initial_newline - skip_initial_newline = false - else + if i > 1 || !skip_initial_newline write(io, c) end if i+1 <= lastidx && str[i+1] != '\n' && str[i+1] != '\r' diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index c02298ee6f68f..89869dd86e39b 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -141,16 +141,22 @@ end end @testset "Triple quoted string deindentation" begin - # Various combinations of dedent + leading newline stripping - @test process_triple_strings!(["\n x", "\n y"], false) == ["x", "\ny"] - @test process_triple_strings!(["\n\tx", "\n\ty"], false) == ["x", "\ny"] - @test process_triple_strings!(["\r x", "\r y"], false) == ["x", "\ny"] - @test process_triple_strings!(["\r x\r y"], false) == ["x\ny"] - @test process_triple_strings!(["\r x\r\r y"], false) == ["x\n\ny"] - @test process_triple_strings!(["\n \t x", "\n \t y"], false) == ["x", "\ny"] - # Cases of no dedent + newline normalization - @test process_triple_strings!(["\n x", "\ny"], false) == [" x", "\ny"] - @test process_triple_strings!(["\nx", "\n y"], false) == ["x", "\n y"] - @test process_triple_strings!(["\n y\n"], false) == [" y\n"] - @test process_triple_strings!(["\n y\r"], false) == [" y\n"] + # Weird thing I noticed: In Julia 1.7 this @testset for loop adds an + # absurd amount of testing latency given how trivial it is. Why? Is it + # because of compiler heuristics which try to compile all for loops? 
+ @testset "Raw=$raw" for raw in (false, true) + # Various combinations of dedent + leading newline stripping + @test process_triple_strings!(["\n x", "\n y"], raw) == ["x", "\ny"] + @test process_triple_strings!(["\n\tx", "\n\ty"], raw) == ["x", "\ny"] + @test process_triple_strings!(["\r x", "\r y"], raw) == ["x", "\ny"] + @test process_triple_strings!(["\r x\r y"], raw) == ["x\ny"] + @test process_triple_strings!(["\r x\r\r y"], raw) == ["x\n\ny"] + @test process_triple_strings!(["\n \t x", "\n \t y"], raw) == ["x", "\ny"] + @test process_triple_strings!(["x\n\n y", "\n z"], raw) == ["x\n\ny", "\nz"] + # Cases of no dedent + newline normalization + @test process_triple_strings!(["\n x", "\ny"], raw) == [" x", "\ny"] + @test process_triple_strings!(["\nx", "\n y"], raw) == ["x", "\n y"] + @test process_triple_strings!(["\n y\n"], raw) == [" y\n"] + @test process_triple_strings!(["\n y\r"], raw) == [" y\n"] + end end From 4bea351fc08da255a4d5f731eb0552bcdd8513ae Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 21:10:57 +1000 Subject: [PATCH 0309/1109] Fix space sensitive parsing of primitive types This allows the bit size to be interpolated. 
--- JuliaSyntax/src/parser.jl | 9 ++++++--- JuliaSyntax/test/parser.jl | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4c258a6e905b1..fe59152857421 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1500,12 +1500,13 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"struct") elseif word == K"primitive" # primitive type A 32 end ==> (primitive A 32) + # primitive type A $N end ==> (primitive A ($ N)) # primitive type A <: B \n 8 \n end ==> (primitive (<: A B) 8) bump(ps, TRIVIA_FLAG) @assert peek(ps) == K"type" bump(ps, TRIVIA_FLAG) - parse_subtype_spec(ps) - parse_cond(ps) + with_space_sensitive(parse_subtype_spec, ps) + with_space_sensitive(parse_cond, ps) bump_closing_token(ps, K"end") emit(ps, mark, K"primitive") elseif word == K"try" @@ -1879,7 +1880,9 @@ function parse_macro_name(ps::ParseState; remap_kind=false) # @. y ==> (macrocall (quote @__dot__) y) bump(ps) else - parse_atom(ps, false) + with_space_sensitive(ps) do ps1 + parse_atom(ps1, false) + end end if remap_kind reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps))) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 29104d8ea66f3..b3aa949eccf09 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -317,7 +317,8 @@ tests = [ "abstract type A <: B{T,S} end" => "(abstract (<: A (curly B T S)))" "abstract type A < B end" => "(abstract (call-i A < B))" # primitive type - "primitive type A 32 end" => "(primitive A 32)" + "primitive type A 32 end" => "(primitive A 32)" + "primitive type A \$N end" => "(primitive A (\$ N))" "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (:: a X)))" From 121229fe086f39bce56201cf5ebfb731cb10dfe9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 9 Jan 2022 21:13:36 +1000 Subject: [PATCH 0310/1109] Move 
ParseState into parser.jl ParseState is very specific to Julia parsing, whereas the ParseStream is somewhat less so and might be generalizable. So it seems ParseState might belong better in with the main body of the parser code. --- JuliaSyntax/src/parse_stream.jl | 121 -------------------------------- JuliaSyntax/src/parser.jl | 121 ++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 121 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8491d5c355668..431393e2bbafe 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -582,124 +582,3 @@ end #------------------------------------------------------------------------------- -""" -ParseState carries parser context as we recursively descend into the parse -tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix -literals we're in `space_sensitive` mode, and `[x -y]` means [(x) (-y)]. -""" -struct ParseState - stream::ParseStream - # Vesion of Julia we're parsing this code for. May be different from VERSION! 
- julia_version::VersionNumber - - # Disable range colon for parsing ternary conditional operator - range_colon_enabled::Bool - # In space-sensitive mode "x -y" is 2 expressions, not a subtraction - space_sensitive::Bool - # Seeing `for` stops parsing macro arguments and makes a generator - for_generator::Bool - # Treat 'end' like a normal symbol instead of a reserved word - end_symbol::Bool - # Treat newline like ordinary whitespace instead of as a potential separator - whitespace_newline::Bool - # Enable parsing `where` with high precedence - where_enabled::Bool -end - -# Normal context -function ParseState(stream::ParseStream; julia_version=VERSION) - ParseState(stream, julia_version, true, false, false, false, false, true) -end - -function ParseState(ps::ParseState; range_colon_enabled=nothing, - space_sensitive=nothing, for_generator=nothing, - end_symbol=nothing, whitespace_newline=nothing, - where_enabled=nothing) - ParseState(ps.stream, ps.julia_version, - range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, - space_sensitive === nothing ? ps.space_sensitive : space_sensitive, - for_generator === nothing ? ps.for_generator : for_generator, - end_symbol === nothing ? ps.end_symbol : end_symbol, - whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, - where_enabled === nothing ? ps.where_enabled : where_enabled) -end - -# Functions to change parse state - -function normal_context(ps::ParseState) - ParseState(ps, - range_colon_enabled=true, - space_sensitive=false, - where_enabled=true, - for_generator=false, - end_symbol=false, - whitespace_newline=false) -end - -function with_space_sensitive(f::Function, ps::ParseState) - f(ParseState(ps, - space_sensitive=true, - whitespace_newline=false)) -end - -# Convenient wrappers for ParseStream - -function peek(ps::ParseState, n=1; skip_newlines=nothing) - skip_nl = isnothing(skip_newlines) ? 
ps.whitespace_newline : skip_newlines - peek(ps.stream, n; skip_newlines=skip_nl) -end - -function peek_token(ps::ParseState, n=1; skip_newlines=nothing) - skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines - peek_token(ps.stream, n, skip_newlines=skip_nl) -end - -function peek_behind_str(ps::ParseState, args...) - peek_behind_str(ps.stream, args...) -end - -function peek_behind(ps::ParseState, args...) - peek_behind(ps.stream, args...) -end - -function peek_token_behind(ps::ParseState, args...; kws...) - peek_token_behind(ps.stream, args...; kws...) -end - -function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) - skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines - bump(ps.stream, flags; skip_newlines=skip_nl, kws...) -end - -function bump_trivia(ps::ParseState, args...; kws...) - bump_trivia(ps.stream, args...; kws...) -end - -function bump_invisible(ps::ParseState, args...; kws...) - bump_invisible(ps.stream, args...; kws...) -end - -function bump_glue(ps::ParseState, args...; kws...) - bump_glue(ps.stream, args...; kws...) -end - -function bump_split(ps::ParseState, args...; kws...) - bump_split(ps.stream, args...; kws...) -end - -function reset_node!(ps::ParseState, args...; kws...) - reset_node!(ps.stream, args...; kws...) -end - -function Base.position(ps::ParseState, args...) - position(ps.stream, args...) -end - -function emit(ps::ParseState, args...; kws...) - emit(ps.stream, args...; kws...) -end - -function emit_diagnostic(ps::ParseState, args...; kws...) - emit_diagnostic(ps.stream, args...; kws...) -end - diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index fe59152857421..8c26b4b9a35fa 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1,3 +1,124 @@ +""" +ParseState carries parser context as we recursively descend into the parse +tree. 
For example, normally `x -y` means `(x) - (y)`, but when parsing matrix +literals we're in `space_sensitive` mode, and `[x -y]` means [(x) (-y)]. +""" +struct ParseState + stream::ParseStream + # Vesion of Julia we're parsing this code for. May be different from VERSION! + julia_version::VersionNumber + + # Disable range colon for parsing ternary conditional operator + range_colon_enabled::Bool + # In space-sensitive mode "x -y" is 2 expressions, not a subtraction + space_sensitive::Bool + # Seeing `for` stops parsing macro arguments and makes a generator + for_generator::Bool + # Treat 'end' like a normal symbol instead of a reserved word + end_symbol::Bool + # Treat newline like ordinary whitespace instead of as a potential separator + whitespace_newline::Bool + # Enable parsing `where` with high precedence + where_enabled::Bool +end + +# Normal context +function ParseState(stream::ParseStream; julia_version=VERSION) + ParseState(stream, julia_version, true, false, false, false, false, true) +end + +function ParseState(ps::ParseState; range_colon_enabled=nothing, + space_sensitive=nothing, for_generator=nothing, + end_symbol=nothing, whitespace_newline=nothing, + where_enabled=nothing) + ParseState(ps.stream, ps.julia_version, + range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, + space_sensitive === nothing ? ps.space_sensitive : space_sensitive, + for_generator === nothing ? ps.for_generator : for_generator, + end_symbol === nothing ? ps.end_symbol : end_symbol, + whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, + where_enabled === nothing ? 
ps.where_enabled : where_enabled) +end + +# Functions to change parse state + +function normal_context(ps::ParseState) + ParseState(ps, + range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + for_generator=false, + end_symbol=false, + whitespace_newline=false) +end + +function with_space_sensitive(f::Function, ps::ParseState) + f(ParseState(ps, + space_sensitive=true, + whitespace_newline=false)) +end + +# Convenient wrappers for ParseStream + +function peek(ps::ParseState, n=1; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek(ps.stream, n; skip_newlines=skip_nl) +end + +function peek_token(ps::ParseState, n=1; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek_token(ps.stream, n, skip_newlines=skip_nl) +end + +function peek_behind_str(ps::ParseState, args...) + peek_behind_str(ps.stream, args...) +end + +function peek_behind(ps::ParseState, args...) + peek_behind(ps.stream, args...) +end + +function peek_token_behind(ps::ParseState, args...; kws...) + peek_token_behind(ps.stream, args...; kws...) +end + +function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + bump(ps.stream, flags; skip_newlines=skip_nl, kws...) +end + +function bump_trivia(ps::ParseState, args...; kws...) + bump_trivia(ps.stream, args...; kws...) +end + +function bump_invisible(ps::ParseState, args...; kws...) + bump_invisible(ps.stream, args...; kws...) +end + +function bump_glue(ps::ParseState, args...; kws...) + bump_glue(ps.stream, args...; kws...) +end + +function bump_split(ps::ParseState, args...; kws...) + bump_split(ps.stream, args...; kws...) +end + +function reset_node!(ps::ParseState, args...; kws...) + reset_node!(ps.stream, args...; kws...) +end + +function Base.position(ps::ParseState, args...) + position(ps.stream, args...) 
+end + +function emit(ps::ParseState, args...; kws...) + emit(ps.stream, args...; kws...) +end + +function emit_diagnostic(ps::ParseState, args...; kws...) + emit_diagnostic(ps.stream, args...; kws...) +end + #------------------------------------------------------------------------------- # Parser Utils From 1ddd29a59b4b3c01f6011b57fe8a559b32ee7bbb Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 09:33:23 +1000 Subject: [PATCH 0311/1109] Rework parsing of function and macro names There's quite a wide variety of syntax allowed in the function "name" component which makes distinguishing it anonymous function syntax quite hard. Rework the parsing of function names to make this more robust. * Separate macro name parsing as this is simpler - anonymous macros don't make much sense! * Use parse_brackets() directly to allow for more fine-grained control over the the tuple which is emitted for anonymous function arguments * Rework peek_behind to preserve the original (rather than remapped) token kind so we can reject keywords as function names. * Improve parse_identifier_or_interpolate to disallow syntactic operator expressions (in macro names and export/import lists) --- JuliaSyntax/README.md | 31 ++--- JuliaSyntax/src/parse_stream.jl | 54 +++++---- JuliaSyntax/src/parser.jl | 194 ++++++++++++++++++++------------ JuliaSyntax/test/parser.jl | 70 +++++++++--- 4 files changed, 224 insertions(+), 125 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index d30a25308ac9b..e156c66e575c1 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -62,7 +62,7 @@ Additionally, green trees are usually designed so that * Nodes are homogenously typed at the language level so they can be efficiently stored and accessed, with the node type held as a "syntax kind" enumeration. 
-## Syntax Kinds and sum types +## Syntax kinds and sum types We generally track the type of syntax nodes with a syntax "kind", stored explicitly in each node an integer tag. This effectively makes the node type a @@ -481,26 +481,28 @@ Some resources: # Differences from the flisp parser -## Make parsing decisions earlier +Practically the flisp parser is not quite a classic [recursive descent +parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it +often looks back and modifies the output tree it has already produced. We've +tried to eliminate this pattern it favor of lookahead where possible because -The flisp-based parser has many places where it parses an expression and then -optionally rearranges the resulting AST, modifying heads of expressions etc. - -This parser tries hard to avoid that pattern becase * It works poorly when the parser is emitting a stream of node spans rather than eagerly creating a tree data structure. -* It's confusing to re-make parsing decisions +* It's confusing to reason about this kind of code -Often the information required to avoid postprocessing the parse tree is -available early with a bit of restructuring and we make use of this wherever -possible. +However, on occasion it seems to solve genuine ambiguities where Julia code +can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=` +ambiguity within parentheses. In these cases we put up with using the +functions `look_behind` and `reset_node!()`. -## Function names +## Code structure Large structural changes were generally avoided while porting. In particular, nearly all function names for parsing productions are the same with `-` replaced by `_` and predicates prefixed by `is_`. +Some notable differences: + * `parse-arglist` and a parts of `parse-paren-` have been combined into a general function `parse_brackets`. 
This function deals with all the odd corner cases of how the AST is emitted when mixing `,` and `;` within @@ -508,7 +510,6 @@ replaced by `_` and predicates prefixed by `is_`. - Determining whether `;` are block syntax separators or keyword parameters - Determining whether to emit `parameter` sections based on context - Emitting key-value pairs either as `kw` or `=` depending on context - * The way that `parse-resword` is entered has been rearranged to avoid parsing reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we detect reserved words and enter `parse_resword` earlier. @@ -576,8 +577,10 @@ parsing `key=val` pairs inside parentheses. (a,b; c,d; e,f) ``` * Long-form anonymous functions have argument lists which are parsed - as tuples rather than argument lists. This leads to more inconsistency in the - use of `kw` for keywords. + as tuples (or blocks!) rather than argument lists and this mess appears to be + papered over as part of lowering. For example, in `function (a;b) end` the + `(a;b)` is parsed as a block! This leads to more inconsistency in the use of + `kw` for keywords. ### Flattened generators diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 431393e2bbafe..45939a354ecef 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -40,6 +40,7 @@ TODO: Optimize this data structure? It's very large at the moment. 
""" struct TaggedRange head::SyntaxHead # Kind,flags + orig_kind::Kind # Kind of the original token for leaf tokens, or K"Nothing" first_byte::Int # First byte in the input text last_byte::Int # Last byte in the input text start_mark::Int # Index of first emitted range which this range covers @@ -263,33 +264,41 @@ function peek_behind_str(stream::ParseStream, pos::ParseStreamPosition, str::Str return _peek_equal_to(stream, first_byte(s), span(s), str) end +function _peek_behind_fields(ranges, i) + r = ranges[i] + return (kind=kind(r), + flags=flags(r), + orig_kind=r.orig_kind, + is_leaf=r.start_mark == i) +end + """ -Return the kind of span which was previously inserted into the output, -defaulting to the most previous nontrivia node. + peek_behind(ps; skip_trivia=true) + peek_behind(ps, pos::ParseStreamPosition) -Retroactively inspecting/modifying the parser's output can be confusing, so +Return information about a span which was previously inserted into the output, +defaulting to the most previous nontrivia node when `skip_trivia` is true, or +at the provided position `pos`. + +Retroactively inspecting or modifying the parser's output can be confusing, so using this function should be avoided where possible. 
""" function peek_behind(stream::ParseStream; skip_trivia::Bool=true) - kind(peek_token_behind(stream; skip_trivia=skip_trivia)) -end - -function peek_token_behind(stream::ParseStream; skip_trivia::Bool=true) if skip_trivia for i = length(stream.ranges):-1:1 - s = stream.ranges[i] - if !is_trivia(head(s)) - return head(s) + r = stream.ranges[i] + if !is_trivia(head(r)) + return _peek_behind_fields(stream.ranges, i) end end elseif !isempty(stream.ranges) - return head(last(stream.ranges)) + return _peek_behind_fields(stream.ranges, lastindex(stream.ranges)) end - return SyntaxHead(K"Nothing", EMPTY_FLAGS) + internal_error("Can't peek behind at start of stream") end function peek_behind(stream::ParseStream, pos::ParseStreamPosition) - return kind(stream.ranges[pos.output_index]) + return _peek_behind_fields(stream.ranges, pos.output_index) end #------------------------------------------------------------------------------- @@ -312,8 +321,8 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags tok.raw.dotop && (f |= DOTOP_FLAG) - k = (is_trivia || remap_kind == K"Nothing") ? k : remap_kind - range = TaggedRange(SyntaxHead(k, f), first_byte(tok), + outk = (is_trivia || remap_kind == K"Nothing") ? k : remap_kind + range = TaggedRange(SyntaxHead(outk, f), k, first_byte(tok), last_byte(tok), lastindex(stream.ranges)+1) push!(stream.ranges, range) end @@ -375,7 +384,7 @@ lexing ambiguities. There's no special whitespace handling — bump any whitespace if necessary with bump_trivia. """ function bump_glue(stream::ParseStream, kind, flags, num_tokens) - span = TaggedRange(SyntaxHead(kind, flags), + span = TaggedRange(SyntaxHead(kind, flags), K"Nothing", first_byte(stream.lookahead[1]), last_byte(stream.lookahead[num_tokens]), lastindex(stream.ranges) + 1) @@ -397,9 +406,9 @@ example function bump_split(stream::ParseStream, split_spec...) 
tok = popfirst!(stream.lookahead) fbyte = first_byte(tok) - for (i, (nbyte, kind, flags)) in enumerate(split_spec) + for (i, (nbyte, k, f)) in enumerate(split_spec) lbyte = i == length(split_spec) ? last_byte(tok) : fbyte + nbyte - 1 - push!(stream.ranges, TaggedRange(SyntaxHead(kind, flags), + push!(stream.ranges, TaggedRange(SyntaxHead(k, f), kind(tok), fbyte, lbyte, lastindex(stream.ranges) + 1)) fbyte += nbyte @@ -425,8 +434,8 @@ function reset_node!(stream::ParseStream, mark::ParseStreamPosition; k = isnothing(kind) ? (@__MODULE__).kind(range) : kind f = isnothing(flags) ? (@__MODULE__).flags(range) : flags stream.ranges[mark.output_index] = - TaggedRange(SyntaxHead(k, f), first_byte(range), last_byte(range), - range.start_mark) + TaggedRange(SyntaxHead(k, f), range.orig_kind, + first_byte(range), last_byte(range), range.start_mark) end function Base.position(stream::ParseStream) @@ -442,7 +451,7 @@ should be a previous return value of `position()`. """ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - range = TaggedRange(SyntaxHead(kind, flags), mark.input_byte, + range = TaggedRange(SyntaxHead(kind, flags), K"Nothing", mark.input_byte, stream.next_byte-1, mark.output_index+1) if !isnothing(error) _emit_diagnostic(stream, first_byte(range), last_byte(range), error=error) @@ -579,6 +588,3 @@ function parse_all(::Type{GreenNode}, code) stream = parse_all(code) build_tree(GreenNode, stream) end - - -#------------------------------------------------------------------------------- diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8c26b4b9a35fa..d51874ddae83d 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -78,10 +78,6 @@ function peek_behind(ps::ParseState, args...) peek_behind(ps.stream, args...) end -function peek_token_behind(ps::ParseState, args...; kws...) - peek_token_behind(ps.stream, args...; kws...) 
-end - function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines bump(ps.stream, flags; skip_newlines=skip_nl, kws...) @@ -214,7 +210,7 @@ function is_initial_reserved_word(ps::ParseState, k) end function is_contextural_keyword(k) - kind(k) ∈ KSet`mutable primitive abstract` + kind(k) ∈ KSet`as abstract mutable outer primitive type` end function is_reserved_word(k) @@ -825,7 +821,7 @@ function parse_unary_subtype(ps::ParseState) mark = position(ps) bump(ps, TRIVIA_FLAG) parse_where(ps, parse_juxtapose) - if peek_behind(ps) == K"tuple" + if peek_behind(ps).kind == K"tuple" TODO("Can this even happen?") end emit(ps, mark, k) @@ -908,7 +904,7 @@ function parse_juxtapose(ps::ParseState) parse_unary(ps) n_terms = 1 while true - prev_kind = peek_behind(ps) + prev_kind = peek_behind(ps).kind t = peek_token(ps) if !is_juxtapose(ps, prev_kind, t) break @@ -1192,22 +1188,21 @@ end # Parse a symbol or interpolation syntax (a restricted version of # parse_unary_prefix) -function parse_identifier_or_interpolate(ps::ParseState, outermost=true) +function parse_identifier_or_interpolate(ps::ParseState) mark = position(ps) if peek(ps) == K"$" bump(ps, TRIVIA_FLAG) # $a ==> ($ a) # $$a ==> ($ ($ a)) - parse_identifier_or_interpolate(ps, false) + parse_unary_prefix(ps) emit(ps, mark, K"$") else parse_atom(ps) - if outermost - kb = peek_behind(ps) - if !(is_identifier(kb) || is_operator(kb)) - emit(ps, mark, K"error", - error="Expected identifier or interpolation syntax") - end + b = peek_behind(ps) + # export (x::T) ==> (export (error (:: x T))) + # export outer ==> (export outer) + if !b.is_leaf || !(is_identifier(b.kind) || is_operator(b.kind)) + emit(ps, mark, K"error", error="Expected identifier") end end end @@ -1221,7 +1216,7 @@ function finish_macroname(ps, mark, is_valid_modref, macro_name_position, name_kind=nothing) if is_valid_modref if isnothing(name_kind) - name_kind = 
macro_name_kind(peek_behind(ps, macro_name_position)) + name_kind = macro_name_kind(peek_behind(ps, macro_name_position).kind) end reset_node!(ps, macro_name_position, kind = name_kind) else @@ -1236,14 +1231,14 @@ end # # flisp: parse-call-chain, parse-call-with-initial-ex function parse_call_chain(ps::ParseState, mark, is_macrocall=false) - if is_number(peek_behind(ps)) && peek(ps) == K"(" + if is_number(peek_behind(ps).kind) && peek(ps) == K"(" # juxtaposition with numbers is multiply, not call # 2(x) ==> (* 2 x) return end # source range of the @-prefixed part of a macro macro_atname_range = nothing - kb = peek_behind(ps) + kb = peek_behind(ps).kind is_valid_modref = is_identifier(kb) || kb == K"." # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. @@ -1269,6 +1264,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) n_args = parse_space_separated_exprs(ps) + # TODO: Introduce K"doc" to make this hack less awful. is_doc_macro = peek_behind_str(ps, macro_name_position, "doc") if is_doc_macro && n_args == 1 # Parse extended @doc args on next line @@ -1555,7 +1551,7 @@ function parse_resword(ps::ParseState) # let x=1\n end ==> (let (= x 1) (block)) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_eq_star) - kb = peek_behind(ps) + kb = peek_behind(ps).kind # Wart: This ugly logic seems unfortunate. Why not always emit a block? 
# let x=1 ; end ==> (let (= x 1) (block)) # let x::1 ; end ==> (let (:: x 1) (block)) @@ -1608,7 +1604,8 @@ function parse_resword(ps::ParseState) # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (:: a X))) if word == K"mutable" # mutable struct A end ==> (struct true A (block)) - bump(ps, remap_kind=K"true") + bump(ps, TRIVIA_FLAG) + bump_invisible(ps, K"true") else # struct A end ==> (struct false A (block)) bump_invisible(ps, K"false") @@ -1657,7 +1654,8 @@ function parse_resword(ps::ParseState) elseif word in KSet`module baremodule` # module A end ==> (module true A (block)) # baremodule A end ==> (module false A (block)) - bump(ps, remap_kind= (word == K"module") ? K"true" : K"false") + bump(ps, TRIVIA_FLAG) + bump_invisible(ps, (word == K"module") ? K"true" : K"false") if is_reserved_word(peek(ps)) # module do \n end ==> (module true (error do) (block)) bump(ps, error="Invalid module name") @@ -1809,58 +1807,96 @@ end function parse_function(ps::ParseState) mark = position(ps) word = peek(ps) - is_func = word == K"function" + @assert word in KSet`macro function` + is_function = word == K"function" + is_anon_func::Bool = false bump(ps, TRIVIA_FLAG) bump_trivia(ps) def_mark = position(ps) - k = peek(ps) - if k == K"(" - # Wart: flisp parser parses anon function arguments as tuples, roughly - # like `parse_paren(ps)`, but the code to disambiguate those cases - # is kind of awful. 
- # - # It seems much more consistent to treat them as function argument lists: - # function (x,y) end ==> (function (tuple x y) (block)) - # function (x=1) end ==> (function (tuple (kw x 1)) (block)) - # function (;x=1) end ==> (function (tuple (parameters (kw x 1))) (block)) - bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")", false) - emit(ps, def_mark, K"tuple") - # function (x) body end ==> (function (tuple x) (block body)) - # - # Wart: flisp parser allows the following but it's invalid syntax in lowering - # macro (x) end !=> (macro (tuple x) (block)) - # Fix is simple: - if !is_func - # macro (x) end ==> (macro (error (tuple x)) (block)) - emit(ps, def_mark, K"error", error="Expected macro name") + if !is_function + # Parse macro name + parse_identifier_or_interpolate(ps) + kb = peek_behind(ps).orig_kind + if is_initial_reserved_word(ps, kb) + # macro while(ex) end ==> (macro (call (error while) ex) (block)) + emit(ps, def_mark, K"error", error="Invalid macro name") + else + # macro f() end ==> (macro (call f) (block)) + # macro (:)(ex) end ==> (macro (call : ex) (block)) + # macro (type)(ex) end ==> (macro (call type ex) (block)) end else - if is_keyword(k) - # Forbid things like - # function begin() end ==> (function (call (error (begin))) (block)) - # macro begin() end ==> (macro (call (error (begin))) (block)) - bump(ps, error="invalid $(untokenize(word)) name") - else - # function f() end ==> (function (call f) (block)) - # function \n f() end ==> (function (call f) (block)) - # function $f() end ==> (function (call ($ f)) (block)) - parse_identifier_or_interpolate(ps) - end - if peek(ps, skip_newlines=true) == K"end" - # Function definition with no methods - # function f end ==> (function f) - # function f \n\n end ==> (function f) - # function $f end ==> (function ($ f)) + if peek(ps) == K"(" bump(ps, TRIVIA_FLAG) - emit(ps, mark, word) - return + # When an initial parenthesis is present, we might either have the + # function name or the argument 
list in an anonymous function. We + # use parse_brackets directly here (rather than dispatching to it + # via parse_atom) so we can distinguish these two cases by peeking + # at the following parenthesis, if present. + # + # The flisp parser disambiguates this case quite differently, + # producing less consistent syntax for anonymous functions. + parse_brackets(ps, K")") do _, _, _ + bump_closing_token(ps, K")") + is_anon_func = peek(ps) != K"(" + return (needs_parameters = is_anon_func, + eq_is_kw_before_semi = is_anon_func, + eq_is_kw_after_semi = is_anon_func) + end + if is_anon_func + # function (x) body end ==> (function (tuple x) (block body)) + # function (x,y) end ==> (function (tuple x y) (block)) + # function (x=1) end ==> (function (tuple (kw x 1)) (block)) + # function (;x=1) end ==> (function (tuple (parameters (kw x 1))) (block)) + emit(ps, def_mark, K"tuple") + else + # function (:)() end ==> (function (call :) (block)) + # function (x::T)() end ==> (function (call (:: x T)) (block)) + # function (::T)() end ==> (function (call (:: T)) (block)) + end + else + parse_unary_prefix(ps) + end + if !is_anon_func + kb = peek_behind(ps).orig_kind + if is_reserved_word(kb) + # function begin() end ==> (function (call (error begin)) (block)) + emit(ps, def_mark, K"error", error="Invalid function name") + else + # function f() end ==> (function (call f) (block)) + # function type() end ==> (function (call type) (block)) + # function \n f() end ==> (function (call f) (block)) + # function $f() end ==> (function (call ($ f)) (block)) + # function (:)() end ==> (function (call :) (block)) + # function (::Type{T})(x) end ==> (function (call (:: (curly Type T)) x) (block)) + end end + end + if peek(ps, skip_newlines=true) == K"end" && !is_anon_func + # Function/macro definition with no methods + # function f end ==> (function f) + # function f \n\n end ==> (function f) + # function $f end ==> (function ($ f)) + # macro f end ==> (macro f) + bump(ps, TRIVIA_FLAG) + 
emit(ps, mark, word) + return + end + if !is_anon_func + # Parse function argument list + # function f(x,y) end ==> (function (call f x y) (block)) + # function f{T}() end ==> (function (call (curly f T)) (block)) + # function A.f() end ==> (function (call (. A (quote f))) (block)) parse_call_chain(ps, def_mark) + if peek_behind(ps).kind != K"call" + # function f body end ==> (function (error f) (block body)) + emit(ps, def_mark, K"error", + error="Invalid signature in $(untokenize(word)) definition") + end end - if is_func && peek(ps) == K"::" - # Return type + if is_function && peek(ps) == K"::" + # Function return type # function f()::T end ==> (function (:: (call f) T) (block)) # function f()::g(T) end ==> (function (:: (call f) (call g T)) (block)) bump(ps, TRIVIA_FLAG) @@ -1868,6 +1904,7 @@ function parse_function(ps::ParseState) emit(ps, def_mark, K"::") end if peek(ps) == K"where" + # Function signature where syntax # function f() where {T} end ==> (function (where (call f) T) (block)) # function f() where T end ==> (function (where (call f) T) (block)) parse_where_chain(ps, def_mark) @@ -2006,7 +2043,7 @@ function parse_macro_name(ps::ParseState; remap_kind=false) end end if remap_kind - reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps))) + reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps).kind)) end end @@ -2734,14 +2771,14 @@ function parse_string(ps::ParseState) m = position(ps) parse_atom(ps) if ps.julia_version >= v"1.6" - head = peek_token_behind(ps) - if kind(head) == K"String" + prev = peek_behind(ps) + if prev.kind == K"String" # Wrap interpolated literal strings in (string) so we can # distinguish them from the surrounding text (issue #38501) # "hi$("ho")" ==> (string "hi" (string "ho")) # "hi$("""ho""")" ==> (string "hi" (string-s "ho")) #v1.5: "hi$("ho")" ==> (string "hi" "ho") - emit(ps, m, K"string", flags(head)) + emit(ps, m, K"string", prev.flags) end end elseif is_identifier(k) @@ -2858,14 +2895,24 @@ 
function parse_atom(ps::ParseState, check_identifiers=true) bump(ps) t = peek_token(ps) if !t.had_whitespace && !(is_operator(kind(t)) || is_non_keyword_closer(t)) + # var"x"end ==> (error (end)) + # var"x"1 ==> (error 1) + # var"x"y ==> (error y) bump(ps, error="suffix not allowed after var\"...\" syntax") + else + # var"x") ==> x + # var"x"+ ==> x end elseif is_operator(leading_kind) - # Operators and keywords are generally turned into identifiers if used - # as atoms. if check_identifiers && is_syntactic_operator(leading_kind) + # += ==> (error +=) + # .+= ==> (error .+=) bump(ps, error="invalid identifier") else + # + ==> + + # ~ ==> ~ + # Quoted syntactic operators allowed + # :+= ==> (quote +=) bump(ps) end elseif is_keyword(leading_kind) @@ -2873,7 +2920,9 @@ function parse_atom(ps::ParseState, check_identifiers=true) # :(end) ==> (quote (error end)) bump(ps, error="invalid identifier") else + # Remap keywords to identifiers. # :end ==> (quote end) + # :<: ==> (quote <:) bump(ps, remap_kind=K"Identifier") end elseif leading_kind == K"(" # parens or tuple @@ -2889,10 +2938,15 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif is_string_delim(leading_kind) parse_string(ps) elseif leading_kind == K"@" # macro call + # Macro names can be keywords + # @end x ==> (macrocall @end x) + # @. 
x y ==> (macrocall @__dot__ x y) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) parse_call_chain(ps, mark, true) elseif leading_kind in KSet`\` \`\`\`` + # `cmd` ==> (macrocall core_@cmd "cmd") + # ```cmd``` ==> (macrocall core_@cmd "cmd"-s) bump_invisible(ps, K"core_@cmd") parse_raw_string(ps) emit(ps, mark, K"macrocall") @@ -2930,7 +2984,7 @@ function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) - if peek_behind(ps) in KSet`String string` + if peek_behind(ps).kind in KSet`String string` is_doc = true k = peek(ps) if is_closing_token(ps, k) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b3aa949eccf09..07a5cbf85b74f 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -347,6 +347,8 @@ tests = [ "export +, ==" => "(export + ==)" "export \n a" => "(export a)" "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" + "export (x::T)" => "(export (error (:: x T)))" + "export outer" => "(export outer)" ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif (block b) (block yy) (block zz)))" @@ -374,21 +376,38 @@ tests = [ "const x" => "(const (error x (error)))" ], JuliaSyntax.parse_function => [ - "function (x) body end" => "(function (tuple x) (block body))" - "macro (x) end" => "(macro (error (tuple x)) (block))" - "function (x,y) end" => "(function (tuple x y) (block))" - "function (x=1) end" => "(function (tuple (kw x 1)) (block))" - "function (;x=1) end" => "(function (tuple (parameters (kw x 1))) (block))" - "function begin() end" => "(function (call (error (begin))) (block))" - "macro begin() end" => "(macro (call (error (begin))) (block))" - "function f() end" => "(function (call f) (block))" - "function \n f() end" => "(function (call f) (block))" - "function \$f() end" => "(function (call (\$ f)) (block))" - "function f end" => "(function f)" - "function f \n\n end" => "(function f)" - 
"function \$f end" => "(function (\$ f))" + "macro while(ex) end" => "(macro (call (error while) ex) (block))" + "macro f() end" => "(macro (call f) (block))" + "macro (:)(ex) end" => "(macro (call : ex) (block))" + "macro (type)(ex) end" => "(macro (call type ex) (block))" + "function (x) body end"=> "(function (tuple x) (block body))" + "function (x,y) end" => "(function (tuple x y) (block))" + "function (x=1) end" => "(function (tuple (kw x 1)) (block))" + "function (;x=1) end" => "(function (tuple (parameters (kw x 1))) (block))" + "function (:)() end" => "(function (call :) (block))" + "function (x::T)() end"=> "(function (call (:: x T)) (block))" + "function (::T)() end" => "(function (call (:: T)) (block))" + "function begin() end" => "(function (call (error begin)) (block))" + "function f() end" => "(function (call f) (block))" + "function type() end" => "(function (call type) (block))" + "function \n f() end" => "(function (call f) (block))" + "function \$f() end" => "(function (call (\$ f)) (block))" + "function (:)() end" => "(function (call :) (block))" + "function (::Type{T})(x) end" => "(function (call (:: (curly Type T)) x) (block))" + # Function/macro definition with no methods + "function f end" => "(function f)" + "function f \n\n end" => "(function f)" + "function \$f end" => "(function (\$ f))" + "macro f end" => "(macro f)" + # Function argument list + "function f(x,y) end" => "(function (call f x y) (block))" + "function f{T}() end" => "(function (call (curly f T)) (block))" + "function A.f() end" => "(function (call (. 
A (quote f))) (block))" + "function f body end" => "(function (error f) (block body))" "function f()::T end" => "(function (:: (call f) T) (block))" "function f()::g(T) end" => "(function (:: (call f) (call g T)) (block))" + "function f() where {T} end" => "(function (where (call f) T) (block))" + "function f() where T end" => "(function (where (call f) T) (block))" "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" ], @@ -480,14 +499,24 @@ tests = [ # Literal colons ":)" => ":" ": end" => ":" + # var syntax + """var"x"end""" => "x (error (end))" + """var"x"1""" => "x (error 1)" + """var"x"y""" => "x (error y)" + """var"x")""" => "x" + """var"x"+""" => "x" + # Syntactic operators + "+=" => "(error +=)" + ".+=" => "(error .+=)" + # Normal operators + "+" => "+" + "~" => "~" + # Quoted syntactic operators allowed + ":+=" => "(quote +=)" # Special symbols quoted ":end" => "(quote end)" ":(end)" => "(quote (error (end)))" ":<:" => "(quote <:)" - # Macro names can be keywords - "@end x" => """(macrocall @end x)""" - # __dot__ macro - "@. x y" => """(macrocall @__dot__ x y)""" # parse_cat "[]" => "(vect)" "[x,]" => "(vect x)" @@ -511,6 +540,13 @@ tests = [ # braces "{x y}" => "(bracescat (row x y))" "{x ;;; y}" => "(bracescat (nrow-3 x y))" + # Macro names can be keywords + "@end x" => "(macrocall @end x)" + # __dot__ macro + "@. 
x y" => "(macrocall @__dot__ x y)" + # cmd strings + "`cmd`" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" + "```cmd```" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" # Errors ": foo" => "(quote (error-t) foo)" ], From b6b6c8d53bb1ad363afddf547780c8f211a59694 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 11:52:32 +1000 Subject: [PATCH 0312/1109] =?UTF-8?q?Parse=20Base=20=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many small fixes to correctly parse Base (some failures remain in the stdlib and Base tests). * Allow word operators as identifiers * Allow quoted .= * (x...;) means tuple, not a block * Fix parsing of space-separated strings in nested space sensitive contexts * Allow redundant semicolons in abstract and primitive types * Allow empty catch block * Remap interpolated contextural keywords to identifier names in parse_string * Fix parsing of empty cmd `` * Fix representation of large integers in Expr --- JuliaSyntax/src/parser.jl | 115 +++++++++++++++++++---------- JuliaSyntax/src/source_files.jl | 8 +- JuliaSyntax/src/syntax_tree.jl | 23 ++++-- JuliaSyntax/test/parse_base.jl | 7 -- JuliaSyntax/test/parse_packages.jl | 21 ++++++ JuliaSyntax/test/parser.jl | 29 ++++++-- JuliaSyntax/test/runtests.jl | 24 +++++- JuliaSyntax/test/self_parse.jl | 11 --- 8 files changed, 166 insertions(+), 72 deletions(-) delete mode 100644 JuliaSyntax/test/parse_base.jl create mode 100644 JuliaSyntax/test/parse_packages.jl delete mode 100644 JuliaSyntax/test/self_parse.jl diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d51874ddae83d..690ce74fe0ee4 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -147,6 +147,12 @@ function bump_closing_token(ps, closing_kind) end end +function bump_semicolon_trivia(ps) + while peek(ps) in KSet`; NewlineWs` + bump(ps, TRIVIA_FLAG) + end +end + # Read tokens until we find an expected closing token. 
# Bump the big pile of resulting tokens as a single nontrivia error token function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps), error="unexpected tokens") @@ -185,6 +191,10 @@ end # # All these take either a raw kind or a token. +function is_plain_equals(t) + kind(t) == K"=" && !is_decorated(t) +end + function is_closing_token(ps::ParseState, k) k = kind(k) return k in KSet`else elseif catch finally , ) ] } ; EndMarker` || @@ -266,12 +276,17 @@ function is_both_unary_and_binary(k) k in KSet`+ - ⋆ ± ∓` # dotop allowed end +function is_word_operator(k) + kind(k) in KSet`in isa where` +end + # operators handled by parse_unary at the start of an expression function is_initial_operator(k) k = kind(k) # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl is_operator(k) && - !(k in KSet`: ' .' ?`) && + !is_word_operator(k) && + !(k in KSet`: ' .' ?`) && !is_syntactic_unary_op(k) && !is_syntactic_operator(k) end @@ -475,7 +490,7 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) # a += b ==> (+= a b) bump(ps, TRIVIA_FLAG) parse_assignment(ps, down, equals_is_kw) - plain_eq = (k == K"=" && !is_dotted(t)) + plain_eq = is_plain_equals(t) equals_pos = emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, is_dotted(t) ? DOTOP_FLAG : EMPTY_FLAGS) return plain_eq ? 
equals_pos : NO_POSITION @@ -502,7 +517,7 @@ function parse_comma(ps::ParseState, do_emit=true) end bump(ps, TRIVIA_FLAG) n_commas += 1 - if peek_token(ps) == K"=" + if is_plain_equals(peek_token(ps)) # Allow trailing comma before `=` # x, = xs ==> (tuple x) continue @@ -940,6 +955,9 @@ function parse_unary(ps::ParseState) bump_trivia(ps) k = peek(ps) if !is_initial_operator(k) + # :T ==> (quote T) + # in::T ==> (:: in T) + # isa::T ==> (:: isa T) parse_factor(ps) return end @@ -1006,13 +1024,11 @@ function parse_unary_call(ps::ParseState) elseif k2 == K"(" # Cases like +(a;b) are ambiguous: are they prefix calls to + with b as # a keyword argument, or is `a;b` a block? We resolve this with a - # simple heuristic: if there were any commas, it was a function call. - # + # simple heuristic: if there were any commas (or an initial splat), it + # was a function call. # # (The flisp parser only considers commas before `;` and thus gets this # last case wrong) - # - bump(ps, op_tok_flags) # Setup possible whitespace error between operator and ( @@ -1025,8 +1041,8 @@ function parse_unary_call(ps::ParseState) bump(ps, TRIVIA_FLAG) # ( is_call = false is_block = false - parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs - is_call = had_commas + parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + is_call = had_commas || had_splat is_block = !is_call && num_semis > 0 bump_closing_token(ps, K")") return (needs_parameters=is_call, @@ -1047,6 +1063,7 @@ function parse_unary_call(ps::ParseState) # Prefix function calls for operators which are both binary and unary # +(a,b) ==> (call + a b) # +(a=1,) ==> (call + (kw a 1)) + # +(a...) ==> (call + (... 
a)) # +(a;b,c) ==> (call + a (parameters b c)) # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) @@ -1247,17 +1264,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) this_iter_valid_modref = false t = peek_token(ps) k = kind(t) - if (ps.space_sensitive && t.had_whitespace && - k in KSet`( [ { \ ' Char " """ \` \`\`\``) - # [f (x)] ==> (hcat f x) - break - end if is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) # Macro calls with space-separated arguments - # @foo a b ==> (macrocall @foo a b) + # @foo a b ==> (macrocall @foo a b) # @foo (x) ==> (macrocall @foo x) # @foo (x,y) ==> (macrocall @foo (tuple x y)) # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) + # [@foo "x"] ==> (vect (macrocall @foo "x")) finish_macroname(ps, mark, is_valid_modref, macro_name_position) with_space_sensitive(ps) do ps # Space separated macro arguments @@ -1286,6 +1299,11 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"macrocall") end break + elseif (ps.space_sensitive && t.had_whitespace && + k in KSet`( [ { \ Char " """ \` \`\`\``) + # [f (x)] ==> (hcat f x) + # [f "x"] ==> (hcat f "x") + break elseif k == K"(" if is_macrocall # a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y) @@ -1462,8 +1480,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k in KSet` " """ \` \`\`\` ` && !t.had_whitespace && is_valid_modref # Custom string and command literals - # x"str" ==> (macrocall x_str "str") - # x`str` ==> (macrocall x_cmd "str") + # x"str" ==> (macrocall @x_str "str") + # x`str` ==> (macrocall @x_cmd "str") + # x"" ==> (macrocall @x_str "") + # x`` ==> (macrocall @x_cmd "") # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. 
@@ -1474,10 +1494,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) k = kind(t) if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_number(k)) # Macro sufficies can include keywords and numbers - # x"s"y ==> (macrocall x_str "s" "y") - # x"s"end ==> (macrocall x_str "s" "end") - # x"s"2 ==> (macrocall x_str "s" 2) - # x"s"10.0 ==> (macrocall x_str "s" 10.0) + # x"s"y ==> (macrocall @x_str "s" "y") + # x"s"end ==> (macrocall @x_str "s" "end") + # x"s"2 ==> (macrocall @x_str "s" 2) + # x"s"10.0 ==> (macrocall @x_str "s" 10.0) suffix_kind = (k == K"Identifier" || is_keyword(k)) ? K"String" : k bump(ps, remap_kind=suffix_kind) end @@ -1589,6 +1609,7 @@ function parse_resword(ps::ParseState) elseif word == K"abstract" # Abstract type definitions # abstract type A end ==> (abstract A) + # abstract type A ; end ==> (abstract A) # abstract type \n\n A \n\n end ==> (abstract A) # abstract type A <: B end ==> (abstract (<: A B)) # abstract type A <: B{T,S} end ==> (abstract (<: A (curly B T S))) @@ -1598,6 +1619,7 @@ function parse_resword(ps::ParseState) @assert peek(ps) == K"type" bump(ps, TRIVIA_FLAG) parse_subtype_spec(ps) + bump_semicolon_trivia(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"abstract") elseif word in KSet`struct mutable` @@ -1618,6 +1640,7 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"struct") elseif word == K"primitive" # primitive type A 32 end ==> (primitive A 32) + # primitive type A 32 ; end ==> (primitive A 32) # primitive type A $N end ==> (primitive A ($ N)) # primitive type A <: B \n 8 \n end ==> (primitive (<: A B) 8) bump(ps, TRIVIA_FLAG) @@ -1625,6 +1648,7 @@ function parse_resword(ps::ParseState) bump(ps, TRIVIA_FLAG) with_space_sensitive(parse_subtype_spec, ps) with_space_sensitive(parse_cond, ps) + bump_semicolon_trivia(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"primitive") elseif word == K"try" @@ -1837,7 +1861,7 @@ function parse_function(ps::ParseState) # # The flisp 
parser disambiguates this case quite differently, # producing less consistent syntax for anonymous functions. - parse_brackets(ps, K")") do _, _, _ + parse_brackets(ps, K")") do _, _, _, _ bump_closing_token(ps, K")") is_anon_func = peek(ps) != K"(" return (needs_parameters = is_anon_func, @@ -1990,10 +2014,10 @@ function parse_catch(ps::ParseState) bump(ps, TRIVIA_FLAG) k = peek(ps) if k in KSet`NewlineWs ;` || is_closing_token(ps, k) + # try x catch end ==> (try (block x) false (block) false false) # try x catch ; y end ==> (try (block x) false (block y) false false) - # try x catch \n y end ==> (try (block x) false (block y) false false) + # try x catch \n y end ==> (try (block x) false (block y) false false) bump_invisible(ps, K"false") - bump(ps, TRIVIA_FLAG) else # try x catch e y end ==> (try (block x) e (block y) false false) parse_identifier_or_interpolate(ps) @@ -2284,7 +2308,7 @@ end function parse_call_arglist(ps::ParseState, closer, is_macrocall) ps = ParseState(ps, for_generator=true) - parse_brackets(ps, closer) do _, _, _ + parse_brackets(ps, closer) do _, _, _, _ bump_closing_token(ps, closer) return (needs_parameters=true, eq_is_kw_before_semi=!is_macrocall, @@ -2303,7 +2327,7 @@ function parse_vect(ps::ParseState, closer) # [x,y ; z] ==> (vect x y (parameters z)) # [x=1, y=2] ==> (vect (= x 1) (= y 2)) # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) - parse_brackets(ps, closer) do _, _, _ + parse_brackets(ps, closer) do _, _, _, _ bump_closing_token(ps, closer) return (needs_parameters=true, eq_is_kw_before_semi=false, @@ -2602,12 +2626,8 @@ function parse_paren(ps::ParseState, check_identifiers=true) initial_semi = peek(ps) == K";" is_tuple = false is_block = false - parse_brackets(ps, K")") do had_commas, num_semis, num_subexprs - # Parentheses used for grouping - # (a * b) ==> (call-i * a b) - # (a=1) ==> (= a 1) - # (x) ==> x - is_tuple = had_commas || + parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + 
is_tuple = had_commas || (had_splat && num_semis >= 1) || (initial_semi && (num_semis == 1 || num_subexprs > 0)) is_block = num_semis > 0 bump_closing_token(ps, K")") @@ -2626,6 +2646,8 @@ function parse_paren(ps::ParseState, check_identifiers=true) # (; a=1) ==> (tuple (parameters (kw a 1))) # # Extra credit: nested parameters and frankentuples + # (x...;) ==> (tuple (... x) (parameters)) + # (x...; y) ==> (tuple (... x) (parameters y)) # (; a=1; b=2) ==> (tuple (parameters (kw a 1) (parameters (kw b 2)))) # (a; b; c,d) ==> (tuple a (parameters b (parameters c d))) # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3))) @@ -2637,6 +2659,12 @@ function parse_paren(ps::ParseState, check_identifiers=true) # (a;b;;c) ==> (block a b c) # (a=1; b=2) ==> (block (= a 1) (= b 2)) emit(ps, mark, K"block") + else + # Parentheses used for grouping + # (a * b) ==> (call-i * a b) + # (a=1) ==> (= a 1) + # (x) ==> x + # (a...) ==> (... a) end end end @@ -2680,6 +2708,7 @@ function parse_brackets(after_parse::Function, num_subexprs = 0 num_semis = 0 had_commas = false + had_splat = false while true bump_trivia(ps) k = peek(ps) @@ -2696,9 +2725,12 @@ function parse_brackets(after_parse::Function, bump(ps, TRIVIA_FLAG) bump_trivia(ps) else - num_subexprs += 1 mark = position(ps) eq_pos = parse_eq_star(ps) + num_subexprs += 1 + if num_subexprs == 1 + had_splat = peek_behind(ps).kind == K"..." 
+ end if eq_pos != NO_POSITION push!(eq_positions, eq_pos) end @@ -2727,7 +2759,7 @@ function parse_brackets(after_parse::Function, end end end - actions = after_parse(had_commas, num_semis, num_subexprs) + actions = after_parse(had_commas, had_splat, num_semis, num_subexprs) if num_semis == 0 last_eq_before_semi = length(eq_positions) end @@ -2781,9 +2813,10 @@ function parse_string(ps::ParseState) emit(ps, m, K"string", prev.flags) end end - elseif is_identifier(k) + elseif is_identifier(k) || is_keyword(k) # "a $foo b" ==> (string "a " foo " b") - bump(ps) + # "$outer" ==> (string outer) + parse_atom(ps) else bump_invisible(ps, K"error", error="Identifier or parenthesized expression expected after \$ in string") @@ -2827,7 +2860,7 @@ function parse_raw_string(ps::ParseState) bump(ps, flags) else outk = delim_k in KSet`" """` ? K"String" : - delim_k == KSet`\` \`\`\`` ? K"CmdString" : + delim_k in KSet`\` \`\`\`` ? K"CmdString" : internal_error("unexpected delimiter ", delim_k) bump_invisible(ps, outk, flags) end @@ -2887,7 +2920,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) parse_atom(ParseState(ps, end_symbol=false), false) end emit(ps, mark, K"quote") - elseif leading_kind == K"=" + elseif leading_kind == K"=" && is_plain_equals(peek_token(ps)) bump(ps, TRIVIA_FLAG, error="unexpected `=`") elseif leading_kind == K"Identifier" bump(ps) @@ -2913,6 +2946,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # ~ ==> ~ # Quoted syntactic operators allowed # :+= ==> (quote +=) + # :.= ==> (quote .=) bump(ps) end elseif is_keyword(leading_kind) @@ -2935,8 +2969,6 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) emit_braces(ps, mark, ckind, cflags) - elseif is_string_delim(leading_kind) - parse_string(ps) elseif leading_kind == K"@" # macro call # Macro names can be keywords # @end x ==> (macrocall @end x) @@ -2944,7 +2976,10 @@ function 
parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) parse_call_chain(ps, mark, true) + elseif is_string_delim(leading_kind) + parse_string(ps) elseif leading_kind in KSet`\` \`\`\`` + # `` ==> (macrocall core_@cmd "") # `cmd` ==> (macrocall core_@cmd "cmd") # ```cmd``` ==> (macrocall core_@cmd "cmd"-s) bump_invisible(ps, K"core_@cmd") diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index e75e94b732370..aa6607ba1b253 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -78,7 +78,13 @@ function Base.getindex(source::SourceFile, rng::AbstractRange) # out of the valid unicode check. The SubString{String} inner constructor # has some @boundscheck, but using @inbounds depends on inlining choices.) j = prevind(source.code, last(rng)+1) - VERSION >= v"1.6" ? @view(source.code[i:j]) : source.code[i:j] + source.code[i:j] +end + +function Base.view(source::SourceFile, rng::AbstractRange) + i = first(rng) + j = prevind(source.code, last(rng)+1) + SubString(source.code, i, j) end function Base.getindex(source::SourceFile, i::Int) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index f20150729bb3e..27e260dd3e923 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -123,7 +123,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In # Leaf node k = kind(raw) val_range = position:position + span(raw) - 1 - val_str = source[val_range] + val_str = view(source, val_range) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. 
val = if k in KSet`Integer Float BinInt OctInt HexInt` @@ -187,7 +187,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In if !is_trivia(rawchild) || is_error(rawchild) if kind(rawchild) == K"String" val_range = pos:pos + span(rawchild) - 1 - push!(strs, source[val_range]) + push!(strs, view(source, val_range)) n = SyntaxNode(source, rawchild, pos, nothing, true, nothing) push!(cs, n) push!(str_nodes, n) @@ -380,7 +380,21 @@ end function _to_expr(node::SyntaxNode) if !haschildren(node) - return node.val + if node.val isa Union{Int128,UInt128,BigInt} + # Ignore the values of large integers and convert them back to + # symbolic/textural form for compatibility with the Expr + # representation of these. + val_range = (node.position-1) .+ (1:node.raw.span) + val_str = replace(view(node.source, val_range), '_'=>"") + headsym = :macrocall + k = kind(node) + macname = node.val isa Int128 ? Symbol("@int128_str") : + node.val isa UInt128 ? Symbol("@uint128_str") : + Symbol("@big_str") + return Expr(:macrocall, GlobalRef(Core, macname), nothing, val_str) + else + return node.val + end end args = Vector{Any}(undef, length(children(node))) args = map!(_to_expr, args, children(node)) @@ -390,11 +404,10 @@ function _to_expr(node::SyntaxNode) if is_infix(node.raw) args[2], args[1] = args[1], args[2] end - loc = source_location(LineNumberNode, node.source, node.position) - headstr = untokenize(head(node), include_flag_suff=false) headsym = !isnothing(headstr) ? 
Symbol(headstr) : error("Can't untokenize head of kind $(kind(node))") + loc = source_location(LineNumberNode, node.source, node.position) # Convert elements if headsym == :macrocall insert!(args, 2, loc) diff --git a/JuliaSyntax/test/parse_base.jl b/JuliaSyntax/test/parse_base.jl deleted file mode 100644 index 5597585165c7a..0000000000000 --- a/JuliaSyntax/test/parse_base.jl +++ /dev/null @@ -1,7 +0,0 @@ - -@testset "JuliaSyntax Base parsing" begin - basedir = "/home/chris/dev/julia/base" - for f in readdir(joinpath(basedir)) - test_parse_file(basedir, f) - end -end diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl new file mode 100644 index 0000000000000..58d13075683b3 --- /dev/null +++ b/JuliaSyntax/test/parse_packages.jl @@ -0,0 +1,21 @@ +# Full-scale parsing tests of JuliaSyntax itself, Julia Base, etc. + +@testset "Parse JuliaSyntax" begin + pkgdir = joinpath(@__DIR__, "..") + parse_all_in_path(joinpath(pkgdir, "src")) + parse_all_in_path(joinpath(pkgdir, "test")) +end + +@testset "Parse Base" begin + parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base")) +end + +#= +@testset "Parse Base tests" begin + parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")) +end + +@testset "Parse Julia stdlib" begin + parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "stdlib")) +end +=# diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 07a5cbf85b74f..bd3f5810e093d 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -143,7 +143,10 @@ tests = [ JuliaSyntax.parse_term => [ "a * b * c" => "(call-i a * b c)" # parse_unary - "-2*x" => "(call-i -2 * x)" + "-2*x" => "(call-i -2 * x)" + ":T" => "(quote T)" + "in::T" => "(:: in T)" + "isa::T" => "(:: isa T)" ], JuliaSyntax.parse_juxtapose => [ "2x" => "(call-i 2 * x)" @@ -174,6 +177,7 @@ tests = [ # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" 
"+(a=1,)" => "(call + (kw a 1))" + "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" # Whitespace not allowed before prefix function call bracket "+ (a,b)" => "(call + (error) a b)" @@ -215,6 +219,7 @@ tests = [ "\$\$a" => "(\$ (\$ a))" ], JuliaSyntax.parse_call => [ + # Mostly parse_call_chain "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" "f(a,b)" => "(call f a b)" @@ -234,6 +239,9 @@ tests = [ "@foo (x,y)" => "(macrocall @foo (tuple x y))" "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" + "[@foo \"x\"]" => "(vect (macrocall @foo \"x\"))" + "[f (x)]" => "(hcat f x)" + "[f \"x\"]" => "(hcat f \"x\")" # Special @doc parsing rules "@doc x\ny" => "(macrocall @doc x y)" "A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)" @@ -278,6 +286,8 @@ tests = [ # String macros "x\"str\"" => """(macrocall @x_str "str")""" "x`str`" => """(macrocall @x_cmd "str")""" + "x\"\"" => """(macrocall @x_str "")""" + "x``" => """(macrocall @x_cmd "")""" # Macro sufficies can include keywords and numbers "x\"s\"y" => """(macrocall @x_str "s" "y")""" "x\"s\"end" => """(macrocall @x_str "s" "end")""" @@ -312,12 +322,14 @@ tests = [ "let\na\nb\nend" => "(let (block) (block a b))" # abstract type "abstract type A end" => "(abstract A)" + "abstract type A ; end" => "(abstract A)" "abstract type \n\n A \n\n end" => "(abstract A)" "abstract type A <: B end" => "(abstract (<: A B))" "abstract type A <: B{T,S} end" => "(abstract (<: A (curly B T S)))" "abstract type A < B end" => "(abstract (call-i A < B))" # primitive type "primitive type A 32 end" => "(primitive A 32)" + "primitive type A 32 ; end" => "(primitive A 32)" "primitive type A \$N end" => "(primitive A (\$ N))" "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct @@ -416,6 +428,7 @@ tests = [ "(try (block x) e (block y) false (block z))" ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") => "(try (block x) e 
(block y) (block z) (block w))" + "try x catch end" => "(try (block x) false (block) false false)" "try x catch ; y end" => "(try (block x) false (block y) false false)" "try x catch \n y end" => "(try (block x) false (block y) false false)" "try x catch e y end" => "(try (block x) e (block y) false false)" @@ -470,10 +483,6 @@ tests = [ "(i,j) in iter" => "(= (tuple i j) iter)" ], JuliaSyntax.parse_paren => [ - # Parentheses used for grouping - "(a * b)" => "(call-i a * b)" - "(a=1)" => "(= a 1)" - "(x)" => "x" # Tuple syntax with commas "()" => "(tuple)" "(x,)" => "(tuple x)" @@ -483,6 +492,8 @@ tests = [ "(;)" => "(tuple (parameters))" "(; a=1)" => "(tuple (parameters (kw a 1)))" # Extra credit: nested parameters and frankentuples + "(x...; y)" => "(tuple (... x) (parameters y))" + "(x...;)" => "(tuple (... x) (parameters))" "(; a=1; b=2)" => "(tuple (parameters (kw a 1) (parameters (kw b 2))))" "(a; b; c,d)" => "(tuple a (parameters b (parameters c d)))" "(a=1, b=2; c=3)" => "(tuple (= a 1) (= b 2) (parameters (kw c 3)))" @@ -491,6 +502,11 @@ tests = [ "(a=1;)" => "(block (= a 1))" "(a;b;;c)" => "(block a b c)" "(a=1; b=2)" => "(block (= a 1) (= b 2))" + # Parentheses used for grouping + "(a * b)" => "(call-i a * b)" + "(a=1)" => "(= a 1)" + "(x)" => "x" + "(a...)" => "(... a)" # Generators "(x for x in xs)" => "(generator x (= x xs))" ], @@ -513,6 +529,7 @@ tests = [ "~" => "~" # Quoted syntactic operators allowed ":+=" => "(quote +=)" + ":.=" => "(quote .=)" # Special symbols quoted ":end" => "(quote end)" ":(end)" => "(quote (error (end)))" @@ -545,6 +562,7 @@ tests = [ # __dot__ macro "@. 
x y" => "(macrocall @__dot__ x y)" # cmd strings + "``" => "(macrocall :(Core.var\"@cmd\") \"\")" "`cmd`" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" "```cmd```" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" # Errors @@ -580,6 +598,7 @@ tests = [ "\"hi\$(\"\"\"ho\"\"\")\"" => "(string \"hi\" (string-s \"ho\"))" ((v=v"1.5",), "\"hi\$(\"ho\")\"") => "(string \"hi\" \"ho\")" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" + "\"\$outer\"" => "(string outer)" "\"\"" => "\"\"" "\"\$x\$y\$z\"" => "(string x y z)" "\"\$(x)\"" => "(string x)" diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index f505cd9063340..9e44680d7f9ac 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -27,6 +27,17 @@ function test_parse_file(root_path, path) end end end +test_parse_file(path) = test_parse_file(dirname(path), basename(path)) + +function parse_all_in_path(basedir) + src_list = String[] + for (root, dirs, files) in walkdir(basedir) + append!(src_list, (joinpath(root, f) for f in files if endswith(f, ".jl"))) + end + for f in src_list + test_parse_file(basedir, relpath(f, basedir)) + end +end # Shortcuts for defining raw syntax nodes @@ -39,13 +50,20 @@ N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags()), args...) NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags(infix=true)), args...) 
module TokenizeTests - include("../Tokenize/test/runtests.jl") + using Test + @testset "Tokenize" begin + include("../Tokenize/test/runtests.jl") + end end include("parse_stream.jl") include("parser.jl") -include("value_parsing.jl") -include("self_parse.jl") + +@testset "Parsing values from strings" begin + include("value_parsing.jl") +end + +include("parse_packages.jl") # Prototypes #include("syntax_trees.jl") diff --git a/JuliaSyntax/test/self_parse.jl b/JuliaSyntax/test/self_parse.jl deleted file mode 100644 index 25fe88e49edf8..0000000000000 --- a/JuliaSyntax/test/self_parse.jl +++ /dev/null @@ -1,11 +0,0 @@ -@testset "JuliaSyntax self-parsing" begin - pkgdir = joinpath(@__DIR__, "..") - for f in readdir(joinpath(pkgdir, "src")) - test_parse_file(pkgdir, joinpath("src",f)) - end - - for f in readdir(joinpath(pkgdir, "test")) - test_parse_file(pkgdir, joinpath("test",f)) - end -end - From bd050964b055f26b2529015d0666cc75d225f681 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 16:08:51 +1000 Subject: [PATCH 0313/1109] Allow parsing of const fields for Julia 1.8 --- JuliaSyntax/src/parser.jl | 23 +++++++++++++++++------ JuliaSyntax/test/parser.jl | 5 ++++- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 690ce74fe0ee4..d8ae856effd6a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1795,7 +1795,8 @@ function parse_const_local_global(ps) bump(ps, TRIVIA_FLAG) end end - # Like parse_eq/parse_assignment, but specialized in case we need error recovery + # Like parse_eq/parse_assignment, but specialized so that we can omit the + # tuple when there's commas but no assignment. 
beforevar_mark = position(ps) n_commas = parse_comma(ps, false) t = peek_token(ps) @@ -1808,13 +1809,22 @@ function parse_const_local_global(ps) parse_comma(ps) emit(ps, beforevar_mark, K"=") elseif has_const - # const x ==> (const (error x)) - # Recovery heuristic - recover(ps, mark=beforevar_mark, - error="Expected assignment after `const`") do ps, k - k == K"NewlineWs" || (k != K"," && is_closing_token(ps, k)) + if ps.julia_version >= v"1.8.0-DEV.1148" + # Const fields https://github.com/JuliaLang/julia/pull/43305 + # const x ==> (const x) + # const x::T ==> (const (:: x T)) + if n_commas >= 1 + # Maybe nonsensical? But this is what the flisp parser does. + # const x,y ==> (const (tuple x y)) + emit(ps, beforevar_mark, K"tuple") + end + else + # const x ==> (const (error x)) + emit(ps, beforevar_mark, K"error", + error="Expected assignment after `const`") end else + #v1.8: const x ==> (const x) # global x ==> (global x) # local x ==> (local x) # global x,y ==> (global x y) @@ -1823,6 +1833,7 @@ function parse_const_local_global(ps) emit(ps, scope_mark, scope_k) end if has_const + # TODO: Normalize `global const` during Expr conversion rather than here? 
emit(ps, mark, K"const") end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index bd3f5810e093d..25ed5ea6add9c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -385,7 +385,10 @@ tests = [ "local x" => "(local x)" "global x,y" => "(global x y)" "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" - "const x" => "(const (error x (error)))" + "const x" => "(const (error x))" + ((v=v"1.8",), "const x") => "(const x)" + ((v=v"1.8",), "const x::T") => "(const (:: x T))" + ((v=v"1.8",), "const x,y") => "(const (tuple x y))" ], JuliaSyntax.parse_function => [ "macro while(ex) end" => "(macro (call (error while) ex) (block))" From a6fb2063c05f1ade471c8153193ee7c32258cdf3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 16:09:43 +1000 Subject: [PATCH 0314/1109] Tools for reducing files down to minimal test cases --- JuliaSyntax/src/hooks.jl | 4 +-- JuliaSyntax/src/parser.jl | 6 ++-- JuliaSyntax/src/syntax_tree.jl | 49 ++++++++++++++---------------- JuliaSyntax/test/parse_packages.jl | 10 +++--- JuliaSyntax/test/parser.jl | 6 ++-- JuliaSyntax/test/runtests.jl | 28 ++++++++--------- 6 files changed, 50 insertions(+), 53 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 2db2bfb750c01..d9dd08b8e403f 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -25,9 +25,9 @@ function core_parser_hook(code, filename, offset, options) stream = ParseStream(code) if options === :atom - parse_atom(ParseState(stream)) + parse_atom(ParseState(stream, VERSION)) elseif options === :statement - parse_stmts(ParseState(stream)) + parse_stmts(ParseState(stream, VERSION)) elseif options === :all parse_all(stream) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d8ae856effd6a..6feac2731be3f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -23,7 +23,7 @@ struct ParseState end # Normal context -function 
ParseState(stream::ParseStream; julia_version=VERSION) +function ParseState(stream::ParseStream, julia_version::VersionNumber) ParseState(stream, julia_version, true, false, false, false, false, true) end @@ -3092,8 +3092,8 @@ Parse a sequence of top level statements. `input` may be a `ParseStream` or other input source which will be passed to the `ParseStream` constructor. The `ParseStream` is returned. """ -function parse_all(stream::ParseStream) - parse_all(ParseState(stream)) +function parse_all(stream::ParseStream; julia_version=VERSION) + parse_all(ParseState(stream, julia_version)) return stream end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 27e260dd3e923..8b250ebf0dc77 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -232,6 +232,16 @@ children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : span(node::SyntaxNode) = span(node.raw) +""" + sourcetext(node) + +Get the full source text of a node. +""" +function sourcetext(node::SyntaxNode) + val_range = (node.position-1) .+ (1:span(node)) + view(node.source, val_range) +end + function interpolate_literal(node::SyntaxNode, val) @assert kind(node) == K"$" SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) @@ -384,14 +394,13 @@ function _to_expr(node::SyntaxNode) # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr # representation of these. - val_range = (node.position-1) .+ (1:node.raw.span) - val_str = replace(view(node.source, val_range), '_'=>"") + str = replace(sourcetext(node), '_'=>"") headsym = :macrocall k = kind(node) macname = node.val isa Int128 ? Symbol("@int128_str") : node.val isa UInt128 ? 
Symbol("@uint128_str") : Symbol("@big_str") - return Expr(:macrocall, GlobalRef(Core, macname), nothing, val_str) + return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) else return node.val end @@ -497,21 +506,20 @@ Base.Expr(node::SyntaxNode) = _to_expr(node) #------------------------------------------------------------------------------- -function parse_all(::Type{SyntaxNode}, code::AbstractString; filename="none") - source_file = SourceFile(code, filename=filename) - - stream = ParseStream(code) +function parse_all(::Type{SyntaxNode}, source::SourceFile) + stream = ParseStream(source.code) parse_all(stream) - if !isempty(stream.diagnostics) buf = IOBuffer() - show_diagnostics(IOContext(buf, stdout), stream, code) + show_diagnostics(IOContext(buf, stdout), stream, source.code) @error Text(String(take!(buf))) end - green_tree = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") + SyntaxNode(source, green_tree) +end - SyntaxNode(source_file, green_tree) +function parse_all(::Type{SyntaxNode}, code::AbstractString; filename="none") + parse_all(SyntaxNode, SourceFile(code, filename=filename)) end @@ -521,18 +529,7 @@ end Parse the given code and convert to a standard Expr """ function parse_all(::Type{Expr}, code::AbstractString; filename="none") - tree = parse_all(SyntaxNode, code; filename=filename) - - # convert to Julia expr - ex = Expr(tree) - - # TODO: Don't remove line nums; try to get them consistent with Base. 
- flisp_ex = remove_linenums!(flisp_parse_all(code)) - if remove_linenums!(deepcopy(ex)) != flisp_ex && !(!isempty(flisp_ex.args) && - Meta.isexpr(flisp_ex.args[end], :error)) - @error "Mismatch with Meta.parse()" ex flisp_ex - end - ex + Expr(parse_all(SyntaxNode, code; filename=filename)) end function remove_linenums!(ex) @@ -543,10 +540,10 @@ function remove_linenums!(ex) ex end -function flisp_parse_all(code) +function flisp_parse_all(code; filename="none") if VERSION >= v"1.6" - Meta.parseall(code) + Meta.parseall(code, filename=filename) else - Base.parse_input_line(code) + Base.parse_input_line(code, filename=filename) end end diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 58d13075683b3..a6385d1b6eba0 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -2,20 +2,20 @@ @testset "Parse JuliaSyntax" begin pkgdir = joinpath(@__DIR__, "..") - parse_all_in_path(joinpath(pkgdir, "src")) - parse_all_in_path(joinpath(pkgdir, "test")) + test_parse_all_in_path(joinpath(pkgdir, "src")) + test_parse_all_in_path(joinpath(pkgdir, "test")) end @testset "Parse Base" begin - parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base")) + test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base")) end #= @testset "Parse Base tests" begin - parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")) + test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")) end @testset "Parse Julia stdlib" begin - parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "stdlib")) + test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "stdlib")) end =# diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 25ed5ea6add9c..3ffd31a44c723 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,6 +1,6 @@ function test_parse(production, code; v=v"1.6") stream = ParseStream(code) 
- production(JuliaSyntax.ParseState(stream; julia_version=v)) + production(JuliaSyntax.ParseState(stream, v)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"Nothing") source = SourceFile(code) s = SyntaxNode(source, t) @@ -12,9 +12,9 @@ function test_parse(production, code; v=v"1.6") end # Version of test_parse for interactive exploration -function itest_parse(production, code, julia_version::VersionNumber=v"1.6") +function itest_parse(production, code; julia_version::VersionNumber=v"1.6") stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream; julia_version=julia_version)) + production(JuliaSyntax.ParseState(stream, julia_version)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") println(stdout, "# Code:\n$code\n") diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 9e44680d7f9ac..b7ad6e9af230f 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -7,7 +7,7 @@ using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, - children, child, setchild!, SyntaxHead + children, child, setchild!, SyntaxHead, parse_all using JuliaSyntax: Kind, @K_str, is_literal, is_keyword, is_operator using JuliaSyntax: highlight @@ -17,25 +17,25 @@ using JuliaSyntax: ParseStream, emit, emit_diagnostic using JuliaSyntax: ParseState -function test_parse_file(root_path, path) - fullpath = joinpath(root_path, path) - if endswith(path, ".jl") && isfile(fullpath) - @testset "Parse $path" begin - code = read(fullpath, String) - @test JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == - JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) - end - end +function parsers_agree_on_file(path) + code = read(path, String) + JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) end -test_parse_file(path) = 
test_parse_file(dirname(path), basename(path)) -function parse_all_in_path(basedir) +function find_source_in_path(basedir) src_list = String[] for (root, dirs, files) in walkdir(basedir) append!(src_list, (joinpath(root, f) for f in files if endswith(f, ".jl"))) end - for f in src_list - test_parse_file(basedir, relpath(f, basedir)) + src_list +end + +function test_parse_all_in_path(basedir) + for f in find_source_in_path(basedir) + @testset "Parse $(relpath(f, basedir))" begin + @test parsers_agree_on_file(f) + end end end From ee459d2188102c45265785fb6fc2d515ca193e6c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 22:13:38 +1000 Subject: [PATCH 0315/1109] Rearrange test utilities into their own file --- JuliaSyntax/README.md | 21 ++++--- JuliaSyntax/test/parse_stream.jl | 9 ++- JuliaSyntax/test/parser.jl | 32 ---------- JuliaSyntax/test/runtests.jl | 43 +------------- JuliaSyntax/test/simple_parser.jl | 6 +- JuliaSyntax/test/syntax_interpolation.jl | 10 ++++ JuliaSyntax/test/test_utils.jl | 75 ++++++++++++++++++++++++ 7 files changed, 109 insertions(+), 87 deletions(-) create mode 100644 JuliaSyntax/test/test_utils.jl diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index e156c66e575c1..7132415497564 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -376,7 +376,8 @@ f(a, very good at heuristics. Also, we've got huge piles of traininig data — just choose some high quality, tastefully hand-formatted libraries. -* Similarly, can we learn fast and reasonably accurate recovery heuristics? +* Similarly, can we learn fast and reasonably accurate recovery heuristics for + when the parser encounters broken syntax rather than hand-coding these? # Resources @@ -541,10 +542,11 @@ Here's some behaviors which seem to be bugs: * In try-catch-finally, the `finally` clause is allowed before the `catch`, but always executes afterward. (Presumably was this a mistake? It seems pretty awful!) 
* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is - parsed as `Expr(:vect)` + correctly parsed as `Expr(:vect)` * `f(x for x in in xs)` is accepted, and parsed very strangely. * Octal escape sequences saturate rather than being reported as errors. Eg, - `"\777"` results in `"\xff"`. + `"\777"` results in `"\xff"`. This is inconsistent with + `Base.parse(::Type{Int}, ...)` ## Parsing / AST oddities and warts @@ -585,9 +587,9 @@ parsing `key=val` pairs inside parentheses. ### Flattened generators -Flattened generators are hard because the Julia AST doesn't respect a key -rule we normally expect: that the children of an AST node are a contiguous -range in the source text. This is because the `for`s in +Flattened generators are uniquely problematic because the Julia AST doesn't +respect a key rule we normally expect: that the children of an AST node are a +*contiguous* range in the source text. This is because the `for`s in `[xy for x in xs for y in ys]` are parsed in the normal order of a for loop as ``` @@ -609,8 +611,8 @@ and the standard Julia AST is like this: however, note that if this tree were flattened, the order of tokens would be `(xy) (y in ys) (x in xs)` which is *not* the source order. So in this case -our tree needs to deviate from the Julia AST. The natural representation seems -to be to flatten the generators: +our green tree must deviate from the Julia AST. The natural representation +seems to be to flatten the generators: ``` (flatten @@ -642,7 +644,8 @@ xy Presumably because of the need to add a line number node in the flisp parser `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` -* `import . .A` is allowed, and parsed the same as `import ..A` +* Spaces are alloweed between import dots — `import . .A` is allowed, and + parsed the same as `import ..A` * `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` can't be a normal identifier. 
diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 0b7590c7eb691..d2e63c1a4dcea 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -3,6 +3,11 @@ # Here we test the ParseStream interface, by taking input code and checking # that the correct sequence of emit() and bump() produces a valid parse tree. +using JuliaSyntax: ParseStream, + peek, peek_token, + bump, bump_trivia, bump_invisible, + emit, emit_diagnostic + code = """ for i = 1:10 xx[i] + 2 @@ -13,8 +18,8 @@ end st = ParseStream(code) -# Here we manually issue parse events in the order a Julia parser would issue -# them (if such a parser existed... which it doesn't yet!) +# Here we manually issue parse events in the order the Julia parser would issue +# them @testset "ParseStream" begin p1 = position(st) @test peek(st) == K"for" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 3ffd31a44c723..4c11963aac68c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -11,38 +11,6 @@ function test_parse(production, code; v=v"1.6") end end -# Version of test_parse for interactive exploration -function itest_parse(production, code; julia_version::VersionNumber=v"1.6") - stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream, julia_version)) - t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") - - println(stdout, "# Code:\n$code\n") - - println(stdout, "# Green tree:") - show(stdout, MIME"text/plain"(), t, code) - JuliaSyntax.show_diagnostics(stdout, stream, code) - - s = SyntaxNode(SourceFile(code, filename="none"), t) - println(stdout, "\n# SyntaxNode:") - show(stdout, MIME"text/x.sexpression"(), s) - - ex = Expr(s) - println(stdout, "\n\n# Julia Expr:") - show(stdout, MIME"text/plain"(), ex) - - f_ex = JuliaSyntax.remove_linenums!(Meta.parse(code, raise=false)) - if JuliaSyntax.remove_linenums!(ex) != f_ex - println(stdout, "\n\n# AST dump") - dump(ex) - - 
printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) - show(stdout, MIME"text/plain"(), f_ex) - # return (code, stream, t, s, ex) - end - nothing -end - # TODO: # * Extract the following test cases from the source itself. # * Use only the green tree to generate the S-expressions diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index b7ad6e9af230f..aedca996046e7 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,54 +1,12 @@ using JuliaSyntax using Test -using Base.Meta: @dump - using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild!, SyntaxHead, parse_all -using JuliaSyntax: Kind, @K_str, is_literal, is_keyword, is_operator -using JuliaSyntax: highlight -using JuliaSyntax: ParseStream, - peek, peek_token, - bump, bump_trivia, bump_invisible, - emit, emit_diagnostic -using JuliaSyntax: ParseState - -function parsers_agree_on_file(path) - code = read(path, String) - JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == - JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) -end - -function find_source_in_path(basedir) - src_list = String[] - for (root, dirs, files) in walkdir(basedir) - append!(src_list, (joinpath(root, f) for f in files if endswith(f, ".jl"))) - end - src_list -end - -function test_parse_all_in_path(basedir) - for f in find_source_in_path(basedir) - @testset "Parse $(relpath(f, basedir))" begin - @test parsers_agree_on_file(f) - end - end -end - -# Shortcuts for defining raw syntax nodes - -# Trivia nodes -T(k, s) = GreenNode(SyntaxHead(k, flags(trivia=true)), s, ) -# Non-trivia nodes -N(k, s) = GreenNode(SyntaxHead(k, flags()), s) -N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags()), args...) -# Non-trivia, infix form -NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags(infix=true)), args...) 
- module TokenizeTests using Test @testset "Tokenize" begin @@ -56,6 +14,7 @@ module TokenizeTests end end +include("test_utils.jl") include("parse_stream.jl") include("parser.jl") diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/test/simple_parser.jl index 9e8c517b6d0c8..918f12ed5e210 100644 --- a/JuliaSyntax/test/simple_parser.jl +++ b/JuliaSyntax/test/simple_parser.jl @@ -1,5 +1,7 @@ -# Example parser for a very basic language of expressions, calls and function -# definitions. +# Example parser for a very basic Julia-like language of expressions, calls and +# function definitions. + +using JuliaSyntax: @K_str, is_literal, is_keyword, is_operator function parse_toplevel(st) mark = position(st) diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/test/syntax_interpolation.jl index 0f9e30267366e..73d55eb5e531c 100644 --- a/JuliaSyntax/test/syntax_interpolation.jl +++ b/JuliaSyntax/test/syntax_interpolation.jl @@ -1,3 +1,13 @@ +# Shortcuts for defining raw syntax nodes + +# Trivia nodes +T(k, s) = GreenNode(SyntaxHead(k, flags(trivia=true)), s, ) +# Non-trivia nodes +N(k, s) = GreenNode(SyntaxHead(k, flags()), s) +N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags()), args...) +# Non-trivia, infix form +NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags(infix=true)), args...) 
+ # # Macros and expression interpolation # The following shows that SyntaxNode works nicely for simple macros based on diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl new file mode 100644 index 0000000000000..863d0a1b20a65 --- /dev/null +++ b/JuliaSyntax/test/test_utils.jl @@ -0,0 +1,75 @@ +using Test +using JuliaSyntax + +using Base.Meta: @dump + +using JuliaSyntax: + # Parsing + ParseStream, + SourceFile, + parse_all, + @K_str, + # Nodes + GreenNode, + SyntaxNode, + # Node inspection + kind, + flags, + haschildren, + children, + child + +function parsers_agree_on_file(path) + code = read(path, String) + JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) +end + +function find_source_in_path(basedir) + src_list = String[] + for (root, dirs, files) in walkdir(basedir) + append!(src_list, (joinpath(root, f) for f in files if endswith(f, ".jl"))) + end + src_list +end + +function test_parse_all_in_path(basedir) + for f in find_source_in_path(basedir) + @testset "Parse $(relpath(f, basedir))" begin + @test parsers_agree_on_file(f) + end + end +end + +# Version of test_parse for interactive exploration +function itest_parse(production, code; julia_version::VersionNumber=v"1.6") + stream = ParseStream(code) + production(JuliaSyntax.ParseState(stream, julia_version)) + t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") + + println(stdout, "# Code:\n$code\n") + + println(stdout, "# Green tree:") + show(stdout, MIME"text/plain"(), t, code) + JuliaSyntax.show_diagnostics(stdout, stream, code) + + s = SyntaxNode(SourceFile(code, filename="none"), t) + println(stdout, "\n# SyntaxNode:") + show(stdout, MIME"text/x.sexpression"(), s) + + ex = Expr(s) + println(stdout, "\n\n# Julia Expr:") + show(stdout, MIME"text/plain"(), ex) + + f_ex = JuliaSyntax.remove_linenums!(Meta.parse(code, raise=false)) + if JuliaSyntax.remove_linenums!(ex) != 
f_ex + println(stdout, "\n\n# AST dump") + dump(ex) + + printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) + show(stdout, MIME"text/plain"(), f_ex) + # return (code, stream, t, s, ex) + end + nothing +end + From b711804156483d8e7bbb25cfee5ef28cb48881cb Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 22:14:52 +1000 Subject: [PATCH 0316/1109] Emit a warning for space between dots in in import paths This seems to be strange syntax, and I guess unintentional that this was allowed by the flisp parser. --- JuliaSyntax/src/parser.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 6feac2731be3f..4dfe97458d449 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2191,7 +2191,11 @@ function parse_import_path(ps::ParseState) # import ....A ==> (import (. . . . . A)) # Dots with spaces are allowed (a misfeature?) # import . .A ==> (import (. . . A)) + first_dot = true while true + m = position(ps) + bump_trivia(ps) + m2 = position(ps) k = peek(ps) if k == K"." bump(ps) @@ -2202,6 +2206,10 @@ function parse_import_path(ps::ParseState) else break end + if !first_dot && m != m2 + emit_diagnostic(ps, m, m2, warning="space between dots in import path") + end + first_dot = false end # import @x ==> (import (. @x)) # import $A ==> (import (. ($ A))) From d173c799e6562a2bb2d8f84a1b5f4c8d9aae0133 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 12 Jan 2022 23:57:15 +1000 Subject: [PATCH 0317/1109] Clean up early prototype of syntax interpolation Now that we have a real parser, we can just use this parser for the syntax interpolation prototype rather than constructing the syntax tree by hand. It's much nicer! Also remove the test/syntax_trees.jl prototype. All this is now implemented so the prototype doesn't seem that useful anymore. 
--- JuliaSyntax/test/runtests.jl | 1 - JuliaSyntax/test/syntax_interpolation.jl | 84 ++++++------------------ JuliaSyntax/test/syntax_trees.jl | 55 ---------------- 3 files changed, 19 insertions(+), 121 deletions(-) delete mode 100644 JuliaSyntax/test/syntax_trees.jl diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index aedca996046e7..9185bb98f13eb 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -25,6 +25,5 @@ end include("parse_packages.jl") # Prototypes -#include("syntax_trees.jl") #include("syntax_interpolation.jl") #include("simple_parser.jl") diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/test/syntax_interpolation.jl index 73d55eb5e531c..d7f1094e2de64 100644 --- a/JuliaSyntax/test/syntax_interpolation.jl +++ b/JuliaSyntax/test/syntax_interpolation.jl @@ -1,17 +1,9 @@ -# Shortcuts for defining raw syntax nodes - -# Trivia nodes -T(k, s) = GreenNode(SyntaxHead(k, flags(trivia=true)), s, ) -# Non-trivia nodes -N(k, s) = GreenNode(SyntaxHead(k, flags()), s) -N(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags()), args...) -# Non-trivia, infix form -NI(k, args::GreenNode...) = GreenNode(SyntaxHead(k, flags(infix=true)), args...) - # # Macros and expression interpolation -# The following shows that SyntaxNode works nicely for simple macros based on -# interpolating expressions into one another. In particular it shows how +using JuliaSyntax: SourceFile, SyntaxNode, parse_all, child, setchild! + +# The following shows that SyntaxNode works nicely for simple macros which +# just interpolate expressions into one another. In particular it shows how # precise source information from multiple files can coexist within the same # syntax tree. @@ -26,60 +18,29 @@ macro show2(ex) end end -# Now, how would this be implemented if we were to do it with SyntaxNode? -# We don't have a parser which is capable of producing our tree structures yet, -# so we need to hand construct all our trees. 
+# Now, let's implement the same expression interpolation but using SyntaxNode +# (and with a normal Julia function which we need to use, absent any deeper +# integration with the Julia runtime) function at_show2(ex::SyntaxNode) - code = String(read(@__FILE__)) name = sprint(show, MIME"text/x.sexpression"(), ex) - # The following quote block is not used directly, but the text for it is - # re-read from `code`. - quote_begin = (@__LINE__) + 1 quote - value = $ex + value = $(esc(ex)) println($name, " = ", value) value end - raw = N(K"block", - T(K"quote", 5), - T(K"NewlineWs", 9), - N(K"=", - N(K"Identifier", 5), - T(K"Whitespace", 1), - T(K"=", 1), - T(K"Whitespace", 1), - N(K"$", - T(K"$", 1), - N(K"Identifier", 2)), - T(K"NewlineWs", 9)), - N(K"call", - N(K"Identifier", 7), - T(K"(", 1), - N(K"$", - T(K"$", 1), - N(K"Identifier", 4)), - T(K",", 1), - T(K"Whitespace", 1), - N(K"String", 5), - T(K",", 1), - T(K"Whitespace", 1), - N(K"Identifier", 5), - T(K")", 1)), - T(K"NewlineWs", 9), - N(K"Identifier", 5), - T(K"NewlineWs", 5), - T(K"end", 3)) - source = SourceFile(code, filename=@__FILE__) - block = SyntaxNode(source, raw, source.line_starts[quote_begin]+4) - # Now that we have the block, we need to interpolate into it. - + # The following emulates the expression interpolation lowering which is + # usually done by the compiler. + # 1. Extract the expression literal as `block` + tree = parse_all(SyntaxNode, SourceFile(String(read(@__FILE__)), filename=@__FILE__)) + block = child(tree, 3, 2, 2, 1) + # 2. Interpolate local variables into the block at positions of $'s # Interpolating a SyntaxNode `ex` is simple: setchild!(block, (1, 2), ex) # The interpolation of a Julia *value* should inherit the source location - # of the $ interpolation expression. This is different to the - # interpolation of a SyntaxNode, which should just be inserted as-is. + # of the $ interpolation expression. 
This is different to when substituting + # in a SyntaxNode which should just be inserted as-is. setchild!(block, (2, 2), - JuliaSyntax.interpolate_literal(block.val[2].val[2], name)) + JuliaSyntax.interpolate_literal(child(block, 2, 2), name)) block end @@ -87,16 +48,9 @@ end # Let's have some simple expression to pass to at_show2. This will be # attributed to a different file foo.jl -code2 = "foo + 42" -source2 = SourceFile(code2, filename="foo.jl") -s2 = SyntaxNode(source2, NI(K"call", - N(K"Identifier", 3), - T(K"Whitespace", 1), - N(K"+", 1), - T(K"Whitespace", 1), - N(K"Integer", 2))) +s2 = child(parse_all(SyntaxNode, SourceFile("foo +\n42", filename="foo.jl")), 1) # Calling at_show2, we see that the precise source information is preserved for # both the surrounding expression and the interpolated fragments. println("\nInterpolation example") -show(stdout, MIME"text/plain"(), at_show2(s2)) +s3 = at_show2(s2) diff --git a/JuliaSyntax/test/syntax_trees.jl b/JuliaSyntax/test/syntax_trees.jl deleted file mode 100644 index 3b6439b327faf..0000000000000 --- a/JuliaSyntax/test/syntax_trees.jl +++ /dev/null @@ -1,55 +0,0 @@ -#------------------------------------------------------------------------------- -# Raw syntax tree and AST layering - -# For this code: -code = """ -for i = 1:10 - a + 2 - # hi - c -end -""" - -source = SourceFile(code, filename="none.jl") - -# We'd like to produce something the following raw tree -t = -N(K"for", - T(K"for", 3), - T(K"Whitespace", 1), - N(K"=", - N(K"Identifier", 1), - T(K"Whitespace", 1), - T(K"=", 1), - T(K"Whitespace", 1), - NI(K"call", - N(K"Integer", 1), - N(K":", 1), - N(K"Integer", 2))), - N(K"block", - T(K"NewlineWs", 5), - NI(K"call", - N(K"Identifier", 1), - T(K"Whitespace", 1), - N(K"+", 1), - T(K"Whitespace", 1), - N(K"Integer", 1)), - T(K"NewlineWs", 5), - T(K"Comment", 4), - T(K"NewlineWs", 5), - N(K"Identifier", 1), - T(K"NewlineWs", 1)), - T(K"end", 3)) - -# And the following AST -s = SyntaxNode(source, t) - 
-println("\nGreenNode") -show(stdout, MIME"text/plain"(), t, code, show_trivia=true) - -println("\nSyntaxNode") -show(stdout, MIME"text/plain"(), s) - -#code = "42" -#SyntaxNode(N(K"Integer", 2), 1, code) - From f792e02debdc9607db65f4a8e305d2fe2c53a6de Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 13 Jan 2022 14:36:49 +1000 Subject: [PATCH 0318/1109] Systemetize the way that version compatibility checking is done --- JuliaSyntax/src/parser.jl | 84 ++++++++++++++++++++++---------------- JuliaSyntax/test/parser.jl | 10 ++--- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4dfe97458d449..cb567b70a3efe 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -24,7 +24,11 @@ end # Normal context function ParseState(stream::ParseStream, julia_version::VersionNumber) - ParseState(stream, julia_version, true, false, false, false, false, true) + # To avoid keeping track of the exact Julia development version where new + # features were added, treat prereleases or dev versons as the release + # version by stripping the prerelease. + ver = VersionNumber(julia_version.major, julia_version.minor, julia_version.patch) + ParseState(stream, ver, true, false, false, false, false, true) end function ParseState(ps::ParseState; range_colon_enabled=nothing, @@ -153,6 +157,15 @@ function bump_semicolon_trivia(ps) end end +# Emit an error if the version is less than `min_ver` +function min_supported_version(min_ver, ps, mark, message) + # NB: the prerelease version will be removed from ps.julia_version before this point. + if ps.julia_version < min_ver + msg = "$message is not supported in Julia version $(ps.julia_version) < $(min_ver)" + emit(ps, mark, K"error", error=msg) + end +end + # Read tokens until we find an expected closing token. 
# Bump the big pile of resulting tokens as a single nontrivia error token function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps), error="unexpected tokens") @@ -1348,6 +1361,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind == K"ncat" ? K"typed_ncat" : internal_error("unrecognized kind in parse_cat", ckind) emit(ps, mark, outk, cflags) + check_ncat_compat(ps, mark, ckind) if is_macrocall emit(ps, mark, K"macrocall") break @@ -1467,11 +1481,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @S{a,b} ==> (macrocall S (braces a b)) emit(ps, m, K"braces") emit(ps, mark, K"macrocall") - # Extension - #if ps.julia_version < v"1.5" - # emit(ps, mark, K"error", - # error="", min_version=v"1.5") - #end + min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`") break else # S{a,b} ==> (curly S a b) @@ -1809,20 +1819,17 @@ function parse_const_local_global(ps) parse_comma(ps) emit(ps, beforevar_mark, K"=") elseif has_const - if ps.julia_version >= v"1.8.0-DEV.1148" - # Const fields https://github.com/JuliaLang/julia/pull/43305 - # const x ==> (const x) - # const x::T ==> (const (:: x T)) - if n_commas >= 1 - # Maybe nonsensical? But this is what the flisp parser does. - # const x,y ==> (const (tuple x y)) - emit(ps, beforevar_mark, K"tuple") - end - else - # const x ==> (const (error x)) - emit(ps, beforevar_mark, K"error", - error="Expected assignment after `const`") + # Const fields https://github.com/JuliaLang/julia/pull/43305 + # const x ==> (const x) + # const x::T ==> (const (:: x T)) + if n_commas >= 1 + # Maybe nonsensical? But this is what the flisp parser does. 
+ # const x,y ==> (const (tuple x y)) + emit(ps, beforevar_mark, K"tuple") end + #v1.7: const x ==> (const (error x)) + min_supported_version(v"1.8", ps, beforevar_mark, + "`const` struct field without assignment") else #v1.8: const x ==> (const x) # global x ==> (global x) @@ -1989,11 +1996,8 @@ function parse_try(ps) #v1.8: try else end ==> (try (block) false false (error (block)) false) emit(ps, else_mark, K"error", error="Expected `catch` before `else`") end - if ps.julia_version < v"1.8" - #v1.7: try catch ; else end ==> (try (block) false (block) (error (block)) false) - emit(ps, else_mark, K"error", - error="`else` in `try` requires at least Julia 1.8") - end + #v1.7: try catch ; else end ==> (try (block) false (block) (error (block)) false) + min_supported_version(v"1.8", ps, else_mark, "`else` after `try`") else bump_invisible(ps, K"false") end @@ -2163,16 +2167,14 @@ function parse_import(ps::ParseState, word, has_import_prefix) bump(ps, TRIVIA_FLAG) parse_atsym(ps) emit(ps, mark, K"as") - if ps.julia_version < v"1.6" - #v1.5: import A as B ==> (import (error (as (. A) B))) - emit(ps, mark, K"error", - error="`import` with renaming using `as` requires at least Julia 1.6") - elseif word == K"using" && !has_import_prefix + if word == K"using" && !has_import_prefix # using A as B ==> (using (error (as (. A) B))) # using A, B as C ==> (using (. A) (error (as (. B) C))) emit(ps, mark, K"error", error="`using` with `as` renaming requires a `:` and context module") end + #v1.5: import A as B ==> (import (error (as (. A) B))) + min_supported_version(v"1.6", ps, mark, "`import ... 
as`") return true else return false @@ -2423,19 +2425,19 @@ end # [x y ; z] ==> (vcat (row x y) z) # # Double semicolon with spaces allowed (only) for line continuation -# [x y ;;\n z w] ==> (hcat x y z w) -# [x y ;; z w] ==> (hcat x y (error) z w) +#v1.7: [x y ;;\n z w] ==> (hcat x y z w) +#v1.7: [x y ;; z w] ==> (hcat x y (error) z w) # # Single elements in rows -# [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z) -# [x y ;;; z ] ==> (ncat-3 (row x y) z) +#v1.7: [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z) +#v1.7: [x y ;;; z ] ==> (ncat-3 (row x y) z) # # Higher dimensional ncat # Row major -# [x y ; z w ;;; a b ; c d] ==> +#v1.7: [x y ; z w ;;; a b ; c d] ==> # (ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d))) # Column major -# [x ; y ;; z ; w ;;; a ; b ;; c ; d] ==> +#v1.7: [x ; y ;; z ; w ;;; a ; b ;; c ; d] ==> # (ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d))) # # flisp: parse-array @@ -2444,7 +2446,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) # Outer array parsing loop - parse chain of separators with descending # precedence such as - # [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e) + #v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e) # # Ascending and equal precedence is handled by parse_array_inner. # @@ -2607,6 +2609,13 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) end end +function check_ncat_compat(ps, mark, k) + # https://github.com/JuliaLang/julia/pull/33697 + if k == K"ncat" + min_supported_version(v"1.7", ps, mark, "multidimensional array syntax") + end +end + # Parse un-prefixed parenthesized syntax. This is hard because parentheses are # *very* overloaded! 
# @@ -2822,6 +2831,7 @@ function parse_string(ps::ParseState) m = position(ps) parse_atom(ps) if ps.julia_version >= v"1.6" + # https://github.com/JuliaLang/julia/pull/38692 prev = peek_behind(ps) if prev.kind == K"String" # Wrap interpolated literal strings in (string) so we can @@ -2984,6 +2994,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"]", ps.end_symbol) emit(ps, mark, ckind, cflags) + check_ncat_compat(ps, mark, ckind) elseif leading_kind == K"{" # cat expression bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) @@ -3026,6 +3037,7 @@ function emit_braces(ps, mark, ckind, cflags) # {x ;;; y} ==> (bracescat (nrow-3 x y)) emit(ps, mark, K"nrow", cflags) end + check_ncat_compat(ps, mark, ckind) outk = ckind in KSet`vect comprehension` ? K"braces" : K"bracescat" emit(ps, mark, outk) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4c11963aac68c..dfef08a616349 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -527,7 +527,7 @@ tests = [ ":(::)" => "(quote ::)" # braces "{x y}" => "(bracescat (row x y))" - "{x ;;; y}" => "(bracescat (nrow-3 x y))" + ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" # Macro names can be keywords "@end x" => "(macrocall @end x)" # __dot__ macro @@ -553,14 +553,14 @@ tests = [ "[x y ;;\n z w]" => "(hcat x y z w)" # "[x y ;; z w]" => "(hcat x y (error) z w)" # FIXME # Single elements in rows - "[x ; y ;; z ]" => "(ncat-2 (nrow-1 x y) z)" - "[x y ;;; z ]" => "(ncat-3 (row x y) z)" + ((v=v"1.7",), "[x ; y ;; z ]") => "(ncat-2 (nrow-1 x y) z)" + ((v=v"1.7",), "[x y ;;; z ]") => "(ncat-3 (row x y) z)" # Higher dimensional ncat # Row major - "[x y ; z w ;;; a b ; c d]" => + ((v=v"1.7",), "[x y ; z w ;;; a b ; c d]") => "(ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d)))" # Column major - "[x ; y ;; z ; w ;;; a ; b ;; c ; d]" => + ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b 
;; c ; d]") => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" ], JuliaSyntax.parse_string => [ From f6f4ae010594e33bf8300023177042d9fb675af3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 14 Jan 2022 18:41:23 +1000 Subject: [PATCH 0319/1109] Skeleton for generated docs --- JuliaSyntax/.github/workflows/CI.yml | 15 +++++ JuliaSyntax/docs/Manifest.toml | 92 ++++++++++++++++++++++++++++ JuliaSyntax/docs/Project.toml | 2 + JuliaSyntax/docs/make.jl | 19 ++++++ JuliaSyntax/docs/src/design.md | 2 + JuliaSyntax/docs/src/index.md | 2 + JuliaSyntax/docs/src/reference.md | 7 +++ 7 files changed, 139 insertions(+) create mode 100644 JuliaSyntax/docs/Manifest.toml create mode 100644 JuliaSyntax/docs/Project.toml create mode 100644 JuliaSyntax/docs/make.jl create mode 100644 JuliaSyntax/docs/src/design.md create mode 100644 JuliaSyntax/docs/src/index.md create mode 100644 JuliaSyntax/docs/src/reference.md diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index d70120fcf3d4a..11795d3c6cd59 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -34,3 +34,18 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 +# docs: +# name: Documentation +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v2 +# - uses: julia-actions/setup-julia@latest +# with: +# version: '1.6' +# - run: julia --project=docs -e ' +# using Pkg; +# Pkg.develop(PackageSpec(; path=pwd())); +# Pkg.instantiate();' +# - run: julia --project=docs docs/make.jl +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/JuliaSyntax/docs/Manifest.toml b/JuliaSyntax/docs/Manifest.toml new file mode 100644 index 0000000000000..32615300909ab --- /dev/null +++ b/JuliaSyntax/docs/Manifest.toml @@ -0,0 +1,92 @@ +# This file is machine-generated - editing it directly is not advised + +[[ANSIColoredPrinters]] +git-tree-sha1 = 
"574baf8110975760d391c710b6341da1afa48d8c" +uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" +version = "0.0.1" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.6" + +[[Documenter]] +deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] +git-tree-sha1 = "f425293f7e0acaf9144de6d731772de156676233" +uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +version = "0.27.10" + +[[IOCapture]] +deps = ["Logging", "Random"] +git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "0.2.2" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.2" + +[[LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "92f91ba9e5941fc781fecf5494ac1da87bdac775" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.2.0" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = 
"9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/JuliaSyntax/docs/Project.toml b/JuliaSyntax/docs/Project.toml new file mode 100644 index 0000000000000..dfa65cd107d06 --- /dev/null +++ b/JuliaSyntax/docs/Project.toml @@ -0,0 +1,2 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl new file mode 100644 index 0000000000000..f753c467440ad --- /dev/null +++ b/JuliaSyntax/docs/make.jl @@ -0,0 +1,19 @@ +using Documenter, JuliaSyntax + +makedocs(; + modules=[JuliaSyntax], + format=Documenter.HTML(), + pages=[ + "Overview" => "index.md", + "API Reference" => "reference.md", + "Design Discussion" => "design.md", + ], + repo="https://github.com/c42f/JuliaSyntax.jl/blob/{commit}{path}#L{line}", + sitename="JuliaSyntax.jl", + authors = "Chris Foster and contributors: https://github.com/c42f/JuliaSyntax.jl/graphs/contributors" +) + +deploydocs(; + repo="github.com/c42f/JuliaSyntax.jl", + push_preview=true +) diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md new file mode 100644 index 0000000000000..a0d2d12947548 --- /dev/null +++ b/JuliaSyntax/docs/src/design.md @@ -0,0 +1,2 @@ +# Design discussion + diff --git a/JuliaSyntax/docs/src/index.md b/JuliaSyntax/docs/src/index.md new file mode 100644 index 0000000000000..a8605a79474d3 --- /dev/null +++ b/JuliaSyntax/docs/src/index.md @@ -0,0 +1,2 @@ +# JuliaSyntax.jl + diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md new file mode 100644 index 0000000000000..1d966e4eb3389 --- /dev/null +++ b/JuliaSyntax/docs/src/reference.md 
@@ -0,0 +1,7 @@ +# API Reference + +## Parsing code + +```@docs +``` + From 9df93447bb60ba44584b80b099244e5b54a76a3b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 20 Jan 2022 14:46:08 +1000 Subject: [PATCH 0320/1109] Fix parsing of dot before operators in export paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In `import A.==` the `.` is a separator, not part of a dotted operator `.==`. Fix this. Also fix the associated case of importing modules named with operators as names, eg `import .⋆.f` --- JuliaSyntax/README.md | 4 +++ JuliaSyntax/src/parse_stream.jl | 18 +++++++++--- JuliaSyntax/src/parser.jl | 44 ++++++++++++++++++++++-------- JuliaSyntax/test/parse_packages.jl | 2 +- JuliaSyntax/test/parser.jl | 10 +++++-- 5 files changed, 59 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 7132415497564..8d2c8fda55aef 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -547,6 +547,10 @@ Here's some behaviors which seem to be bugs: * Octal escape sequences saturate rather than being reported as errors. Eg, `"\777"` results in `"\xff"`. This is inconsistent with `Base.parse(::Type{Int}, ...)` +* Leading dots in import paths with operator-named modules are parsed into + dotted operators rather than a relative path. Ie, we have `import .⋆` parsing + to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency + with the parsing of `import .A`. ## Parsing / AST oddities and warts diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 45939a354ecef..769ec2a842ec7 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -396,18 +396,28 @@ function bump_glue(stream::ParseStream, kind, flags, num_tokens) end """ -Bump a token, splitting it into several pieces + bump_split(stream, token_spec1, [token_spec2 ...]) -Wow, this is a hack! It helps resolves the occasional lexing ambiguities. 
For +Bump the next token, splitting it into several pieces + +Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. +The number of input bytes of the last spec is taken from the remaining bytes of +the input token, with the associated `nbyte` ignored. + +This is a hack which helps resolves the occasional lexing ambiguity. For example -* Whether .+ should be a single token or a composite (. +) +* Whether .+ should be a single token or the composite (. +) which is used for + standalone operators. * Whether ... is splatting (most of the time) or three . tokens in import paths + +TODO: Are these the only cases? Can we replace this general utility with a +simpler one which only splits preceding dots? """ function bump_split(stream::ParseStream, split_spec...) tok = popfirst!(stream.lookahead) fbyte = first_byte(tok) for (i, (nbyte, k, f)) in enumerate(split_spec) - lbyte = i == length(split_spec) ? last_byte(tok) : fbyte + nbyte - 1 + lbyte = (i == length(split_spec)) ? last_byte(tok) : fbyte + nbyte - 1 push!(stream.ranges, TaggedRange(SyntaxHead(k, f), kind(tok), fbyte, lbyte, lastindex(stream.ranges) + 1)) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index cb567b70a3efe..532c6bac90d29 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2213,11 +2213,19 @@ function parse_import_path(ps::ParseState) end first_dot = false end - # import @x ==> (import (. @x)) - # import $A ==> (import (. ($ A))) - parse_atsym(ps) + if is_dotted(peek_token(ps)) + # Modules with operator symbol names + # import .⋆ ==> (import (. . ⋆)) + bump_trivia(ps) + bump_split(ps, (1,K".",EMPTY_FLAGS), (1,peek(ps),EMPTY_FLAGS)) + else + # import @x ==> (import (. @x)) + # import $A ==> (import (. ($ A))) + parse_atsym(ps) + end while true - k = peek(ps) + t = peek_token(ps) + k = kind(t) if k == K"." # import A.B ==> (import (. A B)) # import $A.@x ==> (import (. 
($ A) @x)) @@ -2225,18 +2233,32 @@ function parse_import_path(ps::ParseState) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_atsym(ps) - elseif k in KSet`NewlineWs ; , : EndMarker` - # import A; B ==> (import (. A)) - break - elseif k == K".." - # Nonsensical?? - # import A.. ==> (import (. A .)) - bump_split(ps, (1,K".",TRIVIA_FLAG), (1,K".",EMPTY_FLAGS)) + elseif is_dotted(t) + # Resolve tokenization ambiguity: In imports, dots are part of the + # path, not operators + # import A.== ==> (import (. A ==)) + # import A.⋆.f ==> (import (. A ⋆ f)) + if t.had_whitespace + # Whitespace in import path allowed but discouraged + # import A .== ==> (import (. A ==)) + emit_diagnostic(ps, whitespace=true, + warning="space between dots in import path") + end + bump_trivia(ps) + bump_split(ps, (1,K".",TRIVIA_FLAG), (1,k,EMPTY_FLAGS)) + # elseif k == K".." + # # The flisp parser does this, but it's nonsense? + # # import A.. !=> (import (. A .)) + # bump_split(ps, (1,K".",TRIVIA_FLAG), (1,K".",EMPTY_FLAGS)) elseif k == K"..." # Import the .. operator # import A... ==> (import (. A ..)) bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) + elseif k in KSet`NewlineWs ; , : EndMarker` + # import A; B ==> (import (. A)) + break else + # Could we emit a more comprehensible error here? 
break end end diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index a6385d1b6eba0..be99af221172d 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -14,8 +14,8 @@ end @testset "Parse Base tests" begin test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")) end +=# @testset "Parse Julia stdlib" begin test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "stdlib")) end -=# diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index dfef08a616349..70c6c29911e33 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -429,22 +429,26 @@ tests = [ "using A as B" => "(using (error (as (. A) B)))" "using A, B as C" => "(using (. A) (error (as (. B) C)))" # parse_import_path - # When parsing import we must split these into single dots + # When parsing import we must split initial dots into nontrivial + # leading dots for relative paths "import .A" => "(import (. . A))" "import ..A" => "(import (. . . A))" "import ...A" => "(import (. . . . A))" "import ....A" => "(import (. . . . . A))" # Dots with spaces are allowed (a misfeature?) "import . .A" => "(import (. . . A))" + # Modules with operator symbol names + "import .⋆" => "(import (. . ⋆))" # Expressions allowed in import paths "import @x" => "(import (. @x))" "import \$A" => "(import (. (\$ A)))" "import \$A.@x" => "(import (. (\$ A) @x))" "import A.B" => "(import (. A B))" "import A.B.C" => "(import (. A B C))" - "import A; B" => "(import (. A))" - "import A.." => "(import (. A .))" + "import A.==" => "(import (. A ==))" + "import A.⋆.f" => "(import (. A ⋆ f))" "import A..." => "(import (. A ..))" + "import A; B" => "(import (. 
A))" ], JuliaSyntax.parse_iteration_spec => [ "i = rhs" => "(= i rhs)" From 336d4e93811466f1ea4d3e07632a46a4f07ba678 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 20 Jan 2022 15:43:51 +1000 Subject: [PATCH 0321/1109] Allow word operators where keywords are allowed --- JuliaSyntax/src/parser.jl | 11 +++++++---- JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 532c6bac90d29..a4ed67b25e630 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1502,13 +1502,15 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_raw_string(ps) t = peek_token(ps) k = kind(t) - if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_number(k)) + if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)) # Macro sufficies can include keywords and numbers # x"s"y ==> (macrocall @x_str "s" "y") # x"s"end ==> (macrocall @x_str "s" "end") + # x"s"in ==> (macrocall @x_str "s" "in") # x"s"2 ==> (macrocall @x_str "s" 2) # x"s"10.0 ==> (macrocall @x_str "s" 10.0) - suffix_kind = (k == K"Identifier" || is_keyword(k)) ? K"String" : k + suffix_kind = (k == K"Identifier" || is_keyword(k) || + is_word_operator(k)) ? 
K"String" : k bump(ps, remap_kind=suffix_kind) end emit(ps, mark, K"macrocall") @@ -2864,13 +2866,14 @@ function parse_string(ps::ParseState) emit(ps, m, K"string", prev.flags) end end - elseif is_identifier(k) || is_keyword(k) + elseif is_identifier(k) || is_keyword(k) || is_word_operator(k) # "a $foo b" ==> (string "a " foo " b") # "$outer" ==> (string outer) + # "$in" ==> (string in) parse_atom(ps) else bump_invisible(ps, K"error", - error="Identifier or parenthesized expression expected after \$ in string") + error="identifier or parenthesized expression expected after \$ in string") end elseif k == K"String" bump(ps, str_flags) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 70c6c29911e33..0c08429953356 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -259,6 +259,7 @@ tests = [ # Macro sufficies can include keywords and numbers "x\"s\"y" => """(macrocall @x_str "s" "y")""" "x\"s\"end" => """(macrocall @x_str "s" "end")""" + "x\"s\"in" => """(macrocall @x_str "s" "in")""" "x\"s\"2" => """(macrocall @x_str "s" 2)""" "x\"s\"10.0" => """(macrocall @x_str "s" 10.0)""" ], @@ -574,6 +575,7 @@ tests = [ ((v=v"1.5",), "\"hi\$(\"ho\")\"") => "(string \"hi\" \"ho\")" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$outer\"" => "(string outer)" + "\"\$in\"" => "(string in)" "\"\"" => "\"\"" "\"\$x\$y\$z\"" => "(string x y z)" "\"\$(x)\"" => "(string x)" From f35ac6302335d5ed25910d4887e98ffd9ebc806f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 20 Jan 2022 16:10:31 +1000 Subject: [PATCH 0322/1109] Allow interpolations in macro module paths It turns out that the flisp parser does allow this, so we should too. 
--- JuliaSyntax/src/parser.jl | 8 ++++---- JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a4ed67b25e630..36a990c0ba04a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1269,7 +1269,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # source range of the @-prefixed part of a macro macro_atname_range = nothing kb = peek_behind(ps).kind - is_valid_modref = is_identifier(kb) || kb == K"." + # $A.@x ==> (macrocall (. ($ A) (quote @x))) + is_valid_modref = is_identifier(kb) || kb == K"." || kb == K"$" # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. macro_name_position = position(ps) # points to same output span as peek_behind @@ -1425,9 +1426,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, m, K"$") emit(ps, m, K"inert") emit(ps, mark, K".") - # Syntax extension: We could allow interpolations like A.$B.@C - # to parse in the module reference path. But disallow this for - # now for simplicity and for compatibility with the flisp parser. + # A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x))) + this_iter_valid_modref = true elseif k == K"@" # A macro call after some prefix A has been consumed # A.@x ==> (macrocall (. A (quote @x))) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 0c08429953356..0b9541e30152e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -193,6 +193,7 @@ tests = [ "f(a,b)" => "(call f a b)" "f (a)" => "(call f (error-t) a)" "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" + "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" # do "f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))" "f() do\nend" => "(do (call f) (-> (tuple) (block)))" @@ -241,6 +242,7 @@ tests = [ "A.: +" => "(. A (quote (error-t) +))" "f.\$x" => "(. 
f (inert (\$ x)))" "f.\$(x+y)" => "(. f (inert (\$ (call-i x + y))))" + "A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))" # Field/property syntax "f.x.y" => "(. (. f (quote x)) (quote y))" "x .y" => "(. x (error-t) (quote y))" From 4b9d2f022df7defb0c490e155e42bcd60cc9933f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 20 Jan 2022 18:00:30 +1000 Subject: [PATCH 0323/1109] More consistent parsing of assignment in const/global/local Split parse_assignment up so that we can reuse the second part - parse_assignment_with_initial_ex - within the parsing code for const,global and local to consistently parse all sorts of assignments when they're nested within a global/const/local declaration. --- JuliaSyntax/src/parser.jl | 53 +++++++++++++++++++++----------------- JuliaSyntax/test/parser.jl | 18 ++++++++----- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 36a990c0ba04a..b56c3faf407d7 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -481,6 +481,10 @@ end function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) mark = position(ps) down(ps) + parse_assignment_with_initial_ex(ps, mark, down, equals_is_kw) +end + +function parse_assignment_with_initial_ex(ps::ParseState, mark, down, equals_is_kw::Bool) t = peek_token(ps) k = kind(t) if !is_prec_assignment(k) @@ -1784,8 +1788,8 @@ function parse_const_local_global(ps) scope_k = K"Nothing" k = peek(ps) if k in KSet`global local` - # global x = 1 ==> (global (= x 1)) - # local x = 1 ==> (local (= x 1)) + # global x ==> (global x) + # local x ==> (local x) scope_k = k bump(ps, TRIVIA_FLAG) if peek(ps) == K"const" @@ -1807,37 +1811,38 @@ function parse_const_local_global(ps) bump(ps, TRIVIA_FLAG) end end - # Like parse_eq/parse_assignment, but specialized so that we can omit the + # Like parse_assignment, but specialized so that we can omit the # tuple when there's commas but no assignment. 
beforevar_mark = position(ps) n_commas = parse_comma(ps, false) t = peek_token(ps) - if is_prec_assignment(t) && !is_decorated(t) - if n_commas >= 1 - # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) - emit(ps, beforevar_mark, K"tuple") - end - bump(ps, TRIVIA_FLAG) - parse_comma(ps) - emit(ps, beforevar_mark, K"=") - elseif has_const - # Const fields https://github.com/JuliaLang/julia/pull/43305 - # const x ==> (const x) - # const x::T ==> (const (:: x T)) - if n_commas >= 1 - # Maybe nonsensical? But this is what the flisp parser does. - # const x,y ==> (const (tuple x y)) - emit(ps, beforevar_mark, K"tuple") - end - #v1.7: const x ==> (const (error x)) - min_supported_version(v"1.8", ps, beforevar_mark, - "`const` struct field without assignment") + has_assignment = is_prec_assignment(t) + if n_commas >= 1 && (has_assignment || has_const) + # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) + # Maybe nonsensical? But this is what the flisp parser does. + #v1.8: const x,y ==> (const (tuple x y)) + emit(ps, beforevar_mark, K"tuple") + end + if has_assignment + # const x = 1 ==> (const (= x 1)) + # global x ~ 1 ==> (global (call-i x ~ 1)) + # global x += 1 ==> (global (+= x 1)) + parse_assignment_with_initial_ex(ps, beforevar_mark, parse_comma, false) else - #v1.8: const x ==> (const x) # global x ==> (global x) # local x ==> (local x) # global x,y ==> (global x y) end + if has_const && (!has_assignment || is_dotted(t)) + # Const fields https://github.com/JuliaLang/julia/pull/43305 + #v1.8: const x ==> (const x) + #v1.8: const x::T ==> (const (:: x T)) + # Disallowed const forms on <= 1.7 + #v1.7: const x ==> (const (error x)) + #v1.7: const x .= 1 ==> (const (error (.= x 1))) + min_supported_version(v"1.8", ps, beforevar_mark, + "`const` struct field without assignment") + end if scope_k != K"Nothing" emit(ps, scope_mark, scope_k) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 0b9541e30152e..fdbf0b16b8f2e 100644 --- 
a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -345,21 +345,25 @@ tests = [ "if a xx else yy end" => "(if a (block xx) (block yy))" ], JuliaSyntax.parse_const_local_global => [ - "global x = 1" => "(global (= x 1))" - "local x = 1" => "(local (= x 1))" + "global x" => "(global x)" + "local x" => "(local x)" "global const x = 1" => "(const (global (= x 1)))" "local const x = 1" => "(const (local (= x 1)))" "const x = 1" => "(const (= x 1))" "const global x = 1" => "(const (global (= x 1)))" "const local x = 1" => "(const (local (= x 1)))" + "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" + ((v=v"1.8",), "const x,y") => "(const (tuple x y))" + "const x = 1" => "(const (= x 1))" + "global x ~ 1" => "(global (call-i x ~ 1))" + "global x += 1" => "(global (+= x 1))" "global x" => "(global x)" "local x" => "(local x)" "global x,y" => "(global x y)" - "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" - "const x" => "(const (error x))" - ((v=v"1.8",), "const x") => "(const x)" - ((v=v"1.8",), "const x::T") => "(const (:: x T))" - ((v=v"1.8",), "const x,y") => "(const (tuple x y))" + ((v=v"1.8",), "const x") => "(const x)" + ((v=v"1.8",), "const x::T") => "(const (:: x T))" + ((v=v"1.7",), "const x") => "(const (error x))" + ((v=v"1.7",), "const x .= 1") => "(const (error (.= x 1)))" ], JuliaSyntax.parse_function => [ "macro while(ex) end" => "(macro (call (error while) ex) (block))" From 14a2a5324cea006d0775297af04ee75e3bbbaf6f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 20 Jan 2022 21:06:22 +1000 Subject: [PATCH 0324/1109] Fix bracketed interpolation syntax in macro names, exports, etc. 
--- JuliaSyntax/src/parser.jl | 28 ++++++++++++---------------- JuliaSyntax/test/parser.jl | 3 +++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b56c3faf407d7..8bfce3f9a57e3 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1220,24 +1220,18 @@ function parse_unary_prefix(ps::ParseState) end end -# Parse a symbol or interpolation syntax (a restricted version of -# parse_unary_prefix) +# Parse a symbol or interpolation syntax function parse_identifier_or_interpolate(ps::ParseState) mark = position(ps) - if peek(ps) == K"$" - bump(ps, TRIVIA_FLAG) - # $a ==> ($ a) - # $$a ==> ($ ($ a)) - parse_unary_prefix(ps) - emit(ps, mark, K"$") - else - parse_atom(ps) - b = peek_behind(ps) - # export (x::T) ==> (export (error (:: x T))) - # export outer ==> (export outer) - if !b.is_leaf || !(is_identifier(b.kind) || is_operator(b.kind)) - emit(ps, mark, K"error", error="Expected identifier") - end + parse_unary_prefix(ps) + b = peek_behind(ps) + # export (x::T) ==> (export (error (:: x T))) + # export outer ==> (export outer) + # export ($f) ==> (export ($ f)) + ok = (b.is_leaf && (is_identifier(b.kind) || is_operator(b.kind))) || + (!b.is_leaf && b.kind == K"$") + if !ok + emit(ps, mark, K"error", error="Expected identifier") end end @@ -1874,6 +1868,8 @@ function parse_function(ps::ParseState) # macro f() end ==> (macro (call f) (block)) # macro (:)(ex) end ==> (macro (call : ex) (block)) # macro (type)(ex) end ==> (macro (call type ex) (block)) + # macro $f() end ==> (macro (call ($ f)) (block)) + # macro ($f)() end ==> (macro (call ($ f)) (block)) end else if peek(ps) == K"(" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index fdbf0b16b8f2e..40946b3c687bf 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -332,6 +332,7 @@ tests = [ "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" "export (x::T)" => "(export 
(error (:: x T)))" "export outer" => "(export outer)" + "export (\$f)" => "(export (\$ f))" ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif (block b) (block yy) (block zz)))" @@ -370,6 +371,8 @@ tests = [ "macro f() end" => "(macro (call f) (block))" "macro (:)(ex) end" => "(macro (call : ex) (block))" "macro (type)(ex) end" => "(macro (call type ex) (block))" + "macro \$f() end" => "(macro (call (\$ f)) (block))" + "macro (\$f)() end" => "(macro (call (\$ f)) (block))" "function (x) body end"=> "(function (tuple x) (block body))" "function (x,y) end" => "(function (tuple x y) (block))" "function (x=1) end" => "(function (tuple (kw x 1)) (block))" From 38e7062a02d9fb0dea58fe84cb69c087bff00833 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 20 Jan 2022 21:20:13 +1000 Subject: [PATCH 0325/1109] Fix parsing chains of mixtures of decorated and undecorated operators --- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8bfce3f9a57e3..f51419abd80b6 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -796,6 +796,7 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) down(ps) if kind(t) in chain_ops && !is_decorated(t) # a + b + c ==> (call-i a + b c) + # a + b .+ c ==> (call-i (call-i a + b) .+ c) parse_chain(ps, down, kind(t)) end # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c) @@ -808,7 +809,7 @@ end # # flisp: parse-chain function parse_chain(ps::ParseState, down, op_kind) - while (t = peek_token(ps); kind(t) == op_kind) + while (t = peek_token(ps); kind(t) == op_kind && !is_decorated(t)) if ps.space_sensitive && t.had_whitespace && is_both_unary_and_binary(kind(t)) && !peek_token(ps, 2).had_whitespace diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 40946b3c687bf..74992c58603e2 100644 --- 
a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -97,6 +97,7 @@ tests = [ JuliaSyntax.parse_expr => [ "a - b - c" => "(call-i (call-i a - b) - c)" "a + b + c" => "(call-i a + b c)" + "a + b .+ c" => "(call-i (call-i a + b) .+ c)" # parse_with_chains: # The following is two elements of a hcat "[x +y]" => "(hcat x (call + y))" From c44148075d03db0993b73adc25058b147d326567 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 12:06:59 +1000 Subject: [PATCH 0326/1109] Basic tools for reducing test cases This includes some simple tooling for test case reduction. This isn't very precise yet and in various circumstances still can't reduce the test case down from the whole file. --- JuliaSyntax/test/parse_packages.jl | 23 +++++--- JuliaSyntax/test/test_utils.jl | 84 ++++++++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index be99af221172d..b46681b15a150 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -6,16 +6,25 @@ test_parse_all_in_path(joinpath(pkgdir, "test")) end -@testset "Parse Base" begin - test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base")) +base_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") +@testset "Parse Base at $base_path" begin + test_parse_all_in_path(base_path) end #= -@testset "Parse Base tests" begin - test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")) +base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") +@testset "Parse Base tests at $base_tests_path" begin + test_parse_all_in_path(base_tests_path) end -=# -@testset "Parse Julia stdlib" begin - test_parse_all_in_path(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "stdlib")) +@testset "Parse Julia stdlib at $(Sys.STDLIB)" begin + for stdlib in readdir(Sys.STDLIB) + fulldir = joinpath(Sys.STDLIB, stdlib) + if isdir(fulldir) + 
@testset "Parse $stdlib" begin + test_parse_all_in_path(joinpath(Sys.STDLIB, fulldir)) + end + end + end end +=# diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 863d0a1b20a65..83dc890388d97 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -15,14 +15,32 @@ using JuliaSyntax: # Node inspection kind, flags, + is_trivia, + sourcetext, haschildren, children, - child + child, + flisp_parse_all + +function remove_macro_linenums!(ex) + if Meta.isexpr(ex, :macrocall) + ex.args[2] = nothing + end + if ex isa Expr + map!(remove_macro_linenums!, ex.args, ex.args) + end + return ex +end + +function remove_all_linenums!(ex) + JuliaSyntax.remove_linenums!(ex) + remove_macro_linenums!(ex) +end function parsers_agree_on_file(path) code = read(path, String) - JuliaSyntax.remove_linenums!(JuliaSyntax.parse_all(Expr, code)) == - JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + JuliaSyntax.remove_linenums!(parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(flisp_parse_all(code)) end function find_source_in_path(basedir) @@ -41,7 +59,65 @@ function test_parse_all_in_path(basedir) end end -# Version of test_parse for interactive exploration +#------------------------------------------------------------------------------- +# Test case reduction + +# Check whether a given SyntaxNode converts to the same Expr as the flisp +# parser produces from the source text of the node. +function equals_flisp_parse(tree) + node_text = sourcetext(tree) + fl_ex = kind(tree) == K"toplevel" ? + flisp_parse_all(node_text) : + Meta.parse(node_text, raise=false) + if Meta.isexpr(fl_ex, :error) + return true # Something went wrong in reduction; ignore these cases 😬 + end + remove_all_linenums!(Expr(tree)) == remove_all_linenums!(fl_ex) +end + +""" +Select a subtree of `tree` which is inconsistent between flisp and JuliaSyntax +parsers. This isn't very precise yet! 
+ +TODO: +* For some syntax elements (eg, the `x in xs` inside `for x in xs`) the + children can't be parsed out of context. Fix this. +* Replace good siblings of bad nodes with placeholders. For blocks, delete such + siblings. +""" +function reduce_test(tree) + if equals_flisp_parse(tree) + return nothing + end + if !haschildren(tree) + return tree + else + subtrees = [] + for child in children(tree) + if is_trivia(child) || !haschildren(child) + continue + end + t = reduce_test(child) + if !isnothing(t) + push!(subtrees, t) + end + end + if length(subtrees) == 1 + return only(subtrees) + end + end + return tree +end + + +#------------------------------------------------------------------------------- +""" + itest_parse(production, code; julia_version::VersionNumber=v"1.6") + +Parse `code`, entering the recursive descent parser at the given function +`production`. This function shows the various tree representations on stdout +for debugging. +""" function itest_parse(production, code; julia_version::VersionNumber=v"1.6") stream = ParseStream(code) production(JuliaSyntax.ParseState(stream, julia_version)) From 5bd95c2ab52c44ab5cc8b6d5494b12669db2959c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 15:02:26 +1000 Subject: [PATCH 0327/1109] Tokenize: remove special parsing of var It turns out that having this special token type in the lexer isn't actually very natural. Instead, we leave dealing with `var` to the parser. 
--- JuliaSyntax/Tokenize/src/lexer.jl | 28 ++++++------------------- JuliaSyntax/Tokenize/src/token.jl | 4 +--- JuliaSyntax/Tokenize/src/token_kinds.jl | 3 +-- JuliaSyntax/Tokenize/test/lexer.jl | 10 ++------- 4 files changed, 10 insertions(+), 35 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index ea3b81f6cb5c5..b261ceaae713a 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -284,7 +284,7 @@ function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) suffix = false if kind in (Tokens.ERROR, Tokens.STRING, Tokens.CMD) str = String(l.io.data[(l.token_startpos + 1):position(l)]) - elseif (kind == Tokens.IDENTIFIER || kind == Tokens.VAR_IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE || kind == Tokens.NEWLINE_WS) + elseif (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE || kind == Tokens.NEWLINE_WS) str = String(take!(l.charstore)) elseif optakessuffix(kind) str = "" @@ -420,7 +420,7 @@ function _next_token(l::Lexer, c) elseif c == '`' return lex_backtick(l); elseif is_identifier_start_char(c) - return lex_identifier(l, c, true) + return lex_identifier(l, c) elseif isdigit(c) return lex_digit(l, Tokens.INTEGER) elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR @@ -460,7 +460,7 @@ function lex_string_chunk(l) state.paren_depth + 1) return emit(l, Tokens.LPAREN) elseif is_identifier_start_char(pc) - return lex_identifier(l, readchar(l), false) + return lex_identifier(l, readchar(l)) else # Getting here is a syntax error - fall through to reading string # characters and let the parser deal with it. @@ -914,17 +914,6 @@ function lex_quote(l::Lexer) end end -# Lex var"..." identifiers. 
-# The prefix `var"` has been consumed -function lex_var(l::Lexer) - read_raw_string(l, '"', false) - if accept(l, '"') - return emit(l, Tokens.VAR_IDENTIFIER) - else - return emit_error(l, Tokens.EOF_VAR) - end -end - function string_terminates(l, delim::Char, triplestr::Bool) if triplestr c1, c2, c3 = peekchar3(l) @@ -1116,7 +1105,7 @@ function lex_backtick(l::Lexer) end const MAX_KW_LENGTH = 10 -function lex_identifier(l::Lexer{IO_t,T}, c, allow_var) where {IO_t,T} +function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} if T == Token readon(l) end @@ -1135,12 +1124,7 @@ function lex_identifier(l::Lexer{IO_t,T}, c, allow_var) where {IO_t,T} if n > MAX_KW_LENGTH emit(l, IDENTIFIER) else - # FIXME: var"" not allowed in strings - if allow_var && h == var_kw_hash && accept(l, '"') - return lex_var(l) - else - return emit(l, get(kw_hash, h, IDENTIFIER)) - end + emit(l, get(kw_hash, h, IDENTIFIER)) end end @@ -1195,6 +1179,7 @@ Tokens.STRUCT, Tokens.TRY, Tokens.TYPE, Tokens.USING, +Tokens.VAR, Tokens.WHILE, Tokens.IN, Tokens.ISA, @@ -1204,6 +1189,5 @@ Tokens.FALSE, ] const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) -const var_kw_hash = simple_hash("var") end # module diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index 8d6893a0c8fb0..41bde09981f1a 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -28,7 +28,6 @@ _add_kws() NO_ERR, EOF_MULTICOMMENT, EOF_CHAR, - EOF_VAR, INVALID_NUMERIC_CONSTANT, INVALID_OPERATOR, INVALID_INTERPOLATION_TERMINATOR, @@ -39,7 +38,6 @@ _add_kws() TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", EOF_CHAR => "unterminated character literal", - EOF_VAR => "unterminated var\"...\" identifier", INVALID_NUMERIC_CONSTANT => "invalid numeric constant", INVALID_OPERATOR => "invalid operator", INVALID_INTERPOLATION_TERMINATOR => "interpolated variable ends with invalid character; use `\$(...)` instead", @@ -100,7 +98,7 @@ endpos(t::AbstractToken) = t.endpos startbyte(t::AbstractToken) = t.startbyte endbyte(t::AbstractToken) = t.endbyte function untokenize(t::Token) - if t.kind == IDENTIFIER || t.kind == VAR_IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == NEWLINE_WS || t.kind == ERROR + if t.kind == IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == NEWLINE_WS || t.kind == ERROR return t.val elseif iskeyword(t.kind) return lowercase(string(t.kind)) diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 30c8724a7a78d..efa3c2022378b 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -4,7 +4,6 @@ COMMENT, # aadsdsa, #= fdsf #= WHITESPACE, # '\n \t' IDENTIFIER, # foo, Σxx - VAR_IDENTIFIER, # var"#1" AT_SIGN, # @ COMMA, #, SEMICOLON, # ; @@ -44,6 +43,7 @@ TRY, TYPE, USING, + VAR, WHILE, end_keywords, @@ -839,7 +839,6 @@ # like CORE_DOC_MACRO_NAME) begin_macro_names, MACRO_NAME, # A macro name identifier - VAR_MACRO_NAME, # @var"..." STRING_MACRO_NAME, # macname"some_str" CMD_MACRO_NAME, # macname`some_str` DOT_MACRO_NAME, # The macro name of @. 
diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index c885e99feaecf..df92235b07c36 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -182,13 +182,6 @@ end @test tok("somtext falsething", 3).kind == T.IDENTIFIER end -@testset "tokenizing var identifiers" begin - t = tok("var\"#1\"") - @test t.kind == T.VAR_IDENTIFIER && untokenize(t) == "var\"#1\"" - t = tok("var\" \"") - @test t.kind == T.VAR_IDENTIFIER && untokenize(t) == "var\" \"" -end - @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .+1")) @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0.+1")) @@ -429,7 +422,7 @@ end @test ts[ 1] ~ (T.WHITESPACE , " " ) @test ts[ 2] ~ (T.DQUOTE , "\"" ) @test ts[ 3] ~ (T.EX_OR , "\$" ) - @test ts[ 4] ~ (T.IDENTIFIER , "var" ) + @test ts[ 4] ~ (T.VAR , "var" ) @test ts[ 5] ~ (T.DQUOTE , "\"" ) @test ts[ 6] ~ (T.IDENTIFIER , "x" ) @test ts[ 7] ~ (T.DQUOTE , "\"" ) @@ -761,6 +754,7 @@ const all_kws = Set(["abstract", "while", "in", "isa", + "var", "where", "true", "false", From b1e61c721b7bbca62258a4314f9194b49ae37879 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 15:08:30 +1000 Subject: [PATCH 0328/1109] Remove use of K"VarIdentifier" from parser Removing this allows us to remove the VAR_IDENTIFIER/VarIdentifier token kind, the VAR_EOF token error and the derived VarMacroName kind. The small cost for removing all this is the need to remap the kind of the raw string following `var` into an identifier in the parser. 
Also fix parsing of var followed by opening brackets, as in var"x"( --- JuliaSyntax/src/parser.jl | 55 +++++++++++++++++----------------- JuliaSyntax/src/syntax_tree.jl | 4 --- JuliaSyntax/src/token_kinds.jl | 5 ++-- JuliaSyntax/src/tokens.jl | 2 +- JuliaSyntax/test/parser.jl | 7 +++-- 5 files changed, 36 insertions(+), 37 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f51419abd80b6..c1b2dada8be2f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -218,11 +218,6 @@ function is_closer_or_newline(ps::ParseState, k) is_closing_token(ps,k) || k == K"NewlineWs" end -# Closing token which isn't a keyword -function is_non_keyword_closer(k) - kind(k) in KSet`, ) ] } ; EndMarker` -end - function is_initial_reserved_word(ps::ParseState, k) k = kind(k) is_iresword = k in KSet`begin while if for try return break continue function @@ -233,7 +228,7 @@ function is_initial_reserved_word(ps::ParseState, k) end function is_contextural_keyword(k) - kind(k) ∈ KSet`as abstract mutable outer primitive type` + kind(k) ∈ KSet`as abstract mutable outer primitive type var` end function is_reserved_word(k) @@ -465,7 +460,7 @@ end function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) - if (is_literal(k) || is_identifier(k)) && k2 in KSet`, ) } ]` + if (is_literal(k) || k == K"Identifier") && k2 in KSet`, ) } ]` # optimization: skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) @@ -918,7 +913,7 @@ function is_juxtapose(ps, prev_k, t) is_initial_reserved_word(ps, prev_k) ))) && # https://github.com/JuliaLang/julia/issues/16356 # 0xenomorph ==> 0x0e - !(prev_k in KSet`BinInt HexInt OctInt` && (is_identifier(k) || is_keyword(k))) && + !(prev_k in KSet`BinInt HexInt OctInt` && (k == K"Identifier" || is_keyword(k))) && (!is_operator(k) || is_radical_op(k)) && !is_closing_token(ps, k) && !is_initial_reserved_word(ps, k) @@ -1229,7 +1224,7 @@ 
function parse_identifier_or_interpolate(ps::ParseState) # export (x::T) ==> (export (error (:: x T))) # export outer ==> (export outer) # export ($f) ==> (export ($ f)) - ok = (b.is_leaf && (is_identifier(b.kind) || is_operator(b.kind))) || + ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || (!b.is_leaf && b.kind == K"$") if !ok emit(ps, mark, K"error", error="Expected identifier") @@ -1269,7 +1264,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = nothing kb = peek_behind(ps).kind # $A.@x ==> (macrocall (. ($ A) (quote @x))) - is_valid_modref = is_identifier(kb) || kb == K"." || kb == K"$" + is_valid_modref = kb in KSet`Identifier . $` # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. macro_name_position = position(ps) # points to same output span as peek_behind @@ -2067,7 +2062,6 @@ end function macro_name_kind(k) return k == K"Identifier" ? K"MacroName" : k == K"." ? K"@." : - k == K"VarIdentifier" ? K"VarMacroName" : internal_error("unrecognized source kind for macro name ", k) end @@ -2868,7 +2862,7 @@ function parse_string(ps::ParseState) emit(ps, m, K"string", prev.flags) end end - elseif is_identifier(k) || is_keyword(k) || is_word_operator(k) + elseif k == K"Identifier" || is_keyword(k) || is_word_operator(k) # "a $foo b" ==> (string "a " foo " b") # "$outer" ==> (string outer) # "$in" ==> (string in) @@ -2906,14 +2900,14 @@ function parse_string(ps::ParseState) end end -function parse_raw_string(ps::ParseState) +function parse_raw_string(ps::ParseState; remap_kind=K"Nothing") emark = position(ps) delim_k = peek(ps) bump(ps, TRIVIA_FLAG) flags = RAW_STRING_FLAG | (delim_k in KSet`""" \`\`\`` ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) if peek(ps) in KSet`String CmdString` - bump(ps, flags) + bump(ps, flags; remap_kind=remap_kind) else outk = delim_k in KSet`" """` ? K"String" : delim_k in KSet`\` \`\`\`` ? 
K"CmdString" : @@ -2980,18 +2974,6 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG, error="unexpected `=`") elseif leading_kind == K"Identifier" bump(ps) - elseif leading_kind == K"VarIdentifier" - bump(ps) - t = peek_token(ps) - if !t.had_whitespace && !(is_operator(kind(t)) || is_non_keyword_closer(t)) - # var"x"end ==> (error (end)) - # var"x"1 ==> (error 1) - # var"x"y ==> (error y) - bump(ps, error="suffix not allowed after var\"...\" syntax") - else - # var"x") ==> x - # var"x"+ ==> x - end elseif is_operator(leading_kind) if check_identifiers && is_syntactic_operator(leading_kind) # += ==> (error +=) @@ -3006,7 +2988,26 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps) end elseif is_keyword(leading_kind) - if check_identifiers && is_closing_token(ps, leading_kind) + if leading_kind == K"var" && (t = peek_token(ps,2); + kind(t) in KSet`" """` && !t.had_whitespace) + # var"x" ==> x + # var"""x""" ==> x + bump(ps, TRIVIA_FLAG) + parse_raw_string(ps, remap_kind=K"Identifier") + t = peek_token(ps) + k = kind(t) + if t.had_whitespace || is_operator(k) || + k in KSet`( ) [ ] { } , ; @ EndMarker` + # var"x"+ ==> x + # var"x") ==> x + # var"x"( ==> x + else + # var"x"end ==> (error (end)) + # var"x"1 ==> (error 1) + # var"x"y ==> (error y) + bump(ps, error="suffix not allowed after var\"...\" syntax") + end + elseif check_identifiers && is_closing_token(ps, leading_kind) # :(end) ==> (quote (error end)) bump(ps, error="invalid identifier") else diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 8b250ebf0dc77..d95ffdb151934 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -136,8 +136,6 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In unescape_julia_string(val_str, false, false)[2] elseif k == K"Identifier" Symbol(val_str) - elseif k == K"VarIdentifier" - Symbol(val_str[5:end-1]) elseif is_keyword(k) # This 
should only happen for tokens nested inside errors Symbol(val_str) @@ -159,8 +157,6 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In :var"@__dot__" elseif k == K"MacroName" Symbol("@$val_str") - elseif k == K"VarMacroName" - Symbol("@$(val_str[5:end-1])") elseif k == K"StringMacroName" Symbol("@$(val_str)_str") elseif k == K"CmdMacroName" diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index dac0e4614b663..ead099393c052 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -7,7 +7,6 @@ Dict([ "Comment" => Ts.COMMENT "Whitespace" => Ts.WHITESPACE "Identifier" => Ts.IDENTIFIER -"VarIdentifier" => Ts.VAR_IDENTIFIER "@" => Ts.AT_SIGN "," => Ts.COMMA ";" => Ts.SEMICOLON @@ -46,6 +45,7 @@ Dict([ "try" => Ts.TRY "type" => Ts.TYPE "using" => Ts.USING +"var" => Ts.VAR "while" => Ts.WHILE "END_KEYWORDS" => Ts.end_keywords @@ -821,7 +821,6 @@ Dict([ "BEGIN_MACRO_NAMES" => Ts.begin_macro_names "MacroName" => Ts.MACRO_NAME # A macro name identifier "@." => Ts.DOT_MACRO_NAME # The macro name of @. -"VarMacroName" => Ts.VAR_MACRO_NAME # @var"..." 
"StringMacroName" => Ts.STRING_MACRO_NAME # macname"some_str" "CmdMacroName" => Ts.CMD_MACRO_NAME # macname`some_str` "core_@doc" => Ts.CORE_DOC_MACRO_NAME # Core.@doc @@ -877,7 +876,7 @@ for kw in split(""" continue do else elseif end export finally for function global if import let local macro module mutable new outer primitive quote - return struct try type using while + return struct try type using var while block call comparison curly string inert macrocall kw parameters toplevel tuple ref vect braces bracescat hcat diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 5ce2057021cbf..7a6383ed0d40c 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -90,7 +90,7 @@ function is_syntax_kind(t) end function is_identifier(k) - kind(k) in (K"Identifier", K"VarIdentifier") + kind(k) == K"Identifier" end function is_macro_name(k) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 74992c58603e2..1f1721e87b928 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -502,11 +502,14 @@ tests = [ ":)" => ":" ": end" => ":" # var syntax + """var"x" """ => "x" + """var""\"x""\"""" => "x" + """var"x"+""" => "x" + """var"x")""" => "x" + """var"x"(""" => "x" """var"x"end""" => "x (error (end))" """var"x"1""" => "x (error 1)" """var"x"y""" => "x (error y)" - """var"x")""" => "x" - """var"x"+""" => "x" # Syntactic operators "+=" => "(error +=)" ".+=" => "(error .+=)" From d04095bbc0324e87faab19470162899fb59a39e0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 16:00:56 +1000 Subject: [PATCH 0329/1109] Tokenize: Fix lexing raw strings with contextural keyword prefixes Introduce a category for contextural keywords. 
Use that to ensure that strings are parsed as raw strings when they follow contextural keywords or word-like operators as in var"$" isa"$" --- JuliaSyntax/Tokenize/src/lexer.jl | 4 +++- JuliaSyntax/Tokenize/src/token.jl | 9 +++++++++ JuliaSyntax/Tokenize/src/token_kinds.jl | 18 ++++++++++-------- JuliaSyntax/Tokenize/test/lexer.jl | 13 +++++++++++++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index b261ceaae713a..369ed5a196c52 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -901,7 +901,9 @@ end # Parse a token starting with a quote. # A '"' has been consumed function lex_quote(l::Lexer) - raw = l.last_token == Tokens.IDENTIFIER || l.last_token == Tokens.KEYWORD + raw = l.last_token == Tokens.IDENTIFIER || + Tokens.iscontexturalkeyword(l.last_token) || + Tokens.iswordoperator(l.last_token) pc, dpc = dpeekchar(l) triplestr = pc == '"' && dpc == '"' push!(l.string_states, StringState(triplestr, raw, '"', 0)) diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index 41bde09981f1a..956b0b106a412 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -11,6 +11,15 @@ iskeyword(k::Kind) = begin_keywords < k < end_keywords isliteral(k::Kind) = begin_literal < k < end_literal isoperator(k::Kind) = begin_ops < k < end_ops +iscontexturalkeyword(k::Kind) = begin_contextural_keywords < k < end_contextural_keywords + +function iswordoperator(k::Kind) + # Keyword-like operators + k == Tokens.IN || + k == Tokens.ISA || + k == Tokens.WHERE +end + # Create string => keyword kind const KEYWORDS = Dict{String, Kind}() diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index efa3c2022378b..9f95dc1e66b61 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -10,8 +10,6 @@ begin_keywords, KEYWORD, # 
general - ABSTRACT, - AS, BAREMODULE, BEGIN, BREAK, @@ -33,18 +31,22 @@ LOCAL, MACRO, MODULE, - MUTABLE, - NEW, - OUTER, - PRIMITIVE, QUOTE, RETURN, STRUCT, TRY, - TYPE, USING, - VAR, WHILE, + begin_contextural_keywords, + ABSTRACT, + AS, + MUTABLE, + NEW, + OUTER, + PRIMITIVE, + TYPE, + VAR, + end_contextural_keywords, end_keywords, begin_cstparser, diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index df92235b07c36..ce85e1cad4d51 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -332,6 +332,19 @@ end @test ts[3] ~ (T.STRING , "\\\\\\\"" ) @test ts[4] ~ (T.DQUOTE , "\"" ) @test ts[5] ~ (T.ENDMARKER , "" ) + + # Contextural keywords and operators allowed as raw string prefixes + ts = collect(tokenize(raw""" var"x $ \ y" """)) + @test ts[2] ~ (T.VAR , "var") + @test ts[4] ~ (T.STRING , "x \$ \\ y") + + ts = collect(tokenize(raw""" outer"x $ \ y" """)) + @test ts[2] ~ (T.OUTER , "outer") + @test ts[4] ~ (T.STRING , "x \$ \\ y") + + ts = collect(tokenize(raw""" isa"x $ \ y" """)) + @test ts[2] ~ (T.ISA , "isa") + @test ts[4] ~ (T.STRING , "x \$ \\ y") end @testset "interpolation" begin From 3bb0755c539895bba8b39842c8b0c96261bf079a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 16:04:52 +1000 Subject: [PATCH 0330/1109] Use contextural keyword predicates from Tokenize module --- JuliaSyntax/src/parser.jl | 8 -------- JuliaSyntax/src/tokens.jl | 14 ++++++-------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c1b2dada8be2f..e583dcddc804b 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -227,10 +227,6 @@ function is_initial_reserved_word(ps::ParseState, k) return is_iresword && !(k == K"begin" && ps.end_symbol) end -function is_contextural_keyword(k) - kind(k) ∈ KSet`as abstract mutable outer primitive type var` -end - function is_reserved_word(k) k = kind(k) is_keyword(k) && 
!is_contextural_keyword(k) @@ -284,10 +280,6 @@ function is_both_unary_and_binary(k) k in KSet`+ - ⋆ ± ∓` # dotop allowed end -function is_word_operator(k) - kind(k) in KSet`in isa where` -end - # operators handled by parse_unary at the start of an expression function is_initial_operator(k) k = kind(k) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 7a6383ed0d40c..2b8c8f629005d 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -40,14 +40,12 @@ end kind(k::Kind) = k kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) -# Some renaming for consistency -is_literal(k::Kind) = TzTokens.isliteral(k) -is_keyword(k::Kind) = TzTokens.iskeyword(k) -is_operator(k::Kind) = TzTokens.isoperator(k) - -is_literal(k) = is_literal(kind(k)) -is_keyword(k) = is_keyword(kind(k)) -is_operator(k) = is_operator(kind(k)) +# Some renaming for naming consistency +is_literal(k) = TzTokens.isliteral(kind(k)) +is_keyword(k) = TzTokens.iskeyword(kind(k)) +is_contextural_keyword(k) = TzTokens.iscontexturalkeyword(kind(k)) +is_operator(k) = TzTokens.isoperator(kind(k)) +is_word_operator(k) = TzTokens.iswordoperator(kind(k)) # Predicates for operator precedence is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" From 626bfe3f7ba8b222a56f65ba47f99643430c5d25 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 20:08:07 +1000 Subject: [PATCH 0331/1109] =?UTF-8?q?Tokenize:=20Add=20operators=20?= =?UTF-8?q?=E2=AB=AB=20=E2=AB=AA=20=E2=88=92=20and=20=E2=8B=85=20lookalike?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- JuliaSyntax/Tokenize/src/lexer.jl | 6 ++++++ JuliaSyntax/Tokenize/src/token_kinds.jl | 10 ++++++++++ JuliaSyntax/Tokenize/src/utilities.jl | 4 ++++ JuliaSyntax/Tokenize/test/lexer.jl | 13 +++++++++++++ 4 files changed, 33 insertions(+) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 
369ed5a196c52..ba2848d7e0838 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -417,6 +417,8 @@ function _next_token(l::Lexer, c) return lex_plus(l); elseif c == '-' return lex_minus(l); + elseif c == '−' # \minus '−' treated as hyphen '-' + return emit(l, accept(l, '=') ? Tokens.MINUS_EQ : Tokens.MINUS) elseif c == '`' return lex_backtick(l); elseif is_identifier_start_char(c) @@ -1009,6 +1011,10 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) return lex_minus(l) + elseif pc == '−' + l.dotop = true + readchar(l) + return emit(l, accept(l, '=') ? Tokens.MINUS_EQ : Tokens.MINUS) elseif pc =='*' l.dotop = true readchar(l) diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 9f95dc1e66b61..615ce0b0ba862 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -572,6 +572,8 @@ DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, # ⫺ RIGHT_TACK, # ⊢ LEFT_TACK, # ⊣ + DOUBLE_DOWN_TACK, # ⫪ + DOUBLE_UP_TACK, # ⫫ PERP, # ⟂ end_comparison, @@ -890,6 +892,7 @@ const UNICODE_OPS = Dict{Char, Kind}( +'−' => MINUS, '÷' => DIVISION_SIGN, '¬' => NOT_SIGN, '√' => SQUARE_ROOT, @@ -1290,6 +1293,8 @@ const UNICODE_OPS = Dict{Char, Kind}( '⫺' => DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, '⊢' => RIGHT_TACK, '⊣' => LEFT_TACK, +'⫪' => DOUBLE_DOWN_TACK, +'⫫' => DOUBLE_UP_TACK, '⟂' => PERP, '⊕' => CIRCLED_PLUS, '⊖' => CIRCLED_MINUS, @@ -1444,6 +1449,10 @@ const UNICODE_OPS = Dict{Char, Kind}( '⥯' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, '↑' => HALFWIDTH_UPWARDS_ARROW, '↓' => HALFWIDTH_DOWNWARDS_ARROW, +# Lookalikes which are normalized into UNICODE_DOT +# https://github.com/JuliaLang/julia/pull/25157 +'\u00b7' => UNICODE_DOT, # '·' Middle Dot, +'\u0387' => UNICODE_DOT, # '·' Greek Ano Teleia, '⋅' => UNICODE_DOT, '…' => LDOTS, '⁝' => TRICOLON, @@ -1484,6 +1493,7 @@ const UNICODE_OPS = Dict{Char, Kind}( const 
UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() for (k, v) in UNICODE_OPS + k in ('\u00b7', '\u0387') && continue UNICODE_OPS_REVERSE[v] = Symbol(k) end diff --git a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/Tokenize/src/utilities.jl index 2ad25090c787c..7cf67536c5334 100644 --- a/JuliaSyntax/Tokenize/src/utilities.jl +++ b/JuliaSyntax/Tokenize/src/utilities.jl @@ -213,7 +213,9 @@ takechar(io::IO) = (readchar(io); io) c == 0x0000007e || c == 0x000000ac || c == 0x000000b1 || + c == 0x000000b7 || c == 0x000000d7 || + c == 0x00000387 || c == 0x00002026 || c == 0x0000205d || c == 0x0000214b || @@ -292,6 +294,8 @@ takechar(io::IO) = (readchar(io); io) 0x00002a66 <= c <= 0x00002a67 || 0x00002a6a <= c <= 0x00002ad9 || c == 0x00002adb || + c == 0x00002aea || + c == 0x00002aeb || 0x00002af7 <= c <= 0x00002afa || 0x00002b30 <= c <= 0x00002b44 || 0x00002b47 <= c <= 0x00002b4c || diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index ce85e1cad4d51..7fe0f6b9e0028 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -636,6 +636,19 @@ for op in ops end end +@testset "Normalization of Unicode symbols" begin + # https://github.com/JuliaLang/julia/pull/25157 + @test tok("\u00b7").kind == T.UNICODE_DOT + @test tok("\u0387").kind == T.UNICODE_DOT + @test tok(".\u00b7").dotop + @test tok(".\u0387").dotop + + # https://github.com/JuliaLang/julia/pull/40948 + @test tok("−").kind == T.MINUS + @test tok("−=").kind == T.MINUS_EQ + @test tok(".−").dotop +end + @testset "perp" begin @test tok("1 ⟂ 2", 3).kind==T.PERP end From 14ac8adf73e95e782fc589f0bc2b91487305a203 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 20:10:00 +1000 Subject: [PATCH 0332/1109] Unicode normalization of identifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add Unicode NFC normalization of all identifiers * Add Julia-specific normalizations of lookalikes of - ⋅ ε μ Based 
on the same functionality from the Unicode stdlib in Julia 1.8 https://github.com/JuliaLang/julia/pull/42561 --- JuliaSyntax/src/syntax_tree.jl | 10 +++---- JuliaSyntax/src/token_kinds.jl | 5 +++- JuliaSyntax/src/tokens.jl | 2 ++ JuliaSyntax/src/value_parsing.jl | 49 +++++++++++++++++++++++++++++-- JuliaSyntax/test/parser.jl | 15 ++++++++++ JuliaSyntax/test/value_parsing.jl | 12 ++++++++ 6 files changed, 85 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index d95ffdb151934..50b2db7ab7c42 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -135,7 +135,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"Char" unescape_julia_string(val_str, false, false)[2] elseif k == K"Identifier" - Symbol(val_str) + Symbol(normalize_identifier(val_str)) elseif is_keyword(k) # This should only happen for tokens nested inside errors Symbol(val_str) @@ -148,7 +148,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif is_operator(k) isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens - Symbol(val_str) + Symbol(normalize_identifier(val_str)) elseif k == K"NothingLiteral" nothing elseif k == K"error" @@ -156,11 +156,11 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"@." 
:var"@__dot__" elseif k == K"MacroName" - Symbol("@$val_str") + Symbol("@$(normalize_identifier(val_str))") elseif k == K"StringMacroName" - Symbol("@$(val_str)_str") + Symbol("@$(normalize_identifier(val_str))_str") elseif k == K"CmdMacroName" - Symbol("@$(val_str)_cmd") + Symbol("@$(normalize_identifier(val_str))_cmd") elseif k == K"core_@doc" GlobalRef(Core, :var"@doc") elseif k == K"core_@cmd" diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index ead099393c052..e832e8d09beaa 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -568,6 +568,9 @@ Dict([ "⫺" => Ts.DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO "⊢" => Ts.RIGHT_TACK "⊣" => Ts.LEFT_TACK +# ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350 +"⫪" => Ts.DOUBLE_DOWN_TACK +"⫫" => Ts.DOUBLE_UP_TACK "⟂" => Ts.PERP "END_COMPARISON" => Ts.end_comparison @@ -591,7 +594,7 @@ Dict([ # Level 9 "BEGIN_PLUS" => Ts.begin_plus -"\$" => Ts.EX_OR +"\$" => Ts.EX_OR "+" => Ts.PLUS "-" => Ts.MINUS "++" => Ts.PLUSPLUS diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 2b8c8f629005d..2821058551d45 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -48,6 +48,8 @@ is_operator(k) = TzTokens.isoperator(kind(k)) is_word_operator(k) = TzTokens.iswordoperator(kind(k)) # Predicates for operator precedence +# FIXME: Review how precedence depends on dottedness, eg +# https://github.com/JuliaLang/julia/pull/36725 is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" is_prec_conditional(t) = K"BEGIN_CONDITIONAL" < kind(t) < K"END_CONDITIONAL" diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index d14e27a88aacb..49988bc34e277 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -2,8 +2,6 @@ # This file contains utility functions for converting undecorated source # strings into 
Julia values. For example, string->number, string unescaping, etc. -is_indentation(c) = c == ' ' || c == '\t' - """ Convert a Julia source code string into a number. """ @@ -65,6 +63,10 @@ function julia_string_to_number(str::AbstractString, kind) end end + +#------------------------------------------------------------------------------- +is_indentation(c) = c == ' ' || c == '\t' + """ Process Julia source code escape sequences for raw strings """ @@ -329,3 +331,46 @@ function process_triple_strings!(strs, is_raw) strs end +#------------------------------------------------------------------------------- +# Unicode normalization. As of Julia 1.8, this is part of Base and the Unicode +# stdlib under the name `Unicode.julia_chartransform`. See +# https://github.com/JuliaLang/julia/pull/42561 +# +# To allow use on older Julia versions, we reproduce that logic here. + +# static wrapper around user callback function +utf8proc_custom_func(codepoint::UInt32, callback::Any) = + UInt32(callback(codepoint))::UInt32 + +function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T + ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}), + str, sizeof(str), buffer, nwords, options, + @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform) + ret < 0 && utf8proc_error(ret) + return ret +end + +function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity) + nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform) + buffer = Base.StringVector(nwords*4) + nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform) + nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options) + nbytes < 0 && utf8proc_error(nbytes) + return String(resize!(buffer, nbytes)) +end + +const _julia_charmap = Dict{UInt32,UInt32}( + 0x025B => 0x03B5, + 0x00B5 => 0x03BC, + 0x00B7 => 0x22C5, + 0x0387 => 0x22C5, + 0x2212 => 
0x002D, +) + +julia_chartransform(codepoint::UInt32) = get(_julia_charmap, codepoint, codepoint) + +function normalize_identifier(str) + flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE + utf8proc_map(str, flags, julia_chartransform) +end + diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1f1721e87b928..cb8d4fc6ba05a 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -618,6 +618,21 @@ tests = [ end end +@testset "Unicode normalization in tree conversion" begin + # ɛµ normalizes to εμ + @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" + @test test_parse(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall @\u03B5\u03BC)" + @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str \"\")" + @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd \"\")" + # · and · normalize to ⋅ + @test test_parse(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" + @test test_parse(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" + # − normalizes to - + @test test_parse(JuliaSyntax.parse_expr, "a \u2212 b") == "(call-i a - b)" + @test test_parse(JuliaSyntax.parse_eq, "a \u2212= b") == "(-= a b)" + @test test_parse(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" +end + @testset "Larger code chunks" begin # Something ever-so-slightly nontrivial for fun - # the sum of the even Fibonacci numbers < 4_000_000 diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 89869dd86e39b..eadf2e171895d 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -160,3 +160,15 @@ end @test process_triple_strings!(["\n y\r"], raw) == [" y\n"] end end + +@testset "Normalization of identifiers" begin + # NFC normalization + # https://github.com/JuliaLang/julia/issues/5434 + # https://github.com/JuliaLang/julia/pull/19464 + @test 
JuliaSyntax.normalize_identifier("\u0069\u0302") == "\u00ee" + + # Special Julia normalization + # https://github.com/JuliaLang/julia/pull/42561 + @test JuliaSyntax.normalize_identifier("julia\u025B\u00B5\u00B7\u0387\u2212") == + "julia\u03B5\u03BC\u22C5\u22C5\u002D" +end From 736c2538da7dc3bcbfad3e239aee08b2a827969c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 21:45:08 +1000 Subject: [PATCH 0333/1109] Fix to allow `begin x end::T` to parse correctly Our parse_factor / parse_factor_with_initial_ex is a bit different from the flisp parser so type declarations after blocks were not parsed correctly. Fix the logic so that type declarations can be parsed after parse_resword has run. As a side effect this cleans up the logic so that parse_call is the only place where we enter parse_resword. --- JuliaSyntax/src/parser.jl | 28 +++++++++++----------------- JuliaSyntax/test/parser.jl | 9 ++++++++- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e583dcddc804b..cde15f5cc7562 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1072,7 +1072,9 @@ function parse_unary_call(ps::ParseState) # +(a;b,c) ==> (call + a (parameters b c)) # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) + # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) emit(ps, mark, op_node_kind) + parse_call_chain(ps, mark) parse_factor_with_initial_ex(ps, mark) else # Unary function calls with brackets as grouping, not an arglist @@ -1084,6 +1086,8 @@ function parse_unary_call(ps::ParseState) # +(a=1) ==> (call + (= a 1)) # Unary operators have lower precedence than ^ # +(a)^2 ==> (call + (call-i a ^ 2)) + # +(a)(x,y)^2 ==> (call + (call-i (call a x y) ^ 2)) + parse_call_chain(ps, mark_before_paren) parse_factor_with_initial_ex(ps, mark_before_paren) emit(ps, mark, op_node_kind) end @@ -1099,23 +1103,20 @@ function parse_unary_call(ps::ParseState) end # 
handle ^ and .^ -# -2^3 is parsed as -(2^3), so call parse-decl for the first argument, -# and parse-unary from then on (to handle 2^-3) +# +# x^y ==> (call-i x ^ y) +# x^y^z ==> (call-i x ^ (call-i y ^ z)) +# begin x end::T ==> (:: (block x) T) # # flisp: parse-factor function parse_factor(ps::ParseState) - if peek_initial_reserved_words(ps) - parse_resword(ps) - else - mark = position(ps) - parse_unary_prefix(ps) - parse_factor_with_initial_ex(ps, mark) - end + mark = position(ps) + parse_call(ps) + parse_factor_with_initial_ex(ps, mark) end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) - parse_call_chain(ps, mark) parse_decl_with_initial_ex(ps, mark) if is_prec_power(peek(ps)) bump(ps) @@ -1133,13 +1134,6 @@ end # a::b ==> (:: a b) # a->b ==> (-> a b) # -# flisp: parse-decl -function parse_decl(ps::ParseState) - mark = position(ps) - parse_call(ps) - parse_decl_with_initial_ex(ps, mark) -end - # flisp: parse-decl-with-initial-ex function parse_decl_with_initial_ex(ps::ParseState, mark) while peek(ps) == K"::" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index cb8d4fc6ba05a..0e671aab7e206 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -152,17 +152,24 @@ tests = [ "+ (a,b)" => "(call + (error) a b)" # Prefix calls have higher precedence than ^ "+(a,b)^2" => "(call-i (call + a b) ^ 2)" + "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" # Unary function calls with brackets as grouping, not an arglist "+(a;b)" => "(call + (block a b))" "+(a=1)" => "(call + (= a 1))" # Unary operators have lower precedence than ^ "+(a)^2" => "(call + (call-i a ^ 2))" + "+(a)(x,y)^2" => "(call + (call-i (call a x y) ^ 2))" # Normal unary calls (see parse_unary) "+x" => "(call + x)" ], - JuliaSyntax.parse_decl => [ + JuliaSyntax.parse_factor => [ + "x^y" => "(call-i x ^ y)" + "x^y^z" => "(call-i x ^ (call-i y ^ z))" + "begin x end::T" => "(:: (block x) T)" + # 
parse_decl_with_initial_ex "a::b" => "(:: a b)" "a->b" => "(-> a b)" + "a::b::c" => "(:: (:: a b) c)" "a::b->c" => "(-> (:: a b) c)" ], JuliaSyntax.parse_unary_subtype => [ From 419ef22f2234d15b5de96a278f80991309a0d67b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 22:37:02 +1000 Subject: [PATCH 0334/1109] Fixes for dotted syntactic operators * Fix parsing of dotted short circuiting operators .&& and .|| * Fix parsing of dotted type comparisons .<: and .>: --- JuliaSyntax/src/parse_stream.jl | 1 + JuliaSyntax/src/parser.jl | 50 ++++++++++++++++++++------------- JuliaSyntax/test/parser.jl | 8 ++++++ 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 769ec2a842ec7..b42939401404b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -16,6 +16,7 @@ function Base.show(io::IO, tok::SyntaxToken) end kind(tok::SyntaxToken) = tok.raw.kind +flags(tok::SyntaxToken) = tok.raw.dotop ? DOTOP_FLAG : EMPTY_FLAGS first_byte(tok::SyntaxToken) = tok.raw.startbyte + 1 last_byte(tok::SyntaxToken) = tok.raw.endbyte + 1 span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1 diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index cde15f5cc7562..26c17f3ce7afc 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -262,13 +262,13 @@ function is_syntactic_unary_op(k) kind(k) in KSet`$ & ::` end -function is_type_operator(k) - kind(k) in KSet`<: >:` +function is_type_operator(t) + kind(t) in KSet`<: >:` && !is_dotted(t) end -function is_unary_op(k) - k = kind(k) - k in KSet`<: >:` || # TODO: dotop disallowed ? +function is_unary_op(t) + k = kind(t) + (k in KSet`<: >:` && !is_dotted(t)) || k in KSet`+ - ! 
~ ¬ √ ∛ ∜ ⋆ ± ∓` # dotop allowed end @@ -330,12 +330,13 @@ end function parse_RtoL(ps::ParseState, down, is_op, syntactic, self) mark = position(ps) down(ps) - k = peek(ps) + t = peek_token(ps) + k = kind(t) if is_op(k) if syntactic isa Bool ? syntactic : syntactic(k) bump(ps, TRIVIA_FLAG) self(ps) - emit(ps, mark, k) + emit(ps, mark, k, flags(t)) else bump(ps) self(ps) @@ -492,11 +493,11 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down, equals_is_ return NO_POSITION else # a += b ==> (+= a b) + # a .= b ==> (.= a b) bump(ps, TRIVIA_FLAG) parse_assignment(ps, down, equals_is_kw) plain_eq = is_plain_equals(t) - equals_pos = emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, - is_dotted(t) ? DOTOP_FLAG : EMPTY_FLAGS) + equals_pos = emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, flags(t)) return plain_eq ? equals_pos : NO_POSITION end end @@ -592,6 +593,7 @@ function parse_arrow(ps::ParseState) end # x || y || z ==> (|| x (|| y z)) +# x .|| y ==> (.|| x y) # # flisp: parse-or function parse_or(ps::ParseState) @@ -599,6 +601,7 @@ function parse_or(ps::ParseState) end # x && y && z ==> (&& x (&& y z)) +# x .&& y ==> (.&& x y) # # flisp: parse-and function parse_and(ps::ParseState) @@ -621,22 +624,23 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) end n_comparisons = 0 op_pos = NO_POSITION - initial_kind = peek(ps) + initial_tok = peek_token(ps) while is_prec_comparison(peek(ps)) n_comparisons += 1 op_pos = bump(ps) parse_pipe_lt(ps) end if n_comparisons == 1 - if is_type_operator(initial_kind) + if is_type_operator(initial_tok) # Type comparisons are syntactic # x <: y ==> (<: x y) # x >: y ==> (>: x y) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) - emit(ps, mark, initial_kind) + emit(ps, mark, kind(initial_tok)) else # Normal binary comparisons - # x < y ==> (call-i x < y) + # x < y ==> (call-i x < y) + # x .<: y ==> (call-i x .<: y) emit(ps, mark, K"call", INFIX_FLAG) end elseif n_comparisons > 1 @@ -1002,7 +1006,7 
@@ function parse_unary_call(ps::ParseState) mark = position(ps) op_t = peek_token(ps) op_k = kind(op_t) - op_node_kind = is_type_operator(op_k) ? op_k : K"call" + op_node_kind = is_type_operator(op_t) ? op_k : K"call" op_tok_flags = is_type_operator(op_t) ? TRIVIA_FLAG : EMPTY_FLAGS t2 = peek_token(ps, 2) k2 = kind(t2) @@ -1021,7 +1025,7 @@ function parse_unary_call(ps::ParseState) # +) ==> + bump(ps) end - elseif k2 == K"{" || (!is_unary_op(op_k) && k2 == K"(") + elseif k2 == K"{" || (!is_unary_op(op_t) && k2 == K"(") # Call with type parameters or non-unary prefix call # +{T}(x::T) ==> (call (curly + T) (:: x T)) # *(x) ==> (call * x) @@ -1091,12 +1095,18 @@ function parse_unary_call(ps::ParseState) parse_factor_with_initial_ex(ps, mark_before_paren) emit(ps, mark, op_node_kind) end - elseif !is_unary_op(op_k) - emit_diagnostic(ps, error="expected a unary operator") else - # Normal unary calls - # +x ==> (call + x) - bump(ps, op_tok_flags) + if is_unary_op(op_t) + # Normal unary calls + # +x ==> (call + x) + # √x ==> (call √ x) + # ±x ==> (call ± x) + bump(ps, op_tok_flags) + else + # /x ==> (call (error /) x) + # .<: x ==> (call (error .<:) x) + bump(ps, error="not a unary operator") + end parse_unary(ps) emit(ps, mark, op_node_kind) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 0e671aab7e206..956b5fa15f617 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -38,6 +38,7 @@ tests = [ "a, b = c, d" => "(= (tuple a b) (tuple c d))" "x, = xs" => "(= (tuple x) xs)" "[a ~b]" => "(hcat a (call ~ b))" + "a ~ b" => "(call-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" ], JuliaSyntax.parse_cond => [ @@ -59,9 +60,11 @@ tests = [ ], JuliaSyntax.parse_or => [ "x || y || z" => "(|| x (|| y z))" + "x .|| y" => "(.|| x y)" ], JuliaSyntax.parse_and => [ "x && y && z" => "(&& x (&& y z))" + "x .&& y" => "(.&& x y)" ], JuliaSyntax.parse_comparison => [ # Type comparisons are syntactic @@ -161,6 +164,11 @@ tests = [ 
"+(a)(x,y)^2" => "(call + (call-i (call a x y) ^ 2))" # Normal unary calls (see parse_unary) "+x" => "(call + x)" + "√x" => "(call √ x)" + "±x" => "(call ± x)" + # Not a unary operator + "/x" => "(call (error /) x)" + ".<: x" => "(call (error .<:) x)" ], JuliaSyntax.parse_factor => [ "x^y" => "(call-i x ^ y)" From 6f18b64ccf20224c42b7dcfeb9018cffd2c1c8c7 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 22:58:21 +1000 Subject: [PATCH 0335/1109] Fix Expr conversion of double quoted expressions --- JuliaSyntax/src/syntax_tree.jl | 4 ++-- JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/syntax_tree.jl | 10 ++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 JuliaSyntax/test/syntax_tree.jl diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 50b2db7ab7c42..6ccc1b51d6a4b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -489,8 +489,8 @@ function _to_expr(node::SyntaxNode) args[2] = Expr(:block, loc, args[2]) end end - if headsym == :inert || (headsym == :quote && - length(args) == 1 && !(only(args) isa Expr)) + if headsym == :inert || (headsym == :quote && length(args) == 1 && + !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode)) return QuoteNode(only(args)) else return Expr(headsym, args...) 
diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 9185bb98f13eb..0aef593c320c8 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -16,6 +16,7 @@ end include("test_utils.jl") include("parse_stream.jl") +include("syntax_tree.jl") include("parser.jl") @testset "Parsing values from strings" begin diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl new file mode 100644 index 0000000000000..66e4e329f56dc --- /dev/null +++ b/JuliaSyntax/test/syntax_tree.jl @@ -0,0 +1,10 @@ + +@testset "Parse tree conversion" begin + @testset "Quote nodes" begin + @test Expr(child(parse_all(SyntaxNode, ":(a)"), 1)) == QuoteNode(:a) + @test Expr(child(parse_all(SyntaxNode, ":(:a)"), 1)) == + Expr(:quote, QuoteNode(:a)) + @test Expr(child(parse_all(SyntaxNode, ":(1+2)"), 1)) == + Expr(:quote, Expr(:call, :+, 1, 2)) + end +end From 0c6aef0d6619aec1d0487e99ba46e8567b9a23b4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 21 Jan 2022 23:10:46 +1000 Subject: [PATCH 0336/1109] Fix bumping of `end` in zero-method functions --- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 26c17f3ce7afc..5364dcc3aa7c1 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1913,10 +1913,11 @@ function parse_function(ps::ParseState) if peek(ps, skip_newlines=true) == K"end" && !is_anon_func # Function/macro definition with no methods # function f end ==> (function f) + # (function f \n end) ==> (function f) # function f \n\n end ==> (function f) # function $f end ==> (function ($ f)) # macro f end ==> (macro f) - bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG, skip_newlines=true) emit(ps, mark, word) return end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 956b5fa15f617..3a72bab864d9b 100644 --- a/JuliaSyntax/test/parser.jl +++ 
b/JuliaSyntax/test/parser.jl @@ -558,6 +558,7 @@ tests = [ # parse_paren ":(=)" => "(quote =)" ":(::)" => "(quote ::)" + "(function f \n end)" => "(function f)" # braces "{x y}" => "(bracescat (row x y))" ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" From 3582d15ac0272d58bdd1db35e1144510aed12f59 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 22 Jan 2022 06:21:58 +1000 Subject: [PATCH 0337/1109] Tokenize: Allow floating point with \minus in exponent --- JuliaSyntax/Tokenize/src/lexer.jl | 10 +++++----- JuliaSyntax/Tokenize/test/lexer.jl | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index ba2848d7e0838..9cbce60cc0224 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -788,10 +788,10 @@ function lex_digit(l::Lexer, kind) kind = Tokens.FLOAT accept_number(l, isdigit) pc, ppc = dpeekchar(l) - if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-') + if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') kind = Tokens.FLOAT readchar(l) - accept(l, "+-") + accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) if pc === '.' && !dotop2(ppc, ' ') @@ -806,10 +806,10 @@ function lex_digit(l::Lexer, kind) return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) end - elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-') + elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') kind = Tokens.FLOAT readchar(l) - accept(l, "+-") + accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) if pc === '.' 
&& !dotop2(ppc, ' ') @@ -833,7 +833,7 @@ function lex_digit(l::Lexer, kind) end if accept(l, "pP") kind = Tokens.FLOAT - accept(l, "+-") + accept(l, "+-−") accept_number(l, isdigit) elseif isfloat return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 7fe0f6b9e0028..24b41f2da7112 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -548,6 +548,11 @@ end @test tok("0x0_0_0.0_0p2").kind == Tokens.FLOAT @test tok("0x0p+2").kind == Tokens.FLOAT @test tok("0x0p-2").kind == Tokens.FLOAT + + # Floating point with \minus rather than - + @test tok("1.0e−0").kind == Tokens.FLOAT + @test tok("1.0f−0").kind == Tokens.FLOAT + @test tok("0x0p−2").kind == Tokens.FLOAT end @testset "1e1" begin From 159af429eb2185d17702ab8b2c80ec00d488ee4c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 23 Jan 2022 11:57:46 +1000 Subject: [PATCH 0338/1109] =?UTF-8?q?Allow=20U+2212=20(\minus=20/=20'?= =?UTF-8?q?=E2=88=92')=20in=20numeric=20literals?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also add a bunch of test cases for numeric literal parsing and fix the types which arise from parsing octal literals of various lengths. --- JuliaSyntax/src/value_parsing.jl | 18 +++-- JuliaSyntax/test/value_parsing.jl | 130 +++++++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 49988bc34e277..7b5b08a50d1ef 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -6,16 +6,13 @@ Convert a Julia source code string into a number. 
""" function julia_string_to_number(str::AbstractString, kind) - str = replace(str, '_'=>"") + str = replace(replace(str, '_'=>""), '−'=>'-') if kind == K"Integer" x = Base.tryparse(Int, str) if Int === Int32 && isnothing(x) x = Base.tryparse(Int64, str) end if isnothing(x) - # TODO: flisp parses BigInt and Int128 as string macros rather than - # literals. Is this necessary or can we get away with using values - # here? x = Base.tryparse(Int128, str) if isnothing(x) x = Base.parse(BigInt, str) @@ -47,17 +44,22 @@ function julia_string_to_number(str::AbstractString, kind) ndigits <= 128 ? Base.parse(UInt128, str) : Base.parse(BigInt, str) elseif kind == K"OctInt" + ndigits = length(str)-2 x = Base.tryparse(UInt64, str) if isnothing(x) x = Base.tryparse(UInt128, str) if isnothing(x) x = Base.parse(BigInt, str) + elseif ndigits > 43 + x = BigInt(x) end else - x = x <= typemax(UInt8) ? UInt8(x) : - x <= typemax(UInt16) ? UInt16(x) : - x <= typemax(UInt32) ? UInt32(x) : - x + x = ndigits <= 3 && x <= typemax(UInt8) ? UInt8(x) : + ndigits <= 6 && x <= typemax(UInt16) ? UInt16(x) : + ndigits <= 11 && x <= typemax(UInt32) ? UInt32(x) : + ndigits <= 22 ? x : + ndigits <= 43 ? UInt128(x) : + BigInt(x) end return x end diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index eadf2e171895d..24879aaecf0eb 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -1,6 +1,130 @@ -using JuliaSyntax: triplequoted_string_indentation, - unescape_julia_string, - process_triple_strings! +using JuliaSyntax: + julia_string_to_number, + triplequoted_string_indentation, + unescape_julia_string, + process_triple_strings! 
+ +hexint(s) = julia_string_to_number(s, K"HexInt") +binint(s) = julia_string_to_number(s, K"BinInt") +octint(s) = julia_string_to_number(s, K"OctInt") + +@testset "Number parsing" begin + # Integers + @testset "Integers" begin + @test julia_string_to_number("-1", K"Integer") isa Int + @test julia_string_to_number("1", K"Integer") isa Int + @test julia_string_to_number("2147483647", K"Integer") isa Int + @test julia_string_to_number("9223372036854775807", K"Integer") isa Int64 + @test julia_string_to_number("9223372036854775808", K"Integer") isa Int128 + @test julia_string_to_number("170141183460469231731687303715884105727", K"Integer") isa Int128 + @test julia_string_to_number("170141183460469231731687303715884105728", K"Integer") isa BigInt + end + + # Floats + @testset "Floats" begin + @test julia_string_to_number("10e-0", K"Float") === Float64(10) + @test julia_string_to_number("10f-0", K"Float") === Float32(10) + @test julia_string_to_number("0x0ap-0", K"Float") === Float64(10) + end + + # HexInt + @testset "HexInt numeric limits for different types" begin + @test hexint("0xff") === UInt8(0xff) + @test hexint("0x100") === UInt16(0x100) + @test hexint("0xffff") === UInt16(0xffff) + @test hexint("0x10000") === UInt32(0x10000) + @test hexint("0xffffffff") === UInt32(0xffffffff) + @test hexint("0x100000000") === UInt64(0x100000000) + @test hexint("0xffffffffffffffff") === UInt64(0xffffffffffffffff) + @test hexint("0x10000000000000000") === UInt128(0x10000000000000000) + @test hexint("0xffffffffffffffffffffffffffffffff") === UInt128(0xffffffffffffffffffffffffffffffff) + @test (n = hexint("0x100000000000000000000000000000000"); + n isa BigInt && n == 0x100000000000000000000000000000000) + end + @testset "HexInt string length limits for different types" begin + @test hexint("0x00") === UInt8(0) + @test hexint("0x000") === UInt16(0) + @test hexint("0x0000") === UInt16(0) + @test hexint("0x00000") === UInt32(0) + @test hexint("0x00000000") === UInt32(0) + @test 
hexint("0x000000000") === UInt64(0) + @test hexint("0x0000000000000000") === UInt64(0) + @test hexint("0x00000000000000000") === UInt128(0) + @test hexint("0x00000000000000000000000000000000") === UInt128(0) + @test (n = hexint("0x000000000000000000000000000000000"); + n isa BigInt && n == 0) + end + + # BinInt + @testset "BinInt numeric limits for different types" begin + @test binint("0b11111111") === UInt8(0xff) + @test binint("0b100000000") === UInt16(0x100) + @test binint("0b1111111111111111") === UInt16(0xffff) + @test binint("0b10000000000000000") === UInt32(0x10000) + @test binint("0b11111111111111111111111111111111") === UInt32(0xffffffff) + @test binint("0b100000000000000000000000000000000") === UInt64(0x100000000) + @test binint("0b1111111111111111111111111111111111111111111111111111111111111111") === UInt64(0xffffffffffffffff) + @test binint("0b10000000000000000000000000000000000000000000000000000000000000000") === UInt128(0x10000000000000000) + @test binint("0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") === UInt128(0xffffffffffffffffffffffffffffffff) + @test (n = binint("0b100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); + n isa BigInt && n == 0x100000000000000000000000000000000) + end + @testset "BinInt string length limits for different types" begin + @test binint("0b00000000") === UInt8(0) + @test binint("0b000000000") === UInt16(0) + @test binint("0b0000000000000000") === UInt16(0) + @test binint("0b00000000000000000") === UInt32(0) + @test binint("0b00000000000000000000000000000000") === UInt32(0) + @test binint("0b000000000000000000000000000000000") === UInt64(0) + @test binint("0b0000000000000000000000000000000000000000000000000000000000000000") === UInt64(0) + @test binint("0b00000000000000000000000000000000000000000000000000000000000000000") === UInt128(0) + @test 
binint("0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") === UInt128(0) + @test (n = binint("0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); + n isa BigInt && n == 0) + end + + # OctInt + @testset "OctInt numeric limits for different types" begin + @test octint("0o377") === UInt8(0xff) + @test octint("0o400") === UInt16(0x100) + @test octint("0o177777") === UInt16(0xffff) + @test octint("0o200000") === UInt32(0x10000) + @test octint("0o37777777777") === UInt32(0xffffffff) + @test octint("0o40000000000") === UInt64(0x100000000) + @test octint("0o1777777777777777777777") === UInt64(0xffffffffffffffff) + @test octint("0o2000000000000000000000") === UInt128(0x10000000000000000) + @test octint("0o3777777777777777777777777777777777777777777") === UInt128(0xffffffffffffffffffffffffffffffff) + @test (n = octint("0o4000000000000000000000000000000000000000000"); + n isa BigInt && n == 0x100000000000000000000000000000000) + end + @testset "OctInt string length limits for different types" begin + @test octint("0o000") === UInt8(0) + @test octint("0o0000") === UInt16(0) + @test octint("0o000000") === UInt16(0) + @test octint("0o0000000") === UInt32(0) + @test octint("0o00000000000") === UInt32(0) + @test octint("0o000000000000") === UInt64(0) + @test octint("0o0000000000000000000000") === UInt64(0) + @test octint("0o00000000000000000000000") === UInt128(0) + @test octint("0o0000000000000000000000000000000000000000000") === UInt128(0) + @test (n = octint("0o00000000000000000000000000000000000000000000"); + n isa BigInt && n == 0) + end + + @testset "Underscore separators" begin + @test julia_string_to_number("10_000", K"Integer") === 10000 + @test julia_string_to_number("10_000.0", K"Float") === Float64(10000) + @test julia_string_to_number("0xff_ff", K"HexInt") === 0xffff + @test 
julia_string_to_number("0b1111_1111", K"BinInt") === 0xff + @test julia_string_to_number("0o177_777", K"OctInt") === 0xffff + end + + @testset "\\minus ('\\u2212' / '−') allowed in numbers" begin + @test julia_string_to_number("−10", K"Integer") === -10 + @test julia_string_to_number("−10.0", K"Float") === Float64(-10) + @test julia_string_to_number("10e\u22121", K"Float") === Float64(1) + end +end @testset "String unescaping" begin unesc(str) = unescape_julia_string(str, false, false) From 609ea72d000878b85f33a5c2e5ab0f253ac77e9f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 25 Jan 2022 18:19:48 +1000 Subject: [PATCH 0339/1109] Lots of code movement + parser API cleanup Work on defining a more useful public parser API in the combination of - parse() and build_tree() for generality - parseall() for convenience Lots of code movement and cleanup: - Diagnostics go into their own file. Write some guiding principles for diganostic messages. - Put ParseError into parser_api - Put SyntaxHead and flags into parse_stream.jl; the parser depends on these, but not on SyntaxNode. - Remove RawToken from SyntaxToken. We don't really need it in there and keeping all the extra fields is unnecessary. - Move julia_version onto ParseStream. The lexer will probably need this eventually, so it needs to go in here. Also, it's not necessary to modify this in ParseState. - Make ParseStream work with IOBuffer with nontrivial offsets - Add parse_toplevel to parser to do the job of parsing file-level code. - Some code motion in parser.jl to group functions more sensibly, and to move parse_docstring to a position better reflecting its precedence. Using all this cleanup, fix a minor incompatibility in adding line numbers to short form functions when they're defined in for loops. 
--- JuliaSyntax/src/JuliaSyntax.jl | 29 ++-- JuliaSyntax/src/diagnostics.jl | 109 +++++++++++++ JuliaSyntax/src/hooks.jl | 47 ++---- JuliaSyntax/src/parse_stream.jl | 246 ++++++++++++++++------------ JuliaSyntax/src/parser.jl | 280 ++++++++++++++++---------------- JuliaSyntax/src/parser_api.jl | 170 +++++++++++++++++++ JuliaSyntax/src/syntax_tree.jl | 174 ++++---------------- JuliaSyntax/src/utils.jl | 18 ++ JuliaSyntax/test/parser.jl | 12 +- JuliaSyntax/test/runtests.jl | 4 +- JuliaSyntax/test/syntax_tree.jl | 26 ++- JuliaSyntax/test/test_utils.jl | 9 +- 12 files changed, 677 insertions(+), 447 deletions(-) create mode 100644 JuliaSyntax/src/diagnostics.jl create mode 100644 JuliaSyntax/src/parser_api.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index b8866ce430152..a7bfedd1b05cb 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,25 +1,34 @@ module JuliaSyntax -# Use a git subtree for a modified version of Tokenize.jl, as we need several -# significant changes +# Internal utilities which aren't related to JuliaSyntax per se. +include("utils.jl") + +# Lexing +# +# We're using a git subtree for a modified version of Tokenize.jl, as we need +# several significant changes. +# TODO: Perhaps integrate these back into Tokenize? Or maybe JuliaSyntax would +# be a sensible home for the Tokenize lexer in the future? 
include("../Tokenize/src/Tokenize.jl") using .Tokenize.Tokens: RawToken const TzTokens = Tokenize.Tokens +include("tokens.jl") -include("utils.jl") - +# Source and diagnostics include("source_files.jl") +include("diagnostics.jl") -include("green_tree.jl") - -include("tokens.jl") - -include("syntax_tree.jl") +# Parsing include("parse_stream.jl") - include("parser.jl") +include("parser_api.jl") include("value_parsing.jl") +# Tree data structures +include("green_tree.jl") +include("syntax_tree.jl") + +# Hooks to integrate the parser with Base include("hooks.jl") end diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl new file mode 100644 index 0000000000000..5cbb0f83f583b --- /dev/null +++ b/JuliaSyntax/src/diagnostics.jl @@ -0,0 +1,109 @@ +""" + Diagnostic(first_byte, last_byte; [error="msg" | warning="msg"]) + +A diagnostic message, referring to the source code byte range +first_byte:last_byte, with a `warning` or `error` message. + +Messages should be concise, matter-of-fact and not include decorations: + +* Concise: "Show don't tell". Where possible, let's show the user what's wrong + by annotating their original source code via the byte range. +* Matter-of-fact: Admonishing the user isn't helpful. Let's gently show them + what's wrong instead, using a neutral tone. +* Decorations: Capitalization, punctuation and diagnostic class ("error" / + "warning") should be omitted. These decorations will be added by the + formatting code. + +TODO: At some point we should enhance Diagnostic to allow multiple sub-ranges +for better annotation. Let's follow the excellent precedent set by Rust's +[rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html). + +TODO: We should cater for extended descriptions containing multiple sentences +via a diagnostic code which can be used to look up detailed information. Again, +Rust does this well. 
+""" +struct Diagnostic + first_byte::Int + last_byte::Int + level::Symbol + message::String +end + +function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing) + message = !isnothing(error) ? error : + !isnothing(warning) ? warning : + error("No message in diagnostic") + level = !isnothing(error) ? :error : :warning + Diagnostic(first_byte, last_byte, level, message) +end + +first_byte(d::Diagnostic) = d.first_byte +last_byte(d::Diagnostic) = d.last_byte +is_error(d::Diagnostic) = d.level == :error + +function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) + col,prefix = diagnostic.level == :error ? (:light_red, "Error") : + diagnostic.level == :warning ? (:light_yellow, "Warning") : + diagnostic.level == :note ? (:light_blue, "Note") : + (:normal, "Info") + printstyled(io, "$prefix: ", color=col) + print(io, diagnostic.message, ":\n") + + p = first_byte(diagnostic) + q = last_byte(diagnostic) + code = source.code + if q < p || (p == q && code[p] == '\n') + # An empty or invisible range! We expand it symmetrically to make it + # visible. + p = max(firstindex(code), prevind(code, p)) + q = min(lastindex(code), nextind(code, q)) + end + + # p and q mark the start and end of the diagnostic range. For context, + # buffer these out to the surrouding lines. + a,b = source_line_range(source, p, context_lines_before=2, context_lines_after=1) + c,d = source_line_range(source, q, context_lines_before=1, context_lines_after=2) + + hicol = (100,40,40) + + print(io, source[a:prevind(code, p)]) + # There's two situations, either + if b >= c + # The diagnostic range is compact and we show the whole thing + # a............... + # .....p...q...... + # ...............b + _printstyled(io, source[p:q]; color=hicol) + else + # Or large and we trucate the code to show only the region around the + # start and end of the error. + # a............... + # .....p.......... + # ...............b + # (snip) + # c............... + # .....q.......... 
+ # ...............d + _printstyled(io, source[p:b]; color=hicol) + println(io, "…") + _printstyled(io, source[c:q]; color=hicol) + end + print(io, source[nextind(code,q):d]) + println(io) +end + +function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, code::SourceFile) + for d in diagnostics + show_diagnostic(io, d, code) + end +end + +function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, code) + if !isempty(diagnostics) + show_diagnostics(io, diagnostics, SourceFile(code)) + end +end + +function any_error(diagnostics::AbstractVector{Diagnostic}) + any(is_error(d) for d in diagnostics) +end diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index d9dd08b8e403f..41e3d95928aef 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,46 +1,25 @@ -# Error type for displaying errors in the Julia REPL -struct ParseError <: Exception - code::String - stream::ParseStream -end - -function Base.showerror(io::IO, err::ParseError, bt; backtrace=false) - show_diagnostics(io, err.stream, err.code) -end -Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) - -function Base.showerror(io::IO, err::ParseError) - show_diagnostics(io, err.stream, err.code) -end - # Adaptor for the API/ABI expected by the Julia runtime code. function core_parser_hook(code, filename, offset, options) try - if code isa Core.SimpleVector # May be passed in from C entry points + # TODO: Check that we do all this input wrangling without copying the + # code buffer + if code isa Core.SimpleVector + # The C entry points will pass us this form. (ptr,len) = code code = String(unsafe_wrap(Array, ptr, len)) end + io = IOBuffer(code) + seek(io, offset) - code = code[offset+1:end] # FIXME!! + stream = ParseStream(io) + rule = options == :all ? 
:toplevel : options + JuliaSyntax.parse(stream; rule=rule) - stream = ParseStream(code) - if options === :atom - parse_atom(ParseState(stream, VERSION)) - elseif options === :statement - parse_stmts(ParseState(stream, VERSION)) - elseif options === :all - parse_all(stream) - end + ex = any_error(stream) ? + Expr(:error, ParseError(SourceFile(code), stream.diagnostics)) : + build_tree(Expr, stream) - if !isempty(stream.diagnostics) - ex = Expr(:error, ParseError(code, stream)) - else - green_tree = build_tree(GreenNode, stream) - src = SourceFile(code; filename=filename) - tree = SyntaxNode(src, green_tree) - ex = Expr(tree) - end - pos = offset + stream.next_byte-1 + pos = last_byte(stream) - 1 # Rewrap result in an svec for use by the C code return Core.svec(ex, pos) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index b42939401404b..a3f35862e44c4 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1,33 +1,128 @@ +#------------------------------------------------------------------------------- +# Flags hold auxilary information about tokens/nonterminals which the Kind +# doesn't capture in a nice way. +const RawFlags = UInt32 +const EMPTY_FLAGS = RawFlags(0) +const TRIVIA_FLAG = RawFlags(1<<0) +# Some of the following flags are head-specific and could probably be allowed +# to cover the same bits... +const INFIX_FLAG = RawFlags(1<<1) +# Record whether syntactic operators were dotted +const DOTOP_FLAG = RawFlags(1<<2) +# Set when kind == K"String" was triple-delimited as with """ or ``` +const TRIPLE_STRING_FLAG = RawFlags(1<<3) +# Set when the string is "raw" and needs minimal unescaping +const RAW_STRING_FLAG = RawFlags(1<<4) +# try-finally-catch +const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) +# Flags holding the dimension of an nrow or other UInt8 not held in the source +const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) +# Todo ERROR_FLAG = 0x80000000 ? 
+ +function set_numeric_flags(n::Integer) + f = RawFlags((n << 8) & NUMERIC_FLAGS) + if numeric_flags(f) != n + error("Numeric flags unable to hold large integer $n") + end + f +end + +function numeric_flags(f::RawFlags) + Int((f >> 8) % UInt8) +end + +# Return true if any of `test_flags` are set +has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 + +# Function for combining flags. (Do we want this?) +function flags(; trivia::Bool=false, + infix::Bool=false, + dotop::Bool=false, + try_catch_after_finally::Bool=false, + numeric::Int=0) + flags = RawFlags(0) + trivia && (flags |= TRIVIA_FLAG) + infix && (flags |= INFIX_FLAG) + dotop && (flags |= DOTOP_FLAG) + try_catch_after_finally && (flags |= TRY_CATCH_AFTER_FINALLY_FLAG) + numeric != 0 && (flags |= set_numeric_flags(numeric)) + return flags::RawFlags +end + +#------------------------------------------------------------------------------- +struct SyntaxHead + kind::Kind + flags::RawFlags +end + +kind(head::SyntaxHead) = head.kind +flags(head::SyntaxHead) = head.flags +has_flags(head::SyntaxHead, test_flags) = has_flags(flags(head), test_flags) + +is_trivia(head::SyntaxHead) = has_flags(head, TRIVIA_FLAG) +is_infix(head::SyntaxHead) = has_flags(head, INFIX_FLAG) +is_dotted(head::SyntaxHead) = has_flags(head, DOTOP_FLAG) +numeric_flags(head::SyntaxHead) = numeric_flags(flags(head)) +is_error(head::SyntaxHead) = kind(head) == K"error" + +function Base.summary(head::SyntaxHead) + _kind_str(kind(head)) +end + +function untokenize(head::SyntaxHead; include_flag_suff=true) + str = untokenize(kind(head)) + if is_dotted(head) + str = "."*str + end + if include_flag_suff && flags(head) ∉ (EMPTY_FLAGS, DOTOP_FLAG) + str = str*"-" + is_trivia(head) && (str = str*"t") + is_infix(head) && (str = str*"i") + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"r") + has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f") + n = numeric_flags(head) + n != 
0 && (str = str*string(n)) + end + str +end + #------------------------------------------------------------------------------- """ `SyntaxToken` is a token covering a contiguous byte range in the input text. -Information about leading whitespace tokens is added for use by the parser. +Information about preceding whitespace is added for use by the parser. """ struct SyntaxToken - raw::RawToken + kind::Kind + first_byte::Int + last_byte::Int # Flags for leading whitespace + is_dotted::Bool + is_suffixed::Bool had_whitespace::Bool had_newline::Bool end -function Base.show(io::IO, tok::SyntaxToken) +function SyntaxToken(raw::RawToken, had_whitespace, had_newline) + SyntaxToken(raw.kind, raw.startbyte + 1, raw.endbyte + 1, raw.dotop, raw.suffix, + had_whitespace, had_newline) +end + +function Base.show(ii::IO, tok::SyntaxToken) range = string(lpad(first_byte(tok), 3), ":", rpad(last_byte(tok), 3)) print(io, rpad(range, 17, " "), rpad(kind(tok), 15, " ")) end -kind(tok::SyntaxToken) = tok.raw.kind -flags(tok::SyntaxToken) = tok.raw.dotop ? DOTOP_FLAG : EMPTY_FLAGS -first_byte(tok::SyntaxToken) = tok.raw.startbyte + 1 -last_byte(tok::SyntaxToken) = tok.raw.endbyte + 1 +kind(tok::SyntaxToken) = tok.kind +flags(tok::SyntaxToken) = tok.is_dotted ? 
DOTOP_FLAG : EMPTY_FLAGS +first_byte(tok::SyntaxToken) = tok.first_byte +last_byte(tok::SyntaxToken) = tok.last_byte span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1 -is_dotted(tok::SyntaxToken) = tok.raw.dotop -is_suffixed(tok::SyntaxToken) = tok.raw.suffix +is_dotted(tok::SyntaxToken) = tok.is_dotted +is_suffixed(tok::SyntaxToken) = tok.is_suffixed is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok) -Base.:(~)(tok::SyntaxToken, k::Kind) = kind(tok) == k -Base.:(~)(k::Kind, tok::SyntaxToken) = kind(tok) == k - Base.:(==)(tok::SyntaxToken, k::Kind) = (kind(tok) == k && !is_decorated(tok)) #------------------------------------------------------------------------------- @@ -54,67 +149,7 @@ first_byte(range::TaggedRange) = range.first_byte last_byte(range::TaggedRange) = range.last_byte span(range::TaggedRange) = last_byte(range) - first_byte(range) + 1 -struct Diagnostic - first_byte::Int - last_byte::Int - level::Symbol - message::String -end - -first_byte(d::Diagnostic) = d.first_byte -last_byte(d::Diagnostic) = d.last_byte - -function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) - col,prefix = diagnostic.level == :error ? (:light_red, "Error") : - diagnostic.level == :warning ? (:light_yellow, "Warning") : - diagnostic.level == :note ? (:light_blue, "Note") : - (:normal, "Info") - printstyled(io, "$prefix: ", color=col) - print(io, diagnostic.message, ":\n") - - p = first_byte(diagnostic) - q = last_byte(diagnostic) - code = source.code - if q < p || (p == q && code[p] == '\n') - # An empty or invisible range! We expand it symmetrically to make it - # visible. - p = max(firstindex(code), prevind(code, p)) - q = min(lastindex(code), nextind(code, q)) - end - - # p and q mark the start and end of the diagnostic range. For context, - # buffer these out to the surrouding lines. 
- a,b = source_line_range(source, p, context_lines_before=2, context_lines_after=1) - c,d = source_line_range(source, q, context_lines_before=1, context_lines_after=2) - - hicol = (100,40,40) - - print(io, source[a:prevind(code, p)]) - # There's two situations, either - if b >= c - # The diagnostic range is compact and we show the whole thing - # a............... - # .....p...q...... - # ...............b - _printstyled(io, source[p:q]; color=hicol) - else - # Or large and we trucate the code to show only the region around the - # start and end of the error. - # a............... - # .....p.......... - # ...............b - # (snip) - # c............... - # .....q.......... - # ...............d - _printstyled(io, source[p:b]; color=hicol) - println(io, "…") - _printstyled(io, source[c:q]; color=hicol) - end - print(io, source[nextind(code,q):d]) - println(io) -end - +#------------------------------------------------------------------------------- struct ParseStreamPosition input_byte::Int # Index of next byte in input output_index::Int # Index of last span in output @@ -126,46 +161,50 @@ const NO_POSITION = ParseStreamPosition(0,0) """ ParseStream provides an IO interface for the parser. It - Wraps the lexer with a lookahead buffer -- Removes whitespace and comment tokens, shifting them into the output implicitly - -This is simililar in spirit to rust-analyzer's -[TextTreeSink](https://github.com/rust-analyzer/rust-analyzer/blob/4691a0647b2c96cc475d8bbe7c31fe194d1443e7/crates/syntax/src/parsing/text_tree_sink.rs) +- Removes insignificant whitespace and comment tokens, shifting them into the + output implicitly (newlines may be significant depending on `skip_newlines`) """ mutable struct ParseStream + # Lexer, transforming the input bytes into a token stream lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} + # Lookahead buffer for already lexed tokens lookahead::Vector{SyntaxToken} + # Parser output as an ordered sequence of ranges, parent nodes after children. 
ranges::Vector{TaggedRange} + # Parsing diagnostics (errors/warnings etc) diagnostics::Vector{Diagnostic} # First byte of next token next_byte::Int # Counter for number of peek()s we've done without making progress via a bump() peek_count::Int + # Vesion of Julia we're parsing this code for. May be different from VERSION! + julia_version_major::Int + julia_version_minor::Int end -function ParseStream(code) +function ParseStream(code::Base.GenericIOBuffer; julia_version=VERSION) + next_byte = position(code)+1 lexer = Tokenize.tokenize(code, RawToken) ParseStream(lexer, Vector{SyntaxToken}(), Vector{TaggedRange}(), Vector{Diagnostic}(), - 1, - 0) + next_byte, + 0, + julia_version.major, + julia_version.minor) end -function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) - println(io, "ParseStream at position $(stream.next_byte)") +function ParseStream(code::AbstractString; kws...) + ParseStream(IOBuffer(code); kws...) end -function show_diagnostics(io::IO, stream::ParseStream, code::SourceFile) - for d in stream.diagnostics - show_diagnostic(io, d, code) - end +function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) + println(io, "ParseStream at position $(stream.next_byte)") end function show_diagnostics(io::IO, stream::ParseStream, code) - if !isempty(stream.diagnostics) - show_diagnostics(io, stream, SourceFile(code)) - end + show_diagnostics(io, stream.diagnostics, code) end #------------------------------------------------------------------------------- @@ -239,11 +278,14 @@ function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false) stream.lookahead[_lookahead_index(stream, n, skip_newlines)] end +function _code_buf(stream) + # TODO: Peeking at the underlying data buffer inside the lexer is an awful + # hack. We should find a better way to do this kind of thing. 
+ stream.lexer.io.data +end + function _peek_equal_to(stream, first_byte, len, str) - # Humongous but should-be-allocation-free hack: peek at the underlying data - # buffer. TODO: Attach the code string to the stream so we don't have to - # dig into the lexer? - buf = stream.lexer.io.data + buf = _code_buf(stream) cbuf = codeunits(str) for i = 1:len if buf[first_byte + i - 1] != cbuf[i] @@ -321,7 +363,7 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") end is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags - tok.raw.dotop && (f |= DOTOP_FLAG) + is_dotted(tok) && (f |= DOTOP_FLAG) outk = (is_trivia || remap_kind == K"Nothing") ? k : remap_kind range = TaggedRange(SyntaxHead(outk, f), k, first_byte(tok), last_byte(tok), lastindex(stream.ranges)+1) @@ -471,13 +513,8 @@ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, return position(stream) end -function _emit_diagnostic(stream::ParseStream, fbyte, lbyte; - error=nothing, warning=nothing) - message = !isnothing(error) ? error : - !isnothing(warning) ? warning : - error("No message in diagnostic") - level = !isnothing(error) ? :error : :warning - push!(stream.diagnostics, Diagnostic(fbyte, lbyte, level, message)) +function _emit_diagnostic(stream::ParseStream, fbyte, lbyte; kws...) 
+ push!(stream.diagnostics, Diagnostic(fbyte, lbyte; kws...)) return nothing end @@ -595,7 +632,6 @@ function build_tree(::Type{NodeType}, stream::ParseStream; end end -function parse_all(::Type{GreenNode}, code) - stream = parse_all(code) - build_tree(GreenNode, stream) -end +first_byte(stream::ParseStream) = first_byte(first(stream.ranges)) +last_byte(stream::ParseStream) = last_byte(last(stream.ranges)) +any_error(stream::ParseStream) = any_error(stream.diagnostics) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5364dcc3aa7c1..1fb1b9f8ba70a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1,12 +1,13 @@ """ -ParseState carries parser context as we recursively descend into the parse -tree. For example, normally `x -y` means `(x) - (y)`, but when parsing matrix -literals we're in `space_sensitive` mode, and `[x -y]` means [(x) (-y)]. + ParseState(stream::ParseStream) + +ParseState is an internal data structure wrapping `ParseStream` to carry parser +context as we recursively descend into the parse tree. For example, normally +`x -y` means `(x) - (y)`, but when parsing matrix literals we're in +`space_sensitive` mode, and `[x -y]` means [(x) (-y)]. """ struct ParseState stream::ParseStream - # Vesion of Julia we're parsing this code for. May be different from VERSION! - julia_version::VersionNumber # Disable range colon for parsing ternary conditional operator range_colon_enabled::Bool @@ -23,19 +24,15 @@ struct ParseState end # Normal context -function ParseState(stream::ParseStream, julia_version::VersionNumber) - # To avoid keeping track of the exact Julia development version where new - # features were added, treat prereleases or dev versons as the release - # version by stripping the prerelease. 
- ver = VersionNumber(julia_version.major, julia_version.minor, julia_version.patch) - ParseState(stream, ver, true, false, false, false, false, true) +function ParseState(stream::ParseStream) + ParseState(stream, true, false, false, false, false, true) end function ParseState(ps::ParseState; range_colon_enabled=nothing, space_sensitive=nothing, for_generator=nothing, end_symbol=nothing, whitespace_newline=nothing, where_enabled=nothing) - ParseState(ps.stream, ps.julia_version, + ParseState(ps.stream, range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, space_sensitive === nothing ? ps.space_sensitive : space_sensitive, for_generator === nothing ? ps.for_generator : for_generator, @@ -151,21 +148,6 @@ function bump_closing_token(ps, closing_kind) end end -function bump_semicolon_trivia(ps) - while peek(ps) in KSet`; NewlineWs` - bump(ps, TRIVIA_FLAG) - end -end - -# Emit an error if the version is less than `min_ver` -function min_supported_version(min_ver, ps, mark, message) - # NB: the prerelease version will be removed from ps.julia_version before this point. - if ps.julia_version < min_ver - msg = "$message is not supported in Julia version $(ps.julia_version) < $(min_ver)" - emit(ps, mark, K"error", error=msg) - end -end - # Read tokens until we find an expected closing token. 
# Bump the big pile of resulting tokens as a single nontrivia error token function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps), error="unexpected tokens") @@ -183,6 +165,31 @@ function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps) emit(ps, mark, K"error", flags, error=error) end +@noinline function min_supported_version_err(ps, mark, message, min_ver) + major = ps.stream.julia_version_major + minor = ps.stream.julia_version_minor + msg = "$message is not supported in Julia version $major.$minor < $(min_ver)" + emit(ps, mark, K"error", error=msg) +end + +function version_lessthan(ps, ver) + # To avoid keeping track of the exact Julia development version where new + # features were added or comparing prerelease strings, we treat prereleases + # or dev versons as the release version using only major and minor version + # numbers. This means we're inexact for old dev versions but that seems + # like an acceptable tradeoff. + major = ps.stream.julia_version_major + minor = ps.stream.julia_version_minor + major < ver.major || (major == ver.major && minor < ver.minor) +end + +# Emit an error if the version is less than `min_ver` +function min_supported_version(min_ver, ps, mark, message) + if version_lessthan(ps, min_ver) + min_supported_version_err(ps, mark, message, min_ver) + end +end + # flisp: disallow-space function bump_disallowed_space(ps) if peek_token(ps).had_whitespace @@ -191,10 +198,27 @@ function bump_disallowed_space(ps) end end -function TODO(str) - error("TODO: $str") +function bump_semicolon_trivia(ps) + while peek(ps) in KSet`; NewlineWs` + bump(ps, TRIVIA_FLAG) + end end +# Like @assert, but always enabled and calls internal_error() +macro check(ex, msgs...) + msg = isempty(msgs) ? ex : msgs[1] + if isa(msg, AbstractString) + msg = msg + elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) + msg = :(string($(esc(msg)))) + else + msg = string(msg) + end + return :($(esc(ex)) ? 
nothing : internal_error($msg)) +end + +# Parser internal error, used as an assertion failure for cases we expect can't +# happen. @noinline function internal_error(strs...) error("Internal error: ", strs...) end @@ -307,7 +331,10 @@ end # # This is to make both codebases mutually understandable and make porting # changes simple. - +# +# The `parse_*` functions are listed here roughly in order of increasing +# precedence (lowest to highest binding power). A few helper functions are +# interspersed. # parse left-to-right binary operator # produces structures like (+ (+ (+ 2 3) 4) 5) @@ -384,7 +411,30 @@ function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) return n_delims != 0 end -# the principal non-terminals follow, in increasing precedence order +# Parse a sequence of top level statements separated by newlines, all wrapped +# in a toplevel node. +# +# a \n b ==> (toplevel a b) +# +# Note that parse_stmts can also emit toplevel nodes for semicolon-separated +# statements, so it's possible for these to be nested one level deep. +# +# a;b \n c;d ==> (toplevel (toplevel a b) (toplevel c d)) +function parse_toplevel(ps::ParseState) + mark = position(ps) + while true + if peek(ps, skip_newlines=true) == K"EndMarker" + # Allow end of input if there is nothing left but whitespace + # a \n \n ==> (toplevel a) + bump(ps, skip_newlines=true) + break + else + parse_stmts(ps) + end + end + emit(ps, mark, K"toplevel") + nothing +end # Parse a newline or semicolon-delimited list of expressions. 
# Repeated delimiters are allowed but ignored @@ -424,13 +474,50 @@ function parse_stmts(ps::ParseState) end if junk_mark != position(ps) emit(ps, junk_mark, K"error", - error="Extra tokens after end of expression") + error="extra tokens after end of expression") end if do_emit emit(ps, mark, K"toplevel") end end +# Parse docstrings attached by a space or single newline +# "doc" foo ==> (macrocall core_@doc "doc" foo) +# +# flisp: parse-docstring +function parse_docstring(ps::ParseState, down=parse_eq) + mark = position(ps) + atdoc_mark = bump_invisible(ps, K"TOMBSTONE") + down(ps) + if peek_behind(ps).kind in KSet`String string` + is_doc = true + k = peek(ps) + if is_closing_token(ps, k) + # "notdoc" ] ==> "notdoc" + is_doc = false + elseif k == K"NewlineWs" + k2 = peek(ps, 2) + if is_closing_token(ps, k2) || k2 == K"NewlineWs" + # "notdoc" \n] ==> "notdoc" + # "notdoc" \n\n foo ==> "notdoc" + is_doc = false + else + # Allow a single newline + # "doc" \n foo ==> (macrocall core_@doc "doc" foo) + bump(ps, TRIVIA_FLAG) # NewlineWs + end + else + # "doc" foo ==> (macrocall core_@doc "doc" foo) + # "doc $x" foo ==> (macrocall core_@doc (string "doc " x) foo) + end + if is_doc + reset_node!(ps, atdoc_mark, kind=K"core_@doc") + down(ps) + emit(ps, mark, K"macrocall") + end + end +end + # Parse assignments with comma separated lists on each side # a = b ==> (= a b) # a .= b ==> (.= a b) @@ -845,9 +932,8 @@ function parse_unary_subtype(ps::ParseState) mark = position(ps) bump(ps, TRIVIA_FLAG) parse_where(ps, parse_juxtapose) - if peek_behind(ps).kind == K"tuple" - TODO("Can this even happen?") - end + # Flisp parser handled this, but I don't know how it can happen... + @check peek_behind(ps).kind != K"tuple" emit(ps, mark, k) end else @@ -1350,7 +1436,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind == K"vcat" ? K"typed_vcat" : ckind == K"comprehension" ? K"typed_comprehension" : ckind == K"ncat" ? 
K"typed_ncat" : - internal_error("unrecognized kind in parse_cat", ckind) + internal_error("unrecognized kind in parse_cat ", ckind) emit(ps, mark, outk, cflags) check_ncat_compat(ps, mark, ckind) if is_macrocall @@ -1618,7 +1704,7 @@ function parse_resword(ps::ParseState) # Oddities allowed by parser # abstract type A < B end ==> (abstract (call-i A < B)) bump(ps, TRIVIA_FLAG) - @assert peek(ps) == K"type" + @check peek(ps) == K"type" bump(ps, TRIVIA_FLAG) parse_subtype_spec(ps) bump_semicolon_trivia(ps) @@ -1634,7 +1720,7 @@ function parse_resword(ps::ParseState) # struct A end ==> (struct false A (block)) bump_invisible(ps, K"false") end - @assert peek(ps) == K"struct" + @check peek(ps) == K"struct" bump(ps, TRIVIA_FLAG) parse_subtype_spec(ps) parse_block(ps) @@ -1646,7 +1732,7 @@ function parse_resword(ps::ParseState) # primitive type A $N end ==> (primitive A ($ N)) # primitive type A <: B \n 8 \n end ==> (primitive (<: A B) 8) bump(ps, TRIVIA_FLAG) - @assert peek(ps) == K"type" + @check peek(ps) == K"type" bump(ps, TRIVIA_FLAG) with_space_sensitive(parse_subtype_spec, ps) with_space_sensitive(parse_cond, ps) @@ -1842,7 +1928,7 @@ end function parse_function(ps::ParseState) mark = position(ps) word = peek(ps) - @assert word in KSet`macro function` + @check word in KSet`macro function` is_function = word == K"function" is_anon_func::Bool = false bump(ps, TRIVIA_FLAG) @@ -2105,7 +2191,7 @@ end function parse_imports(ps::ParseState) mark = position(ps) word = peek(ps) - @assert word in KSet`import using` + @check word in KSet`import using` bump(ps, TRIVIA_FLAG) emark = position(ps) initial_as = parse_import(ps, word, false) @@ -2401,7 +2487,7 @@ function parse_generator(ps::ParseState, mark, flatten=false) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Expected space before `for` in generator") end - @assert kind(t) == K"for" + @check kind(t) == K"for" bump(ps, TRIVIA_FLAG) filter_mark = position(ps) parse_comma_separated(ps, parse_iteration_spec) @@ -2643,7 
+2729,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) where_enabled=true, whitespace_newline=true) mark = position(ps) - @assert peek(ps) == K"(" + @check peek(ps) == K"(" bump(ps, TRIVIA_FLAG) # K"(" after_paren_mark = position(ps) k = peek(ps) @@ -2847,7 +2933,7 @@ function parse_string(ps::ParseState) # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") m = position(ps) parse_atom(ps) - if ps.julia_version >= v"1.6" + if !version_lessthan(ps, v"1.6") # https://github.com/JuliaLang/julia/pull/38692 prev = peek_behind(ps) if prev.kind == K"String" @@ -2919,6 +3005,19 @@ function parse_raw_string(ps::ParseState; remap_kind=K"Nothing") end end +function emit_braces(ps, mark, ckind, cflags) + if ckind == K"hcat" + # {x y} ==> (bracescat (row x y)) + emit(ps, mark, K"row", cflags) + elseif ckind == K"ncat" + # {x ;;; y} ==> (bracescat (nrow-3 x y)) + emit(ps, mark, K"nrow", cflags) + end + check_ncat_compat(ps, mark, ckind) + outk = ckind in KSet`vect comprehension` ? K"braces" : K"bracescat" + emit(ps, mark, outk) +end + # parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. # # If `check_identifiers` is true, identifiers are disallowed from being one of @@ -3054,96 +3153,3 @@ function parse_atom(ps::ParseState, check_identifiers=true) end end -function emit_braces(ps, mark, ckind, cflags) - if ckind == K"hcat" - # {x y} ==> (bracescat (row x y)) - emit(ps, mark, K"row", cflags) - elseif ckind == K"ncat" - # {x ;;; y} ==> (bracescat (nrow-3 x y)) - emit(ps, mark, K"nrow", cflags) - end - check_ncat_compat(ps, mark, ckind) - outk = ckind in KSet`vect comprehension` ? 
K"braces" : K"bracescat" - emit(ps, mark, outk) -end - -# Parse docstrings attached by a space or single newline -# "doc" foo ==> (macrocall core_@doc "doc" foo) -# -# flisp: parse-docstring -function parse_docstring(ps::ParseState, down=parse_eq) - mark = position(ps) - atdoc_mark = bump_invisible(ps, K"TOMBSTONE") - down(ps) - if peek_behind(ps).kind in KSet`String string` - is_doc = true - k = peek(ps) - if is_closing_token(ps, k) - # "notdoc" ] ==> "notdoc" - is_doc = false - elseif k == K"NewlineWs" - k2 = peek(ps, 2) - if is_closing_token(ps, k2) || k2 == K"NewlineWs" - # "notdoc" \n] ==> "notdoc" - # "notdoc" \n\n foo ==> "notdoc" - is_doc = false - else - # Allow a single newline - # "doc" \n foo ==> (macrocall core_@doc "doc" foo) - bump(ps, TRIVIA_FLAG) # NewlineWs - end - else - # "doc" foo ==> (macrocall core_@doc "doc" foo) - # "doc $x" foo ==> (macrocall core_@doc (string "doc " x) foo) - end - if is_doc - reset_node!(ps, atdoc_mark, kind=K"core_@doc") - down(ps) - emit(ps, mark, K"macrocall") - end - end -end - - -#------------------------------------------------------------------------------- -# Parser entry points - -function parse_all(ps::ParseState) - mark = position(ps) - while true - if peek(ps, skip_newlines=true) == K"EndMarker" - # As a special case, allow early end of input if there is - # nothing left but whitespace - # === - # # a - # - # #= b =# # c - # ==> (toplevel) - bump(ps, skip_newlines=true) - break - else - parse_stmts(ps) - end - end - emit(ps, mark, K"toplevel") - nothing -end - -""" - parse_all(input) - -Parse a sequence of top level statements. - -`input` may be a `ParseStream` or other input source which will be passed to -the `ParseStream` constructor. The `ParseStream` is returned. -""" -function parse_all(stream::ParseStream; julia_version=VERSION) - parse_all(ParseState(stream, julia_version)) - return stream -end - -function parse_all(code, args...) - stream = ParseStream(code) - return parse_all(stream, args...) 
-end - diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl new file mode 100644 index 0000000000000..ec0c7d72e3017 --- /dev/null +++ b/JuliaSyntax/src/parser_api.jl @@ -0,0 +1,170 @@ +# The main parser API. +# +# This is defined separately from parser.jl so that: +# * parser.jl doesn't need to refer to any tree data structures +# * It's clear which parts are the public API +# +# What should the general parsing API look like? Some points to consider: +# +# * After parsing atoms or statements or most other internal rules, it's +# usual to start in the middle of the input text and end somewhere else in +# the middle of the input text. So we should taken an index for the start of +# parsing and supply an index back to the caller after parsing. +# +# * `parseall` is a special case where we expect to consume all the input. +# Perhaps this is the API which throws an error if we don't consume it all, +# and doesn't accept an index as input? +# +# * The ParseStream is the fundamental interface which wraps the code string +# and index up together for input and contains the output events, diagnostics +# and current stream position after parsing. The user should potentially be +# able to use this directly. It does, however assume a Julia-compatible token +# stream. +# +# * It could be useful to support an IO-based interface so that users can parse +# Julia code intermixed with other DSLs. Documenter.jl and string macros come +# to mind as examples which could use this. A tricky part is deciding where +# the input ends: For string macros this is done by the parser, but for +# Documenter it's probably just done beforehand according to the Markdown +# code block rules. +# +# * The API should have an interface where a simple string is passed in. How +# does SourceFile relate to this? +# +# * It's neat for `parse` to be overloadable to produce various output data +# structures; GreenNode, SyntaxNode, Expr, (etc?) 
in the same way that +# Base.parse can be used for non-Julia code. (Heh... though +# `Base.parse(Expr, "...")` would also make a certain amount of sense.) +# +# * What's the no-copy API look like? A String can be put into an IOBuffer via +# unsafe_wrap(Vector{UInt8}, str) ... A SubString likewise. Also there's the +# `codeunits` function to hold a GC-safe view of string data as an array (but +# we can't use a Vector{UInt8}) + +struct ParseError <: Exception + source::SourceFile + diagnostics::Vector{Diagnostic} +end + +function Base.showerror(io::IO, err::ParseError, bt; backtrace=false) + show_diagnostics(io, err.diagnostics, err.source) +end + +function Base.showerror(io::IO, err::ParseError) + show_diagnostics(io, err.diagnostics, err.source) +end + +Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) + + +""" + # Input and output: + stream = parse(stream::ParseStream; kws...) + (tree, diagnostics) = parse(TreeType, io::IOBuffer; kws...) + (tree, diagnostics, index) = parse(TreeType, str::AbstractString, [index::Integer]; kws...) + # Keywords + parse(...; rule=:toplevel, julia_version=VERSION, ignore_trivia=true) + +Parse Julia source code from `input`, returning the output in a format +compatible with `input`: + +* When `input` is a `ParseStream`, the stream itself is returned and the + `ParseStream` interface can be used to process the output. +* When `input` is an `IOBuffer`, the output is `(tree, diagnostics)`. The + buffer `position` will be set to the next byte of input. +* When `input` is an `AbstractString, Integer`, the output is + `(tree, diagnostics, index)`, where `index` (default 1) is the next byte of + input. + +`rule` may be any of +* `toplevel` (default) — parse a whole "file" of top level statements. In this + mode, the parser expects to fully consume the input. +* `statement` — parse a single statement, or statements separated by semicolons. 
+* `atom` — parse a single syntax "atom": a literal, identifier, or + parenthesized expression. + +`julia_version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`julia_version`. + +See also [`parseall`](@ref) for a simpler but less powerful interface. +""" +function parse(stream::ParseStream; rule::Symbol=:toplevel) + ps = ParseState(stream) + if rule === :toplevel + parse_toplevel(ps) + elseif rule === :statement + parse_stmts(ps) + elseif rule === :atom + parse_atom(ps) + else + throw(ArgumentError("Unknown grammar rule $rule")) + end + stream +end + +function parse(::Type{T}, io::Base.GenericIOBuffer; + rule::Symbol=:toplevel, julia_version=VERSION, kws...) where {T} + stream = ParseStream(io; julia_version=julia_version) + parse(stream; rule=rule) + tree = build_tree(T, stream; kws...) + seek(io, stream.next_byte-1) + tree, stream.diagnostics +end + +function parse(::Type{T}, code::AbstractString, index::Integer=1; kws...) where {T} + io = IOBuffer(code) + seek(io, index-1) + tree, diagnostics = parse(T, io; kws...) + tree, diagnostics, position(io)+1 +end + + +""" + parseall(TreeType, input; + rule=:toplevel, + julia_version=VERSION, + ignore_trivia=true) + +Experimental convenience interface to parse `input` as Julia code, emitting an +error if the entire input is not consumed. By default `parseall` will ignore +whitespace and comments before and after valid code but you can turn this off +by setting `ignore_trivia=false`. + +A `ParseError` will be thrown if any errors occurred during parsing. + +See [`parse`](@ref) for a more complete and powerful interface to the parser, +as well as a description of the `julia_version` and `rule` keywords. 
+""" +function parseall(::Type{T}, input; rule=:toplevel, julia_version=VERSION, + ignore_trivia=true) where {T} + stream = ParseStream(input; julia_version=julia_version) + do_skip_trivia = ignore_trivia && rule != :toplevel + if do_skip_trivia + bump_trivia(stream, skip_newlines=true) + end + parse(stream; rule=rule) + if do_skip_trivia + bump_trivia(stream, skip_newlines=true) + else + if peek(stream) != K"EndMarker" + throw(ArgumentError("Parsing did not terminate at end of input")) + end + end + if any_error(stream.diagnostics) + throw(ParseError(SourceFile(input), stream.diagnostics)) + elseif !isempty(stream.diagnostics) + # Crudely format any warnings to the logger. TODO: This should be + # neatened up to avoid the double-decorations. + buf = IOBuffer() + show_diagnostics(IOContext(buf, stdout), stream, SourceFile(input)) + @warn Text(String(take!(buf))) + end + # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind + # mess that we've got here. + # * It's required for GreenNode, as GreenNode is useless without + # * Dropping it would be ok for SyntaxNode and Expr + build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel") +end + diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 6ccc1b51d6a4b..9bbdd605b9e6b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -1,104 +1,3 @@ -#------------------------------------------------------------------------------- -# Syntax tree types - -#------------------------------------------------------------------------------- -# Flags hold auxilary information about tokens/nonterminals which the Kind -# doesn't capture in a nice way. -const RawFlags = UInt32 -const EMPTY_FLAGS = RawFlags(0) -const TRIVIA_FLAG = RawFlags(1<<0) -# Some of the following flags are head-specific and could probably be allowed -# to cover the same bits... 
-const INFIX_FLAG = RawFlags(1<<1) -# Record whether syntactic operators were dotted -const DOTOP_FLAG = RawFlags(1<<2) -# Set when kind == K"String" was triple-delimited as with """ or ``` -const TRIPLE_STRING_FLAG = RawFlags(1<<3) -# Set when the string is "raw" and needs minimal unescaping -const RAW_STRING_FLAG = RawFlags(1<<4) -# try-finally-catch -const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) -# Flags holding the dimension of an nrow or other UInt8 not held in the source -const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) -# Todo ERROR_FLAG = 0x80000000 ? - -function set_numeric_flags(n::Integer) - f = RawFlags((n << 8) & NUMERIC_FLAGS) - if numeric_flags(f) != n - error("Numeric flags unable to hold large integer $n") - end - f -end - -function numeric_flags(f::RawFlags) - Int((f >> 8) % UInt8) -end - -# Return true if any of `test_flags` are set -has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 - -# Function for combining flags. (Do we want this?) -function flags(; trivia::Bool=false, - infix::Bool=false, - dotop::Bool=false, - try_catch_after_finally::Bool=false, - numeric::Int=0) - flags = RawFlags(0) - trivia && (flags |= TRIVIA_FLAG) - infix && (flags |= INFIX_FLAG) - dotop && (flags |= DOTOP_FLAG) - try_catch_after_finally && (flags |= TRY_CATCH_AFTER_FINALLY_FLAG) - numeric != 0 && (flags |= set_numeric_flags(numeric)) - return flags::RawFlags -end - -#------------------------------------------------------------------------------- -struct SyntaxHead - kind::Kind - flags::RawFlags -end - -kind(head::SyntaxHead) = head.kind -flags(head::SyntaxHead) = head.flags -has_flags(head::SyntaxHead, test_flags) = has_flags(flags(head), test_flags) - -is_trivia(head::SyntaxHead) = has_flags(head, TRIVIA_FLAG) -is_infix(head::SyntaxHead) = has_flags(head, INFIX_FLAG) -is_dotted(head::SyntaxHead) = has_flags(head, DOTOP_FLAG) -numeric_flags(head::SyntaxHead) = numeric_flags(flags(head)) -is_error(head::SyntaxHead) = kind(head) == K"error" - 
-function Base.summary(head::SyntaxHead) - _kind_str(kind(head)) -end - -function untokenize(head::SyntaxHead; include_flag_suff=true) - str = untokenize(kind(head)) - if is_dotted(head) - str = "."*str - end - if include_flag_suff && flags(head) ∉ (EMPTY_FLAGS, DOTOP_FLAG) - str = str*"-" - is_trivia(head) && (str = str*"t") - is_infix(head) && (str = str*"i") - has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") - has_flags(head, RAW_STRING_FLAG) && (str = str*"r") - has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f") - n = numeric_flags(head) - n != 0 && (str = str*string(n)) - end - str -end - -kind(node::GreenNode{SyntaxHead}) = head(node).kind -flags(node::GreenNode{SyntaxHead}) = head(node).flags - -is_infix(node) = is_infix(head(node)) - -# Value of an error node with no children -struct ErrorVal -end - #------------------------------------------------------------------------------- # AST interface, built on top of raw tree @@ -116,6 +15,10 @@ mutable struct SyntaxNode val::Any end +# Value of an error node with no children +struct ErrorVal +end + Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) @@ -308,6 +211,11 @@ end #------------------------------------------------------------------------------- # Tree utilities + +kind(node) = kind(head(node)) +flags(node) = flags(head(node)) +is_infix(node) = is_infix(head(node)) + """ child(node, i1, i2, ...) 
@@ -384,7 +292,7 @@ function is_eventually_call(ex) is_eventually_call(ex.args[1])) end -function _to_expr(node::SyntaxNode) +function _to_expr(node::SyntaxNode, iteration_spec=false) if !haschildren(node) if node.val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to @@ -401,17 +309,23 @@ function _to_expr(node::SyntaxNode) return node.val end end - args = Vector{Any}(undef, length(children(node))) - args = map!(_to_expr, args, children(node)) + headstr = untokenize(head(node), include_flag_suff=false) + headsym = !isnothing(headstr) ? Symbol(headstr) : + error("Can't untokenize head of kind $(kind(node))") + node_args = children(node) + args = Vector{Any}(undef, length(node_args)) + if headsym == :for && length(node_args) == 2 + args[1] = _to_expr(node_args[1], true) + args[2] = _to_expr(node_args[2], false) + else + map!(_to_expr, args, node_args) + end # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children # here as necessary to get the canonical order. if is_infix(node.raw) args[2], args[1] = args[1], args[2] end - headstr = untokenize(head(node), include_flag_suff=false) - headsym = !isnothing(headstr) ? 
Symbol(headstr) : - error("Can't untokenize head of kind $(kind(node))") loc = source_location(LineNumberNode, node.source, node.position) # Convert elements if headsym == :macrocall @@ -473,7 +387,7 @@ function _to_expr(node::SyntaxNode) elseif headsym == :typed_ncat insert!(args, 2, numeric_flags(flags(node))) elseif headsym == :(=) - if is_eventually_call(args[1]) + if is_eventually_call(args[1]) && !iteration_spec if Meta.isexpr(args[2], :block) pushfirst!(args[2].args, loc) else @@ -502,44 +416,14 @@ Base.Expr(node::SyntaxNode) = _to_expr(node) #------------------------------------------------------------------------------- -function parse_all(::Type{SyntaxNode}, source::SourceFile) - stream = ParseStream(source.code) - parse_all(stream) - if !isempty(stream.diagnostics) - buf = IOBuffer() - show_diagnostics(IOContext(buf, stdout), stream, source.code) - @error Text(String(take!(buf))) - end - green_tree = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") - SyntaxNode(source, green_tree) -end - -function parse_all(::Type{SyntaxNode}, code::AbstractString; filename="none") - parse_all(SyntaxNode, SourceFile(code, filename=filename)) -end - - -""" - parse_all(Expr, code::AbstractString; filename="none") - -Parse the given code and convert to a standard Expr -""" -function parse_all(::Type{Expr}, code::AbstractString; filename="none") - Expr(parse_all(SyntaxNode, code; filename=filename)) +function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename="none", kws...) + green_tree = build_tree(GreenNode, stream; kws...) + code = String(copy(_code_buf(stream))) + source = SourceFile(code, filename=filename) + SyntaxNode(source, green_tree, first_byte(stream)) end -function remove_linenums!(ex) - ex = Base.remove_linenums!(ex) - if Meta.isexpr(ex, :toplevel) - filter!(x->!(x isa LineNumberNode), ex.args) - end - ex +function build_tree(::Type{Expr}, stream::ParseStream; kws...) 
+ Expr(build_tree(SyntaxNode, stream; kws...)) end -function flisp_parse_all(code; filename="none") - if VERSION >= v"1.6" - Meta.parseall(code, filename=filename) - else - Base.parse_input_line(code, filename=filename) - end -end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 43d1b2320a7c5..be7baa7f89d47 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -16,3 +16,21 @@ function _printstyled(io::IO, text; color) end end +function flisp_parse_all(code; filename="none") + if VERSION >= v"1.6" + Meta.parseall(code, filename=filename) + else + # This is approximate. It should work for well-formed code. + Base.parse_input_line(code, filename=filename) + end +end + +# Really remove line numbers, even from Expr(:toplevel) +function remove_linenums!(ex) + ex = Base.remove_linenums!(ex) + if Meta.isexpr(ex, :toplevel) + filter!(x->!(x isa LineNumberNode), ex.args) + end + ex +end + diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 3a72bab864d9b..50cca765464e5 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,6 +1,6 @@ function test_parse(production, code; v=v"1.6") - stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream, v)) + stream = ParseStream(code, julia_version=v) + production(JuliaSyntax.ParseState(stream)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"Nothing") source = SourceFile(code) s = SyntaxNode(source, t) @@ -16,6 +16,11 @@ end # * Use only the green tree to generate the S-expressions # (add flag annotations to heads) tests = [ + JuliaSyntax.parse_toplevel => [ + "a \n b" => "(toplevel a b)" + "a;b \n c;d" => "(toplevel (toplevel a b) (toplevel c d))" + "a \n \n" => "(toplevel a)" + ], JuliaSyntax.parse_block => [ "a;b;c" => "(block a b c)" "a;;;b;;" => "(block a b)" @@ -668,6 +673,5 @@ end s end """ - ex = JuliaSyntax.parse_all(Expr, code) - @test ex == JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) + 
@test parseall(Expr, code) == JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 0aef593c320c8..44d2280daf9fb 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -5,7 +5,7 @@ using JuliaSyntax: SourceFile using JuliaSyntax: GreenNode, SyntaxNode, flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, - children, child, setchild!, SyntaxHead, parse_all + children, child, setchild!, SyntaxHead module TokenizeTests using Test @@ -16,8 +16,8 @@ end include("test_utils.jl") include("parse_stream.jl") -include("syntax_tree.jl") include("parser.jl") +include("syntax_tree.jl") @testset "Parsing values from strings" begin include("value_parsing.jl") diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 66e4e329f56dc..66b8ebd71070d 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -1,10 +1,24 @@ -@testset "Parse tree conversion" begin +@testset "Expr conversion" begin @testset "Quote nodes" begin - @test Expr(child(parse_all(SyntaxNode, ":(a)"), 1)) == QuoteNode(:a) - @test Expr(child(parse_all(SyntaxNode, ":(:a)"), 1)) == - Expr(:quote, QuoteNode(:a)) - @test Expr(child(parse_all(SyntaxNode, ":(1+2)"), 1)) == - Expr(:quote, Expr(:call, :+, 1, 2)) + @test parseall(Expr, ":(a)", rule=:atom) == QuoteNode(:a) + @test parseall(Expr, ":(:a)", rule=:atom) == Expr(:quote, QuoteNode(:a)) + @test parseall(Expr, ":(1+2)", rule=:atom) == Expr(:quote, Expr(:call, :+, 1, 2)) + end + + @testset "Short form function line numbers" begin + # A block is added to hold the line number node + @test parseall(Expr, "f() = xs", rule=:statement) == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1, :none), + :xs)) + # flisp parser quirk: In a for loop the block is not added, despite + # this defining a short-form function. 
+ @test parseall(Expr, "for f() = xs\nend", rule=:statement) == + Expr(:for, + Expr(:(=), Expr(:call, :f), :xs), + Expr(:block)) end end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 83dc890388d97..17cab49a1dcba 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -7,7 +7,8 @@ using JuliaSyntax: # Parsing ParseStream, SourceFile, - parse_all, + parse, + parseall, @K_str, # Nodes GreenNode, @@ -39,7 +40,7 @@ end function parsers_agree_on_file(path) code = read(path, String) - JuliaSyntax.remove_linenums!(parse_all(Expr, code)) == + JuliaSyntax.remove_linenums!(parseall(Expr, code)) == JuliaSyntax.remove_linenums!(flisp_parse_all(code)) end @@ -119,8 +120,8 @@ Parse `code`, entering the recursive descent parser at the given function for debugging. """ function itest_parse(production, code; julia_version::VersionNumber=v"1.6") - stream = ParseStream(code) - production(JuliaSyntax.ParseState(stream, julia_version)) + stream = ParseStream(code; julia_version=julia_version) + production(JuliaSyntax.ParseState(stream)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") println(stdout, "# Code:\n$code\n") From cb1200506a2914aa332fbcdd36b1f584cb95896a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 25 Jan 2022 22:28:37 +1000 Subject: [PATCH 0340/1109] Tokenize: Always treat .&& and .|| as tokens In https://github.com/JuliaLang/julia/pull/39594 it was considered non-breaking enough to change the tokenization of ".&&" from `.&`,`&` to `.&&`. For JuliaSyntax it seems simplest to just use the newer tokenization for this and detect the use of the syntax in the parser, emitting an error on older versions. This leads to the most neat predictable parser errors. 
--- JuliaSyntax/Tokenize/src/lexer.jl | 23 ++++------------------- JuliaSyntax/Tokenize/src/utilities.jl | 7 ------- JuliaSyntax/Tokenize/test/lexer.jl | 12 +++++------- 3 files changed, 9 insertions(+), 33 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 9cbce60cc0224..afbc69e27c13a 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -1,11 +1,5 @@ module Lexers -@static if Meta.parse("a .&& b").args[1] != :.& - const CAN_DOT_LAZY_AND_OR = true -else - const CAN_DOT_LAZY_AND_OR = false -end - include("utilities.jl") import ..Tokens @@ -1045,10 +1039,8 @@ function lex_dot(l::Lexer) if accept(l, "=") return emit(l, Tokens.AND_EQ) else - @static if CAN_DOT_LAZY_AND_OR - if accept(l, "&") - return emit(l, Tokens.LAZY_AND) - end + if accept(l, "&") + return emit(l, Tokens.LAZY_AND) end return emit(l, Tokens.AND) end @@ -1061,17 +1053,10 @@ function lex_dot(l::Lexer) readchar(l) return lex_equal(l) elseif pc == '|' - @static if !CAN_DOT_LAZY_AND_OR - if dpc == '|' - return emit(l, Tokens.DOT) - end - end l.dotop = true readchar(l) - @static if CAN_DOT_LAZY_AND_OR - if accept(l, "|") - return emit(l, Tokens.LAZY_OR) - end + if accept(l, "|") + return emit(l, Tokens.LAZY_OR) end return lex_bar(l) elseif pc == '!' 
&& dpc == '=' diff --git a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/Tokenize/src/utilities.jl index 7cf67536c5334..173d330a16a7c 100644 --- a/JuliaSyntax/Tokenize/src/utilities.jl +++ b/JuliaSyntax/Tokenize/src/utilities.jl @@ -379,19 +379,12 @@ end 0x0000a71b <= c <= 0x0000a71d end - function optakessuffix(k) (Tokens.begin_ops < k < Tokens.end_ops) && !( k == Tokens.DDDOT || Tokens.begin_assignments <= k <= Tokens.end_assignments || k == Tokens.CONDITIONAL || - @static(if !CAN_DOT_LAZY_AND_OR - k == Tokens.LAZY_OR || - k == Tokens.LAZY_AND - else - false - end) || k == Tokens.ISSUBTYPE || k == Tokens.ISSUPERTYPE || k == Tokens.IN || diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 24b41f2da7112..ab9e0d02529d4 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -166,13 +166,8 @@ end @test tok("1 in 2", 3).kind == T.IN @test tok("1 in[1]", 3).kind == T.IN - if VERSION >= v"0.6.0-dev.1471" - @test tok("1 isa 2", 3).kind == T.ISA - @test tok("1 isa[2]", 3).kind == T.ISA - else - @test tok("1 isa 2", 3).kind == T.IDENTIFIER - @test tok("1 isa[2]", 3).kind == T.IDENTIFIER - end + @test tok("1 isa 2", 3).kind == T.ISA + @test tok("1 isa[2]", 3).kind == T.ISA end @testset "tokenizing true/false literals" begin @@ -629,6 +624,9 @@ for op in ops for (arity, container) in strs for str in container expr = Meta.parse(str, raise = false) + if VERSION < v"1.7" && str == "a .&& b" + expr = Expr(Symbol(".&&"), :a, :b) + end if expr isa Expr && (expr.head != :error && expr.head != :incomplete) tokens = collect(tokenize(str)) exop = expr.head == :call ? 
expr.args[1] : expr.head From ef0b121d9e2038de6df807ce54c1b47dc798449a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 25 Jan 2022 22:34:55 +1000 Subject: [PATCH 0341/1109] Version compatibility for `.&&` and `.||` syntax: set to >= 1.7 Also replace the julia_version keyword with simply `version` in various places and simplify the way that restricted version comparison with (major,minor) numbers is done. --- JuliaSyntax/src/parse_stream.jl | 25 +++++++++++------- JuliaSyntax/src/parser.jl | 47 +++++++++++++++++++-------------- JuliaSyntax/src/parser_api.jl | 18 ++++++------- JuliaSyntax/test/parser.jl | 8 +++--- JuliaSyntax/test/test_utils.jl | 6 ++--- 5 files changed, 59 insertions(+), 45 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index a3f35862e44c4..40129d3af015f 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -177,22 +177,27 @@ mutable struct ParseStream next_byte::Int # Counter for number of peek()s we've done without making progress via a bump() peek_count::Int - # Vesion of Julia we're parsing this code for. May be different from VERSION! - julia_version_major::Int - julia_version_minor::Int -end - -function ParseStream(code::Base.GenericIOBuffer; julia_version=VERSION) - next_byte = position(code)+1 - lexer = Tokenize.tokenize(code, RawToken) + # (major,minor) version of Julia we're parsing this code for. + # May be different from VERSION! + version::Tuple{Int,Int} +end + +function ParseStream(io::Base.GenericIOBuffer; version=VERSION) + next_byte = position(io)+1 + lexer = Tokenize.Lexers.Lexer(io, RawToken) + # To avoid keeping track of the exact Julia development version where new + # features were added or comparing prerelease strings, we treat prereleases + # or dev versons as the release version using only major and minor version + # numbers. This means we're inexact for old dev versions but that seems + # like an acceptable tradeoff. 
+ ver = (version.major, version.minor) ParseStream(lexer, Vector{SyntaxToken}(), Vector{TaggedRange}(), Vector{Diagnostic}(), next_byte, 0, - julia_version.major, - julia_version.minor) + ver) end function ParseStream(code::AbstractString; kws...) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 1fb1b9f8ba70a..8ceb760c63567 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -166,26 +166,14 @@ function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps) end @noinline function min_supported_version_err(ps, mark, message, min_ver) - major = ps.stream.julia_version_major - minor = ps.stream.julia_version_minor - msg = "$message is not supported in Julia version $major.$minor < $(min_ver)" + major, minor = ps.stream.version + msg = "$message not supported in Julia version $major.$minor < $(min_ver.major).$(min_ver.minor)" emit(ps, mark, K"error", error=msg) end -function version_lessthan(ps, ver) - # To avoid keeping track of the exact Julia development version where new - # features were added or comparing prerelease strings, we treat prereleases - # or dev versons as the release version using only major and minor version - # numbers. This means we're inexact for old dev versions but that seems - # like an acceptable tradeoff. - major = ps.stream.julia_version_major - minor = ps.stream.julia_version_minor - major < ver.major || (major == ver.major && minor < ver.minor) -end - # Emit an error if the version is less than `min_ver` function min_supported_version(min_ver, ps, mark, message) - if version_lessthan(ps, min_ver) + if ps.stream.version < (min_ver.major, min_ver.minor) min_supported_version_err(ps, mark, message, min_ver) end end @@ -679,20 +667,39 @@ function parse_arrow(ps::ParseState) parse_RtoL(ps, parse_or, is_prec_arrow, ==(K"-->"), parse_arrow) end +# Like parse_RtoL, but specialized for the version test of dotted operators. 
+function parse_lazy_cond(ps::ParseState, down, is_op, self) + mark = position(ps) + down(ps) + t = peek_token(ps) + k = kind(t) + if is_op(k) + bump(ps, TRIVIA_FLAG) + self(ps) + emit(ps, mark, k, flags(t)) + if is_dotted(t) + min_supported_version(v"1.7", ps, mark, "dotted operators `.||` and `.&&`") + end + end +end + + # x || y || z ==> (|| x (|| y z)) -# x .|| y ==> (.|| x y) +#v1.6: x .|| y ==> (error (.|| x y)) +#v1.7: x .|| y ==> (.|| x y) # # flisp: parse-or function parse_or(ps::ParseState) - parse_RtoL(ps, parse_and, is_prec_lazy_or, true, parse_or) + parse_lazy_cond(ps, parse_and, is_prec_lazy_or, parse_or) end # x && y && z ==> (&& x (&& y z)) -# x .&& y ==> (.&& x y) +#v1.6: x .&& y ==> (error (.&& x y)) +#v1.7: x .&& y ==> (.&& x y) # # flisp: parse-and function parse_and(ps::ParseState) - parse_RtoL(ps, parse_comparison, is_prec_lazy_and, true, parse_and) + parse_lazy_cond(ps, parse_comparison, is_prec_lazy_and, parse_and) end # Parse binary comparisons and comparison chains @@ -2933,7 +2940,7 @@ function parse_string(ps::ParseState) # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") m = position(ps) parse_atom(ps) - if !version_lessthan(ps, v"1.6") + if ps.stream.version >= (1,6) # https://github.com/JuliaLang/julia/pull/38692 prev = peek_behind(ps) if prev.kind == K"String" diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index ec0c7d72e3017..f66ac1c0bc06b 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -63,7 +63,7 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) (tree, diagnostics) = parse(TreeType, io::IOBuffer; kws...) (tree, diagnostics, index) = parse(TreeType, str::AbstractString, [index::Integer]; kws...) 
# Keywords - parse(...; rule=:toplevel, julia_version=VERSION, ignore_trivia=true) + parse(...; rule=:toplevel, version=VERSION, ignore_trivia=true) Parse Julia source code from `input`, returning the output in a format compatible with `input`: @@ -83,10 +83,10 @@ compatible with `input`: * `atom` — parse a single syntax "atom": a literal, identifier, or parenthesized expression. -`julia_version` (default `VERSION`) may be used to set the syntax version to +`version` (default `VERSION`) may be used to set the syntax version to any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been added after v"1.0", emitting an error if it's not compatible with the requested -`julia_version`. +`version`. See also [`parseall`](@ref) for a simpler but less powerful interface. """ @@ -105,8 +105,8 @@ function parse(stream::ParseStream; rule::Symbol=:toplevel) end function parse(::Type{T}, io::Base.GenericIOBuffer; - rule::Symbol=:toplevel, julia_version=VERSION, kws...) where {T} - stream = ParseStream(io; julia_version=julia_version) + rule::Symbol=:toplevel, version=VERSION, kws...) where {T} + stream = ParseStream(io; version=version) parse(stream; rule=rule) tree = build_tree(T, stream; kws...) seek(io, stream.next_byte-1) @@ -124,7 +124,7 @@ end """ parseall(TreeType, input; rule=:toplevel, - julia_version=VERSION, + version=VERSION, ignore_trivia=true) Experimental convenience interface to parse `input` as Julia code, emitting an @@ -135,11 +135,11 @@ by setting `ignore_trivia=false`. A `ParseError` will be thrown if any errors occurred during parsing. See [`parse`](@ref) for a more complete and powerful interface to the parser, -as well as a description of the `julia_version` and `rule` keywords. +as well as a description of the `version` and `rule` keywords. 
""" -function parseall(::Type{T}, input; rule=:toplevel, julia_version=VERSION, +function parseall(::Type{T}, input; rule=:toplevel, version=VERSION, ignore_trivia=true) where {T} - stream = ParseStream(input; julia_version=julia_version) + stream = ParseStream(input; version=version) do_skip_trivia = ignore_trivia && rule != :toplevel if do_skip_trivia bump_trivia(stream, skip_newlines=true) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 50cca765464e5..008ec0cffc376 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,5 +1,5 @@ function test_parse(production, code; v=v"1.6") - stream = ParseStream(code, julia_version=v) + stream = ParseStream(code, version=v) production(JuliaSyntax.ParseState(stream)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"Nothing") source = SourceFile(code) @@ -65,11 +65,13 @@ tests = [ ], JuliaSyntax.parse_or => [ "x || y || z" => "(|| x (|| y z))" - "x .|| y" => "(.|| x y)" + ((v=v"1.6",), "x .|| y") => "(error (.|| x y))" + ((v=v"1.7",), "x .|| y") => "(.|| x y)" ], JuliaSyntax.parse_and => [ "x && y && z" => "(&& x (&& y z))" - "x .&& y" => "(.&& x y)" + ((v=v"1.6",), "x .&& y") => "(error (.&& x y))" + ((v=v"1.7",), "x .&& y") => "(.&& x y)" ], JuliaSyntax.parse_comparison => [ # Type comparisons are syntactic diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 17cab49a1dcba..ac10895c08060 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -113,14 +113,14 @@ end #------------------------------------------------------------------------------- """ - itest_parse(production, code; julia_version::VersionNumber=v"1.6") + itest_parse(production, code; version::VersionNumber=v"1.6") Parse `code`, entering the recursive descent parser at the given function `production`. This function shows the various tree representations on stdout for debugging. 
""" -function itest_parse(production, code; julia_version::VersionNumber=v"1.6") - stream = ParseStream(code; julia_version=julia_version) +function itest_parse(production, code; version::VersionNumber=v"1.6") + stream = ParseStream(code; version=version) production(JuliaSyntax.ParseState(stream)) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") From 84b21c6c594279acf78c8ce7e99285913bcfeb67 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 25 Jan 2022 23:47:47 +1000 Subject: [PATCH 0342/1109] Fix parsing of nested flattened generators --- JuliaSyntax/src/parser.jl | 22 +++++++++++++++------- JuliaSyntax/src/syntax_tree.jl | 4 ++-- JuliaSyntax/test/parse_packages.jl | 11 +++++++++-- JuliaSyntax/test/parser.jl | 17 +++++++++++------ 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8ceb760c63567..7119f9716e5da 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2479,15 +2479,16 @@ end # y in ys) # x in xs)) # -# A reasonable way to deal with this is to emit only the flatten: +# We deal with this by only emitting the flatten: # # (flatten xy (= x xs) (= y ys)) # -# then reconstruct the nested generators when converting to Expr. +# then reconstructing the nested flattens and generators when converting to Expr. 
+# +# [x for a = as for b = bs if cond1 for c = cs if cond2] ==> (comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2))) # # flisp: parse-generator function parse_generator(ps::ParseState, mark, flatten=false) - # (x for x in xs) ==> (generator x (= x xs)) t = peek_token(ps) if !t.had_whitespace # [(x)for x in xs] ==> (comprehension (generator x (error) (= x xs))) @@ -2499,16 +2500,21 @@ function parse_generator(ps::ParseState, mark, flatten=false) filter_mark = position(ps) parse_comma_separated(ps, parse_iteration_spec) if peek(ps) == K"if" + # (a for x in xs if cond) ==> (generator a (filter (= x xs) cond)) bump(ps, TRIVIA_FLAG) parse_cond(ps) emit(ps, filter_mark, K"filter") end t = peek_token(ps) if kind(t) == K"for" - # [xy for x in xs for y in ys] ==> (comprehension (flatten xy (= x xs) (= y ys))) + # (xy for x in xs for y in ys) ==> (flatten xy (= x xs) (= y ys)) + # (xy for x in xs for y in ys for z in zs) ==> (flatten xy (= x xs) (= y ys) (= z zs)) parse_generator(ps, mark, true) - emit(ps, mark, K"flatten") + if !flatten + emit(ps, mark, K"flatten") + end elseif !flatten + # (x for a in as) ==> (generator x (= a as)) emit(ps, mark, K"generator") end end @@ -2709,8 +2715,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x \n ] ==> (vect x) parse_vect(ps, closer) elseif k == K"for" - # [x for x in xs] ==> (comprehension (generator x (= x xs))) - # [x \n\n for x in xs] ==> (comprehension (generator x (= x xs))) + # [x for a in as] ==> (comprehension (generator x (= a as))) + # [x \n\n for a in as] ==> (comprehension (generator x (= a as))) parse_comprehension(ps, mark, closer) else # [x y] ==> (hcat x y) @@ -2883,6 +2889,8 @@ function parse_brackets(after_parse::Function, continue elseif k == K"for" # Generator syntax + # (x for a in as) ==> (generator x (= a as)) + # (x \n\n for a in as) ==> (generator x (= a as)) parse_generator(ps, mark) else k_str = untokenize(k) diff --git a/JuliaSyntax/src/syntax_tree.jl 
b/JuliaSyntax/src/syntax_tree.jl index 9bbdd605b9e6b..b0c64c6155330 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -377,9 +377,9 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) # Julia's AST here from our alternative `flatten` expression. gen = Expr(:generator, args[1], args[end]) for i in length(args)-1:-1:2 - gen = Expr(:generator, gen, args[i]) + gen = Expr(:flatten, Expr(:generator, gen, args[i])) end - args = [gen] + return gen elseif headsym in (:nrow, :ncat) # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index b46681b15a150..69fbbd2175370 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -11,12 +11,18 @@ base_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") test_parse_all_in_path(base_path) end -#= +if haskey(ENV, "PARSE_BASE_TEST") +# TODO: Turn on by default + base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") @testset "Parse Base tests at $base_tests_path" begin test_parse_all_in_path(base_tests_path) end +end +if haskey(ENV, "PARSE_STDLIB") +# TODO: Turn on by default + @testset "Parse Julia stdlib at $(Sys.STDLIB)" begin for stdlib in readdir(Sys.STDLIB) fulldir = joinpath(Sys.STDLIB, stdlib) @@ -27,4 +33,5 @@ end end end end -=# + +end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 008ec0cffc376..c0e540ed96884 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -516,7 +516,8 @@ tests = [ "(x)" => "x" "(a...)" => "(... 
a)" # Generators - "(x for x in xs)" => "(generator x (= x xs))" + "(x for a in as)" => "(generator x (= a as))" + "(x \n\n for a in as)" => "(generator x (= a as))" ], JuliaSyntax.parse_atom => [ ":foo" => "(quote foo)" @@ -551,11 +552,15 @@ tests = [ "[x]" => "(vect x)" "[x \n ]" => "(vect x)" "[x \n\n ]" => "(vect x)" - # parse_comprehension / parse_generator - "[x for x in xs]" => "(comprehension (generator x (= x xs)))" - "[x \n\n for x in xs]" => "(comprehension (generator x (= x xs)))" - "[(x)for x in xs]" => "(comprehension (generator x (error) (= x xs)))" - "[xy for x in xs for y in ys]" => "(comprehension (flatten xy (= x xs) (= y ys)))" + "[x for a in as]" => "(comprehension (generator x (= a as)))" + "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" + # parse_generator + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" + "[(x)for x in xs]" => "(comprehension (generator x (error) (= x xs)))" + "(a for x in xs if cond)" => "(generator a (filter (= x xs) cond))" + "(xy for x in xs for y in ys)" => "(flatten xy (= x xs) (= y ys))" + "(xy for x in xs for y in ys for z in zs)" => "(flatten xy (= x xs) (= y ys) (= z zs))" + "(x for a in as)" => "(generator x (= a as))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" From 35fa9a45867353b3266e1954373096b574d9d533 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 26 Jan 2022 00:23:28 +1000 Subject: [PATCH 0343/1109] Fix raw string escaping prior to closing delimiter --- JuliaSyntax/src/value_parsing.jl | 14 +++++++------- JuliaSyntax/test/value_parsing.jl | 4 ++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 7b5b08a50d1ef..aa23dc9c9a838 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -101,22 +101,22 @@ function unescape_raw_string(io::IO, 
str::AbstractString, is_cmd::Bool, dedent:: end # Process \ escape sequences j = i - while str[j] == '\\' && j <= lastidx + while j <= lastidx && str[j] == '\\' j += 1 end - ndelim = j - i - if j <= lastidx && str[j] == delim - # Escaping a delimiter - ndelim = div(ndelim,2) + nbackslash = j - i + if (j <= lastidx && str[j] == delim) || j > lastidx + # Backslashes before a delimiter must also be escaped + nbackslash = div(nbackslash,2) end - for k = 1:ndelim + for k = 1:nbackslash write(io, '\\') end i = j if i <= lastidx write(io, str[i]) + i = nextind(str, i) end - i = nextind(str, i) end end diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 24879aaecf0eb..44f8dcb8423f5 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -184,6 +184,10 @@ end @test unescape_julia_string("x\\\\\\`x", false, true) == "x\\\\\\`x" # '\\ ' ==> '\\ ' @test unescape_julia_string("\\\\ ", false, true) == "\\\\ " + # '\\' ==> '\' + @test unescape_julia_string("\\\\", false, true) == "\\" + # '\\\\' ==> '\\' + @test unescape_julia_string("\\\\\\\\", false, true) == "\\\\" # ` delimited # x\"x ==> x\"x From 7cad4a3da9a73e8e7f2331e5cbc1e84282ebb742 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 28 Jan 2022 23:04:12 +1000 Subject: [PATCH 0344/1109] Make parser API input and ParseStream text input coherent Internally in the parser we'd like to work with a string of UTF-8 encoded source code, as we'd like random access. We'd also not like to copy the text before parsing it. However some objects like IOBuffer work with a buffer of bytes rather than a string, and a string can't be constructed without copying the bytes. The best common, concrete and no-copy representation seems to be Vector{UInt8}, so settle on this for the concrete representation of code. (This is also easy to pass to the current Lexer implementation.) 
With this convention, make the ParseStream, parse and parseall functions accept any of a family of types, all of which won't be copied: - (sub)strings and buffers - IOs - Raw ptr,len textbuf::Vector{UInt8} text_root::Any --- JuliaSyntax/Project.toml | 3 + JuliaSyntax/src/JuliaSyntax.jl | 2 + JuliaSyntax/src/parse_stream.jl | 131 +++++++++++++++++++++++++------- JuliaSyntax/src/parser_api.jl | 66 ++++++++-------- JuliaSyntax/src/syntax_tree.jl | 3 +- JuliaSyntax/test/parser_api.jl | 70 +++++++++++++++++ JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/test_utils.jl | 1 + 8 files changed, 217 insertions(+), 60 deletions(-) create mode 100644 JuliaSyntax/test/parser_api.jl diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 1232839630e8d..7025d9af5f2c3 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -6,6 +6,9 @@ version = "0.1.0" [compat] julia = "1.4" +[deps] +Mmap = "a63ad114-7e13-5084-954f-fe012c677804" + [extras] Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index a7bfedd1b05cb..aa20aaa64ac67 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,5 +1,7 @@ module JuliaSyntax +using Mmap + # Internal utilities which aren't related to JuliaSyntax per se. include("utils.jl") diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 40129d3af015f..4b8b836460f68 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -165,6 +165,17 @@ ParseStream provides an IO interface for the parser. It output implicitly (newlines may be significant depending on `skip_newlines`) """ mutable struct ParseStream + # `textbuf` is a buffer of UTF-8 encoded text of the source code. 
This is a + # natural representation as we desire random access and zero-copy parsing + # of UTF-8 text from various containers, and unsafe_wrap(Vector{UInt8}, + # ...) allows us to use a Vector here. + # + # We want `ParseStream` to be concrete so that all `parse_*` functions only + # need to be compiled once. Thus `textbuf` must not be parameterized here. + textbuf::Vector{UInt8} + # GC root for the object which owns the memory in `textbuf`. `nothing` if + # the `textbuf` owner was unknown (eg, ptr,length was passed) + text_root::Any # Lexer, transforming the input bytes into a token stream lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} # Lookahead buffer for already lexed tokens @@ -180,29 +191,68 @@ mutable struct ParseStream # (major,minor) version of Julia we're parsing this code for. # May be different from VERSION! version::Tuple{Int,Int} + + function ParseStream(text_buf::Vector{UInt8}, text_root, next_byte::Integer, + version::VersionNumber) + io = IOBuffer(text_buf) + seek(io, next_byte-1) + lexer = Tokenize.Lexers.Lexer(io, RawToken) + # To avoid keeping track of the exact Julia development version where new + # features were added or comparing prerelease strings, we treat prereleases + # or dev versons as the release version using only major and minor version + # numbers. This means we're inexact for old dev versions but that seems + # like an acceptable tradeoff. + ver = (version.major, version.minor) + new(text_buf, text_root, lexer, + Vector{SyntaxToken}(), + Vector{TaggedRange}(), + Vector{Diagnostic}(), + next_byte, + 0, + ver) + end +end + +function ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION) + ParseStream(text, text, index, version) end +# Buffer with unknown owner. 
Not exactly recommended, but good for C interop +function ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION) + ParseStream(unsafe_wrap(Vector{UInt8}, ptr, len), nothing, index, version) +end + +# Buffers originating from strings +function ParseStream(text::String, index::Integer=1; version=VERSION) + ParseStream(unsafe_wrap(Vector{UInt8}, text), + text, index, version) +end +function ParseStream(text::SubString, index::Integer=1; version=VERSION) + # See also IOBuffer(SubString("x")) + ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), length(text)), + text, index, version) +end +function ParseStream(text::AbstractString, index::Integer=1; version=VERSION) + ParseStream(String(text), index; version=version) +end + +# IO-based cases +function ParseStream(io::IOBuffer; version=VERSION) + ParseStream(io.data, io, position(io)+1, version) +end function ParseStream(io::Base.GenericIOBuffer; version=VERSION) - next_byte = position(io)+1 - lexer = Tokenize.Lexers.Lexer(io, RawToken) - # To avoid keeping track of the exact Julia development version where new - # features were added or comparing prerelease strings, we treat prereleases - # or dev versons as the release version using only major and minor version - # numbers. This means we're inexact for old dev versions but that seems - # like an acceptable tradeoff. - ver = (version.major, version.minor) - ParseStream(lexer, - Vector{SyntaxToken}(), - Vector{TaggedRange}(), - Vector{Diagnostic}(), - next_byte, - 0, - ver) -end - -function ParseStream(code::AbstractString; kws...) - ParseStream(IOBuffer(code); kws...) 
+ textbuf = unsafe_wrap(Vector{UInt8}, pointer(io.data), length(io.data)) + ParseStream(textbuf, io, position(io)+1, version) +end +function ParseStream(io::IOStream; version=VERSION) + textbuf = Mmap.mmap(io) + ParseStream(textbuf, io, position(io)+1, version) end +function ParseStream(io::IO; version=VERSION) + textbuf = read(io) + ParseStream(textbuf, textbuf, 1, version) +end + function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) println(io, "ParseStream at position $(stream.next_byte)") @@ -283,17 +333,10 @@ function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false) stream.lookahead[_lookahead_index(stream, n, skip_newlines)] end -function _code_buf(stream) - # TODO: Peeking at the underlying data buffer inside the lexer is an awful - # hack. We should find a better way to do this kind of thing. - stream.lexer.io.data -end - function _peek_equal_to(stream, first_byte, len, str) - buf = _code_buf(stream) cbuf = codeunits(str) for i = 1:len - if buf[first_byte + i - 1] != cbuf[i] + if stream.textbuf[first_byte + i - 1] != cbuf[i] return false end end @@ -563,6 +606,8 @@ end #------------------------------------------------------------------------------- # Tree construction from the list of text ranges held by ParseStream +# API for extracting results from ParseStream + """ build_tree(::Type{NodeType}, stream::ParseStream; wrap_toplevel_as_kind=nothing) @@ -637,6 +682,38 @@ function build_tree(::Type{NodeType}, stream::ParseStream; end end +""" + sourcetext(stream::ParseStream; steal_textbuf=true) + +Return the source text being parsed by this `ParseStream` as a UTF-8 encoded +string. + +If `steal_textbuf==true`, this is permitted to steal the content of the +stream's text buffer. Note that this leaves the `ParseStream` in an invalid +state for further parsing. 
+""" +function sourcetext(stream; steal_textbuf=false) + if stream.text_root isa AbstractString && codeunit(stream.text_root) == UInt8 + return stream.text_root + elseif steal_textbuf + return String(stream.textbuf) + else + # Safe default for other cases is to copy the buffer. Technically this + # could possibly be avoided in some situations, but might have side + # effects such as mutating stream.text_root or stealing the storage of + # stream.textbuf + return String(copy(stream.textbuf)) + end +end + +""" + textbuf(stream) + +Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`. +""" +textbuf(stream) = stream.textbuf + first_byte(stream::ParseStream) = first_byte(first(stream.ranges)) last_byte(stream::ParseStream) = last_byte(last(stream.ranges)) any_error(stream::ParseStream) = any_error(stream.diagnostics) + diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index f66ac1c0bc06b..f44aa74cd9b59 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -70,11 +70,11 @@ compatible with `input`: * When `input` is a `ParseStream`, the stream itself is returned and the `ParseStream` interface can be used to process the output. -* When `input` is an `IOBuffer`, the output is `(tree, diagnostics)`. The - buffer `position` will be set to the next byte of input. -* When `input` is an `AbstractString, Integer`, the output is - `(tree, diagnostics, index)`, where `index` (default 1) is the next byte of - input. +* When `input` is a seekable `IO` subtype, the output is `(tree, diagnostics)`. + The buffer `position` will be set to the next byte of input. +* When `input` is an `AbstractString, Integer`, or `Vector{UInt8}, Integer` the + output is `(tree, diagnostics, index)`, where `index` (default 1) is the next + byte of input. `rule` may be any of * `toplevel` (default) — parse a whole "file" of top level statements. 
In this @@ -104,7 +104,7 @@ function parse(stream::ParseStream; rule::Symbol=:toplevel) stream end -function parse(::Type{T}, io::Base.GenericIOBuffer; +function parse(::Type{T}, io::IO; rule::Symbol=:toplevel, version=VERSION, kws...) where {T} stream = ParseStream(io; version=version) parse(stream; rule=rule) @@ -113,11 +113,14 @@ function parse(::Type{T}, io::Base.GenericIOBuffer; tree, stream.diagnostics end -function parse(::Type{T}, code::AbstractString, index::Integer=1; kws...) where {T} - io = IOBuffer(code) - seek(io, index-1) - tree, diagnostics = parse(T, io; kws...) - tree, diagnostics, position(io)+1 +# Generic version of parse for all other cases where an index must be passed +# back - ie strings and buffers +function parse(::Type{T}, input...; + rule::Symbol=:toplevel, version=VERSION, kws...) where {T} + stream = ParseStream(input...; version=version) + parse(stream; rule=rule) + tree = build_tree(T, stream; kws...) + tree, stream.diagnostics, stream.next_byte end @@ -137,34 +140,35 @@ A `ParseError` will be thrown if any errors occurred during parsing. See [`parse`](@ref) for a more complete and powerful interface to the parser, as well as a description of the `version` and `rule` keywords. 
""" -function parseall(::Type{T}, input; rule=:toplevel, version=VERSION, +function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, ignore_trivia=true) where {T} - stream = ParseStream(input; version=version) - do_skip_trivia = ignore_trivia && rule != :toplevel - if do_skip_trivia + stream = ParseStream(input...; version=version) + if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) + empty!(stream.ranges) end parse(stream; rule=rule) - if do_skip_trivia - bump_trivia(stream, skip_newlines=true) - else - if peek(stream) != K"EndMarker" - throw(ArgumentError("Parsing did not terminate at end of input")) - end + if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || + (!ignore_trivia && (peek(stream); kind(first(stream.lookahead)) != K"EndMarker")) + emit_diagnostic(stream, error="unexpected text after parsing $rule") end if any_error(stream.diagnostics) - throw(ParseError(SourceFile(input), stream.diagnostics)) - elseif !isempty(stream.diagnostics) - # Crudely format any warnings to the logger. TODO: This should be - # neatened up to avoid the double-decorations. - buf = IOBuffer() - show_diagnostics(IOContext(buf, stdout), stream, SourceFile(input)) - @warn Text(String(take!(buf))) + source = SourceFile(sourcetext(stream, steal_textbuf=true)) + throw(ParseError(source, stream.diagnostics)) end # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind # mess that we've got here. - # * It's required for GreenNode, as GreenNode is useless without - # * Dropping it would be ok for SyntaxNode and Expr - build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel") + # * It's kind of required for GreenNode, as GreenNode only records spans, + # not absolute positions. + # * Dropping it would be ok for SyntaxNode and Expr... + tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel") + if !isempty(stream.diagnostics) + # Crudely format any warnings to the current logger. 
+ buf = IOBuffer() + show_diagnostics(IOContext(buf, stdout), stream, + SourceFile(sourcetext(stream, steal_textbuf=true))) + @warn Text(String(take!(buf))) + end + tree end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index b0c64c6155330..156d92eca47ed 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -418,8 +418,7 @@ Base.Expr(node::SyntaxNode) = _to_expr(node) function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename="none", kws...) green_tree = build_tree(GreenNode, stream; kws...) - code = String(copy(_code_buf(stream))) - source = SourceFile(code, filename=filename) + source = SourceFile(sourcetext(stream), filename=filename) SyntaxNode(source, green_tree, first_byte(stream)) end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl new file mode 100644 index 0000000000000..3541729fd8bd6 --- /dev/null +++ b/JuliaSyntax/test/parser_api.jl @@ -0,0 +1,70 @@ +@testset "parser API" begin + @testset "String and buffer input" begin + # String + @test parse(Expr, "x+y\nz") == (Expr(:toplevel, :(x+y), :z), [], 6) + @test parse(Expr, "x+y\nz", rule=:statement) == (:(x+y), [], 4) + @test parse(Expr, "x+y\nz", rule=:atom) == (:x, [], 2) + @test parse(Expr, "x+y\nz", 5, rule=:atom) == (:z, [], 6) + + # Vector{UInt8} + @test parse(Expr, Vector{UInt8}("x+y"), rule=:statement) == (:(x+y), [], 4) + @test parse(Expr, Vector{UInt8}("x+y"), 3, rule=:statement) == (:y, [], 4) + # Ptr{UInt8}, len + code = "x+y" + GC.@preserve code begin + stream = ParseStream(pointer(code), 3) + parse(stream, rule=:statement) + @test JuliaSyntax.build_tree(Expr, stream) == :(x+y) + @test stream.next_byte == 4 + end + + # SubString + @test parse(Expr, SubString("x+y"), rule=:statement) == (:(x+y), [], 4) + @test parse(Expr, SubString("x+y"), 1, rule=:atom) == (:x, [], 2) + @test parse(Expr, SubString("x+y"), 3, rule=:atom) == (:y, [], 4) + @test parse(Expr, SubString("x+y",3,3), 1, rule=:atom) == 
(:y, [], 2) + end + + @testset "IO input" begin + # IOBuffer + io = IOBuffer("x+y") + @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + io = IOBuffer("x+y") + seek(io, 2) + @test parse(Expr, io, rule=:atom) == (:y, []) + @test position(io) == 3 + # A GenericIOBuffer, not actually IOBuffer + io = IOBuffer(SubString("x+y")) + @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + # Another type of GenericIOBuffer + io = IOBuffer(codeunits("x+y")) + @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + # IOStream + mktemp() do path, io + write(io, "x+y") + close(io) + + open(path, "r") do io + @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + end + end + end + + @testset "parseall" begin + @test parseall(Expr, " x ") == Expr(:toplevel, :x) + @test parseall(Expr, " x ", rule=:statement) == :x + @test parseall(Expr, " x ", rule=:atom) == :x + # TODO: Fix this situation with trivia here; the brackets are trivia, but + # must be parsed to discover the atom inside. But in GreenTree we only + # place trivia as siblings of the leaf node with identifier `x`, not as + # children. 
+ @test_broken parseall(Expr, "(x)", rule=:atom) == :x + + @test_throws JuliaSyntax.ParseError parseall(Expr, "x+y", rule=:atom) + @test_throws JuliaSyntax.ParseError parseall(Expr, "x+y\nz", rule=:statement) + end +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 44d2280daf9fb..072a79e1e5113 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -17,6 +17,7 @@ end include("test_utils.jl") include("parse_stream.jl") include("parser.jl") +include("parser_api.jl") include("syntax_tree.jl") @testset "Parsing values from strings" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index ac10895c08060..3b90f084526b8 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -67,6 +67,7 @@ end # parser produces from the source text of the node. function equals_flisp_parse(tree) node_text = sourcetext(tree) + ex,_,_ = parse(Expr, node_text) fl_ex = kind(tree) == K"toplevel" ? flisp_parse_all(node_text) : Meta.parse(node_text, raise=false) From 0045b2a9c00e721b922757d1578c33afb7350906 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 29 Jan 2022 15:23:52 +1000 Subject: [PATCH 0345/1109] Don't try to parse var identifiers in strings --- JuliaSyntax/src/parser.jl | 4 ++++ JuliaSyntax/test/parser.jl | 1 + 2 files changed, 5 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7119f9716e5da..608bd41160d85 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2960,6 +2960,10 @@ function parse_string(ps::ParseState) emit(ps, m, K"string", prev.flags) end end + elseif k == K"var" + # var identifiers disabled in strings + # "$var" ==> (string var) + bump(ps, remap_kind=K"Identifier") elseif k == K"Identifier" || is_keyword(k) || is_word_operator(k) # "a $foo b" ==> (string "a " foo " b") # "$outer" ==> (string outer) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 
c0e540ed96884..10242faff67b4 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -615,6 +615,7 @@ tests = [ "\"hi\$(\"\"\"ho\"\"\")\"" => "(string \"hi\" (string-s \"ho\"))" ((v=v"1.5",), "\"hi\$(\"ho\")\"") => "(string \"hi\" \"ho\")" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" + "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" "\"\$in\"" => "(string in)" "\"\"" => "\"\"" From d5431bd93ead4af6fe48dd90ec307d6aaa3492a0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 29 Jan 2022 22:12:58 +1000 Subject: [PATCH 0346/1109] Various fixes for test case reduction Make peek() a method of Base.peek, as it's semantically similar. --- JuliaSyntax/src/green_tree.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 6 ++-- JuliaSyntax/src/parser.jl | 2 +- JuliaSyntax/test/parser_api.jl | 1 + JuliaSyntax/test/test_utils.jl | 49 +++++++++++++++++++++++++-------- 5 files changed, 43 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 5252b6daf3ed9..f69b91939d14b 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -109,7 +109,7 @@ function Base.show(io::IO, ::MIME"text/plain", node::GreenNode) _show_green_node(io, node, "", 1, nothing, true) end -function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::String; show_trivia=true) +function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractString; show_trivia=true) _show_green_node(io, node, "", 1, str, show_trivia) end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 4b8b836460f68..11bae53656257 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -108,7 +108,7 @@ function SyntaxToken(raw::RawToken, had_whitespace, had_newline) had_whitespace, had_newline) end -function Base.show(ii::IO, tok::SyntaxToken) +function Base.show(io::IO, tok::SyntaxToken) range = string(lpad(first_byte(tok), 3), ":", 
rpad(last_byte(tok), 3)) print(io, rpad(range, 17, " "), rpad(kind(tok), 15, " ")) end @@ -229,7 +229,7 @@ function ParseStream(text::String, index::Integer=1; version=VERSION) end function ParseStream(text::SubString, index::Integer=1; version=VERSION) # See also IOBuffer(SubString("x")) - ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), length(text)), + ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), sizeof(text)), text, index, version) end function ParseStream(text::AbstractString, index::Integer=1; version=VERSION) @@ -316,7 +316,7 @@ non-newline whitespace are skipped automatically. Whitespace containing a single newline is returned as kind `K"NewlineWs"` unless `skip_newlines` is true. """ -function peek(stream::ParseStream, n::Integer=1; skip_newlines::Bool=false) +function Base.peek(stream::ParseStream, n::Integer=1; skip_newlines::Bool=false) kind(peek_token(stream, n; skip_newlines=skip_newlines)) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 608bd41160d85..592273119cc76 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -61,7 +61,7 @@ end # Convenient wrappers for ParseStream -function peek(ps::ParseState, n=1; skip_newlines=nothing) +function Base.peek(ps::ParseState, n=1; skip_newlines=nothing) skip_nl = isnothing(skip_newlines) ? 
ps.whitespace_newline : skip_newlines peek(ps.stream, n; skip_newlines=skip_nl) end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 3541729fd8bd6..e12564887ef4f 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -23,6 +23,7 @@ @test parse(Expr, SubString("x+y"), 1, rule=:atom) == (:x, [], 2) @test parse(Expr, SubString("x+y"), 3, rule=:atom) == (:y, [], 4) @test parse(Expr, SubString("x+y",3,3), 1, rule=:atom) == (:y, [], 2) + @test parse(Expr, SubString("α+x"), rule=:statement) == (:(α+x), [], 5) end @testset "IO input" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 3b90f084526b8..6ee69622e214a 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -39,9 +39,10 @@ function remove_all_linenums!(ex) end function parsers_agree_on_file(path) - code = read(path, String) - JuliaSyntax.remove_linenums!(parseall(Expr, code)) == - JuliaSyntax.remove_linenums!(flisp_parse_all(code)) + text = read(path, String) + ex = parseall(Expr, text) + fl_ex = flisp_parse_all(text) + JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) end function find_source_in_path(basedir) @@ -67,14 +68,14 @@ end # parser produces from the source text of the node. function equals_flisp_parse(tree) node_text = sourcetext(tree) + # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing + # some context from the parent node. ex,_,_ = parse(Expr, node_text) - fl_ex = kind(tree) == K"toplevel" ? 
- flisp_parse_all(node_text) : - Meta.parse(node_text, raise=false) + fl_ex = flisp_parse_all(node_text) if Meta.isexpr(fl_ex, :error) return true # Something went wrong in reduction; ignore these cases 😬 end - remove_all_linenums!(Expr(tree)) == remove_all_linenums!(fl_ex) + remove_all_linenums!(ex) == remove_all_linenums!(fl_ex) end """ @@ -94,23 +95,47 @@ function reduce_test(tree) if !haschildren(tree) return tree else - subtrees = [] for child in children(tree) if is_trivia(child) || !haschildren(child) continue end t = reduce_test(child) if !isnothing(t) - push!(subtrees, t) + return t end end - if length(subtrees) == 1 - return only(subtrees) - end end return tree end +function reduce_all_failures_in_path(basedir, outdir) + rm(outdir, force=true, recursive=true) + mkpath(outdir) + for filename in find_source_in_path(basedir) + filetext = read(filename, String) + if !(try parsers_agree_on_file(filename) catch exc false end) + @info "Found failure" filename + filetext = read(filename, String) + text = nothing + try + tree, _ = parse(SyntaxNode, filetext) + rtree = reduce_test(tree) + text = sourcetext(rtree) + catch + @error "Error reducing file" exception=current_exceptions() + text = filetext + end + bn,_ = splitext(basename(filename)) + outname = joinpath(outdir, "$bn.jl") + i=1 + while isfile(outname) + outname = joinpath(outdir, "$bn-$i.jl") + i += 1 + end + write(outname, text) + end + end +end #------------------------------------------------------------------------------- """ From e9e087df59674c8ccd269ec7ce1886deb0648631 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 29 Jan 2022 22:34:18 +1000 Subject: [PATCH 0347/1109] Tokenize: allow ' to be adjoint after contextural keywords --- JuliaSyntax/Tokenize/src/lexer.jl | 2 ++ JuliaSyntax/Tokenize/test/lexer.jl | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index afbc69e27c13a..326b0c1df7219 100644 --- 
a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -849,6 +849,8 @@ end function lex_prime(l, doemit = true) if l.last_token == Tokens.IDENTIFIER || + Tokens.iscontexturalkeyword(l.last_token) || + Tokens.iswordoperator(l.last_token) || l.last_token == Tokens.DOT || l.last_token == Tokens.RPAREN || l.last_token == Tokens.RSQUARE || diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index ab9e0d02529d4..4377834ec4870 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -227,6 +227,10 @@ end @test tok("()'", 3).kind == Tokens.PRIME @test tok("{}'", 3).kind == Tokens.PRIME @test tok("[]'", 3).kind == Tokens.PRIME + @test tok("outer'", 2).kind == Tokens.PRIME + @test tok("mutable'", 2).kind == Tokens.PRIME + @test tok("as'", 2).kind == Tokens.PRIME + @test tok("isa'", 2).kind == Tokens.PRIME end @testset "keywords" begin From 87ae25d376039c6a19f3818f1481578635eaba4a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 30 Jan 2022 07:34:18 +1000 Subject: [PATCH 0348/1109] Fix parsing of empty source files Also some tweaks to SourceFile pretty printing --- JuliaSyntax/src/parser.jl | 4 +++- JuliaSyntax/src/source_files.jl | 20 +++++++++++++++----- JuliaSyntax/test/parser.jl | 1 + 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 592273119cc76..277d2f727e136 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -414,7 +414,9 @@ function parse_toplevel(ps::ParseState) if peek(ps, skip_newlines=true) == K"EndMarker" # Allow end of input if there is nothing left but whitespace # a \n \n ==> (toplevel a) - bump(ps, skip_newlines=true) + # Empty files + # ==> (toplevel) + bump_trivia(ps, skip_newlines=true) break else parse_stmts(ps) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index aa6607ba1b253..35d8910dc69b9 100644 --- 
a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -22,12 +22,16 @@ function SourceFile(code::AbstractString; filename=nothing) # FIXME: \r and \n\r code[i] == '\n' && push!(line_starts, i+1) end - if last(code) != '\n' + if isempty(code) || last(code) != '\n' push!(line_starts, lastindex(code)+1) end SourceFile(code, filename, line_starts) end +function SourceFile(; filename) + SourceFile(read(filename, String); filename=filename) +end + # Get line number of the given byte within the code function source_line(source::SourceFile, byte_index) searchsortedlast(source.line_starts, byte_index) @@ -64,11 +68,17 @@ function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) end function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) - if !isnothing(source.filename) - print(io, source.filename, '\n', - repeat('-', textwidth(source.filename)), '\n') + fn = isnothing(source.filename) ? "" : " $(source.filename)" + header = "## SourceFile$fn ##" + print(io, header, "\n") + heightlim = displaysize(io)[1] ÷ 2 + if !get(io, :limit, false) || length(source.line_starts) <= heightlim + print(io, source.code) + else + r1 = source_line_range(source, 1, context_lines_after=heightlim-3) + print(io, view(source, r1[1]:r1[2])) + println(io, "⋮") end - print(io, source.code) end function Base.getindex(source::SourceFile, rng::AbstractRange) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 10242faff67b4..7a2b8ab46e2b3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -20,6 +20,7 @@ tests = [ "a \n b" => "(toplevel a b)" "a;b \n c;d" => "(toplevel (toplevel a b) (toplevel c d))" "a \n \n" => "(toplevel a)" + "" => "(toplevel)" ], JuliaSyntax.parse_block => [ "a;b;c" => "(block a b c)" From 4c5ddbb58b7fc571b7b0dd1409170689c74a2567 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 11:07:36 +1000 Subject: [PATCH 0349/1109] Parse +(;a) with (parameters a) rather than 
(block a) --- JuliaSyntax/src/parser.jl | 4 +++- JuliaSyntax/test/parser.jl | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 277d2f727e136..605f668ac6b04 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1143,10 +1143,11 @@ function parse_unary_call(ps::ParseState) mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( + initial_semi = peek(ps) == K";" is_call = false is_block = false parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_call = had_commas || had_splat + is_call = had_commas || had_splat || initial_semi is_block = !is_call && num_semis > 0 bump_closing_token(ps, K")") return (needs_parameters=is_call, @@ -1169,6 +1170,7 @@ function parse_unary_call(ps::ParseState) # +(a=1,) ==> (call + (kw a 1)) # +(a...) ==> (call + (... a)) # +(a;b,c) ==> (call + a (parameters b c)) + # +(;a) ==> (call + (parameters a)) # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 7a2b8ab46e2b3..2f42d8259aa03 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -155,10 +155,11 @@ tests = [ "+{T}(x::T)" => "(call (curly + T) (:: x T))" "*(x)" => "(call * x)" # Prefix function calls for operators which are both binary and unary - "+(a,b)" => "(call + a b)" + "+(a,b)" => "(call + a b)" "+(a=1,)" => "(call + (kw a 1))" "+(a...)" => "(call + (... 
a))" - "+(a;b,c)" => "(call + a (parameters b c))" + "+(a;b,c)" => "(call + a (parameters b c))" + "+(;a)" => "(call + (parameters a))" # Whitespace not allowed before prefix function call bracket "+ (a,b)" => "(call + (error) a b)" # Prefix calls have higher precedence than ^ From 696257e023a0453a95006daca6be9310df914d5f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 11:21:28 +1000 Subject: [PATCH 0350/1109] Fix: hexfloat literals are always parsed as Float64 --- JuliaSyntax/src/value_parsing.jl | 2 +- JuliaSyntax/test/value_parsing.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index aa23dc9c9a838..bba8f478d6f94 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -20,7 +20,7 @@ function julia_string_to_number(str::AbstractString, kind) end return x elseif kind == K"Float" - if 'f' in str + if !startswith(str,"0x") && 'f' in str # This is kind of awful. Should we have a separate Float32 literal # type produced by the lexer? The `f` suffix is nonstandard after all. 
return Base.parse(Float32, replace(str, 'f'=>'e')) diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 44f8dcb8423f5..7e69a31869ebe 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -25,6 +25,7 @@ octint(s) = julia_string_to_number(s, K"OctInt") @test julia_string_to_number("10e-0", K"Float") === Float64(10) @test julia_string_to_number("10f-0", K"Float") === Float32(10) @test julia_string_to_number("0x0ap-0", K"Float") === Float64(10) + @test julia_string_to_number("0xffp-0", K"Float") === Float64(255) end # HexInt From fef238d87b982880990a84f624f3dc0727389ff5 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 12:37:15 +1000 Subject: [PATCH 0351/1109] Improved tools for syntax test case reduction When reducing syntax disagreements, search for all failing subtrees rather than the first one. Also add tools to format the set of failures more conveniently. --- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/source_files.jl | 4 ++ JuliaSyntax/test/test_utils.jl | 85 ++++++++++++++++++++++----------- 3 files changed, 61 insertions(+), 30 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 11bae53656257..8b1779a259916 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -692,7 +692,7 @@ If `steal_textbuf==true`, this is permitted to steal the content of the stream's text buffer. Note that this leaves the `ParseStream` in an invalid state for further parsing. 
""" -function sourcetext(stream; steal_textbuf=false) +function sourcetext(stream::ParseStream; steal_textbuf=false) if stream.text_root isa AbstractString && codeunit(stream.text_root) == UInt8 return stream.text_root elseif steal_textbuf diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 35d8910dc69b9..673e0708b19c8 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -91,6 +91,7 @@ function Base.getindex(source::SourceFile, rng::AbstractRange) source.code[i:j] end +# TODO: Change view() here to `sourcetext` ? function Base.view(source::SourceFile, rng::AbstractRange) i = first(rng) j = prevind(source.code, last(rng)+1) @@ -101,3 +102,6 @@ function Base.getindex(source::SourceFile, i::Int) source.code[i] end +function sourcetext(source::SourceFile) + return source.code +end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 6ee69622e214a..51ea1c71ee1ef 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -79,60 +79,87 @@ function equals_flisp_parse(tree) end """ -Select a subtree of `tree` which is inconsistent between flisp and JuliaSyntax -parsers. This isn't very precise yet! - -TODO: -* For some syntax elements (eg, the `x in xs` inside `for x in xs`) the - children can't be parsed out of context. Fix this. -* Replace good siblings of bad nodes with placeholders. For blocks, delete such - siblings. + reduce_test(text::AbstractString) + reduce_test(tree::SyntaxNode) + +Select minimal subtrees of `text` or `tree` which are inconsistent between +flisp and JuliaSyntax parsers. 
""" -function reduce_test(tree) +function reduce_test(failing_subtrees, tree) if equals_flisp_parse(tree) - return nothing + return false end if !haschildren(tree) - return tree - else + push!(failing_subtrees, tree) + return true + end + had_failing_subtrees = false + if haschildren(tree) for child in children(tree) if is_trivia(child) || !haschildren(child) continue end - t = reduce_test(child) - if !isnothing(t) - return t + had_failing_subtrees |= reduce_test(failing_subtrees, child) + end + end + if !had_failing_subtrees + push!(failing_subtrees, tree) + end + return true +end + +function reduce_test(tree::SyntaxNode) + subtrees = Vector{typeof(tree)}() + reduce_test(subtrees, tree) + subtrees +end + +function reduce_test(text::AbstractString) + tree, _, _ = parse(SyntaxNode, text) + reduce_test(tree) +end + + +""" + format_reduced_tests(out::IO, file_content) + +Reduced the syntax (a string or SyntaxNode) from `file_content` into the +minimal failing subtrees of syntax and write the results to `out`. 
+""" +function format_reduced_tests(out::IO, file_content) + text = nothing + try + rtrees = reduce_test(file_content) + first = true + for rt in rtrees + if !first + print(out, "\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n") end + first = false + print(out, sourcetext(rt)) end + catch + @error "Error reducing file" exception=current_exceptions() + print(out, sourcetext(file_content)) end - return tree end function reduce_all_failures_in_path(basedir, outdir) rm(outdir, force=true, recursive=true) mkpath(outdir) for filename in find_source_in_path(basedir) - filetext = read(filename, String) if !(try parsers_agree_on_file(filename) catch exc false end) @info "Found failure" filename - filetext = read(filename, String) - text = nothing - try - tree, _ = parse(SyntaxNode, filetext) - rtree = reduce_test(tree) - text = sourcetext(rtree) - catch - @error "Error reducing file" exception=current_exceptions() - text = filetext - end bn,_ = splitext(basename(filename)) outname = joinpath(outdir, "$bn.jl") - i=1 + i = 1 while isfile(outname) outname = joinpath(outdir, "$bn-$i.jl") i += 1 end - write(outname, text) + open(outname, "w") do io + format_reduced_tests(io, read(filename, String)) + end end end end From 47d81e26c03e3d9d5b9a57ed99fc106b6653e13f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 13:23:56 +1000 Subject: [PATCH 0352/1109] =?UTF-8?q?Fix=20parsing=20of=20decorated=20-->?= =?UTF-8?q?=20arrow=20(eg=20x.-->y=20and=20x-->=E2=82=81y)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- JuliaSyntax/src/parser.jl | 51 ++++++++++++++++++++-------------- JuliaSyntax/src/token_kinds.jl | 2 +- JuliaSyntax/test/parser.jl | 26 ++++------------- 3 files changed, 36 insertions(+), 43 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 605f668ac6b04..05bbb848eb158 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -342,21 +342,14 @@ end # 
produces structures like (= a (= b (= c d))) # # flisp: parse-RtoL -function parse_RtoL(ps::ParseState, down, is_op, syntactic, self) +function parse_RtoL(ps::ParseState, down, is_op, self) mark = position(ps) down(ps) - t = peek_token(ps) - k = kind(t) + k = peek(ps) if is_op(k) - if syntactic isa Bool ? syntactic : syntactic(k) - bump(ps, TRIVIA_FLAG) - self(ps) - emit(ps, mark, k, flags(t)) - else - bump(ps) - self(ps) - emit(ps, mark, K"call", INFIX_FLAG) - end + bump(ps) + self(ps) + emit(ps, mark, K"call", INFIX_FLAG) end end @@ -609,8 +602,9 @@ function parse_comma(ps::ParseState, do_emit=true) end # flisp: parse-pair +# a => b ==> (call-i a => b) function parse_pair(ps::ParseState) - parse_RtoL(ps, parse_cond, is_prec_pair, false, parse_pair) + parse_RtoL(ps, parse_cond, is_prec_pair, parse_pair) end # Parse short form conditional expression @@ -659,14 +653,30 @@ function parse_cond(ps::ParseState) emit(ps, mark, K"if") end -# Parse arrows -# x → y ==> (call-i x → y) -# x <--> y ==> (call-i x <--> y) -# x --> y ==> (--> x y) # The only syntactic arrow +# Parse arrows. Like parse_RtoL, but specialized for --> syntactic operator # # flisp: parse-arrow function parse_arrow(ps::ParseState) - parse_RtoL(ps, parse_or, is_prec_arrow, ==(K"-->"), parse_arrow) + mark = position(ps) + parse_or(ps) + t = peek_token(ps) + k = kind(t) + if is_prec_arrow(k) + if kind(t) == K"-->" && !is_decorated(t) + # x --> y ==> (--> x y) # The only syntactic arrow + bump(ps, TRIVIA_FLAG) + parse_arrow(ps) + emit(ps, mark, k, flags(t)) + else + # x → y ==> (call-i x → y) + # x <--> y ==> (call-i x <--> y) + # x .--> y ==> (call-i x .--> y) + # x -->₁ y ==> (call-i x -->₁ y) + bump(ps) + parse_arrow(ps) + emit(ps, mark, K"call", INFIX_FLAG) + end + end end # Like parse_RtoL, but specialized for the version test of dotted operators. 
@@ -685,7 +695,6 @@ function parse_lazy_cond(ps::ParseState, down, is_op, self) end end - # x || y || z ==> (|| x (|| y z)) #v1.6: x .|| y ==> (error (.|| x y)) #v1.7: x .|| y ==> (.|| x y) @@ -750,7 +759,7 @@ end # x <| y <| z ==> (call-i x <| (call-i y <| z)) # flisp: parse-pipe< function parse_pipe_lt(ps::ParseState) - parse_RtoL(ps, parse_pipe_gt, is_prec_pipe_lt, false, parse_pipe_lt) + parse_RtoL(ps, parse_pipe_gt, is_prec_pipe_lt, parse_pipe_lt) end # x |> y |> z ==> (call-i (call-i x |> y) |> z) @@ -1234,7 +1243,7 @@ end # flisp: parse-factor-after function parse_factor_after(ps::ParseState) - parse_RtoL(ps, parse_juxtapose, is_prec_power, false, parse_factor_after) + parse_RtoL(ps, parse_juxtapose, is_prec_power, parse_factor_after) end # Parse type declarations and lambda syntax diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index e832e8d09beaa..f043888e9cb62 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -108,7 +108,7 @@ Dict([ "END_ASSIGNMENTS" => Ts.end_assignments "BEGIN_PAIRARROW" => Ts.begin_pairarrow -"=>Ts." => Ts.PAIR_ARROW +"=>" => Ts.PAIR_ARROW "END_PAIRARROW" => Ts.end_pairarrow # Level 2 diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 2f42d8259aa03..30bd67ce25777 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -47,6 +47,9 @@ tests = [ "a ~ b" => "(call-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" ], + JuliaSyntax.parse_pair => [ + "a => b" => "(call-i a => b)" + ], JuliaSyntax.parse_cond => [ "a ? 
b : c" => "(if a b c)" "a ?\nb : c" => "(if a b c)" @@ -63,6 +66,8 @@ tests = [ "x → y" => "(call-i x → y)" "x <--> y" => "(call-i x <--> y)" "x --> y" => "(--> x y)" + "x .--> y" => "(call-i x .--> y)" + "x -->₁ y" => "(call-i x -->₁ y)" ], JuliaSyntax.parse_or => [ "x || y || z" => "(|| x (|| y z))" @@ -664,24 +669,3 @@ end @test test_parse(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" end -@testset "Larger code chunks" begin - # Something ever-so-slightly nontrivial for fun - - # the sum of the even Fibonacci numbers < 4_000_000 - # https://projecteuler.net/problem=2 - code = """ - let - s = 0 - f1 = 1 - f2 = 2 - while f1 < 4000000 - # println(f1) - if f1 % 2 == 0 - s += f1 - end - f1, f2 = f2, f1+f2 - end - s - end - """ - @test parseall(Expr, code) == JuliaSyntax.remove_linenums!(JuliaSyntax.flisp_parse_all(code)) -end From 22e14177276c0c9b80a7de44c3ff400cc2b4d56f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 14:53:59 +1000 Subject: [PATCH 0353/1109] Fix parsing `outer` keyword with expression on lhs It seems we need to use peek_behind here, as a full expression of arbitrary length (parsed by parse_pipe_lt) can follow `outer`. 
--- JuliaSyntax/src/parser.jl | 19 ++++++++++++------- JuliaSyntax/test/parser.jl | 4 ++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 05bbb848eb158..f651c0d451930 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2407,14 +2407,19 @@ function parse_iteration_spec(ps::ParseState) mark = position(ps) k = peek(ps) # Handle `outer` contextual keyword - is_outer_kw = k == K"outer" && !(peek_skip_newline_in_gen(ps, 2) in KSet`= in ∈`) - if is_outer_kw - # outer i = rhs ==> (= (outer i) rhs) - bump(ps, TRIVIA_FLAG) - end with_space_sensitive(parse_pipe_lt, ps) - if is_outer_kw - emit(ps, mark, K"outer") + if peek_behind(ps).orig_kind == K"outer" + if peek_skip_newline_in_gen(ps) in KSet`= in ∈` + # Not outer keyword + # outer = rhs ==> (= outer rhs) + # outer <| x = rhs ==> (= (call-i outer <| x) rhs) + else + # outer i = rhs ==> (= (outer i) rhs) + # outer (x,y) = rhs ==> (= (outer (tuple x y)) rhs) + reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG) + parse_pipe_lt(ps) + emit(ps, mark, K"outer") + end end if peek_skip_newline_in_gen(ps) in KSet`= in ∈` bump(ps, TRIVIA_FLAG) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 30bd67ce25777..69c82ac01a9dd 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -496,6 +496,10 @@ tests = [ "i ∈ rhs" => "(= i rhs)" "i = 1:10" => "(= i (call-i 1 : 10))" "(i,j) in iter" => "(= (tuple i j) iter)" + "outer = rhs" => "(= outer rhs)" + "outer <| x = rhs" => "(= (call-i outer <| x) rhs)" + "outer i = rhs" => "(= (outer i) rhs)" + "outer (x,y) = rhs" => "(= (outer (tuple x y)) rhs)" ], JuliaSyntax.parse_paren => [ # Tuple syntax with commas From 0748ec3f39f3964b4096bca05e7dda77cfc69e07 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 18:08:38 +1000 Subject: [PATCH 0354/1109] Disallow sufficies on unary operators for compatibility MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Julia flisp parser disallows things like `+₁ x` to mean `(call +₁ x)`, so we probably should too, for now. It's not quite clear from the upstream code whether this is by design or happenstance, however! --- JuliaSyntax/README.md | 4 ++++ JuliaSyntax/src/parser.jl | 34 +++++++++++++++++++++------------- JuliaSyntax/test/parser.jl | 2 ++ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 8d2c8fda55aef..bff46606a7cef 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -627,6 +627,10 @@ xy ### Other oddities +* Operators with sufficies don't seem to always be parsed consistently as the + same operator without a suffix. Unclear whether this is by design or mistake. + For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` + * `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. I suppose this is somewhat useful for AST consumers, but it seems a bit weird and unnecessary. diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f651c0d451930..f3d9cab71643e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -267,6 +267,9 @@ end function is_syntactic_operator(k) k = kind(k) + # TODO: Do we need to disallow dotted and suffixed forms here? + # The lexer itself usually disallows such tokens, so it's not clear whether + # we need to handle them. (Though note `.->` is a token...) return k in KSet`&& || . ... ->` || (is_prec_assignment(k) && k != K"~") end @@ -280,16 +283,20 @@ end function is_unary_op(t) k = kind(t) - (k in KSet`<: >:` && !is_dotted(t)) || - k in KSet`+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓` # dotop allowed + !is_suffixed(t) && ( + (k in KSet`<: >:` && !is_dotted(t)) || + k in KSet`+ - ! 
~ ¬ √ ∛ ∜ ⋆ ± ∓` # dotop allowed + ) end -# Operators which are both unary and binary -function is_both_unary_and_binary(k) - k = kind(k) - # TODO: Do we need to check dotop as well here? - k in KSet`$ & ~` || # dotop disallowed? - k in KSet`+ - ⋆ ± ∓` # dotop allowed +# Operators that are both unary and binary +function is_both_unary_and_binary(t) + k = kind(t) + # Preventing is_suffixed here makes this consistent with the flisp parser. + # But is this by design or happenstance? + !is_suffixed(t) && ( + k in KSet`+ - ⋆ ± ∓` || (k in KSet`$ & ~` && !is_dotted(t)) + ) end # operators handled by parse_unary at the start of an expression @@ -306,8 +313,6 @@ end # flisp: invalid-identifier? function is_valid_identifier(k) k = kind(k) - # TODO: flisp also had K"...." disallowed. But I don't know what that's - # for! Tokenize doesn't have an equivalent here. !(is_syntactic_operator(k) || k in KSet`? .'`) end @@ -879,11 +884,13 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) down(ps) while (t = peek_token(ps); is_op(kind(t))) if ps.space_sensitive && t.had_whitespace && - is_both_unary_and_binary(kind(t)) && - !peek_token(ps, 2).had_whitespace + is_both_unary_and_binary(t) && + !peek_token(ps, 2).had_whitespace # The following is two elements of a hcat + # [x +y] ==> (hcat x (call + y)) # [x+y +z] ==> (hcat (call-i x + y) (call + z)) # Conversely the following are infix calls + # [x +₁y] ==> (vect (call-i x +₁ y)) # [x+y+z] ==> (vect (call-i x + y z)) # [x+y + z] ==> (vect (call-i x + y z)) break @@ -907,7 +914,7 @@ end function parse_chain(ps::ParseState, down, op_kind) while (t = peek_token(ps); kind(t) == op_kind && !is_decorated(t)) if ps.space_sensitive && t.had_whitespace && - is_both_unary_and_binary(kind(t)) && + is_both_unary_and_binary(t) && !peek_token(ps, 2).had_whitespace # [x +y] ==> (hcat x (call + y)) break @@ -1210,6 +1217,7 @@ function parse_unary_call(ps::ParseState) bump(ps, op_tok_flags) else # /x ==> (call (error /) x) + # +₁ x 
==> (call (error +₁) x) # .<: x ==> (call (error .<:) x) bump(ps, error="not a unary operator") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 69c82ac01a9dd..9dea55c151230 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -119,6 +119,7 @@ tests = [ "[x +y]" => "(hcat x (call + y))" "[x+y +z]" => "(hcat (call-i x + y) (call + z))" # Conversely the following are infix calls + "[x +₁y]" => "(vect (call-i x +₁ y))" "[x+y+z]" => "(vect (call-i x + y z))" "[x+y + z]" => "(vect (call-i x + y z))" # Dotted and normal operators @@ -182,6 +183,7 @@ tests = [ "±x" => "(call ± x)" # Not a unary operator "/x" => "(call (error /) x)" + "+₁ x" => "(call (error +₁) x)" ".<: x" => "(call (error .<:) x)" ], JuliaSyntax.parse_factor => [ From 271d099325e5f0df959b2529e5b8a087693733b2 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 18:30:33 +1000 Subject: [PATCH 0355/1109] Address a few minor fixmes and todos; add some tests --- JuliaSyntax/src/parser.jl | 20 +++++++++----------- JuliaSyntax/test/parser.jl | 5 +++++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f3d9cab71643e..17971d00afe3f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -125,7 +125,7 @@ end # Crude recovery heuristic: bump any tokens which aren't block or bracket # closing tokens. function bump_closing_token(ps, closing_kind) - # TODO: Refactor with recover() ? + # todo: Refactor with recover() ? bump_trivia(ps) if peek(ps) == closing_kind bump(ps, TRIVIA_FLAG) @@ -587,10 +587,6 @@ function parse_comma(ps::ParseState, do_emit=true) while true if peek(ps) != K"," if do_emit && n_commas >= 1 - # FIXME: is use of n_commas correct here? 
flisp comments say: - # () => (tuple) - # (ex2 ex1) => (tuple ex1 ex2) - # (ex1,) => (tuple ex1) emit(ps, mark, K"tuple") end return n_commas @@ -1154,7 +1150,7 @@ function parse_unary_call(ps::ParseState) # Setup possible whitespace error between operator and ( ws_mark = position(ps) bump_trivia(ps) - ws_mark_end = position(ps) # FIXME - 1 + ws_mark_end = position(ps) ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE") mark_before_paren = position(ps) @@ -1447,7 +1443,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end elseif k == K"[" if is_macrocall - # a().@x[1] ==> FIXME + # a().@x[1] ==> (macrocall (ref (error (. (call a) (quote x))) 1)) finish_macroname(ps, mark, is_valid_modref, macro_name_position) end # a [i] ==> (ref a (error-t) i) @@ -1457,8 +1453,11 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) K"]", ps.end_symbol) # a[i] ==> (ref a i) # a[i,j] ==> (ref a i j) + # T[x y] ==> (typed_hcat T x y) + # T[x ; y] ==> (typed_vcat T x y) + # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) - # TODO: other test cases + #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d)) outk = ckind == K"vect" ? K"ref" : ckind == K"hcat" ? K"typed_hcat" : ckind == K"vcat" ? K"typed_vcat" : @@ -2437,7 +2436,7 @@ function parse_iteration_spec(ps::ParseState) recover(ps, error="invalid iteration spec: expected one of `=` `in` or `∈`") do ps, k k in KSet`, NewlineWs` || is_closing_token(ps, k) end - # TODO: or try parse_pipe_lt ??? + # Or try parse_pipe_lt ??? end emit(ps, mark, K"=") end @@ -2693,7 +2692,6 @@ function parse_array_separator(ps) if n_semis == 2 && peek(ps) == K"NewlineWs" # Line continuation # [a b ;; \n \n c] - # TODO: Should this only consume a single newline? 
while peek(ps) == K"NewlineWs" bump(ps, TRIVIA_FLAG) end @@ -3073,7 +3071,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump_trivia(ps) mark = position(ps) leading_kind = peek(ps) - # TODO: Reorder to put most likely tokens first? + # todo: Reorder to put most likely tokens first? if leading_kind == K":" # symbol/expression quote # :foo => (quote foo) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 9dea55c151230..b60bec534688c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -261,10 +261,15 @@ tests = [ "a().@x y" => "(macrocall (error (. (call a) (quote x))) y)" "a().@x{y}" => "(macrocall (error (. (call a) (quote x))) (braces y))" # array indexing, typed comprehension, etc + "a().@x[1]" => "(macrocall (ref (error (. (call a) (quote x))) 1))" "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" + "T[x y]" => "(typed_hcat T x y)" + "T[x ; y]" => "(typed_vcat T x y)" + "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" + ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" # Keyword params always use kw inside tuple in dot calls "f.(a,b)" => "(. f (tuple a b))" "f.(a=1)" => "(. 
f (tuple (kw a 1)))" From 925c1ef498b1d7b63f9f46a1f88728a2c6a962a1 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 21:05:21 +1000 Subject: [PATCH 0356/1109] Cleanup some usage of SourceFile --- JuliaSyntax/src/diagnostics.jl | 20 ++++++++++---------- JuliaSyntax/src/source_files.jl | 5 +++++ JuliaSyntax/test/test_utils.jl | 13 +++++++++---- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 5cbb0f83f583b..2557cd41285ae 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -51,12 +51,12 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) p = first_byte(diagnostic) q = last_byte(diagnostic) - code = source.code - if q < p || (p == q && code[p] == '\n') + text = sourcetext(source) + if q < p || (p == q && source[p] == '\n') # An empty or invisible range! We expand it symmetrically to make it # visible. - p = max(firstindex(code), prevind(code, p)) - q = min(lastindex(code), nextind(code, q)) + p = max(firstindex(text), prevind(text, p)) + q = min(lastindex(text), nextind(text, q)) end # p and q mark the start and end of the diagnostic range. 
For context, @@ -66,7 +66,7 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) hicol = (100,40,40) - print(io, source[a:prevind(code, p)]) + print(io, source[a:prevind(text, p)]) # There's two situations, either if b >= c # The diagnostic range is compact and we show the whole thing @@ -88,19 +88,19 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) println(io, "…") _printstyled(io, source[c:q]; color=hicol) end - print(io, source[nextind(code,q):d]) + print(io, source[nextind(text,q):d]) println(io) end -function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, code::SourceFile) +function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, source::SourceFile) for d in diagnostics - show_diagnostic(io, d, code) + show_diagnostic(io, d, source) end end -function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, code) +function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text::AbstractString) if !isempty(diagnostics) - show_diagnostics(io, diagnostics, SourceFile(code)) + show_diagnostics(io, diagnostics, SourceFile(text)) end end diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 673e0708b19c8..adb44b4dfb357 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -102,6 +102,11 @@ function Base.getindex(source::SourceFile, i::Int) source.code[i] end +""" + sourcetext(source::SourceFile) + +Get the full source text of a `SourceFile` as a string. +""" function sourcetext(source::SourceFile) return source.code end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 51ea1c71ee1ef..2d6757d47be5c 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -126,7 +126,10 @@ end Reduced the syntax (a string or SyntaxNode) from `file_content` into the minimal failing subtrees of syntax and write the results to `out`. 
""" -function format_reduced_tests(out::IO, file_content) +function format_reduced_tests(out::IO, file_content; filename=nothing) + if !isnothing(filename) + println(out, "# $filename") + end text = nothing try rtrees = reduce_test(file_content) @@ -138,9 +141,11 @@ function format_reduced_tests(out::IO, file_content) first = false print(out, sourcetext(rt)) end - catch + catch exc + exc isa InterruptException && rethrow() @error "Error reducing file" exception=current_exceptions() - print(out, sourcetext(file_content)) + print(out, file_content isa AbstractString ? + file_content : sourcetext(file_content)) end end @@ -158,7 +163,7 @@ function reduce_all_failures_in_path(basedir, outdir) i += 1 end open(outname, "w") do io - format_reduced_tests(io, read(filename, String)) + format_reduced_tests(io, read(filename, String), filename=filename) end end end From e1a4e9a15cccb608ccce4e9511a17702b7d6f41e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 21:30:08 +1000 Subject: [PATCH 0357/1109] Compat hack for same representation of quoted :true --- JuliaSyntax/src/syntax_tree.jl | 4 +++- JuliaSyntax/test/syntax_tree.jl | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 156d92eca47ed..a41e1bd396828 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -404,7 +404,9 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) end end if headsym == :inert || (headsym == :quote && length(args) == 1 && - !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode)) + !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || + a1 isa Bool # <- compat hack, Julia 1.4+ + )) return QuoteNode(only(args)) else return Expr(headsym, args...) 
diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 66b8ebd71070d..96823ea8bec74 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -4,6 +4,9 @@ @test parseall(Expr, ":(a)", rule=:atom) == QuoteNode(:a) @test parseall(Expr, ":(:a)", rule=:atom) == Expr(:quote, QuoteNode(:a)) @test parseall(Expr, ":(1+2)", rule=:atom) == Expr(:quote, Expr(:call, :+, 1, 2)) + # Compatibility hack for VERSION >= v"1.4" + # https://github.com/JuliaLang/julia/pull/34077 + @test parseall(Expr, ":true", rule=:atom) == Expr(:quote, true) end @testset "Short form function line numbers" begin From d8a32b398c08b307a5e294a5fceea828aef098e9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Feb 2022 21:36:59 +1000 Subject: [PATCH 0358/1109] Allow newlines after `where` --- JuliaSyntax/src/parser.jl | 3 +++ JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 5 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 17971d00afe3f..680412c62c9a1 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -967,10 +967,12 @@ function parse_where_chain(ps0::ParseState, mark) ps = ParseState(ps0, where_enabled=false) while peek(ps) == K"where" bump(ps, TRIVIA_FLAG) # where + bump_trivia(ps, skip_newlines=true) k = peek(ps) if k == K"{" m = position(ps) bump(ps, TRIVIA_FLAG) + # x where \n {T} ==> (where x T) # x where {T,S} ==> (where x T S) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) if ckind != K"vect" @@ -982,6 +984,7 @@ function parse_where_chain(ps0::ParseState, mark) emit(ps, mark, K"where") else # x where T ==> (where x T) + # x where \n T ==> (where x T) # x where T<:S ==> (where x (<: T S)) parse_comparison(ps) emit(ps, mark, K"where") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b60bec534688c..e3242d14e1d2c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -204,10 +204,12 @@ tests = [ "<:(x::T)" => "(<: (:: x T))" 
"<: A where B" => "(<: (where A B))" # Really for parse_where + "x where \n {T}" => "(where x T)" "x where {T,S}" => "(where x T S)" "x where {T S}" => "(where x (bracescat (row T S)))" "x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))" "x where T" => "(where x T)" + "x where \n T" => "(where x T)" "x where T<:S" => "(where x (<: T S))" ], JuliaSyntax.parse_unary_prefix => [ From 1e5ee7d944c8da3e296db617295552fcf9e8ef33 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Feb 2022 14:02:27 +1000 Subject: [PATCH 0359/1109] Add various known broken test cases --- JuliaSyntax/README.md | 15 +++++++++++++ JuliaSyntax/test/parser.jl | 40 ++++++++++++++++++++++++++++++++++ JuliaSyntax/test/test_utils.jl | 3 ++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index bff46606a7cef..a5f06e68588a7 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -551,6 +551,17 @@ Here's some behaviors which seem to be bugs: dotted operators rather than a relative path. Ie, we have `import .⋆` parsing to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency with the parsing of `import .A`. +* Looking back on the output disregards grouping parentheses which can lead to + odd results in some cases. For example, `f(((((x=1)))))` parses as a keyword + call to function `f` with the keyword `x=1`, but arguably it should be an + assignment. +* Hexfloat literals can have a trailing `f` for example, `0x1p1f` + but this doesn't do anything. In the `flisp` C code such cases are treated as + Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925 + but this has never been officially supported in Julia. It seems this bug + arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent + digits, all of which are detected as invalid except for a trailing `f` when + processed by `isnumtok_base`. 
## Parsing / AST oddities and warts @@ -661,3 +672,7 @@ xy * When lexing raw strings, more than two backslashes are treated strangely at the end of the string: `raw"\\\\ "` contains four backslashes, whereas `raw"\\\\"` contains only two. + +* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and + `@S {a b}` parse. Conversely, `@S[a b]` parses. + diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e3242d14e1d2c..b61ff489ffbea 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -654,6 +654,36 @@ tests = [ ], ] +# Known bugs +broken_tests = [ + JuliaSyntax.parse_atom => [ + # Triple-quoted string processing + "\"\"\"\n\$x\"\"\"" => "(string x)" + # Operator-named macros with and without spaces + "@! x" => "(macrocall @! x)" + "@.. x" => "(macrocall @.. x)" + "@!x" => "(macrocall @! x)" + "@..x" => "(macrocall @.. x)" + "@.x" => "(macrocall @__dot__ x)" + # Invalid numeric literals + "0b12" => "(error \"0b12\")" + "0xex" => "(error \"0xex\")" + # Square brackets without space in macrocall + "@S[a,b]" => "(macrocall S (vect a b))" + "@S[a b]" => "(macrocall S (hcat a b))" + "@S[a; b]" => "(macrocall S (vcat a b))" + "@S[a; b ;; c; d]" => "(macrocall S (ncat-2 (nrow-1 a b) (nrow-1 c d)))" + ] + JuliaSyntax.parse_call => [ + # kw's in ref + "x[i=y]" => "(ref x (kw i y))" + ] + JuliaSyntax.parse_juxtapose => [ + # Want: "numeric constant \"10.\" cannot be implicitly multiplied because it ends with \".\"" + "10.x" => "(error (call * 10.0 x))" + ] +] + @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests @testset "$(repr(input))" for (input,output) in test_specs @@ -665,6 +695,16 @@ tests = [ @test test_parse(production, input; opts...) 
== output end end + @testset "Broken $production" for (production, test_specs) in broken_tests + @testset "$(repr(input))" for (input,output) in test_specs + if !(input isa AbstractString) + opts,input = input + else + opts = NamedTuple() + end + @test_broken test_parse(production, input; opts...) == output + end + end end @testset "Unicode normalization in tree conversion" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 2d6757d47be5c..6c6ef83d5981e 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -48,7 +48,8 @@ end function find_source_in_path(basedir) src_list = String[] for (root, dirs, files) in walkdir(basedir) - append!(src_list, (joinpath(root, f) for f in files if endswith(f, ".jl"))) + append!(src_list, (joinpath(root, f) for f in files + if endswith(f, ".jl") && isfile(joinpath(root,f)))) end src_list end From 6c29a4dd110d41bf7c6637fd3884b58353601870 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Feb 2022 13:52:55 +1000 Subject: [PATCH 0360/1109] Fixes for var"" syntax - raw mode unescaping and empty string --- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/parser.jl | 8 +++++++- JuliaSyntax/src/syntax_tree.jl | 7 ++++++- JuliaSyntax/test/parser.jl | 5 +++++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8b1779a259916..29ec982c9082b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -11,7 +11,7 @@ const INFIX_FLAG = RawFlags(1<<1) const DOTOP_FLAG = RawFlags(1<<2) # Set when kind == K"String" was triple-delimited as with """ or ``` const TRIPLE_STRING_FLAG = RawFlags(1<<3) -# Set when the string is "raw" and needs minimal unescaping +# Set when a string or identifier needs "raw string" unescaping const RAW_STRING_FLAG = RawFlags(1<<4) # try-finally-catch const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) diff --git 
a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 680412c62c9a1..c304f054811f0 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3038,7 +3038,8 @@ function parse_raw_string(ps::ParseState; remap_kind=K"Nothing") if peek(ps) in KSet`String CmdString` bump(ps, flags; remap_kind=remap_kind) else - outk = delim_k in KSet`" """` ? K"String" : + outk = remap_kind != K"Nothing" ? remap_kind : + delim_k in KSet`" """` ? K"String" : delim_k in KSet`\` \`\`\`` ? K"CmdString" : internal_error("unexpected delimiter ", delim_k) bump_invisible(ps, outk, flags) @@ -3134,6 +3135,11 @@ function parse_atom(ps::ParseState, check_identifiers=true) kind(t) in KSet`" """` && !t.had_whitespace) # var"x" ==> x # var"""x""" ==> x + # Raw mode unescaping + # var"" ==> + # var"\"" ==> " + # var"\\"" ==> \" + # var"\\x" ==> \\x bump(ps, TRIVIA_FLAG) parse_raw_string(ps, remap_kind=K"Identifier") t = peek_token(ps) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index a41e1bd396828..f276bdbcbf873 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -38,7 +38,12 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"Char" unescape_julia_string(val_str, false, false)[2] elseif k == K"Identifier" - Symbol(normalize_identifier(val_str)) + if has_flags(head(raw), RAW_STRING_FLAG) + s = unescape_julia_string(val_str, false, true) + Symbol(normalize_identifier(s)) + else + Symbol(normalize_identifier(val_str)) + end elseif is_keyword(k) # This should only happen for tokens nested inside errors Symbol(val_str) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b61ff489ffbea..a88b93ce2d588 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -553,6 +553,11 @@ tests = [ """var"x"end""" => "x (error (end))" """var"x"1""" => "x (error 1)" """var"x"y""" => "x (error y)" + # var syntax raw string unescaping + "var\"\"" => 
"" + "var\"\\\"\"" => "\"" + "var\"\\\\\\\"\"" => "\\\"" + "var\"\\\\x\"" => "\\\\x" # Syntactic operators "+=" => "(error +=)" ".+=" => "(error .+=)" From af6fa6ff8d56a970e248cb80ebcafddadab87cf6 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Feb 2022 14:17:40 +1000 Subject: [PATCH 0361/1109] Add another known broken test in triple string processing --- JuliaSyntax/test/parser.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a88b93ce2d588..9e20207cd7436 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -664,13 +664,14 @@ broken_tests = [ JuliaSyntax.parse_atom => [ # Triple-quoted string processing "\"\"\"\n\$x\"\"\"" => "(string x)" + "\"\"\"\$x\n\"\"\"" => "(string x \"\n\")" # Operator-named macros with and without spaces "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. x)" "@!x" => "(macrocall @! x)" "@..x" => "(macrocall @.. x)" "@.x" => "(macrocall @__dot__ x)" - # Invalid numeric literals + # Invalid numeric literals, not juxtaposition "0b12" => "(error \"0b12\")" "0xex" => "(error \"0xex\")" # Square brackets without space in macrocall From ab54377566df177ba9378795bdd8bbdfde83dd45 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Feb 2022 19:00:00 +1000 Subject: [PATCH 0362/1109] Better high level organization for README.md Add some basic examples and bring some high level order to a document that's grown organically. --- JuliaSyntax/README.md | 974 ++++++++++++++++++---------------- JuliaSyntax/src/green_tree.jl | 11 +- JuliaSyntax/src/parser_api.jl | 9 +- 3 files changed, 534 insertions(+), 460 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index a5f06e68588a7..b8c22589a0d70 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -7,92 +7,170 @@ A Julia frontend, written in Julia. 
## Goals * Lossless parsing of Julia code with precise source mapping -* Production quality error recovery, reporting and unit testing. -* Parser structure comprehensible to people who know Julia's - flisp-based parser. Replace the flisp frontend once bootstrapping can be - solved. +* Production quality error recovery, reporting and unit testing +* Parser structure as similar as possible to Julia's flisp-based parser * Speedy enough for interactive editing -* "Compilation as an API" to support all sorts of tooling. Not just parsing but - the whole compiler frontend: - - Parsing - - Macro expansion - - Syntax desugaring - - Scope analysis -* Try not to worry about how much work this will be 😅 - -## Parser overview - -The parsing technology is intentionally simple: it's a recursive descent parser -which closely follows the high level structure of the flisp reference parser. -This gives a lot of flexibility for the hard part: designing the data -structures and APIs for parsing. It also reduces porting bugs and is a natural -fit because the language was designed around the constraints of this kind of -parser. +* "Compilation as an API" to support all sorts of tooling +* Grow to encompass the rest of the compiler frontend: macro expansion, + desugaring and other lowering steps. -The main parser innovation is the `ParseStream` interface which provides a -stream-like I/O interface for writing the parser: -* The parser consumes a flat list of tokens as *input* -* It produces a flat list of text spans as *output* -* Diagnostics are emitted as separate text spans +### Design Opinions -Notably, the parser does not depend on or produce any concrete tree data -structure as part of the parsing phase but the output spans can be -post-processed into various tree data structures as required. This is very -similar to the design of rust-analyzer, though our output format is simpler. 
+* Parser implementation should be independent from tree data structures so + we have the `ParseStream` interface. +* Tree data structures should be *layered* to balance losslessness with + abstraction and generality. So we have `SyntaxNode` (an AST) layered on top + of `GreenNode` (a lossless parse tree). We might need other tree types later. +* Fancy parser generators are marginal for production compilers. We use a + boring but flexible recursive descent parser. -## Lossless syntax trees +# Examples -Our goal is to losslessly represent the source text with a tree; this may be -called a "lossless syntax tree". (We avoid the term "concrete syntax tree" -because this has traditionally been a different concept — a parse tree of the -full formal grammar for a language, including any grammar hacks required to -solve ambiguities, etc. We don't need such a formal grammar as we're writing -the parser by hand.) - -Structurally, the output of a `ParseStream`-based parser can most naturally be -assembled into a "green tree" in Roslyn (C# compiler) terminology. The most -basic properties of a green tree are: -* Every node spans a complete and contiguous range of source code bytes -* Child nodes are in the order of the source text - -Additionally, green trees are usually designed so that -* Nodes are immutable, do not point to their parents and don't know their - absolute position in the source. This means they can be cached and reused - when building the tree. -* Nodes are homogenously typed at the language level so they can be efficiently - stored and accessed, with the node type held as a "syntax kind" enumeration. - -## Syntax kinds and sum types +Here's what parsing of a small piece of code currently looks like in various +forms. We'll use the `parseall` convenience function to demonstrate, but +there's also a more flexible parsing interface with `JuliaSyntax.parse()`. 
-We generally track the type of syntax nodes with a syntax "kind", stored -explicitly in each node an integer tag. This effectively makes the node type a -[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system -sense, but with the type tracked explicitly outside of Julia's type system. +First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means +the `call` has the infix `-i` flag): -Managing the type explicitly brings a few benefits: -* Code and data structures for manipulating syntax nodes is always concretely - typed from the point of view of the compiler. -* We control the data layout, and can pack the kind very efficiently into very - few bits (along with any flags, as desired). -* Predicates such as `is_operator` can be extremely efficient, given that we - know the meaning of the kind's bits. -* The kind can be applied to several different tree data structures, or - manipulated by itself as needed. -* We can generate very efficient pattern matching code. +```julia +julia> parseall(SyntaxNode, "(x + y)*z", filename="foo.jl") +line:col│ byte_range │ tree │ file_name + 1:1 │ 1:9 │[toplevel] │foo.jl + 1:1 │ 1:9 │ [call-i] + 1:2 │ 2:6 │ [call-i] + 1:2 │ 2:2 │ x + 1:4 │ 4:4 │ + + 1:6 │ 6:6 │ y + 1:8 │ 8:8 │ * + 1:9 │ 9:9 │ z +``` -There's arguably a few downsides: -* Normal Julia dispatch can't express dispatch over syntax kind. Luckily, - a pattern matching macro can provide a very elegant way of expressing such - algorithms over a non-extensible set of kinds, so this is not a big problem. -* Different node kinds could come with different data fields, but a syntax - tree must have generic fields to cater for all kinds. (Consider as an analogy - the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic - `head` and `args` fields.) 
This could be a disadvantage for code which - processes one specific kind, but for generic code processing many kinds, - having a generic but *concrete* data layout should bring a performance - advantage. +Internally this has a full representation of all syntax trivia (whitespace and +comments) as can be seen with the more raw "green tree" representation with +`GreenNode`. Here ranges on the left are byte ranges, and `✔` flags nontrivia +tokens. Note that the parentheses are trivia in the tree representation, +despite being important for parsing. + +```julia +julia> text = "(x + y)*z" + greentree = parseall(GreenNode, text) + 1:9 │[toplevel] + 1:9 │ [call] + 1:1 │ ( + 2:6 │ [call] + 2:2 │ Identifier ✔ + 3:3 │ Whitespace + 4:4 │ + ✔ + 5:5 │ Whitespace + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ * ✔ + 9:9 │ Identifier ✔ +``` + +`GreenNode` stores only byte ranges, but the token strings can be shown by +supplying the source text string: + +```julia +julia> show(stdout, MIME"text/plain"(), greentree, text) + 1:9 │[toplevel] + 1:9 │ [call] + 1:1 │ ( "(" + 2:6 │ [call] + 2:2 │ Identifier ✔ "x" + 3:3 │ Whitespace " " + 4:4 │ + ✔ "+" + 5:5 │ Whitespace " " + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ * ✔ "*" + 9:9 │ Identifier ✔ "z" +``` + +Julia `Expr` can also be produced: + +```julia +julia> parseall(Expr, "(x + y)*z") +:($(Expr(:toplevel, :((x + y) * z)))) +``` + +# Parser implementation + +Our goal is to losslessly represent the source text with a tree; this may be +called a "lossless syntax tree". (This is sometimes called a "concrete syntax +tree", but that term has also been used for the parse tree of the full formal +grammar for a language including any grammar hacks required to solve +ambiguities, etc. So we avoid this term.) + +`JuliaSyntax` uses use a mostly recursive descent parser which closely +follows the high level structure of the flisp reference parser. This makes the +code familiar and reduces porting bugs. 
It also gives a lot of flexibility for +designing the diagnostics, tree data structures, compatibility with different +Julia versions, etc. I didn't choose a parser generator as they still seem +marginal for production compilers — for the parsing itself they don't seem +*greatly* more expressive and they can be less flexible for the important +"auxiliary" code which needs to be written in either case. + +### Lexing + +We use a version of [Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) +which has been modified to better match the needs of parsing: +* Newline-containing whitespace is emitted as a separate kind +* Tokens inside string interpolations are emitted separately from the string +* Strings delimiters are separate tokens and the `String` kind +* Additional contextural keywords (`as`, `var`, `doc`) have been added and + moved to a subcategory of keywords. +* Nonterminal kinds were added (though these should probably be factored out again) +* Various bugs fixed and additions for newer Julia versions + +This copy of Tokenize lives in the `JuliaSyntax` source tree due to the volume +of changes required but once the churn settles down it would be good to figure +out how to un-fork the lexer in some way or other. + +### Parsing with ParseStream -## Representing erroneous source code +The main parser innovation is the `ParseStream` interface which provides a +stream-like I/O interface for writing the parser. The parser does not +depend on or produce any concrete tree data structure as part of the parsing +phase but the output spans can be post-processed into various tree data +structures as required. This is like the design of rust-analyzer though with a +simpler implementation. + +Parsing proceeds by recursive descent; + +* The parser consumes a flat list of lexed tokens as *input* using `peek()` to + examine tokens and `bump()` to consume them. 
+* The parser produces a flat list of text spans as *output* using `bump()` to + transfer tokens to the output and `position()`/`emit()` for nonterminal ranges. +* Diagnostics are emitted as separate text span +* Whitespace and comments are automatically `bump()`ed, with the exception of + syntactically relevant newlines in space sensitive mode. +* Parser modes are passed down the call tree using `ParseState`. + +The output spans track the byte range, a syntax "kind" stored as an integer +tag, and some flags. The kind tag makes the spans a [sum +type](https://blog.waleedkhan.name/union-vs-sum-types/) but where the type is +tracked explicitly outside of Julia's type system. + +For lossless parsing the output spans must cover the entire input text. Using +`bump()`, `position()` and `emit()` in a natural way also ensures that: +* Spans are cleanly nested with children contained entirely within their parents +* Siblings spans are emitted in source order +* Parent spans are emitted after all their children. + +These properties make the output spans naturally isomorphic to a +["green tree"](https://ericlippert.com/2012/06/08/red-green-trees/) +in the terminology of C#'s Roslyn compiler. + +### Tree construction + +The `build_tree` function performs a depth-first traversal of the `ParseStream` +output spans allowing it to be assembled into a concrete tree data structure, +for example using the `GreenNode` data type. We further build on top of this to +define `build_tree` for the AST type `SyntaxNode` and for normal Julia `Expr`. + +### Error recovery The goal of the parser is to produce well-formed heirarchical structure from the source text. For interactive tools we need this to work even when the @@ -118,266 +196,235 @@ of error nodes: We want to encode both these cases in a way which is simplest for downstream tools to use. This is an open question, but for now we use `K"error"` as the -node head, with the `TRIVIA_FLAG` set for unexpected syntax. 
+kind, with the `TRIVIA_FLAG` set for unexpected syntax. -# Prototyping approach +### More about syntax kinds -The tree datastructure design here is hard: +We generally track the type of syntax nodes with a syntax "kind", stored +explicitly in each node an integer tag. This effectively makes the node type a +[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system +sense, but with the type tracked explicitly outside of Julia's type system. -1. The symbolic part of compilation (the compiler frontend) incrementally - abstracts the source text, but errors along the way should refer back to the - source. - - The tree must be a lossless representation of the source text - - Some aspects of the source text (comments, most whitespace) are irrelevant - to parsing. - - More aspects of the source text are irrelevant after we have an abstract - syntax tree of the surface syntax. Some good examples here are the - parentheses in `2*(x + y)` and the explicit vs implicit multiplication - symbol in `2*x` vs `2x`. +Managing the type explicitly brings a few benefits: +* Code and data structures for manipulating syntax nodes is always concretely + typed from the point of view of the compiler. +* We control the data layout and can pack the kind into very few bits along + with other flags bits, as desired. +* Predicates such as `is_operator` can be extremely efficient, given that we + know the meaning of the kind's bits. +* The kind can be applied to several different tree data structures, or + manipulated by itself. +* Pattern matching code is efficient when the full set of kinds is closed and + known during compilation. -2. There's various type of *analyses* -- There's many useful ways to augment, a syntax tree depending on use case. -- Analysis algorithms should be able to act on any tree type, ignoring - but carrying augmentations which they don't know about. +There's arguably a few downsides: +* Normal Julia dispatch can't express dispatch over syntax kind. 
Luckily, + a pattern matching macro can provide a very elegant way of expressing such + algorithms over a non-extensible set of kinds, so this is not a big problem. +* Different node kinds could come with different data fields, but a syntax + tree must have generic fields to cater for all kinds. (Consider as an analogy + the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic + `head` and `args` fields.) This could be a disadvantage for code which + processes one specific kind but for generic code processing many kinds + having a generic but *concrete* data layout should be faster. -Let's tackle it by prototyping several important work flows: +# Differences from the flisp parser -* Syntax transformations - - Choose some macros to implement. This is a basic test of mixing source - trees from different files while preserving precise source locations. -* Formatting - - Re-indent a file. This tests the handling of syntax trivia. -* Refactoring - - A pass to rename local variables. This tests how information from further - down the compilation pipeline can be attached to the syntax tree and used - to modify the source code. -* Precise error reporting in lowering - - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment - location `[a, b]`". But at a precise source location. - - Try something several layers deeper inside lowering? For example "macro - definition not allowed inside a local scope" -* Incremental reparsing - - Reparse a source file, given a byte range replacement +Practically the flisp parser is not quite a classic [recursive descent +parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it +often looks back and modifies the output tree it has already produced. We've +tried to eliminate this pattern it favor of lookahead where possible because +* It works poorly when the parser is emitting a stream of node spans with + strict source ordering constraints. 
+* It's confusing to reason about this kind of code -## Tree design +However, on occasion it seems to solve genuine ambiguities where Julia code +can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=` +ambiguity within parentheses. In these cases we put up with using the +functions `look_behind` and `reset_node!()`. -### Raw syntax tree / Green tree +## Code structure -Raw syntax tree (or "Green tree" in the terminology from Roslyn) +Large structural changes were generally avoided while porting. In particular, +nearly all function names for parsing productions are the same with `-` +replaced by `_` and predicates prefixed by `is_`. -We want GreenNode to be -* *structurally minimal* — For efficiency and generality -* *immutable* — For efficiency (& thread safety?) -* *complete* — To preserve parser knowledge -* *token agnostic* — To allow use with any source language +Some notable differences: -``` -for i = 1:10 - a + 2 - # hi - c - #= hey - ho =# -end -``` +* `parse-arglist` and a parts of `parse-paren-` have been combined into a + general function `parse_brackets`. This function deals with all the odd + corner cases of how the AST is emitted when mixing `,` and `;` within + parentheses. In particular regard to: + - Determining whether `;` are block syntax separators or keyword parameters + - Determining whether to emit `parameter` sections based on context + - Emitting key-value pairs either as `kw` or `=` depending on context +* The way that `parse-resword` is entered has been rearranged to avoid parsing + reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we + detect reserved words and enter `parse_resword` earlier. 
-The simplest idea possible is to have: -* Leaf nodes are a single token -* Children are in source order +## Flisp parser bugs -``` -- - trivia -I - identifier -L - literal - -[for] - - "for" - - " " - [=] - I "i" - - " " - - "=" - - " " - [call] - I "1" - - ":" - L "10" - - "\n " - [call] - I "a" - - " " - I "+" - - " " - L "2" - - "\n " - - "# hi" - - "\n " - I "c" - - "\n " - - #= hey\n ho =#' - - "\n" - - "end" -``` +Here's some behaviors which seem to be bugs. (Some of these we replicate in the +name of compatibility, perhaps with a warning.) -Call represents a challange for the AST vs Green tree in terms of node -placement / iteration for infix operators vs normal prefix function calls. +* Macro module paths allow calls which gives weird stateful semantics! + ``` + b() = rand() > 0.5 ? Base : Core + b().@info "hi" + ``` +* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd + broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should + probably be rejected. +* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where + parameters are separated by commas. A tuple is produced instead. +* `const` and `global` allow chained assignment, but the right hand side is not + constant. `a` const here but not `b`. + ``` + const a = b = 1 + ``` +* Parsing the `ncat` array concatenation syntax within braces gives + strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as + `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy + to how `{a b}` produces `(bracescat (row a b))`. +* `export a, \n $b` is rejected, but `export a, \n b` parses fine. +* In try-catch-finally, the `finally` clause is allowed before the `catch`, but + always executes afterward. (Presumably was this a mistake? It seems pretty awful!) +* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is + correctly parsed as `Expr(:vect)` +* `f(x for x in in xs)` is accepted, and parsed very strangely. 
+* Octal escape sequences saturate rather than being reported as errors. Eg, + `"\777"` results in `"\xff"`. This is inconsistent with + `Base.parse(::Type{Int}, ...)` +* Leading dots in import paths with operator-named modules are parsed into + dotted operators rather than a relative path. Ie, we have `import .⋆` parsing + to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency + with the parsing of `import .A`. +* Looking back on the output disregards grouping parentheses which can lead to + odd results in some cases. For example, `f(((((x=1)))))` parses as a keyword + call to function `f` with the keyword `x=1`, but arguably it should be an + assignment. +* Hexfloat literals can have a trailing `f` for example, `0x1p1f` + but this doesn't do anything. In the `flisp` C code such cases are treated as + Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925 + but this has never been officially supported in Julia. It seems this bug + arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent + digits, all of which are detected as invalid except for a trailing `f` when + processed by `isnumtok_base`. -- The normal problem of `a + 1` vs `+(a, 1)` -- Or worse, `a + 1 + 2` vs `+(a, 1, 2)` +## Parsing / AST oddities and warts -Clearly in the AST's *interface* we need to abstract over this placement. For -example with something like the normal Julia AST's iteration order. +### Questionable allowed forms -### Abstract syntax tree +There's various allowed syntaxes which are fairly easily detected in the +parser, but which will be rejected later during lowering. To allow building +DSLs this is fine and good but some such allowed syntaxes don't seem very +useful, even for DSLs: -By pointing to green tree nodes, AST nodes become tracable back to the original -source. +* `macro (x) end` is allowed but there are no anonymous macros. 
+* `abstract type A < B end` and other subtypes comparisons are allowed, but + only `A <: B` makes sense. +* `x where {S T}` produces `(where x (bracescat (row S T)))` -Unlike most languages, designing a new AST is tricky because the existing -`Expr` is a very public API used in every macro expansion. User-defined -macro expansions interpose between the source text and lowering, and using -`Expr` looses source information in many ways. +### `kw` and `=` inconsistencies -There seems to be a few ways forward: -* Maybe we can give `Expr` some new semi-hidden fields to point back to the - green tree nodes that the `Expr` or its `args` list came from? -* We can use the existing `Expr` during macro expansion and try to recover - source information after macro expansion using heuristics. Likely the - presence of correct hygiene can help with this. -* Introducing a new AST would be possible if it were opt-in for new-style - macros only. Fixing hygiene should go along with this. Design challenge: How - do we make manipulating expressions reasonable when literals need to carry - source location? +There's many apparent inconsistencies between how `kw` and `=` are used when +parsing `key=val` pairs inside parentheses. -One option which may help bridge between locationless ASTs and something new -may be to have wrappers for the small number of literal types we need to cover. -For example: +* Inconsistent parsing of tuple keyword args inside vs outside of dot calls + ```julia + (a=1,) # (tuple (= a 1)) + f.(a=1) # (tuple (kw a 1)) + ``` +* Mixtures of `,` and `;` in calls give nested parameter AST which parses + strangely, and is kind-of-horrible to use. + ```julia + # (tuple (parameters (parameters e f) c d) a b) + (a,b; c,d; e,f) + ``` +* Long-form anonymous functions have argument lists which are parsed + as tuples (or blocks!) rather than argument lists and this mess appears to be + papered over as part of lowering. 
For example, in `function (a;b) end` the + `(a;b)` is parsed as a block! This leads to more inconsistency in the use of + `kw` for keywords. -```julia -SourceSymbol <: AbstractSymbol -SourceInt <: Integer -SourceString <: AbstractString -``` -Having source location attached to symbols would potentially solve most of the -hygine problem. There's still the problem of macro helper functions which use -symbol literals; we can't very well be changing the meaning of `:x`! Perhaps -the trick there is to try capturing the current module at the location of the -interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to -`Core._expr(:call, :+, :y, x)`, but it could expand it to something like -`Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? +### Flattened generators -## Parsing +Flattened generators are uniquely problematic because the Julia AST doesn't +respect a key rule we normally expect: that the children of an AST node are a +*contiguous* range in the source text. This is because the `for`s in +`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop as -### Error recovery +``` +for x in xs +for y in ys + push!(xy, collection) +``` -Some disorganized musings about error recovery +and the standard Julia AST is like this: -Different types of errors seem to occur... +``` +(flatten + (generator + (generator + xy + (= y ys)) + (= x xs))) +``` -* Disallowed syntax (such as lack of spaces in conditional expressions) - where we can reasonably just continue parsing the production and emit the - node with an error flag which is otherwise fully formed. In some cases like - parsing infix expressions with a missing tail, emitting a zero width error - token can lead to a fully formed parse tree without the productions up the - stack needing to participate in recovery. -* A token which is disallowed in current context. Eg, `=` in parse_atom, or a - closing token inside an infix expression. 
Here we can emit a `K"error"`, but - we can't descend further into the parse tree; we must pop several recursive - frames off. Seems tricky! - -A typical structure is as follows: - -```julia -function parse_foo(ps) - mark = position(ps) - parse_bar(ps) # What if this fails? - if peek(ps) == K"some-token" - bump(ps) - parse_baz(ps) # What if this fails? - emit(ps, mark, K"foo") - end -end -``` - -Emitting plain error tokens are good in unfinished infix expressions: +however, note that if this tree were flattened, the order of tokens would be +`(xy) (y in ys) (x in xs)` which is *not* the source order. So in this case +our green tree must deviate from the Julia AST. The natural representation +seems to be to flatten the generators: -```julia -begin - a = x + -end ``` - -The "missing end" problem is tricky, as the intermediate syntax is valid; the -problem is often only obvious until we get to EOF. - -Missing end -```julia -function f() - begin - a = 10 -end - -# <-- Indentation would be wrong if g() was an inner function of f. -function g() -end +(flatten + xy + (= x xs) + (= y ys)) ``` -It seems like ideal error recorvery would need to backtrack in this case. For -example: - -- Pop back to the frame which was parsing `f()` -- Backtrack through the parse events until we find a function with indentation - mismatched to the nesting of the parent. -- Reset ParseStream to a parsing checkpoint before `g()` was called -- Emit error and exit the function parsing `f()` -- Restart parsing -- Somehow make sure all of this can't result in infinite recursion 😅 - -For this kind of recovery it sure would be good if we could reify the program -stack into a parser state object... +### Other oddities -Missing commas or closing brackets in nested structures also present the -existing parser with a problem. +* Operators with sufficies don't seem to always be parsed consistently as the + same operator without a suffix. Unclear whether this is by design or mistake. 
+ For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` -```julia -f(a, - g(b, - c # -- missing comma? - d), - e) -``` +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. + I suppose this is somewhat useful for AST consumers, but it seems a bit weird + and unnecessary. -Again the local indentation might tell a story +* `let` bindings might be stored in a block, or they might not be, depending on + special cases: + ``` + # Special cases not in a block + let x=1 ; end ==> (let (= x 1) (block)) + let x::1 ; end ==> (let (:: x 1) (block)) + let x ; end ==> (let x (block)) -```julia -f(a, - g(b, - c # -- missing closing `)` ? - d) -``` + # In a block + let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + let x+=1 ; end ==> (let (block (+= x 1)) (block)) + ``` -But not always! +* The `elseif` condition is always in a block but not the `if` condition. + Presumably because of the need to add a line number node in the flisp parser + `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` -```julia -f(a, - g(b, - c # -- missing closing `)` ? - d) -``` +* Spaces are alloweed between import dots — `import . .A` is allowed, and + parsed the same as `import ..A` -## Fun research questions +* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` + can't be a normal identifier. -* Given source and syntax tree, can we regress/learn a generative model of - indentiation from the syntax tree? Source formatting involves a big pile of - heuristics to get something which "looks nice"... and ML systems have become - very good at heuristics. Also, we've got huge piles of traininig data — just - choose some high quality, tastefully hand-formatted libraries. +* When lexing raw strings, more than two backslashes are treated strangely at + the end of the string: `raw"\\\\ "` contains four backslashes, whereas + `raw"\\\\"` contains only two. 
-* Similarly, can we learn fast and reasonably accurate recovery heuristics for - when the parser encounters broken syntax rather than hand-coding these? +* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and + `@S {a b}` parse. Conversely, `@S[a b]` parses. # Resources @@ -478,201 +525,226 @@ Some resources: * Some notes about stateful lexers for parsing shell-like string interpolations: http://www.oilshell.org/blog/2017/12/17.html -# Parser devdocs -# Differences from the flisp parser +# Design notes -Practically the flisp parser is not quite a classic [recursive descent -parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it -often looks back and modifies the output tree it has already produced. We've -tried to eliminate this pattern it favor of lookahead where possible because +The following are some fairly disorganized design notes covering a mixture of +things which have already been done and musings about further work. -* It works poorly when the parser is emitting a stream of node spans rather - than eagerly creating a tree data structure. -* It's confusing to reason about this kind of code +## Prototyping approach -However, on occasion it seems to solve genuine ambiguities where Julia code -can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=` -ambiguity within parentheses. In these cases we put up with using the -functions `look_behind` and `reset_node!()`. +The tree datastructure design here is tricky: -## Code structure +1. The symbolic part of compilation (the compiler frontend) incrementally + abstracts and transforms the source text, but errors along the way should + refer back to the source. + - The tree must be a lossless representation of the source text + - Some aspects of the source text (comments, most whitespace) are irrelevant + to parsing. + - More aspects of the source text are irrelevant after we have an abstract + syntax tree of the surface syntax. 
Some good examples here are the + parentheses in `2*(x + y)` and the explicit vs implicit multiplication + symbol in `2*x` vs `2x`. -Large structural changes were generally avoided while porting. In particular, -nearly all function names for parsing productions are the same with `-` -replaced by `_` and predicates prefixed by `is_`. +2. There's various type of *analyses* +- There's many useful ways to augment a syntax tree depending on use case. +- Analysis algorithms should be able to act on any tree type, ignoring + but carrying augmentations which they don't know about. -Some notable differences: +Having so many use cases suggests it might be best to have several different +tree types with a common interface rather than one main abstract syntax tree +type. But it seems useful to figure this out by prototyping several important +work flows: -* `parse-arglist` and a parts of `parse-paren-` have been combined into a - general function `parse_brackets`. This function deals with all the odd - corner cases of how the AST is emitted when mixing `,` and `;` within - parentheses. In particular regard to: - - Determining whether `;` are block syntax separators or keyword parameters - - Determining whether to emit `parameter` sections based on context - - Emitting key-value pairs either as `kw` or `=` depending on context -* The way that `parse-resword` is entered has been rearranged to avoid parsing - reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we - detect reserved words and enter `parse_resword` earlier. +* Syntax transformations + - Choose some macros to implement. This is a basic test of mixing source + trees from different files while preserving precise source locations. +* Formatting + - Re-indent a file. This tests the handling of syntax trivia. +* Refactoring + - A pass to rename local variables. This tests how information from further + down the compilation pipeline can be attached to the syntax tree and used + to modify the source code. 
+* Precise error reporting in lowering + - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment + location `[a, b]`". But at a precise source location. + - Try something several layers deeper inside lowering? For example "macro + definition not allowed inside a local scope" +* Incremental reparsing + - Reparse a source file, given a byte range replacement -## Flisp parser bugs -Here's some behaviors which seem to be bugs: +## Tree design -* Macro module paths allow calls which gives weird stateful semantics! - ``` - b() = rand() > 0.5 ? Base : Core - b().@info "hi" - ``` -* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd - broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should - probably be rejected. -* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where - parameters are separated by commas. A tuple is produced instead. -* `const` and `global` allow chained assignment, but the right hand side is not - constant. `a` const here but not `b`. - ``` - const a = b = 1 - ``` -* Parsing the `ncat` array concatenation syntax within braces gives - strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as - `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy - to how `{a b}` produces `(bracescat (row a b))`. -* `export a, \n $b` is rejected, but `export a, \n b` parses fine. -* In try-catch-finally, the `finally` clause is allowed before the `catch`, but - always executes afterward. (Presumably was this a mistake? It seems pretty awful!) -* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is - correctly parsed as `Expr(:vect)` -* `f(x for x in in xs)` is accepted, and parsed very strangely. -* Octal escape sequences saturate rather than being reported as errors. Eg, - `"\777"` results in `"\xff"`. 
This is inconsistent with - `Base.parse(::Type{Int}, ...)` -* Leading dots in import paths with operator-named modules are parsed into - dotted operators rather than a relative path. Ie, we have `import .⋆` parsing - to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency - with the parsing of `import .A`. -* Looking back on the output disregards grouping parentheses which can lead to - odd results in some cases. For example, `f(((((x=1)))))` parses as a keyword - call to function `f` with the keyword `x=1`, but arguably it should be an - assignment. -* Hexfloat literals can have a trailing `f` for example, `0x1p1f` - but this doesn't do anything. In the `flisp` C code such cases are treated as - Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925 - but this has never been officially supported in Julia. It seems this bug - arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent - digits, all of which are detected as invalid except for a trailing `f` when - processed by `isnumtok_base`. +### Raw syntax tree / Green tree -## Parsing / AST oddities and warts +Raw syntax tree (or "Green tree" in the terminology from Roslyn) -### Questionable allowed forms +We want GreenNode to be +* *structurally minimal* — For efficiency and generality +* *immutable* — For efficiency (& thread safety) +* *complete* — To preserve parser knowledge +* *token agnostic* — To allow use with any source language -There's various allowed syntaxes which are fairly easily detected in the -parser, but which will be rejected later during lowering. To allow building -DSLs this is fine and good but some such allowed syntaxes don't seem very -useful, even for DSLs: +The simplest idea possible is to have: +* Leaf nodes are a single token +* Children are in source order -* `macro (x) end` is allowed but there are no anonymous macros. 
-* `abstract type A < B end` and other subtypes comparisons are allowed, but - only `A <: B` makes sense. -* `x where {S T}` produces `(where x (bracescat (row S T)))` -### `kw` and `=` inconsistencies +Call represents a challange for the AST vs Green tree in terms of node +placement / iteration for infix operators vs normal prefix function calls. -There's many apparent inconsistencies between how `kw` and `=` are used when -parsing `key=val` pairs inside parentheses. +- The normal problem of `a + 1` vs `+(a, 1)` +- Or worse, `a + 1 + 2` vs `+(a, 1, 2)` -* Inconsistent parsing of tuple keyword args inside vs outside of dot calls - ```julia - (a=1,) # (tuple (= a 1)) - f.(a=1) # (tuple (kw a 1)) - ``` -* Mixtures of `,` and `;` in calls give nested parameter AST which parses - strangely, and is kind-of-horrible to use. - ```julia - # (tuple (parameters (parameters e f) c d) a b) - (a,b; c,d; e,f) - ``` -* Long-form anonymous functions have argument lists which are parsed - as tuples (or blocks!) rather than argument lists and this mess appears to be - papered over as part of lowering. For example, in `function (a;b) end` the - `(a;b)` is parsed as a block! This leads to more inconsistency in the use of - `kw` for keywords. +Clearly in the AST's *interface* we need to abstract over this placement. For +example with something like the normal Julia AST's iteration order. +### Abstract syntax tree -### Flattened generators +By pointing to green tree nodes, AST nodes become tracable back to the original +source. -Flattened generators are uniquely problematic because the Julia AST doesn't -respect a key rule we normally expect: that the children of an AST node are a -*contiguous* range in the source text. This is because the `for`s in -`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop as +Unlike most languages, designing a new AST is tricky because the existing +`Expr` is a very public API used in every macro expansion. 
User-defined +macro expansions interpose between the source text and lowering, and using +`Expr` looses source information in many ways. -``` -for x in xs -for y in ys - push!(xy, collection) -``` +There seems to be a few ways forward: +* Maybe we can give `Expr` some new semi-hidden fields to point back to the + green tree nodes that the `Expr` or its `args` list came from? +* We can use the existing `Expr` during macro expansion and try to recover + source information after macro expansion using heuristics. Likely the + presence of correct hygiene can help with this. +* Introducing a new AST would be possible if it were opt-in for new-style + macros only. Fixing hygiene should go along with this. Design challenge: How + do we make manipulating expressions reasonable when literals need to carry + source location? -and the standard Julia AST is like this: +One option which may help bridge between locationless ASTs and something new +may be to have wrappers for the small number of literal types we need to cover. +For example: +```julia +SourceSymbol <: AbstractSymbol +SourceInt <: Integer +SourceString <: AbstractString ``` -(flatten -(generator -(generator - xy - (= y ys)) -(= x xs)) + +Having source location attached to symbols would potentially solve most of the +hygine problem. There's still the problem of macro helper functions which use +symbol literals; we can't very well be changing the meaning of `:x`! Perhaps +the trick there is to try capturing the current module at the location of the +interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to +`Core._expr(:call, :+, :y, x)`, but it could expand it to something like +`Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? + +## Parsing + +### Error recovery + +Some disorganized musings about error recovery + +Different types of errors seem to occur... 
+ +* Disallowed syntax (such as lack of spaces in conditional expressions) + where we can reasonably just continue parsing the production and emit the + node with an error flag which is otherwise fully formed. In some cases like + parsing infix expressions with a missing tail, emitting a zero width error + token can lead to a fully formed parse tree without the productions up the + stack needing to participate in recovery. +* A token which is disallowed in current context. Eg, `=` in parse_atom, or a + closing token inside an infix expression. Here we can emit a `K"error"`, but + we can't descend further into the parse tree; we must pop several recursive + frames off. Seems tricky! + +A typical structure is as follows: + +```julia +function parse_foo(ps) + mark = position(ps) + parse_bar(ps) # What if this fails? + if peek(ps) == K"some-token" + bump(ps) + parse_baz(ps) # What if this fails? + emit(ps, mark, K"foo") + end +end ``` -however, note that if this tree were flattened, the order of tokens would be -`(xy) (y in ys) (x in xs)` which is *not* the source order. So in this case -our green tree must deviate from the Julia AST. The natural representation -seems to be to flatten the generators: +Emitting plain error tokens are good in unfinished infix expressions: +```julia +begin + a = x + +end ``` -(flatten -xy -(= x xs) -(= y ys)) + +The "missing end" problem is tricky, as the intermediate syntax is valid; the +problem is often only obvious until we get to EOF. + +Missing end +```julia +function f() + begin + a = 10 +end + +# <-- Indentation would be wrong if g() was an inner function of f. +function g() +end ``` -### Other oddities +It seems like ideal error recorvery would need to backtrack in this case. For +example: -* Operators with sufficies don't seem to always be parsed consistently as the - same operator without a suffix. Unclear whether this is by design or mistake. 
- For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` +- Pop back to the frame which was parsing `f()` +- Backtrack through the parse events until we find a function with indentation + mismatched to the nesting of the parent. +- Reset ParseStream to a parsing checkpoint before `g()` was called +- Emit error and exit the function parsing `f()` +- Restart parsing +- Somehow make sure all of this can't result in infinite recursion 😅 -* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. - I suppose this is somewhat useful for AST consumers, but it seems a bit weird - and unnecessary. +For this kind of recovery it sure would be good if we could reify the program +stack into a parser state object... -* `let` bindings might be stored in a block, or they might not be, depending on - special cases: - ``` - # Special cases not in a block - let x=1 ; end ==> (let (= x 1) (block)) - let x::1 ; end ==> (let (:: x 1) (block)) - let x ; end ==> (let x (block)) +Missing commas or closing brackets in nested structures also present the +existing parser with a problem. - # In a block - let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) - let x+=1 ; end ==> (let (block (+= x 1)) (block)) - ``` +```julia +f(a, + g(b, + c # -- missing comma? + d), + e) +``` -* The `elseif` condition is always in a block but not the `if` condition. - Presumably because of the need to add a line number node in the flisp parser - `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` +Again the local indentation might tell a story -* Spaces are alloweed between import dots — `import . .A` is allowed, and - parsed the same as `import ..A` +```julia +f(a, + g(b, + c # -- missing closing `)` ? + d) +``` -* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` - can't be a normal identifier. +But not always! 
-* When lexing raw strings, more than two backslashes are treated strangely at - the end of the string: `raw"\\\\ "` contains four backslashes, whereas - `raw"\\\\"` contains only two. +```julia +f(a, + g(b, + c # -- missing closing `)` ? + d) +``` -* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and - `@S {a b}` parse. Conversely, `@S[a b]` parses. +# Fun research questions +* Given source and syntax tree, can we regress/learn a generative model of + indentiation from the syntax tree? Source formatting involves a big pile of + heuristics to get something which "looks nice"... and ML systems have become + very good at heuristics. Also, we've got huge piles of traininig data — just + choose some high quality, tastefully hand-formatted libraries. + +* Similarly, can we learn fast and reasonably accurate recovery heuristics for + when the parser encounters broken syntax rather than hand-coding these? diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index f69b91939d14b..13bd4b023950d 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -2,16 +2,17 @@ GreenNode(head, span) GreenNode(head, children...) -A "green tree" is a lossless syntax tree which overlays all the source text and -where +A "green tree" in Roslyn (C# compiler) terminology is a lossless syntax tree +which overlays all the source text. The most basic properties of a green tree +are that: * Nodes cover a contiguous span of bytes in the text -* Node children are ordered in the same order as the text -* Nodes are immutable and don't know their absolute position, so can be cached - and reused +* Sibling nodes are ordered in the same order as the text As implementation choices, we choose that: +* Nodes are immutable and don't know their parents or absolute position, so can + be cached and reused * Nodes are homogenously typed at the language level so they can be stored concretely, with the `head` defining the node type. 
Normally this would include a "syntax kind" enumeration, but it can also include flags and record diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index f44aa74cd9b59..8fa0b4482e884 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -125,13 +125,14 @@ end """ - parseall(TreeType, input; + parseall(TreeType, input...; rule=:toplevel, version=VERSION, ignore_trivia=true) Experimental convenience interface to parse `input` as Julia code, emitting an -error if the entire input is not consumed. By default `parseall` will ignore +error if the entire input is not consumed. `input` can be a string or any other +valid input to the `ParseStream` constructor. By default `parseall` will ignore whitespace and comments before and after valid code but you can turn this off by setting `ignore_trivia=false`. @@ -141,7 +142,7 @@ See [`parse`](@ref) for a more complete and powerful interface to the parser, as well as a description of the `version` and `rule` keywords. """ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, - ignore_trivia=true) where {T} + ignore_trivia=true, kws...) where {T} stream = ParseStream(input...; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) @@ -161,7 +162,7 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, # * It's kind of required for GreenNode, as GreenNode only records spans, # not absolute positions. # * Dropping it would be ok for SyntaxNode and Expr... - tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel") + tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", kws...) if !isempty(stream.diagnostics) # Crudely format any warnings to the current logger. 
buf = IOBuffer() From 0fece8daa9ca4304f51659f083b6d50c8f4ee02b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Feb 2022 19:58:48 +1000 Subject: [PATCH 0363/1109] Remove utility function for combining parser flags This seemed like a good idea at the time but using the raw flags directly is quite ok too, no need for the extra abstraction. --- JuliaSyntax/src/parse_stream.jl | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 29ec982c9082b..b8e15c31adf83 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -34,21 +34,6 @@ end # Return true if any of `test_flags` are set has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 -# Function for combining flags. (Do we want this?) -function flags(; trivia::Bool=false, - infix::Bool=false, - dotop::Bool=false, - try_catch_after_finally::Bool=false, - numeric::Int=0) - flags = RawFlags(0) - trivia && (flags |= TRIVIA_FLAG) - infix && (flags |= INFIX_FLAG) - dotop && (flags |= DOTOP_FLAG) - try_catch_after_finally && (flags |= TRY_CATCH_AFTER_FINALLY_FLAG) - numeric != 0 && (flags |= set_numeric_flags(numeric)) - return flags::RawFlags -end - #------------------------------------------------------------------------------- struct SyntaxHead kind::Kind From 0eb9d8487641b30c295087a7a60eab5f687144f6 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 3 Feb 2022 05:57:15 +1000 Subject: [PATCH 0364/1109] Make sure Julia Computing is properly attributed --- JuliaSyntax/LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/LICENSE b/JuliaSyntax/LICENSE index 7f98356226bb3..11212fbf53df0 100644 --- a/JuliaSyntax/LICENSE +++ b/JuliaSyntax/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 Chris Foster and contributors +Copyright (c) 2021 Julia Computing and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this 
software and associated documentation files (the "Software"), to deal From 065da3cbb4b4c28cf2bb15c2e2ba6f1af30f17ca Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 3 Feb 2022 08:07:22 +1000 Subject: [PATCH 0365/1109] Fix the syntax_interpolation prototype for the newer parser api --- JuliaSyntax/test/syntax_interpolation.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/test/syntax_interpolation.jl index d7f1094e2de64..eddf6748bd423 100644 --- a/JuliaSyntax/test/syntax_interpolation.jl +++ b/JuliaSyntax/test/syntax_interpolation.jl @@ -1,6 +1,6 @@ # # Macros and expression interpolation -using JuliaSyntax: SourceFile, SyntaxNode, parse_all, child, setchild! +using JuliaSyntax: SourceFile, SyntaxNode, parseall, child, setchild! # The following shows that SyntaxNode works nicely for simple macros which # just interpolate expressions into one another. In particular it shows how @@ -31,7 +31,7 @@ function at_show2(ex::SyntaxNode) # The following emulates the expression interpolation lowering which is # usually done by the compiler. # 1. Extract the expression literal as `block` - tree = parse_all(SyntaxNode, SourceFile(String(read(@__FILE__)), filename=@__FILE__)) + tree = parseall(SyntaxNode, String(read(@__FILE__)), filename=@__FILE__) block = child(tree, 3, 2, 2, 1) # 2. Interpolate local variables into the block at positions of $'s # Interpolating a SyntaxNode `ex` is simple: @@ -48,7 +48,7 @@ end # Let's have some simple expression to pass to at_show2. This will be # attributed to a different file foo.jl -s2 = child(parse_all(SyntaxNode, SourceFile("foo +\n42", filename="foo.jl")), 1) +s2 = parseall(SyntaxNode, "foo +\n42", filename="foo.jl", rule=:statement) # Calling at_show2, we see that the precise source information is preserved for # both the surrounding expression and the interpolated fragments. 
From a98839fcd37be51d5f04fd544e215412451d7f5a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 3 Feb 2022 09:02:32 +1000 Subject: [PATCH 0366/1109] README tweaks, add some links --- JuliaSyntax/README.md | 73 ++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index b8c22589a0d70..bf10fec01b9d4 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -16,13 +16,13 @@ A Julia frontend, written in Julia. ### Design Opinions -* Parser implementation should be independent from tree data structures so +* Parser implementation should be independent from tree data structures. So we have the `ParseStream` interface. * Tree data structures should be *layered* to balance losslessness with abstraction and generality. So we have `SyntaxNode` (an AST) layered on top of `GreenNode` (a lossless parse tree). We might need other tree types later. -* Fancy parser generators are marginal for production compilers. We use a - boring but flexible recursive descent parser. +* Fancy parser generators still seem marginal for production compilers. We use + a boring but flexible recursive descent parser. # Examples @@ -118,8 +118,9 @@ We use a version of [Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) which has been modified to better match the needs of parsing: * Newline-containing whitespace is emitted as a separate kind * Tokens inside string interpolations are emitted separately from the string -* Strings delimiters are separate tokens and the `String` kind -* Additional contextural keywords (`as`, `var`, `doc`) have been added and +* Strings delimiters are separate tokens and the actual string always has the + `String` kind +* Additional contextual keywords (`as`, `var`, `doc`) have been added and moved to a subcategory of keywords. 
* Nonterminal kinds were added (though these should probably be factored out again) * Various bugs fixed and additions for newer Julia versions @@ -143,9 +144,10 @@ Parsing proceeds by recursive descent; examine tokens and `bump()` to consume them. * The parser produces a flat list of text spans as *output* using `bump()` to transfer tokens to the output and `position()`/`emit()` for nonterminal ranges. -* Diagnostics are emitted as separate text span -* Whitespace and comments are automatically `bump()`ed, with the exception of - syntactically relevant newlines in space sensitive mode. +* Diagnostics are emitted as separate text spans +* Whitespace and comments are automatically `bump()`ed and don't need to be + handled explicitly. The exception is syntactically relevant newlines in space + sensitive mode. * Parser modes are passed down the call tree using `ParseState`. The output spans track the byte range, a syntax "kind" stored as an integer @@ -172,7 +174,7 @@ define `build_tree` for the AST type `SyntaxNode` and for normal Julia `Expr`. ### Error recovery -The goal of the parser is to produce well-formed heirarchical structure from +The goal of the parser is to produce well-formed hierarchical structure from the source text. For interactive tools we need this to work even when the source text contains errors; it's the job of the parser to include the recovery heuristics to make this work. @@ -278,7 +280,7 @@ name of compatibility, perhaps with a warning.) broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should probably be rejected. * Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where - parameters are separated by commas. A tuple is produced instead. + keyword parameters are separated by commas. A tuple is produced instead. * `const` and `global` allow chained assignment, but the right hand side is not constant. `a` const here but not `b`. ``` @@ -292,7 +294,7 @@ name of compatibility, perhaps with a warning.) 
* In try-catch-finally, the `finally` clause is allowed before the `catch`, but always executes afterward. (Presumably was this a mistake? It seems pretty awful!) * When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is - correctly parsed as `Expr(:vect)` + correctly parsed as `Expr(:vect)` (maybe fixed in 1.7?) * `f(x for x in in xs)` is accepted, and parsed very strangely. * Octal escape sequences saturate rather than being reported as errors. Eg, `"\777"` results in `"\xff"`. This is inconsistent with @@ -388,13 +390,13 @@ seems to be to flatten the generators: ### Other oddities -* Operators with sufficies don't seem to always be parsed consistently as the +* Operators with suffices don't seem to always be parsed consistently as the same operator without a suffix. Unclear whether this is by design or mistake. For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` * `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. - I suppose this is somewhat useful for AST consumers, but it seems a bit weird - and unnecessary. + I suppose this is somewhat useful for AST consumers, but reversing the source + order is pretty weird and inconvenient when moving to a lossless parser. * `let` bindings might be stored in a block, or they might not be, depending on special cases: @@ -413,21 +415,39 @@ seems to be to flatten the generators: Presumably because of the need to add a line number node in the flisp parser `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` -* Spaces are alloweed between import dots — `import . .A` is allowed, and +* Spaces are allowed between import dots — `import . .A` is allowed, and parsed the same as `import ..A` * `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` can't be a normal identifier. 
-* When lexing raw strings, more than two backslashes are treated strangely at - the end of the string: `raw"\\\\ "` contains four backslashes, whereas - `raw"\\\\"` contains only two. +* The raw string escaping rules are *super* confusing for backslashes near vs + at the end of the string: `raw"\\\\ "` contains four backslashes, whereas + `raw"\\\\"` contains only two. It's unclear whether anything can be done + about this, however. * In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and `@S {a b}` parse. Conversely, `@S[a b]` parses. # Resources +## Julia issues + +Here's a few links to relevant Julia issues. No doubt there's many more. + +#### Macro expansion + +* Automatic hygiene for macros https://github.com/JuliaLang/julia/pull/6910 — + would be interesting to implement this in a new frontend. + +#### Lowering + +* A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201 — + some of this should be ported. +* The closure capture problem https://github.com/JuliaLang/julia/issues/15276 — + would be interesting to see whether we can tackle some of the harder cases in + a new implementation. + ## C# Roslyn [Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) @@ -437,7 +457,7 @@ seems to be to flatten the generators: ## Rust-analyzer -`rust-analyzer` seems to be very close to what I'm buildin here, and has come +`rust-analyzer` seems to be very close to what I'm building here, and has come to the same conclusions on green tree layout with explicit trivia nodes. 
Their document on internals [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md) @@ -591,7 +611,7 @@ The simplest idea possible is to have: * Children are in source order -Call represents a challange for the AST vs Green tree in terms of node +Call represents a challenge for the AST vs Green tree in terms of node placement / iteration for infix operators vs normal prefix function calls. - The normal problem of `a + 1` vs `+(a, 1)` @@ -602,7 +622,7 @@ example with something like the normal Julia AST's iteration order. ### Abstract syntax tree -By pointing to green tree nodes, AST nodes become tracable back to the original +By pointing to green tree nodes, AST nodes become traceable back to the original source. Unlike most languages, designing a new AST is tricky because the existing @@ -632,7 +652,7 @@ SourceString <: AbstractString ``` Having source location attached to symbols would potentially solve most of the -hygine problem. There's still the problem of macro helper functions which use +hygiene problem. There's still the problem of macro helper functions which use symbol literals; we can't very well be changing the meaning of `:x`! Perhaps the trick there is to try capturing the current module at the location of the interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to @@ -695,7 +715,7 @@ function g() end ``` -It seems like ideal error recorvery would need to backtrack in this case. For +It seems like ideal error recovery would need to backtrack in this case. For example: - Pop back to the frame which was parsing `f()` @@ -741,10 +761,11 @@ f(a, # Fun research questions * Given source and syntax tree, can we regress/learn a generative model of - indentiation from the syntax tree? Source formatting involves a big pile of + indentation from the syntax tree? Source formatting involves a big pile of heuristics to get something which "looks nice"... and ML systems have become - very good at heuristics. 
Also, we've got huge piles of traininig data — just + very good at heuristics. Also, we've got huge piles of training data — just choose some high quality, tastefully hand-formatted libraries. * Similarly, can we learn fast and reasonably accurate recovery heuristics for - when the parser encounters broken syntax rather than hand-coding these? + when the parser encounters broken syntax rather than hand-coding these? How + do we set the parser up so that training works and inference is nonintrusive? From 646139b65d4f2acca3f8c65a9a649988ea7ce837 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 3 Feb 2022 15:23:06 +1000 Subject: [PATCH 0367/1109] README: Add comparisons to other packages --- JuliaSyntax/README.md | 103 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index bf10fec01b9d4..302a498495434 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -13,6 +13,7 @@ A Julia frontend, written in Julia. * "Compilation as an API" to support all sorts of tooling * Grow to encompass the rest of the compiler frontend: macro expansion, desugaring and other lowering steps. +* Once mature, replace Julia's flisp-based reference frontend in `Core` ### Design Opinions @@ -24,6 +25,13 @@ A Julia frontend, written in Julia. * Fancy parser generators still seem marginal for production compilers. We use a boring but flexible recursive descent parser. +### Status + +The library is in pre-0.1 stage, but parses all of Base correctly with only a +handful of failures remaining in the Base tests and standard library. +The tree data structures should be somewhat usable but will evolve as we try +out various use cases. 
+ # Examples Here's what parsing of a small piece of code currently looks like in various @@ -325,9 +333,9 @@ DSLs this is fine and good but some such allowed syntaxes don't seem very useful, even for DSLs: * `macro (x) end` is allowed but there are no anonymous macros. -* `abstract type A < B end` and other subtypes comparisons are allowed, but +* `abstract type A < B end` and other subtype comparisons are allowed, but only `A <: B` makes sense. -* `x where {S T}` produces `(where x (bracescat (row S T)))` +* `x where {S T}` produces `(where x (bracescat (row S T)))`. This seems pretty weird! ### `kw` and `=` inconsistencies @@ -421,19 +429,80 @@ seems to be to flatten the generators: * `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` can't be a normal identifier. -* The raw string escaping rules are *super* confusing for backslashes near vs - at the end of the string: `raw"\\\\ "` contains four backslashes, whereas - `raw"\\\\"` contains only two. It's unclear whether anything can be done - about this, however. +* The raw string escaping rules are *super* confusing for backslashes near + the end of the string: `raw"\\\\ "` contains four backslashes, whereas + `raw"\\\\"` contains only two. However this was an intentional feature to + allow all strings to be represented and it's unclear whether the situation + can be improved. * In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and `@S {a b}` parse. Conversely, `@S[a b]` parses. +# Comparisons to other packages + +### JuliaParser.jl + +[JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl) +was a direct port of Julia's flisp reference parser but was abandoned around +Julia 0.5 or so. However it doesn't support lossless parsing and doing so would +amount to a full rewrite. Given the divergence with the flisp reference parser +since Julia-0.5, it seemed better just to start with the reference parser +instead. 
+ +### Tokenize.jl + +[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) +is a fast lexer for Julia code. The code from Tokenize has been +imported and used in JuliaSyntax, with some major modifications as discussed in +the lexer implementation section. + +### CSTParser.jl + +[CSTParser.jl](https://github.com/julia-vscode/CSTParser.jl) +is a ([mostly?](https://github.com/domluna/JuliaFormatter.jl/issues/52#issuecomment-529945126)) +lossless parser with goals quite similar to JuliaParser and used extensively in +the VSCode / LanguageServer / JuliaFormatter ecosystem. CSTParser is very useful +but I do find the implementation hard to understand and I wanted to try a fresh +approach with a focus on: + +* "Production readyness": Good docs, tests, diagnostics and maximum similarity + with the flisp parser, with the goal of getting the new parser into `Core`. +* Learning from the latest ideas about composable parsing and data structures + from outside Julia. In particular the implementation of `rust-analyzer` is + very clean, well documented, and a great source of inspiration. +* Composability of tree data structures — I feel like the trees should be + layered somehow with a really lightweight green tree at the most basic level, + similar to Roslyn or rust-analyzer. In comparison CSTParser uses a more heavy + weight non-layered data structure. Alternatively or additionally, have a + common tree API with many concrete task-specific implementations. + +A big benefit of the JuliaSyntax parser is that it separates the parser code +from the tree data structures entirely which should give a lot of flexibility +in experimenting with various tree representations. + +I also want JuliaSyntax to tackle macro expansion and other lowering steps, and +provide APIs for this which can be used by both the core language and the +editor tooling. 
+ +### tree-sitter-julia + +Using a modern production-ready parser generator like `tree-sitter` is an +interesting option and some progress has already been made in +[tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia). +But I feel like the grammars for parser generators are only marginally more +expressive than writing the parser by hand after accounting for the effort +spent on the weird edge cases of a real language and writing the parser's tests +and "supporting code". + +On the other hand a hand-written parser completely flexible and can be mutually +understood with the reference implementation so I chose that approach for +JuliaSyntax. + # Resources ## Julia issues -Here's a few links to relevant Julia issues. No doubt there's many more. +Here's a few links to relevant Julia issues. #### Macro expansion @@ -760,12 +829,16 @@ f(a, # Fun research questions -* Given source and syntax tree, can we regress/learn a generative model of - indentation from the syntax tree? Source formatting involves a big pile of - heuristics to get something which "looks nice"... and ML systems have become - very good at heuristics. Also, we've got huge piles of training data — just - choose some high quality, tastefully hand-formatted libraries. +### Formatting + +Given source and syntax tree, can we regress/learn a generative model of +indentation from the syntax tree? Source formatting involves a big pile of +heuristics to get something which "looks nice"... and ML systems have become +very good at heuristics. Also, we've got huge piles of training data — just +choose some high quality, tastefully hand-formatted libraries. + +### Parser Recovery -* Similarly, can we learn fast and reasonably accurate recovery heuristics for - when the parser encounters broken syntax rather than hand-coding these? How - do we set the parser up so that training works and inference is nonintrusive? 
+Similarly, can we learn fast and reasonably accurate recovery heuristics for +when the parser encounters broken syntax rather than hand-coding these? How +do we set the parser up so that training works and inference is nonintrusive? From d915768bbd7274cc6624c510007aabca02a32bbd Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 4 Feb 2022 10:43:52 +1000 Subject: [PATCH 0368/1109] Add comparison to the Julia reference frontend --- JuliaSyntax/README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 302a498495434..e08135dbd99b9 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -440,6 +440,31 @@ seems to be to flatten the generators: # Comparisons to other packages +### Official Julia compiler + +The official Julia compiler frontend lives in the Julia source tree. It's +mostly contained in just a few files: +* The parser in [src/julia-parser.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-parser.scm) +* Macro expansion in [src/ast.c](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/ast.c) and [src/macroexpand.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/macroexpand.scm) +* Syntax lowering in [src/julia-syntax.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-syntax.scm) +* The flisp runtime and C extensions for Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp) +* Supporting utility functions in a few other `.scm` and `.c` files. + +There's two issues with the official reference frontend which suggest a rewrite. + +First, there's no support for precise source locations and the existing data +structures (bare flisp lists) can't easily be extended to add these. Fixing +this would require changes to nearly all of the code. 
+ +Second, it's written in flisp: an aestheically pleasing, minimal but obscure +implementation of Scheme. Learning Scheme is actually a good way to appreciate +some of Julia's design inspiration, but it's quite a barrier for developers of +Julia language tooling. (Flisp has no user-level documentation but non-schemers +can refer to the [Racket documentation](https://docs.racket-lang.org) which is +quite compatible for basic things.) In addition to the social factors, having +the embedded flisp interpreter and runtime with its own separate data +structures and FFI is complex and inefficient. + ### JuliaParser.jl [JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl) From a8c84f40780dec2fee13ffea45d22848cc1144f5 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 4 Feb 2022 18:32:21 +1000 Subject: [PATCH 0369/1109] Downloader for packages in the General registry. This is a hacky script to download the latest version of all packages registered in the General registry, for testing the parser. --- JuliaSyntax/tools/registry_download.jl | 46 ++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 JuliaSyntax/tools/registry_download.jl diff --git a/JuliaSyntax/tools/registry_download.jl b/JuliaSyntax/tools/registry_download.jl new file mode 100644 index 0000000000000..e866a6ee72a94 --- /dev/null +++ b/JuliaSyntax/tools/registry_download.jl @@ -0,0 +1,46 @@ +# Hacky script to download the latest version of all packages registered in the +# General registry for testing the parser. 
+# +# This uses internal Pkg APIs and seems to work on Julia 1.7 + +using Pkg +using Downloads + +registry = only(filter(r->r.name == "General", Pkg.Registry.reachable_registries())) + +packages = [] + +for (uuid,pkg) in registry + versions = collect(Pkg.Registry.registry_info(pkg).version_info) + latest_ver, ver_info = last(sort(versions, by=first)) + if ver_info.yanked + continue + end + + push!(packages, (; uuid, pkg.name, version=latest_ver, ver_info.git_tree_sha1)) + +end + +server = Pkg.pkg_server() +output_dir = "pkgs" +mkpath(output_dir) + +asyncmap(packages, ntasks=5) do pkg + url = "$server/package/$(pkg.uuid)/$(pkg.git_tree_sha1)" + outfile_path = joinpath(output_dir, "$(pkg.name)_$(pkg.version).tgz") + if isfile(outfile_path) + @info "Skipping package" pkg + return outfile_path + else + @info "Download package" url outfile_path + for i=1:5 + try + Downloads.download(url, outfile_path) + break + catch + @error "Error downloading" pkg exception=current_exceptions() + end + sleep(i) + end + end +end From 7536966dad2f80e51d1c40dcdb37bf30a56a55a7 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 5 Feb 2022 06:38:35 +1000 Subject: [PATCH 0370/1109] Tokenize: Add doc contextural keyword --- JuliaSyntax/Tokenize/src/lexer.jl | 16 ++++---- JuliaSyntax/Tokenize/src/token_kinds.jl | 2 +- JuliaSyntax/Tokenize/test/lexer.jl | 49 ++++++++++++++----------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 326b0c1df7219..1e215512d24a9 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -1142,8 +1142,6 @@ function simple_hash(str) end kws = [ -Tokens.ABSTRACT, -Tokens.AS, Tokens.BAREMODULE, Tokens.BEGIN, Tokens.BREAK, @@ -1165,22 +1163,26 @@ Tokens.LET, Tokens.LOCAL, Tokens.MACRO, Tokens.MODULE, -Tokens.MUTABLE, -Tokens.OUTER, -Tokens.PRIMITIVE, Tokens.QUOTE, Tokens.RETURN, Tokens.STRUCT, Tokens.TRY, -Tokens.TYPE, Tokens.USING, 
-Tokens.VAR, Tokens.WHILE, Tokens.IN, Tokens.ISA, Tokens.WHERE, Tokens.TRUE, Tokens.FALSE, + +Tokens.ABSTRACT, +Tokens.AS, +Tokens.DOC, +Tokens.MUTABLE, +Tokens.OUTER, +Tokens.PRIMITIVE, +Tokens.TYPE, +Tokens.VAR, ] const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 615ce0b0ba862..255802dff31f2 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -40,8 +40,8 @@ begin_contextural_keywords, ABSTRACT, AS, + DOC, MUTABLE, - NEW, OUTER, PRIMITIVE, TYPE, diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 4377834ec4870..11d4604b4dc52 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -234,10 +234,7 @@ end end @testset "keywords" begin - for kw in ["function", - "abstract", - "as", - "baremodule", + for kw in ["baremodule", "begin", "break", "catch", @@ -248,27 +245,31 @@ end "elseif", "end", "export", - #"false", "finally", "for", "function", "global", - "let", - "local", "if", "import", + "let", + "local", "macro", "module", - "mutable", - "primitive", "quote", "return", "struct", - #"true", "try", - "type", "using", - "while"] + "while", + + "abstract", + "as", + "doc", + "mutable", + "outer", + "primitive", + "type", + "var"] @test T.kind(tok(kw)) == T.KEYWORD end @@ -752,8 +753,8 @@ end @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) end -const all_kws = Set(["abstract", - "as", +const all_kws = Set([ + # Keywords "baremodule", "begin", "break", @@ -775,22 +776,28 @@ const all_kws = Set(["abstract", "local", "macro", "module", - "mutable", - "outer", - "primitive", "quote", "return", "struct", "try", - "type", "using", "while", - "in", - "isa", + # Contextural keywords + "abstract", + "as", + "doc", + "mutable", + "outer", + "primitive", + "type", "var", - 
"where", + # Literals "true", "false", + # Word-like operators + "in", + "isa", + "where", ]) function check_kw_hashes(iter) From 4288bcdb45084b8e6e2c4b590e94fbf3971a5a8b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 5 Feb 2022 06:45:29 +1000 Subject: [PATCH 0371/1109] Use doc contextual keyword to detect @doc macro in parser --- JuliaSyntax/src/parse_stream.jl | 22 ---------------------- JuliaSyntax/src/parser.jl | 6 +----- JuliaSyntax/src/token_kinds.jl | 27 ++++++++++++++++----------- 3 files changed, 17 insertions(+), 38 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index b8e15c31adf83..bb95be93d024a 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -318,28 +318,6 @@ function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false) stream.lookahead[_lookahead_index(stream, n, skip_newlines)] end -function _peek_equal_to(stream, first_byte, len, str) - cbuf = codeunits(str) - for i = 1:len - if stream.textbuf[first_byte + i - 1] != cbuf[i] - return false - end - end - return true -end - -""" -Return true if the node already emitted at `pos` covers the string `str` - -This is a hack for edge cases where the parser needs access to interpret normal -identifiers as contextural keywords. 
For example, the special parsing rules for -`@doc` line contination :-( -""" -function peek_behind_str(stream::ParseStream, pos::ParseStreamPosition, str::String) - s = stream.ranges[pos.output_index] - return _peek_equal_to(stream, first_byte(s), span(s), str) -end - function _peek_behind_fields(ranges, i) r = ranges[i] return (kind=kind(r), diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c304f054811f0..9252a9e4967ba 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -71,10 +71,6 @@ function peek_token(ps::ParseState, n=1; skip_newlines=nothing) peek_token(ps.stream, n, skip_newlines=skip_nl) end -function peek_behind_str(ps::ParseState, args...) - peek_behind_str(ps.stream, args...) -end - function peek_behind(ps::ParseState, args...) peek_behind(ps.stream, args...) end @@ -1395,7 +1391,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) n_args = parse_space_separated_exprs(ps) # TODO: Introduce K"doc" to make this hack less awful. - is_doc_macro = peek_behind_str(ps, macro_name_position, "doc") + is_doc_macro = peek_behind(ps, macro_name_position).orig_kind == K"doc" if is_doc_macro && n_args == 1 # Parse extended @doc args on next line # @doc x\ny ==> (macrocall @doc x y) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index f043888e9cb62..bf5b3fc84fc45 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -1,4 +1,7 @@ # Mapping from token string identifiers to enumeration values as used in @K_str +# +# TODO: Unify Tokenize with this approach so we don't need to write these out +# in two places. 
const _str_to_kind = let Ts = TzTokens Dict([ @@ -12,8 +15,6 @@ Dict([ ";" => Ts.SEMICOLON "BEGIN_KEYWORDS" => Ts.begin_keywords -"abstract" => Ts.ABSTRACT -"as" => Ts.AS "baremodule" => Ts.BAREMODULE "begin" => Ts.BEGIN "break" => Ts.BREAK @@ -35,18 +36,21 @@ Dict([ "local" => Ts.LOCAL "macro" => Ts.MACRO "module" => Ts.MODULE -"mutable" => Ts.MUTABLE -"new" => Ts.NEW -"outer" => Ts.OUTER -"primitive" => Ts.PRIMITIVE "quote" => Ts.QUOTE "return" => Ts.RETURN "struct" => Ts.STRUCT "try" => Ts.TRY -"type" => Ts.TYPE "using" => Ts.USING -"var" => Ts.VAR "while" => Ts.WHILE +# contextural keywords +"abstract" => Ts.ABSTRACT +"as" => Ts.AS +"doc" => Ts.DOC +"mutable" => Ts.MUTABLE +"outer" => Ts.OUTER +"primitive" => Ts.PRIMITIVE +"type" => Ts.TYPE +"var" => Ts.VAR "END_KEYWORDS" => Ts.end_keywords # FIXME: Define precisely what Nothing means; integrate better with other tokens. @@ -875,11 +879,12 @@ const _kind_to_str_unique = for kw in split(""" ( [ { } ] ) @ , ; " \"\"\" ` ``` - as abstract baremodule begin break catch const + baremodule begin break catch const continue do else elseif end export finally for function global if import let local - macro module mutable new outer primitive quote - return struct try type using var while + macro module quote return struct try type using while + + as abstract doc mutable outer primitive type var block call comparison curly string inert macrocall kw parameters toplevel tuple ref vect braces bracescat hcat From 0f82c6c78a3f54f0af491714236716f81c16c9df Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 5 Feb 2022 07:10:45 +1000 Subject: [PATCH 0372/1109] Fix spelling of contextual :-/ --- JuliaSyntax/Tokenize/src/lexer.jl | 4 ++-- JuliaSyntax/Tokenize/src/token.jl | 2 +- JuliaSyntax/Tokenize/src/token_kinds.jl | 4 ++-- JuliaSyntax/Tokenize/test/lexer.jl | 4 ++-- JuliaSyntax/src/parser.jl | 6 +++--- JuliaSyntax/src/token_kinds.jl | 2 +- JuliaSyntax/src/tokens.jl | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff 
--git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 1e215512d24a9..cb349553eb258 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -849,7 +849,7 @@ end function lex_prime(l, doemit = true) if l.last_token == Tokens.IDENTIFIER || - Tokens.iscontexturalkeyword(l.last_token) || + Tokens.iscontextualkeyword(l.last_token) || Tokens.iswordoperator(l.last_token) || l.last_token == Tokens.DOT || l.last_token == Tokens.RPAREN || @@ -900,7 +900,7 @@ end # A '"' has been consumed function lex_quote(l::Lexer) raw = l.last_token == Tokens.IDENTIFIER || - Tokens.iscontexturalkeyword(l.last_token) || + Tokens.iscontextualkeyword(l.last_token) || Tokens.iswordoperator(l.last_token) pc, dpc = dpeekchar(l) triplestr = pc == '"' && dpc == '"' diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index 956b0b106a412..cefd1f7b157ed 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -11,7 +11,7 @@ iskeyword(k::Kind) = begin_keywords < k < end_keywords isliteral(k::Kind) = begin_literal < k < end_literal isoperator(k::Kind) = begin_ops < k < end_ops -iscontexturalkeyword(k::Kind) = begin_contextural_keywords < k < end_contextural_keywords +iscontextualkeyword(k::Kind) = begin_contextual_keywords < k < end_contextual_keywords function iswordoperator(k::Kind) # Keyword-like operators diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 255802dff31f2..10cd82dc1091d 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -37,7 +37,7 @@ TRY, USING, WHILE, - begin_contextural_keywords, + begin_contextual_keywords, ABSTRACT, AS, DOC, @@ -46,7 +46,7 @@ PRIMITIVE, TYPE, VAR, - end_contextural_keywords, + end_contextual_keywords, end_keywords, begin_cstparser, diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 
11d4604b4dc52..628c3c2974a36 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -333,7 +333,7 @@ end @test ts[4] ~ (T.DQUOTE , "\"" ) @test ts[5] ~ (T.ENDMARKER , "" ) - # Contextural keywords and operators allowed as raw string prefixes + # Contextual keywords and operators allowed as raw string prefixes ts = collect(tokenize(raw""" var"x $ \ y" """)) @test ts[2] ~ (T.VAR , "var") @test ts[4] ~ (T.STRING , "x \$ \\ y") @@ -782,7 +782,7 @@ const all_kws = Set([ "try", "using", "while", - # Contextural keywords + # Contextual keywords "abstract", "as", "doc", diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9252a9e4967ba..923dd964c8731 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -237,7 +237,7 @@ end function is_reserved_word(k) k = kind(k) - is_keyword(k) && !is_contextural_keyword(k) + is_keyword(k) && !is_contextual_keyword(k) end # Return true if the next word (or word pair) is reserved, introducing a @@ -246,7 +246,7 @@ function peek_initial_reserved_words(ps::ParseState) k = peek(ps) if is_initial_reserved_word(ps, k) return true - elseif is_contextural_keyword(k) + elseif is_contextual_keyword(k) k2 = peek(ps,2) return (k == K"mutable" && k2 == K"struct") || (k == K"primitive" && k2 == K"type") || @@ -1636,7 +1636,7 @@ end # parse expressions or blocks introduced by syntactic reserved words. # # The caller should use peek_initial_reserved_words to determine whether -# to call parse_resword, or whether contextural keywords like `mutable` are +# to call parse_resword, or whether contextual keywords like `mutable` are # simple identifiers. 
# # flisp: parse-resword diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index bf5b3fc84fc45..2389aefe581f6 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -42,7 +42,7 @@ Dict([ "try" => Ts.TRY "using" => Ts.USING "while" => Ts.WHILE -# contextural keywords +# contextual keywords "abstract" => Ts.ABSTRACT "as" => Ts.AS "doc" => Ts.DOC diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 2821058551d45..5b40a090dd45d 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -43,7 +43,7 @@ kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) # Some renaming for naming consistency is_literal(k) = TzTokens.isliteral(kind(k)) is_keyword(k) = TzTokens.iskeyword(kind(k)) -is_contextural_keyword(k) = TzTokens.iscontexturalkeyword(kind(k)) +is_contextual_keyword(k) = TzTokens.iscontextualkeyword(kind(k)) is_operator(k) = TzTokens.isoperator(kind(k)) is_word_operator(k) = TzTokens.iswordoperator(kind(k)) From c91bdca22bb36f6aaf6d36e03fa45078995a7479 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 5 Feb 2022 18:38:30 +1000 Subject: [PATCH 0373/1109] Minor README tweaks --- JuliaSyntax/README.md | 51 ++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index e08135dbd99b9..f7acd335062b5 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -519,9 +519,9 @@ expressive than writing the parser by hand after accounting for the effort spent on the weird edge cases of a real language and writing the parser's tests and "supporting code". -On the other hand a hand-written parser completely flexible and can be mutually -understood with the reference implementation so I chose that approach for -JuliaSyntax. 
+On the other hand a hand-written parser is completely flexible and can be +mutually understood with the reference implementation so I chose that approach +for JuliaSyntax. # Resources @@ -673,6 +673,7 @@ work flows: * Syntax transformations - Choose some macros to implement. This is a basic test of mixing source trees from different files while preserving precise source locations. + (Done in .) * Formatting - Re-indent a file. This tests the handling of syntax trivia. * Refactoring @@ -704,7 +705,6 @@ The simplest idea possible is to have: * Leaf nodes are a single token * Children are in source order - Call represents a challenge for the AST vs Green tree in terms of node placement / iteration for infix operators vs normal prefix function calls. @@ -730,10 +730,10 @@ There seems to be a few ways forward: * We can use the existing `Expr` during macro expansion and try to recover source information after macro expansion using heuristics. Likely the presence of correct hygiene can help with this. -* Introducing a new AST would be possible if it were opt-in for new-style - macros only. Fixing hygiene should go along with this. Design challenge: How - do we make manipulating expressions reasonable when literals need to carry - source location? +* Introducing a new AST would be possible if it were opt-in for some + hypothetical "new-style macros" only. Fixing hygiene should go along with + this. Design challenge: How do we make manipulating expressions reasonable + when literals need to carry source location? One option which may help bridge between locationless ASTs and something new may be to have wrappers for the small number of literal types we need to cover. @@ -762,11 +762,11 @@ Some disorganized musings about error recovery Different types of errors seem to occur... 
* Disallowed syntax (such as lack of spaces in conditional expressions) - where we can reasonably just continue parsing the production and emit the - node with an error flag which is otherwise fully formed. In some cases like - parsing infix expressions with a missing tail, emitting a zero width error - token can lead to a fully formed parse tree without the productions up the - stack needing to participate in recovery. + where we can reasonably just continue parsing and emit the node with an error + flag which is otherwise fully formed. In some cases like parsing infix + expressions with a missing tail, emitting a zero width error token can lead + to a fully formed parse tree without the productions up the stack needing to + participate in recovery. * A token which is disallowed in current context. Eg, `=` in parse_atom, or a closing token inside an infix expression. Here we can emit a `K"error"`, but we can't descend further into the parse tree; we must pop several recursive @@ -820,9 +820,6 @@ example: - Restart parsing - Somehow make sure all of this can't result in infinite recursion 😅 -For this kind of recovery it sure would be good if we could reify the program -stack into a parser state object... - Missing commas or closing brackets in nested structures also present the existing parser with a problem. @@ -848,12 +845,25 @@ But not always! ```julia f(a, g(b, - c # -- missing closing `)` ? - d) + c # -- missing closing `,` ? + d)) ``` +Another particularly difficult problem for diagnostics in the current system is +broken parentheses or double quotes in string interpolations, especially when +nested. + # Fun research questions +### Parser Recovery + +Can we learn fast and reasonably accurate recovery heuristics for when the +parser encounters broken syntax, rather than hand-coding these? How would we +set the parser up so that training works and injecting the model is +nonintrusive? 
If the model is embedded in and works together with the parser, +can it be made compact enough that training is fast and the model itself is +tiny? + ### Formatting Given source and syntax tree, can we regress/learn a generative model of @@ -862,8 +872,3 @@ heuristics to get something which "looks nice"... and ML systems have become very good at heuristics. Also, we've got huge piles of training data — just choose some high quality, tastefully hand-formatted libraries. -### Parser Recovery - -Similarly, can we learn fast and reasonably accurate recovery heuristics for -when the parser encounters broken syntax rather than hand-coding these? How -do we set the parser up so that training works and inference is nonintrusive? From 118550997a11d570bc0d40f11b63f8c4cf6a342c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 5 Feb 2022 18:39:38 +1000 Subject: [PATCH 0374/1109] Print file and line in diagnostics --- JuliaSyntax/src/diagnostics.jl | 28 ++++++++++++++++++++++------ JuliaSyntax/src/parser_api.jl | 10 ++++++---- JuliaSyntax/src/syntax_tree.jl | 2 +- JuliaSyntax/src/utils.jl | 15 ++++++++++----- JuliaSyntax/test/syntax_tree.jl | 2 +- JuliaSyntax/test/test_utils.jl | 4 ++-- 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 2557cd41285ae..53f63c0a32e99 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -42,12 +42,26 @@ last_byte(d::Diagnostic) = d.last_byte is_error(d::Diagnostic) = d.level == :error function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) - col,prefix = diagnostic.level == :error ? (:light_red, "Error") : - diagnostic.level == :warning ? (:light_yellow, "Warning") : - diagnostic.level == :note ? (:light_blue, "Note") : - (:normal, "Info") - printstyled(io, "$prefix: ", color=col) - print(io, diagnostic.message, ":\n") + color,prefix = diagnostic.level == :error ? 
(:light_red, "Error") : + diagnostic.level == :warning ? (:light_yellow, "Warning") : + diagnostic.level == :note ? (:light_blue, "Note") : + (:normal, "Info") + line, col = source_location(source, first_byte(diagnostic)) + linecol = "$line:$col" + if !isnothing(source.filename) + locstr = "$(source.filename):$linecol" + if get(io, :color, false) + # Also add hyperlinks in color terminals + url = "file://$(abspath(source.filename))#$linecol" + locstr = "\e]8;;$url\e\\$locstr\e]8;;\e\\" + end + else + locstr = "line $locstr" + end + print(io, prefix, ": ") + printstyled(io, diagnostic.message, color=color) + printstyled(io, "\n", "@ $locstr", color=:light_black) + print(io, "\n") p = first_byte(diagnostic) q = last_byte(diagnostic) @@ -66,6 +80,8 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) hicol = (100,40,40) + # TODO: show line numbers on left + print(io, source[a:prevind(text, p)]) # There's two situations, either if b >= c diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 8fa0b4482e884..c5377f8551ece 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -47,10 +47,12 @@ struct ParseError <: Exception end function Base.showerror(io::IO, err::ParseError, bt; backtrace=false) + println(io, "ParseError:") show_diagnostics(io, err.diagnostics, err.source) end function Base.showerror(io::IO, err::ParseError) + println(io, "ParseError:") show_diagnostics(io, err.diagnostics, err.source) end @@ -142,7 +144,7 @@ See [`parse`](@ref) for a more complete and powerful interface to the parser, as well as a description of the `version` and `rule` keywords. """ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, - ignore_trivia=true, kws...) 
where {T} + ignore_trivia=true, filename=nothing) where {T} stream = ParseStream(input...; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) @@ -154,7 +156,7 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, emit_diagnostic(stream, error="unexpected text after parsing $rule") end if any_error(stream.diagnostics) - source = SourceFile(sourcetext(stream, steal_textbuf=true)) + source = SourceFile(sourcetext(stream, steal_textbuf=true), filename=filename) throw(ParseError(source, stream.diagnostics)) end # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind @@ -162,12 +164,12 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, # * It's kind of required for GreenNode, as GreenNode only records spans, # not absolute positions. # * Dropping it would be ok for SyntaxNode and Expr... - tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", kws...) + tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename) if !isempty(stream.diagnostics) # Crudely format any warnings to the current logger. buf = IOBuffer() show_diagnostics(IOContext(buf, stdout), stream, - SourceFile(sourcetext(stream, steal_textbuf=true))) + SourceFile(sourcetext(stream, steal_textbuf=true), filename=filename)) @warn Text(String(take!(buf))) end tree diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index f276bdbcbf873..9823006eae8ed 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -423,7 +423,7 @@ Base.Expr(node::SyntaxNode) = _to_expr(node) #------------------------------------------------------------------------------- -function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename="none", kws...) +function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, kws...) green_tree = build_tree(GreenNode, stream; kws...) 
source = SourceFile(sourcetext(stream), filename=filename) SyntaxNode(source, green_tree, first_byte(stream)) diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index be7baa7f89d47..b7edce50b6ddd 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -26,11 +26,16 @@ function flisp_parse_all(code; filename="none") end # Really remove line numbers, even from Expr(:toplevel) -function remove_linenums!(ex) - ex = Base.remove_linenums!(ex) - if Meta.isexpr(ex, :toplevel) - filter!(x->!(x isa LineNumberNode), ex.args) +remove_linenums!(ex) = ex +function remove_linenums!(ex::Expr) + if ex.head === :block || ex.head === :quote || ex.head === :toplevel + filter!(ex.args) do x + !(isa(x, Expr) && x.head === :line || isa(x, LineNumberNode)) + end end - ex + for subex in ex.args + subex isa Expr && remove_linenums!(subex) + end + return ex end diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 96823ea8bec74..4eb94558fc9f8 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -15,7 +15,7 @@ Expr(:(=), Expr(:call, :f), Expr(:block, - LineNumberNode(1, :none), + LineNumberNode(1), :xs)) # flisp parser quirk: In a for loop the block is not added, despite # this defining a short-form function. 
diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 6c6ef83d5981e..0005c4bfc393c 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -40,8 +40,8 @@ end function parsers_agree_on_file(path) text = read(path, String) - ex = parseall(Expr, text) - fl_ex = flisp_parse_all(text) + ex = parseall(Expr, text, filename=path) + fl_ex = flisp_parse_all(text, filename=path) JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) end From 6206438f05a3e533e5aaf5f5195f383582517108 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 7 Feb 2022 08:24:35 +1000 Subject: [PATCH 0375/1109] Fix diagnostic printing without filename --- JuliaSyntax/src/diagnostics.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 53f63c0a32e99..b6e5d1caefcdb 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -56,7 +56,7 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) locstr = "\e]8;;$url\e\\$locstr\e]8;;\e\\" end else - locstr = "line $locstr" + locstr = "line $linecol" end print(io, prefix, ": ") printstyled(io, diagnostic.message, color=color) From 070cb15aeee80acf94570689ed7fb2c24f653f82 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 7 Feb 2022 12:04:07 +1000 Subject: [PATCH 0376/1109] Improve printing of reduced test cases --- JuliaSyntax/test/test_utils.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 0005c4bfc393c..65e53e9654632 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -128,19 +128,20 @@ Reduced the syntax (a string or SyntaxNode) from `file_content` into the minimal failing subtrees of syntax and write the results to `out`. 
""" function format_reduced_tests(out::IO, file_content; filename=nothing) + println(out, "#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") if !isnothing(filename) println(out, "# $filename") end text = nothing try rtrees = reduce_test(file_content) - first = true for rt in rtrees - if !first - print(out, "\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n") + print(out, "\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n") + t = sourcetext(rt) + print(out, t) + if !endswith(t, '\n') + println(out) end - first = false - print(out, sourcetext(rt)) end catch exc exc isa InterruptException && rethrow() From f8d5139190f4dc5e708c992d91c80f1dff1472dd Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Mon, 7 Feb 2022 18:59:40 -0500 Subject: [PATCH 0377/1109] Add support for empty nd-array syntax (JuliaLang/JuliaSyntax.jl#4) --- JuliaSyntax/src/parser.jl | 15 +++++++++++---- JuliaSyntax/test/parser.jl | 3 +++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 923dd964c8731..8b09f18b6c471 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2673,13 +2673,13 @@ end # power of 0 for whitespace and negative numbers for other separators. 
# # FIXME: Error messages for mixed spaces and ;; delimiters -function parse_array_separator(ps) - t = peek_token(ps) +function parse_array_separator(ps; skip_newlines=false) + t = peek_token(ps; skip_newlines=skip_newlines) k = kind(t) if k == K";" n_semis = 1 while true - bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG; skip_newlines=skip_newlines) t = peek_token(ps) if kind(t) != K";" || t.had_whitespace break @@ -2722,11 +2722,18 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) whitespace_newline=false, for_generator=true) k = peek(ps, skip_newlines=true) + mark = position(ps) if k == closer # [] ==> (vect) return parse_vect(ps, closer) + elseif k == K";" + # [;;] ==> (ncat 2) + # [;; \n ] ==> (ncat 2) + n_semis, _ = parse_array_separator(ps; skip_newlines=true) + bump_closing_token(ps, closer) + min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") + return (K"ncat", set_numeric_flags(n_semis)) end - mark = position(ps) parse_eq_star(ps) k = peek(ps, skip_newlines=true) if k == K"," || k == closer diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 9e20207cd7436..d54ab791265fe 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -633,6 +633,9 @@ tests = [ # Column major ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" + # Empty nd arrays + ((v=v"1.8",), "[;;]") => "(ncat-2)" + ((v=v"1.8",), "[\n ;; \n ]") => "(ncat-2)" ], JuliaSyntax.parse_string => [ "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" From ff87a838fa6923cabf165e14e73491b1b43ea827 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 8 Feb 2022 11:46:40 +1000 Subject: [PATCH 0378/1109] Fix inline tests for empty multidimensional arrays --- JuliaSyntax/src/parser.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 
8b09f18b6c471..5141c77c2118f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2727,8 +2727,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [] ==> (vect) return parse_vect(ps, closer) elseif k == K";" - # [;;] ==> (ncat 2) - # [;; \n ] ==> (ncat 2) + #v1.8: [;;] ==> (ncat-2) + #v1.8: [\n ;; \n ] ==> (ncat-2) n_semis, _ = parse_array_separator(ps; skip_newlines=true) bump_closing_token(ps, closer) min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") From 146b538bde824fed54ea26e03b7e8e0c1d5fb21e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 8 Feb 2022 11:53:37 +1000 Subject: [PATCH 0379/1109] Fix trivia diagnostic range in empty mulitdimensional array syntax + add tests --- JuliaSyntax/src/parser.jl | 4 +++- JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5141c77c2118f..4cde99933755f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2727,11 +2727,13 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [] ==> (vect) return parse_vect(ps, closer) elseif k == K";" + #v1.8: [;] ==> (ncat-1) #v1.8: [;;] ==> (ncat-2) #v1.8: [\n ;; \n ] ==> (ncat-2) + #v1.7: [;;] ==> (ncat-2 (error)) n_semis, _ = parse_array_separator(ps; skip_newlines=true) - bump_closing_token(ps, closer) min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") + bump_closing_token(ps, closer) return (K"ncat", set_numeric_flags(n_semis)) end parse_eq_star(ps) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d54ab791265fe..629fd2f446e2e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -634,8 +634,10 @@ tests = [ ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" # Empty nd arrays + ((v=v"1.8",), "[;]") => "(ncat-1)" ((v=v"1.8",), 
"[;;]") => "(ncat-2)" ((v=v"1.8",), "[\n ;; \n ]") => "(ncat-2)" + ((v=v"1.7",), "[;;]") => "(ncat-2 (error))" ], JuliaSyntax.parse_string => [ "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" From f35624fc08e7d742f937efe3e0b36f648a1fe29b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 8 Feb 2022 16:45:48 +1000 Subject: [PATCH 0380/1109] Fix to allow operator-named macros --- JuliaSyntax/README.md | 32 ++++++++++++++---- JuliaSyntax/src/parser.jl | 66 +++++++++++++++++++------------------- JuliaSyntax/test/parser.jl | 13 ++++---- 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index f7acd335062b5..be55e1d1a673b 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -365,7 +365,8 @@ parsing `key=val` pairs inside parentheses. Flattened generators are uniquely problematic because the Julia AST doesn't respect a key rule we normally expect: that the children of an AST node are a *contiguous* range in the source text. This is because the `for`s in -`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop as +`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to +mean ``` for x in xs @@ -373,7 +374,8 @@ for y in ys push!(xy, collection) ``` -and the standard Julia AST is like this: +so the `xy` prefix is in the *body* of the innermost for loop. Following this, +the standard Julia AST is like so: ``` (flatten @@ -384,10 +386,13 @@ and the standard Julia AST is like this: (= x xs))) ``` -however, note that if this tree were flattened, the order of tokens would be -`(xy) (y in ys) (x in xs)` which is *not* the source order. So in this case -our green tree must deviate from the Julia AST. The natural representation -seems to be to flatten the generators: +however, note that if this tree were flattened, the order would be +`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the +source order. 
+ +However, our green tree is strictly source-ordered, so we must deviate from the +Julia AST. The natural representation seems to be to remove the generators and +use a flattened structure: ``` (flatten @@ -438,6 +443,21 @@ seems to be to flatten the generators: * In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and `@S {a b}` parse. Conversely, `@S[a b]` parses. +* Macro names and invocations are post-processed from the output of + `parse-atom` / `parse-call`, which leads to some surprising and questionable + constructs which "work": + - Absurdities like `@(((((a))))) x ==> (macrocall @a x)` + - Infix macros!? `@(x + y) ==> (macrocall @+ x y)` (ok, kinda cute and has + some weird logic to it... but what?) + - Similarly additional parentheses are allowed `@(f(x)) ==> (macrocall @f x)` + +* Allowing `@` first in macro module paths (eg `@A.B.x` instead of `A.B.@x`) + seems like unnecessary variation in syntax. It makes parsing valid macro + module paths more complex and leads to oddities like `@$.x y ==> (macrocall + ($ (quote x)) y` where the `$` is first parsed as a macro name, but turns out + to be the module name after the `.` is parsed. But `$` can never be a valid + module name in normal Julia code so this makes no sense. + # Comparisons to other packages ### Official Julia compiler diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4cde99933755f..5496550205e3f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -71,8 +71,8 @@ function peek_token(ps::ParseState, n=1; skip_newlines=nothing) peek_token(ps.stream, n, skip_newlines=skip_nl) end -function peek_behind(ps::ParseState, args...) - peek_behind(ps.stream, args...) +function peek_behind(ps::ParseState, args...; kws...) + peek_behind(ps.stream, args...; kws...) end function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) 
@@ -1336,20 +1336,15 @@ function parse_identifier_or_interpolate(ps::ParseState) end end -# Emit an error if the call chain syntax is not a valid module reference -function emit_modref_error(ps, mark) - emit(ps, mark, K"error", error="not a valid module reference") -end - -function finish_macroname(ps, mark, is_valid_modref, macro_name_position, +function finish_macroname(ps, mark, valid_macroname, macro_name_position, name_kind=nothing) - if is_valid_modref + if valid_macroname if isnothing(name_kind) name_kind = macro_name_kind(peek_behind(ps, macro_name_position).kind) end reset_node!(ps, macro_name_position, kind = name_kind) else - emit(ps, mark, K"error", error="not a valid module reference") + emit(ps, mark, K"error", error="not a valid macro name or macro module path") end end @@ -1367,14 +1362,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end # source range of the @-prefixed part of a macro macro_atname_range = nothing - kb = peek_behind(ps).kind # $A.@x ==> (macrocall (. ($ A) (quote @x))) - is_valid_modref = kb in KSet`Identifier . $` + valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet`Identifier . $` # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. macro_name_position = position(ps) # points to same output span as peek_behind while true - this_iter_valid_modref = false + this_iter_valid_macroname = false t = peek_token(ps) k = kind(t) if is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) @@ -1384,7 +1378,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @foo (x,y) ==> (macrocall @foo (tuple x y)) # a().@x y ==> (macrocall (error (. 
(call a) (quote x))) y) # [@foo "x"] ==> (vect (macrocall @foo "x")) - finish_macroname(ps, mark, is_valid_modref, macro_name_position) + finish_macroname(ps, mark, valid_macroname, macro_name_position) with_space_sensitive(ps) do ps # Space separated macro arguments # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) @@ -1420,7 +1414,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k == K"(" if is_macrocall # a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y) - finish_macroname(ps, mark, is_valid_modref, macro_name_position) + finish_macroname(ps, mark, valid_macroname, macro_name_position) end # f(a,b) ==> (call f a b) # f (a) ==> (call f (error-t) a b) @@ -1443,7 +1437,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k == K"[" if is_macrocall # a().@x[1] ==> (macrocall (ref (error (. (call a) (quote x))) 1)) - finish_macroname(ps, mark, is_valid_modref, macro_name_position) + finish_macroname(ps, mark, valid_macroname, macro_name_position) end # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) @@ -1479,7 +1473,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump(ps) emit(ps, emark, K"error", TRIVIA_FLAG, error="the .' operator for transpose is discontinued") - is_valid_modref = false + valid_macroname = false continue end if !isnothing(macro_atname_range) @@ -1529,7 +1523,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, m, K"inert") emit(ps, mark, K".") # A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x))) - this_iter_valid_modref = true + this_iter_valid_macroname = true elseif k == K"@" # A macro call after some prefix A has been consumed # A.@x ==> (macrocall (. 
A (quote @x))) @@ -1547,7 +1541,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = (m, macro_name_position) emit(ps, m, K"quote") emit(ps, mark, K".") - this_iter_valid_modref = true + this_iter_valid_macroname = true else # Field/property syntax # f.x.y ==> (. (. f (quote x)) (quote y)) @@ -1556,7 +1550,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_name_position = position(ps) emit(ps, m, K"quote") emit(ps, mark, K".") - this_iter_valid_modref = true + this_iter_valid_macroname = true end elseif k == K"'" if !is_suffixed(t) @@ -1572,7 +1566,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Type parameter curlies and macro calls if is_macrocall # a().@x{y} ==> (macrocall (error (. (call a) (quote x))) (braces y)) - finish_macroname(ps, mark, is_valid_modref, macro_name_position) + finish_macroname(ps, mark, valid_macroname, macro_name_position) end m = position(ps) # S {a} ==> (curly S (error-t) a) @@ -1590,7 +1584,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"curly") end elseif k in KSet` " """ \` \`\`\` ` && - !t.had_whitespace && is_valid_modref + !t.had_whitespace && valid_macroname # Custom string and command literals # x"str" ==> (macrocall @x_str "str") # x`str` ==> (macrocall @x_cmd "str") @@ -1600,7 +1594,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. outk = is_string_delim(k) ? 
K"StringMacroName" : K"CmdMacroName" - finish_macroname(ps, mark, is_valid_modref, macro_name_position, outk) + finish_macroname(ps, mark, valid_macroname, macro_name_position, outk) parse_raw_string(ps) t = peek_token(ps) k = kind(t) @@ -1619,7 +1613,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) else break end - is_valid_modref &= this_iter_valid_modref + valid_macroname &= this_iter_valid_macroname end end @@ -2175,22 +2169,24 @@ function macro_name_kind(k) end # If remap_kind is false, the kind will be remapped by parse_call_chain after -# it discovers the macro name component of the module path. +# it discovers which component of the macro's module path is the macro name. # # flisp: parse-macro-name -function parse_macro_name(ps::ParseState; remap_kind=false) +function parse_macro_name(ps::ParseState) bump_disallowed_space(ps) - if peek(ps) == K"." - # @. y ==> (macrocall (quote @__dot__) y) + mark = position(ps) + k = peek(ps) + if k == K"." + # @. y ==> (macrocall @__dot__ y) bump(ps) else + # @! x ==> (macrocall @! x) + # @.. x ==> (macrocall @.. 
x) + # @$ x ==> (macrocall @$ x) with_space_sensitive(ps) do ps1 parse_atom(ps1, false) end end - if remap_kind - reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps).kind)) - end end # Parse an identifier, interpolation of @-prefixed symbol @@ -2202,7 +2198,8 @@ function parse_atsym(ps::ParseState) # export @a ==> (export @a) # export a, \n @b ==> (export a @b) bump(ps, TRIVIA_FLAG) - parse_macro_name(ps, remap_kind=true) + parse_macro_name(ps) + reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps).kind)) else # export a ==> (export a) # export \n a ==> (export a) @@ -3133,7 +3130,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) # Quoted syntactic operators allowed # :+= ==> (quote +=) # :.= ==> (quote .=) - bump(ps) + # Remap the kind here to K"Identifier", as operators parsed in this + # branch should be in "identifier-like" positions (I guess this is + # correct? is it convenient?) + bump(ps, remap_kind=K"Identifier") end elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 629fd2f446e2e..dce25028ff4a0 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -229,14 +229,13 @@ tests = [ "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. 
(\$ A) (quote @x)))" # do - "f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))" "f() do\nend" => "(do (call f) (-> (tuple) (block)))" "f() do ; body end" => "(do (call f) (-> (tuple) (block body)))" - "f(x) do y,z body end" => "(do (call f x) (-> (tuple y z) (block body)))" + "f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))" + "f(x) do y body end" => "(do (call f x) (-> (tuple y) (block body)))" # Keyword arguments depend on call vs macrocall "foo(a=1)" => "(call foo (kw a 1))" "@foo(a=1)" => "(macrocall @foo (= a 1))" - # f(x) do y body end ==> (do (call f x) (-> (tuple y) (block body))) "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo x)" "@foo (x,y)" => "(macrocall @foo (tuple x y))" @@ -245,6 +244,10 @@ tests = [ "[@foo \"x\"]" => "(vect (macrocall @foo \"x\"))" "[f (x)]" => "(hcat f x)" "[f \"x\"]" => "(hcat f \"x\")" + # Macro names + "@! x" => "(macrocall @! x)" + "@.. x" => "(macrocall @.. x)" + "@\$ y" => "(macrocall @\$ y)" # Special @doc parsing rules "@doc x\ny" => "(macrocall @doc x y)" "A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)" @@ -670,9 +673,7 @@ broken_tests = [ # Triple-quoted string processing "\"\"\"\n\$x\"\"\"" => "(string x)" "\"\"\"\$x\n\"\"\"" => "(string x \"\n\")" - # Operator-named macros with and without spaces - "@! x" => "(macrocall @! x)" - "@.. x" => "(macrocall @.. x)" + # Operator-named macros without spaces "@!x" => "(macrocall @! x)" "@..x" => "(macrocall @.. 
x)" "@.x" => "(macrocall @__dot__ x)" From 6cd9b8f064dc761168aa44fffedf332f516b3173 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 8 Feb 2022 16:58:16 +1000 Subject: [PATCH 0381/1109] Tokenize: Use chars instead of length-1 strings in accept() --- JuliaSyntax/Tokenize/src/lexer.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index cb349553eb258..a19dacd2a007d 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -889,7 +889,7 @@ end function lex_amper(l::Lexer) if accept(l, '&') return emit(l, Tokens.LAZY_AND) - elseif accept(l, "=") + elseif accept(l, '=') return emit(l, Tokens.AND_EQ) else return emit(l, Tokens.AND) @@ -964,13 +964,13 @@ end # Parse a token starting with a forward slash. # A '/' has been consumed function lex_forwardslash(l::Lexer) - if accept(l, "/") # // - if accept(l, "=") # //= + if accept(l, '/') # // + if accept(l, '=') # //= return emit(l, Tokens.FWDFWD_SLASH_EQ) else return emit(l, Tokens.FWDFWD_SLASH) end - elseif accept(l, "=") # /= + elseif accept(l, '=') # /= return emit(l, Tokens.FWD_SLASH_EQ) else return emit(l, Tokens.FWD_SLASH) @@ -1038,10 +1038,10 @@ function lex_dot(l::Lexer) elseif pc =='&' l.dotop = true readchar(l) - if accept(l, "=") + if accept(l, '=') return emit(l, Tokens.AND_EQ) else - if accept(l, "&") + if accept(l, '&') return emit(l, Tokens.LAZY_AND) end return emit(l, Tokens.AND) @@ -1057,7 +1057,7 @@ function lex_dot(l::Lexer) elseif pc == '|' l.dotop = true readchar(l) - if accept(l, "|") + if accept(l, '|') return emit(l, Tokens.LAZY_OR) end return lex_bar(l) From 1efbe13b45d2e992e6cca0966d87bbbf33dc5e8a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 8 Feb 2022 17:35:02 +1000 Subject: [PATCH 0382/1109] Tokenize: remove startpos and endpos from RawToken The line and column are only necessary for diagnostics so we can defer the work of computing/tracking these 
until diagnostics are emitted. This speeds up Tokenization with RawToken by about 20% --- JuliaSyntax/Tokenize/src/lexer.jl | 11 ++++++++--- JuliaSyntax/Tokenize/src/token.jl | 13 ++++--------- JuliaSyntax/Tokenize/src/utilities.jl | 1 - 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index a19dacd2a007d..ce87eff0feebc 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -221,6 +221,13 @@ function readchar(l::Lexer{I}) where {I <: IO} return l.chars[1] end +function readchar(l::Lexer{I,RawToken}) where {I <: IO} + c = readchar(l.io) + l.chars = (l.chars[2], l.chars[3], l.chars[4], c) + l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io)) + return l.chars[1] +end + readon(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] function readon(l::Lexer{I,Token}) where {I <: IO} if l.charstore.size != 0 @@ -308,9 +315,7 @@ function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E end end - tok = RawToken(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, err, l.dotop, suffix) + tok = RawToken(kind, startpos(l), position(l) - 1, err, l.dotop, suffix) l.dotop = false l.last_token = kind diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index cefd1f7b157ed..cb7e2da7f9367 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -76,19 +76,16 @@ Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false) struct RawToken <: AbstractToken kind::Kind # Offsets into a string or buffer - startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index - endpos::Tuple{Int, Int} startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer token_error::TokenError dotop::Bool suffix::Bool end -function 
RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, - startbyte::Int, endbyte::Int) -RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false) +function RawToken(kind::Kind, startbyte::Int, endbyte::Int) + RawToken(kind, startbyte, endbyte, NO_ERR, false, false) end -RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false) +RawToken() = RawToken(ERROR, 0, 0, UNKNOWN, false, false) const _EMPTY_TOKEN = Token() @@ -177,9 +174,7 @@ end Base.print(io::IO, t::Token) = print(io, untokenize(t)) function Base.show(io::IO, t::RawToken) - start_r, start_c = startpos(t) - end_r, end_c = endpos(t) - print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) + print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) print(io, rpad(kind(t), 15, " ")) end diff --git a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/Tokenize/src/utilities.jl index 173d330a16a7c..1c0cd3b7ebb28 100644 --- a/JuliaSyntax/Tokenize/src/utilities.jl +++ b/JuliaSyntax/Tokenize/src/utilities.jl @@ -198,7 +198,6 @@ eof(io::IO) = Base.eof(io) eof(c::Char) = c === EOF_CHAR readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) -takechar(io::IO) = (readchar(io); io) # Checks whether a Char is an operator, which can not be juxtaposed with another # Char to be an operator (i.e <=), and can be prefixed by a dot (.) From 389df88b8747d9a791388d42f55ce954cdb58864 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 8 Feb 2022 17:44:25 +1000 Subject: [PATCH 0383/1109] Use UInt32 for stored character indices This greatly reduces the size of the TaggedRange data structure on 64 bit systems, at the cost of not being able to parse files larger than 4GiB. But that seems like a reasonable tradeoff, for now. (We can parameterize later, if really necessary) It apperas that this one change makes parsing ~10% faster. 
--- JuliaSyntax/src/parse_stream.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index bb95be93d024a..0ab4e8040aae0 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -79,8 +79,8 @@ Information about preceding whitespace is added for use by the parser. """ struct SyntaxToken kind::Kind - first_byte::Int - last_byte::Int + first_byte::UInt32 + last_byte::UInt32 # Flags for leading whitespace is_dotted::Bool is_suffixed::Bool @@ -122,9 +122,9 @@ TODO: Optimize this data structure? It's very large at the moment. struct TaggedRange head::SyntaxHead # Kind,flags orig_kind::Kind # Kind of the original token for leaf tokens, or K"Nothing" - first_byte::Int # First byte in the input text - last_byte::Int # Last byte in the input text - start_mark::Int # Index of first emitted range which this range covers + first_byte::UInt32 # First byte in the input text + last_byte::UInt32 # Last byte in the input text + start_mark::UInt32 # Index of first emitted range which this range covers end head(range::TaggedRange) = range.head @@ -132,7 +132,7 @@ kind(range::TaggedRange) = kind(range.head) flags(range::TaggedRange) = flags(range.head) first_byte(range::TaggedRange) = range.first_byte last_byte(range::TaggedRange) = range.last_byte -span(range::TaggedRange) = last_byte(range) - first_byte(range) + 1 +span(range::TaggedRange) = 1 + last_byte(range) - first_byte(range) #------------------------------------------------------------------------------- struct ParseStreamPosition From da2d09b652adcb775c0dda5923165a424544933d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 12 Feb 2022 13:41:13 +1000 Subject: [PATCH 0384/1109] Tokenize: Embedded whitespace trivia in strings * Emit escaped newlines as whitespace trivia * Split triple strings at newlines so parser can detect indentation and turn it into trivia. 
--- JuliaSyntax/Tokenize/src/lexer.jl | 94 +++++++++++++++++------------- JuliaSyntax/Tokenize/test/lexer.jl | 22 ++++++- 2 files changed, 76 insertions(+), 40 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index ce87eff0feebc..d204bd4bb3ed5 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -478,6 +478,19 @@ function lex_string_chunk(l) # Start interpolation readchar(l) return emit(l, Tokens.EX_OR) + elseif !state.raw && pc == '\\' && (pc2 = dpeekchar(l)[2]; + pc2 == '\r' || pc2 == '\n') + # Process escaped newline as whitespace + readchar(l) + readon(l) + readchar(l) + if pc2 == '\r' && peekchar(l) == '\n' + readchar(l) + end + while (pc = peekchar(l); pc == ' ' || pc == '\t') + readchar(l) + end + return emit(l, Tokens.WHITESPACE) elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) # Terminate string pop!(l.string_states) @@ -493,14 +506,55 @@ function lex_string_chunk(l) readon(l) # Read a chunk of string characters if state.raw - read_raw_string(l, state.delim, state.triplestr) + # Raw strings treat all characters as literals with the exception that + # the closing quotes can be escaped with an odd number of \ characters. 
+ while true + pc = peekchar(l) + if string_terminates(l, state.delim, state.triplestr) || eof(pc) + break + elseif state.triplestr && (pc == '\n' || pc == '\r') + # triple quoted newline splitting + readchar(l) + if pc == '\r' && peekchar(l) == '\n' + readchar(l) + end + break + end + c = readchar(l) + if c == '\\' + n = 1 + while true + readchar(l) + n += 1 + if peekchar(l) != '\\' + break + end + end + if peekchar(l) == state.delim && !iseven(n) + readchar(l) + end + end + end else while true pc = peekchar(l) if pc == '$' || eof(pc) break + elseif state.triplestr && (pc == '\n' || pc == '\r') + # triple quoted newline splitting + readchar(l) + if pc == '\r' && peekchar(l) == '\n' + readchar(l) + end + break elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) break + elseif pc == '\\' + # Escaped newline + pc2 = dpeekchar(l)[2] + if pc2 == '\r' || pc2 == '\n' + break + end end c = readchar(l) if c == '\\' @@ -928,44 +982,6 @@ function string_terminates(l, delim::Char, triplestr::Bool) end end -function terminate_string(l, delim::Char, triplestr::Bool) - # @assert string_terminates(l, delim, triplestr) - readchar(l) - if triplestr - readchar(l) - readchar(l) - return delim == '"' ? Tokens.TRIPLE_DQUOTE : Tokens.TRIPLE_BACKTICK - else - return delim == '"' ? Tokens.DQUOTE : Tokens.BACKTICK - end -end - -# Read a raw string for use with custom string macros -# -# Raw strings treat all characters as literals with the exception that the -# closing quotes can be escaped with an odd number of \ characters. -function read_raw_string(l::Lexer, delim::Char, triplestr::Bool) - while true - if string_terminates(l, delim, triplestr) || eof(peekchar(l)) - return - end - c = readchar(l) - if c == '\\' - n = 1 - while true - readchar(l) - n += 1 - if peekchar(l) != '\\' - break - end - end - if peekchar(l) == delim && !iseven(n) - readchar(l) - end - end - end -end - # Parse a token starting with a forward slash. 
# A '/' has been consumed function lex_forwardslash(l::Lexer) diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 628c3c2974a36..10ee9a422068e 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -202,7 +202,7 @@ end """)) kinds = [T.COMMENT, T.NEWLINE_WS, - T.TRIPLE_DQUOTE, T.STRING, T.TRIPLE_DQUOTE, T.NEWLINE_WS, + T.TRIPLE_DQUOTE, T.STRING, T.STRING, T.TRIPLE_DQUOTE, T.NEWLINE_WS, T.INTEGER, T.NEWLINE_WS, T.ENDMARKER] @test T.kind.(toks) == kinds @@ -347,6 +347,26 @@ end @test ts[4] ~ (T.STRING , "x \$ \\ y") end +@testset "string escaped newline whitespace" begin + ts = collect(tokenize("\"x\\\n \ty\"")) + @test ts[1] ~ (T.DQUOTE , "\"") + @test ts[2] ~ (T.STRING, "x") + @test ts[3] ~ (T.WHITESPACE, "\\\n \t") + @test ts[4] ~ (T.STRING, "y") + @test ts[5] ~ (T.DQUOTE , "\"") +end + +@testset "triple quoted string line splitting" begin + ts = collect(tokenize("\"\"\"\nx\r\ny\rz\n\r\"\"\"")) + @test ts[1] ~ (T.TRIPLE_DQUOTE , "\"\"\"") + @test ts[2] ~ (T.STRING, "\n") + @test ts[3] ~ (T.STRING, "x\r\n") + @test ts[4] ~ (T.STRING, "y\r") + @test ts[5] ~ (T.STRING, "z\n") + @test ts[6] ~ (T.STRING, "\r") + @test ts[7] ~ (T.TRIPLE_DQUOTE, "\"\"\"") +end + @testset "interpolation" begin @testset "basic" begin ts = collect(tokenize("\"\$x \$y\"")) From fb9892bd284ba90c7d8c709c89ae6016865cc89d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 12 Feb 2022 13:45:12 +1000 Subject: [PATCH 0385/1109] Process triple quoted string trivia during parsing Triple quoted strings are de-indented with fairly complicated rules * Based on string content * The position of interpolations across multiple tokens Because indentation isn't part of the string data it should also ideally be excluded from the string content within the green tree. That is, it should be treated as separate whitespace trivia tokens. With this separation things like formatting should be much easier. 
The same reasoning goes for escaping newlines and following whitespace with backslashes in normal strings. The downside of detecting string trivia during parsing is that string content is split over several tokens. Here we wrap these in the K"string" kind (as is already used for interpolations). The individual chunks can then be reassembled during Expr construction. A possible alternative might be to reuse the K"String" and K"CmdString" kinds for groups of string chunks (without interpolation). --- JuliaSyntax/README.md | 12 ++ JuliaSyntax/src/parse_stream.jl | 36 ++++- JuliaSyntax/src/parser.jl | 255 +++++++++++++++++++++++------- JuliaSyntax/src/syntax_tree.jl | 74 +++++---- JuliaSyntax/src/value_parsing.jl | 142 +---------------- JuliaSyntax/test/parser.jl | 56 +++++-- JuliaSyntax/test/value_parsing.jl | 101 +----------- 7 files changed, 333 insertions(+), 343 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index be55e1d1a673b..78e32056464d7 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -458,6 +458,18 @@ use a flattened structure: to be the module name after the `.` is parsed. But `$` can never be a valid module name in normal Julia code so this makes no sense. +* Triple quoted `var"""##"""` identifiers are allowed. But it's not clear these + are required or desired given that they come with the complex triple-quoted + string deindentation rules. + +* Deindentation of triple quoted strings with mismatched whitespace is weird + when there's nothing but whitespace. For example, we have + `"\"\"\"\n \n \n \"\"\"" ==> "\n \n"` so the middle line of whitespace + here isn't dedented but the other two longer lines are?? Here it seems more + consistent that either (a) the middle line should be deindented completely, + or (b) all lines should be dedented only one character, as that's the + matching prefix. 
+ # Comparisons to other packages ### Official Julia compiler diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 0ab4e8040aae0..1caae991fac5a 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -130,8 +130,8 @@ end head(range::TaggedRange) = range.head kind(range::TaggedRange) = kind(range.head) flags(range::TaggedRange) = flags(range.head) -first_byte(range::TaggedRange) = range.first_byte -last_byte(range::TaggedRange) = range.last_byte +first_byte(range::TaggedRange) = Int(range.first_byte) +last_byte(range::TaggedRange) = Int(range.last_byte) span(range::TaggedRange) = 1 + last_byte(range) - first_byte(range) #------------------------------------------------------------------------------- @@ -492,16 +492,42 @@ the kind or flags of a token in a way which would require unbounded lookahead in a recursive descent parser. Modifying the output with reset_node! is useful in those cases. """ -function reset_node!(stream::ParseStream, mark::ParseStreamPosition; +function reset_node!(stream::ParseStream, pos::ParseStreamPosition; kind=nothing, flags=nothing) - range = stream.ranges[mark.output_index] + range = stream.ranges[pos.output_index] k = isnothing(kind) ? (@__MODULE__).kind(range) : kind f = isnothing(flags) ? (@__MODULE__).flags(range) : flags - stream.ranges[mark.output_index] = + stream.ranges[pos.output_index] = TaggedRange(SyntaxHead(k, f), range.orig_kind, first_byte(range), last_byte(range), range.start_mark) end +""" +Move `numbytes` from the range at output position `pos+1` to the output +position `pos`. If the donor range becomes empty, mark it dead with +K"TOMBSTONE" and return `true`, otherwise return `false`. + +Hack alert! This is used only for managing the complicated rules related to +dedenting triple quoted strings. 
+""" +function steal_node_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes) + i = pos.output_index + r1 = stream.ranges[i] + r2 = stream.ranges[i+1] + @assert span(r1) == 0 + @assert numbytes <= span(r2) + fb2 = r2.first_byte + numbytes + rhs_empty = fb2 > last_byte(r2) + head2 = rhs_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : r2.head + stream.ranges[i] = TaggedRange(r1.head, r1.orig_kind, + r2.first_byte, fb2 - 1, + r1.start_mark) + stream.ranges[i+1] = TaggedRange(head2, r2.orig_kind, + fb2, r2.last_byte, + r2.start_mark) + return rhs_empty +end + function Base.position(stream::ParseStream) ParseStreamPosition(stream.next_byte, lastindex(stream.ranges)) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5496550205e3f..26161be81d684 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -100,6 +100,10 @@ function reset_node!(ps::ParseState, args...; kws...) reset_node!(ps.stream, args...; kws...) end +function steal_node_bytes!(ps::ParseState, args...) + steal_node_bytes!(ps.stream, args...) +end + function Base.position(ps::ParseState, args...) position(ps.stream, args...) end @@ -112,6 +116,10 @@ function emit_diagnostic(ps::ParseState, args...; kws...) emit_diagnostic(ps.stream, args...; kws...) end +function textbuf(ps::ParseState) + textbuf(ps.stream) +end + #------------------------------------------------------------------------------- # Parser Utils @@ -1590,12 +1598,15 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # x`str` ==> (macrocall @x_cmd "str") # x"" ==> (macrocall @x_str "") # x`` ==> (macrocall @x_cmd "") + # Triple quoted procesing for custom strings + # r"""\nx""" ==> (macrocall @r_str "x") + # r"""\n x\n y""" ==> (macrocall @r_str (string-sr "x\n" "y")) # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. outk = is_string_delim(k) ? 
K"StringMacroName" : K"CmdMacroName" finish_macroname(ps, mark, valid_macroname, macro_name_position, outk) - parse_raw_string(ps) + parse_string(ps, true) t = peek_token(ps) k = kind(t) if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)) @@ -2958,36 +2969,50 @@ function parse_brackets(after_parse::Function, end end -# Parse a string and any embedded interpolations +is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) + +# Parse a string, embedded interpolations and deindent triple quoted strings +# by marking indentation characters as whitespace trivia. # # flisp: parse-string-literal-, parse-interpolate -function parse_string(ps::ParseState) +function parse_string(ps::ParseState, raw::Bool) mark = position(ps) delim_k = peek(ps) - str_flags = delim_k == K"\"" ? EMPTY_FLAGS : TRIPLE_STRING_FLAG + triplestr = delim_k in KSet`""" \`\`\`` + string_chunk_kind = delim_k in KSet`" """` ? K"String" : K"CmdString" + indent_ref_i = 0 + indent_ref_len = typemax(Int) + if triplestr + indent_chunks = Vector{ParseStreamPosition}() + end + buf = textbuf(ps) + str_flags = (triplestr ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) | + (raw ? 
RAW_STRING_FLAG : EMPTY_FLAGS) bump(ps, TRIVIA_FLAG) - n_components = 0 + first_chunk = true + n_valid_chunks = 0 + removed_initial_newline = false + had_interpolation = false + prev_chunk_newline = false while true - k = peek(ps) + t = peek_token(ps) + k = kind(t) if k == K"$" - n_components += 1 + @assert !raw # The lexer detects raw strings separately bump(ps, TRIVIA_FLAG) k = peek(ps) if k == K"(" # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") m = position(ps) parse_atom(ps) - if ps.stream.version >= (1,6) - # https://github.com/JuliaLang/julia/pull/38692 - prev = peek_behind(ps) - if prev.kind == K"String" - # Wrap interpolated literal strings in (string) so we can - # distinguish them from the surrounding text (issue #38501) - # "hi$("ho")" ==> (string "hi" (string "ho")) - # "hi$("""ho""")" ==> (string "hi" (string-s "ho")) - #v1.5: "hi$("ho")" ==> (string "hi" "ho") - emit(ps, m, K"string", prev.flags) - end + # https://github.com/JuliaLang/julia/pull/38692 + prev = peek_behind(ps) + if prev.kind == string_chunk_kind + # Wrap interpolated literal strings in (string) so we can + # distinguish them from the surrounding text (issue #38501) + # "hi$("ho")" ==> (string "hi" (string "ho")) + # "hi$("""ho""")" ==> (string "hi" (string-s "ho")) + emit(ps, m, K"string", prev.flags) end elseif k == K"var" # var identifiers disabled in strings @@ -3002,55 +3027,148 @@ function parse_string(ps::ParseState) bump_invisible(ps, K"error", error="identifier or parenthesized expression expected after \$ in string") end - elseif k == K"String" - bump(ps, str_flags) - elseif k == delim_k - if n_components == 0 - # "" ==> "" - bump_invisible(ps, K"String", str_flags) + first_chunk = false + n_valid_chunks += 1 + had_interpolation = true + prev_chunk_newline = false + elseif k == string_chunk_kind + if triplestr && first_chunk && span(t) <= 2 && + begin + s = span(t) + b = buf[last_byte(t)] + # Test whether the string is a single logical newline + (s == 1 && (b == 
UInt8('\n') || b == UInt8('\r'))) || + (s == 2 && (buf[first_byte(t)] == UInt8('\r') && b == UInt8('\n'))) + end + # First line of triple string is a newline only: mark as trivia. + # """\nx""" ==> "x" + # """\n\nx""" ==> (string-s "\n" "x") + bump(ps, TRIVIA_FLAG) + first_chunk = false + prev_chunk_newline = true + else + if triplestr + # Triple-quoted dedenting: + # Various newlines (\n \r \r\n) and whitespace (' ' \t) + # """\n x\n y""" ==> (string-s "x\n" "y") + # """\r x\r y""" ==> (string-s "x\n" "y") + # """\r\n x\r\n y""" ==> (string-s "x\n" "y") + # Spaces or tabs or mixtures acceptable + # """\n\tx\n\ty""" ==> (string-s "x\n" "y") + # """\n \tx\n \ty""" ==> (string-s "x\n" "y") + # + # Mismatched tab vs space not deindented + # Find minimum common prefix in mismatched whitespace + # """\n\tx\n y""" ==> (string-s "\tx\n" " y") + # """\n x\n y""" ==> (string-s "x\n" " y") + # """\n x\n y""" ==> (string-s " x\n" "y") + # """\n \tx\n y""" ==> (string-s "\tx\n" " y") + # """\n x\n \ty""" ==> (string-s " x\n" "\ty") + # + # Empty lines don't affect dedenting + # """\n x\n\n y""" ==> (string-s "x\n" "\n" "y") + # Non-empty first line doesn't participate in deindentation + # """ x\n y""" ==> (string-s " x\n" "y") + # + # Dedenting and interpolations + # """\n $a\n $b""" ==> (string-s a "\n" b) + # """\n $a \n $b""" ==> (string-s a " \n" b) + # """\n $a\n $b\n""" ==> (string-s " " a "\n" " " b "\n") + # + if prev_chunk_newline && (b = buf[first_byte(t)]; + b != UInt8('\n') && b != UInt8('\r')) + # Compute length of longest common prefix of mixed + # spaces and tabs, in bytes + # + # Initial whitespace is never regarded as indentation + # in any triple quoted string chunk, as it's always + # preceded in the source code by a visible token of + # some kind; either a """ delimiter or $() + # interpolation. + if indent_ref_i == 0 + # No indentation found yet. 
Find indentation we'll + # use as a reference + i = first_byte(t) - 1 + while i < last_byte(t) && is_indentation(buf[i+1]) + i += 1 + end + indent_ref_i = first_byte(t) + indent_ref_len = i - first_byte(t) + 1 + else + # Matching the current indentation with reference, + # shortening length if necessary. + j = 0 + while j < span(t) && j < indent_ref_len + if buf[j + first_byte(t)] != buf[j + indent_ref_i] + break + end + j += 1 + end + indent_ref_len = min(indent_ref_len, j) + end + # Prepare a place for indentiation trivia, if necessary + push!(indent_chunks, bump_invisible(ps, K"TOMBSTONE")) + end + b = buf[last_byte(t)] + prev_chunk_newline = b == UInt8('\n') || b == UInt8('\r') + end + bump(ps, str_flags) + first_chunk = false + n_valid_chunks += 1 end - bump(ps, TRIVIA_FLAG) - break else - # Recovery - # "str ==> "str" (error-t) - bump_invisible(ps, K"error", TRIVIA_FLAG, error="Unterminated string literal") break end - n_components += 1 end - if n_components > 1 + had_end_delim = peek(ps) == delim_k + if triplestr && prev_chunk_newline && had_end_delim + # Newline at end of string + # """\n x\n y\n""" ==> (string-s " x\n" " y\n") + indent_ref_len = 0 + end + if triplestr && indent_ref_len > 0 + for pos in indent_chunks + reset_node!(ps, pos, kind=K"Whitespace", flags=TRIVIA_FLAG) + rhs_empty = steal_node_bytes!(ps, pos, indent_ref_len) + if rhs_empty + # Empty chunks after dedent are removed + # """\n \n """ ==> (string-s "\n") + n_valid_chunks -= 1 + end + end + end + if had_end_delim + if n_valid_chunks == 0 + # Empty strings, or empty after triple quoted processing + # "" ==> "" + # """\n """ ==> "" + bump_invisible(ps, string_chunk_kind, str_flags) + end + bump(ps, TRIVIA_FLAG) + else + # Missing delimiter recovery + # "str ==> "str" (error) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="Unterminated string literal") + end + if n_valid_chunks > 1 || had_interpolation + # String interpolations # "$x$y$z" ==> (string x y z) # "$(x)" ==> (string x) # 
"$x" ==> (string x) # """$x""" ==> (string-s x) + # + # Strings with embedded whitespace trivia + # "a\\\nb" ==> (string "a" "b") + # "a\\\rb" ==> (string "a" "b") + # "a\\\r\nb" ==> (string "a" "b") + # "a\\\n \tb" ==> (string "a" "b") emit(ps, mark, K"string", str_flags) else - # Strings with no interpolations + # Strings with only a single valid string chunk # "str" ==> "str" - end -end - -function parse_raw_string(ps::ParseState; remap_kind=K"Nothing") - emark = position(ps) - delim_k = peek(ps) - bump(ps, TRIVIA_FLAG) - flags = RAW_STRING_FLAG | (delim_k in KSet`""" \`\`\`` ? - TRIPLE_STRING_FLAG : EMPTY_FLAGS) - if peek(ps) in KSet`String CmdString` - bump(ps, flags; remap_kind=remap_kind) - else - outk = remap_kind != K"Nothing" ? remap_kind : - delim_k in KSet`" """` ? K"String" : - delim_k in KSet`\` \`\`\`` ? K"CmdString" : - internal_error("unexpected delimiter ", delim_k) - bump_invisible(ps, outk, flags) - end - if peek(ps) == delim_k - bump(ps, TRIVIA_FLAG) - else - # Recovery - bump_invisible(ps, K"error", error="Unterminated string literal") + # "a\\\n" ==> "a" + # "a\\\r" ==> "a" + # "a\\\r\n" ==> "a" end end @@ -3137,16 +3255,31 @@ function parse_atom(ps::ParseState, check_identifiers=true) end elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); - kind(t) in KSet`" """` && !t.had_whitespace) + kind(t) == K"\"" && !t.had_whitespace) # var"x" ==> x - # var"""x""" ==> x # Raw mode unescaping - # var"" ==> + # var"" ==> # var"\"" ==> " # var"\\"" ==> \" # var"\\x" ==> \\x + # + # NB: Triple quoted var identifiers are not implemented, but with + # the complex deindentation rules they seem like a misfeature + # anyway, maybe? 
+ # var"""x""" !=> x + bump(ps, TRIVIA_FLAG) bump(ps, TRIVIA_FLAG) - parse_raw_string(ps, remap_kind=K"Identifier") + if peek(ps) == K"String" + bump(ps, RAW_STRING_FLAG; remap_kind=K"Identifier") + else + bump_invisible(ps, K"Identifier", RAW_STRING_FLAG) + end + if peek(ps) == K"\"" + bump(ps, TRIVIA_FLAG) + else + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="Unterminated string literal") + end t = peek_token(ps) k = kind(t) if t.had_whitespace || is_operator(k) || @@ -3188,13 +3321,13 @@ function parse_atom(ps::ParseState, check_identifiers=true) parse_macro_name(ps) parse_call_chain(ps, mark, true) elseif is_string_delim(leading_kind) - parse_string(ps) + parse_string(ps, false) elseif leading_kind in KSet`\` \`\`\`` # `` ==> (macrocall core_@cmd "") # `cmd` ==> (macrocall core_@cmd "cmd") # ```cmd``` ==> (macrocall core_@cmd "cmd"-s) bump_invisible(ps, K"core_@cmd") - parse_raw_string(ps) + parse_string(ps, true) emit(ps, mark, K"macrocall") elseif is_literal(leading_kind) bump(ps) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9823006eae8ed..bdbf60c612aec 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -50,9 +50,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k in KSet`String CmdString` is_cmd = k == K"CmdString" is_raw = has_flags(head(raw), RAW_STRING_FLAG) - has_flags(head(raw), TRIPLE_STRING_FLAG) ? - process_triple_strings!([val_str], is_raw)[1] : - unescape_julia_string(val_str, is_cmd, is_raw) + unescape_julia_string(val_str, is_cmd, is_raw) elseif is_operator(k) isempty(val_range) ? 
Symbol(untokenize(k)) : # synthetic invisible tokens @@ -83,37 +81,12 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In else cs = SyntaxNode[] pos = position - if kind(raw) == K"string" && has_flags(head(raw), TRIPLE_STRING_FLAG) - # Triple quoted strings need special processing of sibling String literals - strs = SubString[] - str_nodes = SyntaxNode[] - for (i,rawchild) in enumerate(children(raw)) - if !is_trivia(rawchild) || is_error(rawchild) - if kind(rawchild) == K"String" - val_range = pos:pos + span(rawchild) - 1 - push!(strs, view(source, val_range)) - n = SyntaxNode(source, rawchild, pos, nothing, true, nothing) - push!(cs, n) - push!(str_nodes, n) - else - push!(cs, SyntaxNode(source, rawchild, pos)) - end - end - pos += rawchild.span - end - is_raw = has_flags(head(raw), RAW_STRING_FLAG) - process_triple_strings!(strs, is_raw) - for (s,n) in zip(strs, str_nodes) - n.val = s - end - else - for (i,rawchild) in enumerate(children(raw)) - # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. - if !is_trivia(rawchild) || is_error(rawchild) - push!(cs, SyntaxNode(source, rawchild, pos)) - end - pos += rawchild.span + for (i,rawchild) in enumerate(children(raw)) + # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. + if !is_trivia(rawchild) || is_error(rawchild) + push!(cs, SyntaxNode(source, rawchild, pos)) end + pos += rawchild.span end node = SyntaxNode(source, raw, position, nothing, false, cs) for c in cs @@ -391,6 +364,41 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) pushfirst!(args, numeric_flags(flags(node))) elseif headsym == :typed_ncat insert!(args, 2, numeric_flags(flags(node))) + elseif headsym == :string && length(args) > 1 + # Julia string literals may be interspersed with trivia in two situations: + # 1. Triple quoted string indentation is trivia + # 2. 
An \ before newline removes the newline and any following indentation + # + # Such trivia is eagerly removed by the reference parser, so here we + # concatenate adjacent string chunks together for compatibility. + # + # TODO: Manage the non-interpolation cases with String and CmdString + # kinds instead? + args2 = Vector{Any}() + i = 1 + while i <= length(args) + if args[i] isa String && i < length(args) && args[i+1] isa String + buf = IOBuffer() + while i <= length(args) && args[i] isa String + write(buf, args[i]) + i += 1 + end + push!(args2, String(take!(buf))) + else + push!(args2, args[i]) + i += 1 + end + end + args = args2 + if length(args2) == 1 && args2[1] isa String + # If there's a single string remaining after joining we unwrap to + # give a string literal. + # """\n a\n b""" ==> "a\nb" + return args2[1] + end + # elseif headsym == :string && length(args) == 1 && version <= (1,5) + # Strip string from interpolations in 1.5 and lower to preserve + # "hi$("ho")" ==> (string "hi" "ho") elseif headsym == :(=) if is_eventually_call(args[1]) && !iteration_spec if Meta.isexpr(args[2], :block) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index bba8f478d6f94..6b94cbb806e41 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -72,7 +72,7 @@ is_indentation(c) = c == ' ' || c == '\t' """ Process Julia source code escape sequences for raw strings """ -function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent::Integer, skip_initial_newline::Bool) +function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool) delim = is_cmd ? 
'`' : '"' i = firstindex(str) lastidx = lastindex(str) @@ -86,16 +86,7 @@ function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool, dedent:: end c = '\n' end - if c == '\n' - if i > 1 || !skip_initial_newline - write(io, c) - end - if i+1 <= lastidx && str[i+1] != '\n' && str[i+1] != '\r' - i += dedent - end - else - write(io, c) - end + write(io, c) i = nextind(str, i) continue end @@ -124,7 +115,7 @@ end Process Julia source code escape sequences for non-raw strings. `str` should be passed without delimiting quotes. """ -function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer, skip_initial_newline::Bool) +function unescape_julia_string(io::IO, str::AbstractString) i = firstindex(str) lastidx = lastindex(str) while i <= lastidx @@ -137,16 +128,7 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer, ski end c = '\n' end - if c == '\n' - if i > 1 || !skip_initial_newline - write(io, c) - end - if i+1 <= lastidx && str[i+1] != '\n' && str[i+1] != '\r' - i += dedent - end - else - write(io, c) - end + write(io, c) i = nextind(str, i) continue end @@ -190,14 +172,6 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer, ski throw(ArgumentError("octal escape sequence out of range")) end write(io, UInt8(n)) - elseif c == '\n' || c == '\r' - # Remove \n \r and \r\n newlines + indentation following \ - if c == '\r' && i < lastidx && str[i+1] == '\n' - i += 1 - end - while i < lastidx && is_indentation(str[i+1]) - i += 1 - end else u = # C escapes c == 'n' ? 
'\n' : @@ -221,118 +195,16 @@ function unescape_julia_string(io::IO, str::AbstractString, dedent::Integer, ski end end -function unescape_julia_string(str::AbstractString, is_cmd::Bool, - is_raw::Bool, dedent::Integer=0, - skip_initial_newline=false) +function unescape_julia_string(str::AbstractString, is_cmd::Bool, is_raw::Bool) io = IOBuffer() if is_raw - unescape_raw_string(io, str, is_cmd, dedent, skip_initial_newline) + unescape_raw_string(io, str, is_cmd) else - unescape_julia_string(io, str, dedent, skip_initial_newline) + unescape_julia_string(io, str) end String(take!(io)) end -# Compute length of longest common prefix of mixed spaces and tabs, in -# characters (/bytes). -# -# Initial whitespace is never regarded as indentation in any triple quoted -# string chunk, as it's always preceded in the source code by a visible token -# of some kind; either a """ delimiter or $() interpolation. -# -# This pass runs *before* normalization of newlines so that -# unescaping/normalization can happen in a single pass. -# -# TODO: Should we do triplequoted string splitting as part of the main parser? -# It would be conceptually clean if the trivial whitespace was emitted as -# syntax trivia. -# -# flisp: triplequoted-string-indentation- -function triplequoted_string_indentation(strs, is_raw) - if isempty(strs) - return 0 - end - refstr = SubString(strs[1], 1, 0) - reflen = -1 - for str in strs - i = 1 - lastidx = lastindex(str) - while i <= lastidx - c = str[i] - if c == '\\' && !is_raw - # Escaped newlines stop indentation detection for the current - # line but do not start detection of indentation on the next - # line - if i+1 <= lastidx - if str[i+1] == '\n' - i += 1 - elseif str[i+1] == '\r' - i += 1 - if i+1 <= lastidx && str[i+1] == '\n' - i += 1 - end - end - end - elseif c == '\n' || c == '\r' - while i <= lastidx - c = str[i] - (c == '\n' || c == '\r') || break - i += 1 - end - if i <= lastidx - # At this point we've found the start of a nonempty line. 
- if reflen < 0 - # Find indentation we'll use as a reference - j = i-1 - while j < lastidx && is_indentation(str[j+1]) - j += 1 - end - refstr = SubString(str, i, j) - reflen = j - i + 1 - if j > i - i = j - end - else - # Matching indentation with reference, shortening - # length if necessary. - j = i-1 - while j+1 <= lastidx && j-i+2 <= reflen - if str[j+1] != refstr[j-i+2] - break - end - j += 1 - end - if j-i+1 < reflen - reflen = j-i+1 - end - if j > i - i = j - end - end - else - # A newline directly before the end of the string means a - # delimiter was in column zero, implying zero indentation. - reflen = 0 - end - end - i <= lastidx || break - i = nextind(str, i) - end - end - max(reflen, 0) -end - -function process_triple_strings!(strs, is_raw) - if isempty(strs) - return strs - end - dedent = triplequoted_string_indentation(strs, is_raw) - for i = 1:length(strs) - strs[i] = unescape_julia_string(strs[i], false, is_raw, dedent, i==1) - end - strs -end - #------------------------------------------------------------------------------- # Unicode normalization. As of Julia 1.8, this is part of Base and the Unicode # stdlib under the name `Unicode.julia_chartransform`. 
See diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index dce25028ff4a0..b05524c1e18cd 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -300,6 +300,9 @@ tests = [ "x`str`" => """(macrocall @x_cmd "str")""" "x\"\"" => """(macrocall @x_str "")""" "x``" => """(macrocall @x_cmd "")""" + # Triple quoted procesing for custom strings + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str "x")""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\n" "y"))""" # Macro sufficies can include keywords and numbers "x\"s\"y" => """(macrocall @x_str "s" "y")""" "x\"s\"end" => """(macrocall @x_str "s" "end")""" @@ -549,7 +552,6 @@ tests = [ ": end" => ":" # var syntax """var"x" """ => "x" - """var""\"x""\"""" => "x" """var"x"+""" => "x" """var"x")""" => "x" """var"x"(""" => "x" @@ -641,20 +643,58 @@ tests = [ ((v=v"1.8",), "[;;]") => "(ncat-2)" ((v=v"1.8",), "[\n ;; \n ]") => "(ncat-2)" ((v=v"1.7",), "[;;]") => "(ncat-2 (error))" - ], - JuliaSyntax.parse_string => [ + # parse_string "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" "\"hi\$(\"\"\"ho\"\"\")\"" => "(string \"hi\" (string-s \"ho\"))" - ((v=v"1.5",), "\"hi\$(\"ho\")\"") => "(string \"hi\" \"ho\")" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" "\"\$in\"" => "(string in)" - "\"\"" => "\"\"" + # Triple-quoted dedenting: + "\"\"\"\nx\"\"\"" => "\"x\"" + "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" + # Various newlines (\n \r \r\n) and whitespace (' ' \t) + "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")""" + "\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")""" + "\"\"\"\r\n x\r\n y\"\"\"" => raw"""(string-s "x\n" "y")""" + # Spaces or tabs or mixtures acceptable + "\"\"\"\n\tx\n\ty\"\"\"" => raw"""(string-s "x\n" "y")""" + "\"\"\"\n \tx\n \ty\"\"\"" => raw"""(string-s "x\n" "y")""" + # Mismatched tab vs space not 
deindented + # Find minimum common prefix in mismatched whitespace + "\"\"\"\n\tx\n y\"\"\"" => raw"""(string-s "\tx\n" " y")""" + "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" " y")""" + "\"\"\"\n x\n y\"\"\"" => raw"""(string-s " x\n" "y")""" + "\"\"\"\n \tx\n y\"\"\"" => raw"""(string-s "\tx\n" " y")""" + "\"\"\"\n x\n \ty\"\"\"" => raw"""(string-s " x\n" "\ty")""" + # Empty lines don't affect dedenting + "\"\"\"\n x\n\n y\"\"\"" => raw"""(string-s "x\n" "\n" "y")""" + # Non-empty first line doesn't participate in deindentation + "\"\"\" x\n y\"\"\"" => raw"""(string-s " x\n" "y")""" + # Dedenting and interpolations + "\"\"\"\n \$a\n \$b\"\"\"" => raw"""(string-s a "\n" b)""" + "\"\"\"\n \$a \n \$b\"\"\"" => raw"""(string-s a " \n" b)""" + "\"\"\"\n \$a\n \$b\n\"\"\"" => raw"""(string-s " " a "\n" " " b "\n")""" + # Empty chunks after dedent are removed + "\"\"\"\n \n \"\"\"" => "\"\\n\"" + # Newline at end of string + "\"\"\"\n x\n y\n\"\"\"" => raw"""(string-s " x\n" " y\n")""" + # Empty strings, or empty after triple quoted processing + "\"\"" => "\"\"" + "\"\"\"\n \"\"\"" => "\"\"" + # Missing delimiter + "\"str" => "\"str\" (error)" + # String interpolations "\"\$x\$y\$z\"" => "(string x y z)" "\"\$(x)\"" => "(string x)" "\"\$x\"" => "(string x)" + # Strings with embedded whitespace trivia + "\"a\\\nb\"" => raw"""(string "a" "b")""" + "\"a\\\rb\"" => raw"""(string "a" "b")""" + "\"a\\\r\nb\"" => raw"""(string "a" "b")""" + "\"a\\\n \tb\"" => raw"""(string "a" "b")""" + # Strings with only a single valid string chunk "\"str\"" => "\"str\"" ], JuliaSyntax.parse_docstring => [ @@ -667,12 +707,10 @@ tests = [ ], ] -# Known bugs +# Known bugs / incompatibilities broken_tests = [ JuliaSyntax.parse_atom => [ - # Triple-quoted string processing - "\"\"\"\n\$x\"\"\"" => "(string x)" - "\"\"\"\$x\n\"\"\"" => "(string x \"\n\")" + """var""\"x""\"""" => "x" # Operator-named macros without spaces "@!x" => "(macrocall @! x)" "@..x" => "(macrocall @.. 
x)" diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 7e69a31869ebe..9ba3c093d5934 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -1,8 +1,6 @@ using JuliaSyntax: julia_string_to_number, - triplequoted_string_indentation, - unescape_julia_string, - process_triple_strings! + unescape_julia_string hexint(s) = julia_string_to_number(s, K"HexInt") binint(s) = julia_string_to_number(s, K"BinInt") @@ -139,16 +137,6 @@ end # Newline normalization @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd" - # Removal of backslash-escaped newlines & indentation - @test unesc("a\\\nb") == "ab" - @test unesc("a\\\rb") == "ab" - @test unesc("a\\\r\nb") == "ab" - @test unesc("a\\\n b") == "ab" - @test unesc("a\\\r\n \tb") == "ab" - @test unesc("a\\\n") == "a" - @test unesc("a\\\r") == "a" - @test unesc("a\\\r\n") == "a" - # Invalid escapes @test_throws ArgumentError unesc("\\.") @test_throws ArgumentError unesc("\\z") @@ -203,93 +191,6 @@ end @test unescape_julia_string("\\\\ ", true, true) == "\\\\ " end -@testset "Triple quoted string indentation" begin - # Alias for non-raw triple str indentation - triplestr_indent(str) = triplequoted_string_indentation(str, false) - - @test triplestr_indent([]) == 0 - - # Spaces or tabs acceptable - @test triplestr_indent(["\n "]) == 2 - @test triplestr_indent(["\n\t "]) == 2 - @test triplestr_indent(["\n \t"]) == 2 - @test triplestr_indent(["\n\t\t"]) == 2 - - # Start of the string is not indentation, as it's always preceded by a - # delimiter in the source - @test triplestr_indent([" "]) == 0 - @test triplestr_indent([" ", " "]) == 0 - - # Various newlines are allowed. 
empty lines are ignored - @test triplestr_indent(["\n\n x"]) == 2 - @test triplestr_indent(["\n\r x"]) == 2 - @test triplestr_indent(["\r\n x"]) == 2 - @test triplestr_indent(["\r\r x"]) == 2 - @test triplestr_indent(["\n\r\r\n"]) == 0 - - # Empty line at the end of any chunk implies the next source line started - # with a delimiter, yielding zero indentation - @test triplestr_indent([" \n"]) == 0 - @test triplestr_indent([" \r"]) == 0 - @test triplestr_indent([" \n\n"]) == 0 - @test triplestr_indent([" ", " \n"]) == 0 - @test triplestr_indent([" \n", " "]) == 0 - - # Find the minimum common prefix in one or several chunks - @test triplestr_indent(["\n ", "\n "]) == 2 - @test triplestr_indent(["\n ", "\n "]) == 1 - @test triplestr_indent(["\n ", "\n "]) == 1 - @test triplestr_indent(["\n ", "\n "]) == 1 - @test triplestr_indent(["\n \t", "\n "]) == 1 - @test triplestr_indent(["\n ", "\n \t"]) == 1 - @test triplestr_indent(["\n \t", "\n \t"]) == 2 - @test triplestr_indent(["\n\t ", "\n\t "]) == 2 - @test triplestr_indent(["\n \n "]) == 2 - @test triplestr_indent(["\n \n "]) == 1 - @test triplestr_indent(["\n \n "]) == 1 - # Increasing widths - @test triplestr_indent(["\n\n \n \n "]) == 1 - # Decreasing widths - @test triplestr_indent(["\n \n \n "]) == 1 - - # Some cases of no indentation - @test triplestr_indent(["hi"]) == 0 - @test triplestr_indent(["x\ny", "z"]) == 0 - - # Escaped newlines - @test triplestr_indent(["\\\n "]) == 0 - @test triplestr_indent(["\\\r "]) == 0 - @test triplestr_indent(["\\\r\n "]) == 0 - @test triplestr_indent(["\\\r\n "]) == 0 - @test triplestr_indent(["\n \\\n "]) == 2 - @test triplestr_indent(["\n \\\n "]) == 1 - - # Raw strings don't have escaped newline processing - @test triplequoted_string_indentation(["\n \\\n "], true) == 1 - @test triplequoted_string_indentation(["\n \\\n "], true) == 1 -end - -@testset "Triple quoted string deindentation" begin - # Weird thing I noticed: In Julia 1.7 this @testset for loop adds an - # absurd 
amount of testing latency given how trivial it is. Why? Is it - # because of compiler heuristics which try to compile all for loops? - @testset "Raw=$raw" for raw in (false, true) - # Various combinations of dedent + leading newline stripping - @test process_triple_strings!(["\n x", "\n y"], raw) == ["x", "\ny"] - @test process_triple_strings!(["\n\tx", "\n\ty"], raw) == ["x", "\ny"] - @test process_triple_strings!(["\r x", "\r y"], raw) == ["x", "\ny"] - @test process_triple_strings!(["\r x\r y"], raw) == ["x\ny"] - @test process_triple_strings!(["\r x\r\r y"], raw) == ["x\n\ny"] - @test process_triple_strings!(["\n \t x", "\n \t y"], raw) == ["x", "\ny"] - @test process_triple_strings!(["x\n\n y", "\n z"], raw) == ["x\n\ny", "\nz"] - # Cases of no dedent + newline normalization - @test process_triple_strings!(["\n x", "\ny"], raw) == [" x", "\ny"] - @test process_triple_strings!(["\nx", "\n y"], raw) == ["x", "\n y"] - @test process_triple_strings!(["\n y\n"], raw) == [" y\n"] - @test process_triple_strings!(["\n y\r"], raw) == [" y\n"] - end -end - @testset "Normalization of identifiers" begin # NFC normalization # https://github.com/JuliaLang/julia/issues/5434 From 21e81c5d251373af4576fdac667cbde240f3f91f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sat, 12 Feb 2022 15:33:20 +1000 Subject: [PATCH 0386/1109] Add notes about different tree types --- JuliaSyntax/README.md | 160 +++++++++++++++++++++++--------- JuliaSyntax/src/parse_stream.jl | 4 +- 2 files changed, 120 insertions(+), 44 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 78e32056464d7..056c68ff7790a 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -208,8 +208,124 @@ We want to encode both these cases in a way which is simplest for downstream tools to use. This is an open question, but for now we use `K"error"` as the kind, with the `TRIVIA_FLAG` set for unexpected syntax. 
+# Syntax trees + +Julia's `Expr` abstract syntax tree can't store precise source locations or +deal with syntax trivia like whitespace or comments. So we need some new tree +types in `JuliaSyntax`. + +JuliaSyntax currently deals in three types of trees: +* `GreenNode` is a minimal *lossless syntax tree* where + - Nodes store a kind and length in bytes, but no text + - Syntax trivia are included in the list of children + - Children are strictly in source order +* `SyntaxNode` is an *abstract syntax tree* which has + - An absolute position and pointer to the source text + - Children strictly in source order + - Leaf nodes store values, not text + - Trivia are ignored, but there is a 1:1 mapping of non-trivia nodes to the + associated `GreenTree` nodes. +* `Expr` is used as a conversion target for compatibility + +Wherever possible, the tree structure of `GreenNode`/`SyntaxNode` is 1:1 with +`Expr`. There are, however, some exceptions. + +## Tree differences between GreenNode and Expr + +First, `GreenNode` inherently stores source position, so there's no need for +the `LineNumberNode`s used by `Expr`. There's also a small number of other +differences -### More about syntax kinds +### Flattened generators + +Flattened generators are uniquely problematic because the Julia AST doesn't +respect a key rule we normally expect: that the children of an AST node are a +*contiguous* range in the source text. This is because the `for`s in +`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to +mean + +``` +for x in xs +for y in ys + push!(xy, collection) +``` + +so the `xy` prefix is in the *body* of the innermost for loop. Following this, +the standard Julia AST is like so: + +``` +(flatten + (generator + (generator + xy + (= y ys)) + (= x xs))) +``` + +however, note that if this tree were flattened, the order would be +`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the +source order. 
+ +However, our green tree is strictly source-ordered, so we must deviate from the +Julia AST. The natural representation seems to be to remove the generators and +use a flattened structure: + +``` +(flatten + xy + (= x xs) + (= y ys)) +``` + +### Whitespace trivia inside strings + +For triple quoted strings, the indentation isn't part of the string data so +should also be excluded from the string content within the green tree. That is, +it should be treated as separate whitespace trivia tokens. With this separation +things like formatting should be much easier. The same reasoning goes for +escaping newlines and following whitespace with backslashes in normal strings. + +Detecting string trivia during parsing means that string content is split over +several tokens. Here we wrap these in the K"string" kind (as is already used +for interpolations). The individual chunks can then be reassembled during Expr +construction. (A possible alternative might be to reuse the K"String" and +K"CmdString" kinds for groups of string chunks (without interpolation).) + +Take as an example the following Julia fragment. + +```julia +x = """ + $a + b""" +``` + +Here this is parsed as `(= x (string-s a "\n" "b"))` (the `-s` flag in +`string-s` means "triple quoted string") + +Looking at the green tree, we see the indentation before the `$a` and `b` are +marked as trivia: + +``` +julia> text = "x = \"\"\"\n \$a\n b\"\"\"" + show(stdout, MIME"text/plain"(), parseall(GreenNode, text, rule=:statement), text) + 1:23 │[=] + 1:1 │ Identifier ✔ "x" + 2:2 │ Whitespace " " + 3:3 │ = "=" + 4:4 │ Whitespace " " + 5:23 │ [string] + 5:7 │ """ "\"\"\"" + 8:8 │ String "\n" + 9:12 │ Whitespace " " + 13:13 │ $ "\$" + 14:14 │ Identifier ✔ "a" + 15:15 │ String ✔ "\n" + 16:19 │ Whitespace " " + 20:20 │ String ✔ "b" + 21:23 │ """ "\"\"\"" +``` + +## More about syntax kinds We generally track the type of syntax nodes with a syntax "kind", stored explicitly in each node an integer tag. 
This effectively makes the node type a @@ -239,6 +355,7 @@ There's arguably a few downsides: processes one specific kind but for generic code processing many kinds having a generic but *concrete* data layout should be faster. + # Differences from the flisp parser Practically the flisp parser is not quite a classic [recursive descent @@ -360,47 +477,6 @@ parsing `key=val` pairs inside parentheses. `kw` for keywords. -### Flattened generators - -Flattened generators are uniquely problematic because the Julia AST doesn't -respect a key rule we normally expect: that the children of an AST node are a -*contiguous* range in the source text. This is because the `for`s in -`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to -mean - -``` -for x in xs -for y in ys - push!(xy, collection) -``` - -so the `xy` prefix is in the *body* of the innermost for loop. Following this, -the standard Julia AST is like so: - -``` -(flatten - (generator - (generator - xy - (= y ys)) - (= x xs))) -``` - -however, note that if this tree were flattened, the order would be -`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the -source order. - -However, our green tree is strictly source-ordered, so we must deviate from the -Julia AST. The natural representation seems to be to remove the generators and -use a flattened structure: - -``` -(flatten - xy - (= x xs) - (= y ys)) -``` - ### Other oddities * Operators with suffices don't seem to always be parsed consistently as the diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 1caae991fac5a..e8cc222a0c749 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -599,7 +599,7 @@ end """ build_tree(::Type{NodeType}, stream::ParseStream; - wrap_toplevel_as_kind=nothing) + wrap_toplevel_as_kind=nothing, kws...) Construct a tree with `NodeType` nodes from a ParseStream using depth-first traversal. 
`NodeType` must have the constructors @@ -616,7 +616,7 @@ a bottom-up tree builder interface similar to rust-analyzer. (In that case we'd traverse the list of ranges backward rather than forward.) """ function build_tree(::Type{NodeType}, stream::ParseStream; - wrap_toplevel_as_kind=nothing) where NodeType + wrap_toplevel_as_kind=nothing, kws...) where NodeType stack = Vector{NamedTuple{(:range,:node),Tuple{TaggedRange,NodeType}}}() for (span_index, range) in enumerate(stream.ranges) if kind(range) == K"TOMBSTONE" From 720e256c3b68545601e57b2c99c955c074f384ae Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 14 Feb 2022 15:47:09 +1000 Subject: [PATCH 0387/1109] Adjust CI to add 1.x latest and remove unnecessary runs on push. --- JuliaSyntax/.github/workflows/CI.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 11795d3c6cd59..60a91056805d7 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -1,7 +1,10 @@ name: CI on: - - push - - pull_request + push: + branches: + - master + tags: '*' + pull_request: jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} @@ -11,6 +14,7 @@ jobs: matrix: version: - '1.6' + - '1' - 'nightly' os: - ubuntu-latest From c28813cbcae818c0526c17b8a3eaf66f4dca0850 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 16 Feb 2022 11:33:53 +1000 Subject: [PATCH 0388/1109] Fix: Wrap :error in :toplevel for core parser hook This is expected for the output of `Meta.parseall()` --- JuliaSyntax/src/hooks.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 41e3d95928aef..1f0df0c497799 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -12,12 +12,15 @@ function core_parser_hook(code, filename, offset, options) seek(io, offset) stream = 
ParseStream(io) - rule = options == :all ? :toplevel : options + rule = options === :all ? :toplevel : options JuliaSyntax.parse(stream; rule=rule) - ex = any_error(stream) ? - Expr(:error, ParseError(SourceFile(code), stream.diagnostics)) : + ex = if any_error(stream) + e = Expr(:error, ParseError(SourceFile(code), stream.diagnostics)) + options === :all ? Expr(:toplevel, e) : e + else build_tree(Expr, stream) + end pos = last_byte(stream) - 1 From cdf2d1fa48c32076e0988e4c0d7fed9c76898d0e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 16 Feb 2022 17:34:12 +1000 Subject: [PATCH 0389/1109] Fixes for `;;` and spaces in array concatenation (JuliaLang/JuliaSyntax.jl#7) * Dectect whether array concatenation is either column-major or row-major in the first and second dimesions. Report errors for mixed orderings. * Treat ;; as line continuation when used in hcat * Treat newlines as insignificant when mixed with semicolons as separators. --- JuliaSyntax/src/green_tree.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 6 +- JuliaSyntax/src/parser.jl | 103 +++++++++++++++++++++----------- JuliaSyntax/src/tokens.jl | 14 +++-- JuliaSyntax/test/parser.jl | 21 ++++++- 5 files changed, 99 insertions(+), 47 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 13bd4b023950d..4852dcd7adee0 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -79,7 +79,7 @@ function _show_green_node(io, node, indent, pos, str, show_trivia) if is_leaf line = string(posstr, indent, summary(node)) else - line = string(posstr, indent, '[', summary(node), "]") + line = string(posstr, indent, '[', summary(node), ']') end if !is_trivia(node) && is_leaf line = rpad(line, 40) * "✔" diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index e8cc222a0c749..604f0c67bc4b4 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -51,11 +51,11 @@ numeric_flags(head::SyntaxHead) = 
numeric_flags(flags(head)) is_error(head::SyntaxHead) = kind(head) == K"error" function Base.summary(head::SyntaxHead) - _kind_str(kind(head)) + untokenize(head, unique=false, include_flag_suff=false) end -function untokenize(head::SyntaxHead; include_flag_suff=true) - str = untokenize(kind(head)) +function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) + str = untokenize(kind(head); unique=unique) if is_dotted(head) str = "."*str end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 26161be81d684..2c1c7669f4f0e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2572,10 +2572,6 @@ end # Mismatched rows # [x y ; z] ==> (vcat (row x y) z) # -# Double semicolon with spaces allowed (only) for line continuation -#v1.7: [x y ;;\n z w] ==> (hcat x y z w) -#v1.7: [x y ;; z w] ==> (hcat x y (error) z w) -# # Single elements in rows #v1.7: [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z) #v1.7: [x y ;;; z ] ==> (ncat-3 (row x y) z) @@ -2592,6 +2588,7 @@ end function parse_array(ps::ParseState, mark, closer, end_is_symbol) ps = ParseState(ps, end_symbol=end_is_symbol) + array_order = Ref(:unknown) # Outer array parsing loop - parse chain of separators with descending # precedence such as #v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e) @@ -2604,9 +2601,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) # # For an excellent overview of Pratt parsing, see # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html - (dim, binding_power) = parse_array_separator(ps) + (dim, binding_power) = parse_array_separator(ps, array_order) while true - (next_dim, next_bp) = parse_array_inner(ps, binding_power) + (next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order) if next_bp == typemin(Int) break end @@ -2624,20 +2621,20 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) (K"ncat", set_numeric_flags(dim)) end -# Parse equal 
and ascending precedence chains of array concatenation operators -# (semicolons, newlines and whitespace). Invariants: +# Parse equal and ascending precedence chains of array concatenation operators - +# semicolons, newlines and whitespace. Invariants: # # * The caller must have already consumed # - The left hand side -# - The concatenation operator, providing the current binding_power. -# So eg, we're here in the input stream +# - The concatenation operator, providing `binding_power`. +# So eg, we're here in the input stream, either at an element or closing token # | # [a ;; b ; c ] # [a ;; ] # # * The caller must call emit() to delimit the AST node for this binding power. # -function parse_array_inner(ps, binding_power) +function parse_array_inner(ps, binding_power, array_order) mark = NO_POSITION dim = -1 bp = binding_power @@ -2655,13 +2652,13 @@ function parse_array_inner(ps, binding_power) # Parse one expression mark = position(ps) parse_eq_star(ps) - (next_dim, next_bp) = parse_array_separator(ps) + (next_dim, next_bp) = parse_array_separator(ps, array_order) else # bp > binding_power # Recurse to parse a separator with greater binding power. Eg: # [a ;; b ; c ] # | ^------ the next input is here # '---------- the mark is here - (next_dim, next_bp) = parse_array_inner(ps, bp) + (next_dim, next_bp) = parse_array_inner(ps, bp, array_order) if bp == 0 emit(ps, mark, K"row") else @@ -2674,46 +2671,83 @@ end # Parse a separator in an array concatenation # -# Here we aim to identify: +# Here we return a tuple (dim, binding_power) containing # * Dimension on which the next separator acts # * Binding power (precedence) of the separator, where whitespace binds # tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding # power of 0 for whitespace and negative numbers for other separators. 
# # FIXME: Error messages for mixed spaces and ;; delimiters -function parse_array_separator(ps; skip_newlines=false) - t = peek_token(ps; skip_newlines=skip_newlines) - k = kind(t) - if k == K";" +function parse_array_separator(ps, array_order) + sep_mismatch_err = "cannot mix space and ;; separators in an array expression, except to wrap a line" + mark = position(ps) + t = peek_token(ps, skip_newlines=true) + if kind(t) == K";" + # Newlines before semicolons are not significant + # [a \n ;] ==> (vcat a) + bump_trivia(ps) n_semis = 1 while true - bump(ps, TRIVIA_FLAG; skip_newlines=skip_newlines) + bump(ps, TRIVIA_FLAG) t = peek_token(ps) - if kind(t) != K";" || t.had_whitespace + if kind(t) != K";" break end + if t.had_whitespace + bump_disallowed_space(ps) + end n_semis += 1 end - # FIXME - following is ncat, not line continuation - # [a ;; \n c] - if n_semis == 2 && peek(ps) == K"NewlineWs" - # Line continuation - # [a b ;; \n \n c] - while peek(ps) == K"NewlineWs" - bump(ps, TRIVIA_FLAG) + had_newline = peek(ps) == K"NewlineWs" + # Newlines after semicolons are not significant + # [a ; \n] ==> (vcat a) + # [a ; \n\n b] ==> (vcat a b) + #v1.7: [a ;; \n b] ==> (ncat-2 a b) + bump_trivia(ps) + if n_semis == 2 + if array_order[] === :row_major + if had_newline + # In hcat with spaces as separators, `;;` is a line + # continuation character + #v1.7: [a b ;; \n c] ==> (hcat a b c) + #v1.7: [a b \n ;; c] ==> (ncat-2 (row a b (error-t)) c) + return (2, 0) + else + # Can't mix spaces and multiple ;; + #v1.7: [a b ;; c] ==> (ncat-2 (row a b (error-t)) c) + emit(ps, mark, K"error", TRIVIA_FLAG, error=sep_mismatch_err) + end + else + array_order[] = :column_major end - return (2, 0) - else - return (n_semis, -n_semis) end - elseif k == K"NewlineWs" + return (n_semis, -n_semis) + end + t = peek_token(ps) + k = kind(t) + if k == K"NewlineWs" bump_trivia(ps) - # Newlines separate the first dimension + # Treat a linebreak prior to a value as a semicolon (ie, separator for + # 
the first dimension) if no previous semicolons observed + # [a \n b] ==> (vcat a b) + return (1, -1) + elseif k == K"," + # Treat `,` as semicolon for the purposes of recovery + # [a; b, c] ==> (vcat a b (error-t) c) + bump(ps, TRIVIA_FLAG, error="unexpected comma in array expression") return (1, -1) else if t.had_whitespace && !is_closing_token(ps, k) + if array_order[] === :column_major + # Can't mix multiple ;'s and spaces + #v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c)) + bump_trivia(ps, TRIVIA_FLAG, error=sep_mismatch_err) + else + array_order[] = :row_major + end return (2, 0) else + # Something else; use typemin to exit array parsing return (typemin(Int), typemin(Int)) end end @@ -2739,10 +2773,11 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) #v1.8: [;;] ==> (ncat-2) #v1.8: [\n ;; \n ] ==> (ncat-2) #v1.7: [;;] ==> (ncat-2 (error)) - n_semis, _ = parse_array_separator(ps; skip_newlines=true) + bump_trivia(ps) + dim, _ = parse_array_separator(ps, Ref(:unknown)) min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") bump_closing_token(ps, closer) - return (K"ncat", set_numeric_flags(n_semis)) + return (K"ncat", set_numeric_flags(dim)) end parse_eq_star(ps) k = peek(ps, skip_newlines=true) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 5b40a090dd45d..9a032f7bc940e 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -113,15 +113,17 @@ function is_whitespace(t) kind(t) in (K"Whitespace", K"NewlineWs") end -function _kind_str(k::Kind) - _kind_to_str[k] -end - """ Return the string representation of a token kind, or `nothing` if the kind represents a class of tokens like K"Identifier". + +When `unique=true` only return a string when the kind uniquely defines the +corresponding input token, otherwise return `nothing`. When `unique=false`, +return the name of the kind. + +TODO: Replace `untokenize()` with `Base.string()`? 
""" -function untokenize(k::Kind) - get(_kind_to_str_unique, k, nothing) +function untokenize(k::Kind; unique=true) + get(unique ? _kind_to_str_unique : _kind_to_str, k, nothing) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b05524c1e18cd..b46f992e218a5 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -625,9 +625,6 @@ tests = [ "[x y]" => "(hcat x y)" # Mismatched rows "[x y ; z]" => "(vcat (row x y) z)" - # Double semicolon with spaces allowed (only) for line continuation - "[x y ;;\n z w]" => "(hcat x y z w)" - # "[x y ;; z w]" => "(hcat x y (error) z w)" # FIXME # Single elements in rows ((v=v"1.7",), "[x ; y ;; z ]") => "(ncat-2 (nrow-1 x y) z)" ((v=v"1.7",), "[x y ;;; z ]") => "(ncat-3 (row x y) z)" @@ -638,6 +635,24 @@ tests = [ # Column major ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" + # Array separators + # Newlines before semicolons are not significant + "[a \n ;]" => "(vcat a)" + # Newlines after semicolons are not significant + "[a ; \n]" => "(vcat a)" + "[a ; \n\n b]" => "(vcat a b)" + ((v=v"1.7",), "[a ;; \n b]") => "(ncat-2 a b)" + # In hcat with spaces as separators, `;;` is a line + # continuation character + ((v=v"1.7",), "[a b ;; \n c]") => "(hcat a b c)" + ((v=v"1.7",), "[a b \n ;; c]") => "(ncat-2 (row a b (error-t)) c)" + # Can't mix spaces and multiple ;'s + ((v=v"1.7",), "[a b ;; c]") => "(ncat-2 (row a b (error-t)) c)" + # Treat a linebreak prior to a value as a semicolon (ie, separator for + # the first dimension) if no previous semicolons observed + "[a \n b]" => "(vcat a b)" + # Can't mix multiple ;'s and spaces + ((v=v"1.7",), "[a ;; b c]") => "(ncat-2 a (row b (error-t) c))" # Empty nd arrays ((v=v"1.8",), "[;]") => "(ncat-1)" ((v=v"1.8",), "[;;]") => "(ncat-2)" From ba9be2c563583b57f41813e37c5afe4879d83a0f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 16 Feb 2022 17:35:58 
+1000 Subject: [PATCH 0390/1109] Compat for odd Exprs in long form anonymous functions (JuliaLang/JuliaSyntax.jl#6) The following construct is parsed oddly by the reference parser with the tuple omitted from the argument list. Add a compatibility hack for that during Expr conversion. function (xs...) body end With this we now have compatible parsing for all of the stdlib for Julia 1.6 and 1.7, so turn on this test by default in those cases. --- JuliaSyntax/README.md | 4 ++++ JuliaSyntax/src/syntax_tree.jl | 10 ++++++++++ JuliaSyntax/test/parse_packages.jl | 5 +++-- JuliaSyntax/test/syntax_tree.jl | 7 +++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 056c68ff7790a..128f93f8330d2 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -546,6 +546,10 @@ parsing `key=val` pairs inside parentheses. or (b) all lines should be dedented only one character, as that's the matching prefix. +* Parsing of anonymous function arguments is somewhat inconsistent. + `function (xs...) \n body end` parses the argument list as `(... xs)`, whereas + `function (x) \n body end` parses the argument list as `(tuple x)`. + # Comparisons to other packages ### Official Julia compiler diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index bdbf60c612aec..e3b807381fb25 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -415,6 +415,16 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) # Add block for source locations args[2] = Expr(:block, loc, args[2]) end + elseif headsym == :function + if length(args) > 1 && Meta.isexpr(args[1], :tuple) + # Convert to weird Expr forms for long-form anonymous functions. + # + # (function (tuple (... xs)) body) ==> (function (... xs) body) + if length(args[1].args) == 1 && Meta.isexpr(args[1].args[1], :...) + # function (xs...) 
\n body end + args[1] = args[1].args[1] + end + end end if headsym == :inert || (headsym == :quote && length(args) == 1 && !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 69fbbd2175370..bdf05b0ba166f 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -20,8 +20,9 @@ base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") end end -if haskey(ENV, "PARSE_STDLIB") -# TODO: Turn on by default + +if VERSION < v"1.8-DEV" || haskey(ENV, "PARSE_STDLIB") +# TODO: Fix on 1.8 @testset "Parse Julia stdlib at $(Sys.STDLIB)" begin for stdlib in readdir(Sys.STDLIB) diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 4eb94558fc9f8..669994e7c4bc2 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -24,4 +24,11 @@ Expr(:(=), Expr(:call, :f), :xs), Expr(:block)) end + + @testset "Long form anonymous functions" begin + @test parseall(Expr, "function (xs...)\nbody end", rule=:statement) == + Expr(:function, + Expr(:..., :xs), + Expr(:block, :body)) + end end From dd7177da05ecda541a8656e0df4fbee59ab79059 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 17 Feb 2022 16:31:37 +1000 Subject: [PATCH 0391/1109] Fix parser comparison tool. stdlib now parses for 1.6 - 1.8 (JuliaLang/JuliaSyntax.jl#9) It turns out that SparseArrays has some unused .jl files in the tests with broken syntax in 1.8 dev. Fix the JuliaSyntax parser comparison tool to ignore these cases. --- JuliaSyntax/src/parser.jl | 1 - JuliaSyntax/test/parse_packages.jl | 5 ----- JuliaSyntax/test/test_utils.jl | 31 ++++++++++++++++++++++++------ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2c1c7669f4f0e..7a19f6060a4d2 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2677,7 +2677,6 @@ end # tightest: ... 
< `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding # power of 0 for whitespace and negative numbers for other separators. # -# FIXME: Error messages for mixed spaces and ;; delimiters function parse_array_separator(ps, array_order) sep_mismatch_err = "cannot mix space and ;; separators in an array expression, except to wrap a line" mark = position(ps) diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index bdf05b0ba166f..52338b7577175 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -21,9 +21,6 @@ end end -if VERSION < v"1.8-DEV" || haskey(ENV, "PARSE_STDLIB") -# TODO: Fix on 1.8 - @testset "Parse Julia stdlib at $(Sys.STDLIB)" begin for stdlib in readdir(Sys.STDLIB) fulldir = joinpath(Sys.STDLIB, stdlib) @@ -34,5 +31,3 @@ if VERSION < v"1.8-DEV" || haskey(ENV, "PARSE_STDLIB") end end end - -end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 65e53e9654632..5ec8a74f70590 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -38,11 +38,30 @@ function remove_all_linenums!(ex) remove_macro_linenums!(ex) end -function parsers_agree_on_file(path) - text = read(path, String) - ex = parseall(Expr, text, filename=path) - fl_ex = flisp_parse_all(text, filename=path) - JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) +function parsers_agree_on_file(filename) + text = try + read(filename, String) + catch + # Something went wrong reading the file. This isn't a parser failure so + # ignore this case. + return true + end + fl_ex = flisp_parse_all(text, filename=filename) + if Meta.isexpr(fl_ex, :toplevel) && !isempty(fl_ex.args) && + Meta.isexpr(fl_ex.args[end], (:error, :incomplete)) + # Reference parser failed. This generally indicates a broken file not a + # parser problem, so ignore this case. 
+ return true + end + try + ex, diagnostics, _ = parse(Expr, text, filename=filename) + return !JuliaSyntax.any_error(diagnostics) && + JuliaSyntax.remove_linenums!(ex) == + JuliaSyntax.remove_linenums!(fl_ex) + catch exc + @error "Parsing failed" path exception=current_exceptions() + return false + end end function find_source_in_path(basedir) @@ -155,7 +174,7 @@ function reduce_all_failures_in_path(basedir, outdir) rm(outdir, force=true, recursive=true) mkpath(outdir) for filename in find_source_in_path(basedir) - if !(try parsers_agree_on_file(filename) catch exc false end) + if !parsers_agree_on_file(filename) @info "Found failure" filename bn,_ = splitext(basename(filename)) outname = joinpath(outdir, "$bn.jl") From 73bd4d1f4a141dc8f7cb8c294631430ec357c1e9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 18 Feb 2022 12:24:52 +1000 Subject: [PATCH 0392/1109] Add diff of AST dump to itest_parse test tool --- JuliaSyntax/test/test_utils.jl | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 5ec8a74f70590..01f18d5506e14 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -219,11 +219,24 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") f_ex = JuliaSyntax.remove_linenums!(Meta.parse(code, raise=false)) if JuliaSyntax.remove_linenums!(ex) != f_ex - println(stdout, "\n\n# AST dump") - dump(ex) - printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) show(stdout, MIME"text/plain"(), f_ex) + + printstyled(stdout, "\n\n# Diff of AST dump:\n", color=:red) + if Sys.isunix() + mktemp() do path1, io1 + mktemp() do path2, io2 + dump(io1, ex); close(io1) + dump(io2, f_ex); close(io2) + run(ignorestatus(`diff -U10 --color=always $path1 $path2`)) + end + end + else + dump(ex) + println("------------------------------------") + dump(f_ex) + end + return (ex, f_ex) # return (code, stream, t, s, ex) end nothing 
From 34ab4e3303fb624f4b873b864ab6461004b9d8b1 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 18 Feb 2022 11:55:52 +1000 Subject: [PATCH 0393/1109] Tokenize: Fix raw triple string splitting with `\` before newline --- JuliaSyntax/Tokenize/src/lexer.jl | 5 +---- JuliaSyntax/Tokenize/test/lexer.jl | 32 ++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index d204bd4bb3ed5..e17850b544125 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -523,12 +523,9 @@ function lex_string_chunk(l) c = readchar(l) if c == '\\' n = 1 - while true + while peekchar(l) == '\\' readchar(l) n += 1 - if peekchar(l) != '\\' - break - end end if peekchar(l) == state.delim && !iseven(n) readchar(l) diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 10ee9a422068e..ca255b0e8cd3c 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -349,22 +349,38 @@ end @testset "string escaped newline whitespace" begin ts = collect(tokenize("\"x\\\n \ty\"")) - @test ts[1] ~ (T.DQUOTE , "\"") + @test ts[1] ~ (T.DQUOTE, "\"") @test ts[2] ~ (T.STRING, "x") @test ts[3] ~ (T.WHITESPACE, "\\\n \t") @test ts[4] ~ (T.STRING, "y") - @test ts[5] ~ (T.DQUOTE , "\"") + @test ts[5] ~ (T.DQUOTE, "\"") + + # No newline escape for raw strings + ts = collect(tokenize("r\"x\\\ny\"")) + @test ts[1] ~ (T.IDENTIFIER , "r") + @test ts[2] ~ (T.DQUOTE, "\"") + @test ts[3] ~ (T.STRING, "x\\\ny") + @test ts[4] ~ (T.DQUOTE , "\"") end @testset "triple quoted string line splitting" begin ts = collect(tokenize("\"\"\"\nx\r\ny\rz\n\r\"\"\"")) @test ts[1] ~ (T.TRIPLE_DQUOTE , "\"\"\"") - @test ts[2] ~ (T.STRING, "\n") - @test ts[3] ~ (T.STRING, "x\r\n") - @test ts[4] ~ (T.STRING, "y\r") - @test ts[5] ~ (T.STRING, "z\n") - @test ts[6] ~ (T.STRING, "\r") - @test ts[7] ~ (T.TRIPLE_DQUOTE, "\"\"\"") + @test 
ts[2] ~ (T.STRING , "\n") + @test ts[3] ~ (T.STRING , "x\r\n") + @test ts[4] ~ (T.STRING , "y\r") + @test ts[5] ~ (T.STRING , "z\n") + @test ts[6] ~ (T.STRING , "\r") + @test ts[7] ~ (T.TRIPLE_DQUOTE , "\"\"\"") + + # Also for raw strings + ts = collect(tokenize("r\"\"\"\nx\ny\"\"\"")) + @test ts[1] ~ (T.IDENTIFIER , "r") + @test ts[2] ~ (T.TRIPLE_DQUOTE , "\"\"\"") + @test ts[3] ~ (T.STRING , "\n") + @test ts[4] ~ (T.STRING , "x\n") + @test ts[5] ~ (T.STRING , "y") + @test ts[6] ~ (T.TRIPLE_DQUOTE , "\"\"\"") end @testset "interpolation" begin From 965d6205bc5433d1c640483cb7c1cbf4f0997fca Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 18 Feb 2022 11:57:23 +1000 Subject: [PATCH 0394/1109] Add parser test for raw triple quoted string with backslash --- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7a19f6060a4d2..a50b088538689 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1601,7 +1601,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Triple quoted procesing for custom strings # r"""\nx""" ==> (macrocall @r_str "x") # r"""\n x\n y""" ==> (macrocall @r_str (string-sr "x\n" "y")) - + # r"""\n x\\n y""" ==> (macrocall @r_str (string-sr "x\\\n" "y")) + # # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. outk = is_string_delim(k) ? 
K"StringMacroName" : K"CmdMacroName" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b46f992e218a5..64b777eacfa29 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -301,8 +301,9 @@ tests = [ "x\"\"" => """(macrocall @x_str "")""" "x``" => """(macrocall @x_cmd "")""" # Triple quoted procesing for custom strings - "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str "x")""" - "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\n" "y"))""" + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str "x")""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\n" "y"))""" + "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\\\n" "y"))""" # Macro sufficies can include keywords and numbers "x\"s\"y" => """(macrocall @x_str "s" "y")""" "x\"s\"end" => """(macrocall @x_str "s" "end")""" From 8b77897e172395afc3dcb64d61eb911e8e04f541 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 20 Feb 2022 08:52:33 +1000 Subject: [PATCH 0395/1109] Add flags to error node printing Here we can distinguish `(error-t)` from `(error)` --- JuliaSyntax/src/syntax_tree.jl | 2 +- JuliaSyntax/test/parser.jl | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index e3b807381fb25..8f3dc3350db95 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -150,7 +150,7 @@ end function _show_syntax_node_sexpr(io, node) if !haschildren(node) if is_error(node) - print(io, "(error)") + print(io, "(", untokenize(head(node)), ")") else print(io, node.val isa Symbol ? string(node.val) : repr(node.val)) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 64b777eacfa29..f8e0a533fa474 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -56,11 +56,11 @@ tests = [ "a ? b :\nc" => "(if a b c)" "a ? 
b : c:d" => "(if a b (call-i c : d))" # Following are errors but should recover - "a? b : c" => "(if a (error) b c)" - "a ?b : c" => "(if a (error) b c)" - "a ? b: c" => "(if a b (error) c)" - "a ? b :c" => "(if a b (error) c)" - "a ? b c" => "(if a b (error) c)" + "a? b : c" => "(if a (error-t) b c)" + "a ?b : c" => "(if a (error-t) b c)" + "a ? b: c" => "(if a b (error-t) c)" + "a ? b :c" => "(if a b (error-t) c)" + "a ? b c" => "(if a b (error-t) c)" ], JuliaSyntax.parse_arrow => [ "x → y" => "(call-i x → y)" @@ -142,8 +142,8 @@ tests = [ "(x-1)y" => "(call-i (call-i x - 1) * y)" "0xenomorph" => "0x0e" # ie, not juxtoposition # errors - "\"a\"\"b\"" => "(call-i \"a\" * (error) \"b\")" - "\"a\"x" => "(call-i \"a\" * (error) x)" + "\"a\"\"b\"" => "(call-i \"a\" * (error-t) \"b\")" + "\"a\"x" => "(call-i \"a\" * (error-t) x)" ], JuliaSyntax.parse_unary => [ "+2" => "2" @@ -587,7 +587,7 @@ tests = [ "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" # parse_generator "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" - "[(x)for x in xs]" => "(comprehension (generator x (error) (= x xs)))" + "[(x)for x in xs]" => "(comprehension (generator x (error-t) (= x xs)))" "(a for x in xs if cond)" => "(generator a (filter (= x xs) cond))" "(xy for x in xs for y in ys)" => "(flatten xy (= x xs) (= y ys))" "(xy for x in xs for y in ys for z in zs)" => "(flatten xy (= x xs) (= y ys) (= z zs))" @@ -700,7 +700,7 @@ tests = [ "\"\"" => "\"\"" "\"\"\"\n \"\"\"" => "\"\"" # Missing delimiter - "\"str" => "\"str\" (error)" + "\"str" => "\"str\" (error-t)" # String interpolations "\"\$x\$y\$z\"" => "(string x y z)" "\"\$(x)\"" => "(string x)" From dacd9144e4ae5de8d6dc21d21861e03927780ec0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 20 Feb 2022 08:54:31 +1000 Subject: [PATCH 0396/1109] Add placehold kind K"None" and use K"nothing" more precisely K"nothing" now means 
the literal Julia `nothing`, and K"None" is used as a placeholder in the code instead. This also allows us to remove the redundant NothingLiteral kind. --- JuliaSyntax/Tokenize/src/token_kinds.jl | 4 ++-- JuliaSyntax/src/parse_stream.jl | 12 ++++++------ JuliaSyntax/src/parser.jl | 6 +++--- JuliaSyntax/src/syntax_tree.jl | 2 +- JuliaSyntax/src/token_kinds.jl | 7 +++---- JuliaSyntax/test/parser.jl | 4 ++-- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 10cd82dc1091d..8b4b0fac614b0 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -1,4 +1,5 @@ @enum(Kind, + NONE, # Placeholder; never emitted by lexer ENDMARKER, # EOF ERROR, COMMENT, # aadsdsa, #= fdsf #= @@ -51,7 +52,7 @@ begin_cstparser, INVISIBLE_BRACKETS, - NOTHING, + NOTHING, # A literal `nothing` WS, SEMICOLON_WS, NEWLINE_WS, @@ -835,7 +836,6 @@ # width tokens to keep the parse tree more uniform. begin_parser_tokens, TOMBSTONE, # Empty placeholder for kind to be filled later - NOTHING_LITERAL, # A literal Julia `nothing` in the AST # Macro names are modelled as a special kind of identifier because the # @ may not be attached to the macro name in the source (or may not be diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 604f0c67bc4b4..ea0dce7e2dc44 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -121,7 +121,7 @@ TODO: Optimize this data structure? It's very large at the moment. 
""" struct TaggedRange head::SyntaxHead # Kind,flags - orig_kind::Kind # Kind of the original token for leaf tokens, or K"Nothing" + orig_kind::Kind # Kind of the original token for leaf tokens, or K"None" first_byte::UInt32 # First byte in the input text last_byte::UInt32 # Last byte in the input text start_mark::UInt32 # Index of first emitted range which this range covers @@ -362,7 +362,7 @@ end # Bump the next `n` tokens # flags and remap_kind are applied to any non-trivia tokens -function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") +function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None") if n <= 0 return end @@ -375,7 +375,7 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"Nothing") is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") f = is_trivia ? TRIVIA_FLAG : flags is_dotted(tok) && (f |= DOTOP_FLAG) - outk = (is_trivia || remap_kind == K"Nothing") ? k : remap_kind + outk = (is_trivia || remap_kind == K"None") ? k : remap_kind range = TaggedRange(SyntaxHead(outk, f), k, first_byte(tok), last_byte(tok), lastindex(stream.ranges)+1) push!(stream.ranges, range) @@ -393,7 +393,7 @@ end Shift the current token from the input to the output, adding the given flags. """ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, - error=nothing, remap_kind::Kind=K"Nothing") + error=nothing, remap_kind::Kind=K"None") emark = position(stream) _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) if !isnothing(error) @@ -438,7 +438,7 @@ lexing ambiguities. There's no special whitespace handling — bump any whitespace if necessary with bump_trivia. 
""" function bump_glue(stream::ParseStream, kind, flags, num_tokens) - span = TaggedRange(SyntaxHead(kind, flags), K"Nothing", + span = TaggedRange(SyntaxHead(kind, flags), K"None", first_byte(stream.lookahead[1]), last_byte(stream.lookahead[num_tokens]), lastindex(stream.ranges) + 1) @@ -541,7 +541,7 @@ should be a previous return value of `position()`. """ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - range = TaggedRange(SyntaxHead(kind, flags), K"Nothing", mark.input_byte, + range = TaggedRange(SyntaxHead(kind, flags), K"None", mark.input_byte, stream.next_byte-1, mark.output_index+1) if !isnothing(error) _emit_diagnostic(stream, first_byte(range), last_byte(range), error=error) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a50b088538689..0306f029a5850 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1779,7 +1779,7 @@ function parse_resword(ps::ParseState) if k == K"NewlineWs" || is_closing_token(ps, k) # return\nx ==> (return nothing) # return) ==> (return nothing) - bump_invisible(ps, K"NothingLiteral") + bump_invisible(ps, K"nothing") else # return x ==> (return x) # return x,y ==> (return (tuple x y)) @@ -1889,7 +1889,7 @@ function parse_const_local_global(ps) mark = position(ps) scope_mark = mark has_const = false - scope_k = K"Nothing" + scope_k = K"None" k = peek(ps) if k in KSet`global local` # global x ==> (global x) @@ -1947,7 +1947,7 @@ function parse_const_local_global(ps) min_supported_version(v"1.8", ps, beforevar_mark, "`const` struct field without assignment") end - if scope_k != K"Nothing" + if scope_k != K"None" emit(ps, scope_mark, scope_k) end if has_const diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 8f3dc3350db95..383d3422f7ff2 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -55,7 +55,7 @@ function SyntaxNode(source::SourceFile, 
raw::GreenNode{SyntaxHead}, position::In isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens Symbol(normalize_identifier(val_str)) - elseif k == K"NothingLiteral" + elseif k == K"nothing" nothing elseif k == K"error" ErrorVal() diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index 2389aefe581f6..b1944f66dcfee 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -5,6 +5,7 @@ const _str_to_kind = let Ts = TzTokens Dict([ +"None" => Ts.NONE "EndMarker" => Ts.ENDMARKER "error" => Ts.ERROR "Comment" => Ts.COMMENT @@ -53,9 +54,8 @@ Dict([ "var" => Ts.VAR "END_KEYWORDS" => Ts.end_keywords -# FIXME: Define precisely what Nothing means; integrate better with other tokens. "BEGIN_CSTPARSER" => Ts.begin_cstparser -"Nothing" => Ts.NOTHING +"nothing" => Ts.NOTHING "NewlineWs" => Ts.NEWLINE_WS "END_CSTPARSER" => Ts.end_cstparser @@ -819,7 +819,6 @@ Dict([ "BEGIN_PARSER_TOKENS" => Ts.begin_parser_tokens "TOMBSTONE" => Ts.TOMBSTONE -"NothingLiteral" => Ts.NOTHING_LITERAL # Macro names are modelled as a special kind of identifier because the # @ may not be attached to the macro name in the source (or may not be @@ -891,7 +890,7 @@ for kw in split(""" vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator filter flatten comprehension typed_comprehension - error Nothing + error nothing true false None """) _kind_to_str_unique[_str_to_kind[kw]] = kw end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f8e0a533fa474..f85dc770129aa 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,10 +1,10 @@ function test_parse(production, code; v=v"1.6") stream = ParseStream(code, version=v) production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"Nothing") + t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") source = SourceFile(code) s = SyntaxNode(source, t) - if 
JuliaSyntax.kind(s) == K"Nothing" + if JuliaSyntax.kind(s) == K"None" join([sprint(show, MIME("text/x.sexpression"), c) for c in children(s)], ' ') else sprint(show, MIME("text/x.sexpression"), s) From ebec700d0e605fa878b7306946f89d4f9043e7df Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 20 Feb 2022 09:33:47 +1000 Subject: [PATCH 0397/1109] Fix parsing of broken syntax `[x` --- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0306f029a5850..d813a6af27aca 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2781,9 +2781,10 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) end parse_eq_star(ps) k = peek(ps, skip_newlines=true) - if k == K"," || k == closer + if k == K"," || (is_closing_token(ps, k) && k != K";") if k == K"," # [x,] ==> (vect x) + # [x ==> (vect x) bump(ps, TRIVIA_FLAG) end # [x] ==> (vect x) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f85dc770129aa..cd920d9ae02f0 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -581,6 +581,7 @@ tests = [ "[]" => "(vect)" "[x,]" => "(vect x)" "[x]" => "(vect x)" + "[x" => "(vect x (error-t))" "[x \n ]" => "(vect x)" "[x \n\n ]" => "(vect x)" "[x for a in as]" => "(comprehension (generator x (= a as)))" From 32e822dc5cf40e43dbe3dc7e6692a491774dd6ee Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Sun, 20 Feb 2022 13:51:38 +1000 Subject: [PATCH 0398/1109] Improvements to core parsing hooks (JuliaLang/JuliaSyntax.jl#12) * Make ParseError work with Meta.parse() * Make incremental parsing of statements work * Add some basic tests --- JuliaSyntax/src/hooks.jl | 37 ++++++++++++++++++++++++++++++------ JuliaSyntax/src/parser.jl | 4 ++-- JuliaSyntax/test/hooks.jl | 16 ++++++++++++++++ JuliaSyntax/test/runtests.jl | 3 +-- 4 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 
JuliaSyntax/test/hooks.jl diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 1f0df0c497799..505d2b5fa961d 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,5 +1,5 @@ # Adaptor for the API/ABI expected by the Julia runtime code. -function core_parser_hook(code, filename, offset, options) +function core_parser_hook(code, filename, lineno, offset, options) try # TODO: Check that we do all this input wrangling without copying the # code buffer @@ -13,19 +13,32 @@ function core_parser_hook(code, filename, offset, options) stream = ParseStream(io) rule = options === :all ? :toplevel : options + if rule !== :toplevel + # To copy the flisp parser driver, we ignore leading trivia when + # parsing statements or atoms + bump_trivia(stream) + end JuliaSyntax.parse(stream; rule=rule) - ex = if any_error(stream) + if any_error(stream) e = Expr(:error, ParseError(SourceFile(code), stream.diagnostics)) - options === :all ? Expr(:toplevel, e) : e + ex = options === :all ? Expr(:toplevel, e) : e else - build_tree(Expr, stream) + ex = build_tree(Expr, stream, wrap_toplevel_as_kind=K"None") + if Meta.isexpr(ex, :None) + # The None wrapping is only to give somewhere for trivia to be + # attached; unwrap! + ex = only(ex.args) + end end - pos = last_byte(stream) - 1 + # Note the next byte in 1-based indexing is `last_byte(stream) + 1` but + # the Core hook must return an offset (ie, it's 0-based) so the factors + # of one cancel here. 
+ last_offset = last_byte(stream) # Rewrap result in an svec for use by the C code - return Core.svec(ex, pos) + return Core.svec(ex, last_offset) catch exc @error("JuliaSyntax parser failed — falling back to flisp!", exception=(exc,catch_backtrace()), @@ -35,6 +48,18 @@ function core_parser_hook(code, filename, offset, options) return Core.Compiler.fl_parse(code, filename, offset, options) end +# Core._parse gained a `lineno` argument in +# https://github.com/JuliaLang/julia/pull/43876 +# Prior to this, the following signature was needed: +function core_parser_hook(code, filename, offset, options) + core_parser_hook(code, filename, LineNumberNode(0), offset, options) +end + +# Hack: +# Meta.parse() attempts to construct a ParseError from a string if it receives +# `Expr(:error)`. +Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e + """ Connect the JuliaSyntax parser to the Julia runtime so that it replaces the flisp parser for all parsing work. diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d813a6af27aca..ae290a3c226bf 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2969,11 +2969,11 @@ function parse_brackets(after_parse::Function, # (x \n\n for a in as) ==> (generator x (= a as)) parse_generator(ps, mark) else - k_str = untokenize(k) - ck_str = untokenize(closing_kind) if is_closing_token(ps, k) + k_str = untokenize(k, unique=false) emit_diagnostic(ps, error="unexpected `$k_str` in bracketed list") else + ck_str = untokenize(closing_kind) emit_diagnostic(ps, error="missing comma or $ck_str in bracketed list") end # Recovery done after loop diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl new file mode 100644 index 0000000000000..fc2f30a42a629 --- /dev/null +++ b/JuliaSyntax/test/hooks.jl @@ -0,0 +1,16 @@ +@testset "Hooks for Core integration" begin + JuliaSyntax.enable_in_core!() + + @test Meta.parse("x + 1") == :(x + 1) + @test Meta.parse("x + 1", 1) == (:(x + 1), 6) + + # Test that parsing 
statements incrementally works + @test Meta.parse("x + 1\n(y)", 1) == (:(x + 1), 6) + @test Meta.parse("x + 1\n(y)", 6) == (:y, 10) + + # Check that Meta.parse throws the JuliaSyntax.ParseError rather than + # Meta.ParseError when Core integration is enabled. + @test_throws JuliaSyntax.ParseError Meta.parse("[x") + + JuliaSyntax.disable_in_core!() +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 072a79e1e5113..ea0c0a1b9cfcd 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -19,11 +19,10 @@ include("parse_stream.jl") include("parser.jl") include("parser_api.jl") include("syntax_tree.jl") - @testset "Parsing values from strings" begin include("value_parsing.jl") end - +include("hooks.jl") include("parse_packages.jl") # Prototypes From a5cad683a5554742320a0ec4a7c3670671d811e4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 21 Feb 2022 18:15:13 +1000 Subject: [PATCH 0399/1109] Fix @ doc parsing for triple strings with trailing whitespace trivia (JuliaLang/JuliaSyntax.jl#13) --- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/parser.jl | 2 ++ JuliaSyntax/test/parser.jl | 2 ++ JuliaSyntax/test/test_utils.jl | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ea0dce7e2dc44..a048211c559a6 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -341,7 +341,7 @@ function peek_behind(stream::ParseStream; skip_trivia::Bool=true) if skip_trivia for i = length(stream.ranges):-1:1 r = stream.ranges[i] - if !is_trivia(head(r)) + if !is_trivia(head(r)) && kind(r) != K"TOMBSTONE" return _peek_behind_fields(stream.ranges, i) end end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ae290a3c226bf..c63a648ea9269 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -501,6 +501,8 @@ function parse_docstring(ps::ParseState, down=parse_eq) else # "doc" 
foo ==> (macrocall core_@doc "doc" foo) # "doc $x" foo ==> (macrocall core_@doc (string "doc " x) foo) + # Allow docstrings with embedded trailing whitespace trivia + # """\n doc\n """ foo ==> (macrocall core_@doc "doc\n" foo) end if is_doc reset_node!(ps, atdoc_mark, kind=K"core_@doc") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index cd920d9ae02f0..c3f75be286936 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -721,6 +721,8 @@ tests = [ """ "doc" \n foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" """ "doc" foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" """ "doc \$x" foo """ => """(macrocall :(Core.var"@doc") (string "doc " x) foo)""" + # Allow docstrings with embedded trailing whitespace trivia + "\"\"\"\n doc\n \"\"\" foo" => """(macrocall :(Core.var"@doc") "doc\\n" foo)""" ], ] diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 01f18d5506e14..49cdda6626606 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -236,7 +236,7 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") println("------------------------------------") dump(f_ex) end - return (ex, f_ex) + # return (ex, f_ex) # return (code, stream, t, s, ex) end nothing From 00f1eb50d65de18ad1bca71f438d02b2e500d69e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 21 Feb 2022 18:43:42 +1000 Subject: [PATCH 0400/1109] Parse standalone .& as (. 
&) (JuliaLang/JuliaSyntax.jl#14) --- JuliaSyntax/src/parser.jl | 16 +++++++++------- JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c63a648ea9269..c76effdb5d939 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -304,13 +304,13 @@ function is_both_unary_and_binary(t) end # operators handled by parse_unary at the start of an expression -function is_initial_operator(k) - k = kind(k) +function is_initial_operator(t) + k = kind(t) # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl is_operator(k) && !is_word_operator(k) && !(k in KSet`: ' .' ?`) && - !is_syntactic_unary_op(k) && + !(is_syntactic_unary_op(k) && !is_dotted(t)) && !is_syntactic_operator(k) end @@ -1078,8 +1078,9 @@ end function parse_unary(ps::ParseState) mark = position(ps) bump_trivia(ps) - k = peek(ps) - if !is_initial_operator(k) + t = peek_token(ps) + k = kind(t) + if !is_initial_operator(t) # :T ==> (quote T) # in::T ==> (:: in T) # isa::T ==> (:: isa T) @@ -1128,16 +1129,17 @@ function parse_unary_call(ps::ParseState) k2 = kind(t2) if is_closing_token(ps, k2) || k2 in KSet`NewlineWs =` if is_dotted(op_t) - # standalone dotted operators are parsed as (|.| op) + # Standalone dotted operators are parsed as (|.| op) # .+ ==> (. +) # .+\n ==> (. +) # .+ = ==> (. +) # .+) ==> (. +) + # .& ==> (. &) bump_trivia(ps) bump_split(ps, (1, K".", TRIVIA_FLAG), (0, op_k, EMPTY_FLAGS)) emit(ps, mark, K".") else - # return operator by itself, as in + # Standalone non-dotted operators # +) ==> + bump(ps) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index c3f75be286936..455dd1acdf762 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -156,6 +156,8 @@ tests = [ ".+\n" => "(. +)" ".+ =" => "(. +)" ".+)" => "(. +)" + ".&" => "(. 
&)" + # Standalone non-dotted operators "+)" => "+" # Call with type parameters or non-unary prefix call "+{T}(x::T)" => "(call (curly + T) (:: x T))" From 2b4110344a5341e38d0276b4cfa40801b8aa456c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 21 Feb 2022 22:46:49 +1000 Subject: [PATCH 0401/1109] Add various inline parser test cases Some of these derive from Base's test/syntax.jl --- JuliaSyntax/src/parser.jl | 16 +++++++++++++--- JuliaSyntax/test/parser.jl | 17 +++++++++++++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c76effdb5d939..6ccced0e5e632 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -465,7 +465,8 @@ function parse_stmts(ps::ParseState) bump(ps) end if junk_mark != position(ps) - emit(ps, junk_mark, K"error", + # x y ==> x (error-t y) + emit(ps, junk_mark, K"error", TRIVIA_FLAG, error="extra tokens after end of expression") end if do_emit @@ -1016,6 +1017,12 @@ end function is_juxtapose(ps, prev_k, t) k = kind(t) + # Not juxtaposition - parse_juxtapose will consume only the first token. 
+ # x.3 ==> x + # sqrt(2)2 ==> (call sqrt 2) + # x' y ==> x + # x 'y ==> x + return !t.had_whitespace && (is_number(prev_k) || (!is_number(k) && # disallow "x.3" and "sqrt(2)2" @@ -1037,6 +1044,7 @@ end # 2(x) ==> (call-i 2 * x) # (2)(3)x ==> (call-i 2 * 3 x) # (x-1)y ==> (call-i (call-i x - 1) * y) +# x'y ==> x # # flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) @@ -2152,7 +2160,8 @@ function parse_catch(ps::ParseState) # try x catch \n y end ==> (try (block x) false (block y) false false) bump_invisible(ps, K"false") else - # try x catch e y end ==> (try (block x) e (block y) false false) + # try x catch e y end ==> (try (block x) e (block y) false false) + # try x catch $e y end ==> (try (block x) ($ e) (block y) false false) parse_identifier_or_interpolate(ps) end parse_block(ps) @@ -3238,7 +3247,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) # todo: Reorder to put most likely tokens first? if leading_kind == K":" # symbol/expression quote - # :foo => (quote foo) + # :foo ==> (quote foo) + # : foo ==> (quote (error-t) foo) t = peek_token(ps, 2) k = kind(t) if is_closing_token(ps, k) && (!is_keyword(k) || t.had_whitespace) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 455dd1acdf762..d30c3eff132dc 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -34,6 +34,7 @@ tests = [ "a;;;b;;" => "(toplevel a b)" """ "x" a ; "y" b """ => """(toplevel (macrocall :(Core.var"@doc") "x" a) (macrocall :(Core.var"@doc") "y" b))""" + "x y" => "x (error-t y)" ], JuliaSyntax.parse_eq => [ # parse_assignment @@ -140,10 +141,16 @@ tests = [ "2(x)" => "(call-i 2 * x)" "(2)(3)x" => "(call-i 2 * 3 x)" "(x-1)y" => "(call-i (call-i x - 1) * y)" - "0xenomorph" => "0x0e" # ie, not juxtoposition + "x'y" => "(call-i (' x) * y)" # errors "\"a\"\"b\"" => "(call-i \"a\" * (error-t) \"b\")" "\"a\"x" => "(call-i \"a\" * (error-t) x)" + # Not juxtaposition - parse_juxtapose will consume only the first token. 
+ "x.3" => "x" + "sqrt(2)2" => "(call sqrt 2)" + "x' y" => "(' x)" + "x 'y" => "x" + "0xenomorph" => "0x0e" ], JuliaSyntax.parse_unary => [ "+2" => "2" @@ -461,6 +468,7 @@ tests = [ "try x catch ; y end" => "(try (block x) false (block y) false false)" "try x catch \n y end" => "(try (block x) false (block y) false false)" "try x catch e y end" => "(try (block x) e (block y) false false)" + "try x catch \$e y end" => "(try (block x) (\$ e) (block y) false false)" "try x finally y end" => "(try (block x) false false false (block y))" # v1.8 only ((v=v"1.8",), "try catch ; else end") => "(try (block) false (block) (block) false)" @@ -549,7 +557,8 @@ tests = [ "(x \n\n for a in as)" => "(generator x (= a as))" ], JuliaSyntax.parse_atom => [ - ":foo" => "(quote foo)" + ":foo" => "(quote foo)" + ": foo" => "(quote (error-t) foo)" # Literal colons ":)" => ":" ": end" => ":" @@ -744,6 +753,10 @@ broken_tests = [ "@S[a b]" => "(macrocall S (hcat a b))" "@S[a; b]" => "(macrocall S (vcat a b))" "@S[a; b ;; c; d]" => "(macrocall S (ncat-2 (nrow-1 a b) (nrow-1 c d)))" + # Bad character literals + "'\\xff'" => "(error '\\xff')" + "'\\x80'" => "(error '\\x80')" + "'ab'" => "(error 'ab')" ] JuliaSyntax.parse_call => [ # kw's in ref From 327a67b7930426bc37b6328862c8e7a358be820c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 22 Feb 2022 14:48:22 +1000 Subject: [PATCH 0402/1109] Ensure CI runs on pushes to the main branch --- JuliaSyntax/.github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 60a91056805d7..e25245c9277f4 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -2,7 +2,7 @@ name: CI on: push: branches: - - master + - main tags: '*' pull_request: jobs: From 77315a794d2488e6fc34bd6f32fe03394b080d92 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 22 Feb 2022 18:27:28 +1000 Subject: [PATCH 0403/1109] 
Tweak version bounds message for else after catch --- JuliaSyntax/src/parser.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 6ccced0e5e632..6fca238513970 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2123,7 +2123,7 @@ function parse_try(ps) emit(ps, else_mark, K"error", error="Expected `catch` before `else`") end #v1.7: try catch ; else end ==> (try (block) false (block) (error (block)) false) - min_supported_version(v"1.8", ps, else_mark, "`else` after `try`") + min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`") else bump_invisible(ps, K"false") end From 0d5b838c88c38fb0686dee2060c5bb2ee00d1ebc Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 22 Feb 2022 18:36:45 +1000 Subject: [PATCH 0404/1109] Move some prototypes out of test into the prototypes directory --- JuliaSyntax/{test => prototypes}/simple_parser.jl | 0 JuliaSyntax/{test => prototypes}/syntax_interpolation.jl | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename JuliaSyntax/{test => prototypes}/simple_parser.jl (100%) rename JuliaSyntax/{test => prototypes}/syntax_interpolation.jl (100%) diff --git a/JuliaSyntax/test/simple_parser.jl b/JuliaSyntax/prototypes/simple_parser.jl similarity index 100% rename from JuliaSyntax/test/simple_parser.jl rename to JuliaSyntax/prototypes/simple_parser.jl diff --git a/JuliaSyntax/test/syntax_interpolation.jl b/JuliaSyntax/prototypes/syntax_interpolation.jl similarity index 100% rename from JuliaSyntax/test/syntax_interpolation.jl rename to JuliaSyntax/prototypes/syntax_interpolation.jl From def45c7a7cdd9ccc233e91f030b10f7ee8f6798a Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 24 Feb 2022 09:23:58 +0100 Subject: [PATCH 0405/1109] fold `RawToken` into `Token` and remove the old `Token` (JuliaLang/JuliaSyntax.jl#16) --- JuliaSyntax/Tokenize/README.md | 4 +- JuliaSyntax/Tokenize/benchmark/lex_base.jl | 6 +- 
JuliaSyntax/Tokenize/src/_precompile.jl | 108 +++--- JuliaSyntax/Tokenize/src/lexer.jl | 99 +----- JuliaSyntax/Tokenize/src/token.jl | 113 +----- JuliaSyntax/Tokenize/test/lexer.jl | 378 +++++++++++---------- JuliaSyntax/Tokenize/test/profile.jl | 10 +- JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 9 +- JuliaSyntax/src/tokens.jl | 3 +- 10 files changed, 279 insertions(+), 453 deletions(-) diff --git a/JuliaSyntax/Tokenize/README.md b/JuliaSyntax/Tokenize/README.md index f26f748dddac5..b4f4ac78b5d6a 100644 --- a/JuliaSyntax/Tokenize/README.md +++ b/JuliaSyntax/Tokenize/README.md @@ -37,11 +37,9 @@ Each `Token` is represented by where it starts and ends, what string it contains The API for a `Token` (non exported from the `Tokenize.Tokens` module) is. ```julia -startpos(t)::Tuple{Int, Int} # row and column where the token start -endpos(t)::Tuple{Int, Int} # row and column where the token ends startbyte(T)::Int # byte offset where the token start endbyte(t)::Int # byte offset where the token ends -untokenize(t)::String # string representation of the token +untokenize(t, str)::String # string representation of the token kind(t)::Token.Kind # kind of the token exactkind(t)::Token.Kind # exact kind of the token ``` diff --git a/JuliaSyntax/Tokenize/benchmark/lex_base.jl b/JuliaSyntax/Tokenize/benchmark/lex_base.jl index cf58cb9f3e77a..f440ecf0f668c 100644 --- a/JuliaSyntax/Tokenize/benchmark/lex_base.jl +++ b/JuliaSyntax/Tokenize/benchmark/lex_base.jl @@ -2,7 +2,7 @@ using Tokenize using BenchmarkTools using Printf -function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens.AbstractToken +function speed_test() tot_files = 0 tot_tokens = 0 tot_errors = 0 @@ -14,7 +14,7 @@ function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens. 
tot_files += 1 file = joinpath(root, file) str = read(file, String)::String - l = tokenize(str, T) + l = tokenize(str) while !Tokenize.Lexers.eof(l) t = Tokenize.Lexers.next_token(l) tot_tokens += 1 @@ -31,8 +31,6 @@ end tot_files, tot_tokens, tot_errors = speed_test() tot_time_token = @belapsed speed_test() -tot_time_rawtoken = @belapsed speed_test(Tokenize.Tokens.RawToken) println("Lexed ", tot_files, " files, with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") println("Time Token: ", @sprintf("%3.4f", tot_time_token), " seconds") -println("Time RawToken: ", @sprintf("%3.4f", tot_time_rawtoken), " seconds") diff --git a/JuliaSyntax/Tokenize/src/_precompile.jl b/JuliaSyntax/Tokenize/src/_precompile.jl index fe4e7721d5669..406e3aee285dc 100644 --- a/JuliaSyntax/Tokenize/src/_precompile.jl +++ b/JuliaSyntax/Tokenize/src/_precompile.jl @@ -8,87 +8,65 @@ function _precompile_() precompile(Tokenize.Tokens.Token, (Tokenize.Tokens.Kind,Tuple{Int,Int},Tuple{Int,Int},Int,Int,String)) precompile(Tokenize.Tokens.Token, ()) precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Tokens.startpos, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Tokens.endpos, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.RawToken,String)) - precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},)) - precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) precompile(Tokenize.Lexers.is_identifier_char, (Char,)) precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) - precompile(Tokenize.Lexers.peekchar, (GenericIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.dpeekchar, (GenericIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 
1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) + precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.ishex, (Char,)) precompile(Tokenize.Lexers.isbinary, (Char,)) precompile(Tokenize.Lexers.isoctal, (Char,)) precompile(Tokenize.Lexers.iswhitespace, (Char,)) precompile(Tokenize.Lexers.Lexer, (String,)) - precompile(Tokenize.Lexers.Lexer, (String,Type{Tokenize.Tokens.Token})) - precompile(Tokenize.Lexers.Lexer, (String,Type{Tokenize.Tokens.RawToken})) - precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.Token})) - precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.RawToken})) + precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},)) precompile(Tokenize.Lexers.tokenize, (String,)) - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,)) - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,)) - precompile(Tokenize.Lexers.startpos, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) - precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Int)) - precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},Int)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) + precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) + precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Int)) + precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Tokenize.Tokens.Kind)) + precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + 
precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Bool)) + precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) + precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Char)) - precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - 
precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Tokenize.Tokens.Kind)) - precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) - precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Bool)) - 
precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - - precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},)) - - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Base.isdigit),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.is_identifier_char),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),)) - - precompile(Tokenize.Lexers.accept_batch, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.isdigit),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, String,)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Base.isdigit),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof(Tokenize.Lexers.ishex),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.iswhitespace),)) + precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),)) + precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) end diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index e17850b544125..86ba9050cd578 
100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -3,7 +3,7 @@ module Lexers include("utilities.jl") import ..Tokens -import ..Tokens: AbstractToken, Token, RawToken, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral +import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, @@ -33,7 +33,7 @@ Ideally a lexer is stateless but some state is needed here for: * Disambiguating cases like x' (adjoint) vs 'x' (character literal) * Tokenizing code within string interpolations """ -mutable struct Lexer{IO_t <: IO, T <: AbstractToken} +mutable struct Lexer{IO_t <: IO} io::IO_t io_startpos::Int @@ -54,7 +54,7 @@ mutable struct Lexer{IO_t <: IO, T <: AbstractToken} dotop::Bool end -function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} +function Lexer(io::IO) c1 = ' ' p1 = position(io) if eof(io) @@ -78,29 +78,25 @@ function Lexer(io::IO_t, T::Type{TT} = Token) where {IO_t,TT <: AbstractToken} end end end - Lexer{IO_t,T}(io, position(io), 1, 1, position(io), 1, 1, position(io), + Lexer(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, Vector{StringState}(), IOBuffer(), (c1,c2,c3,c4), (p1,p2,p3,p4), false, false) end -Lexer(str::AbstractString, T::Type{TT} = Token) where TT <: AbstractToken = Lexer(IOBuffer(str), T) - -@inline token_type(l::Lexer{IO_t, TT}) where {IO_t, TT} = TT +Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ tokenize(x, T = Token) Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. `join(untokenize.(tokenize(x)))`. Setting `T` chooses the type of token -produced by the lexer (`Token` or `RawToken`). +produced by the lexer (`Token` or `Token`). 
""" -tokenize(x, ::Type{Token}) = Lexer(x, Token) -tokenize(x, ::Type{RawToken}) = Lexer(x, RawToken) -tokenize(x) = Lexer(x, Token) +tokenize(x) = Lexer(x) # Iterator interface -Base.IteratorSize(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.SizeUnknown() -Base.IteratorEltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = Base.HasEltype() -Base.eltype(::Type{Lexer{IO_t,T}}) where {IO_t,T} = T +Base.IteratorSize(::Type{<:Lexer}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{<:Lexer}) = Base.HasEltype() +Base.eltype(::Type{<:Lexer}) = Token function Base.iterate(l::Lexer) @@ -205,44 +201,14 @@ Returns the next character and increments the current position. """ function readchar end -function readchar(l::Lexer{I}) where {I <: IO} - c = readchar(l.io) - l.chars = (l.chars[2], l.chars[3], l.chars[4], c) - l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io)) - if l.doread - write(l.charstore, l.chars[1]) - end - if l.chars[1] == '\n' - l.current_row += 1 - l.current_col = 1 - elseif !eof(l.chars[1]) - l.current_col += 1 - end - return l.chars[1] -end -function readchar(l::Lexer{I,RawToken}) where {I <: IO} +function readchar(l::Lexer) c = readchar(l.io) l.chars = (l.chars[2], l.chars[3], l.chars[4], c) l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io)) return l.chars[1] end -readon(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] -function readon(l::Lexer{I,Token}) where {I <: IO} - if l.charstore.size != 0 - take!(l.charstore) - end - write(l.charstore, l.chars[1]) - l.doread = true -end - -readoff(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1] -function readoff(l::Lexer{I,Token}) where {I <: IO} - l.doread = false - return l.chars[1] -end - """ accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) @@ -281,32 +247,7 @@ end Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. 
""" -function emit(l::Lexer{IO_t,Token}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t - suffix = false - if kind in (Tokens.ERROR, Tokens.STRING, Tokens.CMD) - str = String(l.io.data[(l.token_startpos + 1):position(l)]) - elseif (kind == Tokens.IDENTIFIER || isliteral(kind) || kind == Tokens.COMMENT || kind == Tokens.WHITESPACE || kind == Tokens.NEWLINE_WS) - str = String(take!(l.charstore)) - elseif optakessuffix(kind) - str = "" - while isopsuffix(peekchar(l)) - str = string(str, readchar(l)) - suffix = true - end - else - str = "" - end - tok = Token(kind, (l.token_start_row, l.token_start_col), - (l.current_row, l.current_col - 1), - startpos(l), position(l) - 1, - str, err, l.dotop, suffix) - l.dotop = false - l.last_token = kind - readoff(l) - return tok -end - -function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_ERR) where IO_t +function emit(l::Lexer, kind::Kind, err::TokenError = Tokens.NO_ERR) suffix = false if optakessuffix(kind) while isopsuffix(peekchar(l)) @@ -315,11 +256,10 @@ function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E end end - tok = RawToken(kind, startpos(l), position(l) - 1, err, l.dotop, suffix) + tok = Token(kind, startpos(l), position(l) - 1, err, l.dotop, suffix) l.dotop = false l.last_token = kind - readoff(l) return tok end @@ -478,11 +418,10 @@ function lex_string_chunk(l) # Start interpolation readchar(l) return emit(l, Tokens.EX_OR) - elseif !state.raw && pc == '\\' && (pc2 = dpeekchar(l)[2]; + elseif !state.raw && pc == '\\' && (pc2 = dpeekchar(l)[2]; pc2 == '\r' || pc2 == '\n') # Process escaped newline as whitespace readchar(l) - readon(l) readchar(l) if pc2 == '\r' && peekchar(l) == '\n' readchar(l) @@ -503,7 +442,6 @@ function lex_string_chunk(l) return emit(l, state.delim == '"' ? 
Tokens.DQUOTE : Tokens.BACKTICK) end end - readon(l) # Read a chunk of string characters if state.raw # Raw strings treat all characters as literals with the exception that @@ -566,7 +504,6 @@ end # Lex whitespace, a whitespace char `c` has been consumed function lex_whitespace(l::Lexer, c) - readon(l) k = Tokens.WHITESPACE while true if c == '\n' @@ -583,7 +520,6 @@ function lex_whitespace(l::Lexer, c) end function lex_comment(l::Lexer, doemit=true) - readon(l) if peekchar(l) != '=' while true pc = peekchar(l) @@ -799,7 +735,6 @@ end # A digit has been consumed function lex_digit(l::Lexer, kind) - readon(l) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' @@ -915,7 +850,6 @@ function lex_prime(l, doemit = true) l.last_token == Tokens.END || isliteral(l.last_token) return emit(l, Tokens.PRIME) else - readon(l) if accept(l, '\'') if accept(l, '\'') return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) @@ -1118,10 +1052,7 @@ function lex_backtick(l::Lexer) end const MAX_KW_LENGTH = 10 -function lex_identifier(l::Lexer{IO_t,T}, c) where {IO_t,T} - if T == Token - readon(l) - end +function lex_identifier(l::Lexer, c) h = simple_hash(c, UInt64(0)) n = 1 while true diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index cb7e2da7f9367..f7d2ea3b16f44 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -53,27 +53,7 @@ TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( UNKNOWN => "unknown", ) -abstract type AbstractToken end - -struct Token <: AbstractToken - kind::Kind - # Offsets into a string or buffer - startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index - endpos::Tuple{Int, Int} - startbyte::Int # The byte where the token start in the buffer - endbyte::Int # The byte where the token ended in the buffer - val::String # The actual string of the token - token_error::TokenError - dotop::Bool - suffix::Bool -end -function Token(kind::Kind, 
startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int}, - startbyte::Int, endbyte::Int, val::String) -Token(kind, startposition, endposition, startbyte, endbyte, val, NO_ERR, false, false) -end -Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false) - -struct RawToken <: AbstractToken +struct Token kind::Kind # Offsets into a string or buffer startbyte::Int # The byte where the token start in the buffer @@ -82,98 +62,31 @@ struct RawToken <: AbstractToken dotop::Bool suffix::Bool end -function RawToken(kind::Kind, startbyte::Int, endbyte::Int) - RawToken(kind, startbyte, endbyte, NO_ERR, false, false) +function Token(kind::Kind, startbyte::Int, endbyte::Int) + Token(kind, startbyte, endbyte, NO_ERR, false, false) end -RawToken() = RawToken(ERROR, 0, 0, UNKNOWN, false, false) +Token() = Token(ERROR, 0, 0, UNKNOWN, false, false) -const _EMPTY_TOKEN = Token() -const _EMPTY_RAWTOKEN = RawToken() -EMPTY_TOKEN(::Type{Token}) = _EMPTY_TOKEN -EMPTY_TOKEN(::Type{RawToken}) = _EMPTY_RAWTOKEN +const _EMPTY_RAWTOKEN = Token() +EMPTY_TOKEN(::Type{Token}) = _EMPTY_RAWTOKEN -function kind(t::AbstractToken) +function kind(t::Token) isoperator(t.kind) && return OP iskeyword(t.kind) && return KEYWORD return t.kind end -exactkind(t::AbstractToken) = t.kind -startpos(t::AbstractToken) = t.startpos -endpos(t::AbstractToken) = t.endpos -startbyte(t::AbstractToken) = t.startbyte -endbyte(t::AbstractToken) = t.endbyte -function untokenize(t::Token) - if t.kind == IDENTIFIER || isliteral(t.kind) || t.kind == COMMENT || t.kind == WHITESPACE || t.kind == NEWLINE_WS || t.kind == ERROR - return t.val - elseif iskeyword(t.kind) - return lowercase(string(t.kind)) - elseif isoperator(t.kind) - if t.dotop - str = string(".", UNICODE_OPS_REVERSE[t.kind]) - else - str = string(UNICODE_OPS_REVERSE[t.kind]) - end - return string(str, t.val) - elseif t.kind == LPAREN - return "(" - elseif t.kind == LSQUARE - return "[" - elseif t.kind == LBRACE - return "{" - elseif t.kind == 
RPAREN - return ")" - elseif t.kind == RSQUARE - return "]" - elseif t.kind == RBRACE - return "}" - elseif t.kind == DQUOTE - return "\"" - elseif t.kind == TRIPLE_DQUOTE - return "\"\"\"" - elseif t.kind == BACKTICK - return "`" - elseif t.kind == TRIPLE_BACKTICK - return "```" - elseif t.kind == AT_SIGN - return "@" - elseif t.kind == COMMA - return "," - elseif t.kind == SEMICOLON - return ";" - else - return "" - end -end - -function untokenize(t::RawToken, str::String) - String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) -end +exactkind(t::Token) = t.kind -function untokenize(ts) - if !(eltype(ts) <: AbstractToken) - throw(ArgumentError("element type of iterator has to be Token")) - end - io = IOBuffer() - for tok in ts - write(io, untokenize(tok)) - end - return String(take!(io)) -end +startbyte(t::Token) = t.startbyte +endbyte(t::Token) = t.endbyte -function Base.show(io::IO, t::Token) - start_r, start_c = startpos(t) - end_r, end_c = endpos(t) - str = kind(t) == ENDMARKER ? "" : escape_string(untokenize(t)) - print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " ")) - print(io, rpad(kind(t), 15, " ")) - print(io, "\"", str, "\"") +function untokenize(t::Token, str::String) + String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) end -Base.print(io::IO, t::Token) = print(io, untokenize(t)) - -function Base.show(io::IO, t::RawToken) +function Base.show(io::IO, t::Token) print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) print(io, rpad(kind(t), 15, " ")) end diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index ca255b0e8cd3c..72789d164627b 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -30,9 +30,7 @@ end # testset token_strs = ["𝘋", " ", "=", "2", "β", ""] for (i, n) in enumerate(l) @test T.kind(n) == kinds[i] - @test untokenize(n) == token_strs[i] - @test T.startpos(n) == (1, i) - @test T.endpos(n) == (1, i - 1 + length(token_strs[i])) + @test 
untokenize(n, str) == token_strs[i] end end end # testset @@ -124,18 +122,12 @@ end # testset for (i, n) in enumerate(tokenize(str)) @test Tokens.kind(n) == kinds[i] end - for (i, n) in enumerate(tokenize(str, Tokens.RawToken)) - @test Tokens.kind(n) == kinds[i] - end @testset "roundtrippability" begin - @test join(untokenize.(collect(tokenize(str)))) == str - @test untokenize(collect(tokenize(str))) == str - @test untokenize(tokenize(str)) == str - @test_throws ArgumentError untokenize("blabla") + @test join(untokenize.(collect(tokenize(str)), str)) == str end - @test all((t.endbyte - t.startbyte + 1)==sizeof(untokenize(t)) for t in tokenize(str)) + @test all((t.endbyte - t.startbyte + 1)==sizeof(untokenize(t, str)) for t in tokenize(str)) end # testset @testset "issue 5, '..'" begin @@ -143,7 +135,8 @@ end # testset end @testset "issue 17, >>" begin - @test untokenize(tok(">> "))==">>" + str = ">> " + @test untokenize(tok(str), str)==">>" end @@ -177,14 +170,21 @@ end @test tok("somtext falsething", 3).kind == T.IDENTIFIER end + +function test_roundtrip(str, kind, val) + T = tok(str) + @test T.kind == kind + @test untokenize(T, str) == val +end + @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin - @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .+1")) - @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0.+1")) - @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0 .+1")) - @test (t->t.val=="1234." && t.kind == Tokens.FLOAT )(tok("1234.f(a)")) - @test (t->t.val=="1234" && t.kind == Tokens.INTEGER )(tok("1234 .f(a)")) - @test (t->t.val=="1234.0." 
&& t.kind == Tokens.ERROR )(tok("1234.0.f(a)")) - @test (t->t.val=="1234.0" && t.kind == Tokens.FLOAT )(tok("1234.0 .f(a)")) + test_roundtrip("1234 .+1", Tokens.INTEGER, "1234") + test_roundtrip("1234.0+1", Tokens.FLOAT, "1234.0") + test_roundtrip("1234.0 .+1", Tokens.FLOAT, "1234.0") + test_roundtrip("1234.f(a)", Tokens.FLOAT, "1234.") + test_roundtrip("1234 .f(a)", Tokens.INTEGER, "1234") + test_roundtrip("1234.0.f(a)", Tokens.ERROR, "1234.0.") + test_roundtrip("1234.0 .f(a)", Tokens.FLOAT, "1234.0") end @@ -210,18 +210,18 @@ end @testset "primes" begin - tokens = collect(tokenize( - """ + str = """ ImageMagick.save(fn, reinterpret(ARGB32, [0xf0884422]'')) D = ImageMagick.load(fn) - """)) - @test string(untokenize(tokens[16]))==string(untokenize(tokens[17]))=="'" - @test tok("'a'").val == "'a'" - @test tok("'a'").kind == Tokens.CHAR - @test tok("''").val == "''" - @test tok("''").kind == Tokens.CHAR - @test tok("'''").val == "'''" - @test tok("'''").kind == Tokens.CHAR + """ + tokens = collect(tokenize(str)) + @test string(untokenize(tokens[16], str))==string(untokenize(tokens[17], str))=="'" + + test_roundtrip("'a'", Tokens.CHAR, "'a'") + test_roundtrip("''", Tokens.CHAR, "''") + test_roundtrip("'''", Tokens.CHAR, "'''") + test_roundtrip("''''", Tokens.CHAR, "'''") + @test tok("''''", 1).kind == Tokens.CHAR @test tok("''''", 2).kind == Tokens.PRIME @test tok("()'", 3).kind == Tokens.PRIME @@ -296,196 +296,213 @@ end @testset "show" begin io = IOBuffer() show(io, collect(tokenize("\"abc\nd\"ef"))[2]) - @test String(take!(io)) == "1,2-2,1 STRING \"abc\\nd\"" + @test String(take!(io)) == "1-5 STRING " end -~(tok::T.AbstractToken, t::Tuple) = tok.kind == t[1] && untokenize(tok) == t[2] +~(tok::T.Token, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2] @testset "raw strings" begin - ts = collect(tokenize(raw""" str"x $ \ y" """)) - @test ts[1] ~ (T.WHITESPACE , " " ) - @test ts[2] ~ (T.IDENTIFIER , "str" ) - @test ts[3] ~ (T.DQUOTE , "\"" ) - @test ts[4] 
~ (T.STRING , "x \$ \\ y") - @test ts[5] ~ (T.DQUOTE , "\"" ) - @test ts[6] ~ (T.WHITESPACE , " " ) - @test ts[7] ~ (T.ENDMARKER , "" ) - - ts = collect(tokenize(raw"""`x $ \ y`""")) - @test ts[1] ~ (T.BACKTICK , "`" ) - @test ts[2] ~ (T.CMD , "x \$ \\ y" ) - @test ts[3] ~ (T.BACKTICK , "`" ) - @test ts[4] ~ (T.ENDMARKER , "" ) + str = raw""" str"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[1] ~ (T.WHITESPACE , " " , str) + @test ts[2] ~ (T.IDENTIFIER , "str" , str) + @test ts[3] ~ (T.DQUOTE , "\"" , str) + @test ts[4] ~ (T.STRING , "x \$ \\ y", str) + @test ts[5] ~ (T.DQUOTE , "\"" , str) + @test ts[6] ~ (T.WHITESPACE , " " , str) + @test ts[7] ~ (T.ENDMARKER , "" , str) + + str = raw"""`x $ \ y`""" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.BACKTICK , "`" , str) + @test ts[2] ~ (T.CMD , "x \$ \\ y" , str) + @test ts[3] ~ (T.BACKTICK , "`" , str) + @test ts[4] ~ (T.ENDMARKER , "" , str) # str"\\" - ts = collect(tokenize("str\"\\\\\"")) - @test ts[1] ~ (T.IDENTIFIER , "str" ) - @test ts[2] ~ (T.DQUOTE , "\"" ) - @test ts[3] ~ (T.STRING , "\\\\" ) - @test ts[4] ~ (T.DQUOTE , "\"" ) - @test ts[5] ~ (T.ENDMARKER , "" ) + str = "str\"\\\\\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.IDENTIFIER , "str" , str) + @test ts[2] ~ (T.DQUOTE , "\"" , str) + @test ts[3] ~ (T.STRING , "\\\\" , str) + @test ts[4] ~ (T.DQUOTE , "\"" , str) + @test ts[5] ~ (T.ENDMARKER , "" , str) # str"\\\"" - ts = collect(tokenize("str\"\\\\\\\"\"")) - @test ts[1] ~ (T.IDENTIFIER , "str" ) - @test ts[2] ~ (T.DQUOTE , "\"" ) - @test ts[3] ~ (T.STRING , "\\\\\\\"" ) - @test ts[4] ~ (T.DQUOTE , "\"" ) - @test ts[5] ~ (T.ENDMARKER , "" ) + str = "str\"\\\\\\\"\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.IDENTIFIER , "str" , str) + @test ts[2] ~ (T.DQUOTE , "\"" , str) + @test ts[3] ~ (T.STRING , "\\\\\\\"" , str) + @test ts[4] ~ (T.DQUOTE , "\"" , str) + @test ts[5] ~ (T.ENDMARKER , "" , str) # Contextual keywords and operators allowed as raw string prefixes - ts = 
collect(tokenize(raw""" var"x $ \ y" """)) - @test ts[2] ~ (T.VAR , "var") - @test ts[4] ~ (T.STRING , "x \$ \\ y") + str = raw""" var"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[2] ~ (T.VAR , "var", str) + @test ts[4] ~ (T.STRING , "x \$ \\ y", str) - ts = collect(tokenize(raw""" outer"x $ \ y" """)) - @test ts[2] ~ (T.OUTER , "outer") - @test ts[4] ~ (T.STRING , "x \$ \\ y") + str = raw""" outer"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[2] ~ (T.OUTER , "outer", str) + @test ts[4] ~ (T.STRING , "x \$ \\ y", str) - ts = collect(tokenize(raw""" isa"x $ \ y" """)) - @test ts[2] ~ (T.ISA , "isa") - @test ts[4] ~ (T.STRING , "x \$ \\ y") + str = raw""" isa"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[2] ~ (T.ISA , "isa", str) + @test ts[4] ~ (T.STRING , "x \$ \\ y", str) end @testset "string escaped newline whitespace" begin - ts = collect(tokenize("\"x\\\n \ty\"")) - @test ts[1] ~ (T.DQUOTE, "\"") - @test ts[2] ~ (T.STRING, "x") - @test ts[3] ~ (T.WHITESPACE, "\\\n \t") - @test ts[4] ~ (T.STRING, "y") - @test ts[5] ~ (T.DQUOTE, "\"") + str = "\"x\\\n \ty\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.DQUOTE, "\"", str) + @test ts[2] ~ (T.STRING, "x", str) + @test ts[3] ~ (T.WHITESPACE, "\\\n \t", str) + @test ts[4] ~ (T.STRING, "y", str) + @test ts[5] ~ (T.DQUOTE, "\"", str) # No newline escape for raw strings - ts = collect(tokenize("r\"x\\\ny\"")) - @test ts[1] ~ (T.IDENTIFIER , "r") - @test ts[2] ~ (T.DQUOTE, "\"") - @test ts[3] ~ (T.STRING, "x\\\ny") - @test ts[4] ~ (T.DQUOTE , "\"") + str = "r\"x\\\ny\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.IDENTIFIER , "r", str) + @test ts[2] ~ (T.DQUOTE, "\"", str) + @test ts[3] ~ (T.STRING, "x\\\ny", str) + @test ts[4] ~ (T.DQUOTE , "\"", str) end @testset "triple quoted string line splitting" begin - ts = collect(tokenize("\"\"\"\nx\r\ny\rz\n\r\"\"\"")) - @test ts[1] ~ (T.TRIPLE_DQUOTE , "\"\"\"") - @test ts[2] ~ (T.STRING , "\n") - @test ts[3] ~ (T.STRING , "x\r\n") - @test 
ts[4] ~ (T.STRING , "y\r") - @test ts[5] ~ (T.STRING , "z\n") - @test ts[6] ~ (T.STRING , "\r") - @test ts[7] ~ (T.TRIPLE_DQUOTE , "\"\"\"") + str = "\"\"\"\nx\r\ny\rz\n\r\"\"\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) + @test ts[2] ~ (T.STRING , "\n", str) + @test ts[3] ~ (T.STRING , "x\r\n", str) + @test ts[4] ~ (T.STRING , "y\r", str) + @test ts[5] ~ (T.STRING , "z\n", str) + @test ts[6] ~ (T.STRING , "\r", str) + @test ts[7] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) # Also for raw strings - ts = collect(tokenize("r\"\"\"\nx\ny\"\"\"")) - @test ts[1] ~ (T.IDENTIFIER , "r") - @test ts[2] ~ (T.TRIPLE_DQUOTE , "\"\"\"") - @test ts[3] ~ (T.STRING , "\n") - @test ts[4] ~ (T.STRING , "x\n") - @test ts[5] ~ (T.STRING , "y") - @test ts[6] ~ (T.TRIPLE_DQUOTE , "\"\"\"") + str = "r\"\"\"\nx\ny\"\"\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.IDENTIFIER , "r", str) + @test ts[2] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) + @test ts[3] ~ (T.STRING , "\n", str) + @test ts[4] ~ (T.STRING , "x\n", str) + @test ts[5] ~ (T.STRING , "y", str) + @test ts[6] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) end @testset "interpolation" begin @testset "basic" begin - ts = collect(tokenize("\"\$x \$y\"")) - @test ts[1] ~ (T.DQUOTE , "\"") - @test ts[2] ~ (T.EX_OR , "\$") - @test ts[3] ~ (T.IDENTIFIER , "x" ) - @test ts[4] ~ (T.STRING , " " ) - @test ts[5] ~ (T.EX_OR , "\$") - @test ts[6] ~ (T.IDENTIFIER , "y" ) - @test ts[7] ~ (T.DQUOTE , "\"") - @test ts[8] ~ (T.ENDMARKER , "" ) + str = "\"\$x \$y\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.DQUOTE , "\"", str) + @test ts[2] ~ (T.EX_OR , "\$", str) + @test ts[3] ~ (T.IDENTIFIER , "x" , str) + @test ts[4] ~ (T.STRING , " " , str) + @test ts[5] ~ (T.EX_OR , "\$", str) + @test ts[6] ~ (T.IDENTIFIER , "y" , str) + @test ts[7] ~ (T.DQUOTE , "\"", str) + @test ts[8] ~ (T.ENDMARKER , "" , str) end @testset "nested" begin str = """"str: \$(g("str: \$(h("str"))"))" """ ts = collect(tokenize(str)) @test length(ts) 
== 23 - @test ts[1] ~ (T.DQUOTE , "\"" ) - @test ts[2] ~ (T.STRING , "str: ") - @test ts[3] ~ (T.EX_OR , "\$" ) - @test ts[4] ~ (T.LPAREN , "(" ) - @test ts[5] ~ (T.IDENTIFIER, "g" ) - @test ts[6] ~ (T.LPAREN , "(" ) - @test ts[7] ~ (T.DQUOTE , "\"" ) - @test ts[8] ~ (T.STRING , "str: ") - @test ts[9] ~ (T.EX_OR , "\$" ) - @test ts[10] ~ (T.LPAREN , "(" ) - @test ts[11] ~ (T.IDENTIFIER, "h" ) - @test ts[12] ~ (T.LPAREN , "(" ) - @test ts[13] ~ (T.DQUOTE , "\"" ) - @test ts[14] ~ (T.STRING , "str" ) - @test ts[15] ~ (T.DQUOTE , "\"" ) - @test ts[16] ~ (T.RPAREN , ")" ) - @test ts[17] ~ (T.RPAREN , ")" ) - @test ts[18] ~ (T.DQUOTE , "\"" ) - @test ts[19] ~ (T.RPAREN , ")" ) - @test ts[20] ~ (T.RPAREN , ")" ) - @test ts[21] ~ (T.DQUOTE , "\"" ) - @test ts[22] ~ (T.WHITESPACE, " " ) - @test ts[23] ~ (T.ENDMARKER , "" ) + @test ts[1] ~ (T.DQUOTE , "\"" , str) + @test ts[2] ~ (T.STRING , "str: ", str) + @test ts[3] ~ (T.EX_OR , "\$" , str) + @test ts[4] ~ (T.LPAREN , "(" , str) + @test ts[5] ~ (T.IDENTIFIER, "g" , str) + @test ts[6] ~ (T.LPAREN , "(" , str) + @test ts[7] ~ (T.DQUOTE , "\"" , str) + @test ts[8] ~ (T.STRING , "str: ", str) + @test ts[9] ~ (T.EX_OR , "\$" , str) + @test ts[10] ~ (T.LPAREN , "(" , str) + @test ts[11] ~ (T.IDENTIFIER, "h" , str) + @test ts[12] ~ (T.LPAREN , "(" , str) + @test ts[13] ~ (T.DQUOTE , "\"" , str) + @test ts[14] ~ (T.STRING , "str" , str) + @test ts[15] ~ (T.DQUOTE , "\"" , str) + @test ts[16] ~ (T.RPAREN , ")" , str) + @test ts[17] ~ (T.RPAREN , ")" , str) + @test ts[18] ~ (T.DQUOTE , "\"" , str) + @test ts[19] ~ (T.RPAREN , ")" , str) + @test ts[20] ~ (T.RPAREN , ")" , str) + @test ts[21] ~ (T.DQUOTE , "\"" , str) + @test ts[22] ~ (T.WHITESPACE, " " , str) + @test ts[23] ~ (T.ENDMARKER , "" , str) end @testset "duplicate \$" begin - ts = collect(tokenize("\"\$\$\"")) - @test ts[1] ~ (T.DQUOTE , "\"") - @test ts[2] ~ (T.EX_OR , "\$") - @test ts[3] ~ (T.EX_OR , "\$") - @test ts[4] ~ (T.DQUOTE , "\"") - @test ts[5] ~ (T.ENDMARKER , 
"" ) + str = "\"\$\$\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.DQUOTE , "\"", str) + @test ts[2] ~ (T.EX_OR , "\$", str) + @test ts[3] ~ (T.EX_OR , "\$", str) + @test ts[4] ~ (T.DQUOTE , "\"", str) + @test ts[5] ~ (T.ENDMARKER , "" , str) end @testset "Unmatched parens" begin # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73 - ts = collect(tokenize("\"\$(fdsf\"")) - @test ts[1] ~ (T.DQUOTE , "\"" ) - @test ts[2] ~ (T.EX_OR , "\$" ) - @test ts[3] ~ (T.LPAREN , "(" ) - @test ts[4] ~ (T.IDENTIFIER , "fdsf" ) - @test ts[5] ~ (T.DQUOTE , "\"" ) - @test ts[6] ~ (T.ENDMARKER , "" ) + str = "\"\$(fdsf\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (T.DQUOTE , "\"" , str) + @test ts[2] ~ (T.EX_OR , "\$" , str) + @test ts[3] ~ (T.LPAREN , "(" , str) + @test ts[4] ~ (T.IDENTIFIER , "fdsf" , str) + @test ts[5] ~ (T.DQUOTE , "\"" , str) + @test ts[6] ~ (T.ENDMARKER , "" , str) end @testset "Unicode" begin # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178 - ts = collect(tokenize(""" "\$uₕx \$(uₕx - ux)" """)) - @test ts[ 1] ~ (T.WHITESPACE , " " ) - @test ts[ 2] ~ (T.DQUOTE , "\"" ) - @test ts[ 3] ~ (T.EX_OR , "\$" ) - @test ts[ 4] ~ (T.IDENTIFIER , "uₕx" ) - @test ts[ 5] ~ (T.STRING , " " ) - @test ts[ 6] ~ (T.EX_OR , "\$" ) - @test ts[ 7] ~ (T.LPAREN , "(" ) - @test ts[ 8] ~ (T.IDENTIFIER , "uₕx" ) - @test ts[ 9] ~ (T.WHITESPACE , " " ) - @test ts[10] ~ (T.MINUS , "-" ) - @test ts[11] ~ (T.WHITESPACE , " " ) - @test ts[12] ~ (T.IDENTIFIER , "ux" ) - @test ts[13] ~ (T.RPAREN , ")" ) - @test ts[14] ~ (T.DQUOTE , "\"" ) - @test ts[15] ~ (T.WHITESPACE , " " ) - @test ts[16] ~ (T.ENDMARKER , "" ) + str = """ "\$uₕx \$(uₕx - ux)" """ + ts = collect(tokenize(str)) + @test ts[ 1] ~ (T.WHITESPACE , " " , str) + @test ts[ 2] ~ (T.DQUOTE , "\"" , str) + @test ts[ 3] ~ (T.EX_OR , "\$" , str) + @test ts[ 4] ~ (T.IDENTIFIER , "uₕx" , str) + @test ts[ 5] ~ (T.STRING , " " , str) + @test ts[ 6] ~ (T.EX_OR , "\$" , str) + @test ts[ 7] ~ (T.LPAREN , "(" , 
str) + @test ts[ 8] ~ (T.IDENTIFIER , "uₕx" , str) + @test ts[ 9] ~ (T.WHITESPACE , " " , str) + @test ts[10] ~ (T.MINUS , "-" , str) + @test ts[11] ~ (T.WHITESPACE , " " , str) + @test ts[12] ~ (T.IDENTIFIER , "ux" , str) + @test ts[13] ~ (T.RPAREN , ")" , str) + @test ts[14] ~ (T.DQUOTE , "\"" , str) + @test ts[15] ~ (T.WHITESPACE , " " , str) + @test ts[16] ~ (T.ENDMARKER , "" , str) end @testset "var\"...\" disabled in interpolations" begin - ts = collect(tokenize(""" "\$var"x" " """)) - @test ts[ 1] ~ (T.WHITESPACE , " " ) - @test ts[ 2] ~ (T.DQUOTE , "\"" ) - @test ts[ 3] ~ (T.EX_OR , "\$" ) - @test ts[ 4] ~ (T.VAR , "var" ) - @test ts[ 5] ~ (T.DQUOTE , "\"" ) - @test ts[ 6] ~ (T.IDENTIFIER , "x" ) - @test ts[ 7] ~ (T.DQUOTE , "\"" ) - @test ts[ 8] ~ (T.STRING , " " ) - @test ts[ 9] ~ (T.DQUOTE , "\"" ) - @test ts[10] ~ (T.WHITESPACE , " " ) - @test ts[11] ~ (T.ENDMARKER , "" ) + str = """ "\$var"x" " """ + ts = collect(tokenize(str)) + @test ts[ 1] ~ (T.WHITESPACE , " " , str) + @test ts[ 2] ~ (T.DQUOTE , "\"" , str) + @test ts[ 3] ~ (T.EX_OR , "\$" , str) + @test ts[ 4] ~ (T.VAR , "var" , str) + @test ts[ 5] ~ (T.DQUOTE , "\"" , str) + @test ts[ 6] ~ (T.IDENTIFIER , "x" , str) + @test ts[ 7] ~ (T.DQUOTE , "\"" , str) + @test ts[ 8] ~ (T.STRING , " " , str) + @test ts[ 9] ~ (T.DQUOTE , "\"" , str) + @test ts[10] ~ (T.WHITESPACE , " " , str) + @test ts[11] ~ (T.ENDMARKER , "" , str) end @testset "invalid chars after identifier" begin - ts = collect(tokenize(""" "\$x෴" """)) - @test ts[4] ~ (T.IDENTIFIER , "x" ) - @test ts[5] ~ (T.ERROR , "" ) - @test ts[6] ~ (T.STRING , "෴" ) + str = """ "\$x෴" """ + ts = collect(tokenize(str)) + @test ts[4] ~ (T.IDENTIFIER , "x" , str) + @test ts[5] ~ (T.ERROR , "" , str) + @test ts[6] ~ (T.STRING , "෴" , str) @test ts[5].token_error == Tokens.INVALID_INTERPOLATION_TERMINATOR end end @@ -493,8 +510,6 @@ end @testset "inferred" begin l = tokenize("abc") @inferred Tokenize.Lexers.next_token(l) - l = tokenize("abc", 
Tokens.RawToken) - @inferred Tokenize.Lexers.next_token(l) end @testset "modifying function names (!) followed by operator" begin @@ -671,7 +686,7 @@ for op in ops if expr isa Expr && (expr.head != :error && expr.head != :incomplete) tokens = collect(tokenize(str)) exop = expr.head == :call ? expr.args[1] : expr.head - @test Symbol(Tokenize.Tokens.untokenize(tokens[arity == 1 ? 1 : 3])) == exop + @test Symbol(Tokenize.Tokens.untokenize(tokens[arity == 1 ? 1 : 3], str)) == exop else break end @@ -701,11 +716,6 @@ end @test tok("outer", 1).kind==T.OUTER end -@testset "dot startpos" begin - @test Tokenize.Tokens.startpos(tok("./")) == (1,1) - @test Tokenize.Tokens.startbyte(tok(".≤")) == 0 -end - @testset "token errors" begin @test tok("1.2e2.3",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT @test tok("1.2.",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT @@ -719,20 +729,20 @@ end @testset "hat suffix" begin @test tok("ŝ", 1).kind==Tokens.IDENTIFIER - @test untokenize(collect(tokenize("ŝ", Tokens.RawToken))[1], "ŝ") == "ŝ" + @test untokenize(collect(tokenize("ŝ"))[1], "ŝ") == "ŝ" end @testset "suffixed op" begin s = "+¹" @test Tokens.isoperator(tok(s, 1).kind) - @test untokenize(collect(tokenize(s, Tokens.RawToken))[1], s) == s + @test untokenize(collect(tokenize(s))[1], s) == s end @testset "invalid float juxt" begin s = "1.+2" @test tok(s, 1).kind == Tokens.ERROR @test Tokens.isoperator(tok(s, 2).kind) - @test (t->t.val=="1234." && t.kind == Tokens.ERROR )(tok("1234.+1")) # requires space before '.' 
+ test_roundtrip("1234.+1", Tokens.ERROR, "1234.") @test tok("1.+ ").kind == Tokens.ERROR @test tok("1.⤋").kind == Tokens.ERROR @test tok("1.?").kind == Tokens.ERROR @@ -740,7 +750,7 @@ end @testset "comments" begin s = "#=# text=#" - @test length(collect(tokenize(s, Tokens.RawToken))) == 2 + @test length(collect(tokenize(s))) == 2 end @testset "invalid hexadecimal" begin @@ -750,12 +760,12 @@ end @testset "circ arrow right op" begin s = "↻" - @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.CIRCLE_ARROW_RIGHT + @test collect(tokenize(s))[1].kind == Tokens.CIRCLE_ARROW_RIGHT end @testset "invalid float" begin s = ".0." - @test collect(tokenize(s, Tokens.RawToken))[1].kind == Tokens.ERROR + @test collect(tokenize(s))[1].kind == Tokens.ERROR end @testset "allow prime after end" begin diff --git a/JuliaSyntax/Tokenize/test/profile.jl b/JuliaSyntax/Tokenize/test/profile.jl index fd6cd61aeb55f..154e9b7b50ddb 100644 --- a/JuliaSyntax/Tokenize/test/profile.jl +++ b/JuliaSyntax/Tokenize/test/profile.jl @@ -1,6 +1,6 @@ using Tokenize -nt = @timed @eval(collect(Tokenize.tokenize("foo + bar", Tokens.RawToken))) +nt = @timed @eval(collect(Tokenize.tokenize("foo + bar"))) println("First run took $(nt.time) seconds with $(nt.bytes/1e6) MB allocated") srcdir = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "..") @@ -17,7 +17,7 @@ end let time_taken = 0.0, allocated = 0.0 for file in allfiles content = IOBuffer(read(file, String)) - nt = @timed for t in Tokenize.tokenize(content, Tokens.RawToken) end + nt = @timed for t in Tokenize.tokenize(content) end time_taken += nt.time allocated += nt.bytes end @@ -27,7 +27,7 @@ end let time_taken = 0.0, allocated = 0.0 for file in allfiles content = IOBuffer(read(file, String)) - nt = @timed for t in Tokenize.tokenize(content, Tokens.RawToken) end + nt = @timed for t in Tokenize.tokenize(content) end time_taken += nt.time allocated += nt.bytes end @@ -40,13 +40,13 @@ using PProf, Profile # warm up profiler let content = 
read(first(allfiles), String) - @profile collect(Tokenize.tokenize(content, Tokens.RawToken)) + @profile collect(Tokenize.tokenize(content)) end Profile.clear() for file in allfiles content = read(file, String) - @profile collect(Tokenize.tokenize(content, Tokens.RawToken)) + @profile collect(Tokenize.tokenize(content)) end pprof() diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index aa20aaa64ac67..a0851d873d434 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -12,7 +12,7 @@ include("utils.jl") # TODO: Perhaps integrate these back into Tokenize? Or maybe JuliaSyntax would # be a sensible home for the Tokenize lexer in the future? include("../Tokenize/src/Tokenize.jl") -using .Tokenize.Tokens: RawToken +using .Tokenize.Tokens: Token const TzTokens = Tokenize.Tokens include("tokens.jl") diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index a048211c559a6..e185311b789e9 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -88,7 +88,7 @@ struct SyntaxToken had_newline::Bool end -function SyntaxToken(raw::RawToken, had_whitespace, had_newline) +function SyntaxToken(raw::Token, had_whitespace, had_newline) SyntaxToken(raw.kind, raw.startbyte + 1, raw.endbyte + 1, raw.dotop, raw.suffix, had_whitespace, had_newline) end @@ -162,7 +162,7 @@ mutable struct ParseStream # the `textbuf` owner was unknown (eg, ptr,length was passed) text_root::Any # Lexer, transforming the input bytes into a token stream - lexer::Tokenize.Lexers.Lexer{IOBuffer,RawToken} + lexer::Tokenize.Lexers.Lexer{IOBuffer} # Lookahead buffer for already lexed tokens lookahead::Vector{SyntaxToken} # Parser output as an ordered sequence of ranges, parent nodes after children. 
@@ -181,7 +181,7 @@ mutable struct ParseStream version::VersionNumber) io = IOBuffer(text_buf) seek(io, next_byte-1) - lexer = Tokenize.Lexers.Lexer(io, RawToken) + lexer = Tokenize.Lexers.Lexer(io) # To avoid keeping track of the exact Julia development version where new # features were added or comparing prerelease strings, we treat prereleases # or dev versons as the release version using only major and minor version @@ -640,7 +640,7 @@ function build_tree(::Type{NodeType}, stream::ParseStream; # # We use start_mark rather than first_byte to determine node overlap. # This solve the following ambiguity between invisible nodes 1 and 2: - # + # # [a][b]|[...] # |--- invisible node 1 # `--- invisible node 2 @@ -705,4 +705,3 @@ textbuf(stream) = stream.textbuf first_byte(stream::ParseStream) = first_byte(first(stream.ranges)) last_byte(stream::ParseStream) = last_byte(last(stream.ranges)) any_error(stream::ParseStream) = any_error(stream.diagnostics) - diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 9a032f7bc940e..4df8c9e398fa3 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -38,7 +38,7 @@ macro KSet_cmd(str) end kind(k::Kind) = k -kind(raw::TzTokens.RawToken) = TzTokens.exactkind(raw) +kind(raw::TzTokens.Token) = TzTokens.exactkind(raw) # Some renaming for naming consistency is_literal(k) = TzTokens.isliteral(kind(k)) @@ -126,4 +126,3 @@ TODO: Replace `untokenize()` with `Base.string()`? function untokenize(k::Kind; unique=true) get(unique ? 
_kind_to_str_unique : _kind_to_str, k, nothing) end - From 726dbf40f1fad8cffe910157578723212a4cf82e Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 24 Feb 2022 09:32:48 +0100 Subject: [PATCH 0406/1109] get rid of some unused code in tokenize (JuliaLang/JuliaSyntax.jl#15) --- JuliaSyntax/Tokenize/src/_precompile.jl | 4 +- JuliaSyntax/Tokenize/src/utilities.jl | 155 ------------------------ 2 files changed, 1 insertion(+), 158 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/_precompile.jl b/JuliaSyntax/Tokenize/src/_precompile.jl index 406e3aee285dc..75d5eb647f54b 100644 --- a/JuliaSyntax/Tokenize/src/_precompile.jl +++ b/JuliaSyntax/Tokenize/src/_precompile.jl @@ -9,8 +9,6 @@ function _precompile_() precompile(Tokenize.Tokens.Token, ()) precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) - precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,)) - precompile(Tokenize.Lexers.is_identifier_char, (Char,)) precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) @@ -68,5 +66,5 @@ function _precompile_() precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),)) precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) + precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) end diff --git a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/Tokenize/src/utilities.jl index 1c0cd3b7ebb28..71cfe5d6e139a 100644 --- a/JuliaSyntax/Tokenize/src/utilities.jl +++ b/JuliaSyntax/Tokenize/src/utilities.jl @@ -1,113 +1,7 @@ -#= -The code in here has been extracted from the JuliaParser.jl package -with license: - -The JuliaParser.jl package is 
licensed under the MIT "Expat" License: - -> Copyright (c) 2014: Jake Bolewski. -> -> Permission is hereby granted, free of charge, to any person obtaining -> a copy of this software and associated documentation files (the -> "Software"), to deal in the Software without restriction, including -> without limitation the rights to use, copy, modify, merge, publish, -> distribute, sublicense, and/or sell copies of the Software, and to -> permit persons to whom the Software is furnished to do so, subject to -> the following conditions: -> -> The above copyright notice and this permission notice shall be -> included in all copies or substantial portions of the Software. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-=# - import Base.Unicode - -@inline function utf8_trailing(i) - if i < 193 - return 0 - elseif i < 225 - return 1 - elseif i < 241 - return 2 - elseif i < 249 - return 3 - elseif i < 253 - return 4 - else - return 5 - end -end - -const utf8_offset = [0x00000000 - 0x00003080 - 0x000e2080 - 0x03c82080 - 0xfa082080 - 0x82082080] -# const EOF_CHAR = convert(Char,typemax(UInt32)) const EOF_CHAR = typemax(Char) - -function is_cat_id_start(ch::Char, cat::Integer) - c = UInt32(ch) - return (cat == Unicode.UTF8PROC_CATEGORY_LU || cat == Unicode.UTF8PROC_CATEGORY_LL || - cat == Unicode.UTF8PROC_CATEGORY_LT || cat == Unicode.UTF8PROC_CATEGORY_LM || - cat == Unicode.UTF8PROC_CATEGORY_LO || cat == Unicode.UTF8PROC_CATEGORY_NL || - cat == Unicode.UTF8PROC_CATEGORY_SC || # allow currency symbols - cat == Unicode.UTF8PROC_CATEGORY_SO || # other symbols - - # math symbol (category Sm) whitelist - (c >= 0x2140 && c <= 0x2a1c && - ((c >= 0x2140 && c <= 0x2144) || # ⅀, ⅁, ⅂, ⅃, ⅄ - c == 0x223f || c == 0x22be || c == 0x22bf || # ∿, ⊾, ⊿ - c == 0x22a4 || c == 0x22a5 || # ⊤ ⊥ - - (c >= 0x2202 && c <= 0x2233 && - (c == 0x2202 || c == 0x2205 || c == 0x2206 || # ∂, ∅, ∆ - c == 0x2207 || c == 0x220e || c == 0x220f || # ∇, ∎, ∏ - c == 0x2210 || c == 0x2211 || # ∐, ∑ - c == 0x221e || c == 0x221f || # ∞, ∟ - c >= 0x222b)) || # ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ - - (c >= 0x22c0 && c <= 0x22c3) || # N-ary big ops: ⋀, ⋁, ⋂, ⋃ - (c >= 0x25F8 && c <= 0x25ff) || # ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ - - (c >= 0x266f && - (c == 0x266f || c == 0x27d8 || c == 0x27d9 || # ♯, ⟘, ⟙ - (c >= 0x27c0 && c <= 0x27c1) || # ⟀, ⟁ - (c >= 0x29b0 && c <= 0x29b4) || # ⦰, ⦱, ⦲, ⦳, ⦴ - (c >= 0x2a00 && c <= 0x2a06) || # ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ - (c >= 0x2a09 && c <= 0x2a16) || # ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, - # ⨓, ⨔, ⨕, ⨖ - c == 0x2a1b || c == 0x2a1c)))) || # ⨛, ⨜ - - (c >= 0x1d6c1 && # variants of \nabla and \partial - (c == 0x1d6c1 || c == 0x1d6db || - c == 0x1d6fb || c == 0x1d715 || - c == 0x1d735 || c == 0x1d74f || - c == 0x1d76f || c == 
0x1d789 || - c == 0x1d7a9 || c == 0x1d7c3)) || - - # super- and subscript +-=() - (c >= 0x207a && c <= 0x207e) || - (c >= 0x208a && c <= 0x208e) || - - # angle symbols - (c >= 0x2220 && c <= 0x2222) || # ∠, ∡, ∢ - (c >= 0x299b && c <= 0x29af) || # ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, - # ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯ - # Other_ID_Start - c == 0x2118 || c == 0x212E || # ℘, ℮ - (c >= 0x309B && c <= 0x309C)) # katakana-hiragana sound marks -end - function is_identifier_char(c::Char) c == EOF_CHAR && return false return Base.is_id_char(c) @@ -145,55 +39,6 @@ function is_never_id_char(ch::Char) ) end -function peekchar(io::Base.GenericIOBuffer) - if !io.readable || io.ptr > io.size - return EOF_CHAR - end - ch, _ = readutf(io) - return ch -end - -function readutf(io, offset = 0) - ch = convert(UInt8, io.data[io.ptr + offset]) - if ch < 0x80 - return convert(Char, ch), 0 - end - trailing = utf8_trailing(ch + 1) - c::UInt32 = 0 - for j = 1:trailing - c += ch - c <<= 6 - ch = convert(UInt8, io.data[io.ptr + j + offset]) - end - c += ch - c -= utf8_offset[trailing + 1] - return convert(Char, c), trailing -end - -function dpeekchar(io::IOBuffer) - if !io.readable || io.ptr > io.size - return EOF_CHAR, EOF_CHAR - end - ch1, trailing = readutf(io) - offset = trailing + 1 - - if io.ptr + offset > io.size - return ch1, EOF_CHAR - end - ch2, _ = readutf(io, offset) - - return ch1, ch2 -end - -# this implementation is copied from Base -peekchar(s::IOStream) = begin - _CHTMP = Ref{Char}() - if ccall(:ios_peekutf8, Int32, (Ptr{Nothing}, Ptr{Char}), s, _CHTMP) < 0 - return EOF_CHAR - end - return _CHTMP[] -end - eof(io::IO) = Base.eof(io) eof(c::Char) = c === EOF_CHAR From 0959f1e2e1f75fab20f8a29f84400c0b5d58f217 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Mar 2022 15:35:14 +1000 Subject: [PATCH 0407/1109] Add skip_whitespace to peek() functions This allows whitespace to be inspected in some special cases. 
--- JuliaSyntax/src/parse_stream.jl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index e185311b789e9..5bf4a9a15892c 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -294,15 +294,16 @@ function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) end """ - peek(stream [, n=1]) + peek(stream [, n=1]; skip_newlines=false) Look ahead in the stream `n` tokens, returning the token kind. Comments and non-newline whitespace are skipped automatically. Whitespace containing a single newline is returned as kind `K"NewlineWs"` unless `skip_newlines` is true. """ -function Base.peek(stream::ParseStream, n::Integer=1; skip_newlines::Bool=false) - kind(peek_token(stream, n; skip_newlines=skip_newlines)) +function Base.peek(stream::ParseStream, n::Integer=1; + skip_newlines::Bool=false, skip_whitespace=true) + kind(peek_token(stream, n; skip_newlines=skip_newlines, skip_whitespace=skip_whitespace)) end """ @@ -310,12 +311,17 @@ end Like `peek`, but return the full token information rather than just the kind. 
""" -function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false) +function peek_token(stream::ParseStream, n::Integer=1; + skip_newlines=false, skip_whitespace=true) stream.peek_count += 1 if stream.peek_count > 100_000 error("The parser seems stuck at byte $(stream.next_byte)") end - stream.lookahead[_lookahead_index(stream, n, skip_newlines)] + i = _lookahead_index(stream, n, skip_newlines) + if !skip_whitespace + i = 1 + end + return stream.lookahead[i] end function _peek_behind_fields(ranges, i) From 47b56d7ec9b60e4fd22af5cceb8fd01f432fd38d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Mar 2022 16:15:41 +1000 Subject: [PATCH 0408/1109] Remove need for storing newline flag on SyntaxToken --- JuliaSyntax/src/parse_stream.jl | 10 +++------- JuliaSyntax/src/parser.jl | 3 ++- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 5bf4a9a15892c..3903ef6534a2e 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -85,12 +85,11 @@ struct SyntaxToken is_dotted::Bool is_suffixed::Bool had_whitespace::Bool - had_newline::Bool end -function SyntaxToken(raw::Token, had_whitespace, had_newline) +function SyntaxToken(raw::Token, had_whitespace) SyntaxToken(raw.kind, raw.startbyte + 1, raw.endbyte + 1, raw.dotop, raw.suffix, - had_whitespace, had_newline) + had_whitespace) end function Base.show(io::IO, tok::SyntaxToken) @@ -255,15 +254,12 @@ end # but this is not a big problem. 
function _buffer_lookahead_tokens(stream::ParseStream) had_whitespace = false - had_newline = false while true raw = Tokenize.Lexers.next_token(stream.lexer) k = TzTokens.exactkind(raw) was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") - was_newline = k == K"NewlineWs" had_whitespace |= was_whitespace - had_newline |= was_newline - push!(stream.lookahead, SyntaxToken(raw, had_whitespace, had_newline)) + push!(stream.lookahead, SyntaxToken(raw, had_whitespace)) if !was_whitespace break end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 6fca238513970..5d5f17942f952 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -825,6 +825,7 @@ function parse_range(ps::ParseState) end n_colons += 1 bump(ps, n_colons == 1 ? EMPTY_FLAGS : TRIVIA_FLAG) + had_newline = peek(ps, skip_newlines=false) == K"NewlineWs" t = peek_token(ps) if is_closing_token(ps, kind(t)) # 1: } ==> (call-i 1 : (error)) @@ -835,7 +836,7 @@ function parse_range(ps::ParseState) emit_diagnostic(ps, error="found unexpected closing token") return end - if t.had_newline + if had_newline # Error message for people coming from python # 1:\n2 ==> (call-i 1 : (error)) emit_diagnostic(ps, whitespace=true, From 579b665b0bf9726ce7b716ba38a97e53f707f7ab Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Mar 2022 17:37:12 +1000 Subject: [PATCH 0409/1109] Better kind()/flags() API + compactify SyntaxToken flags Define the combination of head/kind/flags functions as a more formal API - many syntax nodes and token types need these. On top of this we can define various predicates such as `is_trivia` in one place rather than having multiple definitions of these functions. 
--- JuliaSyntax/src/green_tree.jl | 4 -- JuliaSyntax/src/parse_stream.jl | 70 ++++++++++++++++++++++----------- JuliaSyntax/src/parser.jl | 58 +++++++++++++-------------- JuliaSyntax/src/syntax_tree.jl | 10 ----- 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 4852dcd7adee0..14fb0a70f3456 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -63,10 +63,6 @@ children(node::GreenNode) = node.args span(node::GreenNode) = node.span head(node::GreenNode) = node.head -# Predicates -is_trivia(node::GreenNode) = is_trivia(node.head) -is_error(node::GreenNode) = is_error(node.head) - Base.summary(node::GreenNode) = summary(node.head) # Pretty printing diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 3903ef6534a2e..924f5704a554f 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1,7 +1,9 @@ #------------------------------------------------------------------------------- # Flags hold auxilary information about tokens/nonterminals which the Kind # doesn't capture in a nice way. -const RawFlags = UInt32 +# +# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias? +const RawFlags = UInt16 const EMPTY_FLAGS = RawFlags(0) const TRIVIA_FLAG = RawFlags(1<<0) # Some of the following flags are head-specific and could probably be allowed @@ -17,7 +19,11 @@ const RAW_STRING_FLAG = RawFlags(1<<4) const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) -# Todo ERROR_FLAG = 0x80000000 ? +# Todo ERROR_FLAG = 0x8000 ? 
+ +## Flags for tokens (may overlap with the flags allocated for syntax above) +const SUFFIXED_FLAG = RawFlags(1<<6) +const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7) function set_numeric_flags(n::Integer) f = RawFlags((n << 8) & NUMERIC_FLAGS) @@ -31,6 +37,10 @@ function numeric_flags(f::RawFlags) Int((f >> 8) % UInt8) end +function remove_flags(n::RawFlags, fs...) + RawFlags(n & ~(RawFlags((|)(fs...)))) +end + # Return true if any of `test_flags` are set has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 @@ -42,13 +52,6 @@ end kind(head::SyntaxHead) = head.kind flags(head::SyntaxHead) = head.flags -has_flags(head::SyntaxHead, test_flags) = has_flags(flags(head), test_flags) - -is_trivia(head::SyntaxHead) = has_flags(head, TRIVIA_FLAG) -is_infix(head::SyntaxHead) = has_flags(head, INFIX_FLAG) -is_dotted(head::SyntaxHead) = has_flags(head, DOTOP_FLAG) -numeric_flags(head::SyntaxHead) = numeric_flags(flags(head)) -is_error(head::SyntaxHead) = kind(head) == K"error" function Base.summary(head::SyntaxHead) untokenize(head, unique=false, include_flag_suff=false) @@ -59,55 +62,74 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) if is_dotted(head) str = "."*str end - if include_flag_suff && flags(head) ∉ (EMPTY_FLAGS, DOTOP_FLAG) + f = flags(head) + # Ignore some flags: + # - DOTOP_FLAG is represented with . 
prefix + # - PRECEDING_WHITESPACE_FLAG relates to the environment of this token + f &= ~(DOTOP_FLAG | PRECEDING_WHITESPACE_FLAG) + suffix_flags = remove_flags(flags(head), DOTOP_FLAG, PRECEDING_WHITESPACE_FLAG) + if include_flag_suff && suffix_flags != EMPTY_FLAGS str = str*"-" is_trivia(head) && (str = str*"t") is_infix(head) && (str = str*"i") has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") has_flags(head, RAW_STRING_FLAG) && (str = str*"r") has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f") + is_suffixed(head) && (str = str*"S") n = numeric_flags(head) n != 0 && (str = str*string(n)) end str end +#------------------------------------------------------------------------------- +# Generic interface for types `T` which have kind and flags: +# 1. Define kind(::T) and flags(::T) directly +# 2. Define head(::T) to return a type like `SyntaxKind` for which `kind` and +# `flags` are defined +kind(x) = kind(head(x)) +flags(x) = flags(head(x)) + +# Predicates based on kind() / flags() +is_error(x) = kind(x) == K"error" +has_flags(x, test_flags) = has_flags(flags(x), test_flags) +is_trivia(x) = has_flags(x, TRIVIA_FLAG) +is_infix(x) = has_flags(x, INFIX_FLAG) +is_dotted(x) = has_flags(x, DOTOP_FLAG) +is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) +preceding_whitespace(x) = has_flags(x, PRECEDING_WHITESPACE_FLAG) +numeric_flags(x) = numeric_flags(flags(x)) + #------------------------------------------------------------------------------- """ `SyntaxToken` is a token covering a contiguous byte range in the input text. Information about preceding whitespace is added for use by the parser. 
""" struct SyntaxToken - kind::Kind + head::SyntaxHead first_byte::UInt32 last_byte::UInt32 - # Flags for leading whitespace - is_dotted::Bool - is_suffixed::Bool - had_whitespace::Bool end function SyntaxToken(raw::Token, had_whitespace) - SyntaxToken(raw.kind, raw.startbyte + 1, raw.endbyte + 1, raw.dotop, raw.suffix, - had_whitespace) + f = EMPTY_FLAGS + had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) + raw.dotop && (f |= DOTOP_FLAG) + raw.suffix && (f |= SUFFIXED_FLAG) + SyntaxToken(SyntaxHead(raw.kind, f), raw.startbyte + 1, raw.endbyte + 1) end function Base.show(io::IO, tok::SyntaxToken) - range = string(lpad(first_byte(tok), 3), ":", rpad(last_byte(tok), 3)) - print(io, rpad(range, 17, " "), rpad(kind(tok), 15, " ")) + print(io, untokenize(tok.head, unique=false), " @", first_byte(tok)) end -kind(tok::SyntaxToken) = tok.kind -flags(tok::SyntaxToken) = tok.is_dotted ? DOTOP_FLAG : EMPTY_FLAGS +head(tok::SyntaxToken) = tok.head first_byte(tok::SyntaxToken) = tok.first_byte last_byte(tok::SyntaxToken) = tok.last_byte span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1 -is_dotted(tok::SyntaxToken) = tok.is_dotted -is_suffixed(tok::SyntaxToken) = tok.is_suffixed is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok) -Base.:(==)(tok::SyntaxToken, k::Kind) = (kind(tok) == k && !is_decorated(tok)) #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5d5f17942f952..831b797dc1d8a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -184,7 +184,7 @@ end # flisp: disallow-space function bump_disallowed_space(ps) - if peek_token(ps).had_whitespace + if preceding_whitespace(peek_token(ps)) bump_trivia(ps, TRIVIA_FLAG, skip_newlines=false, error="whitespace is not allowed here") end @@ -561,7 +561,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down, equals_is_ return NO_POSITION end if k == K"~" - if 
ps.space_sensitive && !peek_token(ps, 2).had_whitespace + if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2)) # Unary ~ in space sensitive context is not assignment precedence # [a ~b] ==> (hcat a (call ~ b)) return NO_POSITION @@ -626,21 +626,21 @@ function parse_cond(ps::ParseState) if kind(t) != K"?" return end - if !t.had_whitespace + if !preceding_whitespace(t) # a? b : c => (if a (error-t) b c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required before `?` operator") end bump(ps, TRIVIA_FLAG) # ? t = peek_token(ps) - if !t.had_whitespace + if !preceding_whitespace(t) # a ?b : c bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `?` operator") end parse_eq_star(ParseState(ps, range_colon_enabled=false)) t = peek_token(ps) - if !t.had_whitespace + if !preceding_whitespace(t) # a ? b: c ==> (if a [ ] [?] [ ] b (error-t) [:] [ ] c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required before `:` in `?` expression") @@ -652,7 +652,7 @@ function parse_cond(ps::ParseState) bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression") end t = peek_token(ps) - if !t.had_whitespace + if !preceding_whitespace(t) # a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `:` in `?` expression") @@ -799,15 +799,15 @@ function parse_range(ps::ParseState) n_colons = 0 while peek(ps) == K":" if ps.space_sensitive && - peek_token(ps).had_whitespace && - !peek_token(ps, 2).had_whitespace + preceding_whitespace(peek_token(ps)) && + !preceding_whitespace(peek_token(ps, 2)) # Tricky cases in space sensitive mode # [1 :a] ==> (hcat 1 (quote a)) # [1 2:3 :a] ==> (hcat 1 (call-i 2 : 3) (quote a)) break end t2 = peek_token(ps,2) - if kind(t2) in KSet`< >` && !t2.had_whitespace + if kind(t2) in KSet`< >` && !preceding_whitespace(t2) # Error heuristic: we found `:>` or `:<` which are invalid lookalikes # for `<:` and `>:`. 
Attempt to recover by treating them as a # comparison operator. @@ -887,9 +887,9 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) mark = position(ps) down(ps) while (t = peek_token(ps); is_op(kind(t))) - if ps.space_sensitive && t.had_whitespace && + if ps.space_sensitive && preceding_whitespace(t) && is_both_unary_and_binary(t) && - !peek_token(ps, 2).had_whitespace + !preceding_whitespace(peek_token(ps, 2)) # The following is two elements of a hcat # [x +y] ==> (hcat x (call + y)) # [x+y +z] ==> (hcat (call-i x + y) (call + z)) @@ -917,9 +917,9 @@ end # flisp: parse-chain function parse_chain(ps::ParseState, down, op_kind) while (t = peek_token(ps); kind(t) == op_kind && !is_decorated(t)) - if ps.space_sensitive && t.had_whitespace && + if ps.space_sensitive && preceding_whitespace(t) && is_both_unary_and_binary(t) && - !peek_token(ps, 2).had_whitespace + !preceding_whitespace(peek_token(ps, 2)) # [x +y] ==> (hcat x (call + y)) break end @@ -1024,7 +1024,7 @@ function is_juxtapose(ps, prev_k, t) # x' y ==> x # x 'y ==> x - return !t.had_whitespace && + return !preceding_whitespace(t) && (is_number(prev_k) || (!is_number(k) && # disallow "x.3" and "sqrt(2)2" k != K"@" && # disallow "x@time" @@ -1098,7 +1098,7 @@ function parse_unary(ps::ParseState) end if k in KSet`- +` t2 = peek_token(ps, 2) - if !t2.had_whitespace && kind(t2) in KSet`Integer Float` + if !preceding_whitespace(t2) && kind(t2) in KSet`Integer Float` k3 = peek(ps, 3) if is_prec_power(k3) || k3 in KSet`[ {` # `[`, `{` (issue #18851) and `^` have higher precedence than @@ -1190,7 +1190,7 @@ function parse_unary_call(ps::ParseState) # The precedence between unary + and any following infix ^ depends on # whether the parens are a function call or not if is_call - if t2.had_whitespace + if preceding_whitespace(t2) # Whitespace not allowed before prefix function call bracket # + (a,b) ==> (call + (error) a b) reset_node!(ps, ws_error_pos, kind=K"error") @@ -1392,7 +1392,7 @@ function 
parse_call_chain(ps::ParseState, mark, is_macrocall=false) this_iter_valid_macroname = false t = peek_token(ps) k = kind(t) - if is_macrocall && (t.had_whitespace || is_closing_token(ps, k)) + if is_macrocall && (preceding_whitespace(t) || is_closing_token(ps, k)) # Macro calls with space-separated arguments # @foo a b ==> (macrocall @foo a b) # @foo (x) ==> (macrocall @foo x) @@ -1427,7 +1427,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"macrocall") end break - elseif (ps.space_sensitive && t.had_whitespace && + elseif (ps.space_sensitive && preceding_whitespace(t) && k in KSet`( [ { \ Char " """ \` \`\`\``) # [f (x)] ==> (hcat f x) # [f "x"] ==> (hcat f "x") @@ -1605,7 +1605,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"curly") end elseif k in KSet` " """ \` \`\`\` ` && - !t.had_whitespace && valid_macroname + !preceding_whitespace(t) && valid_macroname # Custom string and command literals # x"str" ==> (macrocall @x_str "str") # x`str` ==> (macrocall @x_cmd "str") @@ -1623,7 +1623,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_string(ps, true) t = peek_token(ps) k = kind(t) - if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)) + if !preceding_whitespace(t) && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)) # Macro sufficies can include keywords and numbers # x"s"y ==> (macrocall @x_str "s" "y") # x"s"end ==> (macrocall @x_str "s" "end") @@ -2248,7 +2248,7 @@ function parse_imports(ps::ParseState) k = kind(t) has_import_prefix = false # true if we have `prefix:` in `import prefix: stuff` has_comma = false - if k == K":" && !t.had_whitespace + if k == K":" && !preceding_whitespace(t) bump(ps, TRIVIA_FLAG) has_import_prefix = true if initial_as @@ -2368,7 +2368,7 @@ function parse_import_path(ps::ParseState) # path, not operators # import A.== ==> (import (. 
A ==)) # import A.⋆.f ==> (import (. A ⋆ f)) - if t.had_whitespace + if preceding_whitespace(t) # Whitespace in import path allowed but discouraged # import A .== ==> (import (. A ==)) emit_diagnostic(ps, whitespace=true, @@ -2537,7 +2537,7 @@ end # flisp: parse-generator function parse_generator(ps::ParseState, mark, flatten=false) t = peek_token(ps) - if !t.had_whitespace + if !preceding_whitespace(t) # [(x)for x in xs] ==> (comprehension (generator x (error) (= x xs))) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Expected space before `for` in generator") @@ -2707,7 +2707,7 @@ function parse_array_separator(ps, array_order) if kind(t) != K";" break end - if t.had_whitespace + if preceding_whitespace(t) bump_disallowed_space(ps) end n_semis += 1 @@ -2751,7 +2751,7 @@ function parse_array_separator(ps, array_order) bump(ps, TRIVIA_FLAG, error="unexpected comma in array expression") return (1, -1) else - if t.had_whitespace && !is_closing_token(ps, k) + if preceding_whitespace(t) && !is_closing_token(ps, k) if array_order[] === :column_major # Can't mix multiple ;'s and spaces #v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c)) @@ -3252,7 +3252,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # : foo ==> (quote (error-t) foo) t = peek_token(ps, 2) k = kind(t) - if is_closing_token(ps, k) && (!is_keyword(k) || t.had_whitespace) + if is_closing_token(ps, k) && (!is_keyword(k) || preceding_whitespace(t)) # : is a literal colon in some circumstances # :) ==> : # : end ==> : @@ -3260,7 +3260,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) return end bump(ps, TRIVIA_FLAG) # K":" - if t.had_whitespace + if preceding_whitespace(t) # : a ==> (quote (error-t) a)) # === # : @@ -3306,7 +3306,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) end elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); - kind(t) == K"\"" && !t.had_whitespace) + kind(t) == K"\"" && !preceding_whitespace(t)) # var"x" 
==> x # Raw mode unescaping # var"" ==> @@ -3333,7 +3333,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) end t = peek_token(ps) k = kind(t) - if t.had_whitespace || is_operator(k) || + if preceding_whitespace(t) || is_operator(k) || k in KSet`( ) [ ] { } , ; @ EndMarker` # var"x"+ ==> x # var"x") ==> x diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 383d3422f7ff2..14446ba8bb2d1 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -96,13 +96,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end end -is_error(node::SyntaxNode) = is_error(node.raw) -is_trivia(node::SyntaxNode) = is_trivia(node.raw) -has_flags(node::SyntaxNode, f) = has_flags(head(node), f) - head(node::SyntaxNode) = head(node.raw) -kind(node::SyntaxNode) = kind(node.raw) -flags(node::SyntaxNode) = flags(node.raw) haschildren(node::SyntaxNode) = !node.is_leaf children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () @@ -190,10 +184,6 @@ end #------------------------------------------------------------------------------- # Tree utilities -kind(node) = kind(head(node)) -flags(node) = flags(head(node)) -is_infix(node) = is_infix(head(node)) - """ child(node, i1, i2, ...) 
From fc20caa666ead7fdf8c69fbf73aaf84c539a4ab0 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Mar 2022 21:52:58 +1000 Subject: [PATCH 0410/1109] Fix EMPTY_TOKEN after the RawToken->Token transition The token_type() function no longer exists --- JuliaSyntax/Tokenize/src/lexer.jl | 16 ++++++++-------- JuliaSyntax/Tokenize/src/token.jl | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 86ba9050cd578..0cfd9d749f49c 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -524,7 +524,7 @@ function lex_comment(l::Lexer, doemit=true) while true pc = peekchar(l) if pc == '\n' || eof(pc) - return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN end readchar(l) end @@ -534,7 +534,7 @@ function lex_comment(l::Lexer, doemit=true) n_start, n_end = 1, 0 while true if eof(c) - return doemit ? emit_error(l, Tokens.EOF_MULTICOMMENT) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit_error(l, Tokens.EOF_MULTICOMMENT) : EMPTY_TOKEN end nc = readchar(l) if c == '#' && nc == '=' @@ -543,7 +543,7 @@ function lex_comment(l::Lexer, doemit=true) n_end += 1 end if n_start == n_end - return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN end pc = c c = nc @@ -852,25 +852,25 @@ function lex_prime(l, doemit = true) else if accept(l, '\'') if accept(l, '\'') - return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN else # Empty char literal # Arguably this should be an error here, but we generally # look at the contents of the char literal in the parser, # so we defer erroring until there. - return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) + return doemit ? 
emit(l, Tokens.CHAR) : EMPTY_TOKEN end end while true c = readchar(l) if eof(c) - return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN elseif c == '\\' if eof(readchar(l)) - return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN end elseif c == '\'' - return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN(token_type(l)) + return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN end end end diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index f7d2ea3b16f44..adef857722fe3 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -67,9 +67,7 @@ function Token(kind::Kind, startbyte::Int, endbyte::Int) end Token() = Token(ERROR, 0, 0, UNKNOWN, false, false) - -const _EMPTY_RAWTOKEN = Token() -EMPTY_TOKEN(::Type{Token}) = _EMPTY_RAWTOKEN +const EMPTY_TOKEN = Token() function kind(t::Token) isoperator(t.kind) && return OP From 79a7fe0019419ec9f33f60d7a667b1abd817762f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 1 Mar 2022 21:55:05 +1000 Subject: [PATCH 0411/1109] Remove boxed closure captures warned about by JET * Use Ref to avoid triggering boxing of captures via closure variable assignment. * Use let blocks for temporary ParseState transitions to avoid some closures. 
--- JuliaSyntax/src/parser.jl | 94 ++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 831b797dc1d8a..aadb4fa1f4516 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -53,10 +53,10 @@ function normal_context(ps::ParseState) whitespace_newline=false) end -function with_space_sensitive(f::Function, ps::ParseState) - f(ParseState(ps, - space_sensitive=true, - whitespace_newline=false)) +function with_space_sensitive(ps::ParseState) + ParseState(ps, + space_sensitive=true, + whitespace_newline=false) end # Convenient wrappers for ParseStream @@ -1176,20 +1176,20 @@ function parse_unary_call(ps::ParseState) mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( initial_semi = peek(ps) == K";" - is_call = false - is_block = false + is_call = Ref(false) + is_block = Ref(false) parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_call = had_commas || had_splat || initial_semi - is_block = !is_call && num_semis > 0 + is_call[] = had_commas || had_splat || initial_semi + is_block[] = !is_call[] && num_semis > 0 bump_closing_token(ps, K")") - return (needs_parameters=is_call, - eq_is_kw_before_semi=is_call, - eq_is_kw_after_semi=is_call) + return (needs_parameters=is_call[], + eq_is_kw_before_semi=is_call[], + eq_is_kw_after_semi=is_call[]) end # The precedence between unary + and any following infix ^ depends on # whether the parens are a function call or not - if is_call + if is_call[] if preceding_whitespace(t2) # Whitespace not allowed before prefix function call bracket # + (a,b) ==> (call + (error) a b) @@ -1211,7 +1211,7 @@ function parse_unary_call(ps::ParseState) parse_factor_with_initial_ex(ps, mark) else # Unary function calls with brackets as grouping, not an arglist - if is_block + if is_block[] # +(a;b) ==> (call + (block a b)) emit(ps, mark_before_paren, K"block") end @@ -1400,7 +1400,7 @@ function 
parse_call_chain(ps::ParseState, mark, is_macrocall=false) # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) # [@foo "x"] ==> (vect (macrocall @foo "x")) finish_macroname(ps, mark, valid_macroname, macro_name_position) - with_space_sensitive(ps) do ps + let ps = with_space_sensitive(ps) # Space separated macro arguments # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) @@ -1779,8 +1779,10 @@ function parse_resword(ps::ParseState) bump(ps, TRIVIA_FLAG) @check peek(ps) == K"type" bump(ps, TRIVIA_FLAG) - with_space_sensitive(parse_subtype_spec, ps) - with_space_sensitive(parse_cond, ps) + let ps = with_space_sensitive(ps) + parse_subtype_spec(ps) + parse_cond(ps) + end bump_semicolon_trivia(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"primitive") @@ -1975,7 +1977,7 @@ function parse_function(ps::ParseState) word = peek(ps) @check word in KSet`macro function` is_function = word == K"function" - is_anon_func::Bool = false + is_anon_func = false bump(ps, TRIVIA_FLAG) bump_trivia(ps) @@ -2005,13 +2007,15 @@ function parse_function(ps::ParseState) # # The flisp parser disambiguates this case quite differently, # producing less consistent syntax for anonymous functions. + is_anon_func_ = Ref(is_anon_func) parse_brackets(ps, K")") do _, _, _, _ bump_closing_token(ps, K")") - is_anon_func = peek(ps) != K"(" - return (needs_parameters = is_anon_func, - eq_is_kw_before_semi = is_anon_func, - eq_is_kw_after_semi = is_anon_func) + is_anon_func_[] = peek(ps) != K"(" + return (needs_parameters = is_anon_func_[], + eq_is_kw_before_semi = is_anon_func_[], + eq_is_kw_after_semi = is_anon_func_[]) end + is_anon_func = is_anon_func_[] if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) # function (x,y) end ==> (function (tuple x y) (block)) @@ -2209,8 +2213,8 @@ function parse_macro_name(ps::ParseState) # @! x ==> (macrocall @! x) # @.. x ==> (macrocall @.. 
x) # @$ x ==> (macrocall @$ x) - with_space_sensitive(ps) do ps1 - parse_atom(ps1, false) + let ps = with_space_sensitive(ps) + parse_atom(ps, false) end end end @@ -2263,13 +2267,14 @@ function parse_imports(ps::ParseState) # import A, y ==> (import (. A) (. y)) # import A: x, y ==> (import (: (. A) (. x) (. y))) # import A: +, == ==> (import (: (. A) (. +) (. ==))) - parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix)) + has_import_prefix_ = has_import_prefix + parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_)) if peek(ps) == K":" # Error recovery # import A: x, B: y ==> (import (: (. A) (. x) (. B) (error-t (. y)))) emark = position(ps) bump(ps, TRIVIA_FLAG) - parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix)) + parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_)) emit(ps, emark, K"error", TRIVIA_FLAG, error="`:` can only be used when importing a single module. Split imports into multiple lines") end @@ -2436,7 +2441,7 @@ function parse_iteration_spec(ps::ParseState) mark = position(ps) k = peek(ps) # Handle `outer` contextual keyword - with_space_sensitive(parse_pipe_lt, ps) + parse_pipe_lt(with_space_sensitive(ps)) if peek_behind(ps).orig_kind == K"outer" if peek_skip_newline_in_gen(ps) in KSet`= in ∈` # Not outer keyword @@ -2465,19 +2470,18 @@ end # flisp: parse-space-separated-exprs function parse_space_separated_exprs(ps::ParseState) - with_space_sensitive(ps) do ps - n_sep = 0 - while true - k = peek(ps) - if is_closing_token(ps, k) || k == K"NewlineWs" || - (ps.for_generator && k == K"for") - break - end - parse_eq(ps) - n_sep += 1 + ps = with_space_sensitive(ps) + n_sep = 0 + while true + k = peek(ps) + if is_closing_token(ps, k) || k == K"NewlineWs" || + (ps.for_generator && k == K"for") + break end - return n_sep + parse_eq(ps) + n_sep += 1 end + return n_sep end # like parse-arglist, but with `for` parsed as a generator @@ -2858,18 +2862,18 @@ function 
parse_paren(ps::ParseState, check_identifiers=true) # Deal with all other cases of tuple or block syntax via the generic # parse_brackets initial_semi = peek(ps) == K";" - is_tuple = false - is_block = false + is_tuple = Ref(false) + is_block = Ref(false) parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_tuple = had_commas || (had_splat && num_semis >= 1) || + is_tuple[] = had_commas || (had_splat && num_semis >= 1) || (initial_semi && (num_semis == 1 || num_subexprs > 0)) - is_block = num_semis > 0 + is_block[] = num_semis > 0 bump_closing_token(ps, K")") - return (needs_parameters=is_tuple, + return (needs_parameters=is_tuple[], eq_is_kw_before_semi=false, - eq_is_kw_after_semi=is_tuple) + eq_is_kw_after_semi=is_tuple[]) end - if is_tuple + if is_tuple[] # Tuple syntax with commas # (x,) ==> (tuple x) # (x,y) ==> (tuple x y) @@ -2886,7 +2890,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) # (a; b; c,d) ==> (tuple a (parameters b (parameters c d))) # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3))) emit(ps, mark, K"tuple") - elseif is_block + elseif is_block[] # Blocks # (;;) ==> (block) # (a=1;) ==> (block (= a 1)) From be182050a78da9a2a7fbbd59678860762b248abd Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Mar 2022 12:04:40 +1000 Subject: [PATCH 0412/1109] Parser optimization: Reuse position arrays, outline error code paths --- JuliaSyntax/src/parse_stream.jl | 23 ++++++++++++++++++++++- JuliaSyntax/src/parser.jl | 11 ++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 924f5704a554f..f66ca4b18d60c 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -186,6 +186,8 @@ mutable struct ParseStream lexer::Tokenize.Lexers.Lexer{IOBuffer} # Lookahead buffer for already lexed tokens lookahead::Vector{SyntaxToken} + # Pool of stream positions for use as working space in 
parsing + position_pool::Vector{Vector{ParseStreamPosition}} # Parser output as an ordered sequence of ranges, parent nodes after children. ranges::Vector{TaggedRange} # Parsing diagnostics (errors/warnings etc) @@ -210,6 +212,7 @@ mutable struct ParseStream # like an acceptable tradeoff. ver = (version.major, version.minor) new(text_buf, text_root, lexer, + Vector{Vector{ParseStreamPosition}}(), Vector{SyntaxToken}(), Vector{TaggedRange}(), Vector{Diagnostic}(), @@ -268,6 +271,19 @@ function show_diagnostics(io::IO, stream::ParseStream, code) show_diagnostics(io, stream.diagnostics, code) end +# We manage a pool of stream positions as parser working space +function acquire_positions(stream) + if isempty(stream.position_pool) + return Vector{ParseStreamPosition}() + end + pop!(stream.position_pool) +end + +function release_positions(stream, positions) + empty!(positions) + push!(stream.position_pool, positions) +end + #------------------------------------------------------------------------------- # Stream input interface - the peek_* family of functions @@ -311,6 +327,11 @@ function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) end end +@noinline function _parser_stuck_error(stream) + # Optimization: emit unlikely errors in a separate function + error("The parser seems stuck at byte $(stream.next_byte)") +end + """ peek(stream [, n=1]; skip_newlines=false) @@ -333,7 +354,7 @@ function peek_token(stream::ParseStream, n::Integer=1; skip_newlines=false, skip_whitespace=true) stream.peek_count += 1 if stream.peek_count > 100_000 - error("The parser seems stuck at byte $(stream.next_byte)") + _parser_stuck_error(stream) end i = _lookahead_index(stream, n, skip_newlines) if !skip_whitespace diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index aadb4fa1f4516..b14e9aa9f832a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2940,8 +2940,8 @@ function parse_brackets(after_parse::Function, 
space_sensitive=false, where_enabled=true, whitespace_newline=true) - params_marks = ParseStreamPosition[] - eq_positions = ParseStreamPosition[] + params_marks = acquire_positions(ps.stream) + eq_positions = acquire_positions(ps.stream) last_eq_before_semi = 0 num_subexprs = 0 num_semis = 0 @@ -3021,6 +3021,8 @@ function parse_brackets(after_parse::Function, emit(ps, mark, K"parameters") end end + release_positions(ps.stream, params_marks) + release_positions(ps.stream, eq_positions) end is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) @@ -3036,9 +3038,7 @@ function parse_string(ps::ParseState, raw::Bool) string_chunk_kind = delim_k in KSet`" """` ? K"String" : K"CmdString" indent_ref_i = 0 indent_ref_len = typemax(Int) - if triplestr - indent_chunks = Vector{ParseStreamPosition}() - end + indent_chunks = acquire_positions(ps.stream) buf = textbuf(ps) str_flags = (triplestr ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) | (raw ? RAW_STRING_FLAG : EMPTY_FLAGS) @@ -3191,6 +3191,7 @@ function parse_string(ps::ParseState, raw::Bool) end end end + release_positions(ps.stream, indent_chunks) if had_end_delim if n_valid_chunks == 0 # Empty strings, or empty after triple quoted processing From 802171e90b3b8f4b618106d84f5c745c22de338b Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Mar 2022 12:17:06 +1000 Subject: [PATCH 0413/1109] Reduce the size of Kind to 16 bits --- JuliaSyntax/Tokenize/src/token_kinds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 8b4b0fac614b0..4dc60e060d700 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -1,4 +1,4 @@ -@enum(Kind, +@enum(Kind::UInt16, NONE, # Placeholder; never emitted by lexer ENDMARKER, # EOF ERROR, From 5626ab2426b0a89b5b73fb840532ff2677451296 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 2 Mar 2022 18:57:35 +1000 Subject: [PATCH 0414/1109] Avoid 
directly accessing ParseStream lookahead array in parser API --- JuliaSyntax/src/parser.jl | 1 - JuliaSyntax/src/parser_api.jl | 2 +- JuliaSyntax/test/parse_stream.jl | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b14e9aa9f832a..761a125c1fbdb 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1405,7 +1405,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) n_args = parse_space_separated_exprs(ps) - # TODO: Introduce K"doc" to make this hack less awful. is_doc_macro = peek_behind(ps, macro_name_position).orig_kind == K"doc" if is_doc_macro && n_args == 1 # Parse extended @doc args on next line diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index c5377f8551ece..4ae84b3129bb9 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -152,7 +152,7 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, end parse(stream; rule=rule) if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || - (!ignore_trivia && (peek(stream); kind(first(stream.lookahead)) != K"EndMarker")) + (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) emit_diagnostic(stream, error="unexpected text after parsing $rule") end if any_error(stream.diagnostics) diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index d2e63c1a4dcea..10c021f775df4 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -6,7 +6,7 @@ using JuliaSyntax: ParseStream, peek, peek_token, bump, bump_trivia, bump_invisible, - emit, emit_diagnostic + emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG code = """ for i = 1:10 From 400396c78f03077f1d47273a790e2eef68ac6852 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: 
Fri, 4 Mar 2022 15:14:16 +1000 Subject: [PATCH 0415/1109] Optimize token buffering and peek() This change implements a fast-path for token lookahead in peek() and increases the size of the lookahead buffer to make this more efficient. --- JuliaSyntax/src/parse_stream.jl | 82 ++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index f66ca4b18d60c..89550273b6e9d 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -108,15 +108,7 @@ Information about preceding whitespace is added for use by the parser. struct SyntaxToken head::SyntaxHead first_byte::UInt32 - last_byte::UInt32 -end - -function SyntaxToken(raw::Token, had_whitespace) - f = EMPTY_FLAGS - had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) - raw.dotop && (f |= DOTOP_FLAG) - raw.suffix && (f |= SUFFIXED_FLAG) - SyntaxToken(SyntaxHead(raw.kind, f), raw.startbyte + 1, raw.endbyte + 1) + last_byte::UInt32 # TODO: Remove this? end function Base.show(io::IO, tok::SyntaxToken) @@ -137,8 +129,6 @@ is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok) Range in the source text which will become a node in the tree. Can be either a token (leaf node of the tree) or an interior node, depending on how the start_mark compares to previous nodes. - -TODO: Optimize this data structure? It's very large at the moment. 
""" struct TaggedRange head::SyntaxHead # Kind,flags @@ -146,6 +136,12 @@ struct TaggedRange first_byte::UInt32 # First byte in the input text last_byte::UInt32 # Last byte in the input text start_mark::UInt32 # Index of first emitted range which this range covers + # TODO: Remove the three fields above & replace with: + # is_leaf::Bool + # # The following field is used for one of two things: + # # - For leaf nodes it points to the last byte of the token in the input text + # # - For non-leaf nodes it points to the index of the first child + # last_byte_or_first_child::UInt32 end head(range::TaggedRange) = range.head @@ -287,28 +283,72 @@ end #------------------------------------------------------------------------------- # Stream input interface - the peek_* family of functions -# Buffer up until the next non-whitespace token. -# This can buffer more than strictly necessary when newlines are significant, -# but this is not a big problem. +# Buffer several tokens ahead function _buffer_lookahead_tokens(stream::ParseStream) had_whitespace = false + token_count = 0 while true raw = Tokenize.Lexers.next_token(stream.lexer) k = TzTokens.exactkind(raw) was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") had_whitespace |= was_whitespace - push!(stream.lookahead, SyntaxToken(raw, had_whitespace)) - if !was_whitespace + f = EMPTY_FLAGS + had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) + raw.dotop && (f |= DOTOP_FLAG) + raw.suffix && (f |= SUFFIXED_FLAG) + push!(stream.lookahead, SyntaxToken(SyntaxHead(k, f), raw.startbyte + 1, raw.endbyte + 1)) + token_count += 1 + if k == K"EndMarker" break end + if !was_whitespace + # Buffer tokens in batches for lookahead. Generally we want a + # moderate-size buffer to make sure we hit the fast path of peek(), + # but not too large to avoid (a) polluting the processor cache and + # (b) doing unnecessary work when not parsing the whole input. 
+ had_whitespace = false + if token_count > 100 + break + end + end end end -# Find the index of the first nontrivia token in the lookahead buffer. -# -# TODO: Store this as part of _buffer_lookahead_tokens to avoid redoing this -# work all the time! -function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) +# Find the index of the next nontrivia token +@inline function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) + # Much of the time we'll be peeking ahead a single token and have one or + # zero whitespace tokens before the next token. The following code is an + # unrolled optimized version for that fast path. Empirically it seems we + # only hit the slow path about 5% of the time here. + i = 1 + if n == 1 && i+1 <= length(stream.lookahead) + if skip_newlines + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs") + return i + end + i += 1 + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs") + return i + end + else + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment") + return i + end + i += 1 + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment") + return i + end + end + end + # Fall through to the general case + return __lookahead_index(stream, n, skip_newlines) +end + +@noinline function __lookahead_index(stream, n, skip_newlines) i = 1 while true if i > length(stream.lookahead) From 1574072149431f8bac6c81230868ddc92e24bb11 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 4 Mar 2022 16:51:47 +1000 Subject: [PATCH 0416/1109] Reduce resizing of token lookahead buffer Manually track an index into the lookahead buffer to avoid buffer resizing. (Julia's builtin array actually uses the same strategy to avoid shuffling elements in popfront!(). But an extra layer here can help as we know more about the data access.) 
--- JuliaSyntax/src/parse_stream.jl | 62 ++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 89550273b6e9d..22bd52344e9f7 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -182,6 +182,7 @@ mutable struct ParseStream lexer::Tokenize.Lexers.Lexer{IOBuffer} # Lookahead buffer for already lexed tokens lookahead::Vector{SyntaxToken} + lookahead_index::Int # Pool of stream positions for use as working space in parsing position_pool::Vector{Vector{ParseStreamPosition}} # Parser output as an ordered sequence of ranges, parent nodes after children. @@ -207,9 +208,12 @@ mutable struct ParseStream # numbers. This means we're inexact for old dev versions but that seems # like an acceptable tradeoff. ver = (version.major, version.minor) - new(text_buf, text_root, lexer, - Vector{Vector{ParseStreamPosition}}(), + new(text_buf, + text_root, + lexer, Vector{SyntaxToken}(), + 1, + Vector{Vector{ParseStreamPosition}}(), Vector{TaggedRange}(), Vector{Diagnostic}(), next_byte, @@ -284,11 +288,11 @@ end # Stream input interface - the peek_* family of functions # Buffer several tokens ahead -function _buffer_lookahead_tokens(stream::ParseStream) +function _buffer_lookahead_tokens(lexer, lookahead) had_whitespace = false token_count = 0 while true - raw = Tokenize.Lexers.next_token(stream.lexer) + raw = Tokenize.Lexers.next_token(lexer) k = TzTokens.exactkind(raw) was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") had_whitespace |= was_whitespace @@ -296,7 +300,7 @@ function _buffer_lookahead_tokens(stream::ParseStream) had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) raw.dotop && (f |= DOTOP_FLAG) raw.suffix && (f |= SUFFIXED_FLAG) - push!(stream.lookahead, SyntaxToken(SyntaxHead(k, f), raw.startbyte + 1, raw.endbyte + 1)) + push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.startbyte + 1, raw.endbyte + 1)) token_count += 
1 if k == K"EndMarker" break @@ -320,7 +324,7 @@ end # zero whitespace tokens before the next token. The following code is an # unrolled optimized version for that fast path. Empirically it seems we # only hit the slow path about 5% of the time here. - i = 1 + i = stream.lookahead_index if n == 1 && i+1 <= length(stream.lookahead) if skip_newlines k = kind(stream.lookahead[i]) @@ -349,15 +353,20 @@ end end @noinline function __lookahead_index(stream, n, skip_newlines) - i = 1 + i = stream.lookahead_index while true if i > length(stream.lookahead) - _buffer_lookahead_tokens(stream) + n_to_delete = stream.lookahead_index-1 + if n_to_delete > 0.9*length(stream.lookahead) + Base._deletebeg!(stream.lookahead, n_to_delete) + i -= n_to_delete + stream.lookahead_index = 1 + end + _buffer_lookahead_tokens(stream.lexer, stream.lookahead) end k = kind(stream.lookahead[i]) - is_skipped = k ∈ (K"Whitespace", K"Comment") || - (k == K"NewlineWs" && skip_newlines) - if !is_skipped + if !((k == K"Whitespace" || k == K"Comment") || + (k == K"NewlineWs" && skip_newlines)) if n == 1 return i end @@ -398,7 +407,7 @@ function peek_token(stream::ParseStream, n::Integer=1; end i = _lookahead_index(stream, n, skip_newlines) if !skip_whitespace - i = 1 + i = stream.lookahead_index end return stream.lookahead[i] end @@ -445,13 +454,13 @@ end # # Though note bump() really does both input and output -# Bump the next `n` tokens +# Bump up until the `n`th token # flags and remap_kind are applied to any non-trivia tokens -function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None") - if n <= 0 +function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None") + if n < stream.lookahead_index return end - for i = 1:n + for i in stream.lookahead_index:n tok = stream.lookahead[i] k = kind(tok) if k == K"EndMarker" @@ -465,7 +474,7 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None") last_byte(tok), lastindex(stream.ranges)+1) 
push!(stream.ranges, range) end - Base._deletebeg!(stream.lookahead, n) + stream.lookahead_index = n + 1 stream.next_byte = last_byte(last(stream.ranges)) + 1 # Defuse the time bomb stream.peek_count = 0 @@ -480,7 +489,7 @@ Shift the current token from the input to the output, adding the given flags. function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, error=nothing, remap_kind::Kind=K"None") emark = position(stream) - _bump_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) + _bump_until_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) if !isnothing(error) emit(stream, emark, K"error", flags, error=error) end @@ -496,7 +505,7 @@ Bump comments and whitespace tokens preceding the next token function bump_trivia(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=true, error=nothing) emark = position(stream) - _bump_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) + _bump_until_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) if !isnothing(error) emit(stream, emark, K"error", flags, error=error) end @@ -523,11 +532,12 @@ lexing ambiguities. There's no special whitespace handling — bump any whitespace if necessary with bump_trivia. """ function bump_glue(stream::ParseStream, kind, flags, num_tokens) + i = stream.lookahead_index span = TaggedRange(SyntaxHead(kind, flags), K"None", - first_byte(stream.lookahead[1]), - last_byte(stream.lookahead[num_tokens]), + first_byte(stream.lookahead[i]), + last_byte(stream.lookahead[i-1+num_tokens]), lastindex(stream.ranges) + 1) - Base._deletebeg!(stream.lookahead, num_tokens) + stream.lookahead_index += num_tokens push!(stream.ranges, span) stream.next_byte = last_byte(last(stream.ranges)) + 1 stream.peek_count = 0 @@ -553,7 +563,8 @@ TODO: Are these the only cases? Can we replace this general utility with a simpler one which only splits preceding dots? """ function bump_split(stream::ParseStream, split_spec...) 
- tok = popfirst!(stream.lookahead) + tok = stream.lookahead[stream.lookahead_index] + stream.lookahead_index += 1 fbyte = first_byte(tok) for (i, (nbyte, k, f)) in enumerate(split_spec) lbyte = (i == length(split_spec)) ? last_byte(tok) : fbyte + nbyte - 1 @@ -655,8 +666,9 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) if whitespace # It's the whitespace which is the error. Find the range of the current # whitespace. - begin_tok_i = 1 - end_tok_i = is_whitespace(stream.lookahead[i]) ? i : max(1, i-1) + begin_tok_i = stream.lookahead_index + end_tok_i = is_whitespace(stream.lookahead[i]) ? + i : max(stream.lookahead_index, i-1) end fbyte = first_byte(stream.lookahead[begin_tok_i]) lbyte = last_byte(stream.lookahead[end_tok_i]) From 8382abacb2647f622179abdd6bd973f921594524 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 4 Mar 2022 16:58:09 +1000 Subject: [PATCH 0417/1109] Add inbounds annotations in peek() Some inbounds annotations in the `peek()` hot code paths seem to provide a few percent improvement (maybe 5%)? --- JuliaSyntax/src/parse_stream.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 22bd52344e9f7..385c43ca40489 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -325,7 +325,7 @@ end # unrolled optimized version for that fast path. Empirically it seems we # only hit the slow path about 5% of the time here. 
i = stream.lookahead_index - if n == 1 && i+1 <= length(stream.lookahead) + @inbounds if n == 1 && i+1 <= length(stream.lookahead) if skip_newlines k = kind(stream.lookahead[i]) if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs") @@ -364,7 +364,7 @@ end end _buffer_lookahead_tokens(stream.lexer, stream.lookahead) end - k = kind(stream.lookahead[i]) + k = @inbounds kind(stream.lookahead[i]) if !((k == K"Whitespace" || k == K"Comment") || (k == K"NewlineWs" && skip_newlines)) if n == 1 @@ -409,7 +409,7 @@ function peek_token(stream::ParseStream, n::Integer=1; if !skip_whitespace i = stream.lookahead_index end - return stream.lookahead[i] + return @inbounds stream.lookahead[i] end function _peek_behind_fields(ranges, i) From 78247be3f664882ec013653ad5deae6e9785c97e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 9 Mar 2022 09:46:08 +1000 Subject: [PATCH 0418/1109] Separate token output from internal nodes in ParseStream (JuliaLang/JuliaSyntax.jl#19) This data rearrangement gives a cleaner separation between tokens (which keep track of bytes in the source text) vs internal tree nodes (which keep track of which tokens they cover). As a result it reduces the size of the intermediate data structures. As part of rewriting build_tree to use the new data structures it's also become much faster and building the green tree no longer dominates the parsing time (probably due to fixing some type stability issues). 
--- JuliaSyntax/src/green_tree.jl | 13 +- JuliaSyntax/src/parse_stream.jl | 397 +++++++++++++++++++------------- JuliaSyntax/src/parser.jl | 13 +- JuliaSyntax/src/parser_api.jl | 5 +- JuliaSyntax/test/parser_api.jl | 2 +- JuliaSyntax/test/test_utils.jl | 2 +- 6 files changed, 264 insertions(+), 168 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 14fb0a70f3456..587097b859ae1 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -43,17 +43,22 @@ struct GreenNode{Head} args::Union{Tuple{},Vector{GreenNode{Head}}} end +function GreenNode{Head}(head::Head, span::Integer) where {Head} + GreenNode{Head}(head, span, ()) +end + function GreenNode(head::Head, span::Integer) where {Head} GreenNode{Head}(head, span, ()) end -function GreenNode(head::Head, span::Integer, args::Vector{GreenNode{Head}}) where {Head} - GreenNode{Head}(head, span, args) +function GreenNode(head::Head, args) where {Head} + children = collect(GreenNode{Head}, args) + span = isempty(children) ? 0 : sum(x.span for x in children) + GreenNode{Head}(head, span, children) end function GreenNode(head::Head, args::GreenNode{Head}...) 
where {Head} - span = sum(x.span for x in args) - GreenNode{Head}(head, span, GreenNode{Head}[args...]) + GreenNode{Head}(head, GreenNode{Head}[args...]) end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 385c43ca40489..23a8269275f37 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -17,14 +17,17 @@ const TRIPLE_STRING_FLAG = RawFlags(1<<3) const RAW_STRING_FLAG = RawFlags(1<<4) # try-finally-catch const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) +# Record whether operator has a suffix +const SUFFIXED_FLAG = RawFlags(1<<6) + +# Token-only flag +# Record whether a token had preceding whitespace +const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7) + # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) # Todo ERROR_FLAG = 0x8000 ? -## Flags for tokens (may overlap with the flags allocated for syntax above) -const SUFFIXED_FLAG = RawFlags(1<<6) -const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7) - function set_numeric_flags(n::Integer) f = RawFlags((n << 8) & NUMERIC_FLAGS) if numeric_flags(f) != n @@ -62,11 +65,9 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) if is_dotted(head) str = "."*str end - f = flags(head) # Ignore some flags: - # - DOTOP_FLAG is represented with . prefix - # - PRECEDING_WHITESPACE_FLAG relates to the environment of this token - f &= ~(DOTOP_FLAG | PRECEDING_WHITESPACE_FLAG) + # DOTOP_FLAG is represented above with . 
prefix + # PRECEDING_WHITESPACE_FLAG relates to the environment of this token suffix_flags = remove_flags(flags(head), DOTOP_FLAG, PRECEDING_WHITESPACE_FLAG) if include_flag_suff && suffix_flags != EMPTY_FLAGS str = str*"-" @@ -97,6 +98,7 @@ is_trivia(x) = has_flags(x, TRIVIA_FLAG) is_infix(x) = has_flags(x, INFIX_FLAG) is_dotted(x) = has_flags(x, DOTOP_FLAG) is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) +is_decorated(x) = is_dotted(x) || is_suffixed(x) preceding_whitespace(x) = has_flags(x, PRECEDING_WHITESPACE_FLAG) numeric_flags(x) = numeric_flags(flags(x)) @@ -107,20 +109,20 @@ Information about preceding whitespace is added for use by the parser. """ struct SyntaxToken head::SyntaxHead + orig_kind::Kind first_byte::UInt32 - last_byte::UInt32 # TODO: Remove this? +end + +function SyntaxToken(head::SyntaxHead, first_byte::Integer) + SyntaxToken(head, kind(head), first_byte) end function Base.show(io::IO, tok::SyntaxToken) - print(io, untokenize(tok.head, unique=false), " @", first_byte(tok)) + print(io, rpad(untokenize(tok.head, unique=false), 15), " @", first_byte(tok)) end head(tok::SyntaxToken) = tok.head first_byte(tok::SyntaxToken) = tok.first_byte -last_byte(tok::SyntaxToken) = tok.last_byte -span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1 - -is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok) #------------------------------------------------------------------------------- @@ -132,32 +134,22 @@ start_mark compares to previous nodes. 
""" struct TaggedRange head::SyntaxHead # Kind,flags - orig_kind::Kind # Kind of the original token for leaf tokens, or K"None" - first_byte::UInt32 # First byte in the input text - last_byte::UInt32 # Last byte in the input text - start_mark::UInt32 # Index of first emitted range which this range covers - # TODO: Remove the three fields above & replace with: - # is_leaf::Bool - # # The following field is used for one of two things: - # # - For leaf nodes it points to the last byte of the token in the input text - # # - For non-leaf nodes it points to the index of the first child - # last_byte_or_first_child::UInt32 -end - -head(range::TaggedRange) = range.head -kind(range::TaggedRange) = kind(range.head) -flags(range::TaggedRange) = flags(range.head) -first_byte(range::TaggedRange) = Int(range.first_byte) -last_byte(range::TaggedRange) = Int(range.last_byte) -span(range::TaggedRange) = 1 + last_byte(range) - first_byte(range) + # The following field is used for one of two things: + # - For leaf nodes it's an index in the tokens array + # - For non-leaf nodes it points to the index of the first child + first_token::UInt32 + last_token::UInt32 +end + +head(range::TaggedRange) = range.head #------------------------------------------------------------------------------- struct ParseStreamPosition - input_byte::Int # Index of next byte in input - output_index::Int # Index of last span in output + token_index::UInt32 # Index of last token in output + range_index::UInt32 end -const NO_POSITION = ParseStreamPosition(0,0) +const NO_POSITION = ParseStreamPosition(0, 0) #------------------------------------------------------------------------------- """ @@ -185,12 +177,12 @@ mutable struct ParseStream lookahead_index::Int # Pool of stream positions for use as working space in parsing position_pool::Vector{Vector{ParseStreamPosition}} + # Buffer of finalized tokens + tokens::Vector{SyntaxToken} # Parser output as an ordered sequence of ranges, parent nodes after children. 
ranges::Vector{TaggedRange} # Parsing diagnostics (errors/warnings etc) diagnostics::Vector{Diagnostic} - # First byte of next token - next_byte::Int # Counter for number of peek()s we've done without making progress via a bump() peek_count::Int # (major,minor) version of Julia we're parsing this code for. @@ -214,9 +206,9 @@ mutable struct ParseStream Vector{SyntaxToken}(), 1, Vector{Vector{ParseStreamPosition}}(), + Vector{SyntaxToken}(), Vector{TaggedRange}(), Vector{Diagnostic}(), - next_byte, 0, ver) end @@ -262,9 +254,8 @@ function ParseStream(io::IO; version=VERSION) ParseStream(textbuf, textbuf, 1, version) end - function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) - println(io, "ParseStream at position $(stream.next_byte)") + println(io, "ParseStream at position $(_next_byte(stream))") end function show_diagnostics(io::IO, stream::ParseStream, code) @@ -284,6 +275,21 @@ function release_positions(stream, positions) push!(stream.position_pool, positions) end +#------------------------------------------------------------------------------- +# Return true when a token was emitted last at stream position `pos` +function token_is_last(stream, pos) + return pos.range_index == 0 || + pos.token_index > stream.ranges[pos.range_index].last_token +end + +# Safely compute the first byte of a token, including the token off the end of +# the stream. +function token_first_byte(stream, i) + i == length(stream.tokens) + 1 ? 
+ _next_byte(stream) : + stream.tokens[i].first_byte +end + #------------------------------------------------------------------------------- # Stream input interface - the peek_* family of functions @@ -291,6 +297,7 @@ end function _buffer_lookahead_tokens(lexer, lookahead) had_whitespace = false token_count = 0 + done = false while true raw = Tokenize.Lexers.next_token(lexer) k = TzTokens.exactkind(raw) @@ -300,7 +307,7 @@ function _buffer_lookahead_tokens(lexer, lookahead) had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) raw.dotop && (f |= DOTOP_FLAG) raw.suffix && (f |= SUFFIXED_FLAG) - push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.startbyte + 1, raw.endbyte + 1)) + push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.startbyte + 1)) token_count += 1 if k == K"EndMarker" break @@ -311,21 +318,38 @@ function _buffer_lookahead_tokens(lexer, lookahead) # but not too large to avoid (a) polluting the processor cache and # (b) doing unnecessary work when not parsing the whole input. had_whitespace = false - if token_count > 100 + if done break end + if token_count > 100 + # Buffer at least one token after the last so we can get the + # current token's last byte based on the next token. (May need + # more than one to correctly apply had_whitespace state.) + done = true + end end end end +# Return the index of the next byte of the input +function _next_byte(stream) + if stream.lookahead_index > length(stream.lookahead) + __lookahead_index(stream, 1, false) # Will buffer more tokens + end + stream.lookahead[stream.lookahead_index].first_byte +end + # Find the index of the next nontrivia token +# +# Postcondition: After returning `i`, the lookahead buffer will buffers tokens +# at least up until stream.lookahead[i+1] @inline function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) # Much of the time we'll be peeking ahead a single token and have one or # zero whitespace tokens before the next token. 
The following code is an # unrolled optimized version for that fast path. Empirically it seems we # only hit the slow path about 5% of the time here. i = stream.lookahead_index - @inbounds if n == 1 && i+1 <= length(stream.lookahead) + @inbounds if n == 1 && i+2 <= length(stream.lookahead) if skip_newlines k = kind(stream.lookahead[i]) if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs") @@ -355,7 +379,7 @@ end @noinline function __lookahead_index(stream, n, skip_newlines) i = stream.lookahead_index while true - if i > length(stream.lookahead) + if i+1 > length(stream.lookahead) n_to_delete = stream.lookahead_index-1 if n_to_delete > 0.9*length(stream.lookahead) Base._deletebeg!(stream.lookahead, n_to_delete) @@ -363,6 +387,7 @@ end stream.lookahead_index = 1 end _buffer_lookahead_tokens(stream.lexer, stream.lookahead) + continue end k = @inbounds kind(stream.lookahead[i]) if !((k == K"Whitespace" || k == K"Comment") || @@ -378,7 +403,7 @@ end @noinline function _parser_stuck_error(stream) # Optimization: emit unlikely errors in a separate function - error("The parser seems stuck at byte $(stream.next_byte)") + error("The parser seems stuck at byte $(_next_byte(stream))") end """ @@ -412,12 +437,33 @@ function peek_token(stream::ParseStream, n::Integer=1; return @inbounds stream.lookahead[i] end -function _peek_behind_fields(ranges, i) - r = ranges[i] - return (kind=kind(r), - flags=flags(r), - orig_kind=r.orig_kind, - is_leaf=r.start_mark == i) + +struct FullToken + head::SyntaxHead + first_byte::UInt32 + last_byte::UInt32 +end + +head(t::FullToken) = t.head +first_byte(t::FullToken) = t.first_byte +last_byte(t::FullToken) = t.last_byte +span(t::FullToken) = 1 + last_byte(t) - first_byte(t) + +function peek_full_token(stream::ParseStream, n::Integer=1; + skip_newlines=false, skip_whitespace=true) + stream.peek_count += 1 + if stream.peek_count > 100_000 + _parser_stuck_error(stream) + end + i = _lookahead_index(stream, n, skip_newlines) + if 
!skip_whitespace + i = stream.lookahead_index + end + tok = stream.lookahead[i] + + FullToken(head(tok), + first_byte(tok), + first_byte(stream.lookahead[i+1]) - 1) end """ @@ -431,22 +477,46 @@ at the provided position `pos`. Retroactively inspecting or modifying the parser's output can be confusing, so using this function should be avoided where possible. """ -function peek_behind(stream::ParseStream; skip_trivia::Bool=true) - if skip_trivia - for i = length(stream.ranges):-1:1 - r = stream.ranges[i] - if !is_trivia(head(r)) && kind(r) != K"TOMBSTONE" - return _peek_behind_fields(stream.ranges, i) - end - end +function peek_behind(stream::ParseStream, pos::ParseStreamPosition) + if token_is_last(stream, pos) && !isempty(stream.tokens) + t = stream.tokens[pos.token_index] + return (kind=kind(t), + flags=flags(t), + orig_kind=t.orig_kind, + is_leaf=true) elseif !isempty(stream.ranges) - return _peek_behind_fields(stream.ranges, lastindex(stream.ranges)) + r = stream.ranges[pos.range_index] + return (kind=kind(r), + flags=flags(r), + orig_kind=K"None", + is_leaf=false) + else + internal_error("Can't peek behind at start of stream") end - internal_error("Can't peek behind at start of stream") end -function peek_behind(stream::ParseStream, pos::ParseStreamPosition) - return _peek_behind_fields(stream.ranges, pos.output_index) +function peek_behind(stream::ParseStream; skip_trivia::Bool=true) + pos = position(stream) + if !skip_trivia || !token_is_last(stream, pos) + return peek_behind(stream, pos) + else + token_index = lastindex(stream.tokens) + range_index = lastindex(stream.ranges) + last_token_in_nonterminal = isempty(stream.ranges) ? 
0 : + stream.ranges[range_index].last_token + while token_index > last_token_in_nonterminal + t = stream.tokens[token_index] + if !is_trivia(t) && kind(t) != K"TOMBSTONE" + break + end + token_index -= 1 + end + if token_index > 0 + return peek_behind(stream, ParseStreamPosition(token_index, range_index)) + else + internal_error("Can't peek behind at start of stream") + end + end end #------------------------------------------------------------------------------- @@ -466,16 +536,14 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None if k == K"EndMarker" break end + f = flags | remove_flags((@__MODULE__).flags(tok), PRECEDING_WHITESPACE_FLAG) is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") - f = is_trivia ? TRIVIA_FLAG : flags - is_dotted(tok) && (f |= DOTOP_FLAG) + is_trivia && (f |= TRIVIA_FLAG) outk = (is_trivia || remap_kind == K"None") ? k : remap_kind - range = TaggedRange(SyntaxHead(outk, f), k, first_byte(tok), - last_byte(tok), lastindex(stream.ranges)+1) - push!(stream.ranges, range) + h = SyntaxHead(outk, f) + push!(stream.tokens, SyntaxToken(h, kind(tok), first_byte(tok))) end stream.lookahead_index = n + 1 - stream.next_byte = last_byte(last(stream.ranges)) + 1 # Defuse the time bomb stream.peek_count = 0 end @@ -520,7 +588,13 @@ example, `2x` means `2*x` via the juxtoposition rules. """ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; error=nothing) - emit(stream, position(stream), kind, flags, error=error) + b = _next_byte(stream) + h = SyntaxHead(kind, flags) + push!(stream.tokens, SyntaxToken(h, b)) + if !isnothing(error) + _emit_diagnostic(stream, b, b-1, error=error) + end + stream.peek_count = 0 return position(stream) end @@ -533,13 +607,9 @@ whitespace if necessary with bump_trivia. 
""" function bump_glue(stream::ParseStream, kind, flags, num_tokens) i = stream.lookahead_index - span = TaggedRange(SyntaxHead(kind, flags), K"None", - first_byte(stream.lookahead[i]), - last_byte(stream.lookahead[i-1+num_tokens]), - lastindex(stream.ranges) + 1) + h = SyntaxHead(kind, flags) + push!(stream.tokens, SyntaxToken(h, stream.lookahead[i].first_byte)) stream.lookahead_index += num_tokens - push!(stream.ranges, span) - stream.next_byte = last_byte(last(stream.ranges)) + 1 stream.peek_count = 0 return position(stream) end @@ -565,21 +635,23 @@ simpler one which only splits preceding dots? function bump_split(stream::ParseStream, split_spec...) tok = stream.lookahead[stream.lookahead_index] stream.lookahead_index += 1 - fbyte = first_byte(tok) + fbyte = tok.first_byte for (i, (nbyte, k, f)) in enumerate(split_spec) - lbyte = (i == length(split_spec)) ? last_byte(tok) : fbyte + nbyte - 1 - push!(stream.ranges, TaggedRange(SyntaxHead(k, f), kind(tok), - fbyte, lbyte, - lastindex(stream.ranges) + 1)) + h = SyntaxHead(k, f) + push!(stream.tokens, SyntaxToken(h, kind(tok), fbyte)) fbyte += nbyte end - stream.next_byte = last_byte(last(stream.ranges)) + 1 stream.peek_count = 0 # Returning position(stream) like the other bump* methods would be # ambiguous here; return nothing instead. nothing end +function _reset_node_head(x, k, f) + h = SyntaxHead(isnothing(k) ? kind(x) : k, + isnothing(f) ? flags(x) : f) +end + """ Reset kind or flags of an existing node in the output stream @@ -590,12 +662,15 @@ in those cases. """ function reset_node!(stream::ParseStream, pos::ParseStreamPosition; kind=nothing, flags=nothing) - range = stream.ranges[pos.output_index] - k = isnothing(kind) ? (@__MODULE__).kind(range) : kind - f = isnothing(flags) ? 
(@__MODULE__).flags(range) : flags - stream.ranges[pos.output_index] = - TaggedRange(SyntaxHead(k, f), range.orig_kind, - first_byte(range), last_byte(range), range.start_mark) + if token_is_last(stream, pos) + t = stream.tokens[pos.token_index] + stream.tokens[pos.token_index] = SyntaxToken(_reset_node_head(t, kind, flags), + t.orig_kind, t.first_byte) + else + r = stream.ranges[pos.range_index] + stream.ranges[pos.range_index] = TaggedRange(_reset_node_head(r, kind, flags), + r.first_token, r.last_token) + end end """ @@ -606,26 +681,22 @@ K"TOMBSTONE" and return `true`, otherwise return `false`. Hack alert! This is used only for managing the complicated rules related to dedenting triple quoted strings. """ -function steal_node_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes) - i = pos.output_index - r1 = stream.ranges[i] - r2 = stream.ranges[i+1] - @assert span(r1) == 0 - @assert numbytes <= span(r2) - fb2 = r2.first_byte + numbytes - rhs_empty = fb2 > last_byte(r2) - head2 = rhs_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : r2.head - stream.ranges[i] = TaggedRange(r1.head, r1.orig_kind, - r2.first_byte, fb2 - 1, - r1.start_mark) - stream.ranges[i+1] = TaggedRange(head2, r2.orig_kind, - fb2, r2.last_byte, - r2.start_mark) - return rhs_empty +function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes) + # Token index to modify + i = pos.token_index + 1 + t = stream.tokens[i] + # Compute new token + next_byte = token_first_byte(stream, i + 1) + first_byte = t.first_byte + numbytes + is_empty = first_byte >= next_byte + head2 = is_empty ? 
SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t.head + stream.tokens[i] = SyntaxToken(head2, t.orig_kind, first_byte) + + return is_empty end function Base.position(stream::ParseStream) - ParseStreamPosition(stream.next_byte, lastindex(stream.ranges)) + ParseStreamPosition(lastindex(stream.tokens), lastindex(stream.ranges)) end """ @@ -637,10 +708,14 @@ should be a previous return value of `position()`. """ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - range = TaggedRange(SyntaxHead(kind, flags), K"None", mark.input_byte, - stream.next_byte-1, mark.output_index+1) + first_token = mark.token_index + 1 + range = TaggedRange(SyntaxHead(kind, flags), first_token, length(stream.tokens)) if !isnothing(error) - _emit_diagnostic(stream, first_byte(range), last_byte(range), error=error) + # The first child must be a leaf, otherwise ranges would be improperly + # nested. + fbyte = token_first_byte(stream, first_token) + lbyte = _next_byte(stream) - 1 + _emit_diagnostic(stream, fbyte, lbyte, error=error) end push!(stream.ranges, range) return position(stream) @@ -657,7 +732,7 @@ Emit a diagnostic at the position of the next token If `whitespace` is true, the diagnostic is positioned on the whitespace before the next token. Otherwise it's positioned at the next token as returned by `peek()`. -FIXME: Rename? This doesn't emit normal tokens into the output event list! +TODO: Rename? This doesn't emit normal tokens into the output event list! """ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) i = _lookahead_index(stream, 1, true) @@ -668,16 +743,17 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) # whitespace. begin_tok_i = stream.lookahead_index end_tok_i = is_whitespace(stream.lookahead[i]) ? 
- i : max(stream.lookahead_index, i-1) + i : max(stream.lookahead_index, i - 1) end fbyte = first_byte(stream.lookahead[begin_tok_i]) - lbyte = last_byte(stream.lookahead[end_tok_i]) + lbyte = first_byte(stream.lookahead[end_tok_i + 1]) - 1 _emit_diagnostic(stream, fbyte, lbyte; kws...) return nothing end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...) - _emit_diagnostic(stream, mark.input_byte, stream.next_byte-1; kws...) + _emit_diagnostic(stream, token_first_byte(stream, mark.token_index), + _next_byte(stream) - 1; kws...) end function emit_diagnostic(stream::ParseStream, r::NTuple{2,ParseStreamPosition}; kws...) @@ -686,7 +762,9 @@ end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, end_mark::ParseStreamPosition; kws...) - _emit_diagnostic(stream, mark.input_byte, end_mark.input_byte-1; kws...) + fbyte = token_first_byte(stream, mark.token_index) + lbyte = token_first_byte(stream, end_mark.token_index) - 1 + _emit_diagnostic(stream, fbyte, lbyte; kws...) end #------------------------------------------------------------------------------- @@ -714,60 +792,64 @@ traverse the list of ranges backward rather than forward.) """ function build_tree(::Type{NodeType}, stream::ParseStream; wrap_toplevel_as_kind=nothing, kws...) where NodeType - stack = Vector{NamedTuple{(:range,:node),Tuple{TaggedRange,NodeType}}}() - for (span_index, range) in enumerate(stream.ranges) - if kind(range) == K"TOMBSTONE" - # Ignore invisible tokens which were created but never finalized. 
- # See bump_invisible() - continue - end + stack = Vector{NamedTuple{(:first_token,:node),Tuple{Int,NodeType}}}() - if isempty(stack) || range.start_mark > stack[end].range.start_mark - # A leaf node (span covering a single token): - # [a][b][stack[end]] - # [range] - node = NodeType(head(range), span(range)) - push!(stack, (range=range, node=node)) - continue + tokens = stream.tokens + ranges = stream.ranges + i = firstindex(tokens) + j = firstindex(ranges) + while true + last_token = j <= lastindex(ranges) ? + ranges[j].last_token : lastindex(tokens) + # Process tokens to nodes for all tokens used by the next internal node + while i <= last_token + t = tokens[i] + if kind(t) == K"TOMBSTONE" + i += 1 + continue # Ignore removed tokens + end + next_byte = token_first_byte(stream, i + 1) + node = NodeType(head(t), next_byte - t.first_byte) + push!(stack, (first_token=i, node=node)) + i += 1 + end + if j > lastindex(ranges) + break end - # An interior node, span covering multiple tokens: - # - # [a][b][stack[end]] - # [ range] - # - # We use start_mark rather than first_byte to determine node overlap. - # This solve the following ambiguity between invisible nodes 1 and 2: - # - # [a][b]|[...] - # |--- invisible node 1 - # `--- invisible node 2 - # - # Does node 2 contain node 1? Using start_mark, we can distinguish the - # cases: - # - # [a][b][2][1] [a][b][2]... 
- # [ 1] - j = length(stack) - while j > 1 && range.start_mark < stack[j].range.start_mark - j -= 1 + # Process internal nodes which end at the current position + while j <= lastindex(ranges) + r = ranges[j] + if r.last_token != last_token + break + end + if kind(r) == K"TOMBSTONE" + j += 1 + continue + end + # Collect children from the stack for this internal node + k = length(stack) + 1 + while k > 1 && r.first_token <= stack[k-1].first_token + k -= 1 + end + children = (stack[n].node for n = k:length(stack)) + node = NodeType(head(r), children) + resize!(stack, k-1) + push!(stack, (first_token=r.first_token, node=node)) + j += 1 end - children = [stack[k].node for k = j:length(stack)] - resize!(stack, j-1) - node = NodeType(head(range), span(range), children) - push!(stack, (range=range, node=node)) end - # show(stdout, MIME"text/plain"(), stack[1].node) if length(stack) == 1 return only(stack).node elseif !isnothing(wrap_toplevel_as_kind) # Mostly for debugging - children = [x.node for x in stack] - return NodeType(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), children...) + children = (x.node for x in stack) + return NodeType(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), children) else error("Found multiple nodes at top level") end end + """ sourcetext(stream::ParseStream; steal_textbuf=true) @@ -799,6 +881,9 @@ Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`. """ textbuf(stream) = stream.textbuf -first_byte(stream::ParseStream) = first_byte(first(stream.ranges)) -last_byte(stream::ParseStream) = last_byte(last(stream.ranges)) +function first_byte(stream::ParseStream) + isempty(stream.tokens) ? 
_next_byte(stream) : first_byte(first(stream.tokens)) +end + +last_byte(stream::ParseStream) = _next_byte(stream)-1 any_error(stream::ParseStream) = any_error(stream.diagnostics) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 761a125c1fbdb..54030f74a4fcc 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -71,6 +71,11 @@ function peek_token(ps::ParseState, n=1; skip_newlines=nothing) peek_token(ps.stream, n, skip_newlines=skip_nl) end +function peek_full_token(ps::ParseState, n=1; skip_newlines=nothing, kws...) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek_full_token(ps.stream, n; skip_newlines=skip_nl, kws...) +end + function peek_behind(ps::ParseState, args...; kws...) peek_behind(ps.stream, args...; kws...) end @@ -100,8 +105,8 @@ function reset_node!(ps::ParseState, args...; kws...) reset_node!(ps.stream, args...; kws...) end -function steal_node_bytes!(ps::ParseState, args...) - steal_node_bytes!(ps.stream, args...) +function steal_token_bytes!(ps::ParseState, args...) + steal_token_bytes!(ps.stream, args...) end function Base.position(ps::ParseState, args...) 
@@ -3048,7 +3053,7 @@ function parse_string(ps::ParseState, raw::Bool) had_interpolation = false prev_chunk_newline = false while true - t = peek_token(ps) + t = peek_full_token(ps) k = kind(t) if k == K"$" @assert !raw # The lexer detects raw strings separately @@ -3182,7 +3187,7 @@ function parse_string(ps::ParseState, raw::Bool) if triplestr && indent_ref_len > 0 for pos in indent_chunks reset_node!(ps, pos, kind=K"Whitespace", flags=TRIVIA_FLAG) - rhs_empty = steal_node_bytes!(ps, pos, indent_ref_len) + rhs_empty = steal_token_bytes!(ps, pos, indent_ref_len) if rhs_empty # Empty chunks after dedent are removed # """\n \n """ ==> (string-s "\n") diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 4ae84b3129bb9..b2add6151277f 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -111,7 +111,7 @@ function parse(::Type{T}, io::IO; stream = ParseStream(io; version=version) parse(stream; rule=rule) tree = build_tree(T, stream; kws...) - seek(io, stream.next_byte-1) + seek(io, last_byte(stream)) tree, stream.diagnostics end @@ -122,7 +122,7 @@ function parse(::Type{T}, input...; stream = ParseStream(input...; version=version) parse(stream; rule=rule) tree = build_tree(T, stream; kws...) 
- tree, stream.diagnostics, stream.next_byte + tree, stream.diagnostics, last_byte(stream) + 1 end @@ -148,6 +148,7 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, stream = ParseStream(input...; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) + empty!(stream.tokens) empty!(stream.ranges) end parse(stream; rule=rule) diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index e12564887ef4f..ed15570faa148 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -15,7 +15,7 @@ stream = ParseStream(pointer(code), 3) parse(stream, rule=:statement) @test JuliaSyntax.build_tree(Expr, stream) == :(x+y) - @test stream.next_byte == 4 + @test JuliaSyntax.last_byte(stream) == 3 end # SubString diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 49cdda6626606..914b41afa228d 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -59,7 +59,7 @@ function parsers_agree_on_file(filename) JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) catch exc - @error "Parsing failed" path exception=current_exceptions() + @error "Parsing failed" filename exception=current_exceptions() return false end end From 8c2a7b8cf41dad871e7d29ae19c588d7bcf002ae Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 9 Mar 2022 05:33:01 +0100 Subject: [PATCH 0419/1109] get rid of the `TokenError` field in `Token` (JuliaLang/JuliaSyntax.jl#17) --- JuliaSyntax/README.md | 7 ++-- JuliaSyntax/Tokenize/benchmark/lex_base.jl | 2 +- JuliaSyntax/Tokenize/src/lexer.jl | 19 ++++++---- JuliaSyntax/Tokenize/src/token.jl | 23 +++-------- JuliaSyntax/Tokenize/src/token_kinds.jl | 10 ++++- JuliaSyntax/Tokenize/test/lexer.jl | 44 ++++++++++++---------- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/tokens.jl | 1 + 8 files changed, 57 insertions(+), 51 deletions(-) diff --git a/JuliaSyntax/README.md 
b/JuliaSyntax/README.md index 128f93f8330d2..a274343625fa9 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -405,7 +405,7 @@ name of compatibility, perhaps with a warning.) broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should probably be rejected. * Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where - keyword parameters are separated by commas. A tuple is produced instead. + keyword parameters are separated by commas. A tuple is produced instead. * `const` and `global` allow chained assignment, but the right hand side is not constant. `a` const here but not `b`. ``` @@ -698,7 +698,7 @@ interface. Could we have `Expr2` wrap `SyntaxNode`? tree library (rowan) for representing of a non-rust toy language is here https://dev.to/cad97/lossless-syntax-trees-280c -Not all the design decisions in `rust-analyzer` are finalized but the +Not all the design decisions in `rust-analyzer` are finalized but the [architecture document](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/architecture.md) is a fantastic source of design inspiration. @@ -772,7 +772,7 @@ The tree datastructure design here is tricky: parentheses in `2*(x + y)` and the explicit vs implicit multiplication symbol in `2*x` vs `2x`. -2. There's various type of *analyses* +2. There's various type of *analyses* - There's many useful ways to augment a syntax tree depending on use case. - Analysis algorithms should be able to act on any tree type, ignoring but carrying augmentations which they don't know about. @@ -983,4 +983,3 @@ indentation from the syntax tree? Source formatting involves a big pile of heuristics to get something which "looks nice"... and ML systems have become very good at heuristics. Also, we've got huge piles of training data — just choose some high quality, tastefully hand-formatted libraries. 
- diff --git a/JuliaSyntax/Tokenize/benchmark/lex_base.jl b/JuliaSyntax/Tokenize/benchmark/lex_base.jl index f440ecf0f668c..38d65f786fc7a 100644 --- a/JuliaSyntax/Tokenize/benchmark/lex_base.jl +++ b/JuliaSyntax/Tokenize/benchmark/lex_base.jl @@ -18,7 +18,7 @@ function speed_test() while !Tokenize.Lexers.eof(l) t = Tokenize.Lexers.next_token(l) tot_tokens += 1 - if t.kind == Tokens.ERROR + if Tokens.iserror(t.kind) tot_errors += 1 end end diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 0cfd9d749f49c..9e14b9163cbe6 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -3,7 +3,7 @@ module Lexers include("utilities.jl") import ..Tokens -import ..Tokens: Token, Kind, TokenError, UNICODE_OPS, EMPTY_TOKEN, isliteral +import ..Tokens: Token, Kind, UNICODE_OPS, EMPTY_TOKEN, isliteral import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BREAK, CATCH, CONST, CONTINUE, DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, @@ -52,6 +52,7 @@ mutable struct Lexer{IO_t <: IO} charspos::Tuple{Int,Int,Int,Int} doread::Bool dotop::Bool + errored::Bool end function Lexer(io::IO) @@ -80,7 +81,7 @@ function Lexer(io::IO) end Lexer(io, position(io), 1, 1, position(io), 1, 1, position(io), Tokens.ERROR, Vector{StringState}(), IOBuffer(), - (c1,c2,c3,c4), (p1,p2,p3,p4), false, false) + (c1,c2,c3,c4), (p1,p2,p3,p4), false, false, false) end Lexer(str::AbstractString) = Lexer(IOBuffer(str)) @@ -243,11 +244,11 @@ Consumes all following characters until `accept(l, f)` is `false`. end """ - emit(l::Lexer, kind::Kind, err::TokenError=Tokens.NO_ERR) + emit(l::Lexer, kind::Kind) Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. 
""" -function emit(l::Lexer, kind::Kind, err::TokenError = Tokens.NO_ERR) +function emit(l::Lexer, kind::Kind) suffix = false if optakessuffix(kind) while isopsuffix(peekchar(l)) @@ -256,7 +257,7 @@ function emit(l::Lexer, kind::Kind, err::TokenError = Tokens.NO_ERR) end end - tok = Token(kind, startpos(l), position(l) - 1, err, l.dotop, suffix) + tok = Token(kind, startpos(l), position(l) - 1, l.dotop, suffix) l.dotop = false l.last_token = kind @@ -264,12 +265,14 @@ function emit(l::Lexer, kind::Kind, err::TokenError = Tokens.NO_ERR) end """ - emit_error(l::Lexer, err::TokenError=Tokens.UNKNOWN) + emit_error(l::Lexer, err::Kind=Tokens.ERROR) Returns an `ERROR` token with error `err` and starts a new `Token`. """ -function emit_error(l::Lexer, err::TokenError = Tokens.UNKNOWN) - return emit(l, Tokens.ERROR, err) +function emit_error(l::Lexer, err::Kind = Tokens.ERROR) + l.errored = true + @assert Tokens.iserror(err) + return emit(l, err) end diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/Tokenize/src/token.jl index adef857722fe3..debb4fe8a3c3e 100644 --- a/JuliaSyntax/Tokenize/src/token.jl +++ b/JuliaSyntax/Tokenize/src/token.jl @@ -10,7 +10,7 @@ include("token_kinds.jl") iskeyword(k::Kind) = begin_keywords < k < end_keywords isliteral(k::Kind) = begin_literal < k < end_literal isoperator(k::Kind) = begin_ops < k < end_ops - +iserror(k::Kind) = begin_errors < k < end_errors iscontextualkeyword(k::Kind) = begin_contextual_keywords < k < end_contextual_keywords function iswordoperator(k::Kind) @@ -32,25 +32,14 @@ function _add_kws() end _add_kws() -# TODO: more -@enum(TokenError, - NO_ERR, - EOF_MULTICOMMENT, - EOF_CHAR, - INVALID_NUMERIC_CONSTANT, - INVALID_OPERATOR, - INVALID_INTERPOLATION_TERMINATOR, - UNKNOWN, -) - # Error kind => description -TOKEN_ERROR_DESCRIPTION = Dict{TokenError, String}( +TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", EOF_CHAR => "unterminated character literal", INVALID_NUMERIC_CONSTANT => "invalid numeric constant", INVALID_OPERATOR => "invalid operator", INVALID_INTERPOLATION_TERMINATOR => "interpolated variable ends with invalid character; use `\$(...)` instead", - UNKNOWN => "unknown", + ERROR => "unknown error", ) struct Token @@ -58,20 +47,20 @@ struct Token # Offsets into a string or buffer startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer - token_error::TokenError dotop::Bool suffix::Bool end function Token(kind::Kind, startbyte::Int, endbyte::Int) - Token(kind, startbyte, endbyte, NO_ERR, false, false) + Token(kind, startbyte, endbyte, false, false) end -Token() = Token(ERROR, 0, 0, UNKNOWN, false, false) +Token() = Token(ERROR, 0, 0, false, false) const EMPTY_TOKEN = Token() function kind(t::Token) isoperator(t.kind) && return OP iskeyword(t.kind) && return KEYWORD + iserror(t.kind) && return ERROR return t.kind end exactkind(t::Token) = t.kind diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/Tokenize/src/token_kinds.jl index 4dc60e060d700..a81b301bc1e62 100644 --- a/JuliaSyntax/Tokenize/src/token_kinds.jl +++ b/JuliaSyntax/Tokenize/src/token_kinds.jl @@ -1,7 +1,6 @@ @enum(Kind::UInt16, NONE, # Placeholder; never emitted by lexer ENDMARKER, # EOF - ERROR, COMMENT, # aadsdsa, #= fdsf #= WHITESPACE, # '\n \t' IDENTIFIER, # foo, Σxx @@ -9,6 +8,15 @@ COMMA, #, SEMICOLON, # ; + begin_errors, + EOF_MULTICOMMENT, + EOF_CHAR, + INVALID_NUMERIC_CONSTANT, + INVALID_OPERATOR, + INVALID_INTERPOLATION_TERMINATOR, + ERROR, + end_errors, + begin_keywords, KEYWORD, # general BAREMODULE, diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 72789d164627b..3d7e117002c0b 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -183,7 +183,7 @@ end test_roundtrip("1234.0 .+1", Tokens.FLOAT, "1234.0") 
test_roundtrip("1234.f(a)", Tokens.FLOAT, "1234.") test_roundtrip("1234 .f(a)", Tokens.INTEGER, "1234") - test_roundtrip("1234.0.f(a)", Tokens.ERROR, "1234.0.") + test_roundtrip("1234.0.f(a)", Tokens.INVALID_NUMERIC_CONSTANT, "1234.0.") test_roundtrip("1234.0 .f(a)", Tokens.FLOAT, "1234.0") end @@ -280,9 +280,9 @@ end end @testset "errors" begin - @test tok("#= #= =#", 1).kind == T.ERROR - @test tok("'dsadsa", 1).kind == T.ERROR - @test tok("aa **", 3).kind == T.ERROR + @test tok("#= #= =#", 1).kind == T.EOF_MULTICOMMENT + @test tok("'dsadsa", 1).kind == T.EOF_CHAR + @test tok("aa **", 3).kind == T.INVALID_OPERATOR end @testset "xor_eq" begin @@ -501,9 +501,10 @@ end str = """ "\$x෴" """ ts = collect(tokenize(str)) @test ts[4] ~ (T.IDENTIFIER , "x" , str) - @test ts[5] ~ (T.ERROR , "" , str) + @test ts[5] ~ (T.INVALID_INTERPOLATION_TERMINATOR , "" , str) @test ts[6] ~ (T.STRING , "෴" , str) - @test ts[5].token_error == Tokens.INVALID_INTERPOLATION_TERMINATOR + @test Tokens.iserror(ts[5].kind) + @test ts[5].kind == Tokens.INVALID_INTERPOLATION_TERMINATOR end end @@ -650,10 +651,10 @@ end end @testset "hex/bin/octal errors" begin - @test tok("0x").kind == T.ERROR - @test tok("0b").kind == T.ERROR - @test tok("0o").kind == T.ERROR - @test tok("0x 2", 1).kind == T.ERROR + @test tok("0x").kind == T.INVALID_NUMERIC_CONSTANT + @test tok("0b").kind == T.INVALID_NUMERIC_CONSTANT + @test tok("0o").kind == T.INVALID_NUMERIC_CONSTANT + @test tok("0x 2", 1).kind == T.INVALID_NUMERIC_CONSTANT @test tok("0x.1p1").kind == T.FLOAT end @@ -716,15 +717,20 @@ end @test tok("outer", 1).kind==T.OUTER end +function test_error(tok, kind) + @test Tokens.iserror(tok.kind) + @test tok.kind == kind +end + @testset "token errors" begin - @test tok("1.2e2.3",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT - @test tok("1.2.",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT - @test tok("1.2.f",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT - @test tok("0xv",1).token_error === 
Tokens.INVALID_NUMERIC_CONSTANT - @test tok("0b3",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT - @test tok("0op",1).token_error === Tokens.INVALID_NUMERIC_CONSTANT - @test tok("--",1).token_error === Tokens.INVALID_OPERATOR - @test tok("1**2",2).token_error === Tokens.INVALID_OPERATOR + test_error(tok("1.2e2.3",1), Tokens.INVALID_NUMERIC_CONSTANT) + test_error(tok("1.2.",1), Tokens.INVALID_NUMERIC_CONSTANT) + test_error(tok("1.2.f",1), Tokens.INVALID_NUMERIC_CONSTANT) + test_error(tok("0xv",1), Tokens.INVALID_NUMERIC_CONSTANT) + test_error(tok("0b3",1), Tokens.INVALID_NUMERIC_CONSTANT) + test_error(tok("0op",1), Tokens.INVALID_NUMERIC_CONSTANT) + test_error(tok("--",1), Tokens.INVALID_OPERATOR) + test_error(tok("1**2",2), Tokens.INVALID_OPERATOR) end @testset "hat suffix" begin @@ -765,7 +771,7 @@ end @testset "invalid float" begin s = ".0." - @test collect(tokenize(s))[1].kind == Tokens.ERROR + @test collect(tokenize(s))[1].kind == Tokens.INVALID_NUMERIC_CONSTANT end @testset "allow prime after end" begin diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 23a8269275f37..0bdd1b5ded526 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -61,7 +61,7 @@ function Base.summary(head::SyntaxHead) end function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) - str = untokenize(kind(head); unique=unique) + str = is_error(kind(head)) ? 
"error" : untokenize(kind(head); unique=unique) if is_dotted(head) str = "."*str end diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 4df8c9e398fa3..96e07e5064d6e 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -43,6 +43,7 @@ kind(raw::TzTokens.Token) = TzTokens.exactkind(raw) # Some renaming for naming consistency is_literal(k) = TzTokens.isliteral(kind(k)) is_keyword(k) = TzTokens.iskeyword(kind(k)) +is_error(k) = TzTokens.iserror(kind(k)) is_contextual_keyword(k) = TzTokens.iscontextualkeyword(kind(k)) is_operator(k) = TzTokens.isoperator(kind(k)) is_word_operator(k) = TzTokens.iswordoperator(kind(k)) From 2d38322d08654abd70ce79c5ca0c502bbe3788f8 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 9 Mar 2022 18:59:09 +1000 Subject: [PATCH 0420/1109] Make is_error and K"" macro consistent with new lexer error kinds (JuliaLang/JuliaSyntax.jl#20) --- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/token_kinds.jl | 12 +++++++++++- JuliaSyntax/src/tokens.jl | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 0bdd1b5ded526..1013c877f534d 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -92,7 +92,7 @@ kind(x) = kind(head(x)) flags(x) = flags(head(x)) # Predicates based on kind() / flags() -is_error(x) = kind(x) == K"error" +is_error(x) = is_error(kind(x)) has_flags(x, test_flags) = has_flags(flags(x), test_flags) is_trivia(x) = has_flags(x, TRIVIA_FLAG) is_infix(x) = has_flags(x, INFIX_FLAG) diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl index b1944f66dcfee..61680d1ea6a45 100644 --- a/JuliaSyntax/src/token_kinds.jl +++ b/JuliaSyntax/src/token_kinds.jl @@ -7,7 +7,6 @@ const _str_to_kind = let Ts = TzTokens Dict([ "None" => Ts.NONE "EndMarker" => Ts.ENDMARKER -"error" => Ts.ERROR "Comment" => Ts.COMMENT "Whitespace" => Ts.WHITESPACE "Identifier" => 
Ts.IDENTIFIER @@ -15,6 +14,17 @@ Dict([ "," => Ts.COMMA ";" => Ts.SEMICOLON +"BEGIN_ERRORS" => Ts.begin_errors +# Tokenization errors +"ErrorEofMultiComment" => Ts.EOF_MULTICOMMENT +"ErrorEofChar" => Ts.EOF_CHAR +"ErrorInvalidNumericConstant" => Ts.INVALID_NUMERIC_CONSTANT +"ErrorInvalidOperator" => Ts.INVALID_OPERATOR +"ErrorInvalidInterpolationTerminator" => Ts.INVALID_INTERPOLATION_TERMINATOR +# Generic error +"error" => Ts.ERROR +"END_ERRORS" => Ts.end_errors + "BEGIN_KEYWORDS" => Ts.begin_keywords "baremodule" => Ts.BAREMODULE "begin" => Ts.BEGIN diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 96e07e5064d6e..8d41e9ee40da0 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -43,7 +43,7 @@ kind(raw::TzTokens.Token) = TzTokens.exactkind(raw) # Some renaming for naming consistency is_literal(k) = TzTokens.isliteral(kind(k)) is_keyword(k) = TzTokens.iskeyword(kind(k)) -is_error(k) = TzTokens.iserror(kind(k)) +is_error(k::Kind) = TzTokens.iserror(k) is_contextual_keyword(k) = TzTokens.iscontextualkeyword(kind(k)) is_operator(k) = TzTokens.isoperator(kind(k)) is_word_operator(k) = TzTokens.iswordoperator(kind(k)) From 8ed6689e139d7b85d962d2338457251b08bf64c4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 10 Mar 2022 15:31:27 +1000 Subject: [PATCH 0421/1109] Cleanup: put next_byte in token rather than first_byte (JuliaLang/JuliaSyntax.jl#21) While a bit counter-intuitive, this enables us to use an initial sentinel token for recording the first byte of the first real token which removes a bunch of special case hacks for computing the last byte of the current output token. Also return only SyntaxHead from peek_token() - the byte range is never needed. 
--- JuliaSyntax/src/parse_stream.jl | 126 +++++++++++++++++--------------- JuliaSyntax/src/parser_api.jl | 3 +- 2 files changed, 69 insertions(+), 60 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 1013c877f534d..4621270d74610 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -105,24 +105,27 @@ numeric_flags(x) = numeric_flags(flags(x)) #------------------------------------------------------------------------------- """ `SyntaxToken` is a token covering a contiguous byte range in the input text. -Information about preceding whitespace is added for use by the parser. + +We record only the `next_byte` here (the index of the next byte *after* the +token) to avoid duplication of data between neighbouring tokens. This is more +useful than recording the first byte, as it allows an initial fixed sentinel +token to be used for recording the first byte of the first real token. """ struct SyntaxToken head::SyntaxHead orig_kind::Kind - first_byte::UInt32 + next_byte::UInt32 end -function SyntaxToken(head::SyntaxHead, first_byte::Integer) - SyntaxToken(head, kind(head), first_byte) +function SyntaxToken(head::SyntaxHead, next_byte::Integer) + SyntaxToken(head, kind(head), next_byte) end function Base.show(io::IO, tok::SyntaxToken) - print(io, rpad(untokenize(tok.head, unique=false), 15), " @", first_byte(tok)) + print(io, rpad(untokenize(tok.head, unique=false), 15), " |", tok.next_byte) end head(tok::SyntaxToken) = tok.head -first_byte(tok::SyntaxToken) = tok.first_byte #------------------------------------------------------------------------------- @@ -200,13 +203,16 @@ mutable struct ParseStream # numbers. This means we're inexact for old dev versions but that seems # like an acceptable tradeoff. ver = (version.major, version.minor) + # Initial sentinel token containing the first byte of the first real token. 
+ sentinel = SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), + K"TOMBSTONE", next_byte) new(text_buf, text_root, lexer, Vector{SyntaxToken}(), 1, Vector{Vector{ParseStreamPosition}}(), - Vector{SyntaxToken}(), + SyntaxToken[sentinel], Vector{TaggedRange}(), Vector{Diagnostic}(), 0, @@ -282,12 +288,25 @@ function token_is_last(stream, pos) pos.token_index > stream.ranges[pos.range_index].last_token end -# Safely compute the first byte of a token, including the token off the end of -# the stream. +# Compute the first byte of a token at given index `i` function token_first_byte(stream, i) - i == length(stream.tokens) + 1 ? - _next_byte(stream) : - stream.tokens[i].first_byte + stream.tokens[i-1].next_byte +end + +function token_last_byte(stream::ParseStream, i) + stream.tokens[i].next_byte - 1 +end + +function token_span(stream::ParseStream, i) + stream.tokens[i].next_byte - stream.tokens[i-1].next_byte +end + +function lookahead_token_first_byte(stream, i) + i == 1 ? _next_byte(stream) : stream.lookahead[i-1].next_byte +end + +function lookahead_token_last_byte(stream, i) + stream.lookahead[i].next_byte - 1 end #------------------------------------------------------------------------------- @@ -297,7 +316,6 @@ end function _buffer_lookahead_tokens(lexer, lookahead) had_whitespace = false token_count = 0 - done = false while true raw = Tokenize.Lexers.next_token(lexer) k = TzTokens.exactkind(raw) @@ -307,7 +325,7 @@ function _buffer_lookahead_tokens(lexer, lookahead) had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) raw.dotop && (f |= DOTOP_FLAG) raw.suffix && (f |= SUFFIXED_FLAG) - push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.startbyte + 1)) + push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.endbyte + 2)) token_count += 1 if k == K"EndMarker" break @@ -318,14 +336,8 @@ function _buffer_lookahead_tokens(lexer, lookahead) # but not too large to avoid (a) polluting the processor cache and # (b) doing unnecessary work when not parsing the whole input. 
had_whitespace = false - if done - break - end if token_count > 100 - # Buffer at least one token after the last so we can get the - # current token's last byte based on the next token. (May need - # more than one to correctly apply had_whitespace state.) - done = true + break end end end @@ -333,16 +345,10 @@ end # Return the index of the next byte of the input function _next_byte(stream) - if stream.lookahead_index > length(stream.lookahead) - __lookahead_index(stream, 1, false) # Will buffer more tokens - end - stream.lookahead[stream.lookahead_index].first_byte + last(stream.tokens).next_byte end # Find the index of the next nontrivia token -# -# Postcondition: After returning `i`, the lookahead buffer will buffers tokens -# at least up until stream.lookahead[i+1] @inline function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) # Much of the time we'll be peeking ahead a single token and have one or # zero whitespace tokens before the next token. The following code is an @@ -434,7 +440,7 @@ function peek_token(stream::ParseStream, n::Integer=1; if !skip_whitespace i = stream.lookahead_index end - return @inbounds stream.lookahead[i] + return @inbounds head(stream.lookahead[i]) end @@ -459,11 +465,10 @@ function peek_full_token(stream::ParseStream, n::Integer=1; if !skip_whitespace i = stream.lookahead_index end - tok = stream.lookahead[i] + t = stream.lookahead[i] - FullToken(head(tok), - first_byte(tok), - first_byte(stream.lookahead[i+1]) - 1) + FullToken(head(t), lookahead_token_first_byte(stream, i), + lookahead_token_last_byte(stream, i)) end """ @@ -541,7 +546,7 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None is_trivia && (f |= TRIVIA_FLAG) outk = (is_trivia || remap_kind == K"None") ? 
k : remap_kind h = SyntaxHead(outk, f) - push!(stream.tokens, SyntaxToken(h, kind(tok), first_byte(tok))) + push!(stream.tokens, SyntaxToken(h, kind(tok), tok.next_byte)) end stream.lookahead_index = n + 1 # Defuse the time bomb @@ -608,7 +613,7 @@ whitespace if necessary with bump_trivia. function bump_glue(stream::ParseStream, kind, flags, num_tokens) i = stream.lookahead_index h = SyntaxHead(kind, flags) - push!(stream.tokens, SyntaxToken(h, stream.lookahead[i].first_byte)) + push!(stream.tokens, SyntaxToken(h, stream.lookahead[i+1].next_byte)) stream.lookahead_index += num_tokens stream.peek_count = 0 return position(stream) @@ -635,11 +640,11 @@ simpler one which only splits preceding dots? function bump_split(stream::ParseStream, split_spec...) tok = stream.lookahead[stream.lookahead_index] stream.lookahead_index += 1 - fbyte = tok.first_byte + b = _next_byte(stream) for (i, (nbyte, k, f)) in enumerate(split_spec) h = SyntaxHead(k, f) - push!(stream.tokens, SyntaxToken(h, kind(tok), fbyte)) - fbyte += nbyte + b = (i == length(split_spec)) ? tok.next_byte : b + nbyte + push!(stream.tokens, SyntaxToken(h, kind(tok), b)) end stream.peek_count = 0 # Returning position(stream) like the other bump* methods would be @@ -665,7 +670,7 @@ function reset_node!(stream::ParseStream, pos::ParseStreamPosition; if token_is_last(stream, pos) t = stream.tokens[pos.token_index] stream.tokens[pos.token_index] = SyntaxToken(_reset_node_head(t, kind, flags), - t.orig_kind, t.first_byte) + t.orig_kind, t.next_byte) else r = stream.ranges[pos.range_index] stream.ranges[pos.range_index] = TaggedRange(_reset_node_head(r, kind, flags), @@ -682,17 +687,17 @@ Hack alert! This is used only for managing the complicated rules related to dedenting triple quoted strings. 
""" function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes) - # Token index to modify - i = pos.token_index + 1 - t = stream.tokens[i] - # Compute new token - next_byte = token_first_byte(stream, i + 1) - first_byte = t.first_byte + numbytes - is_empty = first_byte >= next_byte - head2 = is_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t.head - stream.tokens[i] = SyntaxToken(head2, t.orig_kind, first_byte) + i = pos.token_index + t1 = stream.tokens[i] + t2 = stream.tokens[i+1] - return is_empty + t1_next_byte = t1.next_byte + numbytes + stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind, t1_next_byte) + + t2_is_empty = t1_next_byte == t2.next_byte + head2 = t2_is_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head + stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind, t2.next_byte) + return t2_is_empty end function Base.position(stream::ParseStream) @@ -714,7 +719,7 @@ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, # The first child must be a leaf, otherwise ranges would be improperly # nested. fbyte = token_first_byte(stream, first_token) - lbyte = _next_byte(stream) - 1 + lbyte = token_last_byte(stream, lastindex(stream.tokens)) _emit_diagnostic(stream, fbyte, lbyte, error=error) end push!(stream.ranges, range) @@ -745,8 +750,8 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) end_tok_i = is_whitespace(stream.lookahead[i]) ? i : max(stream.lookahead_index, i - 1) end - fbyte = first_byte(stream.lookahead[begin_tok_i]) - lbyte = first_byte(stream.lookahead[end_tok_i + 1]) - 1 + fbyte = lookahead_token_first_byte(stream, begin_tok_i) + lbyte = lookahead_token_last_byte(stream, end_tok_i) _emit_diagnostic(stream, fbyte, lbyte; kws...) 
return nothing end @@ -808,8 +813,7 @@ function build_tree(::Type{NodeType}, stream::ParseStream; i += 1 continue # Ignore removed tokens end - next_byte = token_first_byte(stream, i + 1) - node = NodeType(head(t), next_byte - t.first_byte) + node = NodeType(head(t), token_span(stream, i)) push!(stack, (first_token=i, node=node)) i += 1 end @@ -881,9 +885,15 @@ Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`. """ textbuf(stream) = stream.textbuf -function first_byte(stream::ParseStream) - isempty(stream.tokens) ? _next_byte(stream) : first_byte(first(stream.tokens)) -end - +first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel token last_byte(stream::ParseStream) = _next_byte(stream)-1 any_error(stream::ParseStream) = any_error(stream.diagnostics) + +function Base.empty!(stream::ParseStream) + t = last(stream.tokens) + empty!(stream.tokens) + # Restore sentinel token + push!(stream.tokens, SyntaxToken(SyntaxHead(K"TOMBSTONE",EMPTY_FLAGS), + K"TOMBSTONE", t.next_byte)) + empty!(stream.ranges) +end diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index b2add6151277f..e70c62da43254 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -148,8 +148,7 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, stream = ParseStream(input...; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) - empty!(stream.tokens) - empty!(stream.ranges) + empty!(stream) end parse(stream; rule=rule) if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || From 9ecff47363240612352c39e0f93b862807c113ae Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 18 May 2022 15:43:10 +1000 Subject: [PATCH 0422/1109] Treat UTF-8 BOM as whitespace (JuliaLang/JuliaSyntax.jl#26) Co-authored-by: Sebastian Pfitzner --- JuliaSyntax/Tokenize/src/lexer.jl | 2 +- JuliaSyntax/Tokenize/test/lexer.jl | 13 +++++++++++++ 2 files 
changed, 14 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 9e14b9163cbe6..005992834a8a4 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -16,7 +16,7 @@ export tokenize @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') @inline isbinary(c::Char) = c == '0' || c == '1' @inline isoctal(c::Char) = '0' ≤ c ≤ '7' -@inline iswhitespace(c::Char) = Base.isspace(c) +@inline iswhitespace(c::Char) = Base.isspace(c) || c === '\ufeff' struct StringState triplestr::Bool diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 3d7e117002c0b..7730ba9c467c4 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -868,3 +868,16 @@ end check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...)) end end + + +@testset "UTF-8 BOM" begin + @test Tokenize.Tokens.kind.(collect(tokenize("\ufeff[1\ufeff2]"))) == [ + Tokens.WHITESPACE, + Tokens.LSQUARE, + Tokens.INTEGER, + Tokens.WHITESPACE, + Tokens.INTEGER, + Tokens.RSQUARE, + Tokens.ENDMARKER + ] +end From 2d53ff85aedd9c791a6b863ec1f7b43c3e9eed5e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 26 May 2022 19:04:00 +1000 Subject: [PATCH 0423/1109] Add note about weird filtering syntax to the readme. --- JuliaSyntax/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index a274343625fa9..6faa7fa0472e0 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -550,6 +550,20 @@ parsing `key=val` pairs inside parentheses. `function (xs...) \n body end` parses the argument list as `(... xs)`, whereas `function (x) \n body end` parses the argument list as `(tuple x)`. +* The difference between multidimensional vs flattened iterators is subtle, and + perhaps too syntactically permissive. 
For example, + - `[(x,y) for x * in 1:10, y in 1:10]` is a multidimensional iterator + - `[(x,y) for x * in 1:10 for y in 1:10]` is a flattened iterator + - `[(x,y) for x in 1:10, y in 1:10 if y < x]` is a flattened iterator + + It's this last case which seems problematic (why not *require* the second + form as a more explicit way to indicate flattening?). It's not even pretty + printed correctly: + ``` + julia> :([(x,y) for x in 1:10, y in 1:10 if y < x]) + :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))]) + ``` + # Comparisons to other packages ### Official Julia compiler From ba49425a289f76bf7410e6264f5b67c3ea890285 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 27 May 2022 11:35:43 +1000 Subject: [PATCH 0424/1109] README: Link to P2429 paper for compiler diagnostics review --- JuliaSyntax/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 6faa7fa0472e0..905786de76cc0 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -742,10 +742,12 @@ Points of note: ## Diagnostics -Rust is renowned for having great compiler diagnostics, so it's probably a good -place to get inspiration from. +The paper [P2429 - Concepts Error Messages for +Humans](https://wg21.tartanllama.xyz/P2429%20-%20Concepts%20Error%20Messages%20for%20Humans.pdf) +is C++ centric, but has a nice review of quality error reporting in various +compilers including Elm, ReasonML, Flow, D and Rust. 
-Some resources: +Some Rust-specific resources: * [rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html) * The source of the Rust compiler's diagnostics system: - The [`println!` macro](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_builtin_macros/src/format.rs) From 258cdb52ce6f52b5710f9aaed5dbb49592367b73 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 27 May 2022 17:57:19 +1000 Subject: [PATCH 0425/1109] Add note about `outer` in comprehensions --- JuliaSyntax/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 905786de76cc0..87b8e5ed81cb2 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -453,6 +453,8 @@ useful, even for DSLs: * `abstract type A < B end` and other subtype comparisons are allowed, but only `A <: B` makes sense. * `x where {S T}` produces `(where x (bracescat (row S T)))`. This seems pretty weird! 
+* `[x for outer x in xs]` parses, but `outer` makes no real sense in this + context (and using this form is a lowering error) ### `kw` and `=` inconsistencies From 7e6fa76d05751764d14fae1625348cb3c76d35ed Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 16 Jun 2022 15:52:18 +1000 Subject: [PATCH 0426/1109] Move Expr-related code to expr.jl --- JuliaSyntax/src/JuliaSyntax.jl | 1 + JuliaSyntax/src/expr.jl | 180 ++++++++++++++++++++++++++++++ JuliaSyntax/src/syntax_tree.jl | 196 +-------------------------------- 3 files changed, 187 insertions(+), 190 deletions(-) create mode 100644 JuliaSyntax/src/expr.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index a0851d873d434..a9407be8f3014 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -29,6 +29,7 @@ include("value_parsing.jl") # Tree data structures include("green_tree.jl") include("syntax_tree.jl") +include("expr.jl") # Hooks to integrate the parser with Base include("hooks.jl") diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl new file mode 100644 index 0000000000000..33688c2832bc9 --- /dev/null +++ b/JuliaSyntax/src/expr.jl @@ -0,0 +1,180 @@ +#------------------------------------------------------------------------------- +# Conversion to Base.Expr + +function is_eventually_call(ex) + return Meta.isexpr(ex, :call) || (Meta.isexpr(ex, (:where, :(::))) && + is_eventually_call(ex.args[1])) +end + +function _to_expr(node::SyntaxNode, iteration_spec=false) + if !haschildren(node) + if node.val isa Union{Int128,UInt128,BigInt} + # Ignore the values of large integers and convert them back to + # symbolic/textural form for compatibility with the Expr + # representation of these. + str = replace(sourcetext(node), '_'=>"") + headsym = :macrocall + k = kind(node) + macname = node.val isa Int128 ? Symbol("@int128_str") : + node.val isa UInt128 ? 
Symbol("@uint128_str") : + Symbol("@big_str") + return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + else + return node.val + end + end + headstr = untokenize(head(node), include_flag_suff=false) + headsym = !isnothing(headstr) ? Symbol(headstr) : + error("Can't untokenize head of kind $(kind(node))") + node_args = children(node) + args = Vector{Any}(undef, length(node_args)) + if headsym == :for && length(node_args) == 2 + args[1] = _to_expr(node_args[1], true) + args[2] = _to_expr(node_args[2], false) + else + map!(_to_expr, args, node_args) + end + # Julia's standard `Expr` ASTs have children stored in a canonical + # order which is often not always source order. We permute the children + # here as necessary to get the canonical order. + if is_infix(node.raw) + args[2], args[1] = args[1], args[2] + end + loc = source_location(LineNumberNode, node.source, node.position) + # Convert elements + if headsym == :macrocall + insert!(args, 2, loc) + elseif headsym in (:call, :ref) + # Move parameters block to args[2] + if length(args) > 1 && Meta.isexpr(args[end], :parameters) + insert!(args, 2, args[end]) + pop!(args) + end + elseif headsym in (:tuple, :parameters, :vect) + # Move parameters blocks to args[1] + if length(args) > 1 && Meta.isexpr(args[end], :parameters) + pushfirst!(args, args[end]) + pop!(args) + end + elseif headsym == :try + # Try children in source order: + # try_block catch_var catch_block else_block finally_block + # Expr ordering: + # try_block catch_var catch_block [finally_block] [else_block] + catch_ = nothing + if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + catch_ = pop!(args) + catch_var = pop!(args) + end + finally_ = pop!(args) + else_ = pop!(args) + if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + pop!(args) + pop!(args) + push!(args, catch_var) + push!(args, catch_) + end + # At this point args is + # [try_block catch_var catch_block] + if finally_ !== false + push!(args, finally_) + end + if else_ !== false + 
push!(args, else_) + end + elseif headsym == :filter + pushfirst!(args, last(args)) + pop!(args) + elseif headsym == :flatten + # The order of nodes inside the generators in Julia's flatten AST + # is noncontiguous in the source text, so need to reconstruct + # Julia's AST here from our alternative `flatten` expression. + gen = Expr(:generator, args[1], args[end]) + for i in length(args)-1:-1:2 + gen = Expr(:flatten, Expr(:generator, gen, args[i])) + end + return gen + elseif headsym in (:nrow, :ncat) + # For lack of a better place, the dimension argument to nrow/ncat + # is stored in the flags + pushfirst!(args, numeric_flags(flags(node))) + elseif headsym == :typed_ncat + insert!(args, 2, numeric_flags(flags(node))) + elseif headsym == :string && length(args) > 1 + # Julia string literals may be interspersed with trivia in two situations: + # 1. Triple quoted string indentation is trivia + # 2. An \ before newline removes the newline and any following indentation + # + # Such trivia is eagerly removed by the reference parser, so here we + # concatenate adjacent string chunks together for compatibility. + # + # TODO: Manage the non-interpolation cases with String and CmdString + # kinds instead? + args2 = Vector{Any}() + i = 1 + while i <= length(args) + if args[i] isa String && i < length(args) && args[i+1] isa String + buf = IOBuffer() + while i <= length(args) && args[i] isa String + write(buf, args[i]) + i += 1 + end + push!(args2, String(take!(buf))) + else + push!(args2, args[i]) + i += 1 + end + end + args = args2 + if length(args2) == 1 && args2[1] isa String + # If there's a single string remaining after joining we unwrap to + # give a string literal. 
+ # """\n a\n b""" ==> "a\nb" + return args2[1] + end + # elseif headsym == :string && length(args) == 1 && version <= (1,5) + # Strip string from interpolations in 1.5 and lower to preserve + # "hi$("ho")" ==> (string "hi" "ho") + elseif headsym == :(=) + if is_eventually_call(args[1]) && !iteration_spec + if Meta.isexpr(args[2], :block) + pushfirst!(args[2].args, loc) + else + # Add block for short form function locations + args[2] = Expr(:block, loc, args[2]) + end + end + elseif headsym == :(->) + if Meta.isexpr(args[2], :block) + pushfirst!(args[2].args, loc) + else + # Add block for source locations + args[2] = Expr(:block, loc, args[2]) + end + elseif headsym == :function + if length(args) > 1 && Meta.isexpr(args[1], :tuple) + # Convert to weird Expr forms for long-form anonymous functions. + # + # (function (tuple (... xs)) body) ==> (function (... xs) body) + if length(args[1].args) == 1 && Meta.isexpr(args[1].args[1], :...) + # function (xs...) \n body end + args[1] = args[1].args[1] + end + end + end + if headsym == :inert || (headsym == :quote && length(args) == 1 && + !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || + a1 isa Bool # <- compat hack, Julia 1.4+ + )) + return QuoteNode(only(args)) + else + return Expr(headsym, args...) + end +end + +Base.Expr(node::SyntaxNode) = _to_expr(node) + +function build_tree(::Type{Expr}, stream::ParseStream; kws...) + Expr(build_tree(SyntaxNode, stream; kws...)) +end + diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 14446ba8bb2d1..ff8221e891252 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -181,6 +181,12 @@ function Base.push!(node::SyntaxNode, child::SyntaxNode) push!(args, child) end +function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, kws...) + green_tree = build_tree(GreenNode, stream; kws...) 
+ source = SourceFile(sourcetext(stream), filename=filename) + SyntaxNode(source, green_tree, first_byte(stream)) +end + #------------------------------------------------------------------------------- # Tree utilities @@ -251,193 +257,3 @@ function highlight(code::String, node, path::Int...; color=(40,40,70)) print(stdout, code[q:end]) end - -#------------------------------------------------------------------------------- -# Conversion to Base.Expr - -function is_eventually_call(ex) - return Meta.isexpr(ex, :call) || (Meta.isexpr(ex, (:where, :(::))) && - is_eventually_call(ex.args[1])) -end - -function _to_expr(node::SyntaxNode, iteration_spec=false) - if !haschildren(node) - if node.val isa Union{Int128,UInt128,BigInt} - # Ignore the values of large integers and convert them back to - # symbolic/textural form for compatibility with the Expr - # representation of these. - str = replace(sourcetext(node), '_'=>"") - headsym = :macrocall - k = kind(node) - macname = node.val isa Int128 ? Symbol("@int128_str") : - node.val isa UInt128 ? Symbol("@uint128_str") : - Symbol("@big_str") - return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) - else - return node.val - end - end - headstr = untokenize(head(node), include_flag_suff=false) - headsym = !isnothing(headstr) ? Symbol(headstr) : - error("Can't untokenize head of kind $(kind(node))") - node_args = children(node) - args = Vector{Any}(undef, length(node_args)) - if headsym == :for && length(node_args) == 2 - args[1] = _to_expr(node_args[1], true) - args[2] = _to_expr(node_args[2], false) - else - map!(_to_expr, args, node_args) - end - # Julia's standard `Expr` ASTs have children stored in a canonical - # order which is often not always source order. We permute the children - # here as necessary to get the canonical order. 
- if is_infix(node.raw) - args[2], args[1] = args[1], args[2] - end - loc = source_location(LineNumberNode, node.source, node.position) - # Convert elements - if headsym == :macrocall - insert!(args, 2, loc) - elseif headsym in (:call, :ref) - # Move parameters block to args[2] - if length(args) > 1 && Meta.isexpr(args[end], :parameters) - insert!(args, 2, args[end]) - pop!(args) - end - elseif headsym in (:tuple, :parameters, :vect) - # Move parameters blocks to args[1] - if length(args) > 1 && Meta.isexpr(args[end], :parameters) - pushfirst!(args, args[end]) - pop!(args) - end - elseif headsym == :try - # Try children in source order: - # try_block catch_var catch_block else_block finally_block - # Expr ordering: - # try_block catch_var catch_block [finally_block] [else_block] - catch_ = nothing - if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) - catch_ = pop!(args) - catch_var = pop!(args) - end - finally_ = pop!(args) - else_ = pop!(args) - if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) - pop!(args) - pop!(args) - push!(args, catch_var) - push!(args, catch_) - end - # At this point args is - # [try_block catch_var catch_block] - if finally_ !== false - push!(args, finally_) - end - if else_ !== false - push!(args, else_) - end - elseif headsym == :filter - pushfirst!(args, last(args)) - pop!(args) - elseif headsym == :flatten - # The order of nodes inside the generators in Julia's flatten AST - # is noncontiguous in the source text, so need to reconstruct - # Julia's AST here from our alternative `flatten` expression. 
- gen = Expr(:generator, args[1], args[end]) - for i in length(args)-1:-1:2 - gen = Expr(:flatten, Expr(:generator, gen, args[i])) - end - return gen - elseif headsym in (:nrow, :ncat) - # For lack of a better place, the dimension argument to nrow/ncat - # is stored in the flags - pushfirst!(args, numeric_flags(flags(node))) - elseif headsym == :typed_ncat - insert!(args, 2, numeric_flags(flags(node))) - elseif headsym == :string && length(args) > 1 - # Julia string literals may be interspersed with trivia in two situations: - # 1. Triple quoted string indentation is trivia - # 2. An \ before newline removes the newline and any following indentation - # - # Such trivia is eagerly removed by the reference parser, so here we - # concatenate adjacent string chunks together for compatibility. - # - # TODO: Manage the non-interpolation cases with String and CmdString - # kinds instead? - args2 = Vector{Any}() - i = 1 - while i <= length(args) - if args[i] isa String && i < length(args) && args[i+1] isa String - buf = IOBuffer() - while i <= length(args) && args[i] isa String - write(buf, args[i]) - i += 1 - end - push!(args2, String(take!(buf))) - else - push!(args2, args[i]) - i += 1 - end - end - args = args2 - if length(args2) == 1 && args2[1] isa String - # If there's a single string remaining after joining we unwrap to - # give a string literal. 
- # """\n a\n b""" ==> "a\nb" - return args2[1] - end - # elseif headsym == :string && length(args) == 1 && version <= (1,5) - # Strip string from interpolations in 1.5 and lower to preserve - # "hi$("ho")" ==> (string "hi" "ho") - elseif headsym == :(=) - if is_eventually_call(args[1]) && !iteration_spec - if Meta.isexpr(args[2], :block) - pushfirst!(args[2].args, loc) - else - # Add block for short form function locations - args[2] = Expr(:block, loc, args[2]) - end - end - elseif headsym == :(->) - if Meta.isexpr(args[2], :block) - pushfirst!(args[2].args, loc) - else - # Add block for source locations - args[2] = Expr(:block, loc, args[2]) - end - elseif headsym == :function - if length(args) > 1 && Meta.isexpr(args[1], :tuple) - # Convert to weird Expr forms for long-form anonymous functions. - # - # (function (tuple (... xs)) body) ==> (function (... xs) body) - if length(args[1].args) == 1 && Meta.isexpr(args[1].args[1], :...) - # function (xs...) \n body end - args[1] = args[1].args[1] - end - end - end - if headsym == :inert || (headsym == :quote && length(args) == 1 && - !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || - a1 isa Bool # <- compat hack, Julia 1.4+ - )) - return QuoteNode(only(args)) - else - return Expr(headsym, args...) - end -end - -Base.Expr(node::SyntaxNode) = _to_expr(node) - - -#------------------------------------------------------------------------------- - -function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, kws...) - green_tree = build_tree(GreenNode, stream; kws...) - source = SourceFile(sourcetext(stream), filename=filename) - SyntaxNode(source, green_tree, first_byte(stream)) -end - -function build_tree(::Type{Expr}, stream::ParseStream; kws...) 
- Expr(build_tree(SyntaxNode, stream; kws...)) -end - From 88917942634ef9f3889c0b114b7bf47cdab44003 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 17 Jun 2022 15:50:29 +1000 Subject: [PATCH 0427/1109] Fix test code: find Base source code more robustly It seems that the path to base has changed in the Julia 1.9 nightly images installed by the julia-setup@v1 action. So search in that location for the Base source as well. --- JuliaSyntax/test/parse_packages.jl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 52338b7577175..1603af1392618 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -6,7 +6,17 @@ test_parse_all_in_path(joinpath(pkgdir, "test")) end -base_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") +base_path = let + p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") + if !isdir(p) + # For julia 1.9 images. + p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "src", "base") + if !isdir(p) + error("source for Julia base not found") + end + end + p +end @testset "Parse Base at $base_path" begin test_parse_all_in_path(base_path) end From 4ae75e1616a3fca0ed2aeff944723a261d087faf Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 16 Jun 2022 15:52:53 +1000 Subject: [PATCH 0428/1109] Move disable_in_core!() -> enable_in_core!(false) This reduces the number of functions by one. --- JuliaSyntax/src/hooks.jl | 18 +++++++----------- JuliaSyntax/test/hooks.jl | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 505d2b5fa961d..ad57cdc326dca 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -66,17 +66,13 @@ flisp parser for all parsing work. That is, JuliaSyntax will be used for `include()` `Meta.parse()`, the REPL, etc. 
""" -function enable_in_core!() - # TODO: Use invoke_in_world to freeze the world age at the time this was enabled. - Base.eval(Core, :(_parse = $core_parser_hook)) - nothing -end - -""" -Revert to the flisp parser for all parsing work. -""" -function disable_in_core!() - Base.eval(Core, :(_parse = Core.Compiler.fl_parse)) +function enable_in_core!(enable=true) + if enable + # TODO: Use invoke_in_world to freeze the world age at the time this was enabled. + Base.eval(Core, :(_parse = $core_parser_hook)) + else + Base.eval(Core, :(_parse = Core.Compiler.fl_parse)) + end nothing end diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index fc2f30a42a629..542cd65c9c9d8 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -12,5 +12,5 @@ # Meta.ParseError when Core integration is enabled. @test_throws JuliaSyntax.ParseError Meta.parse("[x") - JuliaSyntax.disable_in_core!() + JuliaSyntax.enable_in_core!(false) end From fee072531df1aa3aab133c12b16e204a1077030e Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 17 Jun 2022 14:59:20 +1000 Subject: [PATCH 0429/1109] Work around sysimage compilation bug with unicode normalization Using the code from the Unicode stdlib directly, it seems there's something funky going on with @ cfunction, generic dispatch and sysimage generation. https://github.com/JuliaLang/julia/issues/45716 Here I hard code the charmap to avoid this! --- JuliaSyntax/src/value_parsing.jl | 38 +++++++++++++++----------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 6b94cbb806e41..1d91693591be8 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -210,41 +210,39 @@ end # stdlib under the name `Unicode.julia_chartransform`. See # https://github.com/JuliaLang/julia/pull/42561 # -# To allow use on older Julia versions, we reproduce that logic here. 
+# To allow use on older Julia versions and to workaround the bug +# https://github.com/JuliaLang/julia/issues/45716 +# we reproduce a specialized version of that logic here. # static wrapper around user callback function -utf8proc_custom_func(codepoint::UInt32, callback::Any) = - UInt32(callback(codepoint))::UInt32 +function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32 + (codepoint == 0x025B ? 0x03B5 : + codepoint == 0x00B5 ? 0x03BC : + codepoint == 0x00B7 ? 0x22C5 : + codepoint == 0x0387 ? 0x22C5 : + codepoint == 0x2212 ? 0x002D : + codepoint) +end -function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T - ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}), +function utf8proc_decompose(str, options, buffer, nwords) + ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ptr{Cvoid}), str, sizeof(str), buffer, nwords, options, - @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform) + @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ptr{Cvoid})), C_NULL) ret < 0 && utf8proc_error(ret) return ret end -function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity) - nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform) +function utf8proc_map(str::Union{String,SubString{String}}, options::Integer) + nwords = utf8proc_decompose(str, options, C_NULL, 0) buffer = Base.StringVector(nwords*4) - nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform) + nwords = utf8proc_decompose(str, options, buffer, nwords) nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options) nbytes < 0 && utf8proc_error(nbytes) return String(resize!(buffer, nbytes)) end -const _julia_charmap = Dict{UInt32,UInt32}( - 0x025B => 0x03B5, - 0x00B5 => 0x03BC, - 0x00B7 => 0x22C5, - 0x0387 => 0x22C5, - 0x2212 => 0x002D, -) - 
-julia_chartransform(codepoint::UInt32) = get(_julia_charmap, codepoint, codepoint) - function normalize_identifier(str) flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE - utf8proc_map(str, flags, julia_chartransform) + utf8proc_map(str, flags) end From 62b4802c2d61017e7b12336ea8699ee2843ade79 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Mon, 20 Jun 2022 21:50:58 +1000 Subject: [PATCH 0430/1109] Utilities for compiling a JuliaSyntax sysimage --- JuliaSyntax/README.md | 16 +++++++++ JuliaSyntax/src/hooks.jl | 8 ++--- .../sysimage/JuliaSyntaxCore/Project.toml | 10 ++++++ .../JuliaSyntaxCore/src/JuliaSyntaxCore.jl | 26 ++++++++++++++ JuliaSyntax/sysimage/compile.jl | 34 +++++++++++++++++++ JuliaSyntax/sysimage/precompile_exec.jl | 5 +++ 6 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml create mode 100644 JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl create mode 100755 JuliaSyntax/sysimage/compile.jl create mode 100644 JuliaSyntax/sysimage/precompile_exec.jl diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 87b8e5ed81cb2..e83375f993fc7 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -103,6 +103,22 @@ julia> parseall(Expr, "(x + y)*z") :($(Expr(:toplevel, :((x + y) * z)))) ``` +# Using JuliaSyntax as the default parser + +To use JuliaSyntax as the default Julia parser to `include()` files, +parse code with `Meta.parse()`, etc, call + +``` +julia> JuliaSyntax.enable_in_core!() +``` + +This causes some startup latency, so to reduce that you can create a custom +system image by running the code in `./sysimage/compile.jl` as a Julia script +(or directly using the shell, on unix). Then use `julia -J $resulting_sysimage`. + +Using a custom sysimage has the advantage that package precompilation will also +go through the JuliaSyntax parser. 
+ # Parser implementation Our goal is to losslessly represent the source text with a tree; this may be diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index ad57cdc326dca..3c6e940a022b2 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -67,12 +67,8 @@ flisp parser for all parsing work. That is, JuliaSyntax will be used for `include()` `Meta.parse()`, the REPL, etc. """ function enable_in_core!(enable=true) - if enable - # TODO: Use invoke_in_world to freeze the world age at the time this was enabled. - Base.eval(Core, :(_parse = $core_parser_hook)) - else - Base.eval(Core, :(_parse = Core.Compiler.fl_parse)) - end + parser = enable ? core_parser_hook : Core.Compiler.fl_parse + Base.eval(Core, :(_parse = $parser)) nothing end diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml new file mode 100644 index 0000000000000..5b969f2202591 --- /dev/null +++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml @@ -0,0 +1,10 @@ +name = "JuliaSyntaxCore" +uuid = "05e5f68f-ccd0-4d84-a81a-f557a333a331" +authors = ["Chris Foster and contributors"] +version = "0.1.0" + +[compat] +julia = "1.6" + +[deps] +JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl new file mode 100644 index 0000000000000..3188fd8a96f38 --- /dev/null +++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl @@ -0,0 +1,26 @@ +module JuliaSyntaxCore + +# A tiny module to hold initialization code for JuliaSyntax.jl integration with +# the runtime. + +using JuliaSyntax + +import Base: JLOptions + +function __init__() + # HACK! Fool the runtime into allowing us to set Core._parse, even during + # incremental compilation. (Ideally we'd just arrange for Core._parse to be + # set to the JuliaSyntax parser. 
But how do we signal that to the dumping + # code outside of the initial creation of Core?) + i = findfirst(==(:incremental), fieldnames(JLOptions)) + ptr = convert(Ptr{fieldtype(JLOptions, i)}, + cglobal(:jl_options, JLOptions) + fieldoffset(JLOptions, i)) + incremental = unsafe_load(ptr) + incremental == 0 || unsafe_store!(ptr, 0) + + JuliaSyntax.enable_in_core!() + + incremental == 0 || unsafe_store!(ptr, incremental) +end + +end diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl new file mode 100755 index 0000000000000..ea950555bebfe --- /dev/null +++ b/JuliaSyntax/sysimage/compile.jl @@ -0,0 +1,34 @@ +#!/bin/bash +#= +[[ $1 == +* ]] && juliaup_arg=$1 && shift # release channel for juliaup +exec julia ${juliaup_arg} --startup-file=no -e 'include(popfirst!(ARGS))' "$0" "$@" +=# + +imgs_base_path = joinpath(first(DEPOT_PATH), "sysimages", "v$VERSION") +mkpath(imgs_base_path) + +using Libdl + +cd(@__DIR__) + +using Pkg +Pkg.activate(".") +Pkg.develop("JuliaSyntax") +Pkg.develop(path="./JuliaSyntaxCore") + +image_path = joinpath(imgs_base_path, "juliasyntax_sysimage."*Libdl.dlext) + +using PackageCompiler +PackageCompiler.create_sysimage( + ["JuliaSyntaxCore"], + project=".", + sysimage_path=image_path, + precompile_execution_file="precompile_exec.jl", + incremental=true, +) + +@info """## System image compiled! 
+ + Use it with `julia -J "$image_path"` + """ + diff --git a/JuliaSyntax/sysimage/precompile_exec.jl b/JuliaSyntax/sysimage/precompile_exec.jl new file mode 100644 index 0000000000000..74f7bd7d1145d --- /dev/null +++ b/JuliaSyntax/sysimage/precompile_exec.jl @@ -0,0 +1,5 @@ +import JuliaSyntax +Base.include(@__MODULE__(), joinpath(pkgdir(JuliaSyntax), "test", "test_utils.jl")) +Base.include(@__MODULE__(), joinpath(pkgdir(JuliaSyntax), "test", "parser.jl")) +JuliaSyntax.enable_in_core!() +@info "Some parsing" Meta.parse("x+y+z-w .+ [a b c]") From 19173869aadbf3a8b61a47ab32cddec1a8b3e27a Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 21 Jun 2022 08:18:07 +1000 Subject: [PATCH 0431/1109] Add debug logging to core hooks A debug log can help to understand what's gone wrong in certain cases, such as when a separate Julia processes is used during precompilation. --- JuliaSyntax/src/hooks.jl | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 3c6e940a022b2..29c22abc50832 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,3 +1,5 @@ +_debug_log = nothing + # Adaptor for the API/ABI expected by the Julia runtime code. function core_parser_hook(code, filename, lineno, offset, options) try @@ -8,6 +10,15 @@ function core_parser_hook(code, filename, lineno, offset, options) (ptr,len) = code code = String(unsafe_wrap(Array, ptr, len)) end + if !isnothing(_debug_log) + print(_debug_log, """ + #-#-#------------------------------- + # ENTER filename=$filename, lineno=$lineno, offset=$offset, options=$options" + #-#-#------------------------------- + """) + write(_debug_log, code) + end + io = IOBuffer(code) seek(io, offset) @@ -37,6 +48,14 @@ function core_parser_hook(code, filename, lineno, offset, options) # of one cancel here. 
last_offset = last_byte(stream) + if !isnothing(_debug_log) + println(_debug_log, """ + #-#-#- + # EXIT last_offset=$last_offset + #-#-#- + """) + end + # Rewrap result in an svec for use by the C code return Core.svec(ex, last_offset) catch exc @@ -67,6 +86,14 @@ flisp parser for all parsing work. That is, JuliaSyntax will be used for `include()` `Meta.parse()`, the REPL, etc. """ function enable_in_core!(enable=true) + debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing) + global _debug_log + if enable && !isnothing(debug_filename) + _debug_log = open(debug_filename, "w") + elseif !enable && !isnothing(_debug_log) + close(_debug_log) + _debug_log = nothing + end parser = enable ? core_parser_hook : Core.Compiler.fl_parse Base.eval(Core, :(_parse = $parser)) nothing From 8602699de57aee1c22ad49c6b67714001d11911f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 21 Jun 2022 08:19:01 +1000 Subject: [PATCH 0432/1109] Ignore trailing trivia in Core parser hook For compatibility with the flisp parser, we must consume any trailing whitespace at the end of a string. Otherwise Meta.parse() will fail unnecessarily when parsing a single statement. --- JuliaSyntax/src/hooks.jl | 7 +++++-- JuliaSyntax/test/hooks.jl | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 29c22abc50832..4e462e746d7f0 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -25,11 +25,14 @@ function core_parser_hook(code, filename, lineno, offset, options) stream = ParseStream(io) rule = options === :all ? 
:toplevel : options if rule !== :toplevel - # To copy the flisp parser driver, we ignore leading trivia when - # parsing statements or atoms + # To copy the flisp parser driver, we ignore leading and trailing + # trivia when parsing statements or atoms bump_trivia(stream) end JuliaSyntax.parse(stream; rule=rule) + if rule !== :toplevel + bump_trivia(stream) + end if any_error(stream) e = Expr(:error, ParseError(SourceFile(code), stream.diagnostics)) diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 542cd65c9c9d8..5cdf6143259d8 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -4,9 +4,11 @@ @test Meta.parse("x + 1") == :(x + 1) @test Meta.parse("x + 1", 1) == (:(x + 1), 6) - # Test that parsing statements incrementally works - @test Meta.parse("x + 1\n(y)", 1) == (:(x + 1), 6) - @test Meta.parse("x + 1\n(y)", 6) == (:y, 10) + # Test that parsing statements incrementally works and stops after + # whitespace / comment trivia + @test Meta.parse("x + 1\n(y)\n", 1) == (:(x + 1), 7) + @test Meta.parse("x + 1\n(y)\n", 7) == (:y, 11) + @test Meta.parse(" x#==#", 1) == (:x, 7) # Check that Meta.parse throws the JuliaSyntax.ParseError rather than # Meta.ParseError when Core integration is enabled. 
From 79a1862fa79238258d33b8faa832d80b0c1e33ec Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 21 Jun 2022 16:21:02 +1000 Subject: [PATCH 0433/1109] Pass file name in Core hook for Expr creation --- JuliaSyntax/src/hooks.jl | 4 ++-- JuliaSyntax/src/source_files.jl | 2 +- JuliaSyntax/test/hooks.jl | 32 ++++++++++++++++++++------------ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 4e462e746d7f0..6d91206f89de4 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -35,10 +35,10 @@ function core_parser_hook(code, filename, lineno, offset, options) end if any_error(stream) - e = Expr(:error, ParseError(SourceFile(code), stream.diagnostics)) + e = Expr(:error, ParseError(SourceFile(code, filename=filename), stream.diagnostics)) ex = options === :all ? Expr(:toplevel, e) : e else - ex = build_tree(Expr, stream, wrap_toplevel_as_kind=K"None") + ex = build_tree(Expr, stream, filename=filename, wrap_toplevel_as_kind=K"None") if Meta.isexpr(ex, :None) # The None wrapping is only to give somewhere for trivia to be # attached; unwrap! diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index adb44b4dfb357..fa95794afb2e7 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -1,5 +1,5 @@ """ - SourceFile(code [, filename]) + SourceFile(code [; filename=nothing]) A UTF-8 source code string with associated file name and indexing structures. 
""" diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 5cdf6143259d8..6269b71a8721c 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -1,18 +1,26 @@ @testset "Hooks for Core integration" begin - JuliaSyntax.enable_in_core!() + @testset "filename is used" begin + ex = JuliaSyntax.core_parser_hook("@a", "somefile", 0, :statement)[1] + @test Meta.isexpr(ex, :macrocall) + @test ex.args[2] == LineNumberNode(1, "somefile") + end - @test Meta.parse("x + 1") == :(x + 1) - @test Meta.parse("x + 1", 1) == (:(x + 1), 6) + @testset "enable_in_core!" begin + JuliaSyntax.enable_in_core!() - # Test that parsing statements incrementally works and stops after - # whitespace / comment trivia - @test Meta.parse("x + 1\n(y)\n", 1) == (:(x + 1), 7) - @test Meta.parse("x + 1\n(y)\n", 7) == (:y, 11) - @test Meta.parse(" x#==#", 1) == (:x, 7) + @test Meta.parse("x + 1") == :(x + 1) + @test Meta.parse("x + 1", 1) == (:(x + 1), 6) - # Check that Meta.parse throws the JuliaSyntax.ParseError rather than - # Meta.ParseError when Core integration is enabled. - @test_throws JuliaSyntax.ParseError Meta.parse("[x") + # Test that parsing statements incrementally works and stops after + # whitespace / comment trivia + @test Meta.parse("x + 1\n(y)\n", 1) == (:(x + 1), 7) + @test Meta.parse("x + 1\n(y)\n", 7) == (:y, 11) + @test Meta.parse(" x#==#", 1) == (:x, 7) - JuliaSyntax.enable_in_core!(false) + # Check that Meta.parse throws the JuliaSyntax.ParseError rather than + # Meta.ParseError when Core integration is enabled. + @test_throws JuliaSyntax.ParseError Meta.parse("[x") + + JuliaSyntax.enable_in_core!(false) + end end From 9ea17273a14c38fd8c3fca76c86db9efbebcc2a3 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 21 Jun 2022 20:55:28 +1000 Subject: [PATCH 0434/1109] Treat block in ifelse conditional as Expr wart Instead of baking this into the parser output, we treat this extra block as an Expr-specific oddity and add it during Expr conversion. 
--- JuliaSyntax/README.md | 2 +- JuliaSyntax/src/expr.jl | 3 +++ JuliaSyntax/src/parser.jl | 10 +++------- JuliaSyntax/test/{syntax_tree.jl => expr.jl} | 0 JuliaSyntax/test/parser.jl | 6 +++--- JuliaSyntax/test/runtests.jl | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) rename JuliaSyntax/test/{syntax_tree.jl => expr.jl} (100%) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index e83375f993fc7..6e3c1bc26a67d 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -127,7 +127,7 @@ tree", but that term has also been used for the parse tree of the full formal grammar for a language including any grammar hacks required to solve ambiguities, etc. So we avoid this term.) -`JuliaSyntax` uses use a mostly recursive descent parser which closely +`JuliaSyntax` uses a mostly recursive descent parser which closely follows the high level structure of the flisp reference parser. This makes the code familiar and reduces porting bugs. It also gives a lot of flexibility for designing the diagnostics, tree data structures, compatibility with different diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 33688c2832bc9..3689ac55df9f5 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -144,6 +144,9 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) args[2] = Expr(:block, loc, args[2]) end end + elseif headsym == :elseif + # Compat wart: add a block for the elseif conditional + args[1] = Expr(:block, args[1]) elseif headsym == :(->) if Meta.isexpr(args[2], :block) pushfirst!(args[2].args, loc) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 54030f74a4fcc..98994bda46e19 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1852,7 +1852,7 @@ end # Parse if-elseif-else-end expressions # -# if a xx elseif b yy else zz end ==> (if a (block xx) (elseif (block b) (block yy) (block zz))) +# if a xx elseif b yy else zz end ==> (if a (block xx) (elseif b (block yy) (block 
zz))) function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) mark = position(ps) word = peek(ps) @@ -1872,23 +1872,19 @@ function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) # if a xx end ==> (if a (block xx)) parse_cond(ps) end - if is_elseif - # Wart: `elseif` condition is in a block but not `if` condition - emit(ps, cond_mark, K"block") - end # if a \n\n xx \n\n end ==> (if a (block xx)) parse_block(ps) bump_trivia(ps) k = peek(ps) if k == K"elseif" - # if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy))) + # if a xx elseif b yy end ==> (if a (block xx) (elseif b (block yy))) parse_if_elseif(ps, true) elseif k == K"else" emark = position(ps) bump(ps, TRIVIA_FLAG) if peek(ps) == K"if" # Recovery: User wrote `else if` by mistake ? - # if a xx else if b yy end ==> (if a (block xx) (error-t) (elseif (block b) (block yy))) + # if a xx else if b yy end ==> (if a (block xx) (error-t) (elseif b (block yy))) bump(ps, TRIVIA_FLAG) emit(ps, emark, K"error", TRIVIA_FLAG, error="use `elseif` instead of `else if`") diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/expr.jl similarity index 100% rename from JuliaSyntax/test/syntax_tree.jl rename to JuliaSyntax/test/expr.jl diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d30c3eff132dc..b94bc69ccd029 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -390,14 +390,14 @@ tests = [ "export (\$f)" => "(export (\$ f))" ], JuliaSyntax.parse_if_elseif => [ - "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif (block b) (block yy) (block zz)))" + "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif b (block yy) (block zz)))" "if end" => "(if (error) (block))" "if \n end" => "(if (error) (block))" "if a end" => "(if a (block))" "if a xx end" => "(if a (block xx))" "if a \n\n xx \n\n end" => "(if a (block xx))" - "if a xx elseif b yy end" => "(if a (block xx) (elseif (block b) (block 
yy)))" - "if a xx else if b yy end" => "(if a (block xx) (error-t) (elseif (block b) (block yy)))" + "if a xx elseif b yy end" => "(if a (block xx) (elseif b (block yy)))" + "if a xx else if b yy end" => "(if a (block xx) (error-t) (elseif b (block yy)))" "if a xx else yy end" => "(if a (block xx) (block yy))" ], JuliaSyntax.parse_const_local_global => [ diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index ea0c0a1b9cfcd..9c499084087c3 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -18,7 +18,7 @@ include("test_utils.jl") include("parse_stream.jl") include("parser.jl") include("parser_api.jl") -include("syntax_tree.jl") +include("expr.jl") @testset "Parsing values from strings" begin include("value_parsing.jl") end From fdcdc8e7526d7f87d80d9eec0282c4a0d44fbdc2 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 22 Jun 2022 08:23:07 +1000 Subject: [PATCH 0435/1109] Generate line numbers in Expr conversion This should give proper line number information when using JuliaSyntax via the Julia runtime. And hopefully allow tools like Revise.jl to work reliably. 
--- JuliaSyntax/src/expr.jl | 71 +++++++++++++------- JuliaSyntax/test/expr.jl | 117 ++++++++++++++++++++++++++++++++- JuliaSyntax/test/parser_api.jl | 9 ++- JuliaSyntax/test/test_utils.jl | 36 ++++++---- 4 files changed, 193 insertions(+), 40 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 3689ac55df9f5..d239c80263f7d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -6,7 +6,7 @@ function is_eventually_call(ex) is_eventually_call(ex.args[1])) end -function _to_expr(node::SyntaxNode, iteration_spec=false) +function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) if !haschildren(node) if node.val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to @@ -27,12 +27,32 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) headsym = !isnothing(headstr) ? Symbol(headstr) : error("Can't untokenize head of kind $(kind(node))") node_args = children(node) - args = Vector{Any}(undef, length(node_args)) + insert_linenums = (headsym == :block || headsym == :toplevel) && need_linenodes + args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 
2 : 1)) if headsym == :for && length(node_args) == 2 - args[1] = _to_expr(node_args[1], true) - args[2] = _to_expr(node_args[2], false) + # No line numbers in for loop iteration spec + args[1] = _to_expr(node_args[1], true, false) + args[2] = _to_expr(node_args[2]) + elseif headsym == :let && length(node_args) == 2 + # No line numbers in let statement binding list + args[1] = _to_expr(node_args[1], false, false) + args[2] = _to_expr(node_args[2]) else - map!(_to_expr, args, node_args) + if insert_linenums + if isempty(node_args) + push!(args, source_location(LineNumberNode, node.source, node.position)) + else + for i in 1:length(node_args) + n = node_args[i] + args[2*i-1] = source_location(LineNumberNode, n.source, n.position) + args[2*i] = _to_expr(n) + end + end + else + for i in 1:length(node_args) + args[i] = _to_expr(node_args[i]) + end + end end # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children @@ -136,34 +156,41 @@ function _to_expr(node::SyntaxNode, iteration_spec=false) # Strip string from interpolations in 1.5 and lower to preserve # "hi$("ho")" ==> (string "hi" "ho") elseif headsym == :(=) - if is_eventually_call(args[1]) && !iteration_spec - if Meta.isexpr(args[2], :block) - pushfirst!(args[2].args, loc) - else - # Add block for short form function locations - args[2] = Expr(:block, loc, args[2]) - end + if is_eventually_call(args[1]) && !iteration_spec && !Meta.isexpr(args[2], :block) + # Add block for short form function locations + args[2] = Expr(:block, loc, args[2]) end elseif headsym == :elseif - # Compat wart: add a block for the elseif conditional - args[1] = Expr(:block, args[1]) + # Block for conditional's source location + args[1] = Expr(:block, loc, args[1]) elseif headsym == :(->) if Meta.isexpr(args[2], :block) - pushfirst!(args[2].args, loc) + if node.parent isa SyntaxNode && kind(node.parent) != K"do" + pushfirst!(args[2].args, loc) + end else # Add 
block for source locations args[2] = Expr(:block, loc, args[2]) end elseif headsym == :function - if length(args) > 1 && Meta.isexpr(args[1], :tuple) - # Convert to weird Expr forms for long-form anonymous functions. - # - # (function (tuple (... xs)) body) ==> (function (... xs) body) - if length(args[1].args) == 1 && Meta.isexpr(args[1].args[1], :...) - # function (xs...) \n body end - args[1] = args[1].args[1] + if length(args) > 1 + if Meta.isexpr(args[1], :tuple) + # Convert to weird Expr forms for long-form anonymous functions. + # + # (function (tuple (... xs)) body) ==> (function (... xs) body) + if length(args[1].args) == 1 && Meta.isexpr(args[1].args[1], :...) + # function (xs...) \n body end + args[1] = args[1].args[1] + end end + pushfirst!(args[2].args, loc) + end + elseif headsym == :macro + if length(args) > 1 + pushfirst!(args[2].args, loc) end + elseif headsym == :module + pushfirst!(args[3].args, loc) end if headsym == :inert || (headsym == :quote && length(args) == 1 && !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 669994e7c4bc2..da2704af403f8 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -9,6 +9,114 @@ @test parseall(Expr, ":true", rule=:atom) == Expr(:quote, true) end + @testset "Line numbers" begin + @testset "Blocks" begin + @test parseall(Expr, "begin a\nb\n\nc\nend", rule=:statement) == + Expr(:block, + LineNumberNode(1), + :a, + LineNumberNode(2), + :b, + LineNumberNode(4), + :c, + ) + @test parseall(Expr, "begin end", rule=:statement) == + Expr(:block, + LineNumberNode(1) + ) + + @test parseall(Expr, "a\n\nb") == + Expr(:toplevel, + LineNumberNode(1), + :a, + LineNumberNode(3), + :b, + ) + + @test parseall(Expr, "module A\n\nbody\nend", rule=:statement) == + Expr(:module, + true, + :A, + Expr(:block, + LineNumberNode(1), + LineNumberNode(3), + :body, + ), + ) + end + + @testset "Function definition lines" begin + @test parseall(Expr, 
"function f()\na\n\nb\nend", rule=:statement) == + Expr(:function, + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + LineNumberNode(2), + :a, + LineNumberNode(4), + :b, + ) + ) + @test parseall(Expr, "f() = 1", rule=:statement) == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + 1 + ) + ) + + # function/macro without methods + @test parseall(Expr, "function f end", rule=:statement) == + Expr(:function, :f) + @test parseall(Expr, "macro f end", rule=:statement) == + Expr(:macro, :f) + end + + @testset "elseif" begin + @test parseall(Expr, "if a\nb\nelseif c\n d\nend", rule=:statement) == + Expr(:if, + :a, + Expr(:block, + LineNumberNode(2), + :b), + Expr(:elseif, + Expr(:block, + LineNumberNode(3), # Line number for elseif condition + :c), + Expr(:block, + LineNumberNode(4), + :d), + ) + ) + end + + @testset "No line numbers in for/let bindings" begin + @test parseall(Expr, "for i=is, j=js\nbody\nend", rule=:statement) == + Expr(:for, + Expr(:block, + Expr(:(=), :i, :is), + Expr(:(=), :j, :js), + ), + Expr(:block, + LineNumberNode(2), + :body + ) + ) + @test parseall(Expr, "let i=is, j=js\nbody\nend", rule=:statement) == + Expr(:let, + Expr(:block, + Expr(:(=), :i, :is), + Expr(:(=), :j, :js), + ), + Expr(:block, + LineNumberNode(2), + :body + ) + ) + end + end + @testset "Short form function line numbers" begin # A block is added to hold the line number node @test parseall(Expr, "f() = xs", rule=:statement) == @@ -22,13 +130,18 @@ @test parseall(Expr, "for f() = xs\nend", rule=:statement) == Expr(:for, Expr(:(=), Expr(:call, :f), :xs), - Expr(:block)) + Expr(:block, + LineNumberNode(1) + )) end @testset "Long form anonymous functions" begin @test parseall(Expr, "function (xs...)\nbody end", rule=:statement) == Expr(:function, Expr(:..., :xs), - Expr(:block, :body)) + Expr(:block, + LineNumberNode(1), + LineNumberNode(2), + :body)) end end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 
ed15570faa148..0a3c49ecf8db0 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -1,7 +1,12 @@ @testset "parser API" begin @testset "String and buffer input" begin # String - @test parse(Expr, "x+y\nz") == (Expr(:toplevel, :(x+y), :z), [], 6) + let + ex,diag,pos = parse(Expr, "x+y\nz") + @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) + @test diag == [] + @test pos == 6 + end @test parse(Expr, "x+y\nz", rule=:statement) == (:(x+y), [], 4) @test parse(Expr, "x+y\nz", rule=:atom) == (:x, [], 2) @test parse(Expr, "x+y\nz", 5, rule=:atom) == (:z, [], 6) @@ -56,7 +61,7 @@ end @testset "parseall" begin - @test parseall(Expr, " x ") == Expr(:toplevel, :x) + @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) @test parseall(Expr, " x ", rule=:statement) == :x @test parseall(Expr, " x ", rule=:atom) == :x # TODO: Fix this situation with trivia here; the brackets are trivia, but diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 914b41afa228d..85a88f5441022 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -38,7 +38,24 @@ function remove_all_linenums!(ex) remove_macro_linenums!(ex) end -function parsers_agree_on_file(filename) +function show_expr_text_diff(showfunc, ex, f_ex; context=2) + if Sys.isunix() + mktemp() do path1, io1 + mktemp() do path2, io2 + showfunc(io1, ex); close(io1) + showfunc(io2, f_ex); close(io2) + run(ignorestatus(`diff -U$context --color=always $path1 $path2`)) + end + end + else + showfunc(stdout, ex) + println("------------------------------------") + showfunc(stdout, f_ex) + end +end + + +function parsers_agree_on_file(filename; show_diff=false) text = try read(filename, String) catch @@ -55,6 +72,9 @@ function parsers_agree_on_file(filename) end try ex, diagnostics, _ = parse(Expr, text, filename=filename) + if show_diff && ex != fl_ex + show_expr_text_diff(show, ex, fl_ex) + end return 
!JuliaSyntax.any_error(diagnostics) && JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) @@ -223,19 +243,7 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") show(stdout, MIME"text/plain"(), f_ex) printstyled(stdout, "\n\n# Diff of AST dump:\n", color=:red) - if Sys.isunix() - mktemp() do path1, io1 - mktemp() do path2, io2 - dump(io1, ex); close(io1) - dump(io2, f_ex); close(io2) - run(ignorestatus(`diff -U10 --color=always $path1 $path2`)) - end - end - else - dump(ex) - println("------------------------------------") - dump(f_ex) - end + show_expr_text_diff(showfunc, ex, f_ex, context=10) # return (ex, f_ex) # return (code, stream, t, s, ex) end From 2c08532140824e5fd6ea037233e90816b6d33646 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 23 Jun 2022 11:41:03 +1000 Subject: [PATCH 0436/1109] Fix needless inconsistency in KSet"" vs K"" string macros (JuliaLang/JuliaSyntax.jl#30) --- JuliaSyntax/src/parser.jl | 124 ++++++++++++++++----------------- JuliaSyntax/src/syntax_tree.jl | 4 +- JuliaSyntax/src/tokens.jl | 6 +- 3 files changed, 67 insertions(+), 67 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 98994bda46e19..d254f99997f17 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -144,7 +144,7 @@ function bump_closing_token(ps, closing_kind) mark = position(ps) while true k = peek(ps) - if is_closing_token(ps, k) && !(k in KSet`, ;`) + if is_closing_token(ps, k) && !(k in KSet", ;") break end bump(ps) @@ -196,7 +196,7 @@ function bump_disallowed_space(ps) end function bump_semicolon_trivia(ps) - while peek(ps) in KSet`; NewlineWs` + while peek(ps) in KSet"; NewlineWs" bump(ps, TRIVIA_FLAG) end end @@ -231,7 +231,7 @@ end function is_closing_token(ps::ParseState, k) k = kind(k) - return k in KSet`else elseif catch finally , ) ] } ; EndMarker` || + return k in KSet"else elseif catch finally , ) ] } ; EndMarker" || (k == K"end" && !ps.end_symbol) end @@ 
-241,9 +241,9 @@ end function is_initial_reserved_word(ps::ParseState, k) k = kind(k) - is_iresword = k in KSet`begin while if for try return break continue function + is_iresword = k in KSet"begin while if for try return break continue function macro quote let local global const do struct module - baremodule using import export` + baremodule using import export" # `begin` means firstindex(a) inside a[...] return is_iresword && !(k == K"begin" && ps.end_symbol) end @@ -270,8 +270,8 @@ function peek_initial_reserved_words(ps::ParseState) end function is_block_form(k) - kind(k) in KSet`block quote if for while let function macro - abstract primitive struct try module` + kind(k) in KSet"block quote if for while let function macro + abstract primitive struct try module" end function is_syntactic_operator(k) @@ -279,22 +279,22 @@ function is_syntactic_operator(k) # TODO: Do we need to disallow dotted and suffixed forms here? # The lexer itself usually disallows such tokens, so it's not clear whether # we need to handle them. (Though note `.->` is a token...) - return k in KSet`&& || . ... ->` || (is_prec_assignment(k) && k != K"~") + return k in KSet"&& || . ... ->" || (is_prec_assignment(k) && k != K"~") end function is_syntactic_unary_op(k) - kind(k) in KSet`$ & ::` + kind(k) in KSet"$ & ::" end function is_type_operator(t) - kind(t) in KSet`<: >:` && !is_dotted(t) + kind(t) in KSet"<: >:" && !is_dotted(t) end function is_unary_op(t) k = kind(t) !is_suffixed(t) && ( - (k in KSet`<: >:` && !is_dotted(t)) || - k in KSet`+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓` # dotop allowed + (k in KSet"<: >:" && !is_dotted(t)) || + k in KSet"+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓" # dotop allowed ) end @@ -304,7 +304,7 @@ function is_both_unary_and_binary(t) # Preventing is_suffixed here makes this consistent with the flisp parser. # But is this by design or happenstance? 
!is_suffixed(t) && ( - k in KSet`+ - ⋆ ± ∓` || (k in KSet`$ & ~` && !is_dotted(t)) + k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !is_dotted(t)) ) end @@ -314,7 +314,7 @@ function is_initial_operator(t) # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl is_operator(k) && !is_word_operator(k) && - !(k in KSet`: ' .' ?`) && + !(k in KSet": ' .' ?") && !(is_syntactic_unary_op(k) && !is_dotted(t)) && !is_syntactic_operator(k) end @@ -322,7 +322,7 @@ end # flisp: invalid-identifier? function is_valid_identifier(k) k = kind(k) - !(is_syntactic_operator(k) || k in KSet`? .'`) + !(is_syntactic_operator(k) || k in KSet"? .'") end #------------------------------------------------------------------------------- @@ -450,7 +450,7 @@ end # Parse a block, but leave emitting the block up to the caller. function parse_block_inner(ps::ParseState, down) - parse_Nary(ps, down, KSet`NewlineWs ;`, KSet`end else elseif catch finally`) + parse_Nary(ps, down, KSet"NewlineWs ;", KSet"end else elseif catch finally") end # ";" at the top level produces a sequence of top level expressions @@ -465,7 +465,7 @@ function parse_stmts(ps::ParseState) do_emit = parse_Nary(ps, parse_docstring, (K";",), (K"NewlineWs",)) # check for unparsed junk after an expression junk_mark = position(ps) - while peek(ps) ∉ KSet`EndMarker NewlineWs` + while peek(ps) ∉ KSet"EndMarker NewlineWs" # Error recovery bump(ps) end @@ -487,7 +487,7 @@ function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) - if peek_behind(ps).kind in KSet`String string` + if peek_behind(ps).kind in KSet"String string" is_doc = true k = peek(ps) if is_closing_token(ps, k) @@ -540,7 +540,7 @@ end function parse_eq_star(ps::ParseState, equals_is_kw=false) k = peek(ps) k2 = peek(ps,2) - if (is_literal(k) || k == K"Identifier") && k2 in KSet`, ) } ]` + if (is_literal(k) || k == K"Identifier") && k2 in KSet", ) } ]" # optimization: 
skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) @@ -812,7 +812,7 @@ function parse_range(ps::ParseState) break end t2 = peek_token(ps,2) - if kind(t2) in KSet`< >` && !preceding_whitespace(t2) + if kind(t2) in KSet"< >" && !preceding_whitespace(t2) # Error heuristic: we found `:>` or `:<` which are invalid lookalikes # for `<:` and `>:`. Attempt to recover by treating them as a # comparison operator. @@ -875,14 +875,14 @@ end # # flisp: parse-expr function parse_expr(ps::ParseState) - parse_with_chains(ps, parse_term, is_prec_plus, KSet`+ ++`) + parse_with_chains(ps, parse_term, is_prec_plus, KSet"+ ++") end # a * b * c ==> (call-i a * b c) # # flisp: parse-term function parse_term(ps::ParseState) - parse_with_chains(ps, parse_rational, is_prec_times, KSet`*`) + parse_with_chains(ps, parse_rational, is_prec_times, KSet"*") end # Parse left to right, combining any of `chain_ops` into one call @@ -948,15 +948,15 @@ end # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) k = peek(ps, skip_newlines=true) - if k in KSet`<: >:` + if k in KSet"<: >:" k2 = peek(ps, 2) - if is_closing_token(ps, k2) || k2 in KSet`NewlineWs =` + if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" # return operator by itself # <: ) ==> <: # <: \n ==> <: # <: = ==> <: bump(ps) - elseif k2 in KSet`{ (` + elseif k2 in KSet"{ (" # parse <:{T}(x::T) or <:(x::T) like other unary operators # <:{T}(x::T) ==> (call (curly <: T) (:: x T)) # <:(x::T) ==> (<: (:: x T)) @@ -1038,7 +1038,7 @@ function is_juxtapose(ps, prev_k, t) is_initial_reserved_word(ps, prev_k) ))) && # https://github.com/JuliaLang/julia/issues/16356 # 0xenomorph ==> 0x0e - !(prev_k in KSet`BinInt HexInt OctInt` && (k == K"Identifier" || is_keyword(k))) && + !(prev_k in KSet"BinInt HexInt OctInt" && (k == K"Identifier" || is_keyword(k))) && (!is_operator(k) || is_radical_op(k)) && !is_closing_token(ps, k) && !is_initial_reserved_word(ps, k) @@ 
-1101,11 +1101,11 @@ function parse_unary(ps::ParseState) parse_factor(ps) return end - if k in KSet`- +` + if k in KSet"- +" t2 = peek_token(ps, 2) - if !preceding_whitespace(t2) && kind(t2) in KSet`Integer Float` + if !preceding_whitespace(t2) && kind(t2) in KSet"Integer Float" k3 = peek(ps, 3) - if is_prec_power(k3) || k3 in KSet`[ {` + if is_prec_power(k3) || k3 in KSet"[ {" # `[`, `{` (issue #18851) and `^` have higher precedence than # unary negation # -2^x ==> (call - (call-i 2 ^ x)) @@ -1141,7 +1141,7 @@ function parse_unary_call(ps::ParseState) op_tok_flags = is_type_operator(op_t) ? TRIVIA_FLAG : EMPTY_FLAGS t2 = peek_token(ps, 2) k2 = kind(t2) - if is_closing_token(ps, k2) || k2 in KSet`NewlineWs =` + if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" if is_dotted(op_t) # Standalone dotted operators are parsed as (|.| op) # .+ ==> (. +) @@ -1325,13 +1325,13 @@ function parse_unary_prefix(ps::ParseState) k = peek(ps) if is_syntactic_unary_op(k) k2 = peek(ps, 2) - if k in KSet`& $` && (is_closing_token(ps, k2) || k2 == K"NewlineWs") + if k in KSet"& $" && (is_closing_token(ps, k2) || k2 == K"NewlineWs") # &) ==> & # $\n ==> $ bump(ps) else bump(ps, TRIVIA_FLAG) - if k in KSet`& ::` + if k in KSet"& ::" # &a ==> (& a) parse_where(ps, parse_call) else @@ -1389,7 +1389,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # source range of the @-prefixed part of a macro macro_atname_range = nothing # $A.@x ==> (macrocall (. ($ A) (quote @x))) - valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet`Identifier . $` + valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet"Identifier . $" # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. 
macro_name_position = position(ps) # points to same output span as peek_behind @@ -1432,7 +1432,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end break elseif (ps.space_sensitive && preceding_whitespace(t) && - k in KSet`( [ { \ Char " """ \` \`\`\``) + k in KSet"( [ { \ Char \" \"\"\" ` ```") # [f (x)] ==> (hcat f x) # [f "x"] ==> (hcat f "x") break @@ -1608,7 +1608,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # S{a,b} ==> (curly S a b) emit(ps, mark, K"curly") end - elseif k in KSet` " """ \` \`\`\` ` && + elseif k in KSet" \" \"\"\" ` ``` " && !preceding_whitespace(t) && valid_macroname # Custom string and command literals # x"str" ==> (macrocall @x_str "str") @@ -1669,7 +1669,7 @@ function parse_resword(ps::ParseState) ps = normal_context(ps) mark = position(ps) word = peek(ps) - if word in KSet`begin quote` + if word in KSet"begin quote" # begin end ==> (block) # begin a ; b end ==> (block a b) # begin\na\nb\nend ==> (block a b) @@ -1704,7 +1704,7 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"for") elseif word == K"let" bump(ps, TRIVIA_FLAG) - if peek(ps) ∉ KSet`NewlineWs ;` + if peek(ps) ∉ KSet"NewlineWs ;" # let x=1\n end ==> (let (= x 1) (block)) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_eq_star) @@ -1713,7 +1713,7 @@ function parse_resword(ps::ParseState) # let x=1 ; end ==> (let (= x 1) (block)) # let x::1 ; end ==> (let (:: x 1) (block)) # let x ; end ==> (let x (block)) - if n_subexprs > 1 || !(kb in KSet`Identifier = ::`) + if n_subexprs > 1 || !(kb in KSet"Identifier = ::") # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) # let x+=1 ; end ==> (let (block (+= x 1)) (block)) emit(ps, m, K"block") @@ -1725,7 +1725,7 @@ function parse_resword(ps::ParseState) bump_invisible(ps, K"block") end k = peek(ps) - if k in KSet`NewlineWs ;` + if k in KSet"NewlineWs ;" bump(ps, TRIVIA_FLAG) elseif k == K"end" # pass @@ -1739,9 +1739,9 @@ function 
parse_resword(ps::ParseState) emit(ps, mark, K"let") elseif word == K"if" parse_if_elseif(ps) - elseif word in KSet`const global local` + elseif word in KSet"const global local" parse_const_local_global(ps) - elseif word in KSet`function macro` + elseif word in KSet"function macro" parse_function(ps) elseif word == K"abstract" # Abstract type definitions @@ -1759,7 +1759,7 @@ function parse_resword(ps::ParseState) bump_semicolon_trivia(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"abstract") - elseif word in KSet`struct mutable` + elseif word in KSet"struct mutable" # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (:: a X))) if word == K"mutable" # mutable struct A end ==> (struct true A (block)) @@ -1805,16 +1805,16 @@ function parse_resword(ps::ParseState) parse_eq(ps) end emit(ps, mark, K"return") - elseif word in KSet`break continue` + elseif word in KSet"break continue" # break ==> (break) # continue ==> (continue) bump(ps) k = peek(ps) - if !(k in KSet`NewlineWs ; ) : EndMarker` || (k == K"end" && !ps.end_symbol)) + if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol)) recover(is_closer_or_newline, ps, TRIVIA_FLAG, error="unexpected token after $(untokenize(word))") end - elseif word in KSet`module baremodule` + elseif word in KSet"module baremodule" # module A end ==> (module true A (block)) # baremodule A end ==> (module false A (block)) bump(ps, TRIVIA_FLAG) @@ -1841,7 +1841,7 @@ function parse_resword(ps::ParseState) bump(ps, TRIVIA_FLAG) parse_comma_separated(ps, parse_atsym) emit(ps, mark, K"export") - elseif word in KSet`import using` + elseif word in KSet"import using" parse_imports(ps) elseif word == K"do" bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") @@ -1863,7 +1863,7 @@ function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) bump(ps, TRIVIA_FLAG) end cond_mark = position(ps) - if peek(ps) in KSet`NewlineWs end` + if peek(ps) in KSet"NewlineWs end" # if end ==> (if (error) 
(block)) # if \n end ==> (if (error) (block)) bump_trivia(ps, error="missing condition in `$(untokenize(word))`") @@ -1906,7 +1906,7 @@ function parse_const_local_global(ps) has_const = false scope_k = K"None" k = peek(ps) - if k in KSet`global local` + if k in KSet"global local" # global x ==> (global x) # local x ==> (local x) scope_k = k @@ -1922,7 +1922,7 @@ function parse_const_local_global(ps) # const x = 1 ==> (const (= x 1)) bump(ps, TRIVIA_FLAG) k = peek(ps) - if k in KSet`global local` + if k in KSet"global local" # const global x = 1 ==> (const (global (= x 1))) # const local x = 1 ==> (const (local (= x 1))) scope_k = k @@ -1975,7 +1975,7 @@ end function parse_function(ps::ParseState) mark = position(ps) word = peek(ps) - @check word in KSet`macro function` + @check word in KSet"macro function" is_function = word == K"function" is_anon_func = false bump(ps, TRIVIA_FLAG) @@ -2159,7 +2159,7 @@ end function parse_catch(ps::ParseState) bump(ps, TRIVIA_FLAG) k = peek(ps) - if k in KSet`NewlineWs ;` || is_closing_token(ps, k) + if k in KSet"NewlineWs ;" || is_closing_token(ps, k) # try x catch end ==> (try (block x) false (block) false false) # try x catch ; y end ==> (try (block x) false (block y) false false) # try x catch \n y end ==> (try (block x) false (block y) false false) @@ -2176,7 +2176,7 @@ end function parse_do(ps::ParseState) ps = normal_context(ps) mark = position(ps) - if peek(ps) in KSet`NewlineWs ;` + if peek(ps) in KSet"NewlineWs ;" # f() do\nend ==> (do (call f) (-> (tuple) (block))) # f() do ; body end ==> (do (call f) (-> (tuple) (block body))) # this trivia needs to go into the tuple due to the way position() @@ -2244,7 +2244,7 @@ end function parse_imports(ps::ParseState) mark = position(ps) word = peek(ps) - @check word in KSet`import using` + @check word in KSet"import using" bump(ps, TRIVIA_FLAG) emark = position(ps) initial_as = parse_import(ps, word, false) @@ -2389,7 +2389,7 @@ function parse_import_path(ps::ParseState) # Import 
the .. operator # import A... ==> (import (. A ..)) bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) - elseif k in KSet`NewlineWs ; , : EndMarker` + elseif k in KSet"NewlineWs ; , : EndMarker" # import A; B ==> (import (. A)) break else @@ -2443,7 +2443,7 @@ function parse_iteration_spec(ps::ParseState) # Handle `outer` contextual keyword parse_pipe_lt(with_space_sensitive(ps)) if peek_behind(ps).orig_kind == K"outer" - if peek_skip_newline_in_gen(ps) in KSet`= in ∈` + if peek_skip_newline_in_gen(ps) in KSet"= in ∈" # Not outer keyword # outer = rhs ==> (= outer rhs) # outer <| x = rhs ==> (= (call-i outer <| x) rhs) @@ -2455,13 +2455,13 @@ function parse_iteration_spec(ps::ParseState) emit(ps, mark, K"outer") end end - if peek_skip_newline_in_gen(ps) in KSet`= in ∈` + if peek_skip_newline_in_gen(ps) in KSet"= in ∈" bump(ps, TRIVIA_FLAG) parse_pipe_lt(ps) else # Recovery heuristic recover(ps, error="invalid iteration spec: expected one of `=` `in` or `∈`") do ps, k - k in KSet`, NewlineWs` || is_closing_token(ps, k) + k in KSet", NewlineWs" || is_closing_token(ps, k) end # Or try parse_pipe_lt ??? end @@ -3034,8 +3034,8 @@ is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) function parse_string(ps::ParseState, raw::Bool) mark = position(ps) delim_k = peek(ps) - triplestr = delim_k in KSet`""" \`\`\`` - string_chunk_kind = delim_k in KSet`" """` ? K"String" : K"CmdString" + triplestr = delim_k in KSet"\"\"\" ```" + string_chunk_kind = delim_k in KSet"\" \"\"\"" ? K"String" : K"CmdString" indent_ref_i = 0 indent_ref_len = typemax(Int) indent_chunks = acquire_positions(ps.stream) @@ -3236,7 +3236,7 @@ function emit_braces(ps, mark, ckind, cflags) emit(ps, mark, K"nrow", cflags) end check_ncat_compat(ps, mark, ckind) - outk = ckind in KSet`vect comprehension` ? K"braces" : K"bracescat" + outk = ckind in KSet"vect comprehension" ? 
K"braces" : K"bracescat" emit(ps, mark, outk) end @@ -3339,7 +3339,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) t = peek_token(ps) k = kind(t) if preceding_whitespace(t) || is_operator(k) || - k in KSet`( ) [ ] { } , ; @ EndMarker` + k in KSet"( ) [ ] { } , ; @ EndMarker" # var"x"+ ==> x # var"x") ==> x # var"x"( ==> x @@ -3378,7 +3378,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) parse_call_chain(ps, mark, true) elseif is_string_delim(leading_kind) parse_string(ps, false) - elseif leading_kind in KSet`\` \`\`\`` + elseif leading_kind in KSet"` ```" # `` ==> (macrocall core_@cmd "") # `cmd` ==> (macrocall core_@cmd "cmd") # ```cmd``` ==> (macrocall core_@cmd "cmd"-s) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index ff8221e891252..c55c52281cc7b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -29,7 +29,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val_str = view(source, val_range) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. - val = if k in KSet`Integer Float BinInt OctInt HexInt` + val = if k in KSet"Integer Float BinInt OctInt HexInt" julia_string_to_number(val_str, k) elseif k == K"true" true @@ -47,7 +47,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif is_keyword(k) # This should only happen for tokens nested inside errors Symbol(val_str) - elseif k in KSet`String CmdString` + elseif k in KSet"String CmdString" is_cmd = k == K"CmdString" is_raw = has_flags(head(raw), RAW_STRING_FLAG) unescape_julia_string(val_str, is_cmd, is_raw) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 8d41e9ee40da0..584cdb9aa55e9 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -24,11 +24,11 @@ end """ A set of kinds which can be used with the `in` operator. 
For example - k in KSet`+ - *` + k in KSet"+ - *" """ -macro KSet_cmd(str) +macro KSet_str(str) kinds = [get(_str_to_kind, s) do - error("unknown token kind KSet`$(repr(str)[2:end-1])`") + error("unknown token kind KSet\"$(repr(str)[2:end-1])\"") end for s in split(str)] From bd24a3ebac0d4d99ed989eb9530c1783f9871d9f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 23 Jun 2022 14:59:31 +1000 Subject: [PATCH 0437/1109] .gitignore for sysimage generation --- JuliaSyntax/sysimage/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 JuliaSyntax/sysimage/.gitignore diff --git a/JuliaSyntax/sysimage/.gitignore b/JuliaSyntax/sysimage/.gitignore new file mode 100644 index 0000000000000..56c79c0509880 --- /dev/null +++ b/JuliaSyntax/sysimage/.gitignore @@ -0,0 +1,2 @@ +Project.toml +Manifest.toml From c24237dd05f44b39c0c25c807f9c8bc53c9edda6 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 23 Jun 2022 11:25:39 +0200 Subject: [PATCH 0438/1109] && and || don't support suffixes --- JuliaSyntax/Tokenize/src/lexer.jl | 2 +- JuliaSyntax/Tokenize/src/utilities.jl | 2 ++ JuliaSyntax/Tokenize/test/lexer.jl | 29 +++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/Tokenize/src/lexer.jl index 005992834a8a4..516c5e68b43c5 100644 --- a/JuliaSyntax/Tokenize/src/lexer.jl +++ b/JuliaSyntax/Tokenize/src/lexer.jl @@ -678,7 +678,7 @@ function lex_minus(l::Lexer) else return emit_error(l, Tokens.INVALID_OPERATOR) # "--" is an invalid operator end - elseif accept(l, '>') + elseif !l.dotop && accept(l, '>') return emit(l, Tokens.ANON_FUNC) elseif accept(l, '=') return emit(l, Tokens.MINUS_EQ) diff --git a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/Tokenize/src/utilities.jl index 71cfe5d6e139a..8a051ef402c37 100644 --- a/JuliaSyntax/Tokenize/src/utilities.jl +++ b/JuliaSyntax/Tokenize/src/utilities.jl @@ -231,6 +231,8 @@ function optakessuffix(k) k == Tokens.CONDITIONAL 
|| k == Tokens.ISSUBTYPE || k == Tokens.ISSUPERTYPE || + k == Tokens.LAZY_AND || + k == Tokens.LAZY_OR || k == Tokens.IN || k == Tokens.ISA || k == Tokens.COLON_EQUALS || diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 7730ba9c467c4..356522c287d61 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -881,3 +881,32 @@ end Tokens.ENDMARKER ] end + +@testset "dotop miscellanea" begin + broken_ops = [ + "a .-> b", + ".>: b", + ".<: b", + "a ||₁ b", + "a ||̄ b", + "a .||₁ b", + "a &&₁ b", + "a &&̄ b", + "a .&&₁ b", + ] + + @test [ + [Tokenize.untokenize(t, s) for t in Tokenize.tokenize(s)] + for s in broken_ops + ] == [ + ["a", " ", ".-", ">", " ", "b", ""], + [".>:", " ", "b", ""], + [".<:", " ", "b", ""], + ["a", " ", "||", "₁", " ", "b", ""], + ["a", " ", "||", "̄", " ", "b", ""], + ["a", " ", ".||", "₁", " ", "b", ""], + ["a", " ", "&&", "₁", " ", "b", ""], + ["a", " ", "&&", "̄", " ", "b", ""], + ["a", " ", ".&&", "₁", " ", "b", ""], + ] +end From 137df53fa90db68e286792ea6dd614fa2a1cb516 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 24 Jun 2022 16:57:10 +1000 Subject: [PATCH 0439/1109] Streamline dotop miscellania test --- JuliaSyntax/Tokenize/test/lexer.jl | 37 +++++++++--------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/Tokenize/test/lexer.jl index 356522c287d61..2f8e19e144a43 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/Tokenize/test/lexer.jl @@ -6,6 +6,8 @@ const T = Tokenize.Tokens tok(str, i = 1) = collect(tokenize(str))[i] +strtok(str) = untokenize.(collect(tokenize(str)), str) + @testset "tokens" begin for s in ["a", IOBuffer("a")] l = tokenize(s) @@ -883,30 +885,13 @@ end end @testset "dotop miscellanea" begin - broken_ops = [ - "a .-> b", - ".>: b", - ".<: b", - "a ||₁ b", - "a ||̄ b", - "a .||₁ b", - "a &&₁ b", - "a &&̄ b", - "a .&&₁ b", - ] - - @test [ - 
[Tokenize.untokenize(t, s) for t in Tokenize.tokenize(s)] - for s in broken_ops - ] == [ - ["a", " ", ".-", ">", " ", "b", ""], - [".>:", " ", "b", ""], - [".<:", " ", "b", ""], - ["a", " ", "||", "₁", " ", "b", ""], - ["a", " ", "||", "̄", " ", "b", ""], - ["a", " ", ".||", "₁", " ", "b", ""], - ["a", " ", "&&", "₁", " ", "b", ""], - ["a", " ", "&&", "̄", " ", "b", ""], - ["a", " ", ".&&", "₁", " ", "b", ""], - ] + @test strtok("a .-> b") == ["a", " ", ".-", ">", " ", "b", ""] + @test strtok(".>: b") == [".>:", " ", "b", ""] + @test strtok(".<: b") == [".<:", " ", "b", ""] + @test strtok("a ||₁ b") == ["a", " ", "||", "₁", " ", "b", ""] + @test strtok("a ||̄ b") == ["a", " ", "||", "̄", " ", "b", ""] + @test strtok("a .||₁ b") == ["a", " ", ".||", "₁", " ", "b", ""] + @test strtok("a &&₁ b") == ["a", " ", "&&", "₁", " ", "b", ""] + @test strtok("a &&̄ b") == ["a", " ", "&&", "̄", " ", "b", ""] + @test strtok("a .&&₁ b") == ["a", " ", ".&&", "₁", " ", "b", ""] end From 31600b2db1219b7659121716c281e308568f47c4 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 1 Jul 2022 17:30:03 +1000 Subject: [PATCH 0440/1109] Improve trivia attachment for `)` in parse_brackets The `)` was attached to the `parameters` expression, but should be attached to node along with the opening `(` instead. 
--- JuliaSyntax/src/parser.jl | 8 ++------ JuliaSyntax/test/parser.jl | 14 ++++++++++++++ JuliaSyntax/test/test_utils.jl | 4 ++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d254f99997f17..9867f01f38b90 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1186,7 +1186,6 @@ function parse_unary_call(ps::ParseState) parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs is_call[] = had_commas || had_splat || initial_semi is_block[] = !is_call[] && num_semis > 0 - bump_closing_token(ps, K")") return (needs_parameters=is_call[], eq_is_kw_before_semi=is_call[], eq_is_kw_after_semi=is_call[]) @@ -2009,8 +2008,7 @@ function parse_function(ps::ParseState) # producing less consistent syntax for anonymous functions. is_anon_func_ = Ref(is_anon_func) parse_brackets(ps, K")") do _, _, _, _ - bump_closing_token(ps, K")") - is_anon_func_[] = peek(ps) != K"(" + is_anon_func_[] = peek(ps, 2) != K"(" return (needs_parameters = is_anon_func_[], eq_is_kw_before_semi = is_anon_func_[], eq_is_kw_after_semi = is_anon_func_[]) @@ -2491,7 +2489,6 @@ function parse_call_arglist(ps::ParseState, closer, is_macrocall) ps = ParseState(ps, for_generator=true) parse_brackets(ps, closer) do _, _, _, _ - bump_closing_token(ps, closer) return (needs_parameters=true, eq_is_kw_before_semi=!is_macrocall, eq_is_kw_after_semi=true) @@ -2510,7 +2507,6 @@ function parse_vect(ps::ParseState, closer) # [x=1, y=2] ==> (vect (= x 1) (= y 2)) # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) parse_brackets(ps, closer) do _, _, _, _ - bump_closing_token(ps, closer) return (needs_parameters=true, eq_is_kw_before_semi=false, eq_is_kw_after_semi=false) @@ -2868,7 +2864,6 @@ function parse_paren(ps::ParseState, check_identifiers=true) is_tuple[] = had_commas || (had_splat && num_semis >= 1) || (initial_semi && (num_semis == 1 || num_subexprs > 0)) is_block[] = num_semis > 0 - 
bump_closing_token(ps, K")") return (needs_parameters=is_tuple[], eq_is_kw_before_semi=false, eq_is_kw_after_semi=is_tuple[]) @@ -3023,6 +3018,7 @@ function parse_brackets(after_parse::Function, end release_positions(ps.stream, params_marks) release_positions(ps.stream, eq_positions) + bump_closing_token(ps, closing_kind) end is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b94bc69ccd029..e0466a0ebc3b9 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -791,6 +791,20 @@ broken_tests = [ end end +@testset "Trivia attachment" begin + @test show_green_tree("f(a;b)") == """ + 1:6 │[toplevel] + 1:6 │ [call] + 1:1 │ Identifier ✔ "f" + 2:2 │ ( "(" + 3:3 │ Identifier ✔ "a" + 4:5 │ [parameters] + 4:4 │ ; ";" + 5:5 │ Identifier ✔ "b" + 6:6 │ ) ")" + """ +end + @testset "Unicode normalization in tree conversion" begin # ɛµ normalizes to εμ @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 85a88f5441022..3d994080f5b9e 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -250,3 +250,7 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") nothing end +function show_green_tree(code; version::VersionNumber=v"1.6") + t = JuliaSyntax.parseall(GreenNode, code, version=version) + sprint(show, MIME"text/plain"(), t, code) +end From f3c9106a5e09cb3e3af1c595d2a772089abb00a7 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 1 Jul 2022 17:44:00 +1000 Subject: [PATCH 0441/1109] Avoid duplicate diagnostic messages for closing brackets This error condition is checked and reported as part of bump_closing_token. 
--- JuliaSyntax/src/parser.jl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9867f01f38b90..193127f6f5140 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2982,14 +2982,7 @@ function parse_brackets(after_parse::Function, # (x \n\n for a in as) ==> (generator x (= a as)) parse_generator(ps, mark) else - if is_closing_token(ps, k) - k_str = untokenize(k, unique=false) - emit_diagnostic(ps, error="unexpected `$k_str` in bracketed list") - else - ck_str = untokenize(closing_kind) - emit_diagnostic(ps, error="missing comma or $ck_str in bracketed list") - end - # Recovery done after loop + # Error - recovery done when consuming closing_kind break end end From b1b376518377c205e01186418a39407a74865fb9 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 2 Aug 2022 17:58:55 +1000 Subject: [PATCH 0442/1109] More capable _printstyled - background + foreground colors --- JuliaSyntax/src/diagnostics.jl | 6 +++--- JuliaSyntax/src/syntax_tree.jl | 2 +- JuliaSyntax/src/utils.jl | 17 +++++++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index b6e5d1caefcdb..dc5292d4cb687 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -89,7 +89,7 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) # a............... # .....p...q...... # ...............b - _printstyled(io, source[p:q]; color=hicol) + _printstyled(io, source[p:q]; bgcolor=hicol) else # Or large and we trucate the code to show only the region around the # start and end of the error. @@ -100,9 +100,9 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) # c............... # .....q.......... 
# ...............d - _printstyled(io, source[p:b]; color=hicol) + _printstyled(io, source[p:b]; bgcolor=hicol) println(io, "…") - _printstyled(io, source[c:q]; color=hicol) + _printstyled(io, source[c:q]; bgcolor=hicol) end print(io, source[nextind(text,q):d]) println(io) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index c55c52281cc7b..f9ecd9d06788b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -253,7 +253,7 @@ function highlight(code::String, node, path::Int...; color=(40,40,70)) node, p, span = child_position_span(node, path...) q = p + span print(stdout, code[1:p-1]) - _printstyled(stdout, code[p:q-1]; color=color) + _printstyled(stdout, code[p:q-1]; bgcolor=color) print(stdout, code[q:end]) end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index b7edce50b6ddd..81f780ef9c74e 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -2,11 +2,20 @@ """ Like printstyled, but allows providing RGB colors for true color terminals """ -function _printstyled(io::IO, text; color) - if length(color) != 3 || !all(0 .<= color .< 256) - error("Invalid ansi color $color") +function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing) + colcode = "" + if !isnothing(fgcolor) + if length(fgcolor) != 3 || !all(0 .<= fgcolor .< 256) + error("Invalid ansi color $fgcolor") + end + colcode *= "\e[38;2;$(fgcolor[1]);$(fgcolor[2]);$(fgcolor[3])m" + end + if !isnothing(bgcolor) + if length(bgcolor) != 3 || !all(0 .<= bgcolor .< 256) + error("Invalid ansi color $bgcolor") + end + colcode *= "\e[48;2;$(bgcolor[1]);$(bgcolor[2]);$(bgcolor[3])m" end - colcode = "\e[48;2;$(color[1]);$(color[2]);$(color[3])m" colreset = "\e[0;0m" first = true for linepart in split(text, '\n') From 311c4898c71b3ce4a4cfed76ff2854314fb57bdd Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 5 Aug 2022 06:42:38 +1000 Subject: [PATCH 0443/1109] Minor docs update --- JuliaSyntax/src/parse_stream.jl | 
10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 4621270d74610..1855e76b9ee2c 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -557,7 +557,15 @@ end bump(stream [, flags=EMPTY_FLAGS]; skip_newlines=false, error, remap_kind) -Shift the current token from the input to the output, adding the given flags. +Copy the current token from the input stream to the output. Adds the given +flags to the output token (normally this would be the default `EMPTY_FLAGS` or +`TRIVIA_FLAG`). + +Keyword arguments: +* `skip_newlines` - if `true`, newlines are treated as whitespace. +* `error` - if set, emit an error for this token +* `remap_kind` - the kind of the token in the output token stream if it needs + to be modified. """ function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, error=nothing, remap_kind::Kind=K"None") From aff278ff370fd6a012137a11542ad62f40a75378 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 6 Aug 2022 05:27:00 +1000 Subject: [PATCH 0444/1109] Separate UUID for the JuliaSyntax in sysimage (JuliaLang/JuliaSyntax.jl#37) This allows us to use JuliaSyntax baked into the sysimage to develop a separate JuliaSyntax with Revise :-D --- .../sysimage/JuliaSyntaxCore/Project.toml | 2 +- JuliaSyntax/sysimage/compile.jl | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml index 5b969f2202591..7e31a0dd5bc26 100644 --- a/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml +++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml @@ -7,4 +7,4 @@ version = "0.1.0" julia = "1.6" [deps] -JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" +JuliaSyntax = "54354a4c-6cac-4c00-8566-e7c1beb8bfd8" diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl index ea950555bebfe..fbc17232ad6e3 
100755 --- a/JuliaSyntax/sysimage/compile.jl +++ b/JuliaSyntax/sysimage/compile.jl @@ -11,10 +11,24 @@ using Libdl cd(@__DIR__) +# Create a copy of JuliaSyntax so we can change the project UUID. +# This allows us to use an older version of JuliaSyntax for developing +# JuliaSyntax itself. +rm("JuliaSyntax", force=true, recursive=true) +mkdir("JuliaSyntax") +cp("../src", "JuliaSyntax/src") +cp("../test", "JuliaSyntax/test") +projstr = replace(read("../Project.toml", String), + "70703baa-626e-46a2-a12c-08ffd08c73b4"=>"54354a4c-6cac-4c00-8566-e7c1beb8bfd8") +write("JuliaSyntax/Project.toml", projstr) + using Pkg +rm("Project.toml", force=true) +rm("Manifest.toml", force=true) Pkg.activate(".") -Pkg.develop("JuliaSyntax") +Pkg.develop(path="./JuliaSyntax") Pkg.develop(path="./JuliaSyntaxCore") +Pkg.add("PackageCompiler") image_path = joinpath(imgs_base_path, "juliasyntax_sysimage."*Libdl.dlext) From 084170e49c403938fb387adbb7769f13c7d2b63e Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 9 Aug 2022 06:05:11 +1000 Subject: [PATCH 0445/1109] Docs: Add notes about concatenation AST forms (JuliaLang/JuliaSyntax.jl#39) * Docs: Add notes about concatenation AST forms * Add embedded JuliaCon talk --- JuliaSyntax/README.md | 104 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 6e3c1bc26a67d..0a786be199bb8 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -32,6 +32,10 @@ handful of failures remaining in the Base tests and standard library. The tree data structures should be somewhat usable but will evolve as we try out various use cases. +A talk from JuliaCon 2022 covered some aspects of this package. 
+ +[![Youtube video thumbnail](https://img.youtube.com/vi/CIiGng9Brrk/mqdefault.jpg)](https://youtu.be/CIiGng9Brrk) + # Examples Here's what parsing of a small piece of code currently looks like in various @@ -243,14 +247,104 @@ JuliaSyntax currently deals in three types of trees: associated `GreenTree` nodes. * `Expr` is used as a conversion target for compatibility -Wherever possible, the tree structure of `GreenNode`/`SyntaxNode` is 1:1 with -`Expr`. There are, however, some exceptions. +## Julia AST structures + +In this section we describe some features of Julia's AST structures. + +### Concatenation syntax + +Concatenation syntax comes in two syntax forms: +* The traditional `hcat`/`vcat`/`row` which deal with concatenation or matrix + construction along dimensions one and two. +* The new `ncat`/`nrow` syntax which deals with concatenation or array + construction along arbitrary dimensions. + +We write `ncat-3` for concatenation along the third dimension. (The `3` is +stored in the head flags for `SyntaxNode` trees, and in the first `arg` for +`Expr` trees.) 
Semantically the new syntax can work like the old: +* `ncat-1` is the same as `vcat` +* `ncat-2` is the same as `hcat` +* `row` is the same as `nrow-2` + +#### Vertical concatenation (dimension 1) + +Vertical concatenation along dimension 1 can be done with semicolons or newlines + +```julia +julia> print_tree(:([a + b])) +Expr(:vcat) +├─ :a +└─ :b + +julia> print_tree(:([a ; b])) +Expr(:vcat) +├─ :a +└─ :b +``` + +#### Horizontal concatenation (dimension 2) + +For horizontal concatenation along dimension 2, use spaces or double semicolons + +```julia +julia> print_tree(:([a b])) +Expr(:hcat) +├─ :a +└─ :b + +julia> print_tree(:([a ;; b])) +Expr(:ncat) +├─ 2 +├─ :a +└─ :b +``` + +#### Mixed concatenation + +Concatenation along dimensions 1 and 2 can be done with spaces and single +semicolons or newlines, producing a mixture of `vcat` and `row` expressions: + +```julia +julia> print_tree(:([a b + c d])) +# OR +julia> print_tree(:([a b ; c d])) +Expr(:vcat) +├─ Expr(:row) +│ ├─ :a +│ └─ :b +└─ Expr(:row) + ├─ :c + └─ :d +``` + +General n-dimensional concatenation results in nested `ncat` and `nrow`, for +example + +```julia +julia> print_tree(:([a ; b ;; c ; d ;;; x])) +Expr(:ncat) +├─ 3 +├─ Expr(:nrow) +│ ├─ 2 +│ ├─ Expr(:nrow) +│ │ ├─ 1 +│ │ ├─ :a +│ │ └─ :b +│ └─ Expr(:nrow) +│ ├─ 1 +│ ├─ :c +│ └─ :d +└─ :x +``` ## Tree differences between GreenNode and Expr -First, `GreenNode` inherently stores source position, so there's no need for -the `LineNumberNode`s used by `Expr`. There's also a small number of other -differences +Wherever possible, the tree structure of `GreenNode`/`SyntaxNode` is 1:1 with +`Expr`. There are, however, some exceptions. First, `GreenNode` inherently +stores source position, so there's no need for the `LineNumberNode`s used by +`Expr`. 
There's also a small number of other differences ### Flattened generators From 9a8333da7696297a54f987922db9df9eea17bdb9 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 10 Aug 2022 13:58:55 +1000 Subject: [PATCH 0446/1109] Fixes for JuliaSyntax in Core / sysimage (JuliaLang/JuliaSyntax.jl#41) * Fix copying of Tokenize source (clash with refactoring in JuliaLang/JuliaSyntax.jl#40) * Ensure the previous default parser is restored by `enable_in_core!()`. This could be other than Core.Compiler.fl_parse if we're using JuliaSyntax.jl to develop itself :) --- JuliaSyntax/src/hooks.jl | 4 +++- JuliaSyntax/src/parse_stream.jl | 4 ++-- JuliaSyntax/sysimage/compile.jl | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 6d91206f89de4..e8748df6ee79c 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -82,6 +82,8 @@ end # `Expr(:error)`. Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e +const _default_parser = Core._parse + """ Connect the JuliaSyntax parser to the Julia runtime so that it replaces the flisp parser for all parsing work. @@ -97,7 +99,7 @@ function enable_in_core!(enable=true) close(_debug_log) _debug_log = nothing end - parser = enable ? core_parser_hook : Core.Compiler.fl_parse + parser = enable ? core_parser_hook : _default_parser Base.eval(Core, :(_parse = $parser)) nothing end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 1855e76b9ee2c..be1617e94bba5 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -84,8 +84,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) end #------------------------------------------------------------------------------- -# Generic interface for types `T` which have kind and flags: -# 1. Define kind(::T) and flags(::T) directly +# Generic interface for types `T` which have kind and flags. Either: +# 1. Define kind(::T) and flags(::T), or # 2. 
Define head(::T) to return a type like `SyntaxKind` for which `kind` and # `flags` are defined kind(x) = kind(head(x)) diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl index fbc17232ad6e3..329a7c553004a 100755 --- a/JuliaSyntax/sysimage/compile.jl +++ b/JuliaSyntax/sysimage/compile.jl @@ -17,6 +17,7 @@ cd(@__DIR__) rm("JuliaSyntax", force=true, recursive=true) mkdir("JuliaSyntax") cp("../src", "JuliaSyntax/src") +cp("../Tokenize", "JuliaSyntax/Tokenize") cp("../test", "JuliaSyntax/test") projstr = replace(read("../Project.toml", String), "70703baa-626e-46a2-a12c-08ffd08c73b4"=>"54354a4c-6cac-4c00-8566-e7c1beb8bfd8") From 11ea9923d1512918030ad226a7eca8b6aa2c249f Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Thu, 11 Aug 2022 12:05:52 +1000 Subject: [PATCH 0447/1109] Claim only Julia 1.6+ compatibility for now We can always make this more compatible later, but for now we're only testing on julia 1.6+ --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 7025d9af5f2c3..d5920354fd2c3 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -4,7 +4,7 @@ authors = ["Chris Foster and contributors"] version = "0.1.0" [compat] -julia = "1.4" +julia = "1.6" [deps] Mmap = "a63ad114-7e13-5084-954f-fe012c677804" From 197e3ef123e0287742b661b567c70073087576ad Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Fri, 5 Aug 2022 14:37:08 +1000 Subject: [PATCH 0448/1109] Move Tokenize into src The fist step of integrating the lexer more deeply into JuliaSyntax. 
--- JuliaSyntax/LICENSE | 21 ------ JuliaSyntax/{Tokenize => }/LICENSE.md | 49 +++++++------ JuliaSyntax/Tokenize/.github/workflows/CI.yml | 49 ------------- .../Tokenize/.github/workflows/TagBot.yml | 11 --- JuliaSyntax/Tokenize/.gitignore | 3 - JuliaSyntax/Tokenize/Project.toml | 13 ---- JuliaSyntax/Tokenize/README.md | 59 ---------------- JuliaSyntax/Tokenize/benchmark/lex_base.jl | 36 ---------- JuliaSyntax/Tokenize/src/_precompile.jl | 70 ------------------- JuliaSyntax/Tokenize/test/lex_yourself.jl | 64 ----------------- JuliaSyntax/Tokenize/test/profile.jl | 54 -------------- JuliaSyntax/Tokenize/test/runtests.jl | 9 --- JuliaSyntax/src/JuliaSyntax.jl | 11 +-- .../src => src/Tokenize}/Tokenize.jl | 5 -- .../{Tokenize/src => src/Tokenize}/lexer.jl | 0 .../{Tokenize/src => src/Tokenize}/token.jl | 0 .../src => src/Tokenize}/token_kinds.jl | 0 .../src => src/Tokenize}/utilities.jl | 0 JuliaSyntax/{Tokenize => }/test/lexer.jl | 4 -- JuliaSyntax/test/runtests.jl | 10 +-- 20 files changed, 32 insertions(+), 436 deletions(-) delete mode 100644 JuliaSyntax/LICENSE rename JuliaSyntax/{Tokenize => }/LICENSE.md (53%) delete mode 100644 JuliaSyntax/Tokenize/.github/workflows/CI.yml delete mode 100644 JuliaSyntax/Tokenize/.github/workflows/TagBot.yml delete mode 100644 JuliaSyntax/Tokenize/.gitignore delete mode 100644 JuliaSyntax/Tokenize/Project.toml delete mode 100644 JuliaSyntax/Tokenize/README.md delete mode 100644 JuliaSyntax/Tokenize/benchmark/lex_base.jl delete mode 100644 JuliaSyntax/Tokenize/src/_precompile.jl delete mode 100644 JuliaSyntax/Tokenize/test/lex_yourself.jl delete mode 100644 JuliaSyntax/Tokenize/test/profile.jl delete mode 100644 JuliaSyntax/Tokenize/test/runtests.jl rename JuliaSyntax/{Tokenize/src => src/Tokenize}/Tokenize.jl (68%) rename JuliaSyntax/{Tokenize/src => src/Tokenize}/lexer.jl (100%) rename JuliaSyntax/{Tokenize/src => src/Tokenize}/token.jl (100%) rename JuliaSyntax/{Tokenize/src => src/Tokenize}/token_kinds.jl (100%) rename 
JuliaSyntax/{Tokenize/src => src/Tokenize}/utilities.jl (100%) rename JuliaSyntax/{Tokenize => }/test/lexer.jl (99%) diff --git a/JuliaSyntax/LICENSE b/JuliaSyntax/LICENSE deleted file mode 100644 index 11212fbf53df0..0000000000000 --- a/JuliaSyntax/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2021 Julia Computing and contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/JuliaSyntax/Tokenize/LICENSE.md b/JuliaSyntax/LICENSE.md similarity index 53% rename from JuliaSyntax/Tokenize/LICENSE.md rename to JuliaSyntax/LICENSE.md index f316960a03e7b..d70e81a6cea51 100644 --- a/JuliaSyntax/Tokenize/LICENSE.md +++ b/JuliaSyntax/LICENSE.md @@ -1,4 +1,27 @@ -The Tokenize.jl package is licensed under the MIT "Expat" License: +The JuliaSyntax.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2021 Julia Computing and contributors +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. + +The code in src/Tokenize and test/lexer.jl is derived from the Tokenize.jl +package and is also licensed under the MIT "Expat" License: > Copyright (c) 2016: Kristoffer Carlsson. > @@ -21,27 +44,3 @@ The Tokenize.jl package is licensed under the MIT "Expat" License: > SOFTWARE. 
> -The code in src/utilities.jl is extracted from JuliaParser.jl: - -The JuliaParser.jl package is licensed under the MIT "Expat" License: - -> Copyright (c) 2014: Jake Bolewski. -> -> Permission is hereby granted, free of charge, to any person obtaining -> a copy of this software and associated documentation files (the -> "Software"), to deal in the Software without restriction, including -> without limitation the rights to use, copy, modify, merge, publish, -> distribute, sublicense, and/or sell copies of the Software, and to -> permit persons to whom the Software is furnished to do so, subject to -> the following conditions: -> -> The above copyright notice and this permission notice shall be -> included in all copies or substantial portions of the Software. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/JuliaSyntax/Tokenize/.github/workflows/CI.yml b/JuliaSyntax/Tokenize/.github/workflows/CI.yml deleted file mode 100644 index 08e7ec3027c62..0000000000000 --- a/JuliaSyntax/Tokenize/.github/workflows/CI.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: CI -on: - pull_request: - branches: - - master - push: - branches: - - master - tags: '*' -jobs: - test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - '1.0' - - '1' - - 'nightly' - os: - - ubuntu-latest - - macOS-latest - - windows-latest - arch: - - x64 - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: actions/cache@v1 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v1 - with: - file: lcov.info - diff --git a/JuliaSyntax/Tokenize/.github/workflows/TagBot.yml b/JuliaSyntax/Tokenize/.github/workflows/TagBot.yml deleted file mode 100644 index d77d3a0c36d8a..0000000000000 --- a/JuliaSyntax/Tokenize/.github/workflows/TagBot.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: TagBot -on: - schedule: - - cron: 0 * * * * -jobs: - TagBot: - runs-on: ubuntu-latest - steps: - - uses: JuliaRegistries/TagBot@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} diff --git a/JuliaSyntax/Tokenize/.gitignore b/JuliaSyntax/Tokenize/.gitignore deleted file mode 100644 index 8c960ec808d9e..0000000000000 --- a/JuliaSyntax/Tokenize/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.jl.cov -*.jl.*.cov 
-*.jl.mem diff --git a/JuliaSyntax/Tokenize/Project.toml b/JuliaSyntax/Tokenize/Project.toml deleted file mode 100644 index 454daa772a52c..0000000000000 --- a/JuliaSyntax/Tokenize/Project.toml +++ /dev/null @@ -1,13 +0,0 @@ -name = "Tokenize" -uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.21" - -[compat] -julia = "1" - -[extras] -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test", "Printf"] diff --git a/JuliaSyntax/Tokenize/README.md b/JuliaSyntax/Tokenize/README.md deleted file mode 100644 index b4f4ac78b5d6a..0000000000000 --- a/JuliaSyntax/Tokenize/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# Tokenize - -[![Build Status](https://github.com/KristofferC/Tokenize.jl/workflows/CI/badge.svg)](https://github.com/KristofferC/Tokenize.jl/actions?query=workflows/CI) - -`Tokenize` is a Julia package that serves a similar purpose and API as the [tokenize module](https://docs.python.org/3/library/tokenize.html) in Python but for Julia. This is to take a string or buffer containing Julia code, perform lexical analysis and return a stream of tokens. - -The goals of this package is to be - -* Fast, it currently lexes all of Julia source files in ~0.25 seconds (580 files, 2 million Tokens) -* Round trippable, that is, from a stream of tokens the original string should be recoverable exactly. -* Non error throwing. Instead of throwing errors a certain error token is returned. - -### API - -#### Tokenization - -The function `tokenize` is the main entrypoint for generating `Token`s. -It takes a string or a buffer and creates an iterator that will sequentially return the next `Token` until the end of string or buffer. The argument to `tokenize` can either be a `String`, `IOBuffer` or an `IOStream`. 
- -```jl -julia> collect(tokenize("function f(x) end")) - 1,1-1,8 KEYWORD "function" - 1,9-1,9 WHITESPACE " " - 1,10-1,10 IDENTIFIER "f" - 1,11-1,11 LPAREN "(" - 1,12-1,12 IDENTIFIER "x" - 1,13-1,13 RPAREN ")" - 1,14-1,14 WHITESPACE " " - 1,15-1,17 KEYWORD "end" - 1,18-1,17 ENDMARKER "" -``` - -#### `Token`s - -Each `Token` is represented by where it starts and ends, what string it contains and what type it is. - -The API for a `Token` (non exported from the `Tokenize.Tokens` module) is. - -```julia -startbyte(T)::Int # byte offset where the token start -endbyte(t)::Int # byte offset where the token ends -untokenize(t, str)::String # string representation of the token -kind(t)::Token.Kind # kind of the token -exactkind(t)::Token.Kind # exact kind of the token -``` - -The difference between `kind` and `exactkind` is that `kind` returns `OP` for all operators and `KEYWORD` for all keywords while `exactkind` returns a unique kind for all different operators and keywords, ex; - -```jl -julia> tok = collect(tokenize("⇒"))[1]; - -julia> Tokens.kind(tok) -OP::Tokenize.Tokens.Kind = 90 - -julia> Tokens.exactkind(tok) -RIGHTWARDS_DOUBLE_ARROW::Tokenize.Tokens.Kind = 128 -``` - -All the different `Token.Kind` can be seen in the [`token_kinds.jl` file](https://github.com/JuliaLang/Tokenize.jl/blob/master/src/token_kinds.jl) diff --git a/JuliaSyntax/Tokenize/benchmark/lex_base.jl b/JuliaSyntax/Tokenize/benchmark/lex_base.jl deleted file mode 100644 index 38d65f786fc7a..0000000000000 --- a/JuliaSyntax/Tokenize/benchmark/lex_base.jl +++ /dev/null @@ -1,36 +0,0 @@ -using Tokenize -using BenchmarkTools -using Printf - -function speed_test() - tot_files = 0 - tot_tokens = 0 - tot_errors = 0 - basedir = dirname(Base.find_source_file("int.jl")) - for dir in (basedir, Sys.STDLIB) - for (root, dirs, files) in walkdir(dir) - for file in files - if endswith(file, ".jl") - tot_files += 1 - file = joinpath(root, file) - str = read(file, String)::String - l = tokenize(str) - while 
!Tokenize.Lexers.eof(l) - t = Tokenize.Lexers.next_token(l) - tot_tokens += 1 - if Tokens.iserror(t.kind) - tot_errors += 1 - end - end - end - end - end - end - tot_files, tot_tokens, tot_errors -end - -tot_files, tot_tokens, tot_errors = speed_test() -tot_time_token = @belapsed speed_test() -println("Lexed ", tot_files, " files, with a total of ", tot_tokens, - " tokens with ", tot_errors, " errors") -println("Time Token: ", @sprintf("%3.4f", tot_time_token), " seconds") diff --git a/JuliaSyntax/Tokenize/src/_precompile.jl b/JuliaSyntax/Tokenize/src/_precompile.jl deleted file mode 100644 index 75d5eb647f54b..0000000000000 --- a/JuliaSyntax/Tokenize/src/_precompile.jl +++ /dev/null @@ -1,70 +0,0 @@ -import Base: GenericIOBuffer - -function _precompile_() - ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - precompile(Tokenize.Tokens.iskeyword, (Tokenize.Tokens.Kind,)) - precompile(Tokenize.Tokens.isliteral, (Tokenize.Tokens.Kind,)) - precompile(Tokenize.Tokens.isoperator, (Tokenize.Tokens.Kind,)) - precompile(Tokenize.Tokens.Token, (Tokenize.Tokens.Kind,Tuple{Int,Int},Tuple{Int,Int},Int,Int,String)) - precompile(Tokenize.Tokens.Token, ()) - precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,)) - - precompile(Tokenize.Lexers.is_identifier_start_char, (Char,)) - precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - - precompile(Tokenize.Lexers.ishex, (Char,)) - precompile(Tokenize.Lexers.isbinary, (Char,)) - precompile(Tokenize.Lexers.isoctal, (Char,)) - precompile(Tokenize.Lexers.iswhitespace, (Char,)) - precompile(Tokenize.Lexers.Lexer, (String,)) - precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},)) - precompile(Tokenize.Lexers.tokenize, (String,)) - - precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) - 
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,)) - precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Int)) - precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - - precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Tokenize.Tokens.Kind)) - precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_percent, 
(Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Bool)) - precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},)) - - precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Char)) - - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, String,)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Base.isdigit),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof(Tokenize.Lexers.ishex),)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),)) - - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 
1}}}, typeof(Tokenize.Lexers.iswhitespace),)) - precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),)) - precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,)) - - precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},)) -end diff --git a/JuliaSyntax/Tokenize/test/lex_yourself.jl b/JuliaSyntax/Tokenize/test/lex_yourself.jl deleted file mode 100644 index bb88a31cfec1d..0000000000000 --- a/JuliaSyntax/Tokenize/test/lex_yourself.jl +++ /dev/null @@ -1,64 +0,0 @@ -@testset "lex yourself" begin - -PKGPATH = joinpath(dirname(@__FILE__), "..") - -global tot_files = 0 -global tot_time = 0.0 -global tot_tokens = 0 -global tot_errors = 0 -function testall(srcdir::AbstractString) - global tot_files, tot_time, tot_tokens, tot_errors - dirs, files = [], [] - - for fname in sort(readdir(srcdir)) - path = joinpath(srcdir, fname) - if isdir(path) - push!(dirs, path) - continue - end - _, ext = splitext(fname) - if ext == ".jl" - push!(files, path) - end - end - - if !isempty(files) - for jlpath in files - - fname = splitdir(jlpath)[end] - - buf = IOBuffer() - print(buf, open(read, jlpath)) - seek(buf, 0) - tot_files += 1 - tot_time += @elapsed tokens = collect(Tokenize.tokenize(buf)) - tot_tokens += length(tokens) - - seek(buf, 0) - str = String(take!(buf)) - - collect(Tokenize.tokenize(str)) - - for token in tokens - if Tokenize.Tokens.kind(token) == Tokenize.Tokens.ERROR - tot_errors += 1 - end - end - end - end - for dir in dirs - testall(dir) - end -end - -testall(joinpath(PKGPATH, "benchmark")) -testall(joinpath(PKGPATH, "src")) -testall(joinpath(PKGPATH, "test")) -testall(joinpath(Sys.BINDIR, Base.DATAROOTDIR)) - -println("Lexed ", tot_files, " files in ", @sprintf("%3.4f", tot_time), - " seconds with a total of ", tot_tokens, " tokens with ", tot_errors, " errors") - -@test tot_errors == 0 - -end # testset diff --git 
a/JuliaSyntax/Tokenize/test/profile.jl b/JuliaSyntax/Tokenize/test/profile.jl deleted file mode 100644 index 154e9b7b50ddb..0000000000000 --- a/JuliaSyntax/Tokenize/test/profile.jl +++ /dev/null @@ -1,54 +0,0 @@ -using Tokenize - -nt = @timed @eval(collect(Tokenize.tokenize("foo + bar"))) -println("First run took $(nt.time) seconds with $(nt.bytes/1e6) MB allocated") - -srcdir = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "..") - -allfiles = [] -for (root, dirs, files) in walkdir(srcdir, follow_symlinks = true) - for file in files - splitext(file)[2] == ".jl" || continue - push!(allfiles, joinpath(root, file)) - end -end - -# warmup -let time_taken = 0.0, allocated = 0.0 - for file in allfiles - content = IOBuffer(read(file, String)) - nt = @timed for t in Tokenize.tokenize(content) end - time_taken += nt.time - allocated += nt.bytes - end -end - -# actual run -let time_taken = 0.0, allocated = 0.0 - for file in allfiles - content = IOBuffer(read(file, String)) - nt = @timed for t in Tokenize.tokenize(content) end - time_taken += nt.time - allocated += nt.bytes - end - println("Tokenized $(length(allfiles)) files in $(time_taken) seconds with $(allocated/1e6) MB allocated") -end - -isempty(ARGS) && exit(0) - -using PProf, Profile - -# warm up profiler -let content = read(first(allfiles), String) - @profile collect(Tokenize.tokenize(content)) -end - -Profile.clear() -for file in allfiles - content = read(file, String) - @profile collect(Tokenize.tokenize(content)) -end -pprof() - -println("Press any key to exit...") -readline() diff --git a/JuliaSyntax/Tokenize/test/runtests.jl b/JuliaSyntax/Tokenize/test/runtests.jl deleted file mode 100644 index 9c8267640f97a..0000000000000 --- a/JuliaSyntax/Tokenize/test/runtests.jl +++ /dev/null @@ -1,9 +0,0 @@ -using Test, Printf - -import JuliaSyntax.Tokenize - -# Takes 10s to run -# include("lex_yourself.jl") -@testset "lexer" begin -include("lexer.jl") -end diff --git a/JuliaSyntax/src/JuliaSyntax.jl 
b/JuliaSyntax/src/JuliaSyntax.jl index a9407be8f3014..e35d62a5bbdbd 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -2,16 +2,11 @@ module JuliaSyntax using Mmap -# Internal utilities which aren't related to JuliaSyntax per se. +# Helper utilities include("utils.jl") -# Lexing -# -# We're using a git subtree for a modified version of Tokenize.jl, as we need -# several significant changes. -# TODO: Perhaps integrate these back into Tokenize? Or maybe JuliaSyntax would -# be a sensible home for the Tokenize lexer in the future? -include("../Tokenize/src/Tokenize.jl") +# Lexing uses a significantly modified version of Tokenize.jl +include("Tokenize/Tokenize.jl") using .Tokenize.Tokens: Token const TzTokens = Tokenize.Tokens include("tokens.jl") diff --git a/JuliaSyntax/Tokenize/src/Tokenize.jl b/JuliaSyntax/src/Tokenize/Tokenize.jl similarity index 68% rename from JuliaSyntax/Tokenize/src/Tokenize.jl rename to JuliaSyntax/src/Tokenize/Tokenize.jl index 79175d96fc7b1..548d8a91c8f73 100644 --- a/JuliaSyntax/Tokenize/src/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize/Tokenize.jl @@ -12,9 +12,4 @@ import .Tokens: untokenize export tokenize, untokenize, Tokens -# disable precompilation when profiling runtime performance, as -# it can lead to wrong traces -include("_precompile.jl") -_precompile_() - end # module diff --git a/JuliaSyntax/Tokenize/src/lexer.jl b/JuliaSyntax/src/Tokenize/lexer.jl similarity index 100% rename from JuliaSyntax/Tokenize/src/lexer.jl rename to JuliaSyntax/src/Tokenize/lexer.jl diff --git a/JuliaSyntax/Tokenize/src/token.jl b/JuliaSyntax/src/Tokenize/token.jl similarity index 100% rename from JuliaSyntax/Tokenize/src/token.jl rename to JuliaSyntax/src/Tokenize/token.jl diff --git a/JuliaSyntax/Tokenize/src/token_kinds.jl b/JuliaSyntax/src/Tokenize/token_kinds.jl similarity index 100% rename from JuliaSyntax/Tokenize/src/token_kinds.jl rename to JuliaSyntax/src/Tokenize/token_kinds.jl diff --git 
a/JuliaSyntax/Tokenize/src/utilities.jl b/JuliaSyntax/src/Tokenize/utilities.jl similarity index 100% rename from JuliaSyntax/Tokenize/src/utilities.jl rename to JuliaSyntax/src/Tokenize/utilities.jl diff --git a/JuliaSyntax/Tokenize/test/lexer.jl b/JuliaSyntax/test/lexer.jl similarity index 99% rename from JuliaSyntax/Tokenize/test/lexer.jl rename to JuliaSyntax/test/lexer.jl index 2f8e19e144a43..b0def96833d76 100644 --- a/JuliaSyntax/Tokenize/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -1,7 +1,3 @@ -using JuliaSyntax.Tokenize -using JuliaSyntax.Tokenize.Lexers -using Test - const T = Tokenize.Tokens tok(str, i = 1) = collect(tokenize(str))[i] diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 9c499084087c3..ded79a74fb559 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -7,11 +7,11 @@ using JuliaSyntax: GreenNode, SyntaxNode, flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild!, SyntaxHead -module TokenizeTests - using Test - @testset "Tokenize" begin - include("../Tokenize/test/runtests.jl") - end +using JuliaSyntax.Tokenize +using JuliaSyntax.Tokenize.Lexers + +@testset "Tokenize" begin + include("lexer.jl") end include("test_utils.jl") From c4cfd2d5bafb67498368ae45e258e8d9c98d0696 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 9 Aug 2022 06:12:22 +1000 Subject: [PATCH 0449/1109] New `Kind` in JuliaSyntax module; use @ K_str everywhere Replace the @ enum-based version of Kind with a custom bits type. This allows us to use the K_str macro to name the enumerants with more self-descriptive names. Refactor to use this new Kind type everywhere from the lexer to the parser. 
--- JuliaSyntax/src/JuliaSyntax.jl | 1 + JuliaSyntax/src/Tokenize/lexer.jl | 451 +++-- JuliaSyntax/src/Tokenize/token.jl | 52 +- JuliaSyntax/src/Tokenize/token_kinds.jl | 2237 +++++++---------------- JuliaSyntax/src/Tokenize/utilities.jl | 44 +- JuliaSyntax/src/kinds.jl | 996 ++++++++++ JuliaSyntax/src/token_kinds.jl | 908 --------- JuliaSyntax/src/tokens.jl | 71 +- JuliaSyntax/test/lexer.jl | 724 ++++---- 9 files changed, 2337 insertions(+), 3147 deletions(-) create mode 100644 JuliaSyntax/src/kinds.jl delete mode 100644 JuliaSyntax/src/token_kinds.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index e35d62a5bbdbd..4aeb6cbb314df 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -5,6 +5,7 @@ using Mmap # Helper utilities include("utils.jl") +include("kinds.jl") # Lexing uses a significantly modified version of Tokenize.jl include("Tokenize/Tokenize.jl") using .Tokenize.Tokens: Token diff --git a/JuliaSyntax/src/Tokenize/lexer.jl b/JuliaSyntax/src/Tokenize/lexer.jl index 516c5e68b43c5..b4e50dc677938 100644 --- a/JuliaSyntax/src/Tokenize/lexer.jl +++ b/JuliaSyntax/src/Tokenize/lexer.jl @@ -1,15 +1,10 @@ module Lexers -include("utilities.jl") - import ..Tokens -import ..Tokens: Token, Kind, UNICODE_OPS, EMPTY_TOKEN, isliteral - -import ..Tokens: FUNCTION, ABSTRACT, IDENTIFIER, BAREMODULE, BEGIN, BREAK, CATCH, CONST, CONTINUE, - DO, ELSE, ELSEIF, END, EXPORT, FALSE, FINALLY, FOR, FUNCTION, GLOBAL, LET, LOCAL, IF, - IMPORT, MACRO, MODULE, OUTER, QUOTE, RETURN, TRUE, TRY, TYPE, USING, WHILE, ISA, IN, - MUTABLE, PRIMITIVE, STRUCT, WHERE +import ..Tokens: @K_str, Token, Kind, UNICODE_OPS, EMPTY_TOKEN, + isliteral, iserror, iscontextualkeyword, iswordoperator +include("utilities.jl") export tokenize @@ -45,7 +40,7 @@ mutable struct Lexer{IO_t <: IO} current_col::Int current_pos::Int - last_token::Tokens.Kind + last_token::Kind string_states::Vector{StringState} charstore::IOBuffer 
chars::Tuple{Char,Char,Char,Char} @@ -80,7 +75,7 @@ function Lexer(io::IO) end end Lexer(io, position(io), 1, 1, position(io), 1, 1, position(io), - Tokens.ERROR, Vector{StringState}(), IOBuffer(), + K"error", Vector{StringState}(), IOBuffer(), (c1,c2,c3,c4), (p1,p2,p3,p4), false, false, false) end Lexer(str::AbstractString) = Lexer(IOBuffer(str)) @@ -110,13 +105,13 @@ function Base.iterate(l::Lexer) l.current_col = 1 l.current_pos = l.io_startpos t = next_token(l) - return t, t.kind == Tokens.ENDMARKER + return t, t.kind == K"EndMarker" end function Base.iterate(l::Lexer, isdone::Any) isdone && return nothing t = next_token(l) - return t, t.kind == Tokens.ENDMARKER + return t, t.kind == K"EndMarker" end function Base.show(io::IO, l::Lexer) @@ -265,13 +260,13 @@ function emit(l::Lexer, kind::Kind) end """ - emit_error(l::Lexer, err::Kind=Tokens.ERROR) + emit_error(l::Lexer, err::Kind=K"error") -Returns an `ERROR` token with error `err` and starts a new `Token`. +Returns an `K"error"` token with error `err` and starts a new `Token`. 
""" -function emit_error(l::Lexer, err::Kind = Tokens.ERROR) +function emit_error(l::Lexer, err::Kind = K"error") l.errored = true - @assert Tokens.iserror(err) + @assert iserror(err) return emit(l, err) end @@ -292,39 +287,39 @@ end function _next_token(l::Lexer, c) if eof(c) - return emit(l, Tokens.ENDMARKER) + return emit(l, K"EndMarker") elseif iswhitespace(c) return lex_whitespace(l, c) elseif c == '[' - return emit(l, Tokens.LSQUARE) + return emit(l, K"[") elseif c == ']' - return emit(l, Tokens.RSQUARE) + return emit(l, K"]") elseif c == '{' - return emit(l, Tokens.LBRACE) + return emit(l, K"{") elseif c == ';' - return emit(l, Tokens.SEMICOLON) + return emit(l, K";") elseif c == '}' - return emit(l, Tokens.RBRACE) + return emit(l, K"}") elseif c == '(' - return emit(l, Tokens.LPAREN) + return emit(l, K"(") elseif c == ')' - return emit(l, Tokens.RPAREN) + return emit(l, K")") elseif c == ',' - return emit(l, Tokens.COMMA) + return emit(l, K",") elseif c == '*' return lex_star(l); elseif c == '^' return lex_circumflex(l); elseif c == '@' - return emit(l, Tokens.AT_SIGN) + return emit(l, K"@") elseif c == '?' - return emit(l, Tokens.CONDITIONAL) + return emit(l, K"?") elseif c == '$' return lex_dollar(l); elseif c == '⊻' return lex_xor(l); elseif c == '~' - return emit(l, Tokens.APPROX) + return emit(l, K"~") elseif c == '#' return lex_comment(l) elseif c == '=' @@ -360,14 +355,14 @@ function _next_token(l::Lexer, c) elseif c == '-' return lex_minus(l); elseif c == '−' # \minus '−' treated as hyphen '-' - return emit(l, accept(l, '=') ? Tokens.MINUS_EQ : Tokens.MINUS) + return emit(l, accept(l, '=') ? 
K"-=" : K"-") elseif c == '`' return lex_backtick(l); elseif is_identifier_start_char(c) return lex_identifier(l, c) elseif isdigit(c) - return lex_digit(l, Tokens.INTEGER) - elseif (k = get(UNICODE_OPS, c, Tokens.ERROR)) != Tokens.ERROR + return lex_digit(l, K"Integer") + elseif (k = get(UNICODE_OPS, c, K"error")) != K"error" return emit(l, k) else emit_error(l) @@ -385,42 +380,42 @@ function lex_string_chunk(l) if c == '(' l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, state.paren_depth + 1) - return emit(l, Tokens.LPAREN) + return emit(l, K"(") elseif c == ')' l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, state.paren_depth - 1) - return emit(l, Tokens.RPAREN) + return emit(l, K")") else return _next_token(l, c) end end pc = peekchar(l) - if l.last_token == Tokens.EX_OR + if l.last_token == K"$" pc = peekchar(l) # Interpolated symbol or expression if pc == '(' readchar(l) l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, state.paren_depth + 1) - return emit(l, Tokens.LPAREN) + return emit(l, K"(") elseif is_identifier_start_char(pc) return lex_identifier(l, readchar(l)) else # Getting here is a syntax error - fall through to reading string # characters and let the parser deal with it. 
end - elseif l.last_token == Tokens.IDENTIFIER && + elseif l.last_token == K"Identifier" && !(eof(pc) || is_operator_start_char(pc) || is_never_id_char(pc)) # Only allow certain characters after interpolated vars # https://github.com/JuliaLang/julia/pull/25234 - return emit_error(l, Tokens.INVALID_INTERPOLATION_TERMINATOR) + return emit_error(l, K"ErrorInvalidInterpolationTerminator") end if eof(pc) - return emit(l, Tokens.ENDMARKER) + return emit(l, K"EndMarker") elseif !state.raw && pc == '$' # Start interpolation readchar(l) - return emit(l, Tokens.EX_OR) + return emit(l, K"$") elseif !state.raw && pc == '\\' && (pc2 = dpeekchar(l)[2]; pc2 == '\r' || pc2 == '\n') # Process escaped newline as whitespace @@ -432,7 +427,7 @@ function lex_string_chunk(l) while (pc = peekchar(l); pc == ' ' || pc == '\t') readchar(l) end - return emit(l, Tokens.WHITESPACE) + return emit(l, K"Whitespace") elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) # Terminate string pop!(l.string_states) @@ -440,9 +435,9 @@ function lex_string_chunk(l) if state.triplestr readchar(l); readchar(l) return emit(l, state.delim == '"' ? - Tokens.TRIPLE_DQUOTE : Tokens.TRIPLE_BACKTICK) + K"\"\"\"" : K"```") else - return emit(l, state.delim == '"' ? Tokens.DQUOTE : Tokens.BACKTICK) + return emit(l, state.delim == '"' ? K"\"" : K"`") end end # Read a chunk of string characters @@ -502,19 +497,19 @@ function lex_string_chunk(l) end end end - return emit(l, state.delim == '"' ? Tokens.STRING : Tokens.CMD) + return emit(l, state.delim == '"' ? 
K"String" : K"CmdString") end # Lex whitespace, a whitespace char `c` has been consumed function lex_whitespace(l::Lexer, c) - k = Tokens.WHITESPACE + k = K"Whitespace" while true if c == '\n' - k = Tokens.NEWLINE_WS + k = K"NewlineWs" end pc = peekchar(l) # stop on non whitespace and limit to a single newline in a token - if !iswhitespace(pc) || (k == Tokens.NEWLINE_WS && pc == '\n') + if !iswhitespace(pc) || (k == K"NewlineWs" && pc == '\n') break end c = readchar(l) @@ -527,7 +522,7 @@ function lex_comment(l::Lexer, doemit=true) while true pc = peekchar(l) if pc == '\n' || eof(pc) - return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN + return doemit ? emit(l, K"Comment") : EMPTY_TOKEN end readchar(l) end @@ -537,7 +532,7 @@ function lex_comment(l::Lexer, doemit=true) n_start, n_end = 1, 0 while true if eof(c) - return doemit ? emit_error(l, Tokens.EOF_MULTICOMMENT) : EMPTY_TOKEN + return doemit ? emit_error(l, K"ErrorEofMultiComment") : EMPTY_TOKEN end nc = readchar(l) if c == '#' && nc == '=' @@ -546,7 +541,7 @@ function lex_comment(l::Lexer, doemit=true) n_end += 1 end if n_start == n_end - return doemit ? emit(l, Tokens.COMMENT) : EMPTY_TOKEN + return doemit ? emit(l, K"Comment") : EMPTY_TOKEN end pc = c c = nc @@ -556,171 +551,171 @@ end # Lex a greater char, a '>' has been consumed function lex_greater(l::Lexer) - if accept(l, '>') # >> - if accept(l, '>') # >>> - if accept(l, '=') # >>>= - return emit(l, Tokens.UNSIGNED_BITSHIFT_EQ) + if accept(l, '>') + if accept(l, '>') + if accept(l, '=') + return emit(l, K">>>=") else # >>>?, ? 
not a = - return emit(l, Tokens.UNSIGNED_BITSHIFT) + return emit(l, K">>>") end - elseif accept(l, '=') # >>= - return emit(l, Tokens.RBITSHIFT_EQ) - else # '>>' - return emit(l, Tokens.RBITSHIFT) + elseif accept(l, '=') + return emit(l, K">>=") + else + return emit(l, K">>") end - elseif accept(l, '=') # >= - return emit(l, Tokens.GREATER_EQ) - elseif accept(l, ':') # >: - return emit(l, Tokens.ISSUPERTYPE) - else # '>' - return emit(l, Tokens.GREATER) + elseif accept(l, '=') + return emit(l, K">=") + elseif accept(l, ':') + return emit(l, K">:") + else + return emit(l, K">") end end # Lex a less char, a '<' has been consumed function lex_less(l::Lexer) - if accept(l, '<') # << - if accept(l, '=') # <<= - return emit(l, Tokens.LBITSHIFT_EQ) + if accept(l, '<') + if accept(l, '=') + return emit(l, K"<<=") else # '< + return emit(l, K"<:") + elseif accept(l, '|') + return emit(l, K"<|") + elseif dpeekchar(l) == ('-', '-') readchar(l); readchar(l) if accept(l, '>') - return emit(l, Tokens.DOUBLE_ARROW) + return emit(l, K"<-->") else - return emit(l, Tokens.LEFT_ARROW) + return emit(l, K"<--") end else - return emit(l, Tokens.LESS) # '<' + return emit(l, K"<") end end # Lex all tokens that start with an = character. 
# An '=' char has been consumed function lex_equal(l::Lexer) - if accept(l, '=') # == - if accept(l, '=') # === - emit(l, Tokens.EQEQEQ) + if accept(l, '=') + if accept(l, '=') + emit(l, K"===") else - emit(l, Tokens.EQEQ) + emit(l, K"==") end - elseif accept(l, '>') # => - emit(l, Tokens.PAIR_ARROW) + elseif accept(l, '>') + emit(l, K"=>") else - emit(l, Tokens.EQ) + emit(l, K"=") end end # Lex a colon, a ':' has been consumed function lex_colon(l::Lexer) - if accept(l, ':') # '::' - return emit(l, Tokens.DECLARATION) - elseif accept(l, '=') # ':=' - return emit(l, Tokens.COLON_EQ) + if accept(l, ':') + return emit(l, K"::") + elseif accept(l, '=') + return emit(l, K":=") else - return emit(l, Tokens.COLON) + return emit(l, K":") end end function lex_exclaim(l::Lexer) - if accept(l, '=') # != - if accept(l, '=') # !== - return emit(l, Tokens.NOT_IS) - else # != - return emit(l, Tokens.NOT_EQ) + if accept(l, '=') + if accept(l, '=') + return emit(l, K"!==") + else + return emit(l, K"!=") end else - return emit(l, Tokens.NOT) + return emit(l, K"!") end end function lex_percent(l::Lexer) if accept(l, '=') - return emit(l, Tokens.REM_EQ) + return emit(l, K"%=") else - return emit(l, Tokens.REM) + return emit(l, K"%") end end function lex_bar(l::Lexer) - if accept(l, '=') # |= - return emit(l, Tokens.OR_EQ) - elseif accept(l, '>') # |> - return emit(l, Tokens.RPIPE) - elseif accept(l, '|') # || - return emit(l, Tokens.LAZY_OR) + if accept(l, '=') + return emit(l, K"|=") + elseif accept(l, '>') + return emit(l, K"|>") + elseif accept(l, '|') + return emit(l, K"||") else - emit(l, Tokens.OR) # '|' + emit(l, K"|") end end function lex_plus(l::Lexer) if accept(l, '+') - return emit(l, Tokens.PLUSPLUS) + return emit(l, K"++") elseif accept(l, '=') - return emit(l, Tokens.PLUS_EQ) + return emit(l, K"+=") end - return emit(l, Tokens.PLUS) + return emit(l, K"+") end function lex_minus(l::Lexer) if accept(l, '-') if accept(l, '>') - return emit(l, Tokens.RIGHT_ARROW) + return 
emit(l, K"-->") else - return emit_error(l, Tokens.INVALID_OPERATOR) # "--" is an invalid operator + return emit_error(l, K"ErrorInvalidOperator") # "--" is an invalid operator end elseif !l.dotop && accept(l, '>') - return emit(l, Tokens.ANON_FUNC) + return emit(l, K"->") elseif accept(l, '=') - return emit(l, Tokens.MINUS_EQ) + return emit(l, K"-=") end - return emit(l, Tokens.MINUS) + return emit(l, K"-") end function lex_star(l::Lexer) if accept(l, '*') - return emit_error(l, Tokens.INVALID_OPERATOR) # "**" is an invalid operator use ^ + return emit_error(l, K"ErrorInvalidOperator") # "**" is an invalid operator use ^ elseif accept(l, '=') - return emit(l, Tokens.STAR_EQ) + return emit(l, K"*=") end - return emit(l, Tokens.STAR) + return emit(l, K"*") end function lex_circumflex(l::Lexer) if accept(l, '=') - return emit(l, Tokens.CIRCUMFLEX_EQ) + return emit(l, K"^=") end - return emit(l, Tokens.CIRCUMFLEX_ACCENT) + return emit(l, K"^") end function lex_division(l::Lexer) if accept(l, '=') - return emit(l, Tokens.DIVISION_EQ) + return emit(l, K"÷=") end - return emit(l, Tokens.DIVISION_SIGN) + return emit(l, K"÷") end function lex_dollar(l::Lexer) if accept(l, '=') - return emit(l, Tokens.EX_OR_EQ) + return emit(l, K"$=") end - return emit(l, Tokens.EX_OR) + return emit(l, K"$") end function lex_xor(l::Lexer) if accept(l, '=') - return emit(l, Tokens.XOR_EQ) + return emit(l, K"⊻=") end - return emit(l, Tokens.XOR) + return emit(l, K"⊻") end function accept_number(l::Lexer, f::F) where {F} @@ -741,10 +736,10 @@ function lex_digit(l::Lexer, kind) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' - if kind === Tokens.FLOAT - # If we enter the function with kind == FLOAT then a '.' has been parsed. + if kind === K"Float" + # If we enter the function with kind == K"Float" then a '.' has been parsed. readchar(l) - return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + return emit_error(l, K"ErrorInvalidNumericConstant") elseif ppc == '.' 
return emit(l, kind) elseif is_operator_start_char(ppc) && ppc !== ':' @@ -767,113 +762,113 @@ function lex_digit(l::Lexer, kind) || ppc == ':' || ppc == '?' || eof(ppc))) - kind = Tokens.INTEGER + kind = K"Integer" return emit(l, kind) end readchar(l) - kind = Tokens.FLOAT + kind = K"Float" accept_number(l, isdigit) pc, ppc = dpeekchar(l) if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') - kind = Tokens.FLOAT + kind = K"Float" readchar(l) accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) if pc === '.' && !dotop2(ppc, ' ') accept(l, '.') - return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + return emit_error(l, K"ErrorInvalidNumericConstant") end else return emit_error(l) end elseif pc == '.' && (is_identifier_start_char(ppc) || eof(ppc)) readchar(l) - return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + return emit_error(l, K"ErrorInvalidNumericConstant") end elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') - kind = Tokens.FLOAT + kind = K"Float" readchar(l) accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) if pc === '.' 
&& !dotop2(ppc, ' ') accept(l, '.') - return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + return emit_error(l, K"ErrorInvalidNumericConstant") end else return emit_error(l) end elseif position(l) - startpos(l) == 1 && l.chars[1] == '0' - kind == Tokens.INTEGER + kind == K"Integer" if pc == 'x' - kind = Tokens.HEX_INT + kind = K"HexInt" isfloat = false readchar(l) - !(ishex(ppc) || ppc == '.') && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + !(ishex(ppc) || ppc == '.') && return emit_error(l, K"ErrorInvalidNumericConstant") accept_number(l, ishex) if accept(l, '.') accept_number(l, ishex) isfloat = true end if accept(l, "pP") - kind = Tokens.FLOAT + kind = K"Float" accept(l, "+-−") accept_number(l, isdigit) elseif isfloat - return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + return emit_error(l, K"ErrorInvalidNumericConstant") end elseif pc == 'b' - !isbinary(ppc) && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + !isbinary(ppc) && return emit_error(l, K"ErrorInvalidNumericConstant") readchar(l) accept_number(l, isbinary) - kind = Tokens.BIN_INT + kind = K"BinInt" elseif pc == 'o' - !isoctal(ppc) && return emit_error(l, Tokens.INVALID_NUMERIC_CONSTANT) + !isoctal(ppc) && return emit_error(l, K"ErrorInvalidNumericConstant") readchar(l) accept_number(l, isoctal) - kind = Tokens.OCT_INT + kind = K"OctInt" end end return emit(l, kind) end function lex_prime(l, doemit = true) - if l.last_token == Tokens.IDENTIFIER || - Tokens.iscontextualkeyword(l.last_token) || - Tokens.iswordoperator(l.last_token) || - l.last_token == Tokens.DOT || - l.last_token == Tokens.RPAREN || - l.last_token == Tokens.RSQUARE || - l.last_token == Tokens.RBRACE || - l.last_token == Tokens.PRIME || - l.last_token == Tokens.END || isliteral(l.last_token) - return emit(l, Tokens.PRIME) + if l.last_token == K"Identifier" || + iscontextualkeyword(l.last_token) || + iswordoperator(l.last_token) || + l.last_token == K"." 
|| + l.last_token == K")" || + l.last_token == K"]" || + l.last_token == K"}" || + l.last_token == K"'" || + l.last_token == K"end" || isliteral(l.last_token) + return emit(l, K"'") else if accept(l, '\'') if accept(l, '\'') - return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN + return doemit ? emit(l, K"Char") : EMPTY_TOKEN else # Empty char literal # Arguably this should be an error here, but we generally # look at the contents of the char literal in the parser, # so we defer erroring until there. - return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN + return doemit ? emit(l, K"Char") : EMPTY_TOKEN end end while true c = readchar(l) if eof(c) - return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN + return doemit ? emit_error(l, K"ErrorEofChar") : EMPTY_TOKEN elseif c == '\\' if eof(readchar(l)) - return doemit ? emit_error(l, Tokens.EOF_CHAR) : EMPTY_TOKEN + return doemit ? emit_error(l, K"ErrorEofChar") : EMPTY_TOKEN end elseif c == '\'' - return doemit ? emit(l, Tokens.CHAR) : EMPTY_TOKEN + return doemit ? emit(l, K"Char") : EMPTY_TOKEN end end end @@ -881,29 +876,29 @@ end function lex_amper(l::Lexer) if accept(l, '&') - return emit(l, Tokens.LAZY_AND) + return emit(l, K"&&") elseif accept(l, '=') - return emit(l, Tokens.AND_EQ) + return emit(l, K"&=") else - return emit(l, Tokens.AND) + return emit(l, K"&") end end # Parse a token starting with a quote. 
# A '"' has been consumed function lex_quote(l::Lexer) - raw = l.last_token == Tokens.IDENTIFIER || - Tokens.iscontextualkeyword(l.last_token) || - Tokens.iswordoperator(l.last_token) + raw = l.last_token == K"Identifier" || + iscontextualkeyword(l.last_token) || + iswordoperator(l.last_token) pc, dpc = dpeekchar(l) triplestr = pc == '"' && dpc == '"' push!(l.string_states, StringState(triplestr, raw, '"', 0)) if triplestr readchar(l) readchar(l) - emit(l, Tokens.TRIPLE_DQUOTE) + emit(l, K"\"\"\"") else - emit(l, Tokens.DQUOTE) + emit(l, K"\"") end end @@ -919,36 +914,36 @@ end # Parse a token starting with a forward slash. # A '/' has been consumed function lex_forwardslash(l::Lexer) - if accept(l, '/') # // - if accept(l, '=') # //= - return emit(l, Tokens.FWDFWD_SLASH_EQ) + if accept(l, '/') + if accept(l, '=') + return emit(l, K"//=") else - return emit(l, Tokens.FWDFWD_SLASH) + return emit(l, K"//") end - elseif accept(l, '=') # /= - return emit(l, Tokens.FWD_SLASH_EQ) + elseif accept(l, '=') + return emit(l, K"/=") else - return emit(l, Tokens.FWD_SLASH) + return emit(l, K"/") end end function lex_backslash(l::Lexer) if accept(l, '=') - return emit(l, Tokens.BACKSLASH_EQ) + return emit(l, K"\=") end - return emit(l, Tokens.BACKSLASH) + return emit(l, K"\\") end # TODO .op function lex_dot(l::Lexer) if accept(l, '.') if accept(l, '.') - return emit(l, Tokens.DDDOT) + return emit(l, K"...") else - return emit(l, Tokens.DDOT) + return emit(l, K"..") end elseif Base.isdigit(peekchar(l)) - return lex_digit(l, Tokens.FLOAT) + return lex_digit(l, K"Float") else pc, dpc = dpeekchar(l) if dotop1(pc) @@ -965,7 +960,7 @@ function lex_dot(l::Lexer) elseif pc == '−' l.dotop = true readchar(l) - return emit(l, accept(l, '=') ? Tokens.MINUS_EQ : Tokens.MINUS) + return emit(l, accept(l, '=') ? 
K"-=" : K"-") elseif pc =='*' l.dotop = true readchar(l) @@ -994,12 +989,12 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) if accept(l, '=') - return emit(l, Tokens.AND_EQ) + return emit(l, K"&=") else if accept(l, '&') - return emit(l, Tokens.LAZY_AND) + return emit(l, K"&&") end - return emit(l, Tokens.AND) + return emit(l, K"&") end elseif pc =='%' l.dotop = true @@ -1013,7 +1008,7 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) if accept(l, '|') - return emit(l, Tokens.LAZY_OR) + return emit(l, K"||") end return lex_bar(l) elseif pc == '!' && dpc == '=' @@ -1033,7 +1028,7 @@ function lex_dot(l::Lexer) readchar(l) return lex_equal(l) end - return emit(l, Tokens.DOT) + return emit(l, K".") end end @@ -1048,9 +1043,9 @@ function lex_backtick(l::Lexer) if triplestr readchar(l) readchar(l) - emit(l, Tokens.TRIPLE_BACKTICK) + emit(l, K"```") else - emit(l, Tokens.BACKTICK) + emit(l, K"`") end end @@ -1069,9 +1064,9 @@ function lex_identifier(l::Lexer, c) end if n > MAX_KW_LENGTH - emit(l, IDENTIFIER) + emit(l, K"Identifier") else - emit(l, get(kw_hash, h, IDENTIFIER)) + emit(l, get(kw_hash, h, K"Identifier")) end end @@ -1094,47 +1089,47 @@ function simple_hash(str) end kws = [ -Tokens.BAREMODULE, -Tokens.BEGIN, -Tokens.BREAK, -Tokens.CATCH, -Tokens.CONST, -Tokens.CONTINUE, -Tokens.DO, -Tokens.ELSE, -Tokens.ELSEIF, -Tokens.END, -Tokens.EXPORT, -Tokens.FINALLY, -Tokens.FOR, -Tokens.FUNCTION, -Tokens.GLOBAL, -Tokens.IF, -Tokens.IMPORT, -Tokens.LET, -Tokens.LOCAL, -Tokens.MACRO, -Tokens.MODULE, -Tokens.QUOTE, -Tokens.RETURN, -Tokens.STRUCT, -Tokens.TRY, -Tokens.USING, -Tokens.WHILE, -Tokens.IN, -Tokens.ISA, -Tokens.WHERE, -Tokens.TRUE, -Tokens.FALSE, - -Tokens.ABSTRACT, -Tokens.AS, -Tokens.DOC, -Tokens.MUTABLE, -Tokens.OUTER, -Tokens.PRIMITIVE, -Tokens.TYPE, -Tokens.VAR, +K"baremodule", +K"begin", +K"break", +K"catch", +K"const", +K"continue", +K"do", +K"else", +K"elseif", +K"end", +K"export", +K"finally", +K"for", +K"function", +K"global", +K"if", 
+K"import", +K"let", +K"local", +K"macro", +K"module", +K"quote", +K"return", +K"struct", +K"try", +K"using", +K"while", +K"in", +K"isa", +K"where", +K"true", +K"false", + +K"abstract", +K"as", +K"doc", +K"mutable", +K"outer", +K"primitive", +K"type", +K"var", ] const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) diff --git a/JuliaSyntax/src/Tokenize/token.jl b/JuliaSyntax/src/Tokenize/token.jl index debb4fe8a3c3e..709b706914fcb 100644 --- a/JuliaSyntax/src/Tokenize/token.jl +++ b/JuliaSyntax/src/Tokenize/token.jl @@ -1,5 +1,7 @@ module Tokens +using ...JuliaSyntax: Kind, @K_str + import Base.eof export Token @@ -7,39 +9,27 @@ export Token include("token_kinds.jl") -iskeyword(k::Kind) = begin_keywords < k < end_keywords -isliteral(k::Kind) = begin_literal < k < end_literal -isoperator(k::Kind) = begin_ops < k < end_ops -iserror(k::Kind) = begin_errors < k < end_errors -iscontextualkeyword(k::Kind) = begin_contextual_keywords < k < end_contextual_keywords +iskeyword(k::Kind) = K"BEGIN_KEYWORDS" < k < K"END_KEYWORDS" +isliteral(k::Kind) = K"BEGIN_LITERAL" < k < K"END_LITERAL" +isoperator(k::Kind) = K"BEGIN_OPS" < k < K"END_OPS" +iserror(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" +iscontextualkeyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" < k < K"END_CONTEXTUAL_KEYWORDS" function iswordoperator(k::Kind) # Keyword-like operators - k == Tokens.IN || - k == Tokens.ISA || - k == Tokens.WHERE + k == K"in" || + k == K"isa" || + k == K"where" end -# Create string => keyword kind -const KEYWORDS = Dict{String, Kind}() - -function _add_kws() - for k in instances(Kind) - if iskeyword(k) - KEYWORDS[lowercase(string(k))] = k - end - end -end -_add_kws() - # Error kind => description TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( - EOF_MULTICOMMENT => "unterminated multi-line comment #= ... 
=#", - EOF_CHAR => "unterminated character literal", - INVALID_NUMERIC_CONSTANT => "invalid numeric constant", - INVALID_OPERATOR => "invalid operator", - INVALID_INTERPOLATION_TERMINATOR => "interpolated variable ends with invalid character; use `\$(...)` instead", - ERROR => "unknown error", + K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#", + K"ErrorEofChar" => "unterminated character literal", + K"ErrorInvalidNumericConstant" => "invalid numeric constant", + K"ErrorInvalidOperator" => "invalid operator", + K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", + K"error" => "unknown error", ) struct Token @@ -53,16 +43,10 @@ end function Token(kind::Kind, startbyte::Int, endbyte::Int) Token(kind, startbyte, endbyte, false, false) end -Token() = Token(ERROR, 0, 0, false, false) +Token() = Token(K"error", 0, 0, false, false) const EMPTY_TOKEN = Token() -function kind(t::Token) - isoperator(t.kind) && return OP - iskeyword(t.kind) && return KEYWORD - iserror(t.kind) && return ERROR - return t.kind -end exactkind(t::Token) = t.kind startbyte(t::Token) = t.startbyte @@ -75,7 +59,7 @@ end function Base.show(io::IO, t::Token) print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) - print(io, rpad(kind(t), 15, " ")) + print(io, rpad(exactkind(t), 15, " ")) end end # module diff --git a/JuliaSyntax/src/Tokenize/token_kinds.jl b/JuliaSyntax/src/Tokenize/token_kinds.jl index a81b301bc1e62..2e86ab0d606f6 100644 --- a/JuliaSyntax/src/Tokenize/token_kinds.jl +++ b/JuliaSyntax/src/Tokenize/token_kinds.jl @@ -1,1503 +1,604 @@ -@enum(Kind::UInt16, - NONE, # Placeholder; never emitted by lexer - ENDMARKER, # EOF - COMMENT, # aadsdsa, #= fdsf #= - WHITESPACE, # '\n \t' - IDENTIFIER, # foo, Σxx - AT_SIGN, # @ - COMMA, #, - SEMICOLON, # ; - - begin_errors, - EOF_MULTICOMMENT, - EOF_CHAR, - INVALID_NUMERIC_CONSTANT, - INVALID_OPERATOR, - INVALID_INTERPOLATION_TERMINATOR, - ERROR, - 
end_errors, - - begin_keywords, - KEYWORD, # general - BAREMODULE, - BEGIN, - BREAK, - CATCH, - CONST, - CONTINUE, - DO, - ELSE, - ELSEIF, - END, - EXPORT, - FINALLY, - FOR, - FUNCTION, - GLOBAL, - IF, - IMPORT, - LET, - LOCAL, - MACRO, - MODULE, - QUOTE, - RETURN, - STRUCT, - TRY, - USING, - WHILE, - begin_contextual_keywords, - ABSTRACT, - AS, - DOC, - MUTABLE, - OUTER, - PRIMITIVE, - TYPE, - VAR, - end_contextual_keywords, - end_keywords, - - begin_cstparser, - INVISIBLE_BRACKETS, - NOTHING, # A literal `nothing` - WS, - SEMICOLON_WS, - NEWLINE_WS, - EMPTY_WS, - end_cstparser, - - begin_literal, - LITERAL, # general - INTEGER, # 4 - BIN_INT, # 0b1 - HEX_INT, # 0x0 - OCT_INT, # 0o0 - FLOAT, # 3.5, 3.7e+3 - STRING, # "foo" (without the " delimiters) - CHAR, # 'a' - CMD, # `cmd ...` (without delimiters) - TRUE, FALSE, - end_literal, - - begin_delimiters, - LSQUARE, # [ - RSQUARE, # [ - LBRACE, # { - RBRACE, # } - LPAREN, # ( - RPAREN, # ) - DQUOTE, # " (double quote) - TRIPLE_DQUOTE, # """ - BACKTICK, # ` - TRIPLE_BACKTICK, # ``` - end_delimiters, - - begin_ops, - OP, # general - DDDOT, # ... - - # Level 1 - begin_assignments, - EQ, # = - PLUS_EQ, # += - MINUS_EQ, # -= - STAR_EQ, # *= - FWD_SLASH_EQ, # /= - FWDFWD_SLASH_EQ, # //= - OR_EQ, # |= - CIRCUMFLEX_EQ, # ^= - DIVISION_EQ, # ÷= - REM_EQ, # %= - LBITSHIFT_EQ, # <<= - RBITSHIFT_EQ, # >>= - UNSIGNED_BITSHIFT_EQ, # >>>= - BACKSLASH_EQ, # \= - AND_EQ, # &= - COLON_EQ, # := - APPROX, # ~ - EX_OR_EQ, # $= - XOR_EQ, # ⊻= - end_assignments, - - begin_pairarrow, - PAIR_ARROW, # => - end_pairarrow, - - # Level 2 - begin_conditional, - CONDITIONAL, # ? 
- end_conditional, - - # Level 3 - begin_arrow, - RIGHT_ARROW, # --> - LEFT_ARROW, # <-- - DOUBLE_ARROW, # <--> - LEFTWARDS_ARROW, # ← - RIGHTWARDS_ARROW, # → - LEFT_RIGHT_ARROW, # ↔ - LEFTWARDS_ARROW_WITH_STROKE, # ↚ - RIGHTWARDS_ARROW_WITH_STROKE, # ↛ - LEFTWARDS_TWO_HEADED_ARROW,# ↞ - RIGHTWARDS_TWO_HEADED_ARROW, # ↠ - LEFTWARDS_ARROW_WITH_TAIL, # ↢ - RIGHTWARDS_ARROW_WITH_TAIL, # ↣ - LEFTWARDS_ARROW_FROM_BAR,# ↤ - RIGHTWARDS_ARROW_FROM_BAR, # ↦ - LEFT_RIGHT_ARROW_WITH_STROKE, # ↮ - LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, # ⇎ - LEFTWARDS_DOUBLE_ARROW_WITH_STROKE, # ⇍ - RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, # ⇏ - LEFTWARDS_DOUBLE_ARROW, # ⇐ - RIGHTWARDS_DOUBLE_ARROW, # ⇒ - LEFT_RIGHT_DOUBLE_ARROW, # ⇔ - RIGHT_ARROW_WITH_SMALL_CIRCLE, # ⇴ - THREE_RIGHTWARDS_ARROWS, # ⇶ - LEFTWARDS_ARROW_WITH_VERTICAL_STROKE, # ⇷ - RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE, # ⇸ - LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE, # ⇹ - LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⇺ - RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⇻ - LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⇼ - LEFTWARDS_OPEN_HEADED_ARROW, # ⇽ - RIGHTWARDS_OPEN_HEADED_ARROW, # ⇾ - LEFT_RIGHT_OPEN_HEADED_ARROW, # ⇿ - LONG_LEFTWARDS_ARROW, # ⟵ - LONG_RIGHTWARDS_ARROW, # ⟶ - LONG_LEFT_RIGHT_ARROW, # ⟷ - LONG_RIGHTWARDS_DOUBLE_ARROW, # ⟹ - LONG_LEFT_RIGHT_DOUBLE_ARROW, # ⟺ - LONG_LEFTWARDS_ARROW_FROM_BAR, # ⟻ - LONG_RIGHTWARDS_ARROW_FROM_BAR, # ⟼ - LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR, # ⟽ - LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, # ⟾ - LONG_RIGHTWARDS_SQUIGGLE_ARROW, # ⟿ - RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, # ⤀ - RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⤁ - LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, # ⤂ - RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, # ⤃ - LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE, # ⤄ - RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR, # ⤅ - LEFTWARDS_DOUBLE_ARROW_FROM_BAR, # ⤆ - RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, # ⤇ - LEFTWARDS_DOUBLE_DASH_ARROW, # ⤌ - RIGHTWARDS_DOUBLE_DASH_ARROW, # 
⤍ - LEFTWARDS_TRIPLE_DASH_ARROW, # ⤎ - RIGHTWARDS_TRIPLE_DASH_ARROW, # ⤏ - RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, # ⤐ - RIGHTWARDS_ARROW_WITH_DOTTED_STEM, # ⤑ - RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⤔ - RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⤕ - RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL, # ⤖ - RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⤗ - RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⤘ - LEFTWARDS_ARROW_TO_BLACK_DIAMOND, # ⤝ - RIGHTWARDS_ARROW_TO_BLACK_DIAMOND, # ⤞ - LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, # ⤟ - RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, # ⤠ - SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW, # ⥄ - RIGHTWARDS_ARROW_WITH_PLUS_BELOW, # ⥅ - LEFTWARDS_ARROW_WITH_PLUS_BELOW, # ⥆ - RIGHTWARDS_ARROW_THROUGH_X, # ⥇ - LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE, # ⥈ - LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON, # ⥊ - LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON, # ⥋ - LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON, # ⥎ - LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON, # ⥐ - LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, # ⥒ - RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, # ⥓ - LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, # ⥖ - RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, # ⥗ - LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, # ⥚ - RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, # ⥛ - LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, # ⥞ - RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, # ⥟ - LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, # ⥢ - RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, # ⥤ - LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP, # ⥦ - LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, # ⥧ - RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP, # ⥨ - RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, # ⥩ - LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, # ⥪ - LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, # ⥫ - 
RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, # ⥬ - RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, # ⥭ - RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD, # ⥰ - RULE_DELAYED, # ⧴ - THREE_LEFTWARDS_ARROWS, # ⬱ - LEFT_ARROW_WITH_SMALL_CIRCLE, # ⬰ - LEFT_ARROW_WITH_CIRCLED_PLUS, # ⬲ - LONG_LEFTWARDS_SQUIGGLE_ARROW, # ⬳ - LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, # ⬴ - LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, # ⬵ - LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR, # ⬶ - LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, # ⬷ - LEFTWARDS_ARROW_WITH_DOTTED_STEM, # ⬸ - LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⬹ - LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⬺ - LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL, # ⬻ - LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, # ⬼ - LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, # ⬽ - LEFTWARDS_ARROW_THROUGH_X, # ⬾ - WAVE_ARROW_POINTING_DIRECTLY_LEFT, # ⬿ - EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW, # ⭀ - REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, # ⭁ - LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, # ⭂ - RIGHTWARDS_ARROW_THROUGH_GREATER_THAN, # ⭃ - RIGHTWARDS_ARROW_THROUGH_SUPERSET, # ⭄ - REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW, # ⭇ - RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, # ⭈ - TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, # ⭉ - LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO, # ⭊ - LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, # ⭋ - RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, # ⭌ - HALFWIDTH_LEFTWARDS_ARROW, # ← - HALFWIDTH_RIGHTWARDS_ARROW, # → - CIRCLE_ARROW_RIGHT, - LEFT_SQUIGGLE_ARROW, # ⇜ - RIGHT_SQUIGGLE_ARROW, # ⇝ - LEFT_WAVE_ARROW, # ↜ - RIGHT_WAVE_ARROW, # ↝ - LEFTWARDS_ARROW_WITH_HOOK, # ↩ - RIGHTWARDS_ARROW_WITH_HOOK, # ↪ - LOOP_ARROW_LEFT, # ↫ - LOOP_ARROW_RIGHT, # ↬ - LEFT_HARPOON_UP, # ↼ - LEFT_HARPOON_DOWN, # ↽ - RIGHT_HARPOON_UP, # ⇀ - RIGHT_HARPOON_DOWN, # ⇁ - RIGHT_LEFT_ARROWS, # ⇄ - LEFT_RIGHT_ARROWS, # ⇆ - LEFT_LEFT_ARROWS, # ⇇ - RIGHT_RIGHT_ARROWS, # ⇉ - LEFT_RIGHT_HARPOONS, # ⇋ - 
RIGHT_LEFT_HARPOONS, # ⇌ - L_LEFT_ARROW, # ⇚ - R_RIGHT_ARROW, # ⇛ - LEFT_DASH_ARROW, # ⇠ - RIGHT_DASH_ARROW, # ⇢ - CURVE_ARROW_RIGHT, # ↷ - CURVE_ARROW_LEFT,# ↶ - CIRCLE_ARROW_LEFT,# ↺ - end_arrow, - - # Level 4 - begin_lazyor, - LAZY_OR, # || - end_lazyor, - - # Level 5 - begin_lazyand, - LAZY_AND, # && - end_lazyand, - - # Level 6 - begin_comparison, - ISSUBTYPE, # <: - ISSUPERTYPE, # >: - GREATER, # > - LESS, # < - GREATER_EQ, # >= - GREATER_THAN_OR_EQUAL_TO, # ≥ - LESS_EQ, # <= - LESS_THAN_OR_EQUAL_TO, # ≤ - EQEQ, # == - EQEQEQ, # === - IDENTICAL_TO, # ≡ - NOT_EQ, # != - NOT_EQUAL_TO, # ≠ - NOT_IS, # !== - NOT_IDENTICAL_TO, # ≢ - ELEMENT_OF, # ∈ - IN, # in - ISA, # isa - NOT_AN_ELEMENT_OF, # ∉ - CONTAINS_AS_MEMBER, # ∋ - DOES_NOT_CONTAIN_AS_MEMBER, # ∌ - SUBSET_OF_OR_EQUAL_TO, # ⊆ - NEITHER_A_SUBSET_OF_NOR_EQUAL_TO, # ⊈ - SUBSET_OF, # ⊂ - NOT_A_SUBSET_OF, # ⊄ - SUBSET_OF_WITH_NOT_EQUAL_TO, # ⊊ - PROPORTIONAL_TO, # ∝ - SMALL_ELEMENT_OF, # ∊ - SMALL_CONTAINS_AS_MEMBER, # ∍ - PARALLEL_TO, # ∥ - NOT_PARALLEL_TO, # ∦ - PROPORTION, # ∷ - GEOMETRIC_PROPORTION, # ∺ - HOMOTHETIC, # ∻ - REVERSED_TILDE, # ∽ - INVERTED_LAZY_S, # ∾ - NOT_TILDE, # ≁ - ASYMPTOTICALLY_EQUAL_TO, # ≃ - NOT_ASYMPTOTICALLY_EQUAL_TO, # ≄ - APPROXIMATELY_EQUAL_TO, # ≅ - APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO, # ≆ - NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO, # ≇ - ALMOST_EQUAL_TO, # ≈ - NOT_ALMOST_EQUAL_TO, # ≉ - ALMOST_EQUAL_OR_EQUAL_TO, # ≊ - TRIPLE_TILDE, # ≋ - ALL_EQUAL_TO, # ≌ - EQUIVALENT_TO, # ≍ - GEOMETRICALLY_EQUIVALENT_TO, # ≎ - APPROACHES_THE_LIMIT, # ≐ - GEOMETRICALLY_EQUAL_TO, # ≑ - APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF, # ≒ - IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO, # ≓ - COLON_EQUALS, # ≔ - EQUALS_COLON, # ≕ - RING_IN_EQUAL_TO, # ≖ - RING_EQUAL_TO, # ≗ - CORRESPONDS_TO, # ≘ - ESTIMATES, # ≙ - EQUIANGULAR_TO, # ≚ - STAR_EQUALS, # ≛ - DELTA_EQUAL_TO, # ≜ - EQUAL_TO_BY_DEFINITION, # ≝ - MEASURED_BY, # ≞ - QUESTIONED_EQUAL_TO, # ≟ - STRICTLY_EQUIVALENT_TO, # ≣ - LESS_THAN_OVER_EQUAL_TO, # ≦ 
- GREATER_THAN_OVER_EQUAL_TO, # ≧ - LESS_THAN_BUT_NOT_EQUAL_TO, # ≨ - GREATER_THAN_BUT_NOT_EQUAL_TO, # ≩ - MUCH_LESS_THAN, # ≪ - MUCH_GREATER_THAN, # ≫ - BETWEEN, # ≬ - NOT_EQUIVALENT_TO, # ≭ - NOT_LESS_THAN, # ≮ - NOT_GREATER_THAN, # ≯ - NEITHER_LESS_THAN_NOR_EQUAL_TO, # ≰ - NEITHER_GREATER_THAN_NOR_EQUAL_TO, # ≱ - LESS_THAN_OR_EQUIVALENT_TO, # ≲ - GREATER_THAN_OR_EQUIVALENT_TO, # ≳ - NEITHER_LESS_THAN_NOR_EQUIVALENT_TO, # ≴ - NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO, # ≵ - LESS_THAN_OR_GREATER_THAN, # ≶ - GREATER_THAN_OR_LESS_THAN, # ≷ - NEITHER_LESS_THAN_NOR_GREATER_THAN, # ≸ - NEITHER_GREATER_THAN_NOR_LESS_THAN, # ≹ - PRECEDES, # ≺ - SUCCEEDS, # ≻ - PRECEDES_OR_EQUAL_TO, # ≼ - SUCCEEDS_OR_EQUAL_TO, # ≽ - PRECEDES_OR_EQUIVALENT_TO, # ≾ - SUCCEEDS_OR_EQUIVALENT_TO, # ≿ - DOES_NOT_PRECEDE, # ⊀ - DOES_NOT_SUCCEED, # ⊁ - SUPERSET_OF, # ⊃ - NOT_A_SUPERSET_OF, # ⊅ - SUPERSET_OF_OR_EQUAL_TO, # ⊇ - NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO, # ⊉ - SUPERSET_OF_WITH_NOT_EQUAL_TO, # ⊋ - SQUARE_IMAGE_OF, # ⊏ - SQUARE_ORIGINAL_OF, # ⊐ - SQUARE_IMAGE_OF_OR_EQUAL_TO, # ⊑ - SQUARE_ORIGINAL_OF_OR_EQUAL_TO, # ⊒ - CIRCLED_EQUALS, # ⊜ - FORCES, # ⊩ - DOES_NOT_PROVE, # ⊬ - DOES_NOT_FORCE, # ⊮ - PRECEDES_UNDER_RELATION, # ⊰ - SUCCEEDS_UNDER_RELATION, # ⊱ - NORMAL_SUBGROUP_OF, # ⊲ - CONTAINS_AS_NORMAL_SUBGROUP, # ⊳ - NORMAL_SUBGROUP_OF_OR_EQUAL_TO, # ⊴ - CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO, # ⊵ - ORIGINAL_OF, # ⊶ - IMAGE_OF, # ⊷ - REVERSED_TILDE_EQUALS, # ⋍ - DOUBLE_SUBSET, # ⋐ - DOUBLE_SUPERSET, # ⋑ - EQUAL_AND_PARALLEL_TO, # ⋕ - LESS_THAN_WITH_DOT, # ⋖ - GREATER_THAN_WITH_DOT, # ⋗ - VERY_MUCH_LESS_THAN, # ⋘ - VERY_MUCH_GREATER_THAN, # ⋙ - LESS_THAN_EQUAL_TO_OR_GREATER_THAN, # ⋚ - GREATER_THAN_EQUAL_TO_OR_LESS_THAN, # ⋛ - EQUAL_TO_OR_LESS_THAN, # ⋜ - EQUAL_TO_OR_GREATER_THAN, # ⋝ - EQUAL_TO_OR_PRECEDES, # ⋞ - EQUAL_TO_OR_SUCCEEDS, # ⋟ - DOES_NOT_PRECEDE_OR_EQUAL, # ⋠ - DOES_NOT_SUCCEED_OR_EQUAL, # ⋡ - NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO, # ⋢ - NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO, # ⋣ - 
SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO, # ⋤ - SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO, # ⋥ - LESS_THAN_BUT_NOT_EQUIVALENT_TO, # ⋦ - GREATER_THAN_BUT_NOT_EQUIVALENT_TO, # ⋧ - PRECEDES_BUT_NOT_EQUIVALENT_TO, # ⋨ - SUCCEEDS_BUT_NOT_EQUIVALENT_TO, # ⋩ - NOT_NORMAL_SUBGROUP_OF, # ⋪ - DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP, # ⋫ - NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO, # ⋬ - DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL, # ⋭ - ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE, # ⋲ - ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋳ - SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋴ - ELEMENT_OF_WITH_DOT_ABOVE, # ⋵ - ELEMENT_OF_WITH_OVERBAR, # ⋶ - SMALL_ELEMENT_OF_WITH_OVERBAR, # ⋷ - ELEMENT_OF_WITH_UNDERBAR, # ⋸ - ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES, # ⋹ - CONTAINS_WITH_LONG_HORIZONTAL_STROKE, # ⋺ - CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋻ - SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, # ⋼ - CONTAINS_WITH_OVERBAR, # ⋽ - SMALL_CONTAINS_WITH_OVERBAR, # ⋾ - Z_NOTATION_BAG_MEMBERSHIP, # ⋿ - REVERSE_SOLIDUS_PRECEDING_SUBSET, # ⟈ - SUPERSET_PRECEDING_SOLIDUS, # ⟉ - ELEMENT_OF_OPENING_UPWARDS, # ⟒ - CIRCLED_PARALLEL, # ⦷ - CIRCLED_LESS_THAN, # ⧀ - CIRCLED_GREATER_THAN, # ⧁ - INCREASES_AS, # ⧡ - EQUALS_SIGN_AND_SLANTED_PARALLEL, # ⧣ - EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE, # ⧤ - IDENTICAL_TO_AND_SLANTED_PARALLEL, # ⧥ - EQUALS_SIGN_WITH_DOT_BELOW, # ⩦ - IDENTICAL_WITH_DOT_ABOVE, # ⩧ - TILDE_OPERATOR_WITH_DOT_ABOVE, # ⩪ - TILDE_OPERATOR_WITH_RISING_DOTS, # ⩫ - SIMILAR_MINUS_SIMILAR, # ⩬ - CONGRUENT_WITH_DOT_ABOVE, # ⩭ - EQUALS_WITH_ASTERISK, # ⩮ - ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT, # ⩯ - APPROXIMATELY_EQUAL_OR_EQUAL_TO, # ⩰ - EQUALS_SIGN_ABOVE_PLUS_SIGN, # ⩱ - PLUS_SIGN_ABOVE_EQUALS_SIGN, # ⩲ - EQUALS_SIGN_ABOVE_TILDE_OPERATOR, # ⩳ - DOUBLE_COLON_EQUAL, # ⩴ - TWO_CONSECUTIVE_EQUALS_SIGNS, # ⩵ - THREE_CONSECUTIVE_EQUALS_SIGNS, # ⩶ - EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW, # ⩷ - EQUIVALENT_WITH_FOUR_DOTS_ABOVE, # ⩸ 
- LESS_THAN_WITH_CIRCLE_INSIDE, # ⩹ - GREATER_THAN_WITH_CIRCLE_INSIDE, # ⩺ - LESS_THAN_WITH_QUESTION_MARK_ABOVE, # ⩻ - GREATER_THAN_WITH_QUESTION_MARK_ABOVE, # ⩼ - LESS_THAN_OR_SLANTED_EQUAL_TO, # ⩽ - GREATER_THAN_OR_SLANTED_EQUAL_TO, # ⩾ - LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, # ⩿ - GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, # ⪀ - LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, # ⪁ - GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, # ⪂ - LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT, # ⪃ - GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT, # ⪄ - LESS_THAN_OR_APPROXIMATE, # ⪅ - GREATER_THAN_OR_APPROXIMATE, # ⪆ - LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, # ⪇ - GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, # ⪈ - LESS_THAN_AND_NOT_APPROXIMATE, # ⪉ - GREATER_THAN_AND_NOT_APPROXIMATE, # ⪊ - LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN, # ⪋ - GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN, # ⪌ - LESS_THAN_ABOVE_SIMILAR_OR_EQUAL, # ⪍ - GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL, # ⪎ - LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN, # ⪏ - GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN, # ⪐ - LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL, # ⪑ - GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL, # ⪒ - LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL, # ⪓ - GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL, # ⪔ - SLANTED_EQUAL_TO_OR_LESS_THAN, # ⪕ - SLANTED_EQUAL_TO_OR_GREATER_THAN, # ⪖ - SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE, # ⪗ - SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE, # ⪘ - DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN, # ⪙ - DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN, # ⪚ - DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN, # ⪛ - DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN, # ⪜ - SIMILAR_OR_LESS_THAN, # ⪝ - SIMILAR_OR_GREATER_THAN, # ⪞ - SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN, # ⪟ - SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN, # ⪠ - DOUBLE_NESTED_LESS_THAN, # ⪡ - DOUBLE_NESTED_GREATER_THAN, # ⪢ - 
DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR, # ⪣ - GREATER_THAN_OVERLAPPING_LESS_THAN, # ⪤ - GREATER_THAN_BESIDE_LESS_THAN, # ⪥ - LESS_THAN_CLOSED_BY_CURVE, # ⪦ - GREATER_THAN_CLOSED_BY_CURVE, # ⪧ - LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, # ⪨ - GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, # ⪩ - SMALLER_THAN, # ⪪ - LARGER_THAN, # ⪫ - SMALLER_THAN_OR_EQUAL_TO, # ⪬ - LARGER_THAN_OR_EQUAL_TO, # ⪭ - EQUALS_SIGN_WITH_BUMPY_ABOVE, # ⪮ - PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN, # ⪯ - SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN, # ⪰ - PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, # ⪱ - SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, # ⪲ - PRECEDES_ABOVE_EQUALS_SIGN, # ⪳ - SUCCEEDS_ABOVE_EQUALS_SIGN, # ⪴ - PRECEDES_ABOVE_NOT_EQUAL_TO, # ⪵ - SUCCEEDS_ABOVE_NOT_EQUAL_TO, # ⪶ - PRECEDES_ABOVE_ALMOST_EQUAL_TO, # ⪷ - SUCCEEDS_ABOVE_ALMOST_EQUAL_TO, # ⪸ - PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO, # ⪹ - SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO, # ⪺ - DOUBLE_PRECEDES, # ⪻ - DOUBLE_SUCCEEDS, # ⪼ - SUBSET_WITH_DOT, # ⪽ - SUPERSET_WITH_DOT, # ⪾ - SUBSET_WITH_PLUS_SIGN_BELOW, # ⪿ - SUPERSET_WITH_PLUS_SIGN_BELOW, # ⫀ - SUBSET_WITH_MULTIPLICATION_SIGN_BELOW, # ⫁ - SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW, # ⫂ - SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, # ⫃ - SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, # ⫄ - SUBSET_OF_ABOVE_EQUALS_SIGN, # ⫅ - SUPERSET_OF_ABOVE_EQUALS_SIGN, # ⫆ - SUBSET_OF_ABOVE_TILDE_OPERATOR, # ⫇ - SUPERSET_OF_ABOVE_TILDE_OPERATOR, # ⫈ - SUBSET_OF_ABOVE_ALMOST_EQUAL_TO, # ⫉ - SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO, # ⫊ - SUBSET_OF_ABOVE_NOT_EQUAL_TO, # ⫋ - SUPERSET_OF_ABOVE_NOT_EQUAL_TO, # ⫌ - SQUARE_LEFT_OPEN_BOX_OPERATOR, # ⫍ - SQUARE_RIGHT_OPEN_BOX_OPERATOR, # ⫎ - CLOSED_SUBSET, # ⫏ - CLOSED_SUPERSET, # ⫐ - CLOSED_SUBSET_OR_EQUAL_TO, # ⫑ - CLOSED_SUPERSET_OR_EQUAL_TO, # ⫒ - SUBSET_ABOVE_SUPERSET, # ⫓ - SUPERSET_ABOVE_SUBSET, # ⫔ - SUBSET_ABOVE_SUBSET, # ⫕ - SUPERSET_ABOVE_SUPERSET, # ⫖ - SUPERSET_BESIDE_SUBSET, # ⫗ - SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET, # ⫘ - ELEMENT_OF_OPENING_DOWNWARDS, # ⫙ - 
TRIPLE_NESTED_LESS_THAN, # ⫷ - TRIPLE_NESTED_GREATER_THAN, # ⫸ - DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO, # ⫹ - DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, # ⫺ - RIGHT_TACK, # ⊢ - LEFT_TACK, # ⊣ - DOUBLE_DOWN_TACK, # ⫪ - DOUBLE_UP_TACK, # ⫫ - PERP, # ⟂ - end_comparison, - - # Level 7 - begin_pipe, - LPIPE, # <| - RPIPE, # |> - end_pipe, - - # Level 8 - begin_colon, - COLON, # : - DDOT, # .. - LDOTS, # … - TRICOLON, # ⁝ - VDOTS, # ⋮ - DDOTS, # ⋱ - ADOTS, # ⋰ - CDOTS, # ⋯ - end_colon, - - # Level 9 - begin_plus, - EX_OR, # $ - PLUS, # + - MINUS, # - - PLUSPLUS, # ++ - CIRCLED_PLUS, # ⊕ - CIRCLED_MINUS, # ⊖ - SQUARED_PLUS, # ⊞ - SQUARED_MINUS, # ⊟ - OR, # | - UNION, # ∪ - LOGICAL_OR, # ∨ - SQUARE_CUP, # ⊔ - PLUS_MINUS_SIGN, # ± - MINUS_OR_PLUS_SIGN, # ∓ - DOT_PLUS, # ∔ - DOT_MINUS, # ∸ - MINUS_TILDE, # ≂ - DIFFERENCE_BETWEEN, # ≏ - MULTISET_UNION, # ⊎ - XOR, # ⊻ - NOR, # ⊽ - CURLY_LOGICAL_OR, # ⋎ - DOUBLE_UNION, # ⋓ - DOUBLE_PLUS, # ⧺ - TRIPLE_PLUS, # ⧻ - TWO_LOGICAL_OR_OPERATOR, # ⨈ - PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE, # ⨢ - PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE, # ⨣ - PLUS_SIGN_WITH_TILDE_ABOVE, # ⨤ - PLUS_SIGN_WITH_DOT_BELOW, # ⨥ - PLUS_SIGN_WITH_TILDE_BELOW, # ⨦ - PLUS_SIGN_WITH_SUBSCRIPT_TWO, # ⨧ - PLUS_SIGN_WITH_BLACK_TRIANGLE, # ⨨ - MINUS_SIGN_WITH_COMMA_ABOVE, # ⨩ - MINUS_SIGN_WITH_DOT_BELOW, # ⨪ - MINUS_SIGN_WITH_FALLING_DOTS, # ⨫ - MINUS_SIGN_WITH_RISING_DOTS, # ⨬ - PLUS_SIGN_IN_LEFT_HALF_CIRCLE, # ⨭ - PLUS_SIGN_IN_RIGHT_HALF_CIRCLE, # ⨮ - PLUS_SIGN_IN_TRIANGLE, # ⨹ - MINUS_SIGN_IN_TRIANGLE, # ⨺ - UNION_WITH_MINUS_SIGN, # ⩁ - UNION_WITH_OVERBAR, # ⩂ - UNION_WITH_LOGICAL_OR, # ⩅ - UNION_BESIDE_AND_JOINED_WITH_UNION, # ⩊ - CLOSED_UNION_WITH_SERIFS, # ⩌ - DOUBLE_SQUARE_UNION, # ⩏ - CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT, # ⩐ - LOGICAL_OR_WITH_DOT_ABOVE, # ⩒ - DOUBLE_LOGICAL_OR, # ⩔ - TWO_INTERSECTING_LOGICAL_OR, # ⩖ - SLOPING_LARGE_OR, # ⩗ - LOGICAL_OR_WITH_MIDDLE_STEM, # ⩛ - LOGICAL_OR_WITH_HORIZONTAL_DASH, # ⩝ - SMALL_VEE_WITH_UNDERBAR, # ⩡ - 
LOGICAL_OR_WITH_DOUBLE_OVERBAR, # ⩢ - LOGICAL_OR_WITH_DOUBLE_UNDERBAR, # ⩣ - BROKEN_BAR, # ¦ - end_plus, - - # Level 10 - begin_bitshifts, - LBITSHIFT, # << - RBITSHIFT, # >> - UNSIGNED_BITSHIFT, # >>> - end_bitshifts, - - # Level 11 - begin_times, - STAR, # * - FWD_SLASH, # / - DIVISION_SIGN, # ÷ - REM, # % - UNICODE_DOT, # ⋅ - RING_OPERATOR, # ∘ - MULTIPLICATION_SIGN, # × - BACKSLASH, # \ - AND, # & - INTERSECTION, # ∩ - LOGICAL_AND, # ∧ - CIRCLED_TIMES, # ⊗ - CIRCLED_DIVISION_SLASH, # ⊘ - CIRCLED_DOT_OPERATOR, # ⊙ - CIRCLED_RING_OPERATOR, # ⊚ - CIRCLED_ASTERISK_OPERATOR, # ⊛ - SQUARED_TIMES, # ⊠ - SQUARED_DOT_OPERATOR, # ⊡ - SQUARE_CAP, # ⊓ - ASTERISK_OPERATOR, # ∗ - BULLET_OPERATOR, # ∙ - DOES_NOT_DIVIDE, # ∤ - TURNED_AMPERSAND, # ⅋ - WREATH_PRODUCT, # ≀ - NAND, # ⊼ - DIAMOND_OPERATOR, # ⋄ - STAR_OPERATOR, # ⋆ - DIVISION_TIMES, # ⋇ - LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, # ⋉ - RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, # ⋊ - LEFT_SEMIDIRECT_PRODUCT, # ⋋ - RIGHT_SEMIDIRECT_PRODUCT, # ⋌ - CURLY_LOGICAL_AND, # ⋏ - DOUBLE_INTERSECTION, # ⋒ - AND_WITH_DOT, # ⟑ - CIRCLED_REVERSE_SOLIDUS, # ⦸ - CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN, # ⦼ - CIRCLED_WHITE_BULLET, # ⦾ - CIRCLED_BULLET, # ⦿ - SOLIDUS_WITH_OVERBAR, # ⧶ - REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE, # ⧷ - TWO_LOGICAL_AND_OPERATOR, # ⨇ - MULTIPLICATION_SIGN_WITH_DOT_ABOVE, # ⨰ - MULTIPLICATION_SIGN_WITH_UNDERBAR, # ⨱ - SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED, # ⨲ - SMASH_PRODUCT, # ⨳ - MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE, # ⨴ - MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE, # ⨵ - CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT, # ⨶ - MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE, # ⨷ - CIRCLED_DIVISION_SIGN, # ⨸ - MULTIPLICATION_SIGN_IN_TRIANGLE, # ⨻ - INTERIOR_PRODUCT, # ⨼ - RIGHTHAND_INTERIOR_PRODUCT, # ⨽ - INTERSECTION_WITH_DOT, # ⩀ - INTERSECTION_WITH_OVERBAR, # ⩃ - INTERSECTION_WITH_LOGICAL_AND, # ⩄ - INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION, # ⩋ - CLOSED_INTERSECTION_WITH_SERIFS, # ⩍ - 
DOUBLE_SQUARE_INTERSECTION, # ⩎ - LOGICAL_AND_WITH_DOT_ABOVE, # ⩑ - DOUBLE_LOGICAL_AND, # ⩓ - TWO_INTERSECTING_LOGICAL_AND, # ⩕ - SLOPING_LARGE_AND, # ⩘ - LOGICAL_AND_WITH_MIDDLE_STEM, # ⩚ - LOGICAL_AND_WITH_HORIZONTAL_DASH, # ⩜ - LOGICAL_AND_WITH_DOUBLE_OVERBAR, # ⩞ - LOGICAL_AND_WITH_UNDERBAR, # ⩟ - LOGICAL_AND_WITH_DOUBLE_UNDERBAR, # ⩠ - TRANSVERSAL_INTERSECTION, # ⫛ - MULTISET_MULTIPLICATION, # ⊍ - WHITE_RIGHT_POINTING_TRIANGLE, # ▷ - JOIN, # ⨝ - LEFT_OUTER_JOIN, # ⟕ - RIGHT_OUTER_JOIN, # ⟖ - FULL_OUTER_JOIN, # ⟗ - NOT_SLASH, # ⌿ - BB_SEMI, # ⨟ - end_times, - - # Level 12 - begin_rational, - FWDFWD_SLASH, # // - end_rational, - - # Level 13 - begin_power, - CIRCUMFLEX_ACCENT, # ^ - UPWARDS_ARROW, # ↑ - DOWNWARDS_ARROW, # ↓ - DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW, # ⇵ - UPWARDS_QUADRUPLE_ARROW, # ⟰ - DOWNWARDS_QUADRUPLE_ARROW, # ⟱ - DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE, # ⤈ - UPWARDS_ARROW_WITH_HORIZONTAL_STROKE, # ⤉ - UPWARDS_TRIPLE_ARROW, # ⤊ - DOWNWARDS_TRIPLE_ARROW, # ⤋ - UPWARDS_ARROW_TO_BAR, # ⤒ - DOWNWARDS_ARROW_TO_BAR, # ⤓ - UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE, # ⥉ - UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON, # ⥌ - UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON, # ⥍ - UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON, # ⥏ - UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON, # ⥑ - UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, # ⥔ - DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, # ⥕ - UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, # ⥘ - DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, # ⥙ - UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, # ⥜ - DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, # ⥝ - UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, # ⥠ - DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, # ⥡ - UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, # ⥣ - DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, # ⥥ - UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, # ⥮ - DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, # ⥯ - 
HALFWIDTH_UPWARDS_ARROW, # ↑ - HALFWIDTH_DOWNWARDS_ARROW, # ↓ - end_power, - - # Level 14 - begin_decl, - DECLARATION, # :: - end_decl, - - # Level 15 - begin_where, - WHERE, - end_where, - - # Level 16 - begin_dot, - DOT,# . - end_dot, - - NOT, # ! - PRIME, # ' - TRANSPOSE, # .' - ANON_FUNC, # -> - - begin_unicode_ops, - NOT_SIGN, # ¬ - SQUARE_ROOT, # √ - CUBE_ROOT, # ∛ - QUAD_ROOT, # ∜ - end_unicode_ops, - end_ops, - - # Kinds emitted by the parser. There's two types of these: - # 1. Implied tokens which have a position but might have zero width in the - # source text. - # - # In some cases we want to generate parse tree nodes in a standard form, - # but some of the leaf tokens are implied rather than existing in the - # source text, or the lexed tokens need to be re-kinded to represent - # special forms which only the parser can infer. These are "parser tokens". - # - # Some examples: - # - # Docstrings - the macro name is invisible - # "doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1)) - # - # String macros - the macro name does not appear in the source text, so we - # need a special kind of token to imply it. - # - # In these cases, we use some special kinds which can be emitted as zero - # width tokens to keep the parse tree more uniform. - begin_parser_tokens, - TOMBSTONE, # Empty placeholder for kind to be filled later - - # Macro names are modelled as a special kind of identifier because the - # @ may not be attached to the macro name in the source (or may not be - # associated with a token at all in the case of implied macro calls - # like CORE_DOC_MACRO_NAME) - begin_macro_names, - MACRO_NAME, # A macro name identifier - STRING_MACRO_NAME, # macname"some_str" - CMD_MACRO_NAME, # macname`some_str` - DOT_MACRO_NAME, # The macro name of @. 
- CORE_DOC_MACRO_NAME, # Core.@doc - CORE_CMD_MACRO_NAME, # Core.@cmd - CORE_INT128_STR_MACRO_NAME, # Core.@int128_str - CORE_UINT128_STR_MACRO_NAME, # Core.@uint128_str - CORE_BIG_STR_MACRO_NAME, # Core.@big_str - end_macro_names, - end_parser_tokens, - - # 2. Nonterminals which are exposed in the AST, but where the surface - # syntax doesn't have a token corresponding to the node type. - begin_syntax_kinds, - BLOCK, - CALL, - COMPARISON, - CURLY, - INERT, # QuoteNode; not quasiquote - STRING_INTERP, # "a $x" - TOPLEVEL, - TUPLE, - REF, - VECT, - MACROCALL, - KW, # the = in f(a=1) - PARAMETERS, # the list after ; in f(; a=1) - # Concatenation syntax - BRACES, - BRACESCAT, - HCAT, - VCAT, - NCAT, - TYPED_HCAT, - TYPED_VCAT, - TYPED_NCAT, - ROW, - NROW, - # Comprehensions - GENERATOR, - FILTER, - FLATTEN, - COMPREHENSION, - TYPED_COMPREHENSION, - end_syntax_kinds, -) - const UNICODE_OPS = Dict{Char, Kind}( -'−' => MINUS, -'÷' => DIVISION_SIGN, -'¬' => NOT_SIGN, -'√' => SQUARE_ROOT, -'∛' => CUBE_ROOT, -'∜' => QUAD_ROOT, -'←' => LEFTWARDS_ARROW, -'→' => RIGHTWARDS_ARROW, -'↔' => LEFT_RIGHT_ARROW, -'↚' => LEFTWARDS_ARROW_WITH_STROKE, -'↛' => RIGHTWARDS_ARROW_WITH_STROKE, -'↞' => LEFTWARDS_TWO_HEADED_ARROW, -'↠' => RIGHTWARDS_TWO_HEADED_ARROW, -'↢' => LEFTWARDS_ARROW_WITH_TAIL, -'↣' => RIGHTWARDS_ARROW_WITH_TAIL, -'↤' => LEFTWARDS_ARROW_FROM_BAR, -'↦' => RIGHTWARDS_ARROW_FROM_BAR, -'↮' => LEFT_RIGHT_ARROW_WITH_STROKE, -'⇎' => LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, -'⇍' => LEFTWARDS_DOUBLE_ARROW_WITH_STROKE, -'⇏' => RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, -'⇐' => LEFTWARDS_DOUBLE_ARROW, -'⇒' => RIGHTWARDS_DOUBLE_ARROW, -'⇔' => LEFT_RIGHT_DOUBLE_ARROW, -'⇴' => RIGHT_ARROW_WITH_SMALL_CIRCLE, -'⇶' => THREE_RIGHTWARDS_ARROWS, -'⇷' => LEFTWARDS_ARROW_WITH_VERTICAL_STROKE, -'⇸' => RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE, -'⇹' => LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE, -'⇺' => LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, -'⇻' => RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE, -'⇼' => 
LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE, -'⇽' => LEFTWARDS_OPEN_HEADED_ARROW, -'⇾' => RIGHTWARDS_OPEN_HEADED_ARROW, -'⇿' => LEFT_RIGHT_OPEN_HEADED_ARROW, -'⟵' => LONG_LEFTWARDS_ARROW, -'⟶' => LONG_RIGHTWARDS_ARROW, -'⟷' => LONG_LEFT_RIGHT_ARROW, -'⟹' => LONG_RIGHTWARDS_DOUBLE_ARROW, -'⟺' => LONG_LEFT_RIGHT_DOUBLE_ARROW, -'⟻' => LONG_LEFTWARDS_ARROW_FROM_BAR, -'⟼' => LONG_RIGHTWARDS_ARROW_FROM_BAR, -'⟽' => LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR, -'⟾' => LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, -'⟿' => LONG_RIGHTWARDS_SQUIGGLE_ARROW, -'⤀' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, -'⤁' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, -'⤂' => LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, -'⤃' => RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, -'⤄' => LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE, -'⤅' => RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR, -'⤆' => LEFTWARDS_DOUBLE_ARROW_FROM_BAR, -'⤇' => RIGHTWARDS_DOUBLE_ARROW_FROM_BAR, -'⤌' => LEFTWARDS_DOUBLE_DASH_ARROW, -'⤍' => RIGHTWARDS_DOUBLE_DASH_ARROW, -'⤎' => LEFTWARDS_TRIPLE_DASH_ARROW, -'⤏' => RIGHTWARDS_TRIPLE_DASH_ARROW, -'⤐' => RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, -'⤑' => RIGHTWARDS_ARROW_WITH_DOTTED_STEM, -'⤔' => RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, -'⤕' => RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, -'⤖' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL, -'⤗' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, -'⤘' => RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, -'⤝' => LEFTWARDS_ARROW_TO_BLACK_DIAMOND, -'⤞' => RIGHTWARDS_ARROW_TO_BLACK_DIAMOND, -'⤟' => LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, -'⤠' => RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, -'⥄' => SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW, -'⥅' => RIGHTWARDS_ARROW_WITH_PLUS_BELOW, -'⥆' => LEFTWARDS_ARROW_WITH_PLUS_BELOW, -'⥇' => RIGHTWARDS_ARROW_THROUGH_X, -'⥈' => LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE, -'⥊' => LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON, -'⥋' => 
LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON, -'⥎' => LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON, -'⥐' => LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON, -'⥒' => LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, -'⥓' => RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, -'⥖' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, -'⥗' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, -'⥚' => LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, -'⥛' => RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, -'⥞' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, -'⥟' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, -'⥢' => LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, -'⥤' => RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, -'⥦' => LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP, -'⥧' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, -'⥨' => RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP, -'⥩' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, -'⥪' => LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, -'⥫' => LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, -'⥬' => RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, -'⥭' => RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, -'⥰' => RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD, -'⧴' => RULE_DELAYED, -'⬱' => THREE_LEFTWARDS_ARROWS, -'⬰' => LEFT_ARROW_WITH_SMALL_CIRCLE, -'⬲' => LEFT_ARROW_WITH_CIRCLED_PLUS, -'⬳' => LONG_LEFTWARDS_SQUIGGLE_ARROW, -'⬴' => LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE, -'⬵' => LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE, -'⬶' => LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR, -'⬷' => LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, -'⬸' => LEFTWARDS_ARROW_WITH_DOTTED_STEM, -'⬹' => LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, -'⬺' => LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, -'⬻' => LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL, -'⬼' => LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE, -'⬽' => 
LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE, -'⬾' => LEFTWARDS_ARROW_THROUGH_X, -'⬿' => WAVE_ARROW_POINTING_DIRECTLY_LEFT, -'⭀' => EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW, -'⭁' => REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, -'⭂' => LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, -'⭃' => RIGHTWARDS_ARROW_THROUGH_GREATER_THAN, -'⭄' => RIGHTWARDS_ARROW_THROUGH_SUPERSET, -'⭇' => REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW, -'⭈' => RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO, -'⭉' => TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW, -'⭊' => LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO, -'⭋' => LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, -'⭌' => RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR, -'←' => HALFWIDTH_LEFTWARDS_ARROW, -'→' => HALFWIDTH_RIGHTWARDS_ARROW, -'≥' => GREATER_THAN_OR_EQUAL_TO, -'≤' => LESS_THAN_OR_EQUAL_TO, -'≡' => IDENTICAL_TO, -'≠' => NOT_EQUAL_TO, -'≢' => NOT_IDENTICAL_TO, -'∈' => ELEMENT_OF, -'∉' => NOT_AN_ELEMENT_OF, -'∋' => CONTAINS_AS_MEMBER, -'∌' => DOES_NOT_CONTAIN_AS_MEMBER, -'⊆' => SUBSET_OF_OR_EQUAL_TO, -'⊈' => NEITHER_A_SUBSET_OF_NOR_EQUAL_TO, -'⊂' => SUBSET_OF, -'⊄' => NOT_A_SUBSET_OF, -'⊊' => SUBSET_OF_WITH_NOT_EQUAL_TO, -'∝' => PROPORTIONAL_TO, -'∊' => SMALL_ELEMENT_OF, -'∍' => SMALL_CONTAINS_AS_MEMBER, -'∥' => PARALLEL_TO, -'∦' => NOT_PARALLEL_TO, -'∷' => PROPORTION, -'∺' => GEOMETRIC_PROPORTION, -'∻' => HOMOTHETIC, -'∽' => REVERSED_TILDE, -'∾' => INVERTED_LAZY_S, -'≁' => NOT_TILDE, -'≃' => ASYMPTOTICALLY_EQUAL_TO, -'≄' => NOT_ASYMPTOTICALLY_EQUAL_TO, -'≅' => APPROXIMATELY_EQUAL_TO, -'≆' => APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO, -'≇' => NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO, -'≈' => ALMOST_EQUAL_TO, -'≉' => NOT_ALMOST_EQUAL_TO, -'≊' => ALMOST_EQUAL_OR_EQUAL_TO, -'≋' => TRIPLE_TILDE, -'≌' => ALL_EQUAL_TO, -'≍' => EQUIVALENT_TO, -'≎' => GEOMETRICALLY_EQUIVALENT_TO, -'≐' => APPROACHES_THE_LIMIT, -'≑' => GEOMETRICALLY_EQUAL_TO, -'≒' => APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF, -'≓' => IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO, -'≔' => 
COLON_EQUALS, -'≕' => EQUALS_COLON, -'≖' => RING_IN_EQUAL_TO, -'≗' => RING_EQUAL_TO, -'≘' => CORRESPONDS_TO, -'≙' => ESTIMATES, -'≚' => EQUIANGULAR_TO, -'≛' => STAR_EQUALS, -'≜' => DELTA_EQUAL_TO, -'≝' => EQUAL_TO_BY_DEFINITION, -'≞' => MEASURED_BY, -'≟' => QUESTIONED_EQUAL_TO, -'≣' => STRICTLY_EQUIVALENT_TO, -'≦' => LESS_THAN_OVER_EQUAL_TO, -'≧' => GREATER_THAN_OVER_EQUAL_TO, -'≨' => LESS_THAN_BUT_NOT_EQUAL_TO, -'≩' => GREATER_THAN_BUT_NOT_EQUAL_TO, -'≪' => MUCH_LESS_THAN, -'≫' => MUCH_GREATER_THAN, -'≬' => BETWEEN, -'≭' => NOT_EQUIVALENT_TO, -'≮' => NOT_LESS_THAN, -'≯' => NOT_GREATER_THAN, -'≰' => NEITHER_LESS_THAN_NOR_EQUAL_TO, -'≱' => NEITHER_GREATER_THAN_NOR_EQUAL_TO, -'≲' => LESS_THAN_OR_EQUIVALENT_TO, -'≳' => GREATER_THAN_OR_EQUIVALENT_TO, -'≴' => NEITHER_LESS_THAN_NOR_EQUIVALENT_TO, -'≵' => NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO, -'≶' => LESS_THAN_OR_GREATER_THAN, -'≷' => GREATER_THAN_OR_LESS_THAN, -'≸' => NEITHER_LESS_THAN_NOR_GREATER_THAN, -'≹' => NEITHER_GREATER_THAN_NOR_LESS_THAN, -'≺' => PRECEDES, -'≻' => SUCCEEDS, -'≼' => PRECEDES_OR_EQUAL_TO, -'≽' => SUCCEEDS_OR_EQUAL_TO, -'≾' => PRECEDES_OR_EQUIVALENT_TO, -'≿' => SUCCEEDS_OR_EQUIVALENT_TO, -'⊀' => DOES_NOT_PRECEDE, -'⊁' => DOES_NOT_SUCCEED, -'⊃' => SUPERSET_OF, -'⊅' => NOT_A_SUPERSET_OF, -'⊇' => SUPERSET_OF_OR_EQUAL_TO, -'⊉' => NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO, -'⊋' => SUPERSET_OF_WITH_NOT_EQUAL_TO, -'⊏' => SQUARE_IMAGE_OF, -'⊐' => SQUARE_ORIGINAL_OF, -'⊑' => SQUARE_IMAGE_OF_OR_EQUAL_TO, -'⊒' => SQUARE_ORIGINAL_OF_OR_EQUAL_TO, -'⊜' => CIRCLED_EQUALS, -'⊩' => FORCES, -'⊬' => DOES_NOT_PROVE, -'⊮' => DOES_NOT_FORCE, -'⊰' => PRECEDES_UNDER_RELATION, -'⊱' => SUCCEEDS_UNDER_RELATION, -'⊲' => NORMAL_SUBGROUP_OF, -'⊳' => CONTAINS_AS_NORMAL_SUBGROUP, -'⊴' => NORMAL_SUBGROUP_OF_OR_EQUAL_TO, -'⊵' => CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO, -'⊶' => ORIGINAL_OF, -'⊷' => IMAGE_OF, -'⋍' => REVERSED_TILDE_EQUALS, -'⋐' => DOUBLE_SUBSET, -'⋑' => DOUBLE_SUPERSET, -'⋕' => EQUAL_AND_PARALLEL_TO, -'⋖' => 
LESS_THAN_WITH_DOT, -'⋗' => GREATER_THAN_WITH_DOT, -'⋘' => VERY_MUCH_LESS_THAN, -'⋙' => VERY_MUCH_GREATER_THAN, -'⋚' => LESS_THAN_EQUAL_TO_OR_GREATER_THAN, -'⋛' => GREATER_THAN_EQUAL_TO_OR_LESS_THAN, -'⋜' => EQUAL_TO_OR_LESS_THAN, -'⋝' => EQUAL_TO_OR_GREATER_THAN, -'⋞' => EQUAL_TO_OR_PRECEDES, -'⋟' => EQUAL_TO_OR_SUCCEEDS, -'⋠' => DOES_NOT_PRECEDE_OR_EQUAL, -'⋡' => DOES_NOT_SUCCEED_OR_EQUAL, -'⋢' => NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO, -'⋣' => NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO, -'⋤' => SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO, -'⋥' => SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO, -'⋦' => LESS_THAN_BUT_NOT_EQUIVALENT_TO, -'⋧' => GREATER_THAN_BUT_NOT_EQUIVALENT_TO, -'⋨' => PRECEDES_BUT_NOT_EQUIVALENT_TO, -'⋩' => SUCCEEDS_BUT_NOT_EQUIVALENT_TO, -'⋪' => NOT_NORMAL_SUBGROUP_OF, -'⋫' => DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP, -'⋬' => NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO, -'⋭' => DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL, -'⋲' => ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE, -'⋳' => ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, -'⋴' => SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, -'⋵' => ELEMENT_OF_WITH_DOT_ABOVE, -'⋶' => ELEMENT_OF_WITH_OVERBAR, -'⋷' => SMALL_ELEMENT_OF_WITH_OVERBAR, -'⋸' => ELEMENT_OF_WITH_UNDERBAR, -'⋹' => ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES, -'⋺' => CONTAINS_WITH_LONG_HORIZONTAL_STROKE, -'⋻' => CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, -'⋼' => SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, -'⋽' => CONTAINS_WITH_OVERBAR, -'⋾' => SMALL_CONTAINS_WITH_OVERBAR, -'⋿' => Z_NOTATION_BAG_MEMBERSHIP, -'⟈' => REVERSE_SOLIDUS_PRECEDING_SUBSET, -'⟉' => SUPERSET_PRECEDING_SOLIDUS, -'⟒' => ELEMENT_OF_OPENING_UPWARDS, -'⦷' => CIRCLED_PARALLEL, -'⧀' => CIRCLED_LESS_THAN, -'⧁' => CIRCLED_GREATER_THAN, -'⧡' => INCREASES_AS, -'⧣' => EQUALS_SIGN_AND_SLANTED_PARALLEL, -'⧤' => EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE, -'⧥' => IDENTICAL_TO_AND_SLANTED_PARALLEL, -'⩦' => EQUALS_SIGN_WITH_DOT_BELOW, -'⩧' => IDENTICAL_WITH_DOT_ABOVE, 
-'⩪' => TILDE_OPERATOR_WITH_DOT_ABOVE, -'⩫' => TILDE_OPERATOR_WITH_RISING_DOTS, -'⩬' => SIMILAR_MINUS_SIMILAR, -'⩭' => CONGRUENT_WITH_DOT_ABOVE, -'⩮' => EQUALS_WITH_ASTERISK, -'⩯' => ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT, -'⩰' => APPROXIMATELY_EQUAL_OR_EQUAL_TO, -'⩱' => EQUALS_SIGN_ABOVE_PLUS_SIGN, -'⩲' => PLUS_SIGN_ABOVE_EQUALS_SIGN, -'⩳' => EQUALS_SIGN_ABOVE_TILDE_OPERATOR, -'⩴' => DOUBLE_COLON_EQUAL, -'⩵' => TWO_CONSECUTIVE_EQUALS_SIGNS, -'⩶' => THREE_CONSECUTIVE_EQUALS_SIGNS, -'⩷' => EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW, -'⩸' => EQUIVALENT_WITH_FOUR_DOTS_ABOVE, -'⩹' => LESS_THAN_WITH_CIRCLE_INSIDE, -'⩺' => GREATER_THAN_WITH_CIRCLE_INSIDE, -'⩻' => LESS_THAN_WITH_QUESTION_MARK_ABOVE, -'⩼' => GREATER_THAN_WITH_QUESTION_MARK_ABOVE, -'⩽' => LESS_THAN_OR_SLANTED_EQUAL_TO, -'⩾' => GREATER_THAN_OR_SLANTED_EQUAL_TO, -'⩿' => LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, -'⪀' => GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, -'⪁' => LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, -'⪂' => GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, -'⪃' => LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT, -'⪄' => GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT, -'⪅' => LESS_THAN_OR_APPROXIMATE, -'⪆' => GREATER_THAN_OR_APPROXIMATE, -'⪇' => LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, -'⪈' => GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, -'⪉' => LESS_THAN_AND_NOT_APPROXIMATE, -'⪊' => GREATER_THAN_AND_NOT_APPROXIMATE, -'⪋' => LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN, -'⪌' => GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN, -'⪍' => LESS_THAN_ABOVE_SIMILAR_OR_EQUAL, -'⪎' => GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL, -'⪏' => LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN, -'⪐' => GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN, -'⪑' => LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL, -'⪒' => GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL, -'⪓' => LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL, -'⪔' => 
GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL, -'⪕' => SLANTED_EQUAL_TO_OR_LESS_THAN, -'⪖' => SLANTED_EQUAL_TO_OR_GREATER_THAN, -'⪗' => SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE, -'⪘' => SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE, -'⪙' => DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN, -'⪚' => DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN, -'⪛' => DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN, -'⪜' => DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN, -'⪝' => SIMILAR_OR_LESS_THAN, -'⪞' => SIMILAR_OR_GREATER_THAN, -'⪟' => SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN, -'⪠' => SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN, -'⪡' => DOUBLE_NESTED_LESS_THAN, -'⪢' => DOUBLE_NESTED_GREATER_THAN, -'⪣' => DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR, -'⪤' => GREATER_THAN_OVERLAPPING_LESS_THAN, -'⪥' => GREATER_THAN_BESIDE_LESS_THAN, -'⪦' => LESS_THAN_CLOSED_BY_CURVE, -'⪧' => GREATER_THAN_CLOSED_BY_CURVE, -'⪨' => LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, -'⪩' => GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, -'⪪' => SMALLER_THAN, -'⪫' => LARGER_THAN, -'⪬' => SMALLER_THAN_OR_EQUAL_TO, -'⪭' => LARGER_THAN_OR_EQUAL_TO, -'⪮' => EQUALS_SIGN_WITH_BUMPY_ABOVE, -'⪯' => PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN, -'⪰' => SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN, -'⪱' => PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, -'⪲' => SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO, -'⪳' => PRECEDES_ABOVE_EQUALS_SIGN, -'⪴' => SUCCEEDS_ABOVE_EQUALS_SIGN, -'⪵' => PRECEDES_ABOVE_NOT_EQUAL_TO, -'⪶' => SUCCEEDS_ABOVE_NOT_EQUAL_TO, -'⪷' => PRECEDES_ABOVE_ALMOST_EQUAL_TO, -'⪸' => SUCCEEDS_ABOVE_ALMOST_EQUAL_TO, -'⪹' => PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO, -'⪺' => SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO, -'⪻' => DOUBLE_PRECEDES, -'⪼' => DOUBLE_SUCCEEDS, -'⪽' => SUBSET_WITH_DOT, -'⪾' => SUPERSET_WITH_DOT, -'⪿' => SUBSET_WITH_PLUS_SIGN_BELOW, -'⫀' => SUPERSET_WITH_PLUS_SIGN_BELOW, -'⫁' => SUBSET_WITH_MULTIPLICATION_SIGN_BELOW, -'⫂' => SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW, -'⫃' => SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, -'⫄' => 
SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, -'⫅' => SUBSET_OF_ABOVE_EQUALS_SIGN, -'⫆' => SUPERSET_OF_ABOVE_EQUALS_SIGN, -'⫇' => SUBSET_OF_ABOVE_TILDE_OPERATOR, -'⫈' => SUPERSET_OF_ABOVE_TILDE_OPERATOR, -'⫉' => SUBSET_OF_ABOVE_ALMOST_EQUAL_TO, -'⫊' => SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO, -'⫋' => SUBSET_OF_ABOVE_NOT_EQUAL_TO, -'⫌' => SUPERSET_OF_ABOVE_NOT_EQUAL_TO, -'⫍' => SQUARE_LEFT_OPEN_BOX_OPERATOR, -'⫎' => SQUARE_RIGHT_OPEN_BOX_OPERATOR, -'⫏' => CLOSED_SUBSET, -'⫐' => CLOSED_SUPERSET, -'⫑' => CLOSED_SUBSET_OR_EQUAL_TO, -'⫒' => CLOSED_SUPERSET_OR_EQUAL_TO, -'⫓' => SUBSET_ABOVE_SUPERSET, -'⫔' => SUPERSET_ABOVE_SUBSET, -'⫕' => SUBSET_ABOVE_SUBSET, -'⫖' => SUPERSET_ABOVE_SUPERSET, -'⫗' => SUPERSET_BESIDE_SUBSET, -'⫘' => SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET, -'⫙' => ELEMENT_OF_OPENING_DOWNWARDS, -'⫷' => TRIPLE_NESTED_LESS_THAN, -'⫸' => TRIPLE_NESTED_GREATER_THAN, -'⫹' => DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO, -'⫺' => DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO, -'⊢' => RIGHT_TACK, -'⊣' => LEFT_TACK, -'⫪' => DOUBLE_DOWN_TACK, -'⫫' => DOUBLE_UP_TACK, -'⟂' => PERP, -'⊕' => CIRCLED_PLUS, -'⊖' => CIRCLED_MINUS, -'⊞' => SQUARED_PLUS, -'⊟' => SQUARED_MINUS, -'|' => OR, -'∪' => UNION, -'∨' => LOGICAL_OR, -'⊔' => SQUARE_CUP, -'±' => PLUS_MINUS_SIGN, -'∓' => MINUS_OR_PLUS_SIGN, -'∔' => DOT_PLUS, -'∸' => DOT_MINUS, -'≂' => MINUS_TILDE, -'≏' => DIFFERENCE_BETWEEN, -'⊎' => MULTISET_UNION, -'⊻' => XOR, -'⊽' => NOR, -'⋎' => CURLY_LOGICAL_OR, -'⋓' => DOUBLE_UNION, -'⧺' => DOUBLE_PLUS, -'⧻' => TRIPLE_PLUS, -'⨈' => TWO_LOGICAL_OR_OPERATOR, -'⨢' => PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE, -'⨣' => PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE, -'⨤' => PLUS_SIGN_WITH_TILDE_ABOVE, -'⨥' => PLUS_SIGN_WITH_DOT_BELOW, -'⨦' => PLUS_SIGN_WITH_TILDE_BELOW, -'⨧' => PLUS_SIGN_WITH_SUBSCRIPT_TWO, -'⨨' => PLUS_SIGN_WITH_BLACK_TRIANGLE, -'⨩' => MINUS_SIGN_WITH_COMMA_ABOVE, -'⨪' => MINUS_SIGN_WITH_DOT_BELOW, -'⨫' => MINUS_SIGN_WITH_FALLING_DOTS, -'⨬' => MINUS_SIGN_WITH_RISING_DOTS, -'⨭' => 
PLUS_SIGN_IN_LEFT_HALF_CIRCLE, -'⨮' => PLUS_SIGN_IN_RIGHT_HALF_CIRCLE, -'⨹' => PLUS_SIGN_IN_TRIANGLE, -'⨺' => MINUS_SIGN_IN_TRIANGLE, -'⩁' => UNION_WITH_MINUS_SIGN, -'⩂' => UNION_WITH_OVERBAR, -'⩅' => UNION_WITH_LOGICAL_OR, -'⩊' => UNION_BESIDE_AND_JOINED_WITH_UNION, -'⩌' => CLOSED_UNION_WITH_SERIFS, -'⩏' => DOUBLE_SQUARE_UNION, -'⩐' => CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT, -'⩒' => LOGICAL_OR_WITH_DOT_ABOVE, -'⩔' => DOUBLE_LOGICAL_OR, -'⩖' => TWO_INTERSECTING_LOGICAL_OR, -'⩗' => SLOPING_LARGE_OR, -'⩛' => LOGICAL_OR_WITH_MIDDLE_STEM, -'⩝' => LOGICAL_OR_WITH_HORIZONTAL_DASH, -'⩡' => SMALL_VEE_WITH_UNDERBAR, -'⩢' => LOGICAL_OR_WITH_DOUBLE_OVERBAR, -'⩣' => LOGICAL_OR_WITH_DOUBLE_UNDERBAR, -'∘' => RING_OPERATOR, -'×' => MULTIPLICATION_SIGN, -'∩' => INTERSECTION, -'∧' => LOGICAL_AND, -'⊗' => CIRCLED_TIMES, -'⊘' => CIRCLED_DIVISION_SLASH, -'⊙' => CIRCLED_DOT_OPERATOR, -'⊚' => CIRCLED_RING_OPERATOR, -'⊛' => CIRCLED_ASTERISK_OPERATOR, -'⊠' => SQUARED_TIMES, -'⊡' => SQUARED_DOT_OPERATOR, -'⊓' => SQUARE_CAP, -'∗' => ASTERISK_OPERATOR, -'∙' => BULLET_OPERATOR, -'∤' => DOES_NOT_DIVIDE, -'⅋' => TURNED_AMPERSAND, -'≀' => WREATH_PRODUCT, -'⊼' => NAND, -'⋄' => DIAMOND_OPERATOR, -'⋆' => STAR_OPERATOR, -'⋇' => DIVISION_TIMES, -'⋉' => LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, -'⋊' => RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, -'⋋' => LEFT_SEMIDIRECT_PRODUCT, -'⋌' => RIGHT_SEMIDIRECT_PRODUCT, -'⋏' => CURLY_LOGICAL_AND, -'⋒' => DOUBLE_INTERSECTION, -'⟑' => AND_WITH_DOT, -'⦸' => CIRCLED_REVERSE_SOLIDUS, -'⦼' => CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN, -'⦾' => CIRCLED_WHITE_BULLET, -'⦿' => CIRCLED_BULLET, -'⧶' => SOLIDUS_WITH_OVERBAR, -'⧷' => REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE, -'⨇' => TWO_LOGICAL_AND_OPERATOR, -'⨰' => MULTIPLICATION_SIGN_WITH_DOT_ABOVE, -'⨱' => MULTIPLICATION_SIGN_WITH_UNDERBAR, -'⨲' => SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED, -'⨳' => SMASH_PRODUCT, -'⨴' => MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE, -'⨵' => MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE, -'⨶' => 
CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT, -'⨷' => MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE, -'⨸' => CIRCLED_DIVISION_SIGN, -'⨻' => MULTIPLICATION_SIGN_IN_TRIANGLE, -'⨼' => INTERIOR_PRODUCT, -'⨽' => RIGHTHAND_INTERIOR_PRODUCT, -'⩀' => INTERSECTION_WITH_DOT, -'⩃' => INTERSECTION_WITH_OVERBAR, -'⩄' => INTERSECTION_WITH_LOGICAL_AND, -'⩋' => INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION, -'⩍' => CLOSED_INTERSECTION_WITH_SERIFS, -'⩎' => DOUBLE_SQUARE_INTERSECTION, -'⩑' => LOGICAL_AND_WITH_DOT_ABOVE, -'⩓' => DOUBLE_LOGICAL_AND, -'⩕' => TWO_INTERSECTING_LOGICAL_AND, -'⩘' => SLOPING_LARGE_AND, -'⩚' => LOGICAL_AND_WITH_MIDDLE_STEM, -'⩜' => LOGICAL_AND_WITH_HORIZONTAL_DASH, -'⩞' => LOGICAL_AND_WITH_DOUBLE_OVERBAR, -'⩟' => LOGICAL_AND_WITH_UNDERBAR, -'⩠' => LOGICAL_AND_WITH_DOUBLE_UNDERBAR, -'⫛' => TRANSVERSAL_INTERSECTION, -'⊍' => MULTISET_MULTIPLICATION, -'▷' => WHITE_RIGHT_POINTING_TRIANGLE, -'⨝' => JOIN, -'⟕' => LEFT_OUTER_JOIN, -'⟖' => RIGHT_OUTER_JOIN, -'⟗' => FULL_OUTER_JOIN, -'^' => CIRCUMFLEX_ACCENT, -'↑' => UPWARDS_ARROW, -'↓' => DOWNWARDS_ARROW, -'⇵' => DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW, -'⟰' => UPWARDS_QUADRUPLE_ARROW, -'⟱' => DOWNWARDS_QUADRUPLE_ARROW, -'⤈' => DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE, -'⤉' => UPWARDS_ARROW_WITH_HORIZONTAL_STROKE, -'⤊' => UPWARDS_TRIPLE_ARROW, -'⤋' => DOWNWARDS_TRIPLE_ARROW, -'⤒' => UPWARDS_ARROW_TO_BAR, -'⤓' => DOWNWARDS_ARROW_TO_BAR, -'⥉' => UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE, -'⥌' => UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON, -'⥍' => UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON, -'⥏' => UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON, -'⥑' => UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON, -'⥔' => UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, -'⥕' => DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, -'⥘' => UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, -'⥙' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, -'⥜' => UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, -'⥝' => DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, -'⥠' => UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, -'⥡' => 
DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, -'⥣' => UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, -'⥥' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, -'⥮' => UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, -'⥯' => DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, -'↑' => HALFWIDTH_UPWARDS_ARROW, -'↓' => HALFWIDTH_DOWNWARDS_ARROW, -# Lookalikes which are normalized into UNICODE_DOT -# https://github.com/JuliaLang/julia/pull/25157 -'\u00b7' => UNICODE_DOT, # '·' Middle Dot, -'\u0387' => UNICODE_DOT, # '·' Greek Ano Teleia, -'⋅' => UNICODE_DOT, -'…' => LDOTS, -'⁝' => TRICOLON, -'⋮' => VDOTS, -'⋱' => DDOTS, -'⋰' => ADOTS, -'⋯' => CDOTS, -'↻' => CIRCLE_ARROW_RIGHT, -'⇜' => LEFT_SQUIGGLE_ARROW, -'⇝' => RIGHT_SQUIGGLE_ARROW, -'↜' => LEFT_WAVE_ARROW, -'↝' => RIGHT_WAVE_ARROW, -'↩' => LEFTWARDS_ARROW_WITH_HOOK, -'↪' => RIGHTWARDS_ARROW_WITH_HOOK, -'↫' => LOOP_ARROW_LEFT, -'↬' => LOOP_ARROW_RIGHT, -'↼' => LEFT_HARPOON_UP, -'↽' => LEFT_HARPOON_DOWN, -'⇀' => RIGHT_HARPOON_UP, -'⇁' => RIGHT_HARPOON_DOWN, -'⇄' => RIGHT_LEFT_ARROWS, -'⇆' => LEFT_RIGHT_ARROWS, -'⇇' => LEFT_LEFT_ARROWS, -'⇉' => RIGHT_RIGHT_ARROWS, -'⇋' => LEFT_RIGHT_HARPOONS, -'⇌' => RIGHT_LEFT_HARPOONS, -'⇚' => L_LEFT_ARROW, -'⇛' => R_RIGHT_ARROW, -'⇠' => LEFT_DASH_ARROW, -'⇢' => RIGHT_DASH_ARROW, -'↷' => CURVE_ARROW_RIGHT, -'↶' => CURVE_ARROW_LEFT, -'↺' => CIRCLE_ARROW_LEFT, -'¦' => BROKEN_BAR, -'⌿' => NOT_SLASH, -'⨟' => BB_SEMI) - + # '−' is normalized into K"-", + '−' => K"-", + '÷' => K"÷", + '¬' => K"¬", + '√' => K"√", + '∛' => K"∛", + '∜' => K"∜", + '←' => K"←", + '→' => K"→", + '↔' => K"↔", + '↚' => K"↚", + '↛' => K"↛", + '↞' => K"↞", + '↠' => K"↠", + '↢' => K"↢", + '↣' => K"↣", + '↤' => K"↤", + '↦' => K"↦", + '↮' => K"↮", + '⇎' => K"⇎", + '⇍' => K"⇍", + '⇏' => K"⇏", + '⇐' => K"⇐", + '⇒' => K"⇒", + '⇔' => K"⇔", + '⇴' => K"⇴", + '⇶' => K"⇶", + '⇷' => K"⇷", + '⇸' => K"⇸", + '⇹' => K"⇹", + '⇺' => K"⇺", + '⇻' 
=> K"⇻", + '⇼' => K"⇼", + '⇽' => K"⇽", + '⇾' => K"⇾", + '⇿' => K"⇿", + '⟵' => K"⟵", + '⟶' => K"⟶", + '⟷' => K"⟷", + '⟹' => K"⟹", + '⟺' => K"⟺", + '⟻' => K"⟻", + '⟼' => K"⟼", + '⟽' => K"⟽", + '⟾' => K"⟾", + '⟿' => K"⟿", + '⤀' => K"⤀", + '⤁' => K"⤁", + '⤂' => K"⤂", + '⤃' => K"⤃", + '⤄' => K"⤄", + '⤅' => K"⤅", + '⤆' => K"⤆", + '⤇' => K"⤇", + '⤌' => K"⤌", + '⤍' => K"⤍", + '⤎' => K"⤎", + '⤏' => K"⤏", + '⤐' => K"⤐", + '⤑' => K"⤑", + '⤔' => K"⤔", + '⤕' => K"⤕", + '⤖' => K"⤖", + '⤗' => K"⤗", + '⤘' => K"⤘", + '⤝' => K"⤝", + '⤞' => K"⤞", + '⤟' => K"⤟", + '⤠' => K"⤠", + '⥄' => K"⥄", + '⥅' => K"⥅", + '⥆' => K"⥆", + '⥇' => K"⥇", + '⥈' => K"⥈", + '⥊' => K"⥊", + '⥋' => K"⥋", + '⥎' => K"⥎", + '⥐' => K"⥐", + '⥒' => K"⥒", + '⥓' => K"⥓", + '⥖' => K"⥖", + '⥗' => K"⥗", + '⥚' => K"⥚", + '⥛' => K"⥛", + '⥞' => K"⥞", + '⥟' => K"⥟", + '⥢' => K"⥢", + '⥤' => K"⥤", + '⥦' => K"⥦", + '⥧' => K"⥧", + '⥨' => K"⥨", + '⥩' => K"⥩", + '⥪' => K"⥪", + '⥫' => K"⥫", + '⥬' => K"⥬", + '⥭' => K"⥭", + '⥰' => K"⥰", + '⧴' => K"⧴", + '⬱' => K"⬱", + '⬰' => K"⬰", + '⬲' => K"⬲", + '⬳' => K"⬳", + '⬴' => K"⬴", + '⬵' => K"⬵", + '⬶' => K"⬶", + '⬷' => K"⬷", + '⬸' => K"⬸", + '⬹' => K"⬹", + '⬺' => K"⬺", + '⬻' => K"⬻", + '⬼' => K"⬼", + '⬽' => K"⬽", + '⬾' => K"⬾", + '⬿' => K"⬿", + '⭀' => K"⭀", + '⭁' => K"⭁", + '⭂' => K"⭂", + '⭃' => K"⭃", + '⭄' => K"⭄", + '⭇' => K"⭇", + '⭈' => K"⭈", + '⭉' => K"⭉", + '⭊' => K"⭊", + '⭋' => K"⭋", + '⭌' => K"⭌", + '←' => K"←", + '→' => K"→", + '≥' => K"≥", + '≤' => K"≤", + '≡' => K"≡", + '≠' => K"≠", + '≢' => K"≢", + '∈' => K"∈", + '∉' => K"∉", + '∋' => K"∋", + '∌' => K"∌", + '⊆' => K"⊆", + '⊈' => K"⊈", + '⊂' => K"⊂", + '⊄' => K"⊄", + '⊊' => K"⊊", + '∝' => K"∝", + '∊' => K"∊", + '∍' => K"∍", + '∥' => K"∥", + '∦' => K"∦", + '∷' => K"∷", + '∺' => K"∺", + '∻' => K"∻", + '∽' => K"∽", + '∾' => K"∾", + '≁' => K"≁", + '≃' => K"≃", + '≄' => K"≄", + '≅' => K"≅", + '≆' => K"≆", + '≇' => K"≇", + '≈' => K"≈", + '≉' => K"≉", + '≊' => K"≊", + '≋' => K"≋", + '≌' => K"≌", + '≍' => K"≍", + '≎' => K"≎", + '≐' => 
K"≐", + '≑' => K"≑", + '≒' => K"≒", + '≓' => K"≓", + '≔' => K"≔", + '≕' => K"≕", + '≖' => K"≖", + '≗' => K"≗", + '≘' => K"≘", + '≙' => K"≙", + '≚' => K"≚", + '≛' => K"≛", + '≜' => K"≜", + '≝' => K"≝", + '≞' => K"≞", + '≟' => K"≟", + '≣' => K"≣", + '≦' => K"≦", + '≧' => K"≧", + '≨' => K"≨", + '≩' => K"≩", + '≪' => K"≪", + '≫' => K"≫", + '≬' => K"≬", + '≭' => K"≭", + '≮' => K"≮", + '≯' => K"≯", + '≰' => K"≰", + '≱' => K"≱", + '≲' => K"≲", + '≳' => K"≳", + '≴' => K"≴", + '≵' => K"≵", + '≶' => K"≶", + '≷' => K"≷", + '≸' => K"≸", + '≹' => K"≹", + '≺' => K"≺", + '≻' => K"≻", + '≼' => K"≼", + '≽' => K"≽", + '≾' => K"≾", + '≿' => K"≿", + '⊀' => K"⊀", + '⊁' => K"⊁", + '⊃' => K"⊃", + '⊅' => K"⊅", + '⊇' => K"⊇", + '⊉' => K"⊉", + '⊋' => K"⊋", + '⊏' => K"⊏", + '⊐' => K"⊐", + '⊑' => K"⊑", + '⊒' => K"⊒", + '⊜' => K"⊜", + '⊩' => K"⊩", + '⊬' => K"⊬", + '⊮' => K"⊮", + '⊰' => K"⊰", + '⊱' => K"⊱", + '⊲' => K"⊲", + '⊳' => K"⊳", + '⊴' => K"⊴", + '⊵' => K"⊵", + '⊶' => K"⊶", + '⊷' => K"⊷", + '⋍' => K"⋍", + '⋐' => K"⋐", + '⋑' => K"⋑", + '⋕' => K"⋕", + '⋖' => K"⋖", + '⋗' => K"⋗", + '⋘' => K"⋘", + '⋙' => K"⋙", + '⋚' => K"⋚", + '⋛' => K"⋛", + '⋜' => K"⋜", + '⋝' => K"⋝", + '⋞' => K"⋞", + '⋟' => K"⋟", + '⋠' => K"⋠", + '⋡' => K"⋡", + '⋢' => K"⋢", + '⋣' => K"⋣", + '⋤' => K"⋤", + '⋥' => K"⋥", + '⋦' => K"⋦", + '⋧' => K"⋧", + '⋨' => K"⋨", + '⋩' => K"⋩", + '⋪' => K"⋪", + '⋫' => K"⋫", + '⋬' => K"⋬", + '⋭' => K"⋭", + '⋲' => K"⋲", + '⋳' => K"⋳", + '⋴' => K"⋴", + '⋵' => K"⋵", + '⋶' => K"⋶", + '⋷' => K"⋷", + '⋸' => K"⋸", + '⋹' => K"⋹", + '⋺' => K"⋺", + '⋻' => K"⋻", + '⋼' => K"⋼", + '⋽' => K"⋽", + '⋾' => K"⋾", + '⋿' => K"⋿", + '⟈' => K"⟈", + '⟉' => K"⟉", + '⟒' => K"⟒", + '⦷' => K"⦷", + '⧀' => K"⧀", + '⧁' => K"⧁", + '⧡' => K"⧡", + '⧣' => K"⧣", + '⧤' => K"⧤", + '⧥' => K"⧥", + '⩦' => K"⩦", + '⩧' => K"⩧", + '⩪' => K"⩪", + '⩫' => K"⩫", + '⩬' => K"⩬", + '⩭' => K"⩭", + '⩮' => K"⩮", + '⩯' => K"⩯", + '⩰' => K"⩰", + '⩱' => K"⩱", + '⩲' => K"⩲", + '⩳' => K"⩳", + '⩴' => K"⩴", + '⩵' => K"⩵", + '⩶' => K"⩶", + '⩷' => 
K"⩷", + '⩸' => K"⩸", + '⩹' => K"⩹", + '⩺' => K"⩺", + '⩻' => K"⩻", + '⩼' => K"⩼", + '⩽' => K"⩽", + '⩾' => K"⩾", + '⩿' => K"⩿", + '⪀' => K"⪀", + '⪁' => K"⪁", + '⪂' => K"⪂", + '⪃' => K"⪃", + '⪄' => K"⪄", + '⪅' => K"⪅", + '⪆' => K"⪆", + '⪇' => K"⪇", + '⪈' => K"⪈", + '⪉' => K"⪉", + '⪊' => K"⪊", + '⪋' => K"⪋", + '⪌' => K"⪌", + '⪍' => K"⪍", + '⪎' => K"⪎", + '⪏' => K"⪏", + '⪐' => K"⪐", + '⪑' => K"⪑", + '⪒' => K"⪒", + '⪓' => K"⪓", + '⪔' => K"⪔", + '⪕' => K"⪕", + '⪖' => K"⪖", + '⪗' => K"⪗", + '⪘' => K"⪘", + '⪙' => K"⪙", + '⪚' => K"⪚", + '⪛' => K"⪛", + '⪜' => K"⪜", + '⪝' => K"⪝", + '⪞' => K"⪞", + '⪟' => K"⪟", + '⪠' => K"⪠", + '⪡' => K"⪡", + '⪢' => K"⪢", + '⪣' => K"⪣", + '⪤' => K"⪤", + '⪥' => K"⪥", + '⪦' => K"⪦", + '⪧' => K"⪧", + '⪨' => K"⪨", + '⪩' => K"⪩", + '⪪' => K"⪪", + '⪫' => K"⪫", + '⪬' => K"⪬", + '⪭' => K"⪭", + '⪮' => K"⪮", + '⪯' => K"⪯", + '⪰' => K"⪰", + '⪱' => K"⪱", + '⪲' => K"⪲", + '⪳' => K"⪳", + '⪴' => K"⪴", + '⪵' => K"⪵", + '⪶' => K"⪶", + '⪷' => K"⪷", + '⪸' => K"⪸", + '⪹' => K"⪹", + '⪺' => K"⪺", + '⪻' => K"⪻", + '⪼' => K"⪼", + '⪽' => K"⪽", + '⪾' => K"⪾", + '⪿' => K"⪿", + '⫀' => K"⫀", + '⫁' => K"⫁", + '⫂' => K"⫂", + '⫃' => K"⫃", + '⫄' => K"⫄", + '⫅' => K"⫅", + '⫆' => K"⫆", + '⫇' => K"⫇", + '⫈' => K"⫈", + '⫉' => K"⫉", + '⫊' => K"⫊", + '⫋' => K"⫋", + '⫌' => K"⫌", + '⫍' => K"⫍", + '⫎' => K"⫎", + '⫏' => K"⫏", + '⫐' => K"⫐", + '⫑' => K"⫑", + '⫒' => K"⫒", + '⫓' => K"⫓", + '⫔' => K"⫔", + '⫕' => K"⫕", + '⫖' => K"⫖", + '⫗' => K"⫗", + '⫘' => K"⫘", + '⫙' => K"⫙", + '⫷' => K"⫷", + '⫸' => K"⫸", + '⫹' => K"⫹", + '⫺' => K"⫺", + '⊢' => K"⊢", + '⊣' => K"⊣", + '⫪' => K"⫪", + '⫫' => K"⫫", + '⟂' => K"⟂", + '⊕' => K"⊕", + '⊖' => K"⊖", + '⊞' => K"⊞", + '⊟' => K"⊟", + '|' => K"|", + '∪' => K"∪", + '∨' => K"∨", + '⊔' => K"⊔", + '±' => K"±", + '∓' => K"∓", + '∔' => K"∔", + '∸' => K"∸", + '≂' => K"≂", + '≏' => K"≏", + '⊎' => K"⊎", + '⊻' => K"⊻", + '⊽' => K"⊽", + '⋎' => K"⋎", + '⋓' => K"⋓", + '⧺' => K"⧺", + '⧻' => K"⧻", + '⨈' => K"⨈", + '⨢' => K"⨢", + '⨣' => K"⨣", + '⨤' => K"⨤", + '⨥' => 
K"⨥", + '⨦' => K"⨦", + '⨧' => K"⨧", + '⨨' => K"⨨", + '⨩' => K"⨩", + '⨪' => K"⨪", + '⨫' => K"⨫", + '⨬' => K"⨬", + '⨭' => K"⨭", + '⨮' => K"⨮", + '⨹' => K"⨹", + '⨺' => K"⨺", + '⩁' => K"⩁", + '⩂' => K"⩂", + '⩅' => K"⩅", + '⩊' => K"⩊", + '⩌' => K"⩌", + '⩏' => K"⩏", + '⩐' => K"⩐", + '⩒' => K"⩒", + '⩔' => K"⩔", + '⩖' => K"⩖", + '⩗' => K"⩗", + '⩛' => K"⩛", + '⩝' => K"⩝", + '⩡' => K"⩡", + '⩢' => K"⩢", + '⩣' => K"⩣", + '∘' => K"∘", + '×' => K"×", + '∩' => K"∩", + '∧' => K"∧", + '⊗' => K"⊗", + '⊘' => K"⊘", + '⊙' => K"⊙", + '⊚' => K"⊚", + '⊛' => K"⊛", + '⊠' => K"⊠", + '⊡' => K"⊡", + '⊓' => K"⊓", + '∗' => K"∗", + '∙' => K"∙", + '∤' => K"∤", + '⅋' => K"⅋", + '≀' => K"≀", + '⊼' => K"⊼", + '⋄' => K"⋄", + '⋆' => K"⋆", + '⋇' => K"⋇", + '⋉' => K"⋉", + '⋊' => K"⋊", + '⋋' => K"⋋", + '⋌' => K"⋌", + '⋏' => K"⋏", + '⋒' => K"⋒", + '⟑' => K"⟑", + '⦸' => K"⦸", + '⦼' => K"⦼", + '⦾' => K"⦾", + '⦿' => K"⦿", + '⧶' => K"⧶", + '⧷' => K"⧷", + '⨇' => K"⨇", + '⨰' => K"⨰", + '⨱' => K"⨱", + '⨲' => K"⨲", + '⨳' => K"⨳", + '⨴' => K"⨴", + '⨵' => K"⨵", + '⨶' => K"⨶", + '⨷' => K"⨷", + '⨸' => K"⨸", + '⨻' => K"⨻", + '⨼' => K"⨼", + '⨽' => K"⨽", + '⩀' => K"⩀", + '⩃' => K"⩃", + '⩄' => K"⩄", + '⩋' => K"⩋", + '⩍' => K"⩍", + '⩎' => K"⩎", + '⩑' => K"⩑", + '⩓' => K"⩓", + '⩕' => K"⩕", + '⩘' => K"⩘", + '⩚' => K"⩚", + '⩜' => K"⩜", + '⩞' => K"⩞", + '⩟' => K"⩟", + '⩠' => K"⩠", + '⫛' => K"⫛", + '⊍' => K"⊍", + '▷' => K"▷", + '⨝' => K"⨝", + '⟕' => K"⟕", + '⟖' => K"⟖", + '⟗' => K"⟗", + '^' => K"^", + '↑' => K"↑", + '↓' => K"↓", + '⇵' => K"⇵", + '⟰' => K"⟰", + '⟱' => K"⟱", + '⤈' => K"⤈", + '⤉' => K"⤉", + '⤊' => K"⤊", + '⤋' => K"⤋", + '⤒' => K"⤒", + '⤓' => K"⤓", + '⥉' => K"⥉", + '⥌' => K"⥌", + '⥍' => K"⥍", + '⥏' => K"⥏", + '⥑' => K"⥑", + '⥔' => K"⥔", + '⥕' => K"⥕", + '⥘' => K"⥘", + '⥙' => K"⥙", + '⥜' => K"⥜", + '⥝' => K"⥝", + '⥠' => K"⥠", + '⥡' => K"⥡", + '⥣' => K"⥣", + '⥥' => K"⥥", + '⥮' => K"⥮", + '⥯' => K"⥯", + '↑' => K"↑", + '↓' => K"↓", + # Lookalikes which are normalized into K"⋅", + # 
https://github.com/JuliaLang/julia/pull/25157, + '\u00b7' => K"⋅", # '·' Middle Dot,, + '\u0387' => K"⋅", # '·' Greek Ano Teleia,, + '⋅' => K"⋅", + '…' => K"…", + '⁝' => K"⁝", + '⋮' => K"⋮", + '⋱' => K"⋱", + '⋰' => K"⋰", + '⋯' => K"⋯", + '↻' => K"↻", + '⇜' => K"⇜", + '⇝' => K"⇝", + '↜' => K"↜", + '↝' => K"↝", + '↩' => K"↩", + '↪' => K"↪", + '↫' => K"↫", + '↬' => K"↬", + '↼' => K"↼", + '↽' => K"↽", + '⇀' => K"⇀", + '⇁' => K"⇁", + '⇄' => K"⇄", + '⇆' => K"⇆", + '⇇' => K"⇇", + '⇉' => K"⇉", + '⇋' => K"⇋", + '⇌' => K"⇌", + '⇚' => K"⇚", + '⇛' => K"⇛", + '⇠' => K"⇠", + '⇢' => K"⇢", + '↷' => K"↷", + '↶' => K"↶", + '↺' => K"↺", + '¦' => K"¦", + '⌿' => K"⌿", + '⨟' => K"⨟", +) const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() for (k, v) in UNICODE_OPS @@ -1505,71 +606,75 @@ for (k, v) in UNICODE_OPS UNICODE_OPS_REVERSE[v] = Symbol(k) end -UNICODE_OPS_REVERSE[EQ] = :(=) -UNICODE_OPS_REVERSE[PLUS_EQ] = :(+=) -UNICODE_OPS_REVERSE[MINUS_EQ] = :(-=) -UNICODE_OPS_REVERSE[STAR_EQ] = :(*=) -UNICODE_OPS_REVERSE[FWD_SLASH_EQ] = :(/=) -UNICODE_OPS_REVERSE[FWDFWD_SLASH_EQ] = :(//=) -UNICODE_OPS_REVERSE[OR_EQ] = :(|=) -UNICODE_OPS_REVERSE[CIRCUMFLEX_EQ] = :(^=) -UNICODE_OPS_REVERSE[DIVISION_EQ] = :(÷=) -UNICODE_OPS_REVERSE[REM_EQ] = :(%=) -UNICODE_OPS_REVERSE[LBITSHIFT_EQ] = :(<<=) -UNICODE_OPS_REVERSE[RBITSHIFT_EQ] = :(>>=) -UNICODE_OPS_REVERSE[LBITSHIFT] = :(<<) -UNICODE_OPS_REVERSE[RBITSHIFT] = :(>>) -UNICODE_OPS_REVERSE[UNSIGNED_BITSHIFT] = :(>>>) -UNICODE_OPS_REVERSE[UNSIGNED_BITSHIFT_EQ] = :(>>>=) -UNICODE_OPS_REVERSE[BACKSLASH_EQ] = :(\=) -UNICODE_OPS_REVERSE[AND_EQ] = :(&=) -UNICODE_OPS_REVERSE[COLON_EQ] = :(:=) -UNICODE_OPS_REVERSE[PAIR_ARROW] = :(=>) -UNICODE_OPS_REVERSE[APPROX] = :(~) -UNICODE_OPS_REVERSE[EX_OR_EQ] = :($=) -UNICODE_OPS_REVERSE[XOR_EQ] = :(⊻=) -UNICODE_OPS_REVERSE[RIGHT_ARROW] = :(-->) -UNICODE_OPS_REVERSE[LAZY_OR] = :(||) -UNICODE_OPS_REVERSE[LAZY_AND] = :(&&) -UNICODE_OPS_REVERSE[ISSUBTYPE] = :(<:) -UNICODE_OPS_REVERSE[ISSUPERTYPE] = :(>:) 
-UNICODE_OPS_REVERSE[GREATER] = :(>) -UNICODE_OPS_REVERSE[LESS] = :(<) -UNICODE_OPS_REVERSE[GREATER_EQ] = :(>=) -UNICODE_OPS_REVERSE[GREATER_THAN_OR_EQUAL_TO] = :(≥) -UNICODE_OPS_REVERSE[LESS_EQ] = :(<=) -UNICODE_OPS_REVERSE[LESS_THAN_OR_EQUAL_TO] = :(≤) -UNICODE_OPS_REVERSE[EQEQ] = :(==) -UNICODE_OPS_REVERSE[EQEQEQ] = :(===) -UNICODE_OPS_REVERSE[IDENTICAL_TO] = :(≡) -UNICODE_OPS_REVERSE[NOT_EQ] = :(!=) -UNICODE_OPS_REVERSE[NOT_EQUAL_TO] = :(≠) -UNICODE_OPS_REVERSE[NOT_IS] = :(!==) -UNICODE_OPS_REVERSE[NOT_IDENTICAL_TO] = :(≢) -UNICODE_OPS_REVERSE[IN] = :(in) -UNICODE_OPS_REVERSE[ISA] = :(isa) -UNICODE_OPS_REVERSE[LPIPE] = :(<|) -UNICODE_OPS_REVERSE[RPIPE] = :(|>) -UNICODE_OPS_REVERSE[COLON] = :(:) -UNICODE_OPS_REVERSE[DDOT] = :(..) -UNICODE_OPS_REVERSE[EX_OR] = :($) -UNICODE_OPS_REVERSE[PLUS] = :(+) -UNICODE_OPS_REVERSE[MINUS] = :(-) -UNICODE_OPS_REVERSE[PLUSPLUS] = :(++) -UNICODE_OPS_REVERSE[OR] = :(|) -UNICODE_OPS_REVERSE[STAR] = :(*) -UNICODE_OPS_REVERSE[FWD_SLASH] = :(/) -UNICODE_OPS_REVERSE[REM] = :(%) -UNICODE_OPS_REVERSE[BACKSLASH] = :(\) -UNICODE_OPS_REVERSE[AND] = :(&) -UNICODE_OPS_REVERSE[FWDFWD_SLASH] = :(//) -UNICODE_OPS_REVERSE[CIRCUMFLEX_ACCENT] = :(^) -UNICODE_OPS_REVERSE[DECLARATION] = :(::) -UNICODE_OPS_REVERSE[CONDITIONAL] = :? -UNICODE_OPS_REVERSE[DOT] = :(.) -UNICODE_OPS_REVERSE[NOT] = :(!) -UNICODE_OPS_REVERSE[PRIME] = Symbol(''') -UNICODE_OPS_REVERSE[DDDOT] = :(...) 
-UNICODE_OPS_REVERSE[TRANSPOSE] = Symbol(".'") -UNICODE_OPS_REVERSE[ANON_FUNC] = :(->) -UNICODE_OPS_REVERSE[WHERE] = :where +for (k, v) in [ + K"=" => :(=) + K"+=" => :(+=) + K"-=" => :(-=) + K"*=" => :(*=) + K"/=" => :(/=) + K"//=" => :(//=) + K"|=" => :(|=) + K"^=" => :(^=) + K"÷=" => :(÷=) + K"%=" => :(%=) + K"<<=" => :(<<=) + K">>=" => :(>>=) + K"<<" => :(<<) + K">>" => :(>>) + K">>>" => :(>>>) + K">>>=" => :(>>>=) + K"\=" => :(\=) + K"&=" => :(&=) + K":=" => :(:=) + K"=>" => :(=>) + K"~" => :(~) + K"$=" => :($=) + K"⊻=" => :(⊻=) + K"-->" => :(-->) + K"||" => :(||) + K"&&" => :(&&) + K"<:" => :(<:) + K">:" => :(>:) + K">" => :(>) + K"<" => :(<) + K">=" => :(>=) + K"≥" => :(≥) + K"<=" => :(<=) + K"≤" => :(≤) + K"==" => :(==) + K"===" => :(===) + K"≡" => :(≡) + K"!=" => :(!=) + K"≠" => :(≠) + K"!==" => :(!==) + K"≢" => :(≢) + K"in" => :(in) + K"isa" => :(isa) + K"<|" => :(<|) + K"|>" => :(|>) + K":" => :(:) + K".." => :(..) + K"$" => :($) + K"+" => :(+) + K"-" => :(-) + K"++" => :(++) + K"|" => :(|) + K"*" => :(*) + K"/" => :(/) + K"%" => :(%) + K"\\" => :(\) + K"&" => :(&) + K"//" => :(//) + K"^" => :(^) + K"::" => :(::) + K"?" => :? + K"." => :(.) + K"!" => :(!) + K"'" => Symbol(''') + K"..." => :(...) 
+ K".'" => Symbol(".'") + K"->" => :(->) + K"where" => :where + ] + UNICODE_OPS_REVERSE[k] = v +end diff --git a/JuliaSyntax/src/Tokenize/utilities.jl b/JuliaSyntax/src/Tokenize/utilities.jl index 8a051ef402c37..708ccfacc11c6 100644 --- a/JuliaSyntax/src/Tokenize/utilities.jl +++ b/JuliaSyntax/src/Tokenize/utilities.jl @@ -224,29 +224,29 @@ end end function optakessuffix(k) - (Tokens.begin_ops < k < Tokens.end_ops) && + (K"BEGIN_OPS" < k < K"END_OPS") && !( - k == Tokens.DDDOT || - Tokens.begin_assignments <= k <= Tokens.end_assignments || - k == Tokens.CONDITIONAL || - k == Tokens.ISSUBTYPE || - k == Tokens.ISSUPERTYPE || - k == Tokens.LAZY_AND || - k == Tokens.LAZY_OR || - k == Tokens.IN || - k == Tokens.ISA || - k == Tokens.COLON_EQUALS || - k == Tokens.DOUBLE_COLON_EQUAL || - k == Tokens.COLON || - k == Tokens.DDOT || - k == Tokens.EX_OR || - k == Tokens.DECLARATION || - k == Tokens.WHERE || - k == Tokens.DOT || - k == Tokens.NOT || - k == Tokens.TRANSPOSE || - k == Tokens.ANON_FUNC || - Tokens.NOT_SIGN <= k <= Tokens.QUAD_ROOT + k == K"..." || + K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || + k == K"?" || + k == K"<:" || + k == K">:" || + k == K"&&" || + k == K"||" || + k == K"in" || + k == K"isa" || + k == K"≔" || + k == K"⩴" || + k == K":" || + k == K".." || + k == K"$" || + k == K"::" || + k == K"where" || + k == K"." || + k == K"!" 
|| + k == K".'" || + k == K"->" || + K"¬" <= k <= K"∜" ) end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl new file mode 100644 index 0000000000000..f1260978eaf1a --- /dev/null +++ b/JuliaSyntax/src/kinds.jl @@ -0,0 +1,996 @@ +# Definition of Kind type - mapping from token string identifiers to +# enumeration values as used in @K_str +const _kind_names = +[ + "None" # Placeholder; never emitted by lexer + "EndMarker" # EOF + "Comment" + "Whitespace" + "NewlineWs" # newline-containing whitespace + "Identifier" + "@" + "," + ";" + + "BEGIN_ERRORS" + # Tokenization errors + "ErrorEofMultiComment" + "ErrorEofChar" + "ErrorInvalidNumericConstant" + "ErrorInvalidOperator" + "ErrorInvalidInterpolationTerminator" + # Generic error + "error" + "END_ERRORS" + + "BEGIN_KEYWORDS" + "baremodule" + "begin" + "break" + "catch" + "const" + "continue" + "do" + "else" + "elseif" + "end" + "export" + "finally" + "for" + "function" + "global" + "if" + "import" + "let" + "local" + "macro" + "module" + "quote" + "return" + "struct" + "try" + "using" + "while" + "BEGIN_CONTEXTUAL_KEYWORDS" + # contextual keywords + "abstract" + "as" + "doc" + "mutable" + "outer" + "primitive" + "type" + "var" + "END_CONTEXTUAL_KEYWORDS" + "END_KEYWORDS" + + "BEGIN_LITERAL" + "Integer" + "BinInt" + "HexInt" + "OctInt" + "Float" + "String" + "Char" + "CmdString" + "true" + "false" + "nothing" # A literal `nothing` + "END_LITERAL" + + "BEGIN_DELIMITERS" + "[" + "]" + "{" + "}" + "(" + ")" + "\"" + "\"\"\"" + "`" + "```" + "END_DELIMITERS" + + "BEGIN_OPS" + "..." + + # Level 1 + "BEGIN_ASSIGNMENTS" + "=" + "+=" + "-=" + "*=" + "/=" + "//=" + "|=" + "^=" + "÷=" + "%=" + "<<=" + ">>=" + ">>>=" + "\\=" + "&=" + ":=" + "~" + "\$=" + "⊻=" + "END_ASSIGNMENTS" + + "BEGIN_PAIRARROW" + "=>" + "END_PAIRARROW" + + # Level 2 + "BEGIN_CONDITIONAL" + "?" 
+ "END_CONDITIONAL" + + # Level 3 + "BEGIN_ARROW" + "-->" + "<--" + "<-->" + "←" + "→" + "↔" + "↚" + "↛" + "↞" + "↠" + "↢" + "↣" + "↤" + "↦" + "↮" + "⇎" + "⇍" + "⇏" + "⇐" + "⇒" + "⇔" + "⇴" + "⇶" + "⇷" + "⇸" + "⇹" + "⇺" + "⇻" + "⇼" + "⇽" + "⇾" + "⇿" + "⟵" + "⟶" + "⟷" + "⟹" + "⟺" + "⟻" + "⟼" + "⟽" + "⟾" + "⟿" + "⤀" + "⤁" + "⤂" + "⤃" + "⤄" + "⤅" + "⤆" + "⤇" + "⤌" + "⤍" + "⤎" + "⤏" + "⤐" + "⤑" + "⤔" + "⤕" + "⤖" + "⤗" + "⤘" + "⤝" + "⤞" + "⤟" + "⤠" + "⥄" + "⥅" + "⥆" + "⥇" + "⥈" + "⥊" + "⥋" + "⥎" + "⥐" + "⥒" + "⥓" + "⥖" + "⥗" + "⥚" + "⥛" + "⥞" + "⥟" + "⥢" + "⥤" + "⥦" + "⥧" + "⥨" + "⥩" + "⥪" + "⥫" + "⥬" + "⥭" + "⥰" + "⧴" + "⬱" + "⬰" + "⬲" + "⬳" + "⬴" + "⬵" + "⬶" + "⬷" + "⬸" + "⬹" + "⬺" + "⬻" + "⬼" + "⬽" + "⬾" + "⬿" + "⭀" + "⭁" + "⭂" + "⭃" + "⭄" + "⭇" + "⭈" + "⭉" + "⭊" + "⭋" + "⭌" + "←" + "→" + "↻" + "⇜" + "⇝" + "↜" + "↝" + "↩" + "↪" + "↫" + "↬" + "↼" + "↽" + "⇀" + "⇁" + "⇄" + "⇆" + "⇇" + "⇉" + "⇋" + "⇌" + "⇚" + "⇛" + "⇠" + "⇢" + "↷" + "↶" + "↺" + "END_ARROW" + + # Level 4 + "BEGIN_LAZYOR" + "||" + "END_LAZYOR" + + # Level 5 + "BEGIN_LAZYAND" + "&&" + "END_LAZYAND" + + # Level 6 + "BEGIN_COMPARISON" + "<:" + ">:" + ">" + "<" + ">=" + "≥" + "<=" + "≤" + "==" + "===" + "≡" + "!=" + "≠" + "!==" + "≢" + "∈" + "in" + "isa" + "∉" + "∋" + "∌" + "⊆" + "⊈" + "⊂" + "⊄" + "⊊" + "∝" + "∊" + "∍" + "∥" + "∦" + "∷" + "∺" + "∻" + "∽" + "∾" + "≁" + "≃" + "≄" + "≅" + "≆" + "≇" + "≈" + "≉" + "≊" + "≋" + "≌" + "≍" + "≎" + "≐" + "≑" + "≒" + "≓" + "≔" + "≕" + "≖" + "≗" + "≘" + "≙" + "≚" + "≛" + "≜" + "≝" + "≞" + "≟" + "≣" + "≦" + "≧" + "≨" + "≩" + "≪" + "≫" + "≬" + "≭" + "≮" + "≯" + "≰" + "≱" + "≲" + "≳" + "≴" + "≵" + "≶" + "≷" + "≸" + "≹" + "≺" + "≻" + "≼" + "≽" + "≾" + "≿" + "⊀" + "⊁" + "⊃" + "⊅" + "⊇" + "⊉" + "⊋" + "⊏" + "⊐" + "⊑" + "⊒" + "⊜" + "⊩" + "⊬" + "⊮" + "⊰" + "⊱" + "⊲" + "⊳" + "⊴" + "⊵" + "⊶" + "⊷" + "⋍" + "⋐" + "⋑" + "⋕" + "⋖" + "⋗" + "⋘" + "⋙" + "⋚" + "⋛" + "⋜" + "⋝" + "⋞" + "⋟" + "⋠" + "⋡" + "⋢" + "⋣" + "⋤" + "⋥" + "⋦" + "⋧" + "⋨" + "⋩" + "⋪" + "⋫" + "⋬" + "⋭" + "⋲" + "⋳" + "⋴" + 
"⋵" + "⋶" + "⋷" + "⋸" + "⋹" + "⋺" + "⋻" + "⋼" + "⋽" + "⋾" + "⋿" + "⟈" + "⟉" + "⟒" + "⦷" + "⧀" + "⧁" + "⧡" + "⧣" + "⧤" + "⧥" + "⩦" + "⩧" + "⩪" + "⩫" + "⩬" + "⩭" + "⩮" + "⩯" + "⩰" + "⩱" + "⩲" + "⩳" + "⩴" + "⩵" + "⩶" + "⩷" + "⩸" + "⩹" + "⩺" + "⩻" + "⩼" + "⩽" + "⩾" + "⩿" + "⪀" + "⪁" + "⪂" + "⪃" + "⪄" + "⪅" + "⪆" + "⪇" + "⪈" + "⪉" + "⪊" + "⪋" + "⪌" + "⪍" + "⪎" + "⪏" + "⪐" + "⪑" + "⪒" + "⪓" + "⪔" + "⪕" + "⪖" + "⪗" + "⪘" + "⪙" + "⪚" + "⪛" + "⪜" + "⪝" + "⪞" + "⪟" + "⪠" + "⪡" + "⪢" + "⪣" + "⪤" + "⪥" + "⪦" + "⪧" + "⪨" + "⪩" + "⪪" + "⪫" + "⪬" + "⪭" + "⪮" + "⪯" + "⪰" + "⪱" + "⪲" + "⪳" + "⪴" + "⪵" + "⪶" + "⪷" + "⪸" + "⪹" + "⪺" + "⪻" + "⪼" + "⪽" + "⪾" + "⪿" + "⫀" + "⫁" + "⫂" + "⫃" + "⫄" + "⫅" + "⫆" + "⫇" + "⫈" + "⫉" + "⫊" + "⫋" + "⫌" + "⫍" + "⫎" + "⫏" + "⫐" + "⫑" + "⫒" + "⫓" + "⫔" + "⫕" + "⫖" + "⫗" + "⫘" + "⫙" + "⫷" + "⫸" + "⫹" + "⫺" + "⊢" + "⊣" + # ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350 + "⫪" + "⫫" + "⟂" + "END_COMPARISON" + + # Level 7 + "BEGIN_PIPE" + "<|" + "|>" + "END_PIPE" + + # Level 8 + "BEGIN_COLON" + ":" + ".." 
+ "…" + "⁝" + "⋮" + "⋱" + "⋰" + "⋯" + "END_COLON" + + # Level 9 + "BEGIN_PLUS" + "\$" + "+" + "-" + "++" + "⊕" + "⊖" + "⊞" + "⊟" + "|" + "∪" + "∨" + "⊔" + "±" + "∓" + "∔" + "∸" + "≂" + "≏" + "⊎" + "⊻" + "⊽" + "⋎" + "⋓" + "⧺" + "⧻" + "⨈" + "⨢" + "⨣" + "⨤" + "⨥" + "⨦" + "⨧" + "⨨" + "⨩" + "⨪" + "⨫" + "⨬" + "⨭" + "⨮" + "⨹" + "⨺" + "⩁" + "⩂" + "⩅" + "⩊" + "⩌" + "⩏" + "⩐" + "⩒" + "⩔" + "⩖" + "⩗" + "⩛" + "⩝" + "⩡" + "⩢" + "⩣" + "¦" + "END_PLUS" + + # Level 10 + "BEGIN_BITSHIFTS" + "<<" + ">>" + ">>>" + "END_BITSHIFTS" + + # Level 11 + "BEGIN_TIMES" + "*" + "/" + "÷" + "%" + "⋅" + "∘" + "×" + "\\" + "&" + "∩" + "∧" + "⊗" + "⊘" + "⊙" + "⊚" + "⊛" + "⊠" + "⊡" + "⊓" + "∗" + "∙" + "∤" + "⅋" + "≀" + "⊼" + "⋄" + "⋆" + "⋇" + "⋉" + "⋊" + "⋋" + "⋌" + "⋏" + "⋒" + "⟑" + "⦸" + "⦼" + "⦾" + "⦿" + "⧶" + "⧷" + "⨇" + "⨰" + "⨱" + "⨲" + "⨳" + "⨴" + "⨵" + "⨶" + "⨷" + "⨸" + "⨻" + "⨼" + "⨽" + "⩀" + "⩃" + "⩄" + "⩋" + "⩍" + "⩎" + "⩑" + "⩓" + "⩕" + "⩘" + "⩚" + "⩜" + "⩞" + "⩟" + "⩠" + "⫛" + "⊍" + "▷" + "⨝" + "⟕" + "⟖" + "⟗" + "⌿" + "⨟" + "END_TIMES" + + # Level 12 + "BEGIN_RATIONAL" + "//" + "END_RATIONAL" + + # Level 13 + "BEGIN_POWER" + "^" + "↑" + "↓" + "⇵" + "⟰" + "⟱" + "⤈" + "⤉" + "⤊" + "⤋" + "⤒" + "⤓" + "⥉" + "⥌" + "⥍" + "⥏" + "⥑" + "⥔" + "⥕" + "⥘" + "⥙" + "⥜" + "⥝" + "⥠" + "⥡" + "⥣" + "⥥" + "⥮" + "⥯" + "↑" + "↓" + "END_POWER" + + # Level 14 + "BEGIN_DECL" + "::" + "END_DECL" + + # Level 15 + "BEGIN_WHERE" + "where" + "END_WHERE" + + # Level 16 + "BEGIN_DOT" + "." + "END_DOT" + + "!" + "'" + ".'" + "->" + + "BEGIN_UNICODE_OPS" + "¬" + "√" + "∛" + "∜" + "END_UNICODE_OPS" + "END_OPS" + + # The following kinds are emitted by the parser. There's two types of these: + + # 1. Implied tokens which have a position but might have zero width in the + # source text. 
+ # + # In some cases we want to generate parse tree nodes in a standard form, + # but some of the leaf tokens are implied rather than existing in the + # source text, or the lexed tokens need to be re-kinded to represent + # special forms which only the parser can infer. These are "parser tokens". + # + # Some examples: + # + # Docstrings - the macro name is invisible + # "doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1)) + # + # String macros - the macro name does not appear in the source text, so we + # need a special kind of token to imply it. + # + # In these cases, we use some special kinds which can be emitted as zero + # width tokens to keep the parse tree more uniform. + "BEGIN_PARSER_TOKENS" + + "TOMBSTONE" # Empty placeholder for kind to be filled later + + # Macro names are modelled as a special kind of identifier because the + # @ may not be attached to the macro name in the source (or may not be + # associated with a token at all in the case of implied macro calls + # like CORE_DOC_MACRO_NAME) + "BEGIN_MACRO_NAMES" + "MacroName" + "@." + "StringMacroName" + "CmdMacroName" + "core_@doc" + "core_@cmd" + "core_@int128_str" + "core_@uint128_str" + "core_@big_str" + "END_MACRO_NAMES" + "END_PARSER_TOKENS" + + # 2. Nonterminals which are exposed in the AST, but where the surface + # syntax doesn't have a token corresponding to the node type. 
+ "BEGIN_SYNTAX_KINDS" + "block" + "call" + "comparison" + "curly" + "inert" # QuoteNode; not quasiquote + "string" # A string interior node (possibly containing interpolations) + "macrocall" + "kw" # the = in f(a=1) + "parameters" # the list after ; in f(; a=1) + "toplevel" + "tuple" + "ref" + "vect" + # Concatenation syntax + "braces" + "bracescat" + "hcat" + "vcat" + "ncat" + "typed_hcat" + "typed_vcat" + "typed_ncat" + "row" + "nrow" + # Comprehensions + "generator" + "filter" + "flatten" + "comprehension" + "typed_comprehension" + "END_SYNTAX_KINDS" +] + +""" + K"name" + Kind(namestr) + +`Kind` is a type tag for specifying the type of tokens and interior nodes of +a syntax tree. Abstractly, this tag is used to define our own *sum types* for +syntax tree nodes. We do this explicitly outside the Julia type system because +(a) Julia doesn't have sum types and (b) we want concrete data structures which +are unityped from the Julia compiler's point of view, for efficiency. + +Naming rules: +* Kinds which correspond to exactly one textural form are represented with that + text. This includes keywords like K"for" and operators like K"*". +* Kinds which represent many textural forms have UpperCamelCase names. This + includes kinds like K"Identifier" and K"Comment". +* Kinds which exist merely as delimiters are all uppercase +""" +primitive type Kind 16 end + +# The implementation of Kind here is basically similar to @enum. However we use +# the K_str macro to self-name these kinds with their literal representation, +# rather than needing to invent a new name for each. 
+ +let kind_int_type = :UInt16, + max_kind_int = length(_kind_names)-1 + + @eval begin + function Kind(x::Integer) + if x < 0 || x > $max_kind_int + throw(ArgumentError("Kind out of range: $x")) + end + return Base.bitcast(Kind, convert($kind_int_type, x)) + end + + Base.convert(::Type{String}, k::Kind) = _kind_names[1 + Base.bitcast($kind_int_type, k)] + + let kindstr_to_int = Dict(s=>i-1 for (i,s) in enumerate(_kind_names)) + function Base.convert(::Type{Kind}, s::AbstractString) + i = get(kindstr_to_int, s) do + error("unknown Kind name $(repr(s))") + end + Kind(i) + end + end + + Base.string(x::Kind) = convert(String, x) + Base.print(io::IO, x::Kind) = print(io, convert(String, x)) + + Base.typemin(::Type{Kind}) = Kind(0) + Base.typemax(::Type{Kind}) = Kind($max_kind_int) + + Base.:<(x::Kind, y::Kind) = reinterpret($kind_int_type, x) < reinterpret($kind_int_type, y) + + Base.instances(::Type{Kind}) = (Kind(i) for i in reinterpret($kind_int_type, typemin(Kind)):reinterpret($kind_int_type, typemax(Kind))) + end +end + +function Base.show(io::IO, k::Kind) + print(io, "K\"$(convert(String, k))\"") +end + +#------------------------------------------------------------------------------- + +""" + K"s" + +The kind of a token or AST internal node with string "s". + +For example +* K")" is the kind of the right parenthesis token +* K"block" is the kind of a block of code (eg, statements within a begin-end). +""" +macro K_str(s) + convert(Kind, s) +end + +""" +A set of kinds which can be used with the `in` operator. For example + + k in KSet"+ - *" +""" +macro KSet_str(str) + kinds = [convert(Kind, s) for s in split(str)] + + quote + ($(kinds...),) + end +end + +""" + kind(x) + +Return the `Kind` of `x`. 
+""" +kind(k::Kind) = k + diff --git a/JuliaSyntax/src/token_kinds.jl b/JuliaSyntax/src/token_kinds.jl deleted file mode 100644 index 61680d1ea6a45..0000000000000 --- a/JuliaSyntax/src/token_kinds.jl +++ /dev/null @@ -1,908 +0,0 @@ -# Mapping from token string identifiers to enumeration values as used in @K_str -# -# TODO: Unify Tokenize with this approach so we don't need to write these out -# in two places. - -const _str_to_kind = let Ts = TzTokens -Dict([ -"None" => Ts.NONE -"EndMarker" => Ts.ENDMARKER -"Comment" => Ts.COMMENT -"Whitespace" => Ts.WHITESPACE -"Identifier" => Ts.IDENTIFIER -"@" => Ts.AT_SIGN -"," => Ts.COMMA -";" => Ts.SEMICOLON - -"BEGIN_ERRORS" => Ts.begin_errors -# Tokenization errors -"ErrorEofMultiComment" => Ts.EOF_MULTICOMMENT -"ErrorEofChar" => Ts.EOF_CHAR -"ErrorInvalidNumericConstant" => Ts.INVALID_NUMERIC_CONSTANT -"ErrorInvalidOperator" => Ts.INVALID_OPERATOR -"ErrorInvalidInterpolationTerminator" => Ts.INVALID_INTERPOLATION_TERMINATOR -# Generic error -"error" => Ts.ERROR -"END_ERRORS" => Ts.end_errors - -"BEGIN_KEYWORDS" => Ts.begin_keywords -"baremodule" => Ts.BAREMODULE -"begin" => Ts.BEGIN -"break" => Ts.BREAK -"catch" => Ts.CATCH -"const" => Ts.CONST -"continue" => Ts.CONTINUE -"do" => Ts.DO -"else" => Ts.ELSE -"elseif" => Ts.ELSEIF -"end" => Ts.END -"export" => Ts.EXPORT -"finally" => Ts.FINALLY -"for" => Ts.FOR -"function" => Ts.FUNCTION -"global" => Ts.GLOBAL -"if" => Ts.IF -"import" => Ts.IMPORT -"let" => Ts.LET -"local" => Ts.LOCAL -"macro" => Ts.MACRO -"module" => Ts.MODULE -"quote" => Ts.QUOTE -"return" => Ts.RETURN -"struct" => Ts.STRUCT -"try" => Ts.TRY -"using" => Ts.USING -"while" => Ts.WHILE -# contextual keywords -"abstract" => Ts.ABSTRACT -"as" => Ts.AS -"doc" => Ts.DOC -"mutable" => Ts.MUTABLE -"outer" => Ts.OUTER -"primitive" => Ts.PRIMITIVE -"type" => Ts.TYPE -"var" => Ts.VAR -"END_KEYWORDS" => Ts.end_keywords - -"BEGIN_CSTPARSER" => Ts.begin_cstparser -"nothing" => Ts.NOTHING -"NewlineWs" => Ts.NEWLINE_WS 
-"END_CSTPARSER" => Ts.end_cstparser - -"BEGIN_LITERAL" => Ts.begin_literal -"Integer" => Ts.INTEGER -"BinInt" => Ts.BIN_INT -"HexInt" => Ts.HEX_INT -"OctInt" => Ts.OCT_INT -"Float" => Ts.FLOAT -"String" => Ts.STRING -"Char" => Ts.CHAR -"CmdString" => Ts.CMD -"true" => Ts.TRUE -"false" => Ts.FALSE -"END_LITERAL" => Ts.end_literal - -"BEGIN_DELIMITERS" => Ts.begin_delimiters -"[" => Ts.LSQUARE -"]" => Ts.RSQUARE -"{" => Ts.LBRACE -"}" => Ts.RBRACE -"(" => Ts.LPAREN -")" => Ts.RPAREN -"\"" => Ts.DQUOTE -"\"\"\"" => Ts.TRIPLE_DQUOTE -"`" => Ts.BACKTICK -"```" => Ts.TRIPLE_BACKTICK -"END_DELIMITERS" => Ts.end_delimiters - -"BEGIN_OPS" => Ts.begin_ops -"..." => Ts.DDDOT - -# Level 1 -"BEGIN_ASSIGNMENTS" => Ts.begin_assignments -"=" => Ts.EQ -"+=" => Ts.PLUS_EQ -"-=" => Ts.MINUS_EQ -"*=" => Ts.STAR_EQ -"/=" => Ts.FWD_SLASH_EQ -"//=" => Ts.FWDFWD_SLASH_EQ -"|=" => Ts.OR_EQ -"^=" => Ts.CIRCUMFLEX_EQ -"÷=" => Ts.DIVISION_EQ -"%=" => Ts.REM_EQ -"<<=" => Ts.LBITSHIFT_EQ -">>=" => Ts.RBITSHIFT_EQ -">>>=" => Ts.UNSIGNED_BITSHIFT_EQ -"\\=" => Ts.BACKSLASH_EQ -"&=" => Ts.AND_EQ -":=" => Ts.COLON_EQ -"~" => Ts.APPROX -"\$=" => Ts.EX_OR_EQ -"⊻=" => Ts.XOR_EQ -"END_ASSIGNMENTS" => Ts.end_assignments - -"BEGIN_PAIRARROW" => Ts.begin_pairarrow -"=>" => Ts.PAIR_ARROW -"END_PAIRARROW" => Ts.end_pairarrow - -# Level 2 -"BEGIN_CONDITIONAL" => Ts.begin_conditional -"?" 
=> Ts.CONDITIONAL -"END_CONDITIONAL" => Ts.end_conditional - -# Level 3 -"BEGIN_ARROW" => Ts.begin_arrow -"-->" => Ts.RIGHT_ARROW -"<--" => Ts.LEFT_ARROW -"<-->" => Ts.DOUBLE_ARROW -"←" => Ts.LEFTWARDS_ARROW -"→" => Ts.RIGHTWARDS_ARROW -"↔" => Ts.LEFT_RIGHT_ARROW -"↚" => Ts.LEFTWARDS_ARROW_WITH_STROKE -"↛" => Ts.RIGHTWARDS_ARROW_WITH_STROKE -"↞" => Ts.LEFTWARDS_TWO_HEADED_ARROW -"↠" => Ts.RIGHTWARDS_TWO_HEADED_ARROW -"↢" => Ts.LEFTWARDS_ARROW_WITH_TAIL -"↣" => Ts.RIGHTWARDS_ARROW_WITH_TAIL -"↤" => Ts.LEFTWARDS_ARROW_FROM_BAR -"↦" => Ts.RIGHTWARDS_ARROW_FROM_BAR -"↮" => Ts.LEFT_RIGHT_ARROW_WITH_STROKE -"⇎" => Ts.LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE -"⇍" => Ts.LEFTWARDS_DOUBLE_ARROW_WITH_STROKE -"⇏" => Ts.RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE -"⇐" => Ts.LEFTWARDS_DOUBLE_ARROW -"⇒" => Ts.RIGHTWARDS_DOUBLE_ARROW -"⇔" => Ts.LEFT_RIGHT_DOUBLE_ARROW -"⇴" => Ts.RIGHT_ARROW_WITH_SMALL_CIRCLE -"⇶" => Ts.THREE_RIGHTWARDS_ARROWS -"⇷" => Ts.LEFTWARDS_ARROW_WITH_VERTICAL_STROKE -"⇸" => Ts.RIGHTWARDS_ARROW_WITH_VERTICAL_STROKE -"⇹" => Ts.LEFT_RIGHT_ARROW_WITH_VERTICAL_STROKE -"⇺" => Ts.LEFTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE -"⇻" => Ts.RIGHTWARDS_ARROW_WITH_DOUBLE_VERTICAL_STROKE -"⇼" => Ts.LEFT_RIGHT_ARROW_WITH_DOUBLE_VERTICAL_STROKE -"⇽" => Ts.LEFTWARDS_OPEN_HEADED_ARROW -"⇾" => Ts.RIGHTWARDS_OPEN_HEADED_ARROW -"⇿" => Ts.LEFT_RIGHT_OPEN_HEADED_ARROW -"⟵" => Ts.LONG_LEFTWARDS_ARROW -"⟶" => Ts.LONG_RIGHTWARDS_ARROW -"⟷" => Ts.LONG_LEFT_RIGHT_ARROW -"⟹" => Ts.LONG_RIGHTWARDS_DOUBLE_ARROW -"⟺" => Ts.LONG_LEFT_RIGHT_DOUBLE_ARROW -"⟻" => Ts.LONG_LEFTWARDS_ARROW_FROM_BAR -"⟼" => Ts.LONG_RIGHTWARDS_ARROW_FROM_BAR -"⟽" => Ts.LONG_LEFTWARDS_DOUBLE_ARROW_FROM_BAR -"⟾" => Ts.LONG_RIGHTWARDS_DOUBLE_ARROW_FROM_BAR -"⟿" => Ts.LONG_RIGHTWARDS_SQUIGGLE_ARROW -"⤀" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE -"⤁" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE -"⤂" => Ts.LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE -"⤃" => Ts.RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE 
-"⤄" => Ts.LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE -"⤅" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR -"⤆" => Ts.LEFTWARDS_DOUBLE_ARROW_FROM_BAR -"⤇" => Ts.RIGHTWARDS_DOUBLE_ARROW_FROM_BAR -"⤌" => Ts.LEFTWARDS_DOUBLE_DASH_ARROW -"⤍" => Ts.RIGHTWARDS_DOUBLE_DASH_ARROW -"⤎" => Ts.LEFTWARDS_TRIPLE_DASH_ARROW -"⤏" => Ts.RIGHTWARDS_TRIPLE_DASH_ARROW -"⤐" => Ts.RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW -"⤑" => Ts.RIGHTWARDS_ARROW_WITH_DOTTED_STEM -"⤔" => Ts.RIGHTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -"⤕" => Ts.RIGHTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -"⤖" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL -"⤗" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -"⤘" => Ts.RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -"⤝" => Ts.LEFTWARDS_ARROW_TO_BLACK_DIAMOND -"⤞" => Ts.RIGHTWARDS_ARROW_TO_BLACK_DIAMOND -"⤟" => Ts.LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND -"⤠" => Ts.RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND -"⥄" => Ts.SHORT_RIGHTWARDS_ARROW_ABOVE_LEFTWARDS_ARROW -"⥅" => Ts.RIGHTWARDS_ARROW_WITH_PLUS_BELOW -"⥆" => Ts.LEFTWARDS_ARROW_WITH_PLUS_BELOW -"⥇" => Ts.RIGHTWARDS_ARROW_THROUGH_X -"⥈" => Ts.LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE -"⥊" => Ts.LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON -"⥋" => Ts.LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON -"⥎" => Ts.LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON -"⥐" => Ts.LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON -"⥒" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR -"⥓" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR -"⥖" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR -"⥗" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR -"⥚" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR -"⥛" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR -"⥞" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR -"⥟" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR -"⥢" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN -"⥤" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN -"⥦" => 
Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP -"⥧" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN -"⥨" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP -"⥩" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN -"⥪" => Ts.LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH -"⥫" => Ts.LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH -"⥬" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH -"⥭" => Ts.RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH -"⥰" => Ts.RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD -"⧴" => Ts.RULE_DELAYED -"⬱" => Ts.THREE_LEFTWARDS_ARROWS -"⬰" => Ts.LEFT_ARROW_WITH_SMALL_CIRCLE -"⬲" => Ts.LEFT_ARROW_WITH_CIRCLED_PLUS -"⬳" => Ts.LONG_LEFTWARDS_SQUIGGLE_ARROW -"⬴" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_VERTICAL_STROKE -"⬵" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_DOUBLE_VERTICAL_STROKE -"⬶" => Ts.LEFTWARDS_TWO_HEADED_ARROW_FROM_BAR -"⬷" => Ts.LEFTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW -"⬸" => Ts.LEFTWARDS_ARROW_WITH_DOTTED_STEM -"⬹" => Ts.LEFTWARDS_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -"⬺" => Ts.LEFTWARDS_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -"⬻" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL -"⬼" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_VERTICAL_STROKE -"⬽" => Ts.LEFTWARDS_TWO_HEADED_ARROW_WITH_TAIL_WITH_DOUBLE_VERTICAL_STROKE -"⬾" => Ts.LEFTWARDS_ARROW_THROUGH_X -"⬿" => Ts.WAVE_ARROW_POINTING_DIRECTLY_LEFT -"⭀" => Ts.EQUALS_SIGN_ABOVE_LEFTWARDS_ARROW -"⭁" => Ts.REVERSE_TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW -"⭂" => Ts.LEFTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO -"⭃" => Ts.RIGHTWARDS_ARROW_THROUGH_GREATER_THAN -"⭄" => Ts.RIGHTWARDS_ARROW_THROUGH_SUPERSET -"⭇" => Ts.REVERSE_TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW -"⭈" => Ts.RIGHTWARDS_ARROW_ABOVE_REVERSE_ALMOST_EQUAL_TO -"⭉" => Ts.TILDE_OPERATOR_ABOVE_LEFTWARDS_ARROW -"⭊" => Ts.LEFTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO -"⭋" => Ts.LEFTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR -"⭌" => 
Ts.RIGHTWARDS_ARROW_ABOVE_REVERSE_TILDE_OPERATOR -"←" => Ts.HALFWIDTH_LEFTWARDS_ARROW -"→" => Ts.HALFWIDTH_RIGHTWARDS_ARROW -"↻" => Ts.CIRCLE_ARROW_RIGHT -"⇜" => Ts.LEFT_SQUIGGLE_ARROW -"⇝" => Ts.RIGHT_SQUIGGLE_ARROW -"↜" => Ts.LEFT_WAVE_ARROW -"↝" => Ts.RIGHT_WAVE_ARROW -"↩" => Ts.LEFTWARDS_ARROW_WITH_HOOK -"↪" => Ts.RIGHTWARDS_ARROW_WITH_HOOK -"↫" => Ts.LOOP_ARROW_LEFT -"↬" => Ts.LOOP_ARROW_RIGHT -"↼" => Ts.LEFT_HARPOON_UP -"↽" => Ts.LEFT_HARPOON_DOWN -"⇀" => Ts.RIGHT_HARPOON_UP -"⇁" => Ts.RIGHT_HARPOON_DOWN -"⇄" => Ts.RIGHT_LEFT_ARROWS -"⇆" => Ts.LEFT_RIGHT_ARROWS -"⇇" => Ts.LEFT_LEFT_ARROWS -"⇉" => Ts.RIGHT_RIGHT_ARROWS -"⇋" => Ts.LEFT_RIGHT_HARPOONS -"⇌" => Ts.RIGHT_LEFT_HARPOONS -"⇚" => Ts.L_LEFT_ARROW -"⇛" => Ts.R_RIGHT_ARROW -"⇠" => Ts.LEFT_DASH_ARROW -"⇢" => Ts.RIGHT_DASH_ARROW -"↷" => Ts.CURVE_ARROW_RIGHT -"↶" => Ts.CURVE_ARROW_LEFT -"↺" => Ts.CIRCLE_ARROW_LEFT -"END_ARROW" => Ts.end_arrow - -# Level 4 -"BEGIN_LAZYOR" => Ts.begin_lazyor -"||" => Ts.LAZY_OR -"END_LAZYOR" => Ts.end_lazyor - -# Level 5 -"BEGIN_LAZYAND" => Ts.begin_lazyand -"&&" => Ts.LAZY_AND -"END_LAZYAND" => Ts.end_lazyand - -# Level 6 -"BEGIN_COMPARISON" => Ts.begin_comparison -"<:" => Ts.ISSUBTYPE -">:" => Ts.ISSUPERTYPE -">" => Ts.GREATER -"<" => Ts.LESS -">=" => Ts.GREATER_EQ -"≥" => Ts.GREATER_THAN_OR_EQUAL_TO -"<=" => Ts.LESS_EQ -"≤" => Ts.LESS_THAN_OR_EQUAL_TO -"==" => Ts.EQEQ -"===" => Ts.EQEQEQ -"≡" => Ts.IDENTICAL_TO -"!=" => Ts.NOT_EQ -"≠" => Ts.NOT_EQUAL_TO -"!==" => Ts.NOT_IS -"≢" => Ts.NOT_IDENTICAL_TO -"∈" => Ts.ELEMENT_OF -"in" => Ts.IN -"isa" => Ts.ISA -"∉" => Ts.NOT_AN_ELEMENT_OF -"∋" => Ts.CONTAINS_AS_MEMBER -"∌" => Ts.DOES_NOT_CONTAIN_AS_MEMBER -"⊆" => Ts.SUBSET_OF_OR_EQUAL_TO -"⊈" => Ts.NEITHER_A_SUBSET_OF_NOR_EQUAL_TO -"⊂" => Ts.SUBSET_OF -"⊄" => Ts.NOT_A_SUBSET_OF -"⊊" => Ts.SUBSET_OF_WITH_NOT_EQUAL_TO -"∝" => Ts.PROPORTIONAL_TO -"∊" => Ts.SMALL_ELEMENT_OF -"∍" => Ts.SMALL_CONTAINS_AS_MEMBER -"∥" => Ts.PARALLEL_TO -"∦" => Ts.NOT_PARALLEL_TO -"∷" => Ts.PROPORTION -"∺" 
=> Ts.GEOMETRIC_PROPORTION -"∻" => Ts.HOMOTHETIC -"∽" => Ts.REVERSED_TILDE -"∾" => Ts.INVERTED_LAZY_S -"≁" => Ts.NOT_TILDE -"≃" => Ts.ASYMPTOTICALLY_EQUAL_TO -"≄" => Ts.NOT_ASYMPTOTICALLY_EQUAL_TO -"≅" => Ts.APPROXIMATELY_EQUAL_TO -"≆" => Ts.APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO -"≇" => Ts.NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO -"≈" => Ts.ALMOST_EQUAL_TO -"≉" => Ts.NOT_ALMOST_EQUAL_TO -"≊" => Ts.ALMOST_EQUAL_OR_EQUAL_TO -"≋" => Ts.TRIPLE_TILDE -"≌" => Ts.ALL_EQUAL_TO -"≍" => Ts.EQUIVALENT_TO -"≎" => Ts.GEOMETRICALLY_EQUIVALENT_TO -"≐" => Ts.APPROACHES_THE_LIMIT -"≑" => Ts.GEOMETRICALLY_EQUAL_TO -"≒" => Ts.APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF -"≓" => Ts.IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO -"≔" => Ts.COLON_EQUALS -"≕" => Ts.EQUALS_COLON -"≖" => Ts.RING_IN_EQUAL_TO -"≗" => Ts.RING_EQUAL_TO -"≘" => Ts.CORRESPONDS_TO -"≙" => Ts.ESTIMATES -"≚" => Ts.EQUIANGULAR_TO -"≛" => Ts.STAR_EQUALS -"≜" => Ts.DELTA_EQUAL_TO -"≝" => Ts.EQUAL_TO_BY_DEFINITION -"≞" => Ts.MEASURED_BY -"≟" => Ts.QUESTIONED_EQUAL_TO -"≣" => Ts.STRICTLY_EQUIVALENT_TO -"≦" => Ts.LESS_THAN_OVER_EQUAL_TO -"≧" => Ts.GREATER_THAN_OVER_EQUAL_TO -"≨" => Ts.LESS_THAN_BUT_NOT_EQUAL_TO -"≩" => Ts.GREATER_THAN_BUT_NOT_EQUAL_TO -"≪" => Ts.MUCH_LESS_THAN -"≫" => Ts.MUCH_GREATER_THAN -"≬" => Ts.BETWEEN -"≭" => Ts.NOT_EQUIVALENT_TO -"≮" => Ts.NOT_LESS_THAN -"≯" => Ts.NOT_GREATER_THAN -"≰" => Ts.NEITHER_LESS_THAN_NOR_EQUAL_TO -"≱" => Ts.NEITHER_GREATER_THAN_NOR_EQUAL_TO -"≲" => Ts.LESS_THAN_OR_EQUIVALENT_TO -"≳" => Ts.GREATER_THAN_OR_EQUIVALENT_TO -"≴" => Ts.NEITHER_LESS_THAN_NOR_EQUIVALENT_TO -"≵" => Ts.NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO -"≶" => Ts.LESS_THAN_OR_GREATER_THAN -"≷" => Ts.GREATER_THAN_OR_LESS_THAN -"≸" => Ts.NEITHER_LESS_THAN_NOR_GREATER_THAN -"≹" => Ts.NEITHER_GREATER_THAN_NOR_LESS_THAN -"≺" => Ts.PRECEDES -"≻" => Ts.SUCCEEDS -"≼" => Ts.PRECEDES_OR_EQUAL_TO -"≽" => Ts.SUCCEEDS_OR_EQUAL_TO -"≾" => Ts.PRECEDES_OR_EQUIVALENT_TO -"≿" => Ts.SUCCEEDS_OR_EQUIVALENT_TO -"⊀" => Ts.DOES_NOT_PRECEDE -"⊁" 
=> Ts.DOES_NOT_SUCCEED -"⊃" => Ts.SUPERSET_OF -"⊅" => Ts.NOT_A_SUPERSET_OF -"⊇" => Ts.SUPERSET_OF_OR_EQUAL_TO -"⊉" => Ts.NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO -"⊋" => Ts.SUPERSET_OF_WITH_NOT_EQUAL_TO -"⊏" => Ts.SQUARE_IMAGE_OF -"⊐" => Ts.SQUARE_ORIGINAL_OF -"⊑" => Ts.SQUARE_IMAGE_OF_OR_EQUAL_TO -"⊒" => Ts.SQUARE_ORIGINAL_OF_OR_EQUAL_TO -"⊜" => Ts.CIRCLED_EQUALS -"⊩" => Ts.FORCES -"⊬" => Ts.DOES_NOT_PROVE -"⊮" => Ts.DOES_NOT_FORCE -"⊰" => Ts.PRECEDES_UNDER_RELATION -"⊱" => Ts.SUCCEEDS_UNDER_RELATION -"⊲" => Ts.NORMAL_SUBGROUP_OF -"⊳" => Ts.CONTAINS_AS_NORMAL_SUBGROUP -"⊴" => Ts.NORMAL_SUBGROUP_OF_OR_EQUAL_TO -"⊵" => Ts.CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO -"⊶" => Ts.ORIGINAL_OF -"⊷" => Ts.IMAGE_OF -"⋍" => Ts.REVERSED_TILDE_EQUALS -"⋐" => Ts.DOUBLE_SUBSET -"⋑" => Ts.DOUBLE_SUPERSET -"⋕" => Ts.EQUAL_AND_PARALLEL_TO -"⋖" => Ts.LESS_THAN_WITH_DOT -"⋗" => Ts.GREATER_THAN_WITH_DOT -"⋘" => Ts.VERY_MUCH_LESS_THAN -"⋙" => Ts.VERY_MUCH_GREATER_THAN -"⋚" => Ts.LESS_THAN_EQUAL_TO_OR_GREATER_THAN -"⋛" => Ts.GREATER_THAN_EQUAL_TO_OR_LESS_THAN -"⋜" => Ts.EQUAL_TO_OR_LESS_THAN -"⋝" => Ts.EQUAL_TO_OR_GREATER_THAN -"⋞" => Ts.EQUAL_TO_OR_PRECEDES -"⋟" => Ts.EQUAL_TO_OR_SUCCEEDS -"⋠" => Ts.DOES_NOT_PRECEDE_OR_EQUAL -"⋡" => Ts.DOES_NOT_SUCCEED_OR_EQUAL -"⋢" => Ts.NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO -"⋣" => Ts.NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO -"⋤" => Ts.SQUARE_IMAGE_OF_OR_NOT_EQUAL_TO -"⋥" => Ts.SQUARE_ORIGINAL_OF_OR_NOT_EQUAL_TO -"⋦" => Ts.LESS_THAN_BUT_NOT_EQUIVALENT_TO -"⋧" => Ts.GREATER_THAN_BUT_NOT_EQUIVALENT_TO -"⋨" => Ts.PRECEDES_BUT_NOT_EQUIVALENT_TO -"⋩" => Ts.SUCCEEDS_BUT_NOT_EQUIVALENT_TO -"⋪" => Ts.NOT_NORMAL_SUBGROUP_OF -"⋫" => Ts.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP -"⋬" => Ts.NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO -"⋭" => Ts.DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL -"⋲" => Ts.ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE -"⋳" => Ts.ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -"⋴" => Ts.SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -"⋵" => 
Ts.ELEMENT_OF_WITH_DOT_ABOVE -"⋶" => Ts.ELEMENT_OF_WITH_OVERBAR -"⋷" => Ts.SMALL_ELEMENT_OF_WITH_OVERBAR -"⋸" => Ts.ELEMENT_OF_WITH_UNDERBAR -"⋹" => Ts.ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES -"⋺" => Ts.CONTAINS_WITH_LONG_HORIZONTAL_STROKE -"⋻" => Ts.CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -"⋼" => Ts.SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE -"⋽" => Ts.CONTAINS_WITH_OVERBAR -"⋾" => Ts.SMALL_CONTAINS_WITH_OVERBAR -"⋿" => Ts.Z_NOTATION_BAG_MEMBERSHIP -"⟈" => Ts.REVERSE_SOLIDUS_PRECEDING_SUBSET -"⟉" => Ts.SUPERSET_PRECEDING_SOLIDUS -"⟒" => Ts.ELEMENT_OF_OPENING_UPWARDS -"⦷" => Ts.CIRCLED_PARALLEL -"⧀" => Ts.CIRCLED_LESS_THAN -"⧁" => Ts.CIRCLED_GREATER_THAN -"⧡" => Ts.INCREASES_AS -"⧣" => Ts.EQUALS_SIGN_AND_SLANTED_PARALLEL -"⧤" => Ts.EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE -"⧥" => Ts.IDENTICAL_TO_AND_SLANTED_PARALLEL -"⩦" => Ts.EQUALS_SIGN_WITH_DOT_BELOW -"⩧" => Ts.IDENTICAL_WITH_DOT_ABOVE -"⩪" => Ts.TILDE_OPERATOR_WITH_DOT_ABOVE -"⩫" => Ts.TILDE_OPERATOR_WITH_RISING_DOTS -"⩬" => Ts.SIMILAR_MINUS_SIMILAR -"⩭" => Ts.CONGRUENT_WITH_DOT_ABOVE -"⩮" => Ts.EQUALS_WITH_ASTERISK -"⩯" => Ts.ALMOST_EQUAL_TO_WITH_CIRCUMFLEX_ACCENT -"⩰" => Ts.APPROXIMATELY_EQUAL_OR_EQUAL_TO -"⩱" => Ts.EQUALS_SIGN_ABOVE_PLUS_SIGN -"⩲" => Ts.PLUS_SIGN_ABOVE_EQUALS_SIGN -"⩳" => Ts.EQUALS_SIGN_ABOVE_TILDE_OPERATOR -"⩴" => Ts.DOUBLE_COLON_EQUAL -"⩵" => Ts.TWO_CONSECUTIVE_EQUALS_SIGNS -"⩶" => Ts.THREE_CONSECUTIVE_EQUALS_SIGNS -"⩷" => Ts.EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW -"⩸" => Ts.EQUIVALENT_WITH_FOUR_DOTS_ABOVE -"⩹" => Ts.LESS_THAN_WITH_CIRCLE_INSIDE -"⩺" => Ts.GREATER_THAN_WITH_CIRCLE_INSIDE -"⩻" => Ts.LESS_THAN_WITH_QUESTION_MARK_ABOVE -"⩼" => Ts.GREATER_THAN_WITH_QUESTION_MARK_ABOVE -"⩽" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO -"⩾" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO -"⩿" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE -"⪀" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE -"⪁" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE 
-"⪂" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE -"⪃" => Ts.LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT -"⪄" => Ts.GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT -"⪅" => Ts.LESS_THAN_OR_APPROXIMATE -"⪆" => Ts.GREATER_THAN_OR_APPROXIMATE -"⪇" => Ts.LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO -"⪈" => Ts.GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO -"⪉" => Ts.LESS_THAN_AND_NOT_APPROXIMATE -"⪊" => Ts.GREATER_THAN_AND_NOT_APPROXIMATE -"⪋" => Ts.LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN -"⪌" => Ts.GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN -"⪍" => Ts.LESS_THAN_ABOVE_SIMILAR_OR_EQUAL -"⪎" => Ts.GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL -"⪏" => Ts.LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN -"⪐" => Ts.GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN -"⪑" => Ts.LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL -"⪒" => Ts.GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL -"⪓" => Ts.LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL -"⪔" => Ts.GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL -"⪕" => Ts.SLANTED_EQUAL_TO_OR_LESS_THAN -"⪖" => Ts.SLANTED_EQUAL_TO_OR_GREATER_THAN -"⪗" => Ts.SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE -"⪘" => Ts.SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE -"⪙" => Ts.DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN -"⪚" => Ts.DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN -"⪛" => Ts.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_LESS_THAN -"⪜" => Ts.DOUBLE_LINE_SLANTED_EQUAL_TO_OR_GREATER_THAN -"⪝" => Ts.SIMILAR_OR_LESS_THAN -"⪞" => Ts.SIMILAR_OR_GREATER_THAN -"⪟" => Ts.SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN -"⪠" => Ts.SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN -"⪡" => Ts.DOUBLE_NESTED_LESS_THAN -"⪢" => Ts.DOUBLE_NESTED_GREATER_THAN -"⪣" => Ts.DOUBLE_NESTED_LESS_THAN_WITH_UNDERBAR -"⪤" => Ts.GREATER_THAN_OVERLAPPING_LESS_THAN -"⪥" => Ts.GREATER_THAN_BESIDE_LESS_THAN -"⪦" => Ts.LESS_THAN_CLOSED_BY_CURVE -"⪧" => Ts.GREATER_THAN_CLOSED_BY_CURVE -"⪨" => Ts.LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL -"⪩" => 
Ts.GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL -"⪪" => Ts.SMALLER_THAN -"⪫" => Ts.LARGER_THAN -"⪬" => Ts.SMALLER_THAN_OR_EQUAL_TO -"⪭" => Ts.LARGER_THAN_OR_EQUAL_TO -"⪮" => Ts.EQUALS_SIGN_WITH_BUMPY_ABOVE -"⪯" => Ts.PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN -"⪰" => Ts.SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN -"⪱" => Ts.PRECEDES_ABOVE_SINGLE_LINE_NOT_EQUAL_TO -"⪲" => Ts.SUCCEEDS_ABOVE_SINGLE_LINE_NOT_EQUAL_TO -"⪳" => Ts.PRECEDES_ABOVE_EQUALS_SIGN -"⪴" => Ts.SUCCEEDS_ABOVE_EQUALS_SIGN -"⪵" => Ts.PRECEDES_ABOVE_NOT_EQUAL_TO -"⪶" => Ts.SUCCEEDS_ABOVE_NOT_EQUAL_TO -"⪷" => Ts.PRECEDES_ABOVE_ALMOST_EQUAL_TO -"⪸" => Ts.SUCCEEDS_ABOVE_ALMOST_EQUAL_TO -"⪹" => Ts.PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO -"⪺" => Ts.SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO -"⪻" => Ts.DOUBLE_PRECEDES -"⪼" => Ts.DOUBLE_SUCCEEDS -"⪽" => Ts.SUBSET_WITH_DOT -"⪾" => Ts.SUPERSET_WITH_DOT -"⪿" => Ts.SUBSET_WITH_PLUS_SIGN_BELOW -"⫀" => Ts.SUPERSET_WITH_PLUS_SIGN_BELOW -"⫁" => Ts.SUBSET_WITH_MULTIPLICATION_SIGN_BELOW -"⫂" => Ts.SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW -"⫃" => Ts.SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE -"⫄" => Ts.SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE -"⫅" => Ts.SUBSET_OF_ABOVE_EQUALS_SIGN -"⫆" => Ts.SUPERSET_OF_ABOVE_EQUALS_SIGN -"⫇" => Ts.SUBSET_OF_ABOVE_TILDE_OPERATOR -"⫈" => Ts.SUPERSET_OF_ABOVE_TILDE_OPERATOR -"⫉" => Ts.SUBSET_OF_ABOVE_ALMOST_EQUAL_TO -"⫊" => Ts.SUPERSET_OF_ABOVE_ALMOST_EQUAL_TO -"⫋" => Ts.SUBSET_OF_ABOVE_NOT_EQUAL_TO -"⫌" => Ts.SUPERSET_OF_ABOVE_NOT_EQUAL_TO -"⫍" => Ts.SQUARE_LEFT_OPEN_BOX_OPERATOR -"⫎" => Ts.SQUARE_RIGHT_OPEN_BOX_OPERATOR -"⫏" => Ts.CLOSED_SUBSET -"⫐" => Ts.CLOSED_SUPERSET -"⫑" => Ts.CLOSED_SUBSET_OR_EQUAL_TO -"⫒" => Ts.CLOSED_SUPERSET_OR_EQUAL_TO -"⫓" => Ts.SUBSET_ABOVE_SUPERSET -"⫔" => Ts.SUPERSET_ABOVE_SUBSET -"⫕" => Ts.SUBSET_ABOVE_SUBSET -"⫖" => Ts.SUPERSET_ABOVE_SUPERSET -"⫗" => Ts.SUPERSET_BESIDE_SUBSET -"⫘" => Ts.SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET -"⫙" => Ts.ELEMENT_OF_OPENING_DOWNWARDS -"⫷" => Ts.TRIPLE_NESTED_LESS_THAN -"⫸" => 
Ts.TRIPLE_NESTED_GREATER_THAN -"⫹" => Ts.DOUBLE_LINE_SLANTED_LESS_THAN_OR_EQUAL_TO -"⫺" => Ts.DOUBLE_LINE_SLANTED_GREATER_THAN_OR_EQUAL_TO -"⊢" => Ts.RIGHT_TACK -"⊣" => Ts.LEFT_TACK -# ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350 -"⫪" => Ts.DOUBLE_DOWN_TACK -"⫫" => Ts.DOUBLE_UP_TACK -"⟂" => Ts.PERP -"END_COMPARISON" => Ts.end_comparison - -# Level 7 -"BEGIN_PIPE" => Ts.begin_pipe -"<|" => Ts.LPIPE -"|>" => Ts.RPIPE -"END_PIPE" => Ts.end_pipe - -# Level 8 -"BEGIN_COLON" => Ts.begin_colon -":" => Ts.COLON -".." => Ts.DDOT -"…" => Ts.LDOTS -"⁝" => Ts.TRICOLON -"⋮" => Ts.VDOTS -"⋱" => Ts.DDOTS -"⋰" => Ts.ADOTS -"⋯" => Ts.CDOTS -"END_COLON" => Ts.end_colon - -# Level 9 -"BEGIN_PLUS" => Ts.begin_plus -"\$" => Ts.EX_OR -"+" => Ts.PLUS -"-" => Ts.MINUS -"++" => Ts.PLUSPLUS -"⊕" => Ts.CIRCLED_PLUS -"⊖" => Ts.CIRCLED_MINUS -"⊞" => Ts.SQUARED_PLUS -"⊟" => Ts.SQUARED_MINUS -"|" => Ts.OR -"∪" => Ts.UNION -"∨" => Ts.LOGICAL_OR -"⊔" => Ts.SQUARE_CUP -"±" => Ts.PLUS_MINUS_SIGN -"∓" => Ts.MINUS_OR_PLUS_SIGN -"∔" => Ts.DOT_PLUS -"∸" => Ts.DOT_MINUS -"≂" => Ts.MINUS_TILDE -"≏" => Ts.DIFFERENCE_BETWEEN -"⊎" => Ts.MULTISET_UNION -"⊻" => Ts.XOR -"⊽" => Ts.NOR -"⋎" => Ts.CURLY_LOGICAL_OR -"⋓" => Ts.DOUBLE_UNION -"⧺" => Ts.DOUBLE_PLUS -"⧻" => Ts.TRIPLE_PLUS -"⨈" => Ts.TWO_LOGICAL_OR_OPERATOR -"⨢" => Ts.PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE -"⨣" => Ts.PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE -"⨤" => Ts.PLUS_SIGN_WITH_TILDE_ABOVE -"⨥" => Ts.PLUS_SIGN_WITH_DOT_BELOW -"⨦" => Ts.PLUS_SIGN_WITH_TILDE_BELOW -"⨧" => Ts.PLUS_SIGN_WITH_SUBSCRIPT_TWO -"⨨" => Ts.PLUS_SIGN_WITH_BLACK_TRIANGLE -"⨩" => Ts.MINUS_SIGN_WITH_COMMA_ABOVE -"⨪" => Ts.MINUS_SIGN_WITH_DOT_BELOW -"⨫" => Ts.MINUS_SIGN_WITH_FALLING_DOTS -"⨬" => Ts.MINUS_SIGN_WITH_RISING_DOTS -"⨭" => Ts.PLUS_SIGN_IN_LEFT_HALF_CIRCLE -"⨮" => Ts.PLUS_SIGN_IN_RIGHT_HALF_CIRCLE -"⨹" => Ts.PLUS_SIGN_IN_TRIANGLE -"⨺" => Ts.MINUS_SIGN_IN_TRIANGLE -"⩁" => Ts.UNION_WITH_MINUS_SIGN -"⩂" => Ts.UNION_WITH_OVERBAR -"⩅" => Ts.UNION_WITH_LOGICAL_OR -"⩊" => 
Ts.UNION_BESIDE_AND_JOINED_WITH_UNION -"⩌" => Ts.CLOSED_UNION_WITH_SERIFS -"⩏" => Ts.DOUBLE_SQUARE_UNION -"⩐" => Ts.CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT -"⩒" => Ts.LOGICAL_OR_WITH_DOT_ABOVE -"⩔" => Ts.DOUBLE_LOGICAL_OR -"⩖" => Ts.TWO_INTERSECTING_LOGICAL_OR -"⩗" => Ts.SLOPING_LARGE_OR -"⩛" => Ts.LOGICAL_OR_WITH_MIDDLE_STEM -"⩝" => Ts.LOGICAL_OR_WITH_HORIZONTAL_DASH -"⩡" => Ts.SMALL_VEE_WITH_UNDERBAR -"⩢" => Ts.LOGICAL_OR_WITH_DOUBLE_OVERBAR -"⩣" => Ts.LOGICAL_OR_WITH_DOUBLE_UNDERBAR -"¦" => Ts.BROKEN_BAR -"END_PLUS" => Ts.end_plus - -# Level 10 -"BEGIN_BITSHIFTS" => Ts.begin_bitshifts -"<<" => Ts.LBITSHIFT -">>" => Ts.RBITSHIFT -">>>" => Ts.UNSIGNED_BITSHIFT -"END_BITSHIFTS" => Ts.end_bitshifts - -# Level 11 -"BEGIN_TIMES" => Ts.begin_times -"*" => Ts.STAR -"/" => Ts.FWD_SLASH -"÷" => Ts.DIVISION_SIGN -"%" => Ts.REM -"⋅" => Ts.UNICODE_DOT -"∘" => Ts.RING_OPERATOR -"×" => Ts.MULTIPLICATION_SIGN -"\\" => Ts.BACKSLASH -"&" => Ts.AND -"∩" => Ts.INTERSECTION -"∧" => Ts.LOGICAL_AND -"⊗" => Ts.CIRCLED_TIMES -"⊘" => Ts.CIRCLED_DIVISION_SLASH -"⊙" => Ts.CIRCLED_DOT_OPERATOR -"⊚" => Ts.CIRCLED_RING_OPERATOR -"⊛" => Ts.CIRCLED_ASTERISK_OPERATOR -"⊠" => Ts.SQUARED_TIMES -"⊡" => Ts.SQUARED_DOT_OPERATOR -"⊓" => Ts.SQUARE_CAP -"∗" => Ts.ASTERISK_OPERATOR -"∙" => Ts.BULLET_OPERATOR -"∤" => Ts.DOES_NOT_DIVIDE -"⅋" => Ts.TURNED_AMPERSAND -"≀" => Ts.WREATH_PRODUCT -"⊼" => Ts.NAND -"⋄" => Ts.DIAMOND_OPERATOR -"⋆" => Ts.STAR_OPERATOR -"⋇" => Ts.DIVISION_TIMES -"⋉" => Ts.LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT -"⋊" => Ts.RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT -"⋋" => Ts.LEFT_SEMIDIRECT_PRODUCT -"⋌" => Ts.RIGHT_SEMIDIRECT_PRODUCT -"⋏" => Ts.CURLY_LOGICAL_AND -"⋒" => Ts.DOUBLE_INTERSECTION -"⟑" => Ts.AND_WITH_DOT -"⦸" => Ts.CIRCLED_REVERSE_SOLIDUS -"⦼" => Ts.CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN -"⦾" => Ts.CIRCLED_WHITE_BULLET -"⦿" => Ts.CIRCLED_BULLET -"⧶" => Ts.SOLIDUS_WITH_OVERBAR -"⧷" => Ts.REVERSE_SOLIDUS_WITH_HORIZONTAL_STROKE -"⨇" => Ts.TWO_LOGICAL_AND_OPERATOR -"⨰" => 
Ts.MULTIPLICATION_SIGN_WITH_DOT_ABOVE -"⨱" => Ts.MULTIPLICATION_SIGN_WITH_UNDERBAR -"⨲" => Ts.SEMIDIRECT_PRODUCT_WITH_BOTTOM_CLOSED -"⨳" => Ts.SMASH_PRODUCT -"⨴" => Ts.MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE -"⨵" => Ts.MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE -"⨶" => Ts.CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT -"⨷" => Ts.MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE -"⨸" => Ts.CIRCLED_DIVISION_SIGN -"⨻" => Ts.MULTIPLICATION_SIGN_IN_TRIANGLE -"⨼" => Ts.INTERIOR_PRODUCT -"⨽" => Ts.RIGHTHAND_INTERIOR_PRODUCT -"⩀" => Ts.INTERSECTION_WITH_DOT -"⩃" => Ts.INTERSECTION_WITH_OVERBAR -"⩄" => Ts.INTERSECTION_WITH_LOGICAL_AND -"⩋" => Ts.INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION -"⩍" => Ts.CLOSED_INTERSECTION_WITH_SERIFS -"⩎" => Ts.DOUBLE_SQUARE_INTERSECTION -"⩑" => Ts.LOGICAL_AND_WITH_DOT_ABOVE -"⩓" => Ts.DOUBLE_LOGICAL_AND -"⩕" => Ts.TWO_INTERSECTING_LOGICAL_AND -"⩘" => Ts.SLOPING_LARGE_AND -"⩚" => Ts.LOGICAL_AND_WITH_MIDDLE_STEM -"⩜" => Ts.LOGICAL_AND_WITH_HORIZONTAL_DASH -"⩞" => Ts.LOGICAL_AND_WITH_DOUBLE_OVERBAR -"⩟" => Ts.LOGICAL_AND_WITH_UNDERBAR -"⩠" => Ts.LOGICAL_AND_WITH_DOUBLE_UNDERBAR -"⫛" => Ts.TRANSVERSAL_INTERSECTION -"⊍" => Ts.MULTISET_MULTIPLICATION -"▷" => Ts.WHITE_RIGHT_POINTING_TRIANGLE -"⨝" => Ts.JOIN -"⟕" => Ts.LEFT_OUTER_JOIN -"⟖" => Ts.RIGHT_OUTER_JOIN -"⟗" => Ts.FULL_OUTER_JOIN -"⌿" => Ts.NOT_SLASH -"⨟" => Ts.BB_SEMI -"END_TIMES" => Ts.end_times - -# Level 12 -"BEGIN_RATIONAL" => Ts.begin_rational -"//" => Ts.FWDFWD_SLASH -"END_RATIONAL" => Ts.end_rational - -# Level 13 -"BEGIN_POWER" => Ts.begin_power -"^" => Ts.CIRCUMFLEX_ACCENT -"↑" => Ts.UPWARDS_ARROW -"↓" => Ts.DOWNWARDS_ARROW -"⇵" => Ts.DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW -"⟰" => Ts.UPWARDS_QUADRUPLE_ARROW -"⟱" => Ts.DOWNWARDS_QUADRUPLE_ARROW -"⤈" => Ts.DOWNWARDS_ARROW_WITH_HORIZONTAL_STROKE -"⤉" => Ts.UPWARDS_ARROW_WITH_HORIZONTAL_STROKE -"⤊" => Ts.UPWARDS_TRIPLE_ARROW -"⤋" => Ts.DOWNWARDS_TRIPLE_ARROW -"⤒" => Ts.UPWARDS_ARROW_TO_BAR -"⤓" => Ts.DOWNWARDS_ARROW_TO_BAR -"⥉" => 
Ts.UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE -"⥌" => Ts.UP_BARB_RIGHT_DOWN_BARB_LEFT_HARPOON -"⥍" => Ts.UP_BARB_LEFT_DOWN_BARB_RIGHT_HARPOON -"⥏" => Ts.UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON -"⥑" => Ts.UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON -"⥔" => Ts.UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR -"⥕" => Ts.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR -"⥘" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR -"⥙" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR -"⥜" => Ts.UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR -"⥝" => Ts.DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR -"⥠" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR -"⥡" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR -"⥣" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT -"⥥" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT -"⥮" => Ts.UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT -"⥯" => Ts.DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT -"↑" => Ts.HALFWIDTH_UPWARDS_ARROW -"↓" => Ts.HALFWIDTH_DOWNWARDS_ARROW -"END_POWER" => Ts.end_power - -# Level 14 -"BEGIN_DECL" => Ts.begin_decl -"::" => Ts.DECLARATION -"END_DECL" => Ts.end_decl - -# Level 15 -"BEGIN_WHERE" => Ts.begin_where -"where" => Ts.WHERE -"END_WHERE" => Ts.end_where - -# Level 16 -"BEGIN_DOT" => Ts.begin_dot -"." => Ts.DOT -"END_DOT" => Ts.end_dot - -"!" 
=> Ts.NOT -"'" => Ts.PRIME -".'" => Ts.TRANSPOSE -"->" => Ts.ANON_FUNC - -"BEGIN_UNICODE_OPS" => Ts.begin_unicode_ops -"¬" => Ts.NOT_SIGN -"√" => Ts.SQUARE_ROOT -"∛" => Ts.CUBE_ROOT -"∜" => Ts.QUAD_ROOT -"END_UNICODE_OPS" => Ts.end_unicode_ops - -"END_OPS" => Ts.end_ops - -"BEGIN_PARSER_TOKENS" => Ts.begin_parser_tokens - -"TOMBSTONE" => Ts.TOMBSTONE - -# Macro names are modelled as a special kind of identifier because the -# @ may not be attached to the macro name in the source (or may not be -# associated with a token at all in the case of implied macro calls -# like CORE_DOC_MACRO_NAME) -"BEGIN_MACRO_NAMES" => Ts.begin_macro_names -"MacroName" => Ts.MACRO_NAME # A macro name identifier -"@." => Ts.DOT_MACRO_NAME # The macro name of @. -"StringMacroName" => Ts.STRING_MACRO_NAME # macname"some_str" -"CmdMacroName" => Ts.CMD_MACRO_NAME # macname`some_str` -"core_@doc" => Ts.CORE_DOC_MACRO_NAME # Core.@doc -"core_@cmd" => Ts.CORE_CMD_MACRO_NAME # Core.@cmd -"core_@int128_str" => Ts.CORE_INT128_STR_MACRO_NAME # Core.@int128_str -"core_@uint128_str" => Ts.CORE_UINT128_STR_MACRO_NAME # Core.@uint128_str -"core_@big_str" => Ts.CORE_BIG_STR_MACRO_NAME # Core.@big_str -"END_MACRO_NAMES" => Ts.end_macro_names -"END_PARSER_TOKENS" => Ts.end_parser_tokens - -# Our custom syntax tokens -"BEGIN_SYNTAX_KINDS" => Ts.begin_syntax_kinds -"block" => Ts.BLOCK -"call" => Ts.CALL -"comparison" => Ts.COMPARISON -"curly" => Ts.CURLY -"inert" => Ts.INERT -"string" => Ts.STRING_INTERP -"macrocall" => Ts.MACROCALL -"kw" => Ts.KW # the = in f(a=1) -"parameters" => Ts.PARAMETERS # the list after ; in f(; a=1) -"toplevel" => Ts.TOPLEVEL -"tuple" => Ts.TUPLE -"ref" => Ts.REF -"vect" => Ts.VECT -"braces" => Ts.BRACES -"bracescat" => Ts.BRACESCAT -"hcat" => Ts.HCAT -"vcat" => Ts.VCAT -"ncat" => Ts.NCAT -"typed_hcat" => Ts.TYPED_HCAT -"typed_vcat" => Ts.TYPED_VCAT -"typed_ncat" => Ts.TYPED_NCAT -"row" => Ts.ROW -"nrow" => Ts.NROW -"generator" => Ts.GENERATOR -"filter" => Ts.FILTER -"flatten" => 
Ts.FLATTEN -"comprehension" => Ts.COMPREHENSION -"typed_comprehension" => Ts.TYPED_COMPREHENSION -"END_SYNTAX_KINDS" => Ts.end_syntax_kinds - -]) -end - -# Mapping from kinds to their unique string representation, if it exists -const _kind_to_str_unique = - Dict{Kind,String}(k=>string(s) for (k,s) in TzTokens.UNICODE_OPS_REVERSE) -for kw in split(""" - ( [ { } ] ) @ , ; " \"\"\" ` ``` - - baremodule begin break catch const - continue do else elseif end export finally for - function global if import let local - macro module quote return struct try type using while - - as abstract doc mutable outer primitive type var - - block call comparison curly string inert macrocall kw parameters - toplevel tuple ref vect braces bracescat hcat - vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator - filter flatten comprehension typed_comprehension - - error nothing true false None - """) - _kind_to_str_unique[_str_to_kind[kw]] = kw -end - -const _kind_to_str = Dict(s=>k for (k,s) in _str_to_kind) diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl index 584cdb9aa55e9..df5ca9392e725 100644 --- a/JuliaSyntax/src/tokens.jl +++ b/JuliaSyntax/src/tokens.jl @@ -1,43 +1,4 @@ -using .Tokenize.Tokens: Kind -include("token_kinds.jl") - -""" - K"s" - -The full kind of a string "s". For example, K")" is the kind of the -right parenthesis token. - -Naming rules: -* Kinds which correspond to exactly one textural form are represented with that - text. This includes keywords like K"for" and operators like K"*". -* Kinds which represent many textural forms have UpperCamelCase names. This - includes kinds like K"Identifier" and K"Comment". -* Kinds which exist merely as delimiters are all uppercase -""" -macro K_str(str) - get(_str_to_kind, str) do - error("unknown token kind K$(repr(str))") - end -end - -""" -A set of kinds which can be used with the `in` operator. 
For example - - k in KSet"+ - *" -""" -macro KSet_str(str) - kinds = [get(_str_to_kind, s) do - error("unknown token kind KSet\"$(repr(str)[2:end-1])\"") - end - for s in split(str)] - - quote - ($(kinds...),) - end -end - -kind(k::Kind) = k kind(raw::TzTokens.Token) = TzTokens.exactkind(raw) # Some renaming for naming consistency @@ -114,6 +75,32 @@ function is_whitespace(t) kind(t) in (K"Whitespace", K"NewlineWs") end + +#------------------------------------------------------------------------------- +# Mapping from kinds to their unique string representation, if it exists +# FIXME: put this in token_kinds ? +const _kind_to_str_unique = + Dict{Kind,String}(k=>string(s) for (k,s) in TzTokens.UNICODE_OPS_REVERSE) +for kw in split(""" + ( [ { } ] ) @ , ; " \"\"\" ` ``` + + baremodule begin break catch const + continue do else elseif end export finally for + function global if import let local + macro module quote return struct try type using while + + as abstract doc mutable outer primitive type var + + block call comparison curly string inert macrocall kw parameters + toplevel tuple ref vect braces bracescat hcat + vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator + filter flatten comprehension typed_comprehension + + error nothing true false None + """) + _kind_to_str_unique[convert(Kind, kw)] = kw +end + """ Return the string representation of a token kind, or `nothing` if the kind represents a class of tokens like K"Identifier". @@ -125,5 +112,9 @@ return the name of the kind. TODO: Replace `untokenize()` with `Base.string()`? """ function untokenize(k::Kind; unique=true) - get(unique ? 
_kind_to_str_unique : _kind_to_str, k, nothing) + if unique + get(_kind_to_str_unique, k, nothing) + else + convert(String, k) + end end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index b0def96833d76..d015576e1f50e 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -1,4 +1,23 @@ -const T = Tokenize.Tokens +# Hack: Introduce a module here to isolate some Tokenize internals from JuliaSyntax +module TokenizeTests + +using Test + +using JuliaSyntax: + # Parsing + @K_str, + kind, + Kind + +using JuliaSyntax.Tokenize: + Tokens, + Lexers, + Tokenize, + tokenize, + untokenize + +using JuliaSyntax.Tokenize.Tokens: + Token tok(str, i = 1) = collect(tokenize(str))[i] @@ -23,11 +42,11 @@ end # testset str = "𝘋 =2β" for s in [str, IOBuffer(str)] l = tokenize(s) - kinds = [T.IDENTIFIER, T.WHITESPACE, T.OP, - T.INTEGER, T.IDENTIFIER, T.ENDMARKER] + kinds = [K"Identifier", K"Whitespace", K"=", + K"Integer", K"Identifier", K"EndMarker"] token_strs = ["𝘋", " ", "=", "2", "β", ""] for (i, n) in enumerate(l) - @test T.kind(n) == kinds[i] + @test kind(n) == kinds[i] @test untokenize(n, str) == token_strs[i] end end @@ -64,61 +83,61 @@ end # testset # Generate the following with # ``` # for t in Tokens.kind.(collect(tokenize(str))) - # print("T.", t, ",") + # print(kind(t), ",") # end # ``` # and *check* it afterwards. 
- kinds = [T.KEYWORD,T.WHITESPACE,T.IDENTIFIER,T.LBRACE,T.IDENTIFIER, - T.OP,T.IDENTIFIER,T.RBRACE,T.LPAREN,T.IDENTIFIER,T.OP, - T.LBRACE,T.IDENTIFIER,T.RBRACE,T.OP,T.INTEGER,T.RPAREN, + kinds = [K"function",K"Whitespace",K"Identifier",K"{",K"Identifier", + K"<:",K"Identifier",K"}",K"(",K"Identifier",K"::", + K"{",K"Identifier",K"}",K"=",K"Integer",K")", - T.NEWLINE_WS,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.LPAREN, - T.IDENTIFIER,T.OP,T.IDENTIFIER,T.COMMA,T.WHITESPACE, - T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.SEMICOLON, + K"NewlineWs",K"@",K"Identifier",K"Whitespace",K"(", + K"Identifier",K"+",K"Identifier",K",",K"Whitespace", + K"Identifier",K"+",K"Identifier",K")",K";", - T.NEWLINE_WS,T.KEYWORD, + K"NewlineWs",K"end", - T.NEWLINE_WS,T.KEYWORD, - T.NEWLINE_WS,T.IDENTIFIER, - T.NEWLINE_WS,T.KEYWORD, - T.NEWLINE_WS,T.IDENTIFIER, - T.NEWLINE_WS,T.KEYWORD, + K"NewlineWs",K"try", + K"NewlineWs",K"Identifier", + K"NewlineWs",K"catch", + K"NewlineWs",K"Identifier", + K"NewlineWs",K"end", - T.NEWLINE_WS,T.AT_SIGN,T.IDENTIFIER,T.WHITESPACE,T.IDENTIFIER, - T.OP,T.IDENTIFIER, + K"NewlineWs",K"@",K"Identifier",K"Whitespace",K"Identifier", + K"+",K"Identifier", - T.NEWLINE_WS,T.IDENTIFIER,T.LSQUARE,T.LSQUARE,T.INTEGER,T.WHITESPACE, - T.INTEGER,T.WHITESPACE,T.INTEGER,T.RSQUARE,T.RSQUARE, + K"NewlineWs",K"Identifier",K"[",K"[",K"Integer",K"Whitespace", + K"Integer",K"Whitespace",K"Integer",K"]",K"]", - T.NEWLINE_WS,T.LSQUARE,T.INTEGER,T.OP,T.INTEGER,T.COMMA,T.INTEGER, - T.SEMICOLON,T.INTEGER,T.COMMA,T.INTEGER,T.RSQUARE, + K"NewlineWs",K"[",K"Integer",K"*",K"Integer",K",",K"Integer", + K";",K"Integer",K",",K"Integer",K"]", - T.NEWLINE_WS,T.DQUOTE,T.STRING,T.DQUOTE,T.SEMICOLON,T.WHITESPACE,T.CHAR, + K"NewlineWs",K"\"",K"String",K"\"",K";",K"Whitespace",K"Char", - T.NEWLINE_WS,T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN,T.OP, - T.LPAREN,T.IDENTIFIER,T.OP,T.IDENTIFIER,T.RPAREN, + K"NewlineWs",K"(",K"Identifier",K"&&",K"Identifier",K")",K"||", + 
K"(",K"Identifier",K"||",K"Identifier",K")", - T.NEWLINE_WS,T.COMMENT, + K"NewlineWs",K"Comment", - T.NEWLINE_WS,T.COMMENT, + K"NewlineWs",K"Comment", - T.NEWLINE_WS,T.INTEGER,T.OP,T.INTEGER, + K"NewlineWs",K"Integer",K"%",K"Integer", - T.NEWLINE_WS,T.IDENTIFIER,T.OP,T.OP,T.IDENTIFIER,T.OP, + K"NewlineWs",K"Identifier",K"'",K"/",K"Identifier",K"'", - T.NEWLINE_WS,T.IDENTIFIER,T.OP,T.OP,T.OP,T.IDENTIFIER,T.OP,T.OP, + K"NewlineWs",K"Identifier",K".",K"'",K"\\",K"Identifier",K".",K"'", - T.NEWLINE_WS,T.BACKTICK,T.CMD,T.BACKTICK, + K"NewlineWs",K"`",K"CmdString",K"`", - T.NEWLINE_WS,T.INTEGER,T.IDENTIFIER,T.LPAREN,T.INTEGER,T.RPAREN, + K"NewlineWs",K"Integer",K"Identifier",K"(",K"Integer",K")", - T.NEWLINE_WS,T.LBRACE,T.RBRACE, + K"NewlineWs",K"{",K"}", - T.NEWLINE_WS,T.ERROR,T.ENDMARKER] + K"NewlineWs",K"ErrorEofChar",K"EndMarker"] for (i, n) in enumerate(tokenize(str)) - @test Tokens.kind(n) == kinds[i] + @test kind(n) == kinds[i] end @testset "roundtrippability" begin @@ -129,7 +148,7 @@ end # testset end # testset @testset "issue 5, '..'" begin - @test Tokens.kind.(collect(tokenize("1.23..3.21"))) == [T.FLOAT,T.OP,T.FLOAT,T.ENDMARKER] + @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K"..",K"Float",K"EndMarker"] end @testset "issue 17, >>" begin @@ -139,55 +158,55 @@ end @testset "test added operators" begin - @test tok("1+=2", 2).kind == T.PLUS_EQ - @test tok("1-=2", 2).kind == T.MINUS_EQ - @test tok("1:=2", 2).kind == T.COLON_EQ - @test tok("1*=2", 2).kind == T.STAR_EQ - @test tok("1^=2", 2).kind == T.CIRCUMFLEX_EQ - @test tok("1÷=2", 2).kind == T.DIVISION_EQ - @test tok("1\\=2", 2).kind == T.BACKSLASH_EQ - @test tok("1\$=2", 2).kind == T.EX_OR_EQ - @test tok("1-->2", 2).kind == T.RIGHT_ARROW - @test tok("1<--2", 2).kind == T.LEFT_ARROW - @test tok("1<-->2", 2).kind == T.DOUBLE_ARROW - @test tok("1>:2", 2).kind == T.ISSUPERTYPE + @test tok("1+=2", 2).kind == K"+=" + @test tok("1-=2", 2).kind == K"-=" + @test tok("1:=2", 2).kind == K":=" + @test 
tok("1*=2", 2).kind == K"*=" + @test tok("1^=2", 2).kind == K"^=" + @test tok("1÷=2", 2).kind == K"÷=" + @test tok("1\\=2", 2).kind == K"\=" + @test tok("1\$=2", 2).kind == K"$=" + @test tok("1-->2", 2).kind == K"-->" + @test tok("1<--2", 2).kind == K"<--" + @test tok("1<-->2", 2).kind == K"<-->" + @test tok("1>:2", 2).kind == K">:" end @testset "infix" begin - @test tok("1 in 2", 3).kind == T.IN - @test tok("1 in[1]", 3).kind == T.IN + @test tok("1 in 2", 3).kind == K"in" + @test tok("1 in[1]", 3).kind == K"in" - @test tok("1 isa 2", 3).kind == T.ISA - @test tok("1 isa[2]", 3).kind == T.ISA + @test tok("1 isa 2", 3).kind == K"isa" + @test tok("1 isa[2]", 3).kind == K"isa" end @testset "tokenizing true/false literals" begin - @test tok("somtext true", 3).kind == T.TRUE - @test tok("somtext false", 3).kind == T.FALSE - @test tok("somtext tr", 3).kind == T.IDENTIFIER - @test tok("somtext falsething", 3).kind == T.IDENTIFIER + @test tok("somtext true", 3).kind == K"true" + @test tok("somtext false", 3).kind == K"false" + @test tok("somtext tr", 3).kind == K"Identifier" + @test tok("somtext falsething", 3).kind == K"Identifier" end function test_roundtrip(str, kind, val) - T = tok(str) - @test T.kind == kind - @test untokenize(T, str) == val + t = tok(str) + @test t.kind == kind + @test untokenize(t, str) == val end @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin - test_roundtrip("1234 .+1", Tokens.INTEGER, "1234") - test_roundtrip("1234.0+1", Tokens.FLOAT, "1234.0") - test_roundtrip("1234.0 .+1", Tokens.FLOAT, "1234.0") - test_roundtrip("1234.f(a)", Tokens.FLOAT, "1234.") - test_roundtrip("1234 .f(a)", Tokens.INTEGER, "1234") - test_roundtrip("1234.0.f(a)", Tokens.INVALID_NUMERIC_CONSTANT, "1234.0.") - test_roundtrip("1234.0 .f(a)", Tokens.FLOAT, "1234.0") + test_roundtrip("1234 .+1", K"Integer", "1234") + test_roundtrip("1234.0+1", K"Float", "1234.0") + test_roundtrip("1234.0 .+1", K"Float", "1234.0") + test_roundtrip("1234.f(a)", 
K"Float", "1234.") + test_roundtrip("1234 .f(a)", K"Integer", "1234") + test_roundtrip("1234.0.f(a)", K"ErrorInvalidNumericConstant", "1234.0.") + test_roundtrip("1234.0 .f(a)", K"Float", "1234.0") end @testset "lexing anon functions '->' " begin - @test tok("a->b", 2).kind==Tokens.ANON_FUNC + @test tok("a->b", 2).kind==K"->" end @testset "comments" begin @@ -199,11 +218,11 @@ end 1 """)) - kinds = [T.COMMENT, T.NEWLINE_WS, - T.TRIPLE_DQUOTE, T.STRING, T.STRING, T.TRIPLE_DQUOTE, T.NEWLINE_WS, - T.INTEGER, T.NEWLINE_WS, - T.ENDMARKER] - @test T.kind.(toks) == kinds + kinds = [K"Comment", K"NewlineWs", + K"\"\"\"", K"String", K"String", K"\"\"\"", K"NewlineWs", + K"Integer", K"NewlineWs", + K"EndMarker"] + @test kind.(toks) == kinds end @@ -215,20 +234,20 @@ end tokens = collect(tokenize(str)) @test string(untokenize(tokens[16], str))==string(untokenize(tokens[17], str))=="'" - test_roundtrip("'a'", Tokens.CHAR, "'a'") - test_roundtrip("''", Tokens.CHAR, "''") - test_roundtrip("'''", Tokens.CHAR, "'''") - test_roundtrip("''''", Tokens.CHAR, "'''") + test_roundtrip("'a'", K"Char", "'a'") + test_roundtrip("''", K"Char", "''") + test_roundtrip("'''", K"Char", "'''") + test_roundtrip("''''", K"Char", "'''") - @test tok("''''", 1).kind == Tokens.CHAR - @test tok("''''", 2).kind == Tokens.PRIME - @test tok("()'", 3).kind == Tokens.PRIME - @test tok("{}'", 3).kind == Tokens.PRIME - @test tok("[]'", 3).kind == Tokens.PRIME - @test tok("outer'", 2).kind == Tokens.PRIME - @test tok("mutable'", 2).kind == Tokens.PRIME - @test tok("as'", 2).kind == Tokens.PRIME - @test tok("isa'", 2).kind == Tokens.PRIME + @test tok("''''", 1).kind == K"Char" + @test tok("''''", 2).kind == K"'" + @test tok("()'", 3).kind == K"'" + @test tok("{}'", 3).kind == K"'" + @test tok("[]'", 3).kind == K"'" + @test tok("outer'", 2).kind == K"'" + @test tok("mutable'", 2).kind == K"'" + @test tok("as'", 2).kind == K"'" + @test tok("isa'", 2).kind == K"'" end @testset "keywords" begin @@ -269,7 +288,7 @@ 
end "type", "var"] - @test T.kind(tok(kw)) == T.KEYWORD + @test kind(tok(kw)) == convert(Kind, kw) end end @@ -278,231 +297,231 @@ end end @testset "errors" begin - @test tok("#= #= =#", 1).kind == T.EOF_MULTICOMMENT - @test tok("'dsadsa", 1).kind == T.EOF_CHAR - @test tok("aa **", 3).kind == T.INVALID_OPERATOR + @test tok("#= #= =#", 1).kind == K"ErrorEofMultiComment" + @test tok("'dsadsa", 1).kind == K"ErrorEofChar" + @test tok("aa **", 3).kind == K"ErrorInvalidOperator" end @testset "xor_eq" begin - @test tok("1 ⊻= 2", 3).kind==T.XOR_EQ + @test tok("1 ⊻= 2", 3).kind==K"⊻=" end @testset "lex binary" begin - @test tok("0b0101").kind==T.BIN_INT + @test tok("0b0101").kind==K"BinInt" end @testset "show" begin io = IOBuffer() show(io, collect(tokenize("\"abc\nd\"ef"))[2]) - @test String(take!(io)) == "1-5 STRING " + @test String(take!(io)) == "1-5 String " end -~(tok::T.Token, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2] +~(tok::Token, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2] @testset "raw strings" begin str = raw""" str"x $ \ y" """ ts = collect(tokenize(str)) - @test ts[1] ~ (T.WHITESPACE , " " , str) - @test ts[2] ~ (T.IDENTIFIER , "str" , str) - @test ts[3] ~ (T.DQUOTE , "\"" , str) - @test ts[4] ~ (T.STRING , "x \$ \\ y", str) - @test ts[5] ~ (T.DQUOTE , "\"" , str) - @test ts[6] ~ (T.WHITESPACE , " " , str) - @test ts[7] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"Whitespace" , " " , str) + @test ts[2] ~ (K"Identifier" , "str" , str) + @test ts[3] ~ (K"\"" , "\"" , str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) + @test ts[5] ~ (K"\"" , "\"" , str) + @test ts[6] ~ (K"Whitespace" , " " , str) + @test ts[7] ~ (K"EndMarker" , "" , str) str = raw"""`x $ \ y`""" ts = collect(tokenize(str)) - @test ts[1] ~ (T.BACKTICK , "`" , str) - @test ts[2] ~ (T.CMD , "x \$ \\ y" , str) - @test ts[3] ~ (T.BACKTICK , "`" , str) - @test ts[4] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"`" , "`" , str) + @test ts[2] ~ (K"CmdString" , "x 
\$ \\ y" , str) + @test ts[3] ~ (K"`" , "`" , str) + @test ts[4] ~ (K"EndMarker" , "" , str) # str"\\" str = "str\"\\\\\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.IDENTIFIER , "str" , str) - @test ts[2] ~ (T.DQUOTE , "\"" , str) - @test ts[3] ~ (T.STRING , "\\\\" , str) - @test ts[4] ~ (T.DQUOTE , "\"" , str) - @test ts[5] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"Identifier" , "str" , str) + @test ts[2] ~ (K"\"" , "\"" , str) + @test ts[3] ~ (K"String" , "\\\\" , str) + @test ts[4] ~ (K"\"" , "\"" , str) + @test ts[5] ~ (K"EndMarker" , "" , str) # str"\\\"" str = "str\"\\\\\\\"\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.IDENTIFIER , "str" , str) - @test ts[2] ~ (T.DQUOTE , "\"" , str) - @test ts[3] ~ (T.STRING , "\\\\\\\"" , str) - @test ts[4] ~ (T.DQUOTE , "\"" , str) - @test ts[5] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"Identifier" , "str" , str) + @test ts[2] ~ (K"\"" , "\"" , str) + @test ts[3] ~ (K"String" , "\\\\\\\"" , str) + @test ts[4] ~ (K"\"" , "\"" , str) + @test ts[5] ~ (K"EndMarker" , "" , str) # Contextual keywords and operators allowed as raw string prefixes str = raw""" var"x $ \ y" """ ts = collect(tokenize(str)) - @test ts[2] ~ (T.VAR , "var", str) - @test ts[4] ~ (T.STRING , "x \$ \\ y", str) + @test ts[2] ~ (K"var" , "var", str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) str = raw""" outer"x $ \ y" """ ts = collect(tokenize(str)) - @test ts[2] ~ (T.OUTER , "outer", str) - @test ts[4] ~ (T.STRING , "x \$ \\ y", str) + @test ts[2] ~ (K"outer" , "outer", str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) str = raw""" isa"x $ \ y" """ ts = collect(tokenize(str)) - @test ts[2] ~ (T.ISA , "isa", str) - @test ts[4] ~ (T.STRING , "x \$ \\ y", str) + @test ts[2] ~ (K"isa" , "isa", str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) end @testset "string escaped newline whitespace" begin str = "\"x\\\n \ty\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.DQUOTE, "\"", str) - @test ts[2] ~ (T.STRING, "x", str) - @test ts[3] ~ 
(T.WHITESPACE, "\\\n \t", str) - @test ts[4] ~ (T.STRING, "y", str) - @test ts[5] ~ (T.DQUOTE, "\"", str) + @test ts[1] ~ (K"\"", "\"", str) + @test ts[2] ~ (K"String", "x", str) + @test ts[3] ~ (K"Whitespace", "\\\n \t", str) + @test ts[4] ~ (K"String", "y", str) + @test ts[5] ~ (K"\"", "\"", str) # No newline escape for raw strings str = "r\"x\\\ny\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.IDENTIFIER , "r", str) - @test ts[2] ~ (T.DQUOTE, "\"", str) - @test ts[3] ~ (T.STRING, "x\\\ny", str) - @test ts[4] ~ (T.DQUOTE , "\"", str) + @test ts[1] ~ (K"Identifier", "r", str) + @test ts[2] ~ (K"\"", "\"", str) + @test ts[3] ~ (K"String", "x\\\ny", str) + @test ts[4] ~ (K"\"", "\"", str) end @testset "triple quoted string line splitting" begin str = "\"\"\"\nx\r\ny\rz\n\r\"\"\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) - @test ts[2] ~ (T.STRING , "\n", str) - @test ts[3] ~ (T.STRING , "x\r\n", str) - @test ts[4] ~ (T.STRING , "y\r", str) - @test ts[5] ~ (T.STRING , "z\n", str) - @test ts[6] ~ (T.STRING , "\r", str) - @test ts[7] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) + @test ts[1] ~ (K"\"\"\"" , "\"\"\"", str) + @test ts[2] ~ (K"String" , "\n", str) + @test ts[3] ~ (K"String" , "x\r\n", str) + @test ts[4] ~ (K"String" , "y\r", str) + @test ts[5] ~ (K"String" , "z\n", str) + @test ts[6] ~ (K"String" , "\r", str) + @test ts[7] ~ (K"\"\"\"" , "\"\"\"", str) # Also for raw strings str = "r\"\"\"\nx\ny\"\"\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.IDENTIFIER , "r", str) - @test ts[2] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) - @test ts[3] ~ (T.STRING , "\n", str) - @test ts[4] ~ (T.STRING , "x\n", str) - @test ts[5] ~ (T.STRING , "y", str) - @test ts[6] ~ (T.TRIPLE_DQUOTE , "\"\"\"", str) + @test ts[1] ~ (K"Identifier" , "r", str) + @test ts[2] ~ (K"\"\"\"" , "\"\"\"", str) + @test ts[3] ~ (K"String" , "\n", str) + @test ts[4] ~ (K"String" , "x\n", str) + @test ts[5] ~ (K"String" , "y", str) + @test ts[6] ~ (K"\"\"\"" , "\"\"\"", 
str) end @testset "interpolation" begin @testset "basic" begin str = "\"\$x \$y\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.DQUOTE , "\"", str) - @test ts[2] ~ (T.EX_OR , "\$", str) - @test ts[3] ~ (T.IDENTIFIER , "x" , str) - @test ts[4] ~ (T.STRING , " " , str) - @test ts[5] ~ (T.EX_OR , "\$", str) - @test ts[6] ~ (T.IDENTIFIER , "y" , str) - @test ts[7] ~ (T.DQUOTE , "\"", str) - @test ts[8] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"\"" , "\"", str) + @test ts[2] ~ (K"$" , "\$", str) + @test ts[3] ~ (K"Identifier" , "x" , str) + @test ts[4] ~ (K"String" , " " , str) + @test ts[5] ~ (K"$" , "\$", str) + @test ts[6] ~ (K"Identifier" , "y" , str) + @test ts[7] ~ (K"\"" , "\"", str) + @test ts[8] ~ (K"EndMarker" , "" , str) end @testset "nested" begin str = """"str: \$(g("str: \$(h("str"))"))" """ ts = collect(tokenize(str)) @test length(ts) == 23 - @test ts[1] ~ (T.DQUOTE , "\"" , str) - @test ts[2] ~ (T.STRING , "str: ", str) - @test ts[3] ~ (T.EX_OR , "\$" , str) - @test ts[4] ~ (T.LPAREN , "(" , str) - @test ts[5] ~ (T.IDENTIFIER, "g" , str) - @test ts[6] ~ (T.LPAREN , "(" , str) - @test ts[7] ~ (T.DQUOTE , "\"" , str) - @test ts[8] ~ (T.STRING , "str: ", str) - @test ts[9] ~ (T.EX_OR , "\$" , str) - @test ts[10] ~ (T.LPAREN , "(" , str) - @test ts[11] ~ (T.IDENTIFIER, "h" , str) - @test ts[12] ~ (T.LPAREN , "(" , str) - @test ts[13] ~ (T.DQUOTE , "\"" , str) - @test ts[14] ~ (T.STRING , "str" , str) - @test ts[15] ~ (T.DQUOTE , "\"" , str) - @test ts[16] ~ (T.RPAREN , ")" , str) - @test ts[17] ~ (T.RPAREN , ")" , str) - @test ts[18] ~ (T.DQUOTE , "\"" , str) - @test ts[19] ~ (T.RPAREN , ")" , str) - @test ts[20] ~ (T.RPAREN , ")" , str) - @test ts[21] ~ (T.DQUOTE , "\"" , str) - @test ts[22] ~ (T.WHITESPACE, " " , str) - @test ts[23] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"\"" , "\"" , str) + @test ts[2] ~ (K"String" , "str: ", str) + @test ts[3] ~ (K"$" , "\$" , str) + @test ts[4] ~ (K"(" , "(" , str) + @test ts[5] ~ (K"Identifier", "g" , str) 
+ @test ts[6] ~ (K"(" , "(" , str) + @test ts[7] ~ (K"\"" , "\"" , str) + @test ts[8] ~ (K"String" , "str: ", str) + @test ts[9] ~ (K"$" , "\$" , str) + @test ts[10] ~ (K"(" , "(" , str) + @test ts[11] ~ (K"Identifier", "h" , str) + @test ts[12] ~ (K"(" , "(" , str) + @test ts[13] ~ (K"\"" , "\"" , str) + @test ts[14] ~ (K"String" , "str" , str) + @test ts[15] ~ (K"\"" , "\"" , str) + @test ts[16] ~ (K")" , ")" , str) + @test ts[17] ~ (K")" , ")" , str) + @test ts[18] ~ (K"\"" , "\"" , str) + @test ts[19] ~ (K")" , ")" , str) + @test ts[20] ~ (K")" , ")" , str) + @test ts[21] ~ (K"\"" , "\"" , str) + @test ts[22] ~ (K"Whitespace", " " , str) + @test ts[23] ~ (K"EndMarker" , "" , str) end @testset "duplicate \$" begin str = "\"\$\$\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.DQUOTE , "\"", str) - @test ts[2] ~ (T.EX_OR , "\$", str) - @test ts[3] ~ (T.EX_OR , "\$", str) - @test ts[4] ~ (T.DQUOTE , "\"", str) - @test ts[5] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"\"" , "\"", str) + @test ts[2] ~ (K"$" , "\$", str) + @test ts[3] ~ (K"$" , "\$", str) + @test ts[4] ~ (K"\"" , "\"", str) + @test ts[5] ~ (K"EndMarker" , "" , str) end @testset "Unmatched parens" begin # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73 str = "\"\$(fdsf\"" ts = collect(tokenize(str)) - @test ts[1] ~ (T.DQUOTE , "\"" , str) - @test ts[2] ~ (T.EX_OR , "\$" , str) - @test ts[3] ~ (T.LPAREN , "(" , str) - @test ts[4] ~ (T.IDENTIFIER , "fdsf" , str) - @test ts[5] ~ (T.DQUOTE , "\"" , str) - @test ts[6] ~ (T.ENDMARKER , "" , str) + @test ts[1] ~ (K"\"" , "\"" , str) + @test ts[2] ~ (K"$" , "\$" , str) + @test ts[3] ~ (K"(" , "(" , str) + @test ts[4] ~ (K"Identifier" , "fdsf" , str) + @test ts[5] ~ (K"\"" , "\"" , str) + @test ts[6] ~ (K"EndMarker" , "" , str) end @testset "Unicode" begin # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178 str = """ "\$uₕx \$(uₕx - ux)" """ ts = collect(tokenize(str)) - @test ts[ 1] ~ (T.WHITESPACE , " " , str) - @test ts[ 2] ~ 
(T.DQUOTE , "\"" , str) - @test ts[ 3] ~ (T.EX_OR , "\$" , str) - @test ts[ 4] ~ (T.IDENTIFIER , "uₕx" , str) - @test ts[ 5] ~ (T.STRING , " " , str) - @test ts[ 6] ~ (T.EX_OR , "\$" , str) - @test ts[ 7] ~ (T.LPAREN , "(" , str) - @test ts[ 8] ~ (T.IDENTIFIER , "uₕx" , str) - @test ts[ 9] ~ (T.WHITESPACE , " " , str) - @test ts[10] ~ (T.MINUS , "-" , str) - @test ts[11] ~ (T.WHITESPACE , " " , str) - @test ts[12] ~ (T.IDENTIFIER , "ux" , str) - @test ts[13] ~ (T.RPAREN , ")" , str) - @test ts[14] ~ (T.DQUOTE , "\"" , str) - @test ts[15] ~ (T.WHITESPACE , " " , str) - @test ts[16] ~ (T.ENDMARKER , "" , str) + @test ts[ 1] ~ (K"Whitespace" , " " , str) + @test ts[ 2] ~ (K"\"" , "\"" , str) + @test ts[ 3] ~ (K"$" , "\$" , str) + @test ts[ 4] ~ (K"Identifier" , "uₕx" , str) + @test ts[ 5] ~ (K"String" , " " , str) + @test ts[ 6] ~ (K"$" , "\$" , str) + @test ts[ 7] ~ (K"(" , "(" , str) + @test ts[ 8] ~ (K"Identifier" , "uₕx" , str) + @test ts[ 9] ~ (K"Whitespace" , " " , str) + @test ts[10] ~ (K"-" , "-" , str) + @test ts[11] ~ (K"Whitespace" , " " , str) + @test ts[12] ~ (K"Identifier" , "ux" , str) + @test ts[13] ~ (K")" , ")" , str) + @test ts[14] ~ (K"\"" , "\"" , str) + @test ts[15] ~ (K"Whitespace" , " " , str) + @test ts[16] ~ (K"EndMarker" , "" , str) end @testset "var\"...\" disabled in interpolations" begin str = """ "\$var"x" " """ ts = collect(tokenize(str)) - @test ts[ 1] ~ (T.WHITESPACE , " " , str) - @test ts[ 2] ~ (T.DQUOTE , "\"" , str) - @test ts[ 3] ~ (T.EX_OR , "\$" , str) - @test ts[ 4] ~ (T.VAR , "var" , str) - @test ts[ 5] ~ (T.DQUOTE , "\"" , str) - @test ts[ 6] ~ (T.IDENTIFIER , "x" , str) - @test ts[ 7] ~ (T.DQUOTE , "\"" , str) - @test ts[ 8] ~ (T.STRING , " " , str) - @test ts[ 9] ~ (T.DQUOTE , "\"" , str) - @test ts[10] ~ (T.WHITESPACE , " " , str) - @test ts[11] ~ (T.ENDMARKER , "" , str) + @test ts[ 1] ~ (K"Whitespace" , " " , str) + @test ts[ 2] ~ (K"\"" , "\"" , str) + @test ts[ 3] ~ (K"$" , "\$" , str) + @test ts[ 4] ~ (K"var" , "var" 
, str) + @test ts[ 5] ~ (K"\"" , "\"" , str) + @test ts[ 6] ~ (K"Identifier" , "x" , str) + @test ts[ 7] ~ (K"\"" , "\"" , str) + @test ts[ 8] ~ (K"String" , " " , str) + @test ts[ 9] ~ (K"\"" , "\"" , str) + @test ts[10] ~ (K"Whitespace" , " " , str) + @test ts[11] ~ (K"EndMarker" , "" , str) end @testset "invalid chars after identifier" begin str = """ "\$x෴" """ ts = collect(tokenize(str)) - @test ts[4] ~ (T.IDENTIFIER , "x" , str) - @test ts[5] ~ (T.INVALID_INTERPOLATION_TERMINATOR , "" , str) - @test ts[6] ~ (T.STRING , "෴" , str) + @test ts[4] ~ (K"Identifier" , "x" , str) + @test ts[5] ~ (K"ErrorInvalidInterpolationTerminator" , "" , str) + @test ts[6] ~ (K"String" , "෴" , str) @test Tokens.iserror(ts[5].kind) - @test ts[5].kind == Tokens.INVALID_INTERPOLATION_TERMINATOR + @test ts[5].kind == K"ErrorInvalidInterpolationTerminator" end end @@ -512,134 +531,134 @@ end end @testset "modifying function names (!) followed by operator" begin - @test tok("a!=b", 2).kind == Tokens.NOT_EQ - @test tok("a!!=b", 2).kind == Tokens.NOT_EQ - @test tok("!=b", 1).kind == Tokens.NOT_EQ + @test tok("a!=b", 2).kind == K"!=" + @test tok("a!!=b", 2).kind == K"!=" + @test tok("!=b", 1).kind == K"!=" end @testset "lex integers" begin - @test tok("1234").kind == T.INTEGER - @test tok("12_34").kind == T.INTEGER - @test tok("_1234").kind == T.IDENTIFIER - @test tok("1234_").kind == T.INTEGER - @test tok("1234_", 2).kind == T.IDENTIFIER - @test tok("1234x").kind == T.INTEGER - @test tok("1234x", 2).kind == T.IDENTIFIER + @test kind(tok("1234")) == K"Integer" + @test kind(tok("12_34")) == K"Integer" + @test kind(tok("_1234")) == K"Identifier" + @test kind(tok("1234_")) == K"Integer" + @test kind(tok("1234_", 2)) == K"Identifier" + @test kind(tok("1234x")) == K"Integer" + @test kind(tok("1234x", 2)) == K"Identifier" end @testset "floats with trailing `.` " begin - @test tok("1.0").kind == Tokens.FLOAT - @test tok("1.a").kind == Tokens.FLOAT - @test tok("1.(").kind == Tokens.FLOAT - @test 
tok("1.[").kind == Tokens.FLOAT - @test tok("1.{").kind == Tokens.FLOAT - @test tok("1.)").kind == Tokens.FLOAT - @test tok("1.]").kind == Tokens.FLOAT - @test tok("1.{").kind == Tokens.FLOAT - @test tok("1.,").kind == Tokens.FLOAT - @test tok("1.;").kind == Tokens.FLOAT - @test tok("1.@").kind == Tokens.FLOAT - @test tok("1.").kind == Tokens.FLOAT - @test tok("1.\"text\" ").kind == Tokens.FLOAT + @test tok("1.0").kind == K"Float" + @test tok("1.a").kind == K"Float" + @test tok("1.(").kind == K"Float" + @test tok("1.[").kind == K"Float" + @test tok("1.{").kind == K"Float" + @test tok("1.)").kind == K"Float" + @test tok("1.]").kind == K"Float" + @test tok("1.{").kind == K"Float" + @test tok("1.,").kind == K"Float" + @test tok("1.;").kind == K"Float" + @test tok("1.@").kind == K"Float" + @test tok("1.").kind == K"Float" + @test tok("1.\"text\" ").kind == K"Float" - @test tok("1..").kind == Tokens.INTEGER - @test T.kind.(collect(tokenize("1f0./1"))) == [T.FLOAT, T.OP, T.INTEGER, T.ENDMARKER] + @test tok("1..").kind == K"Integer" + @test kind.(collect(tokenize("1f0./1"))) == [K"Float", K"/", K"Integer", K"EndMarker"] end @testset "lex octal" begin - @test tok("0o0167").kind == T.OCT_INT + @test tok("0o0167").kind == K"OctInt" end @testset "lex float/bin/hex/oct w underscores" begin - @test tok("1_1.11").kind == T.FLOAT - @test tok("11.1_1").kind == T.FLOAT - @test tok("1_1.1_1").kind == T.FLOAT - @test tok("_1.1_1", 1).kind == T.IDENTIFIER - @test tok("_1.1_1", 2).kind == T.FLOAT - @test tok("0x0167_032").kind == T.HEX_INT - @test tok("0b0101001_0100_0101").kind == T.BIN_INT - @test tok("0o01054001_0100_0101").kind == T.OCT_INT - @test T.kind.(collect(tokenize("1.2."))) == [T.ERROR, T.ENDMARKER] - @test tok("1__2").kind == T.INTEGER - @test tok("1.2_3").kind == T.FLOAT - @test tok("1.2_3", 2).kind == T.ENDMARKER - @test T.kind.(collect(tokenize("3e2_2"))) == [T.FLOAT, T.IDENTIFIER, T.ENDMARKER] - @test T.kind.(collect(tokenize("1__2"))) == [T.INTEGER, T.IDENTIFIER, 
T.ENDMARKER] - @test T.kind.(collect(tokenize("0x2_0_2"))) == [T.HEX_INT, T.ENDMARKER] - @test T.kind.(collect(tokenize("0x2__2"))) == [T.HEX_INT, T.IDENTIFIER, T.ENDMARKER] - @test T.kind.(collect(tokenize("3_2.5_2"))) == [T.FLOAT, T.ENDMARKER] - @test T.kind.(collect(tokenize("3.2e2.2"))) == [T.ERROR, T.INTEGER, T.ENDMARKER] - @test T.kind.(collect(tokenize("3e2.2"))) == [T.ERROR, T.INTEGER, T.ENDMARKER] - @test T.kind.(collect(tokenize("0b101__101"))) == [T.BIN_INT, T.IDENTIFIER, T.ENDMARKER] + @test tok("1_1.11").kind == K"Float" + @test tok("11.1_1").kind == K"Float" + @test tok("1_1.1_1").kind == K"Float" + @test tok("_1.1_1", 1).kind == K"Identifier" + @test tok("_1.1_1", 2).kind == K"Float" + @test tok("0x0167_032").kind == K"HexInt" + @test tok("0b0101001_0100_0101").kind == K"BinInt" + @test tok("0o01054001_0100_0101").kind == K"OctInt" + @test kind.(collect(tokenize("1.2."))) == [K"ErrorInvalidNumericConstant", K"EndMarker"] + @test tok("1__2").kind == K"Integer" + @test tok("1.2_3").kind == K"Float" + @test tok("1.2_3", 2).kind == K"EndMarker" + @test kind.(collect(tokenize("3e2_2"))) == [K"Float", K"Identifier", K"EndMarker"] + @test kind.(collect(tokenize("1__2"))) == [K"Integer", K"Identifier", K"EndMarker"] + @test kind.(collect(tokenize("0x2_0_2"))) == [K"HexInt", K"EndMarker"] + @test kind.(collect(tokenize("0x2__2"))) == [K"HexInt", K"Identifier", K"EndMarker"] + @test kind.(collect(tokenize("3_2.5_2"))) == [K"Float", K"EndMarker"] + @test kind.(collect(tokenize("3.2e2.2"))) == [K"ErrorInvalidNumericConstant", K"Integer", K"EndMarker"] + @test kind.(collect(tokenize("3e2.2"))) == [K"ErrorInvalidNumericConstant", K"Integer", K"EndMarker"] + @test kind.(collect(tokenize("0b101__101"))) == [K"BinInt", K"Identifier", K"EndMarker"] end @testset "floating points" begin - @test tok("1.0e0").kind == Tokens.FLOAT - @test tok("1.0e-0").kind == Tokens.FLOAT - @test tok("1.0E0").kind == Tokens.FLOAT - @test tok("1.0E-0").kind == Tokens.FLOAT - @test 
tok("1.0f0").kind == Tokens.FLOAT - @test tok("1.0f-0").kind == Tokens.FLOAT - - @test tok("0e0").kind == Tokens.FLOAT - @test tok("0e+0").kind == Tokens.FLOAT - @test tok("0E0").kind == Tokens.FLOAT - @test tok("201E+0").kind == Tokens.FLOAT - @test tok("2f+0").kind == Tokens.FLOAT - @test tok("2048f0").kind == Tokens.FLOAT - @test tok("1.:0").kind == Tokens.FLOAT - @test tok("0x00p2").kind == Tokens.FLOAT - @test tok("0x00P2").kind == Tokens.FLOAT - @test tok("0x0.00p23").kind == Tokens.FLOAT - @test tok("0x0.0ap23").kind == Tokens.FLOAT - @test tok("0x0.0_0p2").kind == Tokens.FLOAT - @test tok("0x0_0_0.0_0p2").kind == Tokens.FLOAT - @test tok("0x0p+2").kind == Tokens.FLOAT - @test tok("0x0p-2").kind == Tokens.FLOAT + @test tok("1.0e0").kind == K"Float" + @test tok("1.0e-0").kind == K"Float" + @test tok("1.0E0").kind == K"Float" + @test tok("1.0E-0").kind == K"Float" + @test tok("1.0f0").kind == K"Float" + @test tok("1.0f-0").kind == K"Float" + + @test tok("0e0").kind == K"Float" + @test tok("0e+0").kind == K"Float" + @test tok("0E0").kind == K"Float" + @test tok("201E+0").kind == K"Float" + @test tok("2f+0").kind == K"Float" + @test tok("2048f0").kind == K"Float" + @test tok("1.:0").kind == K"Float" + @test tok("0x00p2").kind == K"Float" + @test tok("0x00P2").kind == K"Float" + @test tok("0x0.00p23").kind == K"Float" + @test tok("0x0.0ap23").kind == K"Float" + @test tok("0x0.0_0p2").kind == K"Float" + @test tok("0x0_0_0.0_0p2").kind == K"Float" + @test tok("0x0p+2").kind == K"Float" + @test tok("0x0p-2").kind == K"Float" # Floating point with \minus rather than - - @test tok("1.0e−0").kind == Tokens.FLOAT - @test tok("1.0f−0").kind == Tokens.FLOAT - @test tok("0x0p−2").kind == Tokens.FLOAT + @test tok("1.0e−0").kind == K"Float" + @test tok("1.0f−0").kind == K"Float" + @test tok("0x0p−2").kind == K"Float" end @testset "1e1" begin - @test tok("1e", 1).kind == Tokens.INTEGER - @test tok("1e", 2).kind == Tokens.IDENTIFIER + @test tok("1e", 1).kind == K"Integer" + 
@test tok("1e", 2).kind == K"Identifier" end @testset "jl06types" begin - @test tok("mutable").kind == Tokens.MUTABLE - @test tok("primitive").kind == Tokens.PRIMITIVE - @test tok("struct").kind == Tokens.STRUCT - @test tok("where").kind == Tokens.WHERE - @test tok("mutable struct s{T} where T", 1).kind == Tokens.MUTABLE - @test tok("mutable struct s{T} where T", 3).kind == Tokens.STRUCT - @test tok("mutable struct s{T} where T", 10).kind == Tokens.WHERE + @test tok("mutable").kind == K"mutable" + @test tok("primitive").kind == K"primitive" + @test tok("struct").kind == K"struct" + @test tok("where").kind == K"where" + @test tok("mutable struct s{T} where T", 1).kind == K"mutable" + @test tok("mutable struct s{T} where T", 3).kind == K"struct" + @test tok("mutable struct s{T} where T", 10).kind == K"where" end @testset "CMDs" begin - @test tok("`cmd`",1).kind == T.BACKTICK - @test tok("`cmd`",2).kind == T.CMD - @test tok("`cmd`",3).kind == T.BACKTICK - @test tok("`cmd`",4).kind == T.ENDMARKER - @test tok("```cmd```", 1).kind == T.TRIPLE_BACKTICK - @test tok("```cmd```", 2).kind == T.CMD - @test tok("```cmd```", 3).kind == T.TRIPLE_BACKTICK - @test tok("```cmd```", 4).kind == T.ENDMARKER - @test tok("```cmd````cmd`", 1).kind == T.TRIPLE_BACKTICK - @test tok("```cmd````cmd`", 2).kind == T.CMD - @test tok("```cmd````cmd`", 3).kind == T.TRIPLE_BACKTICK - @test tok("```cmd````cmd`", 4).kind == T.BACKTICK - @test tok("```cmd````cmd`", 5).kind == T.CMD - @test tok("```cmd````cmd`", 6).kind == T.BACKTICK - @test tok("```cmd````cmd`", 7).kind == T.ENDMARKER + @test tok("`cmd`",1).kind == K"`" + @test tok("`cmd`",2).kind == K"CmdString" + @test tok("`cmd`",3).kind == K"`" + @test tok("`cmd`",4).kind == K"EndMarker" + @test tok("```cmd```", 1).kind == K"```" + @test tok("```cmd```", 2).kind == K"CmdString" + @test tok("```cmd```", 3).kind == K"```" + @test tok("```cmd```", 4).kind == K"EndMarker" + @test tok("```cmd````cmd`", 1).kind == K"```" + @test tok("```cmd````cmd`", 
2).kind == K"CmdString" + @test tok("```cmd````cmd`", 3).kind == K"```" + @test tok("```cmd````cmd`", 4).kind == K"`" + @test tok("```cmd````cmd`", 5).kind == K"CmdString" + @test tok("```cmd````cmd`", 6).kind == K"`" + @test tok("```cmd````cmd`", 7).kind == K"EndMarker" end @testset "where" begin - @test tok("a where b", 3).kind == T.WHERE + @test tok("a where b", 3).kind == K"where" end @testset "IO position" begin @@ -649,11 +668,11 @@ end end @testset "hex/bin/octal errors" begin - @test tok("0x").kind == T.INVALID_NUMERIC_CONSTANT - @test tok("0b").kind == T.INVALID_NUMERIC_CONSTANT - @test tok("0o").kind == T.INVALID_NUMERIC_CONSTANT - @test tok("0x 2", 1).kind == T.INVALID_NUMERIC_CONSTANT - @test tok("0x.1p1").kind == T.FLOAT + @test tok("0x").kind == K"ErrorInvalidNumericConstant" + @test tok("0b").kind == K"ErrorInvalidNumericConstant" + @test tok("0o").kind == K"ErrorInvalidNumericConstant" + @test tok("0x 2", 1).kind == K"ErrorInvalidNumericConstant" + @test tok("0x.1p1").kind == K"Float" end @@ -682,9 +701,14 @@ for op in ops if VERSION < v"1.7" && str == "a .&& b" expr = Expr(Symbol(".&&"), :a, :b) end + if str in (".>:b", ".<:b") + # HACK! See https://github.com/JuliaLang/JuliaSyntax.jl/issues/38 + continue + end if expr isa Expr && (expr.head != :error && expr.head != :incomplete) tokens = collect(tokenize(str)) exop = expr.head == :call ? expr.args[1] : expr.head + #println(str) @test Symbol(Tokenize.Tokens.untokenize(tokens[arity == 1 ? 
1 : 3], str)) == exop else break @@ -696,23 +720,23 @@ end @testset "Normalization of Unicode symbols" begin # https://github.com/JuliaLang/julia/pull/25157 - @test tok("\u00b7").kind == T.UNICODE_DOT - @test tok("\u0387").kind == T.UNICODE_DOT + @test tok("\u00b7").kind == K"⋅" + @test tok("\u0387").kind == K"⋅" @test tok(".\u00b7").dotop @test tok(".\u0387").dotop # https://github.com/JuliaLang/julia/pull/40948 - @test tok("−").kind == T.MINUS - @test tok("−=").kind == T.MINUS_EQ + @test tok("−").kind == K"-" + @test tok("−=").kind == K"-=" @test tok(".−").dotop end @testset "perp" begin - @test tok("1 ⟂ 2", 3).kind==T.PERP + @test tok("1 ⟂ 2", 3).kind==K"⟂" end @testset "outer" begin - @test tok("outer", 1).kind==T.OUTER + @test tok("outer", 1).kind==K"outer" end function test_error(tok, kind) @@ -721,18 +745,18 @@ function test_error(tok, kind) end @testset "token errors" begin - test_error(tok("1.2e2.3",1), Tokens.INVALID_NUMERIC_CONSTANT) - test_error(tok("1.2.",1), Tokens.INVALID_NUMERIC_CONSTANT) - test_error(tok("1.2.f",1), Tokens.INVALID_NUMERIC_CONSTANT) - test_error(tok("0xv",1), Tokens.INVALID_NUMERIC_CONSTANT) - test_error(tok("0b3",1), Tokens.INVALID_NUMERIC_CONSTANT) - test_error(tok("0op",1), Tokens.INVALID_NUMERIC_CONSTANT) - test_error(tok("--",1), Tokens.INVALID_OPERATOR) - test_error(tok("1**2",2), Tokens.INVALID_OPERATOR) + test_error(tok("1.2e2.3",1), K"ErrorInvalidNumericConstant") + test_error(tok("1.2.",1), K"ErrorInvalidNumericConstant") + test_error(tok("1.2.f",1), K"ErrorInvalidNumericConstant") + test_error(tok("0xv",1), K"ErrorInvalidNumericConstant") + test_error(tok("0b3",1), K"ErrorInvalidNumericConstant") + test_error(tok("0op",1), K"ErrorInvalidNumericConstant") + test_error(tok("--",1), K"ErrorInvalidOperator") + test_error(tok("1**2",2), K"ErrorInvalidOperator") end @testset "hat suffix" begin - @test tok("ŝ", 1).kind==Tokens.IDENTIFIER + @test tok("ŝ", 1).kind==K"Identifier" @test untokenize(collect(tokenize("ŝ"))[1], "ŝ") == 
"ŝ" end @@ -744,12 +768,12 @@ end @testset "invalid float juxt" begin s = "1.+2" - @test tok(s, 1).kind == Tokens.ERROR + @test tok(s, 1).kind == K"error" @test Tokens.isoperator(tok(s, 2).kind) - test_roundtrip("1234.+1", Tokens.ERROR, "1234.") - @test tok("1.+ ").kind == Tokens.ERROR - @test tok("1.⤋").kind == Tokens.ERROR - @test tok("1.?").kind == Tokens.ERROR + test_roundtrip("1234.+1", K"error", "1234.") + @test tok("1.+ ").kind == K"error" + @test tok("1.⤋").kind == K"error" + @test tok("1.?").kind == K"error" end @testset "comments" begin @@ -759,21 +783,21 @@ end @testset "invalid hexadecimal" begin s = "0x." - tok(s, 1).kind === Tokens.ERROR + tok(s, 1).kind === K"error" end @testset "circ arrow right op" begin s = "↻" - @test collect(tokenize(s))[1].kind == Tokens.CIRCLE_ARROW_RIGHT + @test collect(tokenize(s))[1].kind == K"↻" end @testset "invalid float" begin s = ".0." - @test collect(tokenize(s))[1].kind == Tokens.INVALID_NUMERIC_CONSTANT + @test collect(tokenize(s))[1].kind == K"ErrorInvalidNumericConstant" end @testset "allow prime after end" begin - @test tok("begin end'", 4).kind === Tokens.PRIME + @test tok("begin end'", 4).kind === K"'" end @testset "new ops" begin @@ -869,14 +893,14 @@ end @testset "UTF-8 BOM" begin - @test Tokenize.Tokens.kind.(collect(tokenize("\ufeff[1\ufeff2]"))) == [ - Tokens.WHITESPACE, - Tokens.LSQUARE, - Tokens.INTEGER, - Tokens.WHITESPACE, - Tokens.INTEGER, - Tokens.RSQUARE, - Tokens.ENDMARKER + @test kind.(collect(tokenize("\ufeff[1\ufeff2]"))) == [ + K"Whitespace", + K"[", + K"Integer", + K"Whitespace", + K"Integer", + K"]", + K"EndMarker" ] end @@ -891,3 +915,5 @@ end @test strtok("a &&̄ b") == ["a", " ", "&&", "̄", " ", "b", ""] @test strtok("a .&&₁ b") == ["a", " ", ".&&", "₁", " ", "b", ""] end + +end From 899374057bacdece27f72711e13d326889592150 Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Tue, 9 Aug 2022 07:02:37 +1000 Subject: [PATCH 0450/1109] Add to sysimage gitignore --- JuliaSyntax/sysimage/.gitignore 
| 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/sysimage/.gitignore b/JuliaSyntax/sysimage/.gitignore index 56c79c0509880..344cdc510a371 100644 --- a/JuliaSyntax/sysimage/.gitignore +++ b/JuliaSyntax/sysimage/.gitignore @@ -1,2 +1,3 @@ +JuliaSyntax Project.toml Manifest.toml From 93070aad0b8a96897d07dd4e808f502a413df91c Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 10 Aug 2022 09:18:14 +1000 Subject: [PATCH 0451/1109] Consolidate lexer code & dedupe Kind predicates * Consolidate all Kind-based predicates into kind.jl and deduplicate the functions from Tokenize which are duplicated in JuliaSyntax. Minor restyling of the names here for consistency with the other predicates in JuliaSyntax.jl. * Move all of Tokenize into a pair of files and remove the sub-modules as these don't really seem to achieve much now that we use K_str and don't need a namespace to contain the token kinds. --- JuliaSyntax/src/JuliaSyntax.jl | 5 +- JuliaSyntax/src/Tokenize/Tokenize.jl | 13 - JuliaSyntax/src/Tokenize/lexer.jl | 71 ++- JuliaSyntax/src/Tokenize/token.jl | 65 --- JuliaSyntax/src/Tokenize/token_kinds.jl | 680 ----------------------- JuliaSyntax/src/Tokenize/utilities.jl | 682 ++++++++++++++++++++++++ JuliaSyntax/src/kinds.jl | 106 ++++ JuliaSyntax/src/parse_stream.jl | 11 +- JuliaSyntax/src/tokens.jl | 120 ----- JuliaSyntax/test/lexer.jl | 39 +- JuliaSyntax/test/runtests.jl | 3 - 11 files changed, 873 insertions(+), 922 deletions(-) delete mode 100644 JuliaSyntax/src/Tokenize/token_kinds.jl delete mode 100644 JuliaSyntax/src/tokens.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 4aeb6cbb314df..2b0986d254803 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -6,11 +6,10 @@ using Mmap include("utils.jl") include("kinds.jl") + # Lexing uses a significantly modified version of Tokenize.jl include("Tokenize/Tokenize.jl") -using .Tokenize.Tokens: Token -const TzTokens = Tokenize.Tokens 
-include("tokens.jl") +using .Tokenize: Token # Source and diagnostics include("source_files.jl") diff --git a/JuliaSyntax/src/Tokenize/Tokenize.jl b/JuliaSyntax/src/Tokenize/Tokenize.jl index 548d8a91c8f73..fb49626dd9f89 100644 --- a/JuliaSyntax/src/Tokenize/Tokenize.jl +++ b/JuliaSyntax/src/Tokenize/Tokenize.jl @@ -1,15 +1,2 @@ -module Tokenize - -if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optlevel")) - @eval Base.Experimental.@optlevel 1 -end - -include("token.jl") include("lexer.jl") -import .Lexers: tokenize -import .Tokens: untokenize - -export tokenize, untokenize, Tokens - -end # module diff --git a/JuliaSyntax/src/Tokenize/lexer.jl b/JuliaSyntax/src/Tokenize/lexer.jl index b4e50dc677938..1537b772e4cf2 100644 --- a/JuliaSyntax/src/Tokenize/lexer.jl +++ b/JuliaSyntax/src/Tokenize/lexer.jl @@ -1,12 +1,61 @@ -module Lexers +module Tokenize -import ..Tokens -import ..Tokens: @K_str, Token, Kind, UNICODE_OPS, EMPTY_TOKEN, - isliteral, iserror, iscontextualkeyword, iswordoperator +export tokenize, untokenize, Tokens + +using ..JuliaSyntax: Kind, @K_str + +import ..JuliaSyntax: kind, + is_literal, is_error, is_contextual_keyword, is_word_operator + +import Base.eof include("utilities.jl") -export tokenize +#------------------------------------------------------------------------------- +# Tokens + +# Error kind => description +TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( + K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... 
=#", + K"ErrorEofChar" => "unterminated character literal", + K"ErrorInvalidNumericConstant" => "invalid numeric constant", + K"ErrorInvalidOperator" => "invalid operator", + K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", + K"error" => "unknown error", +) + +struct Token + kind::Kind + # Offsets into a string or buffer + startbyte::Int # The byte where the token start in the buffer + endbyte::Int # The byte where the token ended in the buffer + dotop::Bool + suffix::Bool +end +function Token(kind::Kind, startbyte::Int, endbyte::Int) + Token(kind, startbyte, endbyte, false, false) +end +Token() = Token(K"error", 0, 0, false, false) + +const EMPTY_TOKEN = Token() + +kind(t::Token) = t.kind + +startbyte(t::Token) = t.startbyte +endbyte(t::Token) = t.endbyte + + +function untokenize(t::Token, str::String) + String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) +end + +function Base.show(io::IO, t::Token) + print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) + print(io, rpad(kind(t), 15, " ")) +end + +#------------------------------------------------------------------------------- +# Lexer @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') @inline isbinary(c::Char) = c == '0' || c == '1' @@ -266,7 +315,7 @@ Returns an `K"error"` token with error `err` and starts a new `Token`. """ function emit_error(l::Lexer, err::Kind = K"error") l.errored = true - @assert iserror(err) + @assert is_error(err) return emit(l, err) end @@ -838,14 +887,14 @@ end function lex_prime(l, doemit = true) if l.last_token == K"Identifier" || - iscontextualkeyword(l.last_token) || - iswordoperator(l.last_token) || + is_contextual_keyword(l.last_token) || + is_word_operator(l.last_token) || l.last_token == K"." 
|| l.last_token == K")" || l.last_token == K"]" || l.last_token == K"}" || l.last_token == K"'" || - l.last_token == K"end" || isliteral(l.last_token) + l.last_token == K"end" || is_literal(l.last_token) return emit(l, K"'") else if accept(l, '\'') @@ -888,8 +937,8 @@ end # A '"' has been consumed function lex_quote(l::Lexer) raw = l.last_token == K"Identifier" || - iscontextualkeyword(l.last_token) || - iswordoperator(l.last_token) + is_contextual_keyword(l.last_token) || + is_word_operator(l.last_token) pc, dpc = dpeekchar(l) triplestr = pc == '"' && dpc == '"' push!(l.string_states, StringState(triplestr, raw, '"', 0)) diff --git a/JuliaSyntax/src/Tokenize/token.jl b/JuliaSyntax/src/Tokenize/token.jl index 709b706914fcb..e69de29bb2d1d 100644 --- a/JuliaSyntax/src/Tokenize/token.jl +++ b/JuliaSyntax/src/Tokenize/token.jl @@ -1,65 +0,0 @@ -module Tokens - -using ...JuliaSyntax: Kind, @K_str - -import Base.eof - -export Token - -include("token_kinds.jl") - - -iskeyword(k::Kind) = K"BEGIN_KEYWORDS" < k < K"END_KEYWORDS" -isliteral(k::Kind) = K"BEGIN_LITERAL" < k < K"END_LITERAL" -isoperator(k::Kind) = K"BEGIN_OPS" < k < K"END_OPS" -iserror(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" -iscontextualkeyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" < k < K"END_CONTEXTUAL_KEYWORDS" - -function iswordoperator(k::Kind) - # Keyword-like operators - k == K"in" || - k == K"isa" || - k == K"where" -end - -# Error kind => description -TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( - K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... 
=#", - K"ErrorEofChar" => "unterminated character literal", - K"ErrorInvalidNumericConstant" => "invalid numeric constant", - K"ErrorInvalidOperator" => "invalid operator", - K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", - K"error" => "unknown error", -) - -struct Token - kind::Kind - # Offsets into a string or buffer - startbyte::Int # The byte where the token start in the buffer - endbyte::Int # The byte where the token ended in the buffer - dotop::Bool - suffix::Bool -end -function Token(kind::Kind, startbyte::Int, endbyte::Int) - Token(kind, startbyte, endbyte, false, false) -end -Token() = Token(K"error", 0, 0, false, false) - -const EMPTY_TOKEN = Token() - -exactkind(t::Token) = t.kind - -startbyte(t::Token) = t.startbyte -endbyte(t::Token) = t.endbyte - - -function untokenize(t::Token, str::String) - String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) -end - -function Base.show(io::IO, t::Token) - print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) - print(io, rpad(exactkind(t), 15, " ")) -end - -end # module diff --git a/JuliaSyntax/src/Tokenize/token_kinds.jl b/JuliaSyntax/src/Tokenize/token_kinds.jl deleted file mode 100644 index 2e86ab0d606f6..0000000000000 --- a/JuliaSyntax/src/Tokenize/token_kinds.jl +++ /dev/null @@ -1,680 +0,0 @@ - -const UNICODE_OPS = Dict{Char, Kind}( - # '−' is normalized into K"-", - '−' => K"-", - '÷' => K"÷", - '¬' => K"¬", - '√' => K"√", - '∛' => K"∛", - '∜' => K"∜", - '←' => K"←", - '→' => K"→", - '↔' => K"↔", - '↚' => K"↚", - '↛' => K"↛", - '↞' => K"↞", - '↠' => K"↠", - '↢' => K"↢", - '↣' => K"↣", - '↤' => K"↤", - '↦' => K"↦", - '↮' => K"↮", - '⇎' => K"⇎", - '⇍' => K"⇍", - '⇏' => K"⇏", - '⇐' => K"⇐", - '⇒' => K"⇒", - '⇔' => K"⇔", - '⇴' => K"⇴", - '⇶' => K"⇶", - '⇷' => K"⇷", - '⇸' => K"⇸", - '⇹' => K"⇹", - '⇺' => K"⇺", - '⇻' => K"⇻", - '⇼' => K"⇼", - '⇽' => K"⇽", - '⇾' => K"⇾", - '⇿' => K"⇿", - '⟵' => K"⟵", - '⟶' => K"⟶", - '⟷' => K"⟷", - '⟹' 
=> K"⟹", - '⟺' => K"⟺", - '⟻' => K"⟻", - '⟼' => K"⟼", - '⟽' => K"⟽", - '⟾' => K"⟾", - '⟿' => K"⟿", - '⤀' => K"⤀", - '⤁' => K"⤁", - '⤂' => K"⤂", - '⤃' => K"⤃", - '⤄' => K"⤄", - '⤅' => K"⤅", - '⤆' => K"⤆", - '⤇' => K"⤇", - '⤌' => K"⤌", - '⤍' => K"⤍", - '⤎' => K"⤎", - '⤏' => K"⤏", - '⤐' => K"⤐", - '⤑' => K"⤑", - '⤔' => K"⤔", - '⤕' => K"⤕", - '⤖' => K"⤖", - '⤗' => K"⤗", - '⤘' => K"⤘", - '⤝' => K"⤝", - '⤞' => K"⤞", - '⤟' => K"⤟", - '⤠' => K"⤠", - '⥄' => K"⥄", - '⥅' => K"⥅", - '⥆' => K"⥆", - '⥇' => K"⥇", - '⥈' => K"⥈", - '⥊' => K"⥊", - '⥋' => K"⥋", - '⥎' => K"⥎", - '⥐' => K"⥐", - '⥒' => K"⥒", - '⥓' => K"⥓", - '⥖' => K"⥖", - '⥗' => K"⥗", - '⥚' => K"⥚", - '⥛' => K"⥛", - '⥞' => K"⥞", - '⥟' => K"⥟", - '⥢' => K"⥢", - '⥤' => K"⥤", - '⥦' => K"⥦", - '⥧' => K"⥧", - '⥨' => K"⥨", - '⥩' => K"⥩", - '⥪' => K"⥪", - '⥫' => K"⥫", - '⥬' => K"⥬", - '⥭' => K"⥭", - '⥰' => K"⥰", - '⧴' => K"⧴", - '⬱' => K"⬱", - '⬰' => K"⬰", - '⬲' => K"⬲", - '⬳' => K"⬳", - '⬴' => K"⬴", - '⬵' => K"⬵", - '⬶' => K"⬶", - '⬷' => K"⬷", - '⬸' => K"⬸", - '⬹' => K"⬹", - '⬺' => K"⬺", - '⬻' => K"⬻", - '⬼' => K"⬼", - '⬽' => K"⬽", - '⬾' => K"⬾", - '⬿' => K"⬿", - '⭀' => K"⭀", - '⭁' => K"⭁", - '⭂' => K"⭂", - '⭃' => K"⭃", - '⭄' => K"⭄", - '⭇' => K"⭇", - '⭈' => K"⭈", - '⭉' => K"⭉", - '⭊' => K"⭊", - '⭋' => K"⭋", - '⭌' => K"⭌", - '←' => K"←", - '→' => K"→", - '≥' => K"≥", - '≤' => K"≤", - '≡' => K"≡", - '≠' => K"≠", - '≢' => K"≢", - '∈' => K"∈", - '∉' => K"∉", - '∋' => K"∋", - '∌' => K"∌", - '⊆' => K"⊆", - '⊈' => K"⊈", - '⊂' => K"⊂", - '⊄' => K"⊄", - '⊊' => K"⊊", - '∝' => K"∝", - '∊' => K"∊", - '∍' => K"∍", - '∥' => K"∥", - '∦' => K"∦", - '∷' => K"∷", - '∺' => K"∺", - '∻' => K"∻", - '∽' => K"∽", - '∾' => K"∾", - '≁' => K"≁", - '≃' => K"≃", - '≄' => K"≄", - '≅' => K"≅", - '≆' => K"≆", - '≇' => K"≇", - '≈' => K"≈", - '≉' => K"≉", - '≊' => K"≊", - '≋' => K"≋", - '≌' => K"≌", - '≍' => K"≍", - '≎' => K"≎", - '≐' => K"≐", - '≑' => K"≑", - '≒' => K"≒", - '≓' => K"≓", - '≔' => K"≔", - '≕' => K"≕", - '≖' => K"≖", - '≗' => K"≗", - '≘' => 
K"≘", - '≙' => K"≙", - '≚' => K"≚", - '≛' => K"≛", - '≜' => K"≜", - '≝' => K"≝", - '≞' => K"≞", - '≟' => K"≟", - '≣' => K"≣", - '≦' => K"≦", - '≧' => K"≧", - '≨' => K"≨", - '≩' => K"≩", - '≪' => K"≪", - '≫' => K"≫", - '≬' => K"≬", - '≭' => K"≭", - '≮' => K"≮", - '≯' => K"≯", - '≰' => K"≰", - '≱' => K"≱", - '≲' => K"≲", - '≳' => K"≳", - '≴' => K"≴", - '≵' => K"≵", - '≶' => K"≶", - '≷' => K"≷", - '≸' => K"≸", - '≹' => K"≹", - '≺' => K"≺", - '≻' => K"≻", - '≼' => K"≼", - '≽' => K"≽", - '≾' => K"≾", - '≿' => K"≿", - '⊀' => K"⊀", - '⊁' => K"⊁", - '⊃' => K"⊃", - '⊅' => K"⊅", - '⊇' => K"⊇", - '⊉' => K"⊉", - '⊋' => K"⊋", - '⊏' => K"⊏", - '⊐' => K"⊐", - '⊑' => K"⊑", - '⊒' => K"⊒", - '⊜' => K"⊜", - '⊩' => K"⊩", - '⊬' => K"⊬", - '⊮' => K"⊮", - '⊰' => K"⊰", - '⊱' => K"⊱", - '⊲' => K"⊲", - '⊳' => K"⊳", - '⊴' => K"⊴", - '⊵' => K"⊵", - '⊶' => K"⊶", - '⊷' => K"⊷", - '⋍' => K"⋍", - '⋐' => K"⋐", - '⋑' => K"⋑", - '⋕' => K"⋕", - '⋖' => K"⋖", - '⋗' => K"⋗", - '⋘' => K"⋘", - '⋙' => K"⋙", - '⋚' => K"⋚", - '⋛' => K"⋛", - '⋜' => K"⋜", - '⋝' => K"⋝", - '⋞' => K"⋞", - '⋟' => K"⋟", - '⋠' => K"⋠", - '⋡' => K"⋡", - '⋢' => K"⋢", - '⋣' => K"⋣", - '⋤' => K"⋤", - '⋥' => K"⋥", - '⋦' => K"⋦", - '⋧' => K"⋧", - '⋨' => K"⋨", - '⋩' => K"⋩", - '⋪' => K"⋪", - '⋫' => K"⋫", - '⋬' => K"⋬", - '⋭' => K"⋭", - '⋲' => K"⋲", - '⋳' => K"⋳", - '⋴' => K"⋴", - '⋵' => K"⋵", - '⋶' => K"⋶", - '⋷' => K"⋷", - '⋸' => K"⋸", - '⋹' => K"⋹", - '⋺' => K"⋺", - '⋻' => K"⋻", - '⋼' => K"⋼", - '⋽' => K"⋽", - '⋾' => K"⋾", - '⋿' => K"⋿", - '⟈' => K"⟈", - '⟉' => K"⟉", - '⟒' => K"⟒", - '⦷' => K"⦷", - '⧀' => K"⧀", - '⧁' => K"⧁", - '⧡' => K"⧡", - '⧣' => K"⧣", - '⧤' => K"⧤", - '⧥' => K"⧥", - '⩦' => K"⩦", - '⩧' => K"⩧", - '⩪' => K"⩪", - '⩫' => K"⩫", - '⩬' => K"⩬", - '⩭' => K"⩭", - '⩮' => K"⩮", - '⩯' => K"⩯", - '⩰' => K"⩰", - '⩱' => K"⩱", - '⩲' => K"⩲", - '⩳' => K"⩳", - '⩴' => K"⩴", - '⩵' => K"⩵", - '⩶' => K"⩶", - '⩷' => K"⩷", - '⩸' => K"⩸", - '⩹' => K"⩹", - '⩺' => K"⩺", - '⩻' => K"⩻", - '⩼' => K"⩼", - '⩽' => K"⩽", - '⩾' => K"⩾", - '⩿' => 
K"⩿", - '⪀' => K"⪀", - '⪁' => K"⪁", - '⪂' => K"⪂", - '⪃' => K"⪃", - '⪄' => K"⪄", - '⪅' => K"⪅", - '⪆' => K"⪆", - '⪇' => K"⪇", - '⪈' => K"⪈", - '⪉' => K"⪉", - '⪊' => K"⪊", - '⪋' => K"⪋", - '⪌' => K"⪌", - '⪍' => K"⪍", - '⪎' => K"⪎", - '⪏' => K"⪏", - '⪐' => K"⪐", - '⪑' => K"⪑", - '⪒' => K"⪒", - '⪓' => K"⪓", - '⪔' => K"⪔", - '⪕' => K"⪕", - '⪖' => K"⪖", - '⪗' => K"⪗", - '⪘' => K"⪘", - '⪙' => K"⪙", - '⪚' => K"⪚", - '⪛' => K"⪛", - '⪜' => K"⪜", - '⪝' => K"⪝", - '⪞' => K"⪞", - '⪟' => K"⪟", - '⪠' => K"⪠", - '⪡' => K"⪡", - '⪢' => K"⪢", - '⪣' => K"⪣", - '⪤' => K"⪤", - '⪥' => K"⪥", - '⪦' => K"⪦", - '⪧' => K"⪧", - '⪨' => K"⪨", - '⪩' => K"⪩", - '⪪' => K"⪪", - '⪫' => K"⪫", - '⪬' => K"⪬", - '⪭' => K"⪭", - '⪮' => K"⪮", - '⪯' => K"⪯", - '⪰' => K"⪰", - '⪱' => K"⪱", - '⪲' => K"⪲", - '⪳' => K"⪳", - '⪴' => K"⪴", - '⪵' => K"⪵", - '⪶' => K"⪶", - '⪷' => K"⪷", - '⪸' => K"⪸", - '⪹' => K"⪹", - '⪺' => K"⪺", - '⪻' => K"⪻", - '⪼' => K"⪼", - '⪽' => K"⪽", - '⪾' => K"⪾", - '⪿' => K"⪿", - '⫀' => K"⫀", - '⫁' => K"⫁", - '⫂' => K"⫂", - '⫃' => K"⫃", - '⫄' => K"⫄", - '⫅' => K"⫅", - '⫆' => K"⫆", - '⫇' => K"⫇", - '⫈' => K"⫈", - '⫉' => K"⫉", - '⫊' => K"⫊", - '⫋' => K"⫋", - '⫌' => K"⫌", - '⫍' => K"⫍", - '⫎' => K"⫎", - '⫏' => K"⫏", - '⫐' => K"⫐", - '⫑' => K"⫑", - '⫒' => K"⫒", - '⫓' => K"⫓", - '⫔' => K"⫔", - '⫕' => K"⫕", - '⫖' => K"⫖", - '⫗' => K"⫗", - '⫘' => K"⫘", - '⫙' => K"⫙", - '⫷' => K"⫷", - '⫸' => K"⫸", - '⫹' => K"⫹", - '⫺' => K"⫺", - '⊢' => K"⊢", - '⊣' => K"⊣", - '⫪' => K"⫪", - '⫫' => K"⫫", - '⟂' => K"⟂", - '⊕' => K"⊕", - '⊖' => K"⊖", - '⊞' => K"⊞", - '⊟' => K"⊟", - '|' => K"|", - '∪' => K"∪", - '∨' => K"∨", - '⊔' => K"⊔", - '±' => K"±", - '∓' => K"∓", - '∔' => K"∔", - '∸' => K"∸", - '≂' => K"≂", - '≏' => K"≏", - '⊎' => K"⊎", - '⊻' => K"⊻", - '⊽' => K"⊽", - '⋎' => K"⋎", - '⋓' => K"⋓", - '⧺' => K"⧺", - '⧻' => K"⧻", - '⨈' => K"⨈", - '⨢' => K"⨢", - '⨣' => K"⨣", - '⨤' => K"⨤", - '⨥' => K"⨥", - '⨦' => K"⨦", - '⨧' => K"⨧", - '⨨' => K"⨨", - '⨩' => K"⨩", - '⨪' => K"⨪", - '⨫' => K"⨫", - '⨬' => K"⨬", - '⨭' => 
K"⨭", - '⨮' => K"⨮", - '⨹' => K"⨹", - '⨺' => K"⨺", - '⩁' => K"⩁", - '⩂' => K"⩂", - '⩅' => K"⩅", - '⩊' => K"⩊", - '⩌' => K"⩌", - '⩏' => K"⩏", - '⩐' => K"⩐", - '⩒' => K"⩒", - '⩔' => K"⩔", - '⩖' => K"⩖", - '⩗' => K"⩗", - '⩛' => K"⩛", - '⩝' => K"⩝", - '⩡' => K"⩡", - '⩢' => K"⩢", - '⩣' => K"⩣", - '∘' => K"∘", - '×' => K"×", - '∩' => K"∩", - '∧' => K"∧", - '⊗' => K"⊗", - '⊘' => K"⊘", - '⊙' => K"⊙", - '⊚' => K"⊚", - '⊛' => K"⊛", - '⊠' => K"⊠", - '⊡' => K"⊡", - '⊓' => K"⊓", - '∗' => K"∗", - '∙' => K"∙", - '∤' => K"∤", - '⅋' => K"⅋", - '≀' => K"≀", - '⊼' => K"⊼", - '⋄' => K"⋄", - '⋆' => K"⋆", - '⋇' => K"⋇", - '⋉' => K"⋉", - '⋊' => K"⋊", - '⋋' => K"⋋", - '⋌' => K"⋌", - '⋏' => K"⋏", - '⋒' => K"⋒", - '⟑' => K"⟑", - '⦸' => K"⦸", - '⦼' => K"⦼", - '⦾' => K"⦾", - '⦿' => K"⦿", - '⧶' => K"⧶", - '⧷' => K"⧷", - '⨇' => K"⨇", - '⨰' => K"⨰", - '⨱' => K"⨱", - '⨲' => K"⨲", - '⨳' => K"⨳", - '⨴' => K"⨴", - '⨵' => K"⨵", - '⨶' => K"⨶", - '⨷' => K"⨷", - '⨸' => K"⨸", - '⨻' => K"⨻", - '⨼' => K"⨼", - '⨽' => K"⨽", - '⩀' => K"⩀", - '⩃' => K"⩃", - '⩄' => K"⩄", - '⩋' => K"⩋", - '⩍' => K"⩍", - '⩎' => K"⩎", - '⩑' => K"⩑", - '⩓' => K"⩓", - '⩕' => K"⩕", - '⩘' => K"⩘", - '⩚' => K"⩚", - '⩜' => K"⩜", - '⩞' => K"⩞", - '⩟' => K"⩟", - '⩠' => K"⩠", - '⫛' => K"⫛", - '⊍' => K"⊍", - '▷' => K"▷", - '⨝' => K"⨝", - '⟕' => K"⟕", - '⟖' => K"⟖", - '⟗' => K"⟗", - '^' => K"^", - '↑' => K"↑", - '↓' => K"↓", - '⇵' => K"⇵", - '⟰' => K"⟰", - '⟱' => K"⟱", - '⤈' => K"⤈", - '⤉' => K"⤉", - '⤊' => K"⤊", - '⤋' => K"⤋", - '⤒' => K"⤒", - '⤓' => K"⤓", - '⥉' => K"⥉", - '⥌' => K"⥌", - '⥍' => K"⥍", - '⥏' => K"⥏", - '⥑' => K"⥑", - '⥔' => K"⥔", - '⥕' => K"⥕", - '⥘' => K"⥘", - '⥙' => K"⥙", - '⥜' => K"⥜", - '⥝' => K"⥝", - '⥠' => K"⥠", - '⥡' => K"⥡", - '⥣' => K"⥣", - '⥥' => K"⥥", - '⥮' => K"⥮", - '⥯' => K"⥯", - '↑' => K"↑", - '↓' => K"↓", - # Lookalikes which are normalized into K"⋅", - # https://github.com/JuliaLang/julia/pull/25157, - '\u00b7' => K"⋅", # '·' Middle Dot,, - '\u0387' => K"⋅", # '·' Greek Ano Teleia,, - '⋅' => K"⋅", - '…' => 
K"…", - '⁝' => K"⁝", - '⋮' => K"⋮", - '⋱' => K"⋱", - '⋰' => K"⋰", - '⋯' => K"⋯", - '↻' => K"↻", - '⇜' => K"⇜", - '⇝' => K"⇝", - '↜' => K"↜", - '↝' => K"↝", - '↩' => K"↩", - '↪' => K"↪", - '↫' => K"↫", - '↬' => K"↬", - '↼' => K"↼", - '↽' => K"↽", - '⇀' => K"⇀", - '⇁' => K"⇁", - '⇄' => K"⇄", - '⇆' => K"⇆", - '⇇' => K"⇇", - '⇉' => K"⇉", - '⇋' => K"⇋", - '⇌' => K"⇌", - '⇚' => K"⇚", - '⇛' => K"⇛", - '⇠' => K"⇠", - '⇢' => K"⇢", - '↷' => K"↷", - '↶' => K"↶", - '↺' => K"↺", - '¦' => K"¦", - '⌿' => K"⌿", - '⨟' => K"⨟", -) - -const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() -for (k, v) in UNICODE_OPS - k in ('\u00b7', '\u0387') && continue - UNICODE_OPS_REVERSE[v] = Symbol(k) -end - -for (k, v) in [ - K"=" => :(=) - K"+=" => :(+=) - K"-=" => :(-=) - K"*=" => :(*=) - K"/=" => :(/=) - K"//=" => :(//=) - K"|=" => :(|=) - K"^=" => :(^=) - K"÷=" => :(÷=) - K"%=" => :(%=) - K"<<=" => :(<<=) - K">>=" => :(>>=) - K"<<" => :(<<) - K">>" => :(>>) - K">>>" => :(>>>) - K">>>=" => :(>>>=) - K"\=" => :(\=) - K"&=" => :(&=) - K":=" => :(:=) - K"=>" => :(=>) - K"~" => :(~) - K"$=" => :($=) - K"⊻=" => :(⊻=) - K"-->" => :(-->) - K"||" => :(||) - K"&&" => :(&&) - K"<:" => :(<:) - K">:" => :(>:) - K">" => :(>) - K"<" => :(<) - K">=" => :(>=) - K"≥" => :(≥) - K"<=" => :(<=) - K"≤" => :(≤) - K"==" => :(==) - K"===" => :(===) - K"≡" => :(≡) - K"!=" => :(!=) - K"≠" => :(≠) - K"!==" => :(!==) - K"≢" => :(≢) - K"in" => :(in) - K"isa" => :(isa) - K"<|" => :(<|) - K"|>" => :(|>) - K":" => :(:) - K".." => :(..) - K"$" => :($) - K"+" => :(+) - K"-" => :(-) - K"++" => :(++) - K"|" => :(|) - K"*" => :(*) - K"/" => :(/) - K"%" => :(%) - K"\\" => :(\) - K"&" => :(&) - K"//" => :(//) - K"^" => :(^) - K"::" => :(::) - K"?" => :? - K"." => :(.) - K"!" => :(!) - K"'" => Symbol(''') - K"..." => :(...) 
- K".'" => Symbol(".'") - K"->" => :(->) - K"where" => :where - ] - UNICODE_OPS_REVERSE[k] = v -end diff --git a/JuliaSyntax/src/Tokenize/utilities.jl b/JuliaSyntax/src/Tokenize/utilities.jl index 708ccfacc11c6..a21b20aae2399 100644 --- a/JuliaSyntax/src/Tokenize/utilities.jl +++ b/JuliaSyntax/src/Tokenize/utilities.jl @@ -255,3 +255,685 @@ function is_operator_start_char(c::Char) is_operator_start_char(UInt32(c)) end is_operator_start_char(u::UInt32) = u == 0x00000021 || (u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b || (u == 0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u 
== 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || (u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 || (u == 0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u 
== 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || (u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 || (u == 0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u 
== 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || (u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 || (u == 0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u 
== 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || (u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 || (u == 0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u 
== 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || (u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 || (u == 0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 
0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + +const UNICODE_OPS = Dict{Char, Kind}( + # '−' is normalized into K"-", + '−' => K"-", + '÷' => K"÷", + '¬' => K"¬", + '√' => K"√", + '∛' => K"∛", + '∜' => K"∜", + '←' => K"←", + '→' => K"→", + '↔' => K"↔", + '↚' => K"↚", + '↛' => K"↛", + '↞' => K"↞", + '↠' => K"↠", + '↢' => K"↢", + '↣' => K"↣", + '↤' => K"↤", + '↦' => K"↦", + '↮' => K"↮", + '⇎' => K"⇎", + '⇍' => K"⇍", + '⇏' => K"⇏", + '⇐' => K"⇐", + '⇒' => K"⇒", + '⇔' => K"⇔", + '⇴' => K"⇴", + '⇶' => K"⇶", + '⇷' => K"⇷", + '⇸' => K"⇸", + '⇹' => K"⇹", + '⇺' => K"⇺", + '⇻' => K"⇻", + '⇼' => K"⇼", + '⇽' => K"⇽", + '⇾' => K"⇾", + '⇿' => K"⇿", + '⟵' => K"⟵", + '⟶' => K"⟶", + '⟷' => K"⟷", + '⟹' => K"⟹", + '⟺' => K"⟺", + '⟻' => K"⟻", + '⟼' => K"⟼", + '⟽' => K"⟽", + '⟾' => K"⟾", + '⟿' => K"⟿", + '⤀' => K"⤀", + '⤁' => K"⤁", + '⤂' => K"⤂", + '⤃' => K"⤃", + '⤄' => K"⤄", + '⤅' => K"⤅", + '⤆' => K"⤆", + '⤇' => K"⤇", + '⤌' => K"⤌", + '⤍' => K"⤍", + '⤎' => K"⤎", + '⤏' => K"⤏", + '⤐' => K"⤐", + '⤑' => K"⤑", + '⤔' => K"⤔", + '⤕' => K"⤕", + '⤖' => K"⤖", + '⤗' => K"⤗", + '⤘' => K"⤘", + '⤝' => K"⤝", + '⤞' => K"⤞", + '⤟' => K"⤟", + '⤠' => K"⤠", + '⥄' => K"⥄", + '⥅' => K"⥅", + '⥆' => K"⥆", + '⥇' => K"⥇", + '⥈' => K"⥈", + '⥊' => K"⥊", + '⥋' => K"⥋", + '⥎' => K"⥎", + '⥐' => K"⥐", + '⥒' => K"⥒", + '⥓' => K"⥓", + '⥖' => K"⥖", + '⥗' => K"⥗", + '⥚' => K"⥚", + '⥛' => K"⥛", + '⥞' => K"⥞", + '⥟' => K"⥟", + '⥢' => K"⥢", + '⥤' => K"⥤", + '⥦' => K"⥦", + '⥧' => K"⥧", + '⥨' 
=> K"⥨", + '⥩' => K"⥩", + '⥪' => K"⥪", + '⥫' => K"⥫", + '⥬' => K"⥬", + '⥭' => K"⥭", + '⥰' => K"⥰", + '⧴' => K"⧴", + '⬱' => K"⬱", + '⬰' => K"⬰", + '⬲' => K"⬲", + '⬳' => K"⬳", + '⬴' => K"⬴", + '⬵' => K"⬵", + '⬶' => K"⬶", + '⬷' => K"⬷", + '⬸' => K"⬸", + '⬹' => K"⬹", + '⬺' => K"⬺", + '⬻' => K"⬻", + '⬼' => K"⬼", + '⬽' => K"⬽", + '⬾' => K"⬾", + '⬿' => K"⬿", + '⭀' => K"⭀", + '⭁' => K"⭁", + '⭂' => K"⭂", + '⭃' => K"⭃", + '⭄' => K"⭄", + '⭇' => K"⭇", + '⭈' => K"⭈", + '⭉' => K"⭉", + '⭊' => K"⭊", + '⭋' => K"⭋", + '⭌' => K"⭌", + '←' => K"←", + '→' => K"→", + '≥' => K"≥", + '≤' => K"≤", + '≡' => K"≡", + '≠' => K"≠", + '≢' => K"≢", + '∈' => K"∈", + '∉' => K"∉", + '∋' => K"∋", + '∌' => K"∌", + '⊆' => K"⊆", + '⊈' => K"⊈", + '⊂' => K"⊂", + '⊄' => K"⊄", + '⊊' => K"⊊", + '∝' => K"∝", + '∊' => K"∊", + '∍' => K"∍", + '∥' => K"∥", + '∦' => K"∦", + '∷' => K"∷", + '∺' => K"∺", + '∻' => K"∻", + '∽' => K"∽", + '∾' => K"∾", + '≁' => K"≁", + '≃' => K"≃", + '≄' => K"≄", + '≅' => K"≅", + '≆' => K"≆", + '≇' => K"≇", + '≈' => K"≈", + '≉' => K"≉", + '≊' => K"≊", + '≋' => K"≋", + '≌' => K"≌", + '≍' => K"≍", + '≎' => K"≎", + '≐' => K"≐", + '≑' => K"≑", + '≒' => K"≒", + '≓' => K"≓", + '≔' => K"≔", + '≕' => K"≕", + '≖' => K"≖", + '≗' => K"≗", + '≘' => K"≘", + '≙' => K"≙", + '≚' => K"≚", + '≛' => K"≛", + '≜' => K"≜", + '≝' => K"≝", + '≞' => K"≞", + '≟' => K"≟", + '≣' => K"≣", + '≦' => K"≦", + '≧' => K"≧", + '≨' => K"≨", + '≩' => K"≩", + '≪' => K"≪", + '≫' => K"≫", + '≬' => K"≬", + '≭' => K"≭", + '≮' => K"≮", + '≯' => K"≯", + '≰' => K"≰", + '≱' => K"≱", + '≲' => K"≲", + '≳' => K"≳", + '≴' => K"≴", + '≵' => K"≵", + '≶' => K"≶", + '≷' => K"≷", + '≸' => K"≸", + '≹' => K"≹", + '≺' => K"≺", + '≻' => K"≻", + '≼' => K"≼", + '≽' => K"≽", + '≾' => K"≾", + '≿' => K"≿", + '⊀' => K"⊀", + '⊁' => K"⊁", + '⊃' => K"⊃", + '⊅' => K"⊅", + '⊇' => K"⊇", + '⊉' => K"⊉", + '⊋' => K"⊋", + '⊏' => K"⊏", + '⊐' => K"⊐", + '⊑' => K"⊑", + '⊒' => K"⊒", + '⊜' => K"⊜", + '⊩' => K"⊩", + '⊬' => K"⊬", + '⊮' => K"⊮", + '⊰' => K"⊰", + '⊱' => 
K"⊱", + '⊲' => K"⊲", + '⊳' => K"⊳", + '⊴' => K"⊴", + '⊵' => K"⊵", + '⊶' => K"⊶", + '⊷' => K"⊷", + '⋍' => K"⋍", + '⋐' => K"⋐", + '⋑' => K"⋑", + '⋕' => K"⋕", + '⋖' => K"⋖", + '⋗' => K"⋗", + '⋘' => K"⋘", + '⋙' => K"⋙", + '⋚' => K"⋚", + '⋛' => K"⋛", + '⋜' => K"⋜", + '⋝' => K"⋝", + '⋞' => K"⋞", + '⋟' => K"⋟", + '⋠' => K"⋠", + '⋡' => K"⋡", + '⋢' => K"⋢", + '⋣' => K"⋣", + '⋤' => K"⋤", + '⋥' => K"⋥", + '⋦' => K"⋦", + '⋧' => K"⋧", + '⋨' => K"⋨", + '⋩' => K"⋩", + '⋪' => K"⋪", + '⋫' => K"⋫", + '⋬' => K"⋬", + '⋭' => K"⋭", + '⋲' => K"⋲", + '⋳' => K"⋳", + '⋴' => K"⋴", + '⋵' => K"⋵", + '⋶' => K"⋶", + '⋷' => K"⋷", + '⋸' => K"⋸", + '⋹' => K"⋹", + '⋺' => K"⋺", + '⋻' => K"⋻", + '⋼' => K"⋼", + '⋽' => K"⋽", + '⋾' => K"⋾", + '⋿' => K"⋿", + '⟈' => K"⟈", + '⟉' => K"⟉", + '⟒' => K"⟒", + '⦷' => K"⦷", + '⧀' => K"⧀", + '⧁' => K"⧁", + '⧡' => K"⧡", + '⧣' => K"⧣", + '⧤' => K"⧤", + '⧥' => K"⧥", + '⩦' => K"⩦", + '⩧' => K"⩧", + '⩪' => K"⩪", + '⩫' => K"⩫", + '⩬' => K"⩬", + '⩭' => K"⩭", + '⩮' => K"⩮", + '⩯' => K"⩯", + '⩰' => K"⩰", + '⩱' => K"⩱", + '⩲' => K"⩲", + '⩳' => K"⩳", + '⩴' => K"⩴", + '⩵' => K"⩵", + '⩶' => K"⩶", + '⩷' => K"⩷", + '⩸' => K"⩸", + '⩹' => K"⩹", + '⩺' => K"⩺", + '⩻' => K"⩻", + '⩼' => K"⩼", + '⩽' => K"⩽", + '⩾' => K"⩾", + '⩿' => K"⩿", + '⪀' => K"⪀", + '⪁' => K"⪁", + '⪂' => K"⪂", + '⪃' => K"⪃", + '⪄' => K"⪄", + '⪅' => K"⪅", + '⪆' => K"⪆", + '⪇' => K"⪇", + '⪈' => K"⪈", + '⪉' => K"⪉", + '⪊' => K"⪊", + '⪋' => K"⪋", + '⪌' => K"⪌", + '⪍' => K"⪍", + '⪎' => K"⪎", + '⪏' => K"⪏", + '⪐' => K"⪐", + '⪑' => K"⪑", + '⪒' => K"⪒", + '⪓' => K"⪓", + '⪔' => K"⪔", + '⪕' => K"⪕", + '⪖' => K"⪖", + '⪗' => K"⪗", + '⪘' => K"⪘", + '⪙' => K"⪙", + '⪚' => K"⪚", + '⪛' => K"⪛", + '⪜' => K"⪜", + '⪝' => K"⪝", + '⪞' => K"⪞", + '⪟' => K"⪟", + '⪠' => K"⪠", + '⪡' => K"⪡", + '⪢' => K"⪢", + '⪣' => K"⪣", + '⪤' => K"⪤", + '⪥' => K"⪥", + '⪦' => K"⪦", + '⪧' => K"⪧", + '⪨' => K"⪨", + '⪩' => K"⪩", + '⪪' => K"⪪", + '⪫' => K"⪫", + '⪬' => K"⪬", + '⪭' => K"⪭", + '⪮' => K"⪮", + '⪯' => K"⪯", + '⪰' => K"⪰", + '⪱' => K"⪱", + '⪲' => 
K"⪲", + '⪳' => K"⪳", + '⪴' => K"⪴", + '⪵' => K"⪵", + '⪶' => K"⪶", + '⪷' => K"⪷", + '⪸' => K"⪸", + '⪹' => K"⪹", + '⪺' => K"⪺", + '⪻' => K"⪻", + '⪼' => K"⪼", + '⪽' => K"⪽", + '⪾' => K"⪾", + '⪿' => K"⪿", + '⫀' => K"⫀", + '⫁' => K"⫁", + '⫂' => K"⫂", + '⫃' => K"⫃", + '⫄' => K"⫄", + '⫅' => K"⫅", + '⫆' => K"⫆", + '⫇' => K"⫇", + '⫈' => K"⫈", + '⫉' => K"⫉", + '⫊' => K"⫊", + '⫋' => K"⫋", + '⫌' => K"⫌", + '⫍' => K"⫍", + '⫎' => K"⫎", + '⫏' => K"⫏", + '⫐' => K"⫐", + '⫑' => K"⫑", + '⫒' => K"⫒", + '⫓' => K"⫓", + '⫔' => K"⫔", + '⫕' => K"⫕", + '⫖' => K"⫖", + '⫗' => K"⫗", + '⫘' => K"⫘", + '⫙' => K"⫙", + '⫷' => K"⫷", + '⫸' => K"⫸", + '⫹' => K"⫹", + '⫺' => K"⫺", + '⊢' => K"⊢", + '⊣' => K"⊣", + '⫪' => K"⫪", + '⫫' => K"⫫", + '⟂' => K"⟂", + '⊕' => K"⊕", + '⊖' => K"⊖", + '⊞' => K"⊞", + '⊟' => K"⊟", + '|' => K"|", + '∪' => K"∪", + '∨' => K"∨", + '⊔' => K"⊔", + '±' => K"±", + '∓' => K"∓", + '∔' => K"∔", + '∸' => K"∸", + '≂' => K"≂", + '≏' => K"≏", + '⊎' => K"⊎", + '⊻' => K"⊻", + '⊽' => K"⊽", + '⋎' => K"⋎", + '⋓' => K"⋓", + '⧺' => K"⧺", + '⧻' => K"⧻", + '⨈' => K"⨈", + '⨢' => K"⨢", + '⨣' => K"⨣", + '⨤' => K"⨤", + '⨥' => K"⨥", + '⨦' => K"⨦", + '⨧' => K"⨧", + '⨨' => K"⨨", + '⨩' => K"⨩", + '⨪' => K"⨪", + '⨫' => K"⨫", + '⨬' => K"⨬", + '⨭' => K"⨭", + '⨮' => K"⨮", + '⨹' => K"⨹", + '⨺' => K"⨺", + '⩁' => K"⩁", + '⩂' => K"⩂", + '⩅' => K"⩅", + '⩊' => K"⩊", + '⩌' => K"⩌", + '⩏' => K"⩏", + '⩐' => K"⩐", + '⩒' => K"⩒", + '⩔' => K"⩔", + '⩖' => K"⩖", + '⩗' => K"⩗", + '⩛' => K"⩛", + '⩝' => K"⩝", + '⩡' => K"⩡", + '⩢' => K"⩢", + '⩣' => K"⩣", + '∘' => K"∘", + '×' => K"×", + '∩' => K"∩", + '∧' => K"∧", + '⊗' => K"⊗", + '⊘' => K"⊘", + '⊙' => K"⊙", + '⊚' => K"⊚", + '⊛' => K"⊛", + '⊠' => K"⊠", + '⊡' => K"⊡", + '⊓' => K"⊓", + '∗' => K"∗", + '∙' => K"∙", + '∤' => K"∤", + '⅋' => K"⅋", + '≀' => K"≀", + '⊼' => K"⊼", + '⋄' => K"⋄", + '⋆' => K"⋆", + '⋇' => K"⋇", + '⋉' => K"⋉", + '⋊' => K"⋊", + '⋋' => K"⋋", + '⋌' => K"⋌", + '⋏' => K"⋏", + '⋒' => K"⋒", + '⟑' => K"⟑", + '⦸' => K"⦸", + '⦼' => K"⦼", + '⦾' => K"⦾", + '⦿' => 
K"⦿", + '⧶' => K"⧶", + '⧷' => K"⧷", + '⨇' => K"⨇", + '⨰' => K"⨰", + '⨱' => K"⨱", + '⨲' => K"⨲", + '⨳' => K"⨳", + '⨴' => K"⨴", + '⨵' => K"⨵", + '⨶' => K"⨶", + '⨷' => K"⨷", + '⨸' => K"⨸", + '⨻' => K"⨻", + '⨼' => K"⨼", + '⨽' => K"⨽", + '⩀' => K"⩀", + '⩃' => K"⩃", + '⩄' => K"⩄", + '⩋' => K"⩋", + '⩍' => K"⩍", + '⩎' => K"⩎", + '⩑' => K"⩑", + '⩓' => K"⩓", + '⩕' => K"⩕", + '⩘' => K"⩘", + '⩚' => K"⩚", + '⩜' => K"⩜", + '⩞' => K"⩞", + '⩟' => K"⩟", + '⩠' => K"⩠", + '⫛' => K"⫛", + '⊍' => K"⊍", + '▷' => K"▷", + '⨝' => K"⨝", + '⟕' => K"⟕", + '⟖' => K"⟖", + '⟗' => K"⟗", + '^' => K"^", + '↑' => K"↑", + '↓' => K"↓", + '⇵' => K"⇵", + '⟰' => K"⟰", + '⟱' => K"⟱", + '⤈' => K"⤈", + '⤉' => K"⤉", + '⤊' => K"⤊", + '⤋' => K"⤋", + '⤒' => K"⤒", + '⤓' => K"⤓", + '⥉' => K"⥉", + '⥌' => K"⥌", + '⥍' => K"⥍", + '⥏' => K"⥏", + '⥑' => K"⥑", + '⥔' => K"⥔", + '⥕' => K"⥕", + '⥘' => K"⥘", + '⥙' => K"⥙", + '⥜' => K"⥜", + '⥝' => K"⥝", + '⥠' => K"⥠", + '⥡' => K"⥡", + '⥣' => K"⥣", + '⥥' => K"⥥", + '⥮' => K"⥮", + '⥯' => K"⥯", + '↑' => K"↑", + '↓' => K"↓", + # Lookalikes which are normalized into K"⋅", + # https://github.com/JuliaLang/julia/pull/25157, + '\u00b7' => K"⋅", # '·' Middle Dot,, + '\u0387' => K"⋅", # '·' Greek Ano Teleia,, + '⋅' => K"⋅", + '…' => K"…", + '⁝' => K"⁝", + '⋮' => K"⋮", + '⋱' => K"⋱", + '⋰' => K"⋰", + '⋯' => K"⋯", + '↻' => K"↻", + '⇜' => K"⇜", + '⇝' => K"⇝", + '↜' => K"↜", + '↝' => K"↝", + '↩' => K"↩", + '↪' => K"↪", + '↫' => K"↫", + '↬' => K"↬", + '↼' => K"↼", + '↽' => K"↽", + '⇀' => K"⇀", + '⇁' => K"⇁", + '⇄' => K"⇄", + '⇆' => K"⇆", + '⇇' => K"⇇", + '⇉' => K"⇉", + '⇋' => K"⇋", + '⇌' => K"⇌", + '⇚' => K"⇚", + '⇛' => K"⇛", + '⇠' => K"⇠", + '⇢' => K"⇢", + '↷' => K"↷", + '↶' => K"↶", + '↺' => K"↺", + '¦' => K"¦", + '⌿' => K"⌿", + '⨟' => K"⨟", +) + + +# For use in tests only ? 
+const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() +for (k, v) in UNICODE_OPS + k in ('\u00b7', '\u0387') && continue + UNICODE_OPS_REVERSE[v] = Symbol(k) +end + +for (k, v) in [ + K"=" => :(=) + K"+=" => :(+=) + K"-=" => :(-=) + K"*=" => :(*=) + K"/=" => :(/=) + K"//=" => :(//=) + K"|=" => :(|=) + K"^=" => :(^=) + K"÷=" => :(÷=) + K"%=" => :(%=) + K"<<=" => :(<<=) + K">>=" => :(>>=) + K"<<" => :(<<) + K">>" => :(>>) + K">>>" => :(>>>) + K">>>=" => :(>>>=) + K"\=" => :(\=) + K"&=" => :(&=) + K":=" => :(:=) + K"=>" => :(=>) + K"~" => :(~) + K"$=" => :($=) + K"⊻=" => :(⊻=) + K"-->" => :(-->) + K"||" => :(||) + K"&&" => :(&&) + K"<:" => :(<:) + K">:" => :(>:) + K">" => :(>) + K"<" => :(<) + K">=" => :(>=) + K"≥" => :(≥) + K"<=" => :(<=) + K"≤" => :(≤) + K"==" => :(==) + K"===" => :(===) + K"≡" => :(≡) + K"!=" => :(!=) + K"≠" => :(≠) + K"!==" => :(!==) + K"≢" => :(≢) + K"in" => :(in) + K"isa" => :(isa) + K"<|" => :(<|) + K"|>" => :(|>) + K":" => :(:) + K".." => :(..) + K"$" => :($) + K"+" => :(+) + K"-" => :(-) + K"++" => :(++) + K"|" => :(|) + K"*" => :(*) + K"/" => :(/) + K"%" => :(%) + K"\\" => :(\) + K"&" => :(&) + K"//" => :(//) + K"^" => :(^) + K"::" => :(::) + K"?" => :? + K"." => :(.) + K"!" => :(!) + K"'" => Symbol(''') + K"..." => :(...) + K".'" => Symbol(".'") + K"->" => :(->) + K"where" => :where + ] + UNICODE_OPS_REVERSE[k] = v +end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index f1260978eaf1a..9f461820dab75 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -994,3 +994,109 @@ Return the `Kind` of `x`. 
""" kind(k::Kind) = k +#------------------------------------------------------------------------------- +const _nonunique_kind_names = Set([ + K"Comment" + K"Whitespace" + K"NewlineWs" + K"Identifier" + + K"ErrorEofMultiComment" + K"ErrorEofChar" + K"ErrorInvalidNumericConstant" + K"ErrorInvalidOperator" + K"ErrorInvalidInterpolationTerminator" + + K"Integer" + K"BinInt" + K"HexInt" + K"OctInt" + K"Float" + K"String" + K"Char" + K"CmdString" + + K"MacroName" + K"StringMacroName" + K"CmdMacroName" +]) + +""" +Return the string representation of a token kind, or `nothing` if the kind +represents a class of tokens like K"Identifier". + +When `unique=true` only return a string when the kind uniquely defines the +corresponding input token, otherwise return `nothing`. When `unique=false`, +return the name of the kind. + +TODO: Replace `untokenize()` with `Base.string()`? +""" +function untokenize(k::Kind; unique=true) + if unique && k in _nonunique_kind_names + return nothing + else + return convert(String, k) + end +end + + +#------------------------------------------------------------------------------- +# Predicates +is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" < k < K"END_CONTEXTUAL_KEYWORDS" +is_error(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" +is_keyword(k::Kind) = K"BEGIN_KEYWORDS" < k < K"END_KEYWORDS" +is_literal(k::Kind) = K"BEGIN_LITERAL" < k < K"END_LITERAL" +is_operator(k::Kind) = K"BEGIN_OPS" < k < K"END_OPS" +is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") + +is_contextual_keyword(k) = is_contextual_keyword(kind(k)) +is_error(k) = is_error(kind(k)) +is_keyword(k) = is_keyword(kind(k)) +is_literal(k) = is_literal(kind(k)) +is_operator(k) = is_operator(kind(k)) +is_word_operator(k) = is_word_operator(kind(k)) + + +# Predicates for operator precedence +# FIXME: Review how precedence depends on dottedness, eg +# https://github.com/JuliaLang/julia/pull/36725 +is_prec_assignment(x) = K"BEGIN_ASSIGNMENTS" < kind(x) < 
K"END_ASSIGNMENTS" +is_prec_pair(x) = K"BEGIN_PAIRARROW" < kind(x) < K"END_PAIRARROW" +is_prec_conditional(x) = K"BEGIN_CONDITIONAL" < kind(x) < K"END_CONDITIONAL" +is_prec_arrow(x) = K"BEGIN_ARROW" < kind(x) < K"END_ARROW" +is_prec_lazy_or(x) = K"BEGIN_LAZYOR" < kind(x) < K"END_LAZYOR" +is_prec_lazy_and(x) = K"BEGIN_LAZYAND" < kind(x) < K"END_LAZYAND" +is_prec_comparison(x) = K"BEGIN_COMPARISON" < kind(x) < K"END_COMPARISON" +is_prec_pipe(x) = K"BEGIN_PIPE" < kind(x) < K"END_PIPE" +is_prec_colon(x) = K"BEGIN_COLON" < kind(x) < K"END_COLON" +is_prec_plus(x) = K"BEGIN_PLUS" < kind(x) < K"END_PLUS" +is_prec_bitshift(x) = K"BEGIN_BITSHIFTS" < kind(x) < K"END_BITSHIFTS" +is_prec_times(x) = K"BEGIN_TIMES" < kind(x) < K"END_TIMES" +is_prec_rational(x) = K"BEGIN_RATIONAL" < kind(x) < K"END_RATIONAL" +is_prec_power(x) = K"BEGIN_POWER" < kind(x) < K"END_POWER" +is_prec_decl(x) = K"BEGIN_DECL" < kind(x) < K"END_DECL" +is_prec_where(x) = K"BEGIN_WHERE" < kind(x) < K"END_WHERE" +is_prec_dot(x) = K"BEGIN_DOT" < kind(x) < K"END_DOT" +is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" < kind(x) < K"END_UNICODE_OPS" +is_prec_pipe_lt(x) = kind(x) == K"<|" +is_prec_pipe_gt(x) = kind(x) == K"|>" +is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS" < kind(x) < K"END_SYNTAX_KINDS" +is_macro_name(x) = K"BEGIN_MACRO_NAMES" < kind(x) < K"END_MACRO_NAMES" + +function is_number(x) + kind(x) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") +end + +function is_string_delim(x) + kind(x) in (K"\"", K"\"\"\"") +end + +function is_radical_op(x) + kind(x) in (K"√", K"∛", K"∜") +end + +function is_whitespace(x) + kind(x) in (K"Whitespace", K"NewlineWs") +end + + diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index be1617e94bba5..ab5b383b3fc79 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -91,8 +91,7 @@ end kind(x) = kind(head(x)) flags(x) = flags(head(x)) -# Predicates based on kind() / flags() -is_error(x) = is_error(kind(x)) +# 
Predicates based on flags() has_flags(x, test_flags) = has_flags(flags(x), test_flags) is_trivia(x) = has_flags(x, TRIVIA_FLAG) is_infix(x) = has_flags(x, INFIX_FLAG) @@ -174,7 +173,7 @@ mutable struct ParseStream # the `textbuf` owner was unknown (eg, ptr,length was passed) text_root::Any # Lexer, transforming the input bytes into a token stream - lexer::Tokenize.Lexers.Lexer{IOBuffer} + lexer::Tokenize.Lexer{IOBuffer} # Lookahead buffer for already lexed tokens lookahead::Vector{SyntaxToken} lookahead_index::Int @@ -196,7 +195,7 @@ mutable struct ParseStream version::VersionNumber) io = IOBuffer(text_buf) seek(io, next_byte-1) - lexer = Tokenize.Lexers.Lexer(io) + lexer = Tokenize.Lexer(io) # To avoid keeping track of the exact Julia development version where new # features were added or comparing prerelease strings, we treat prereleases # or dev versons as the release version using only major and minor version @@ -317,8 +316,8 @@ function _buffer_lookahead_tokens(lexer, lookahead) had_whitespace = false token_count = 0 while true - raw = Tokenize.Lexers.next_token(lexer) - k = TzTokens.exactkind(raw) + raw = Tokenize.next_token(lexer) + k = kind(raw) was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") had_whitespace |= was_whitespace f = EMPTY_FLAGS diff --git a/JuliaSyntax/src/tokens.jl b/JuliaSyntax/src/tokens.jl deleted file mode 100644 index df5ca9392e725..0000000000000 --- a/JuliaSyntax/src/tokens.jl +++ /dev/null @@ -1,120 +0,0 @@ - -kind(raw::TzTokens.Token) = TzTokens.exactkind(raw) - -# Some renaming for naming consistency -is_literal(k) = TzTokens.isliteral(kind(k)) -is_keyword(k) = TzTokens.iskeyword(kind(k)) -is_error(k::Kind) = TzTokens.iserror(k) -is_contextual_keyword(k) = TzTokens.iscontextualkeyword(kind(k)) -is_operator(k) = TzTokens.isoperator(kind(k)) -is_word_operator(k) = TzTokens.iswordoperator(kind(k)) - -# Predicates for operator precedence -# FIXME: Review how precedence depends on dottedness, eg -# 
https://github.com/JuliaLang/julia/pull/36725 -is_prec_assignment(t) = K"BEGIN_ASSIGNMENTS" < kind(t) < K"END_ASSIGNMENTS" -is_prec_pair(t) = K"BEGIN_PAIRARROW" < kind(t) < K"END_PAIRARROW" -is_prec_conditional(t) = K"BEGIN_CONDITIONAL" < kind(t) < K"END_CONDITIONAL" -is_prec_arrow(t) = K"BEGIN_ARROW" < kind(t) < K"END_ARROW" -is_prec_lazy_or(t) = K"BEGIN_LAZYOR" < kind(t) < K"END_LAZYOR" -is_prec_lazy_and(t) = K"BEGIN_LAZYAND" < kind(t) < K"END_LAZYAND" -is_prec_comparison(t) = K"BEGIN_COMPARISON" < kind(t) < K"END_COMPARISON" -is_prec_pipe(t) = K"BEGIN_PIPE" < kind(t) < K"END_PIPE" -is_prec_colon(t) = K"BEGIN_COLON" < kind(t) < K"END_COLON" -is_prec_plus(t) = K"BEGIN_PLUS" < kind(t) < K"END_PLUS" -is_prec_bitshift(t) = K"BEGIN_BITSHIFTS" < kind(t) < K"END_BITSHIFTS" -is_prec_times(t) = K"BEGIN_TIMES" < kind(t) < K"END_TIMES" -is_prec_rational(t) = K"BEGIN_RATIONAL" < kind(t) < K"END_RATIONAL" -is_prec_power(t) = K"BEGIN_POWER" < kind(t) < K"END_POWER" -is_prec_decl(t) = K"BEGIN_DECL" < kind(t) < K"END_DECL" -is_prec_where(t) = K"BEGIN_WHERE" < kind(t) < K"END_WHERE" -is_prec_dot(t) = K"BEGIN_DOT" < kind(t) < K"END_DOT" -is_prec_unicode_ops(t) = K"BEGIN_UNICODE_OPS" < kind(t) < K"END_UNICODE_OPS" - -is_prec_pipe_lt(t) = kind(t) == K"<|" -is_prec_pipe_gt(t) = kind(t) == K"|>" - -#= -# Sholuld we optimize membership a bit by unrolling? 
-@generated function in(k::Kind, t::NTuple{N,Kind}) where {N} - ex = :(k === t[1]) - for i = 2:N - ex = :($ex || k === t[$i]) - end - quote - $ex - end -end -=# - -function is_syntax_kind(t) - K"BEGIN_SYNTAX_KINDS" < kind(t) < K"END_SYNTAX_KINDS" -end - -function is_identifier(k) - kind(k) == K"Identifier" -end - -function is_macro_name(k) - K"BEGIN_MACRO_NAMES" < kind(k) < K"END_MACRO_NAMES" -end - -function is_number(t) - kind(t) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") -end - -function is_string_delim(t) - kind(t) in (K"\"", K"\"\"\"") -end - -function is_radical_op(t) - kind(t) in (K"√", K"∛", K"∜") -end - -function is_whitespace(t) - kind(t) in (K"Whitespace", K"NewlineWs") -end - - -#------------------------------------------------------------------------------- -# Mapping from kinds to their unique string representation, if it exists -# FIXME: put this in token_kinds ? -const _kind_to_str_unique = - Dict{Kind,String}(k=>string(s) for (k,s) in TzTokens.UNICODE_OPS_REVERSE) -for kw in split(""" - ( [ { } ] ) @ , ; " \"\"\" ` ``` - - baremodule begin break catch const - continue do else elseif end export finally for - function global if import let local - macro module quote return struct try type using while - - as abstract doc mutable outer primitive type var - - block call comparison curly string inert macrocall kw parameters - toplevel tuple ref vect braces bracescat hcat - vcat ncat typed_hcat typed_vcat typed_ncat row nrow generator - filter flatten comprehension typed_comprehension - - error nothing true false None - """) - _kind_to_str_unique[convert(Kind, kw)] = kw -end - -""" -Return the string representation of a token kind, or `nothing` if the kind -represents a class of tokens like K"Identifier". - -When `unique=true` only return a string when the kind uniquely defines the -corresponding input token, otherwise return `nothing`. When `unique=false`, -return the name of the kind. - -TODO: Replace `untokenize()` with `Base.string()`? 
-""" -function untokenize(k::Kind; unique=true) - if unique - get(_kind_to_str_unique, k, nothing) - else - convert(String, k) - end -end diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/lexer.jl index d015576e1f50e..246e479929b12 100644 --- a/JuliaSyntax/test/lexer.jl +++ b/JuliaSyntax/test/lexer.jl @@ -4,19 +4,16 @@ module TokenizeTests using Test using JuliaSyntax: - # Parsing @K_str, + Kind, kind, - Kind + is_error, + is_operator using JuliaSyntax.Tokenize: - Tokens, - Lexers, Tokenize, tokenize, - untokenize - -using JuliaSyntax.Tokenize.Tokens: + untokenize, Token tok(str, i = 1) = collect(tokenize(str))[i] @@ -26,13 +23,13 @@ strtok(str) = untokenize.(collect(tokenize(str)), str) @testset "tokens" begin for s in ["a", IOBuffer("a")] l = tokenize(s) - @test Lexers.readchar(l) == 'a' + @test Tokenize.readchar(l) == 'a' # @test l.current_pos == 0 l_old = l @test l == l_old - @test Lexers.eof(l) - @test Lexers.readchar(l) == Lexers.EOF_CHAR + @test Tokenize.eof(l) + @test Tokenize.readchar(l) == Tokenize.EOF_CHAR # @test l.current_pos == 0 end @@ -82,7 +79,7 @@ end # testset # Generate the following with # ``` - # for t in Tokens.kind.(collect(tokenize(str))) + # for t in kind.(collect(tokenize(str))) # print(kind(t), ",") # end # ``` @@ -520,14 +517,14 @@ end @test ts[4] ~ (K"Identifier" , "x" , str) @test ts[5] ~ (K"ErrorInvalidInterpolationTerminator" , "" , str) @test ts[6] ~ (K"String" , "෴" , str) - @test Tokens.iserror(ts[5].kind) + @test is_error(ts[5].kind) @test ts[5].kind == K"ErrorInvalidInterpolationTerminator" end end @testset "inferred" begin l = tokenize("abc") - @inferred Tokenize.Lexers.next_token(l) + @inferred Tokenize.next_token(l) end @testset "modifying function names (!) 
followed by operator" begin @@ -677,7 +674,7 @@ end @testset "dotted and suffixed operators" begin -ops = collect(values(Tokenize.Tokens.UNICODE_OPS_REVERSE)) +ops = collect(values(Tokenize.UNICODE_OPS_REVERSE)) for op in ops op in (:isa, :in, :where, Symbol('\''), :?, :(:)) && continue @@ -709,7 +706,7 @@ for op in ops tokens = collect(tokenize(str)) exop = expr.head == :call ? expr.args[1] : expr.head #println(str) - @test Symbol(Tokenize.Tokens.untokenize(tokens[arity == 1 ? 1 : 3], str)) == exop + @test Symbol(Tokenize.untokenize(tokens[arity == 1 ? 1 : 3], str)) == exop else break end @@ -740,7 +737,7 @@ end end function test_error(tok, kind) - @test Tokens.iserror(tok.kind) + @test is_error(tok.kind) @test tok.kind == kind end @@ -762,14 +759,14 @@ end @testset "suffixed op" begin s = "+¹" - @test Tokens.isoperator(tok(s, 1).kind) + @test is_operator(tok(s, 1).kind) @test untokenize(collect(tokenize(s))[1], s) == s end @testset "invalid float juxt" begin s = "1.+2" @test tok(s, 1).kind == K"error" - @test Tokens.isoperator(tok(s, 2).kind) + @test is_operator(tok(s, 2).kind) test_roundtrip("1234.+1", K"error", "1234.") @test tok("1.+ ").kind == K"error" @test tok("1.⤋").kind == K"error" @@ -824,7 +821,7 @@ end push!(ops, raw"<-- <-->") end allops = split(join(ops, " "), " ") - @test all(s->Base.isoperator(Symbol(s)) == Tokens.isoperator(first(collect(tokenize(s))).kind), allops) + @test all(s->Base.isoperator(Symbol(s)) == is_operator(first(collect(tokenize(s))).kind), allops) end const all_kws = Set([ @@ -877,14 +874,14 @@ const all_kws = Set([ function check_kw_hashes(iter) for cs in iter str = String([cs...]) - if Lexers.simple_hash(str) in keys(Tokenize.Lexers.kw_hash) + if Tokenize.simple_hash(str) in keys(Tokenize.kw_hash) @test str in all_kws end end end @testset "simple_hash" begin - @test length(all_kws) == length(Tokenize.Lexers.kw_hash) + @test length(all_kws) == length(Tokenize.kw_hash) @testset "Length $len keywords" for len in 1:5 
check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...)) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index ded79a74fb559..a6281b1a88959 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -7,9 +7,6 @@ using JuliaSyntax: GreenNode, SyntaxNode, flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild!, SyntaxHead -using JuliaSyntax.Tokenize -using JuliaSyntax.Tokenize.Lexers - @testset "Tokenize" begin include("lexer.jl") end From 0759f2514defe127f826096925dc9cdb3fcd809d Mon Sep 17 00:00:00 2001 From: Chris Foster Date: Wed, 10 Aug 2022 09:37:37 +1000 Subject: [PATCH 0452/1109] Consolidate source of Tokenize.jl into main src dir This will help integrate the two codebases a bit more consistently. Tokenize is still a separate module for now. --- JuliaSyntax/LICENSE.md | 2 +- JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/Tokenize/Tokenize.jl | 2 -- JuliaSyntax/src/Tokenize/token.jl | 0 JuliaSyntax/src/{Tokenize/lexer.jl => tokenize.jl} | 2 +- JuliaSyntax/src/{Tokenize/utilities.jl => tokenize_utils.jl} | 0 JuliaSyntax/test/runtests.jl | 2 +- JuliaSyntax/test/{lexer.jl => tokenize.jl} | 0 8 files changed, 4 insertions(+), 6 deletions(-) delete mode 100644 JuliaSyntax/src/Tokenize/Tokenize.jl delete mode 100644 JuliaSyntax/src/Tokenize/token.jl rename JuliaSyntax/src/{Tokenize/lexer.jl => tokenize.jl} (99%) rename JuliaSyntax/src/{Tokenize/utilities.jl => tokenize_utils.jl} (100%) rename JuliaSyntax/test/{lexer.jl => tokenize.jl} (100%) diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md index d70e81a6cea51..bfb0e81bccb04 100644 --- a/JuliaSyntax/LICENSE.md +++ b/JuliaSyntax/LICENSE.md @@ -20,7 +20,7 @@ The JuliaSyntax.jl package is licensed under the MIT "Expat" License: > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. 
-The code in src/Tokenize and test/lexer.jl is derived from the Tokenize.jl +The code in src/tokenize*.jl and test/tokenize.jl is derived from the Tokenize.jl package and is also licensed under the MIT "Expat" License: > Copyright (c) 2016: Kristoffer Carlsson. diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 2b0986d254803..da57cead8874a 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -8,7 +8,7 @@ include("utils.jl") include("kinds.jl") # Lexing uses a significantly modified version of Tokenize.jl -include("Tokenize/Tokenize.jl") +include("tokenize.jl") using .Tokenize: Token # Source and diagnostics diff --git a/JuliaSyntax/src/Tokenize/Tokenize.jl b/JuliaSyntax/src/Tokenize/Tokenize.jl deleted file mode 100644 index fb49626dd9f89..0000000000000 --- a/JuliaSyntax/src/Tokenize/Tokenize.jl +++ /dev/null @@ -1,2 +0,0 @@ -include("lexer.jl") - diff --git a/JuliaSyntax/src/Tokenize/token.jl b/JuliaSyntax/src/Tokenize/token.jl deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/JuliaSyntax/src/Tokenize/lexer.jl b/JuliaSyntax/src/tokenize.jl similarity index 99% rename from JuliaSyntax/src/Tokenize/lexer.jl rename to JuliaSyntax/src/tokenize.jl index 1537b772e4cf2..1c9cf4e8d06ec 100644 --- a/JuliaSyntax/src/Tokenize/lexer.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -9,7 +9,7 @@ import ..JuliaSyntax: kind, import Base.eof -include("utilities.jl") +include("tokenize_utils.jl") #------------------------------------------------------------------------------- # Tokens diff --git a/JuliaSyntax/src/Tokenize/utilities.jl b/JuliaSyntax/src/tokenize_utils.jl similarity index 100% rename from JuliaSyntax/src/Tokenize/utilities.jl rename to JuliaSyntax/src/tokenize_utils.jl diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index a6281b1a88959..7d40754459d47 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -8,7 +8,7 @@ using JuliaSyntax: 
GreenNode, SyntaxNode, children, child, setchild!, SyntaxHead @testset "Tokenize" begin - include("lexer.jl") + include("tokenize.jl") end include("test_utils.jl") diff --git a/JuliaSyntax/test/lexer.jl b/JuliaSyntax/test/tokenize.jl similarity index 100% rename from JuliaSyntax/test/lexer.jl rename to JuliaSyntax/test/tokenize.jl From 2c80f20451ce77a3852f4222b011698568e0503f Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 Aug 2022 18:13:35 +1000 Subject: [PATCH 0453/1109] Sysimage / precompile fixes * Fix for sysimage compilation * Disable info logging so stdlib build is simpler --- JuliaSyntax/src/syntax_tree.jl | 2 +- JuliaSyntax/sysimage/compile.jl | 1 - JuliaSyntax/sysimage/precompile_exec.jl | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index f9ecd9d06788b..4ea92384ab782 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -74,7 +74,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif is_syntax_kind(raw) nothing else - @error "Leaf node of kind $k unknown to SyntaxNode" + error("Leaf node of kind $k unknown to SyntaxNode") val = nothing end return SyntaxNode(source, raw, position, nothing, true, val) diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl index 329a7c553004a..fbc17232ad6e3 100755 --- a/JuliaSyntax/sysimage/compile.jl +++ b/JuliaSyntax/sysimage/compile.jl @@ -17,7 +17,6 @@ cd(@__DIR__) rm("JuliaSyntax", force=true, recursive=true) mkdir("JuliaSyntax") cp("../src", "JuliaSyntax/src") -cp("../Tokenize", "JuliaSyntax/Tokenize") cp("../test", "JuliaSyntax/test") projstr = replace(read("../Project.toml", String), "70703baa-626e-46a2-a12c-08ffd08c73b4"=>"54354a4c-6cac-4c00-8566-e7c1beb8bfd8") diff --git a/JuliaSyntax/sysimage/precompile_exec.jl b/JuliaSyntax/sysimage/precompile_exec.jl index 74f7bd7d1145d..567d3d02ddbcd 100644 --- 
a/JuliaSyntax/sysimage/precompile_exec.jl +++ b/JuliaSyntax/sysimage/precompile_exec.jl @@ -2,4 +2,4 @@ import JuliaSyntax Base.include(@__MODULE__(), joinpath(pkgdir(JuliaSyntax), "test", "test_utils.jl")) Base.include(@__MODULE__(), joinpath(pkgdir(JuliaSyntax), "test", "parser.jl")) JuliaSyntax.enable_in_core!() -@info "Some parsing" Meta.parse("x+y+z-w .+ [a b c]") +Meta.parse("x+y+z-w .+ [a b c]") From 365b51038ca762d2d1e5d97800fb79d19aceee43 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 13 Aug 2022 07:13:16 +1000 Subject: [PATCH 0454/1109] Set up CI for sysimage build --- JuliaSyntax/.github/workflows/CI.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index e25245c9277f4..7b868086e5c76 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -38,6 +38,29 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + test_sysimage: + name: JuliaSyntax sysimage build - ${{ github.event_name }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: 1.6 + arch: x64 + - uses: actions/cache@v1 + env: + cache-name: cache-artifacts + with: + path: ~/.julia/artifacts + key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + restore-keys: | + ${{ runner.os }}-test-${{ env.cache-name }}- + ${{ runner.os }}-test- + ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 + - run: julia sysimage/compile.jl # docs: # name: Documentation # runs-on: ubuntu-latest From d8dcbf59c5b67d1e414af3d7f11a5c36ae9345ca Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 13 Aug 2022 07:14:17 +1000 Subject: [PATCH 0455/1109] Add macos and windows to tests --- JuliaSyntax/.github/workflows/CI.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 7b868086e5c76..57bde03dc7625 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -18,6 +18,8 @@ jobs: - 'nightly' os: - ubuntu-latest + - macOS-latest + - windows-latest arch: - x64 steps: From 80d615b8879aec5dd8713ae6496b62023cda3346 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 13 Aug 2022 07:50:35 +1000 Subject: [PATCH 0456/1109] Fix accidental piracy of Base.eof As part of rearranging modules in JuliaLang/JuliaSyntax.jl#40, Base.eof(::Char) was accidentally defined. It seems clearer here to just use comparison with EOF_CHAR anyway. --- JuliaSyntax/src/tokenize.jl | 28 +++++++++++++--------------- JuliaSyntax/src/tokenize_utils.jl | 5 +---- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 1c9cf4e8d06ec..c330113fb09ba 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -7,8 +7,6 @@ using ..JuliaSyntax: Kind, @K_str import ..JuliaSyntax: kind, is_literal, is_error, is_contextual_keyword, is_word_operator -import Base.eof - include("tokenize_utils.jl") #------------------------------------------------------------------------------- @@ -223,7 +221,7 @@ Base.position(l::Lexer) = l.charspos[1] Determine whether the end of the lexer's underlying buffer has been reached. """# Base.position(l::Lexer) = Base.position(l.io) -eof(l::Lexer) = eof(l.io) +Base.eof(l::Lexer) = eof(l.io) Base.seek(l::Lexer, pos) = seek(l.io, pos) @@ -335,7 +333,7 @@ function next_token(l::Lexer, start = true) end function _next_token(l::Lexer, c) - if eof(c) + if c == EOF_CHAR return emit(l, K"EndMarker") elseif iswhitespace(c) return lex_whitespace(l, c) @@ -454,12 +452,12 @@ function lex_string_chunk(l) # characters and let the parser deal with it. 
end elseif l.last_token == K"Identifier" && - !(eof(pc) || is_operator_start_char(pc) || is_never_id_char(pc)) + !(pc == EOF_CHAR || is_operator_start_char(pc) || is_never_id_char(pc)) # Only allow certain characters after interpolated vars # https://github.com/JuliaLang/julia/pull/25234 return emit_error(l, K"ErrorInvalidInterpolationTerminator") end - if eof(pc) + if pc == EOF_CHAR return emit(l, K"EndMarker") elseif !state.raw && pc == '$' # Start interpolation @@ -495,7 +493,7 @@ function lex_string_chunk(l) # the closing quotes can be escaped with an odd number of \ characters. while true pc = peekchar(l) - if string_terminates(l, state.delim, state.triplestr) || eof(pc) + if string_terminates(l, state.delim, state.triplestr) || pc == EOF_CHAR break elseif state.triplestr && (pc == '\n' || pc == '\r') # triple quoted newline splitting @@ -520,7 +518,7 @@ function lex_string_chunk(l) else while true pc = peekchar(l) - if pc == '$' || eof(pc) + if pc == '$' || pc == EOF_CHAR break elseif state.triplestr && (pc == '\n' || pc == '\r') # triple quoted newline splitting @@ -541,7 +539,7 @@ function lex_string_chunk(l) c = readchar(l) if c == '\\' c = readchar(l) - eof(c) && break + c == EOF_CHAR && break continue end end @@ -570,7 +568,7 @@ function lex_comment(l::Lexer, doemit=true) if peekchar(l) != '=' while true pc = peekchar(l) - if pc == '\n' || eof(pc) + if pc == '\n' || pc == EOF_CHAR return doemit ? emit(l, K"Comment") : EMPTY_TOKEN end readchar(l) @@ -580,7 +578,7 @@ function lex_comment(l::Lexer, doemit=true) c = readchar(l) # consume the '=' n_start, n_end = 1, 0 while true - if eof(c) + if c == EOF_CHAR return doemit ? emit_error(l, K"ErrorEofMultiComment") : EMPTY_TOKEN end nc = readchar(l) @@ -810,7 +808,7 @@ function lex_digit(l::Lexer, kind) || ppc == '"' || ppc == ':' || ppc == '?' 
- || eof(ppc))) + || ppc == EOF_CHAR)) kind = K"Integer" return emit(l, kind) @@ -833,7 +831,7 @@ function lex_digit(l::Lexer, kind) else return emit_error(l) end - elseif pc == '.' && (is_identifier_start_char(ppc) || eof(ppc)) + elseif pc == '.' && (is_identifier_start_char(ppc) || ppc == EOF_CHAR) readchar(l) return emit_error(l, K"ErrorInvalidNumericConstant") end @@ -910,10 +908,10 @@ function lex_prime(l, doemit = true) end while true c = readchar(l) - if eof(c) + if c == EOF_CHAR return doemit ? emit_error(l, K"ErrorEofChar") : EMPTY_TOKEN elseif c == '\\' - if eof(readchar(l)) + if readchar(l) == EOF_CHAR return doemit ? emit_error(l, K"ErrorEofChar") : EMPTY_TOKEN end elseif c == '\'' diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index a21b20aae2399..720fff4c6f8e7 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -39,9 +39,6 @@ function is_never_id_char(ch::Char) ) end -eof(io::IO) = Base.eof(io) -eof(c::Char) = c === EOF_CHAR - readchar(io::IO) = eof(io) ? 
EOF_CHAR : read(io, Char) # Checks whether a Char is an operator, which can not be juxtaposed with another @@ -251,7 +248,7 @@ function optakessuffix(k) end function is_operator_start_char(c::Char) - eof(c) && return false + c == EOF_CHAR && return false is_operator_start_char(UInt32(c)) end is_operator_start_char(u::UInt32) = u == 0x00000021 || (u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b || (u == 0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u == 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 
0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || (u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 || (u == 0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u == 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 
0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || (u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 || (u == 0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u == 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 
0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || (u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 || (u == 0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u == 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 
0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || (u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 || (u == 0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u == 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 
0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || (u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 || (u == 0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 
0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) From 27adaf5407800f27b95168ff17146cfe41f0ef9a Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 15 Aug 2022 14:03:39 +1000 Subject: [PATCH 0457/1109] Fix core hook when parsing only whitespace. (JuliaLang/JuliaSyntax.jl#45) core_parser_hook() should return `nothing` when attempting to parse statements or atoms, but when there's nothing but whitespace remaining. --- JuliaSyntax/src/hooks.jl | 6 ++++++ JuliaSyntax/test/hooks.jl | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index e8748df6ee79c..6ce9cbcc298b8 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -28,6 +28,12 @@ function core_parser_hook(code, filename, lineno, offset, options) # To copy the flisp parser driver, we ignore leading and trailing # trivia when parsing statements or atoms bump_trivia(stream) + if peek(stream) == K"EndMarker" + # If we're at the end of stream after skipping whitespace, just + # return `nothing` to indicate this rather than attempting to + # parse a statement or atom and failing. 
+ return Core.svec(nothing, last_byte(stream)) + end end JuliaSyntax.parse(stream; rule=rule) if rule !== :toplevel diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 6269b71a8721c..8b9f7cf1fc9d4 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -1,4 +1,12 @@ @testset "Hooks for Core integration" begin + @testset "parsing empty strings" begin + @test JuliaSyntax.core_parser_hook("", "somefile", 0, :statement) == Core.svec(nothing, 0) + @test JuliaSyntax.core_parser_hook("", "somefile", 0, :statement) == Core.svec(nothing, 0) + + @test JuliaSyntax.core_parser_hook(" ", "somefile", 2, :statement) == Core.svec(nothing,2) + @test JuliaSyntax.core_parser_hook(" #==# ", "somefile", 6, :statement) == Core.svec(nothing,6) + end + @testset "filename is used" begin ex = JuliaSyntax.core_parser_hook("@a", "somefile", 0, :statement)[1] @test Meta.isexpr(ex, :macrocall) @@ -16,6 +24,7 @@ @test Meta.parse("x + 1\n(y)\n", 1) == (:(x + 1), 7) @test Meta.parse("x + 1\n(y)\n", 7) == (:y, 11) @test Meta.parse(" x#==#", 1) == (:x, 7) + @test Meta.parse(" #==# ", 1) == (nothing, 7) # Check that Meta.parse throws the JuliaSyntax.ParseError rather than # Meta.ParseError when Core integration is enabled. From a17967c33fc07b722cb6b02be2cb092965fad251 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 15 Aug 2022 19:04:45 +1000 Subject: [PATCH 0458/1109] Bugfix: Always emit a token from parse_atom (JuliaLang/JuliaSyntax.jl#47) If a closing token is found in parse_atom, we still need to emit a nontrivia token to make the resulting parse tree consistent. We choose an invisible token for this. Also add various minor fixes to error recovory and a few additional tests in parse_atom and elsewhere. 
--- JuliaSyntax/src/parser.jl | 18 +++++++++++++----- JuliaSyntax/test/parser.jl | 23 +++++++++++++++++------ JuliaSyntax/test/test_utils.jl | 2 +- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 193127f6f5140..5b41afc98f785 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -151,7 +151,7 @@ function bump_closing_token(ps, closing_kind) end # mark as trivia => ignore in AST. emit(ps, mark, K"error", TRIVIA_FLAG, - error="Expected `$(untokenize(closing_kind))` but got unexpected tokens") + error="Expected `$(untokenize(closing_kind))`") if peek(ps) == closing_kind bump(ps, TRIVIA_FLAG) end @@ -2798,11 +2798,11 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) if k == K"," || (is_closing_token(ps, k) && k != K";") if k == K"," # [x,] ==> (vect x) - # [x ==> (vect x) bump(ps, TRIVIA_FLAG) end # [x] ==> (vect x) # [x \n ] ==> (vect x) + # [x ==> (vect x (error-t)) parse_vect(ps, closer) elseif k == K"for" # [x for a in as] ==> (comprehension (generator x (= a as))) @@ -2957,6 +2957,9 @@ function parse_brackets(after_parse::Function, num_semis += 1 bump(ps, TRIVIA_FLAG) bump_trivia(ps) + elseif is_closing_token(ps, k) + # Error; handled below in bump_closing_token + break else mark = position(ps) eq_pos = parse_eq_star(ps) @@ -3279,8 +3282,11 @@ function parse_atom(ps::ParseState, check_identifiers=true) end emit(ps, mark, K"quote") elseif leading_kind == K"=" && is_plain_equals(peek_token(ps)) - bump(ps, TRIVIA_FLAG, error="unexpected `=`") + # = ==> (error =) + bump(ps, error="unexpected `=`") elseif leading_kind == K"Identifier" + # xx ==> xx + # x₁ ==> x₁ bump(ps) elseif is_operator(leading_kind) if check_identifiers && is_syntactic_operator(leading_kind) @@ -3375,14 +3381,16 @@ function parse_atom(ps::ParseState, check_identifiers=true) parse_string(ps, true) emit(ps, mark, K"macrocall") elseif is_literal(leading_kind) + # 42 ==> 42 bump(ps) elseif 
is_closing_token(ps, leading_kind) # Leave closing token in place for other productions to - # recover with (??) + # recover with + # ) ==> error msg = leading_kind == K"EndMarker" ? "premature end of input" : "unexpected closing token" - emit_diagnostic(ps, error=msg) + bump_invisible(ps, K"error", error=msg) else bump(ps, error="invalid syntax atom") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e0466a0ebc3b9..e2dd4e105efb3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -458,6 +458,8 @@ tests = [ "function f() where T end" => "(function (where (call f) T) (block))" "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" + # Errors + "function" => "(function (error (error)) (block (error)) (error-t))" ], JuliaSyntax.parse_try => [ "try \n x \n catch e \n y \n finally \n z end" => @@ -560,8 +562,13 @@ tests = [ ":foo" => "(quote foo)" ": foo" => "(quote (error-t) foo)" # Literal colons - ":)" => ":" - ": end" => ":" + ":)" => ":" + ": end" => ":" + # plain equals + "=" => "(error =)" + # Identifiers + "xx" => "xx" + "x₁" => "x₁" # var syntax """var"x" """ => "x" """var"x"+""" => "x" @@ -586,14 +593,16 @@ tests = [ ":.=" => "(quote .=)" # Special symbols quoted ":end" => "(quote end)" - ":(end)" => "(quote (error (end)))" + ":(end)" => "(quote (error-t))" ":<:" => "(quote <:)" + # unexpect = + "=" => "(error =)" # parse_cat "[]" => "(vect)" "[x,]" => "(vect x)" "[x]" => "(vect x)" - "[x" => "(vect x (error-t))" "[x \n ]" => "(vect x)" + "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" "[x for a in as]" => "(comprehension (generator x (= a as)))" "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" @@ -625,8 +634,10 @@ tests = [ "``" => "(macrocall :(Core.var\"@cmd\") \"\")" "`cmd`" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" "```cmd```" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" - # Errors - ": foo" => "(quote (error-t) 
foo)" + # literals + "42" => "42" + # closing tokens + ")" => "(error)" ], JuliaSyntax.parse_atom => [ # Actually parse_array diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 3d994080f5b9e..2510622c6ef41 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -243,7 +243,7 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") show(stdout, MIME"text/plain"(), f_ex) printstyled(stdout, "\n\n# Diff of AST dump:\n", color=:red) - show_expr_text_diff(showfunc, ex, f_ex, context=10) + show_expr_text_diff(show, ex, f_ex, context=10) # return (ex, f_ex) # return (code, stream, t, s, ex) end From 2d43b60c402a81f1969a0e4e218690bed4d5bf75 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 15 Aug 2022 19:05:02 +1000 Subject: [PATCH 0459/1109] Fix hooks: whitespace parsing with Meta.parseatom() (JuliaLang/JuliaSyntax.jl#48) Previously, `Meta.parseatom()` would consume trailing whitespace which breaks usages like in `Base.shell_parse()` --- JuliaSyntax/src/hooks.jl | 9 +++++---- JuliaSyntax/test/hooks.jl | 5 ++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 6ce9cbcc298b8..bc3114ac578c3 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -24,9 +24,10 @@ function core_parser_hook(code, filename, lineno, offset, options) stream = ParseStream(io) rule = options === :all ? 
:toplevel : options - if rule !== :toplevel - # To copy the flisp parser driver, we ignore leading and trailing - # trivia when parsing statements or atoms + if rule === :statement || rule === :atom + # To copy the flisp parser driver: + # * Parsing atoms consumes leading trivia + # * Parsing statements consumes leading+trailing trivia bump_trivia(stream) if peek(stream) == K"EndMarker" # If we're at the end of stream after skipping whitespace, just @@ -36,7 +37,7 @@ function core_parser_hook(code, filename, lineno, offset, options) end end JuliaSyntax.parse(stream; rule=rule) - if rule !== :toplevel + if rule === :statement bump_trivia(stream) end diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 8b9f7cf1fc9d4..b93c0a17f8a54 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -1,10 +1,13 @@ @testset "Hooks for Core integration" begin - @testset "parsing empty strings" begin + @testset "whitespace parsing" begin @test JuliaSyntax.core_parser_hook("", "somefile", 0, :statement) == Core.svec(nothing, 0) @test JuliaSyntax.core_parser_hook("", "somefile", 0, :statement) == Core.svec(nothing, 0) @test JuliaSyntax.core_parser_hook(" ", "somefile", 2, :statement) == Core.svec(nothing,2) @test JuliaSyntax.core_parser_hook(" #==# ", "somefile", 6, :statement) == Core.svec(nothing,6) + + @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 0, :statement) == Core.svec(:x,4) + @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 0, :atom) == Core.svec(:x,2) end @testset "filename is used" begin From 34ef78a083b8ca735c839878758297b919dcee53 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 14 Aug 2022 12:02:32 +1000 Subject: [PATCH 0460/1109] Freeze world age by default in enable_in_core!() A fixed world age for the parser prevents the need for recompilation of the parser due to any user-defined methods. 
--- JuliaSyntax/src/hooks.jl | 81 ++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index bc3114ac578c3..1dcbeb91f9583 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,7 +1,28 @@ -_debug_log = nothing +# World age which the parser will be invoked in. +# Set to typemax(UInt) to invoke in the world of the caller. +const _parser_world_age = Ref{UInt}(typemax(UInt)) # Adaptor for the API/ABI expected by the Julia runtime code. function core_parser_hook(code, filename, lineno, offset, options) + if _parser_world_age[] != typemax(UInt) + Base.invoke_in_world(_parser_world_age[], _core_parser_hook, + code, filename, lineno, offset, options) + else + _core_parser_hook(code, filename, lineno, offset, options) + end +end + +# Core._parse gained a `lineno` argument in +# https://github.com/JuliaLang/julia/pull/43876 +# Prior to this, the following signature was needed: +function core_parser_hook(code, filename, offset, options) + core_parser_hook(code, filename, LineNumberNode(0), offset, options) +end + +# Debug log file for dumping parsed code +const _debug_log = Ref{Union{Nothing,IO}}(nothing) + +function _core_parser_hook(code, filename, lineno, offset, options) try # TODO: Check that we do all this input wrangling without copying the # code buffer @@ -10,13 +31,13 @@ function core_parser_hook(code, filename, lineno, offset, options) (ptr,len) = code code = String(unsafe_wrap(Array, ptr, len)) end - if !isnothing(_debug_log) - print(_debug_log, """ + if !isnothing(_debug_log[]) + print(_debug_log[], """ #-#-#------------------------------- # ENTER filename=$filename, lineno=$lineno, offset=$offset, options=$options" #-#-#------------------------------- """) - write(_debug_log, code) + write(_debug_log[], code) end io = IOBuffer(code) @@ -58,8 +79,8 @@ function core_parser_hook(code, filename, lineno, offset, options) # of one cancel here. 
last_offset = last_byte(stream) - if !isnothing(_debug_log) - println(_debug_log, """ + if !isnothing(_debug_log[]) + println(_debug_log[], """ #-#-#- # EXIT last_offset=$last_offset #-#-#- @@ -69,19 +90,25 @@ function core_parser_hook(code, filename, lineno, offset, options) # Rewrap result in an svec for use by the C code return Core.svec(ex, last_offset) catch exc + if !isnothing(_debug_log[]) + println(_debug_log[], """ + #-#-#- + # ERROR EXIT + # $exc + #-#-#- + """) + end @error("JuliaSyntax parser failed — falling back to flisp!", exception=(exc,catch_backtrace()), offset=offset, code=code) - end - return Core.Compiler.fl_parse(code, filename, offset, options) -end -# Core._parse gained a `lineno` argument in -# https://github.com/JuliaLang/julia/pull/43876 -# Prior to this, the following signature was needed: -function core_parser_hook(code, filename, offset, options) - core_parser_hook(code, filename, LineNumberNode(0), offset, options) + if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 + return Core.Compiler.fl_parse(code, filename, lineno, offset, options) + else + return Core.Compiler.fl_parse(code, filename, offset, options) + end + end end # Hack: @@ -92,19 +119,27 @@ Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e const _default_parser = Core._parse """ + enable_in_core!([enable=true; freeze_world_age, debug_filename]) + Connect the JuliaSyntax parser to the Julia runtime so that it replaces the -flisp parser for all parsing work. +flisp parser for all parsing work. That is, JuliaSyntax will be used for +`include()` `Meta.parse()`, the REPL, etc. To disable, set use +`enable_in_core!(false)`. -That is, JuliaSyntax will be used for `include()` `Meta.parse()`, the REPL, etc. +Keyword arguments: +* `freeze_world_age` - Use a fixed world age for the parser to prevent + recompilation of the parser due to any user-defined methods (default `true`). 
+* `debug_filename` - File name of parser debug log (defaults to `nothing` or + the value of `ENV["JULIA_SYNTAX_DEBUG_FILE"]`). """ -function enable_in_core!(enable=true) - debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing) - global _debug_log +function enable_in_core!(enable=true; freeze_world_age = true, + debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing)) + _parser_world_age[] = freeze_world_age ? Base.get_world_counter() : typemax(UInt) if enable && !isnothing(debug_filename) - _debug_log = open(debug_filename, "w") - elseif !enable && !isnothing(_debug_log) - close(_debug_log) - _debug_log = nothing + _debug_log[] = open(debug_filename, "w") + elseif !enable && !isnothing(_debug_log[]) + close(_debug_log[]) + _debug_log[] = nothing end parser = enable ? core_parser_hook : _default_parser Base.eval(Core, :(_parse = $parser)) From 6c0a9e2649dec899d8e8151c5bd4d293271e286a Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 15 Aug 2022 13:37:11 +1000 Subject: [PATCH 0461/1109] Prevent more compile latency in Core parser hooks --- JuliaSyntax/src/hooks.jl | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 1dcbeb91f9583..8b886fce2d02b 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,14 +1,22 @@ -# World age which the parser will be invoked in. -# Set to typemax(UInt) to invoke in the world of the caller. -const _parser_world_age = Ref{UInt}(typemax(UInt)) +# This file provides an adaptor to match the API expected by the Julia runtime +# code in the binding Core._parse + +# Use caller's world age. +const _caller_world = typemax(UInt) +const _parser_world_age = Ref{UInt}(_caller_world) -# Adaptor for the API/ABI expected by the Julia runtime code. 
function core_parser_hook(code, filename, lineno, offset, options) - if _parser_world_age[] != typemax(UInt) - Base.invoke_in_world(_parser_world_age[], _core_parser_hook, + # `hook` is always _core_parser_hook, but that's hidden from the compiler + # via a Ref to prevent invalidation / recompilation when other packages are + # loaded. This wouldn't seem like it should be necessary given the use of + # invoke_in_world, but it is in Julia-1.7.3. I'm not sure exactly which + # latency it's removing. + hook = _core_parser_hook_ref[] + if _parser_world_age[] != _caller_world + Base.invoke_in_world(_parser_world_age[], hook, code, filename, lineno, offset, options) else - _core_parser_hook(code, filename, lineno, offset, options) + hook(code, filename, lineno, offset, options) end end @@ -113,10 +121,13 @@ end # Hack: # Meta.parse() attempts to construct a ParseError from a string if it receives -# `Expr(:error)`. +# `Expr(:error)`. Add an override to the ParseError constructor to prevent this. +# FIXME: Improve this in Base somehow? Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e const _default_parser = Core._parse +# NB: Never reassigned, but the compiler doesn't know this! +const _core_parser_hook_ref = Ref{Function}(_core_parser_hook) """ enable_in_core!([enable=true; freeze_world_age, debug_filename]) @@ -134,7 +145,7 @@ Keyword arguments: """ function enable_in_core!(enable=true; freeze_world_age = true, debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing)) - _parser_world_age[] = freeze_world_age ? Base.get_world_counter() : typemax(UInt) + _parser_world_age[] = freeze_world_age ? Base.get_world_counter() : _caller_world if enable && !isnothing(debug_filename) _debug_log[] = open(debug_filename, "w") elseif !enable && !isnothing(_debug_log[]) From a592c368f05aa4e69c8f3d2451e4e45095c643ec Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 17 Aug 2022 11:13:26 +1000 Subject: [PATCH 0462/1109] Make enable_in_core! 
safe for use in precompilation (JuliaLang/JuliaSyntax.jl#50) Move the hacky code for overriding Core._parse from the sysimage init module into JuliaSyntax.jl proper, to allow it to be reused as necessary. --- JuliaSyntax/src/hooks.jl | 23 +++++++++++++++++-- .../JuliaSyntaxCore/src/JuliaSyntaxCore.jl | 14 ----------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 8b886fce2d02b..2c5bbb5f55e4b 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,6 +1,26 @@ # This file provides an adaptor to match the API expected by the Julia runtime # code in the binding Core._parse +function _set_core_parse_hook(parser) + # HACK! Fool the runtime into allowing us to set Core._parse, even during + # incremental compilation. (Ideally we'd just arrange for Core._parse to be + # set to the JuliaSyntax parser. But how do we signal that to the dumping + # code outside of the initial creation of Core?) + i = findfirst(==(:incremental), fieldnames(Base.JLOptions)) + ptr = convert(Ptr{fieldtype(Base.JLOptions, i)}, + cglobal(:jl_options, Base.JLOptions) + fieldoffset(Base.JLOptions, i)) + incremental = unsafe_load(ptr) + if incremental != 0 + unsafe_store!(ptr, 0) + end + + Base.eval(Core, :(_parse = $parser)) + + if incremental != 0 + unsafe_store!(ptr, incremental) + end +end + # Use caller's world age. const _caller_world = typemax(UInt) const _parser_world_age = Ref{UInt}(_caller_world) @@ -152,8 +172,7 @@ function enable_in_core!(enable=true; freeze_world_age = true, close(_debug_log[]) _debug_log[] = nothing end - parser = enable ? core_parser_hook : _default_parser - Base.eval(Core, :(_parse = $parser)) + _set_core_parse_hook(enable ? 
core_parser_hook : _default_parser) nothing end diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl index 3188fd8a96f38..d1804b89053b7 100644 --- a/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl +++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl @@ -5,22 +5,8 @@ module JuliaSyntaxCore using JuliaSyntax -import Base: JLOptions - function __init__() - # HACK! Fool the runtime into allowing us to set Core._parse, even during - # incremental compilation. (Ideally we'd just arrange for Core._parse to be - # set to the JuliaSyntax parser. But how do we signal that to the dumping - # code outside of the initial creation of Core?) - i = findfirst(==(:incremental), fieldnames(JLOptions)) - ptr = convert(Ptr{fieldtype(JLOptions, i)}, - cglobal(:jl_options, JLOptions) + fieldoffset(JLOptions, i)) - incremental = unsafe_load(ptr) - incremental == 0 || unsafe_store!(ptr, 0) - JuliaSyntax.enable_in_core!() - - incremental == 0 || unsafe_store!(ptr, incremental) end end From 24e0fb3f695f478b5d2aa929e8dc5a31025aaaba Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 18 Aug 2022 12:40:17 +0200 Subject: [PATCH 0463/1109] require a number after 'p' for hex floats to lex as a float (JuliaLang/JuliaSyntax.jl#52) --- JuliaSyntax/src/tokenize.jl | 10 +++++++--- JuliaSyntax/test/tokenize.jl | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index c330113fb09ba..730052f21ed04 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -766,15 +766,17 @@ function lex_xor(l::Lexer) end function accept_number(l::Lexer, f::F) where {F} + lexed_number = false while true pc, ppc = dpeekchar(l) if pc == '_' && !f(ppc) - return + return lexed_number elseif f(pc) || pc == '_' readchar(l) else - return + return lexed_number end + lexed_number = true end end @@ -864,7 +866,9 @@ function 
lex_digit(l::Lexer, kind) if accept(l, "pP") kind = K"Float" accept(l, "+-−") - accept_number(l, isdigit) + if !accept_number(l, isdigit) + return emit_error(l, K"ErrorInvalidNumericConstant") + end elseif isfloat return emit_error(l, K"ErrorInvalidNumericConstant") end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 246e479929b12..5c969adef36bb 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -589,6 +589,7 @@ end @test kind.(collect(tokenize("3.2e2.2"))) == [K"ErrorInvalidNumericConstant", K"Integer", K"EndMarker"] @test kind.(collect(tokenize("3e2.2"))) == [K"ErrorInvalidNumericConstant", K"Integer", K"EndMarker"] @test kind.(collect(tokenize("0b101__101"))) == [K"BinInt", K"Identifier", K"EndMarker"] + @test tok("0x1p").kind == K"ErrorInvalidNumericConstant" end @testset "floating points" begin From c9274b95c86aa001c0826acbc1bc342182feac89 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 17 Aug 2022 16:58:42 +1000 Subject: [PATCH 0464/1109] Use Core.set_parser rather than eval if it's available --- JuliaSyntax/src/hooks.jl | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 2c5bbb5f55e4b..c72d37b8e39ae 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,23 +1,27 @@ # This file provides an adaptor to match the API expected by the Julia runtime # code in the binding Core._parse -function _set_core_parse_hook(parser) - # HACK! Fool the runtime into allowing us to set Core._parse, even during - # incremental compilation. (Ideally we'd just arrange for Core._parse to be - # set to the JuliaSyntax parser. But how do we signal that to the dumping - # code outside of the initial creation of Core?) 
- i = findfirst(==(:incremental), fieldnames(Base.JLOptions)) - ptr = convert(Ptr{fieldtype(Base.JLOptions, i)}, - cglobal(:jl_options, Base.JLOptions) + fieldoffset(Base.JLOptions, i)) - incremental = unsafe_load(ptr) - if incremental != 0 - unsafe_store!(ptr, 0) - end +if isdefined(Core, :set_parser) + const _set_core_parse_hook = Core.set_parser +else + function _set_core_parse_hook(parser) + # HACK! Fool the runtime into allowing us to set Core._parse, even during + # incremental compilation. (Ideally we'd just arrange for Core._parse to be + # set to the JuliaSyntax parser. But how do we signal that to the dumping + # code outside of the initial creation of Core?) + i = findfirst(==(:incremental), fieldnames(Base.JLOptions)) + ptr = convert(Ptr{fieldtype(Base.JLOptions, i)}, + cglobal(:jl_options, Base.JLOptions) + fieldoffset(Base.JLOptions, i)) + incremental = unsafe_load(ptr) + if incremental != 0 + unsafe_store!(ptr, 0) + end - Base.eval(Core, :(_parse = $parser)) + Base.eval(Core, :(_parse = $parser)) - if incremental != 0 - unsafe_store!(ptr, incremental) + if incremental != 0 + unsafe_store!(ptr, incremental) + end end end From 974e71f881e15fd5ec67ec5804132f894c76496a Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 18 Aug 2022 17:00:31 +1000 Subject: [PATCH 0465/1109] Minor cleanup to inference barrier in core_parser_hook --- JuliaSyntax/src/hooks.jl | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index c72d37b8e39ae..5e48b45cd8a4d 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -26,21 +26,17 @@ else end # Use caller's world age. 
-const _caller_world = typemax(UInt) -const _parser_world_age = Ref{UInt}(_caller_world) +const _latest_world = typemax(UInt) +const _parser_world_age = Ref{UInt}(_latest_world) function core_parser_hook(code, filename, lineno, offset, options) - # `hook` is always _core_parser_hook, but that's hidden from the compiler - # via a Ref to prevent invalidation / recompilation when other packages are - # loaded. This wouldn't seem like it should be necessary given the use of - # invoke_in_world, but it is in Julia-1.7.3. I'm not sure exactly which - # latency it's removing. - hook = _core_parser_hook_ref[] - if _parser_world_age[] != _caller_world - Base.invoke_in_world(_parser_world_age[], hook, + # NB: We need an inference barrier of one type or another here to prevent + # invalidations. The invokes provide this currently. + if _parser_world_age[] != _latest_world + Base.invoke_in_world(_parser_world_age[], _core_parser_hook, code, filename, lineno, offset, options) else - hook(code, filename, lineno, offset, options) + Base.invokelatest(_core_parser_hook, code, filename, lineno, offset, options) end end @@ -150,8 +146,6 @@ end Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e const _default_parser = Core._parse -# NB: Never reassigned, but the compiler doesn't know this! -const _core_parser_hook_ref = Ref{Function}(_core_parser_hook) """ enable_in_core!([enable=true; freeze_world_age, debug_filename]) @@ -169,7 +163,7 @@ Keyword arguments: """ function enable_in_core!(enable=true; freeze_world_age = true, debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing)) - _parser_world_age[] = freeze_world_age ? Base.get_world_counter() : _caller_world + _parser_world_age[] = freeze_world_age ? 
Base.get_world_counter() : _latest_world if enable && !isnothing(debug_filename) _debug_log[] = open(debug_filename, "w") elseif !enable && !isnothing(_debug_log[]) From afb759415d5690f1dab2f368cb4d9f193e91d345 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 19 Aug 2022 09:07:27 +0200 Subject: [PATCH 0466/1109] get rid of unused fields and functions in the lexer (JuliaLang/JuliaSyntax.jl#56) --- JuliaSyntax/src/tokenize.jl | 36 ++++-------------------------------- JuliaSyntax/test/tokenize.jl | 2 -- 2 files changed, 4 insertions(+), 34 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 730052f21ed04..df7d8e6243fa4 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -77,24 +77,14 @@ Ideally a lexer is stateless but some state is needed here for: """ mutable struct Lexer{IO_t <: IO} io::IO_t - io_startpos::Int - token_start_row::Int - token_start_col::Int token_startpos::Int - current_row::Int - current_col::Int - current_pos::Int - last_token::Kind string_states::Vector{StringState} - charstore::IOBuffer chars::Tuple{Char,Char,Char,Char} charspos::Tuple{Int,Int,Int,Int} - doread::Bool dotop::Bool - errored::Bool end function Lexer(io::IO) @@ -121,9 +111,9 @@ function Lexer(io::IO) end end end - Lexer(io, position(io), 1, 1, position(io), 1, 1, position(io), - K"error", Vector{StringState}(), IOBuffer(), - (c1,c2,c3,c4), (p1,p2,p3,p4), false, false, false) + Lexer(io, position(io), + K"error", Vector{StringState}(), + (c1,c2,c3,c4), (p1,p2,p3,p4), false) end Lexer(str::AbstractString) = Lexer(IOBuffer(str)) @@ -143,14 +133,8 @@ Base.eltype(::Type{<:Lexer}) = Token function Base.iterate(l::Lexer) - seekstart(l) l.token_startpos = position(l) - l.token_start_row = 1 - l.token_start_col = 1 - l.current_row = 1 - l.current_col = 1 - l.current_pos = l.io_startpos t = next_token(l) return t, t.kind == K"EndMarker" end @@ -179,15 +163,6 @@ Set a new starting position. 
""" startpos!(l::Lexer, i::Integer) = l.token_startpos = i -Base.seekstart(l::Lexer) = seek(l.io, l.io_startpos) - -""" - seek2startpos!(l::Lexer) - -Sets the lexer's current position to the beginning of the latest `Token`. -""" -seek2startpos!(l::Lexer) = seek(l, startpos(l)) - """ peekchar(l::Lexer) @@ -220,7 +195,7 @@ Base.position(l::Lexer) = l.charspos[1] eof(l::Lexer) Determine whether the end of the lexer's underlying buffer has been reached. -"""# Base.position(l::Lexer) = Base.position(l.io) +""" Base.eof(l::Lexer) = eof(l.io) Base.seek(l::Lexer, pos) = seek(l.io, pos) @@ -233,8 +208,6 @@ position. """ function start_token!(l::Lexer) l.token_startpos = l.charspos[1] - l.token_start_row = l.current_row - l.token_start_col = l.current_col end """ @@ -312,7 +285,6 @@ end Returns an `K"error"` token with error `err` and starts a new `Token`. """ function emit_error(l::Lexer, err::Kind = K"error") - l.errored = true @assert is_error(err) return emit(l, err) end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 5c969adef36bb..c0e81278eb7b7 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -25,13 +25,11 @@ strtok(str) = untokenize.(collect(tokenize(str)), str) l = tokenize(s) @test Tokenize.readchar(l) == 'a' - # @test l.current_pos == 0 l_old = l @test l == l_old @test Tokenize.eof(l) @test Tokenize.readchar(l) == Tokenize.EOF_CHAR - # @test l.current_pos == 0 end end # testset From 42e8a2952fa0e344850adb8cc43e7124c04c4db5 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Mon, 22 Aug 2022 16:00:29 +0200 Subject: [PATCH 0467/1109] Fix vect parsing with newline before comma --- JuliaSyntax/src/parser.jl | 18 ++++++++++-------- JuliaSyntax/src/syntax_tree.jl | 1 - JuliaSyntax/test/parser.jl | 4 +++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5b41afc98f785..c25ac9931ab99 100644 --- a/JuliaSyntax/src/parser.jl +++ 
b/JuliaSyntax/src/parser.jl @@ -398,7 +398,7 @@ function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) break elseif k in delimiters # ignore empty delimited sections - # a;;;b ==> (block a b) + # a;;;b ==> (block a b) continue end down(ps) @@ -433,7 +433,7 @@ function parse_toplevel(ps::ParseState) nothing end -# Parse a newline or semicolon-delimited list of expressions. +# Parse a newline or semicolon-delimited list of expressions. # Repeated delimiters are allowed but ignored # a;b;c ==> (block a b c) # a;;;b;; ==> (block a b) @@ -2246,7 +2246,7 @@ function parse_imports(ps::ParseState) bump(ps, TRIVIA_FLAG) emark = position(ps) initial_as = parse_import(ps, word, false) - t = peek_token(ps) + t = peek_token(ps) k = kind(t) has_import_prefix = false # true if we have `prefix:` in `import prefix: stuff` has_comma = false @@ -2912,9 +2912,9 @@ end # For example, (a=1; b=2) could be seen to parse four different ways! # # Function args: (kw a 1) (parameters (kw b 2)) -# Tuple-like: (= a 1) (parameters (kw b 2)) -# Block: (= a 1) (= b 2) -# [] vect-like: (= a 1) (parameters (= b 2)) +# Tuple-like: (= a 1) (parameters (kw b 2)) +# Block: (= a 1) (= b 2) +# [] vect-like: (= a 1) (parameters (= b 2)) # # Expressions (X; Y; Z) with more semicolons are also allowed by the flisp # parser and generally parse as nested parameters blocks. This is invalid Julia @@ -2957,6 +2957,9 @@ function parse_brackets(after_parse::Function, num_semis += 1 bump(ps, TRIVIA_FLAG) bump_trivia(ps) + elseif k == K"," + had_commas = true + bump(ps, TRIVIA_FLAG) elseif is_closing_token(ps, k) # Error; handled below in bump_closing_token break @@ -3384,7 +3387,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # 42 ==> 42 bump(ps) elseif is_closing_token(ps, leading_kind) - # Leave closing token in place for other productions to + # Leave closing token in place for other productions to # recover with # ) ==> error msg = leading_kind == K"EndMarker" ? 
@@ -3395,4 +3398,3 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, error="invalid syntax atom") end end - diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 4ea92384ab782..a92d5f4e121a9 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -256,4 +256,3 @@ function highlight(code::String, node, path::Int...; color=(40,40,70)) _printstyled(stdout, code[p:q-1]; bgcolor=color) print(stdout, code[q:end]) end - diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e2dd4e105efb3..8f83d853ac5ee 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -602,6 +602,7 @@ tests = [ "[x,]" => "(vect x)" "[x]" => "(vect x)" "[x \n ]" => "(vect x)" + "[x \n, ]" => "(vect x)" "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" "[x for a in as]" => "(comprehension (generator x (= a as)))" @@ -616,6 +617,8 @@ tests = [ # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" + "[x,\n y]" => "(vect x y)" + "[x\n, y]" => "(vect x y)" "[x,y ; z]" => "(vect x y (parameters z))" "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" @@ -830,4 +833,3 @@ end @test test_parse(JuliaSyntax.parse_eq, "a \u2212= b") == "(-= a b)" @test test_parse(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" end - From 4c85764d5f63104a300690e78b67955c8c02fc3d Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Mon, 22 Aug 2022 17:34:00 +0200 Subject: [PATCH 0468/1109] Correct fix --- JuliaSyntax/src/parser.jl | 5 +---- JuliaSyntax/test/parser.jl | 4 ++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c25ac9931ab99..306290f053292 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2798,7 +2798,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) if k == K"," || (is_closing_token(ps, k) && k != K";") if k == K"," # [x,] 
==> (vect x) - bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG; skip_newlines = true) end # [x] ==> (vect x) # [x \n ] ==> (vect x) @@ -2957,9 +2957,6 @@ function parse_brackets(after_parse::Function, num_semis += 1 bump(ps, TRIVIA_FLAG) bump_trivia(ps) - elseif k == K"," - had_commas = true - bump(ps, TRIVIA_FLAG) elseif is_closing_token(ps, k) # Error; handled below in bump_closing_token break diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8f83d853ac5ee..68a68bf8d98ad 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -782,6 +782,10 @@ broken_tests = [ ] ] +@testset "Invalid syntax" begin + @test !isempty(JuliaSyntax.parse(JuliaSyntax.GreenNode, """[ [],,[] ]""")[2]) +end + @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests @testset "$(repr(input))" for (input,output) in test_specs From 8f1699fce0c12409945682581f8e060c9fe62209 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 23 Aug 2022 14:23:40 +0900 Subject: [PATCH 0469/1109] fix and optimize erroneous code paths (JuliaLang/JuliaSyntax.jl#58) Co-authored-by: Sebastian Pfitzner --- JuliaSyntax/README.md | 2 +- JuliaSyntax/src/diagnostics.jl | 9 +++++---- JuliaSyntax/src/expr.jl | 13 +++++++------ JuliaSyntax/src/hooks.jl | 3 +-- JuliaSyntax/src/parse_stream.jl | 9 +++++---- JuliaSyntax/src/syntax_tree.jl | 14 +++++++------- JuliaSyntax/src/value_parsing.jl | 5 ++--- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 0a786be199bb8..f4ed6c98be0a5 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -471,7 +471,7 @@ There's arguably a few downsides: Practically the flisp parser is not quite a classic [recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it often looks back and modifies the output tree it has already produced. 
We've -tried to eliminate this pattern it favor of lookahead where possible because +tried to eliminate this pattern in favor of lookahead where possible because * It works poorly when the parser is emitting a stream of node spans with strict source ordering constraints. diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index dc5292d4cb687..092571e21c571 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -32,7 +32,7 @@ end function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing) message = !isnothing(error) ? error : !isnothing(warning) ? warning : - error("No message in diagnostic") + Base.error("No message in diagnostic") level = !isnothing(error) ? :error : :warning Diagnostic(first_byte, last_byte, level, message) end @@ -48,11 +48,12 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) (:normal, "Info") line, col = source_location(source, first_byte(diagnostic)) linecol = "$line:$col" - if !isnothing(source.filename) - locstr = "$(source.filename):$linecol" + filename = source.filename + if !isnothing(filename) + locstr = "$filename:$linecol" if get(io, :color, false) # Also add hyperlinks in color terminals - url = "file://$(abspath(source.filename))#$linecol" + url = "file://$(abspath(filename))#$linecol" locstr = "\e]8;;$url\e\\$locstr\e]8;;\e\\" end else diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index d239c80263f7d..69b3e5e05160d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -8,19 +8,20 @@ end function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) if !haschildren(node) - if node.val isa Union{Int128,UInt128,BigInt} + val = node.val + if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr # representation of these. 
str = replace(sourcetext(node), '_'=>"") headsym = :macrocall k = kind(node) - macname = node.val isa Int128 ? Symbol("@int128_str") : - node.val isa UInt128 ? Symbol("@uint128_str") : + macname = val isa Int128 ? Symbol("@int128_str") : + val isa UInt128 ? Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) else - return node.val + return val end end headstr = untokenize(head(node), include_flag_suff=false) @@ -165,7 +166,8 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) args[1] = Expr(:block, loc, args[1]) elseif headsym == :(->) if Meta.isexpr(args[2], :block) - if node.parent isa SyntaxNode && kind(node.parent) != K"do" + parent = node.parent + if parent isa SyntaxNode && kind(parent) != K"do" pushfirst!(args[2].args, loc) end else @@ -207,4 +209,3 @@ Base.Expr(node::SyntaxNode) = _to_expr(node) function build_tree(::Type{Expr}, stream::ParseStream; kws...) Expr(build_tree(SyntaxNode, stream; kws...)) end - diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 5e48b45cd8a4d..46fd9c0ca6b56 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -9,7 +9,7 @@ else # incremental compilation. (Ideally we'd just arrange for Core._parse to be # set to the JuliaSyntax parser. But how do we signal that to the dumping # code outside of the initial creation of Core?) - i = findfirst(==(:incremental), fieldnames(Base.JLOptions)) + i = Base.fieldindex(Base.JLOptions, :incremental) ptr = convert(Ptr{fieldtype(Base.JLOptions, i)}, cglobal(:jl_options, Base.JLOptions) + fieldoffset(Base.JLOptions, i)) incremental = unsafe_load(ptr) @@ -173,4 +173,3 @@ function enable_in_core!(enable=true; freeze_world_age = true, _set_core_parse_hook(enable ? 
core_parser_hook : _default_parser) nothing end - diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ab5b383b3fc79..be4a49605803a 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -61,7 +61,7 @@ function Base.summary(head::SyntaxHead) end function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) - str = is_error(kind(head)) ? "error" : untokenize(kind(head); unique=unique) + str = is_error(kind(head)) ? "error" : untokenize(kind(head); unique=unique)::String if is_dotted(head) str = "."*str end @@ -281,7 +281,7 @@ function release_positions(stream, positions) end #------------------------------------------------------------------------------- -# Return true when a token was emitted last at stream position `pos` +# Return true when a token was emitted last at stream position `pos` function token_is_last(stream, pos) return pos.range_index == 0 || pos.token_index > stream.ranges[pos.range_index].last_token @@ -872,8 +872,9 @@ stream's text buffer. Note that this leaves the `ParseStream` in an invalid state for further parsing. 
""" function sourcetext(stream::ParseStream; steal_textbuf=false) - if stream.text_root isa AbstractString && codeunit(stream.text_root) == UInt8 - return stream.text_root + root = stream.text_root + if root isa AbstractString && codeunit(root) == UInt8 + return root elseif steal_textbuf return String(stream.textbuf) else diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 4ea92384ab782..b81eb4509dee6 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -118,13 +118,13 @@ function interpolate_literal(node::SyntaxNode, val) SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) end -function _show_syntax_node(io, current_filename, node, indent) +function _show_syntax_node(io, current_filename, node::SyntaxNode, indent) fname = node.source.filename line, col = source_location(node.source, node.position) posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node)-1,6))│" - nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : - node.val isa Symbol ? string(node.val) : - repr(node.val) + val = node.val + nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : + isa(val, Symbol) ? string(val) : repr(val) treestr = string(indent, nodestr) # Add filename if it's changed from the previous node if fname != current_filename[] @@ -141,12 +141,13 @@ function _show_syntax_node(io, current_filename, node, indent) end end -function _show_syntax_node_sexpr(io, node) +function _show_syntax_node_sexpr(io, node::SyntaxNode) if !haschildren(node) if is_error(node) print(io, "(", untokenize(head(node)), ")") else - print(io, node.val isa Symbol ? string(node.val) : repr(node.val)) + val = node.val + print(io, val isa Symbol ? 
string(val) : repr(val)) end else print(io, "(", untokenize(head(node))) @@ -256,4 +257,3 @@ function highlight(code::String, node, path::Int...; color=(40,40,70)) _printstyled(stdout, code[p:q-1]; bgcolor=color) print(stdout, code[q:end]) end - diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 1d91693591be8..d6d9bc99420d5 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -228,7 +228,7 @@ function utf8proc_decompose(str, options, buffer, nwords) ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ptr{Cvoid}), str, sizeof(str), buffer, nwords, options, @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ptr{Cvoid})), C_NULL) - ret < 0 && utf8proc_error(ret) + ret < 0 && Base.Unicode.utf8proc_error(ret) return ret end @@ -237,7 +237,7 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer) buffer = Base.StringVector(nwords*4) nwords = utf8proc_decompose(str, options, buffer, nwords) nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options) - nbytes < 0 && utf8proc_error(nbytes) + nbytes < 0 && Base.Unicode.utf8proc_error(nbytes) return String(resize!(buffer, nbytes)) end @@ -245,4 +245,3 @@ function normalize_identifier(str) flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE utf8proc_map(str, flags) end - From 5ca49abfed7ad5d2796b1ee413227d95b0d1f776 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 23 Aug 2022 17:24:55 +0900 Subject: [PATCH 0470/1109] update the `Core._parse` hook (JuliaLang/JuliaSyntax.jl#62) --- JuliaSyntax/src/hooks.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 46fd9c0ca6b56..e113a4301aeb1 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,7 +1,9 @@ # This file provides an adaptor to match the API 
expected by the Julia runtime # code in the binding Core._parse -if isdefined(Core, :set_parser) +@static if isdefined(Core, :_setparser!) + const _set_core_parse_hook = Core._setparser! +elseif isdefined(Core, :set_parser) const _set_core_parse_hook = Core.set_parser else function _set_core_parse_hook(parser) From c0107911caa883b5ff05f406c5da1db4ae1009b9 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 23 Aug 2022 11:06:47 +0200 Subject: [PATCH 0471/1109] "Improve" error handling in SyntaxNode conversion --- JuliaSyntax/src/syntax_tree.jl | 6 ++++-- JuliaSyntax/test/parser.jl | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index a92d5f4e121a9..560ba123d2df5 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -74,8 +74,10 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif is_syntax_kind(raw) nothing else - error("Leaf node of kind $k unknown to SyntaxNode") - val = nothing + # FIXME: this allows us to recover from trivia is_error nodes + # that we insert below + @debug "Leaf node of kind $k unknown to SyntaxNode" + ErrorVal() end return SyntaxNode(source, raw, position, nothing, true, val) else diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 68a68bf8d98ad..ec6005dadf49a 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -600,6 +600,7 @@ tests = [ # parse_cat "[]" => "(vect)" "[x,]" => "(vect x)" + "[x\n,,]" => "(vect x (error-t ✘))" "[x]" => "(vect x)" "[x \n ]" => "(vect x)" "[x \n, ]" => "(vect x)" @@ -619,6 +620,7 @@ tests = [ "[x, y]" => "(vect x y)" "[x,\n y]" => "(vect x y)" "[x\n, y]" => "(vect x y)" + "[x\n,, y]" => "(vect x (error-t ✘ y))" "[x,y ; z]" => "(vect x y (parameters z))" "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" @@ -782,10 +784,6 @@ broken_tests = [ ] ] -@testset 
"Invalid syntax" begin - @test !isempty(JuliaSyntax.parse(JuliaSyntax.GreenNode, """[ [],,[] ]""")[2]) -end - @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests @testset "$(repr(input))" for (input,output) in test_specs From a48dadacb1f4eb180a31699625a69abf6b666c9e Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 23 Aug 2022 14:56:57 +0200 Subject: [PATCH 0472/1109] Add check_all_packages script --- JuliaSyntax/tools/check_all_packages.jl | 64 +++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 JuliaSyntax/tools/check_all_packages.jl diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl new file mode 100644 index 0000000000000..2a52a9477816c --- /dev/null +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -0,0 +1,64 @@ +# hacky script to parse all Julia files in all packages in General +# to Exprs and report errors +# +# Run this after registry_download.jl (so the pkgs directory is populated). 
+ +using JuliaSyntax, Logging + +logio = open(joinpath(@__DIR__, "logs.txt"), "w") +logger = Logging.ConsoleLogger(logio) + +pkgspath = joinpath(@__DIR__, "pkgs") + +parallel = 50 +exceptions = [] +Logging.with_logger(logger) do + for tars in Iterators.partition(readdir(pkgspath), parallel) + @sync for tar in tars + endswith(tar, ".tgz") || continue + @async begin + dir = joinpath(@__DIR__, "pkgs", replace(tar, r"\.tgz$" => "")) + if !isdir(dir) || !isdir(joinpath(dir, "src")) + rm(dir; recursive=true, force=true) + mkpath(dir) + tar_path = joinpath(@__DIR__, "pkgs", tar) + try + run(`tar -xf $tar_path -C $dir`) + catch err + @error "could not untar $tar_path" + end + end + end + end + end + + t = time() + i = 0 + iob = IOBuffer() + for (r, _, files) in walkdir(pkgspath) + for f in files + endswith(f, ".jl") || continue + fpath = joinpath(r, f) + try + JuliaSyntax.parse(Expr, read(fpath, String)) + catch err + err isa InterruptException && rethrow() + ex = (err, catch_backtrace()) + push!(exceptions, ex) + @error "parsing failed for $(fpath)" ex + flush(logio) + end + i += 1 + if i % 100 == 0 + runtime = time() - t + avg = round(runtime/i*1000, digits = 2) + print(iob, "\e[2J\e[0;0H") + println(iob, "$i files parsed") + println(iob, " $(length(exceptions)) failures") + println(iob, " $(avg)ms per file, $(round(Int, runtime))s in total") + println(stderr, String(take!(iob))) + end + end + end +end +close(logio) From f727838c9dcdca4b371b064be4ec8c35fa6f9f4e Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 24 Aug 2022 17:41:27 +1000 Subject: [PATCH 0473/1109] Add fl_parse() test tool to call flisp parser directly (JuliaLang/JuliaSyntax.jl#70) This allows us to call the flisp parser directly instead of using Meta.parse which may have been overridden with the JuliaSyntax parser. Fix the tests to always use the flisp reference parser. 
--- JuliaSyntax/src/hooks.jl | 15 ++++++---- JuliaSyntax/src/utils.jl | 55 ++++++++++++++++++++++++++++------ JuliaSyntax/test/test_utils.jl | 9 +++--- JuliaSyntax/test/tokenize.jl | 7 ++--- 4 files changed, 63 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index e113a4301aeb1..2b024de79aec6 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -133,11 +133,16 @@ function _core_parser_hook(code, filename, lineno, offset, options) offset=offset, code=code) - if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 - return Core.Compiler.fl_parse(code, filename, lineno, offset, options) - else - return Core.Compiler.fl_parse(code, filename, offset, options) - end + _fl_parse_hook(code, filename, lineno, offset, options) + end +end + +# Call the flisp parser +function _fl_parse_hook(code, filename, lineno, offset, options) + @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 + return Core.Compiler.fl_parse(code, filename, lineno, offset, options) + else + return Core.Compiler.fl_parse(code, filename, offset, options) end end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 81f780ef9c74e..6cc4f4c6dcb4a 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -25,15 +25,6 @@ function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing) end end -function flisp_parse_all(code; filename="none") - if VERSION >= v"1.6" - Meta.parseall(code, filename=filename) - else - # This is approximate. It should work for well-formed code. 
- Base.parse_input_line(code, filename=filename) - end -end - # Really remove line numbers, even from Expr(:toplevel) remove_linenums!(ex) = ex function remove_linenums!(ex::Expr) @@ -48,3 +39,49 @@ function remove_linenums!(ex::Expr) return ex end + +#------------------------------------------------------------------------------- +# Copy of the Meta.parse() API, but ensuring that we call the flisp parser +# rather than using Meta.parse() which may be using the JuliaSyntax parser. + +""" +Like Meta.parse() but always call the flisp reference parser. +""" +function fl_parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) + ex, pos = fl_parse(str, 1, greedy=true, raise=raise, depwarn=depwarn) + if isa(ex,Expr) && ex.head === :error + return ex + end + if pos <= ncodeunits(str) + raise && throw(Meta.ParseError("extra token after end of expression")) + return Expr(:error, "extra token after end of expression") + end + return ex +end + +function fl_parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true, + depwarn::Bool=true) + ex, pos = _fl_parse_string(str, "none", 1, pos, greedy ? :statement : :atom) + if raise && isa(ex,Expr) && ex.head === :error + throw(Meta.ParseError(ex.args[1])) + end + return ex, pos +end + +""" +Like Meta.parseall() but always call the flisp reference parser. 
+""" +function fl_parseall(text::AbstractString; filename="none", lineno=1) + ex,_ = _fl_parse_string(text, String(filename), lineno, 1, :all) + return ex +end + +function _fl_parse_string(text::AbstractString, filename::AbstractString, + lineno::Integer, index::Integer, options) + if index < 1 || index > ncodeunits(text) + 1 + throw(BoundsError(text, index)) + end + ex, offset::Int = _fl_parse_hook(text, filename, lineno, index-1, options) + ex, offset+1 +end + diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 2510622c6ef41..ced5912ce6a6d 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -21,7 +21,8 @@ using JuliaSyntax: haschildren, children, child, - flisp_parse_all + fl_parseall, + fl_parse function remove_macro_linenums!(ex) if Meta.isexpr(ex, :macrocall) @@ -63,7 +64,7 @@ function parsers_agree_on_file(filename; show_diff=false) # ignore this case. return true end - fl_ex = flisp_parse_all(text, filename=filename) + fl_ex = fl_parseall(text, filename=filename) if Meta.isexpr(fl_ex, :toplevel) && !isempty(fl_ex.args) && Meta.isexpr(fl_ex.args[end], (:error, :incomplete)) # Reference parser failed. This generally indicates a broken file not a @@ -111,7 +112,7 @@ function equals_flisp_parse(tree) # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing # some context from the parent node. 
ex,_,_ = parse(Expr, node_text) - fl_ex = flisp_parse_all(node_text) + fl_ex = fl_parseall(node_text) if Meta.isexpr(fl_ex, :error) return true # Something went wrong in reduction; ignore these cases 😬 end @@ -237,7 +238,7 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") println(stdout, "\n\n# Julia Expr:") show(stdout, MIME"text/plain"(), ex) - f_ex = JuliaSyntax.remove_linenums!(Meta.parse(code, raise=false)) + f_ex = JuliaSyntax.remove_linenums!(fl_parse(code, raise=false)) if JuliaSyntax.remove_linenums!(ex) != f_ex printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) show(stdout, MIME"text/plain"(), f_ex) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index c0e81278eb7b7..2d0e9a91eb94b 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -4,6 +4,7 @@ module TokenizeTests using Test using JuliaSyntax: + JuliaSyntax, @K_str, Kind, kind, @@ -693,14 +694,10 @@ for op in ops for (arity, container) in strs for str in container - expr = Meta.parse(str, raise = false) + expr = JuliaSyntax.fl_parse(str, raise = false) if VERSION < v"1.7" && str == "a .&& b" expr = Expr(Symbol(".&&"), :a, :b) end - if str in (".>:b", ".<:b") - # HACK! See https://github.com/JuliaLang/JuliaSyntax.jl/issues/38 - continue - end if expr isa Expr && (expr.head != :error && expr.head != :incomplete) tokens = collect(tokenize(str)) exop = expr.head == :call ? 
expr.args[1] : expr.head From 816f08e077d0a18ed9483d57123cfdfd7425c2c0 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 24 Aug 2022 11:03:10 +0200 Subject: [PATCH 0474/1109] Fix early-abort handling in array_parse --- JuliaSyntax/src/parser.jl | 3 +++ JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 5 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 306290f053292..94ae01edc34ca 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2617,6 +2617,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) # For an excellent overview of Pratt parsing, see # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html (dim, binding_power) = parse_array_separator(ps, array_order) + if binding_power == typemin(Int) + return (K"hcat", EMPTY_FLAGS) + end while true (next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order) if next_bp == typemin(Int) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index ec6005dadf49a..d96fbf9bd35c0 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -652,6 +652,8 @@ tests = [ "[x ; y ; z]" => "(vcat x y z)" "[x;]" => "(vcat x)" "[x y]" => "(hcat x y)" + # Early abort + "[x@y" => "(hcat x) (error-t ✘ y)" # Mismatched rows "[x y ; z]" => "(vcat (row x y) z)" # Single elements in rows From 81b87b1fb3cc1436ecd614511eab754cf940310a Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2022 11:29:19 +0200 Subject: [PATCH 0475/1109] fix parsing hex floats containing 'f' (JuliaLang/JuliaSyntax.jl#73) --- JuliaSyntax/src/value_parsing.jl | 2 +- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index d6d9bc99420d5..e2c1bb69426bc 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -20,7 +20,7 @@ function 
julia_string_to_number(str::AbstractString, kind) end return x elseif kind == K"Float" - if !startswith(str,"0x") && 'f' in str + if !startswith(str,"0x") && 'f' in str && !('p' in str) # This is kind of awful. Should we have a separate Float32 literal # type produced by the lexer? The `f` suffix is nonstandard after all. return Base.parse(Float32, replace(str, 'f'=>'e')) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index ec6005dadf49a..d40034cc4f5fb 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -641,6 +641,7 @@ tests = [ "```cmd```" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" # literals "42" => "42" + "0x123456789abcdefp+0" => "8.19855292164869e16" # closing tokens ")" => "(error)" ], From 257a22b074f68d805a21a913257c7dae9d992292 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 25 Aug 2022 08:45:31 +0200 Subject: [PATCH 0476/1109] Fix tests --- JuliaSyntax/test/parser.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d96fbf9bd35c0..1c50b70444b83 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -21,6 +21,9 @@ tests = [ "a;b \n c;d" => "(toplevel (toplevel a b) (toplevel c d))" "a \n \n" => "(toplevel a)" "" => "(toplevel)" + # Early abort in array parsing + "[x@y" => "(toplevel (hcat x) (error-t ✘ y))" + "[x@y]" => "(toplevel (hcat x) (error-t ✘ y ✘))" ], JuliaSyntax.parse_block => [ "a;b;c" => "(block a b c)" @@ -652,8 +655,6 @@ tests = [ "[x ; y ; z]" => "(vcat x y z)" "[x;]" => "(vcat x)" "[x y]" => "(hcat x y)" - # Early abort - "[x@y" => "(hcat x) (error-t ✘ y)" # Mismatched rows "[x y ; z]" => "(vcat (row x y) z)" # Single elements in rows @@ -788,9 +789,9 @@ broken_tests = [ @testset "Inline test cases" begin @testset "$production" for (production, test_specs) in tests - @testset "$(repr(input))" for (input,output) in test_specs + @testset "$(repr(input))" for (input, output) 
in test_specs if !(input isa AbstractString) - opts,input = input + opts, input = input else opts = NamedTuple() end From d58776b86f528319df61aa4a1431c6d554b53a81 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 25 Aug 2022 17:35:41 +1000 Subject: [PATCH 0477/1109] Fix: .- and .+ before numeric literals (JuliaLang/JuliaSyntax.jl#76) -2 is numeric literal -2, but ensure .-2 is (call .- 2) --- JuliaSyntax/src/parser.jl | 9 +++++++-- JuliaSyntax/test/parser.jl | 15 +++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 306290f053292..11ff367902c42 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1101,7 +1101,7 @@ function parse_unary(ps::ParseState) parse_factor(ps) return end - if k in KSet"- +" + if k in KSet"- +" && !is_decorated(t) t2 = peek_token(ps, 2) if !preceding_whitespace(t2) && kind(t2) in KSet"Integer Float" k3 = peek(ps, 3) @@ -1116,13 +1116,18 @@ function parse_unary(ps::ParseState) else # We have a signed numeric literal. 
Glue the operator to the # next token to create a signed literal: - # +2 ==> +2 + # -2 ==> -2 + # +2.0 ==> 2.0 # -2*x ==> (call-i -2 * x) bump_glue(ps, kind(t2), EMPTY_FLAGS, 2) end return end end + # Things which are not quite negative literals result in a unary call instead + # -0x1 ==> (call - 0x01) + # - 2 ==> (call - 2) + # .-2 ==> (call .- 2) parse_unary_call(ps) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d40034cc4f5fb..011362ba1feee 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -129,11 +129,7 @@ tests = [ ], JuliaSyntax.parse_term => [ "a * b * c" => "(call-i a * b c)" - # parse_unary - "-2*x" => "(call-i -2 * x)" - ":T" => "(quote T)" - "in::T" => "(:: in T)" - "isa::T" => "(:: isa T)" + "-2*x" => "(call-i -2 * x)" ], JuliaSyntax.parse_juxtapose => [ "2x" => "(call-i 2 * x)" @@ -153,9 +149,16 @@ tests = [ "0xenomorph" => "0x0e" ], JuliaSyntax.parse_unary => [ - "+2" => "2" + ":T" => "(quote T)" + "in::T" => "(:: in T)" + "isa::T" => "(:: isa T)" "-2^x" => "(call - (call-i 2 ^ x))" "-2[1, 3]" => "(call - (ref 2 1 3))" + "-2" => "-2" + "+2.0" => "2.0" + "-0x1" => "(call - 0x01)" + "- 2" => "(call - 2)" + ".-2" => "(call .- 2)" ], JuliaSyntax.parse_unary_call => [ # Standalone dotted operators are parsed as (|.| op) From 5ee5ae936fa40a6722398b809dd58f05e78b2f8c Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 25 Aug 2022 10:37:43 +0200 Subject: [PATCH 0478/1109] Bump ] --- JuliaSyntax/src/parser.jl | 1 + JuliaSyntax/test/parser.jl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 94ae01edc34ca..af57860fdfcec 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2618,6 +2618,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html (dim, binding_power) = parse_array_separator(ps, array_order) 
if binding_power == typemin(Int) + bump_closing_token(ps, closer) return (K"hcat", EMPTY_FLAGS) end while true diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1c50b70444b83..e990be49c9bf2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -21,9 +21,6 @@ tests = [ "a;b \n c;d" => "(toplevel (toplevel a b) (toplevel c d))" "a \n \n" => "(toplevel a)" "" => "(toplevel)" - # Early abort in array parsing - "[x@y" => "(toplevel (hcat x) (error-t ✘ y))" - "[x@y]" => "(toplevel (hcat x) (error-t ✘ y ✘))" ], JuliaSyntax.parse_block => [ "a;b;c" => "(block a b c)" @@ -655,6 +652,9 @@ tests = [ "[x ; y ; z]" => "(vcat x y z)" "[x;]" => "(vcat x)" "[x y]" => "(hcat x y)" + # Early abort in array parsing + "[x@y" => "(hcat x (error-t ✘ y))" + "[x@y]" => "(hcat x (error-t ✘ y))" # Mismatched rows "[x y ; z]" => "(vcat (row x y) z)" # Single elements in rows From 5e1f5feb7bb7c0e753ebadd63959ce3e5587d11d Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 25 Aug 2022 19:54:42 +1000 Subject: [PATCH 0479/1109] Fix parser crash for empty tuple parsing in parse_function (JuliaLang/JuliaSyntax.jl#75) This won't be valid syntax during lowering, but the parser shouldn't throw. Also a little cleanup to clarify variable naming while we're here. --- JuliaSyntax/src/parser.jl | 34 ++++++++++++++++++++-------------- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 11ff367902c42..e94d3f99a208c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2002,29 +2002,35 @@ function parse_function(ps::ParseState) end else if peek(ps) == K"(" - bump(ps, TRIVIA_FLAG) - # When an initial parenthesis is present, we might either have the - # function name or the argument list in an anonymous function. 
We - # use parse_brackets directly here (rather than dispatching to it - # via parse_atom) so we can distinguish these two cases by peeking - # at the following parenthesis, if present. + # When an initial parenthesis is present, we might either have + # * the function name in parens, followed by (args...) + # * an anonymous function argument list in parens # - # The flisp parser disambiguates this case quite differently, - # producing less consistent syntax for anonymous functions. - is_anon_func_ = Ref(is_anon_func) + # This should somewhat parse as in parse_paren() (this is what + # the flisp parser does), but that results in weird parsing of + # keyword parameters. So we peek at a following `(` instead to + # distinguish the cases here. + bump(ps, TRIVIA_FLAG) + is_empty_tuple = peek(ps, skip_newlines=true) == K")" + _is_anon_func = Ref(is_anon_func) parse_brackets(ps, K")") do _, _, _, _ - is_anon_func_[] = peek(ps, 2) != K"(" - return (needs_parameters = is_anon_func_[], - eq_is_kw_before_semi = is_anon_func_[], - eq_is_kw_after_semi = is_anon_func_[]) + _is_anon_func[] = peek(ps, 2) != K"(" + return (needs_parameters = _is_anon_func[], + eq_is_kw_before_semi = _is_anon_func[], + eq_is_kw_after_semi = _is_anon_func[]) end - is_anon_func = is_anon_func_[] + is_anon_func = _is_anon_func[] if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) # function (x,y) end ==> (function (tuple x y) (block)) # function (x=1) end ==> (function (tuple (kw x 1)) (block)) # function (;x=1) end ==> (function (tuple (parameters (kw x 1))) (block)) emit(ps, def_mark, K"tuple") + elseif is_empty_tuple + # Weird case which is consistent with parse_paren but will be + # rejected in lowering + # function ()(x) end ==> (function (call (tuple) x) (block)) + emit(ps, def_mark, K"tuple") else # function (:)() end ==> (function (call :) (block)) # function (x::T)() end ==> (function (call (:: x T)) (block)) diff --git a/JuliaSyntax/test/parser.jl 
b/JuliaSyntax/test/parser.jl index 011362ba1feee..6f61f5ddb6da9 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -435,6 +435,7 @@ tests = [ "function (x,y) end" => "(function (tuple x y) (block))" "function (x=1) end" => "(function (tuple (kw x 1)) (block))" "function (;x=1) end" => "(function (tuple (parameters (kw x 1))) (block))" + "function ()(x) end" => "(function (call (tuple) x) (block))" "function (:)() end" => "(function (call :) (block))" "function (x::T)() end"=> "(function (call (:: x T)) (block))" "function (::T)() end" => "(function (call (:: T)) (block))" From 8c886cfed77599eca9cefedf978cce569420cd76 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 25 Aug 2022 12:10:09 +0200 Subject: [PATCH 0480/1109] Update src/parser.jl Co-authored-by: c42f --- JuliaSyntax/src/parser.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index af57860fdfcec..b4d75006d5fa0 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2618,6 +2618,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html (dim, binding_power) = parse_array_separator(ps, array_order) if binding_power == typemin(Int) + # [x@y ==> (hcat x (error-t ✘ y)) bump_closing_token(ps, closer) return (K"hcat", EMPTY_FLAGS) end From 4ebcbb3638a94e0d3a8ae7f54e48224fcef76fd7 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 25 Aug 2022 14:36:41 +0200 Subject: [PATCH 0481/1109] Better "unexpected kw" handling in ternary parsing --- JuliaSyntax/src/kinds.jl | 15 ++++++++------- JuliaSyntax/src/parser.jl | 14 ++++++++++++-- JuliaSyntax/test/parser.jl | 3 +++ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 9f461820dab75..947fc81b8d3bd 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -27,15 +27,10 @@ const 
_kind_names = "baremodule" "begin" "break" - "catch" "const" "continue" "do" - "else" - "elseif" - "end" "export" - "finally" "for" "function" "global" @@ -51,6 +46,13 @@ const _kind_names = "try" "using" "while" + "BEGIN_BLOCK_CONTINUATION_KEYWORDS" + "catch" + "finally" + "else" + "elseif" + "end" + "END_BLOCK_CONTINUATION_KEYWORDS" "BEGIN_CONTEXTUAL_KEYWORDS" # contextual keywords "abstract" @@ -1045,6 +1047,7 @@ end is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" < k < K"END_CONTEXTUAL_KEYWORDS" is_error(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" is_keyword(k::Kind) = K"BEGIN_KEYWORDS" < k < K"END_KEYWORDS" +is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" < k < K"END_BLOCK_CONTINUATION_KEYWORDS" is_literal(k::Kind) = K"BEGIN_LITERAL" < k < K"END_LITERAL" is_operator(k::Kind) = K"BEGIN_OPS" < k < K"END_OPS" is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") @@ -1098,5 +1101,3 @@ end function is_whitespace(x) kind(x) in (K"Whitespace", K"NewlineWs") end - - diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2622ab141a9e2..c836a2b6c36ae 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -659,8 +659,18 @@ function parse_cond(ps::ParseState) t = peek_token(ps) if !preceding_whitespace(t) # a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c) - bump_invisible(ps, K"error", TRIVIA_FLAG, - error="space required after `:` in `?` expression") + bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `:` in `?` expression") + end + if is_block_continuation_keyword(kind(t)) + # a "continuaton keyword" is likely to belong to the surrounding code, so + # we abort early + + # if true; x ? true elseif true end ==> (if true (block (if x true (error-t) (error-t))) (elseif true (block))) + # if true; x ? true end ==> (if true (block (if x true (error-t) (error-t)))) + # if true; x ? 
true : elseif true end ==> (if true (block (if x true (error-t))) (elseif true (block))) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="unexpected `$(kind(t))`") + emit(ps, mark, K"if") + return end parse_eq_star(ps) emit(ps, mark, K"if") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 41be18410f683..3771e8cdb17fe 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -402,6 +402,9 @@ tests = [ "if a xx elseif b yy end" => "(if a (block xx) (elseif b (block yy)))" "if a xx else if b yy end" => "(if a (block xx) (error-t) (elseif b (block yy)))" "if a xx else yy end" => "(if a (block xx) (block yy))" + "if true; x ? true elseif true end" => "(if true (block (if x true (error-t) (error-t))) (elseif true (block)))" + "if true; x ? true end" => "(if true (block (if x true (error-t) (error-t))))" + "if true; x ? true : elseif true end" => "(if true (block (if x true (error-t))) (elseif true (block)))" ], JuliaSyntax.parse_const_local_global => [ "global x" => "(global x)" From 1e0ad2104a94e0a11a96640d3ec0a68a8da02dd9 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 26 Aug 2022 20:35:11 +1000 Subject: [PATCH 0482/1109] Fix over/underflow in parsing float literals It seems we can't use Base.parse for floats because this disallows underflow. So replicate the logic of it here in julia-level code. This implementation is still somewhat (~15%) slower than Base.parse for some reason. --- JuliaSyntax/src/parser.jl | 19 -------- JuliaSyntax/src/utils.jl | 18 ++++++++ JuliaSyntax/src/value_parsing.jl | 74 ++++++++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2622ab141a9e2..4203c40e9a2f8 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -201,25 +201,6 @@ function bump_semicolon_trivia(ps) end end -# Like @assert, but always enabled and calls internal_error() -macro check(ex, msgs...) - msg = isempty(msgs) ? 
ex : msgs[1] - if isa(msg, AbstractString) - msg = msg - elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) - msg = :(string($(esc(msg)))) - else - msg = string(msg) - end - return :($(esc(ex)) ? nothing : internal_error($msg)) -end - -# Parser internal error, used as an assertion failure for cases we expect can't -# happen. -@noinline function internal_error(strs...) - error("Internal error: ", strs...) -end - #------------------------------------------------------------------------------- # Parsing-specific predicates on tokens/kinds # diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 6cc4f4c6dcb4a..08f5ab33e2ae2 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -1,3 +1,21 @@ +# Internal error, used as assertion failure for cases we expect can't happen. +@noinline function internal_error(strs...) + error("Internal error: ", strs...) +end + +# Like @assert, but always enabled and calls internal_error() +macro check(ex, msgs...) + msg = isempty(msgs) ? ex : msgs[1] + if isa(msg, AbstractString) + msg = msg + elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) + msg = :(string($(esc(msg)))) + else + msg = string(msg) + end + return :($(esc(ex)) ? nothing : internal_error($msg)) +end + """ Like printstyled, but allows providing RGB colors for true color terminals diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index e2c1bb69426bc..2074ad53c735b 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -21,12 +21,15 @@ function julia_string_to_number(str::AbstractString, kind) return x elseif kind == K"Float" if !startswith(str,"0x") && 'f' in str && !('p' in str) - # This is kind of awful. Should we have a separate Float32 literal - # type produced by the lexer? The `f` suffix is nonstandard after all. - return Base.parse(Float32, replace(str, 'f'=>'e')) + # TODO: re-detecting Float32 here is kind of awful. 
Should have a + # separate Float32 literal type produced by the lexer? + x, code = _parse_float(Float32, str) else - return Base.parse(Float64, str) + x, code = _parse_float(Float64, str) end + return code === :ok ? x : + code === :underflow ? x : # < TODO: emit warning somehow? + #=code === :overflow=# ErrorVal() elseif kind == K"HexInt" ndigits = length(str)-2 return ndigits <= 2 ? Base.parse(UInt8, str) : @@ -66,6 +69,69 @@ function julia_string_to_number(str::AbstractString, kind) end +#------------------------------------------------------------------------------- +""" +Like `Base.parse(Union{Float64,Float32}, str)`, but permits float underflow + +Parse a Float64. str[firstind:lastind] must be a valid floating point literal +string. If the value is outside Float64 range. +""" +function _parse_float(::Type{T}, str::String, + firstind::Integer, lastind::Integer) where {T} # force specialize with where {T} + strsize = lastind - firstind + 1 + bufsz = 50 + buf = Ref{NTuple{bufsz, UInt8}}() + if strsize < bufsz + GC.@preserve str buf begin + ptr = Base.unsafe_convert(Ptr{UInt8}, pointer_from_objref(buf)) + unsafe_copyto!(ptr, pointer(str, firstind), strsize) + unsafe_store!(ptr, UInt8(0), strsize + 1) + end + else + # Slow path with allocation + str = String(str[firstind:lastind]) + ptr = pointer(str) + end + GC.@preserve str buf begin + Libc.errno(0) + endptr = Ref{Ptr{UInt8}}(C_NULL) + # Assumes ptr is null terminated + x = _strtofloat(T, ptr, strsize, endptr) + @check endptr[] == ptr + strsize + status = :ok + if Libc.errno() == Libc.ERANGE + # strtod man page: + # * If the correct value would cause overflow, plus or + # minus HUGE_VAL, HUGE_VALF, or HUGE_VALL is returned and + # ERANGE is stored in errno. + # * If the correct value would cause underflow, a value with + # magnitude no larger than DBL_MIN, FLT_MIN, or LDBL_MIN is + # returned and ERANGE is stored in errno. + status = abs(x) < 1.0 ? 
:underflow : :overflow + end + return (x, status) + end +end + +function _parse_float(T, str::String) + _parse_float(T, str, firstindex(str), lastindex(str)) +end + +@inline function _strtofloat(::Type{Float64}, ptr, strsize, endptr) + @ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble +end + +@inline function _strtofloat(::Type{Float32}, ptr, strsize, endptr) + # Convert float exponent 'f' to 'e' for strtof, eg, 1.0f0 => 1.0e0 + for p in ptr+strsize-1:-1:ptr + if unsafe_load(p) == UInt8('f') + unsafe_store!(p, UInt8('e')) + break + end + end + @ccall jl_strtof_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cfloat +end + #------------------------------------------------------------------------------- is_indentation(c) = c == ' ' || c == '\t' From 88a8b844f7aa80202234f15bebf427c80d4bcac8 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 28 Aug 2022 08:11:07 +1000 Subject: [PATCH 0483/1109] Use Vector{UInt8} as allocated buffer in slow path We probably shoudln't be modifying the memory in a String on the slow path here, so use a Vector{UInt8} as the buffer instead. Also deal with some weird codegen issue which came up as a result on julia 1.6 / 1.7. Add tests. 
--- JuliaSyntax/src/value_parsing.jl | 57 ++++++++++++++++++------------- JuliaSyntax/test/parser.jl | 1 + JuliaSyntax/test/value_parsing.jl | 26 +++++++++++++- 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 2074ad53c735b..8a3b158a75bc8 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -80,36 +80,21 @@ function _parse_float(::Type{T}, str::String, firstind::Integer, lastind::Integer) where {T} # force specialize with where {T} strsize = lastind - firstind + 1 bufsz = 50 - buf = Ref{NTuple{bufsz, UInt8}}() if strsize < bufsz + buf = Ref{NTuple{bufsz, UInt8}}() + ptr = Base.unsafe_convert(Ptr{UInt8}, pointer_from_objref(buf)) GC.@preserve str buf begin - ptr = Base.unsafe_convert(Ptr{UInt8}, pointer_from_objref(buf)) unsafe_copyto!(ptr, pointer(str, firstind), strsize) + # Ensure ptr is null terminated unsafe_store!(ptr, UInt8(0), strsize + 1) + _unsafe_parse_float(T, ptr, strsize) end else - # Slow path with allocation - str = String(str[firstind:lastind]) - ptr = pointer(str) - end - GC.@preserve str buf begin - Libc.errno(0) - endptr = Ref{Ptr{UInt8}}(C_NULL) - # Assumes ptr is null terminated - x = _strtofloat(T, ptr, strsize, endptr) - @check endptr[] == ptr + strsize - status = :ok - if Libc.errno() == Libc.ERANGE - # strtod man page: - # * If the correct value would cause overflow, plus or - # minus HUGE_VAL, HUGE_VALF, or HUGE_VALL is returned and - # ERANGE is stored in errno. - # * If the correct value would cause underflow, a value with - # magnitude no larger than DBL_MIN, FLT_MIN, or LDBL_MIN is - # returned and ERANGE is stored in errno. - status = abs(x) < 1.0 ? :underflow : :overflow - end - return (x, status) + # Slow path with allocation. 
+ buf = Vector{UInt8}(str[firstind:lastind]) + push!(buf, 0x00) + ptr = pointer(buf) + GC.@preserve buf _unsafe_parse_float(T, ptr, strsize) end end @@ -117,12 +102,36 @@ function _parse_float(T, str::String) _parse_float(T, str, firstindex(str), lastindex(str)) end +# Internals of _parse_float, split into a separate function to avoid some +# apparent codegen issues https://github.com/JuliaLang/julia/issues/46509 +# (perhaps we don't want the `buf` in `GC.@preserve buf` to be stack allocated +# on one branch and heap allocated in another?) +@inline function _unsafe_parse_float(::Type{T}, ptr, strsize) where {T} + Libc.errno(0) + endptr = Ref{Ptr{UInt8}}(C_NULL) + x = _strtofloat(T, ptr, strsize, endptr) + @check endptr[] == ptr + strsize + status = :ok + if Libc.errno() == Libc.ERANGE + # strtod man page: + # * If the correct value would cause overflow, plus or + # minus HUGE_VAL, HUGE_VALF, or HUGE_VALL is returned and + # ERANGE is stored in errno. + # * If the correct value would cause underflow, a value with + # magnitude no larger than DBL_MIN, FLT_MIN, or LDBL_MIN is + # returned and ERANGE is stored in errno. + status = abs(x) < 1.0 ? :underflow : :overflow + end + return (x, status) +end + @inline function _strtofloat(::Type{Float64}, ptr, strsize, endptr) @ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble end @inline function _strtofloat(::Type{Float32}, ptr, strsize, endptr) # Convert float exponent 'f' to 'e' for strtof, eg, 1.0f0 => 1.0e0 + # Presumes we can modify the data in ptr! 
for p in ptr+strsize-1:-1:ptr if unsafe_load(p) == UInt8('f') unsafe_store!(p, UInt8('e')) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 41be18410f683..4a50b0c8dca38 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -645,6 +645,7 @@ tests = [ "```cmd```" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" # literals "42" => "42" + "1.0e-1000" => "0.0" "0x123456789abcdefp+0" => "8.19855292164869e16" # closing tokens ")" => "(error)" diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 9ba3c093d5934..4684dcadcfbdf 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -1,6 +1,30 @@ using JuliaSyntax: julia_string_to_number, - unescape_julia_string + unescape_julia_string, + _parse_float + +@testset "Float parsing" begin + # Float64 + @test _parse_float(Float64, "123", 1, 3) === (123.0, :ok) + @test _parse_float(Float64, "123", 2, 3) === (23.0, :ok) + @test _parse_float(Float64, "123", 2, 2) === (2.0, :ok) + @test _parse_float(Float64, "1.3", 1, 3) === (1.3, :ok) + @test _parse_float(Float64, "1.3e2", 1, 5) === (1.3e2, :ok) + @test _parse_float(Float64, "1.0e-1000", 1, 9) === (0.0, :underflow) + @test _parse_float(Float64, "1.0e+1000", 1, 9) === (Inf, :overflow) + # Slow path (exceeds static buffer size) + @test _parse_float(Float64, "0.000000000000000000000000000000000000000000000000000000000001") === (1e-60, :ok) + + # Float32 + @test _parse_float(Float32, "123", 1, 3) === (123.0f0, :ok) + @test _parse_float(Float32, "1.3f2", 1, 5) === (1.3f2, :ok) + @test _parse_float(Float32, "1.0f-50", 1, 7) === (0.0f0, :underflow) + @test _parse_float(Float32, "1.0f+50", 1, 7) === (Inf32, :overflow) + + # Assertions + @test_throws ErrorException _parse_float(Float64, "x", 1, 1) + @test_throws ErrorException _parse_float(Float64, "1x", 1, 2) +end hexint(s) = julia_string_to_number(s, K"HexInt") binint(s) = julia_string_to_number(s, K"BinInt") From 
eafe35e87d372bdc299f933132ed8026737497bf Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 30 Aug 2022 17:26:11 +1000 Subject: [PATCH 0484/1109] Work around strtof bug on windows --- JuliaSyntax/src/value_parsing.jl | 35 +++++++++++++++++++++++-------- JuliaSyntax/test/value_parsing.jl | 4 +++- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 8a3b158a75bc8..53e6576bccc13 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -106,10 +106,10 @@ end # apparent codegen issues https://github.com/JuliaLang/julia/issues/46509 # (perhaps we don't want the `buf` in `GC.@preserve buf` to be stack allocated # on one branch and heap allocated in another?) -@inline function _unsafe_parse_float(::Type{T}, ptr, strsize) where {T} +@inline function _unsafe_parse_float(::Type{Float64}, ptr, strsize) Libc.errno(0) endptr = Ref{Ptr{UInt8}}(C_NULL) - x = _strtofloat(T, ptr, strsize, endptr) + x = @ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble @check endptr[] == ptr + strsize status = :ok if Libc.errno() == Libc.ERANGE @@ -120,16 +120,12 @@ end # * If the correct value would cause underflow, a value with # magnitude no larger than DBL_MIN, FLT_MIN, or LDBL_MIN is # returned and ERANGE is stored in errno. - status = abs(x) < 1.0 ? :underflow : :overflow + status = abs(x) < 1 ? :underflow : :overflow end return (x, status) end -@inline function _strtofloat(::Type{Float64}, ptr, strsize, endptr) - @ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble -end - -@inline function _strtofloat(::Type{Float32}, ptr, strsize, endptr) +@inline function _unsafe_parse_float(::Type{Float32}, ptr, strsize) # Convert float exponent 'f' to 'e' for strtof, eg, 1.0f0 => 1.0e0 # Presumes we can modify the data in ptr! 
for p in ptr+strsize-1:-1:ptr @@ -138,9 +134,30 @@ end break end end - @ccall jl_strtof_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cfloat + Libc.errno(0) + endptr = Ref{Ptr{UInt8}}(C_NULL) + status = :ok + @static if Sys.iswindows() + # Call strtod here and convert to Float32 on the Julia side because + # strtof seems buggy on windows and doesn't set ERANGE correctly on + # overflow. See also + # https://github.com/JuliaLang/julia/issues/46544 + x = Float32(@ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble) + if isinf(x) + status = :overflow + # Underflow not detected, but that will only be a warning elsewhere. + end + else + x = @ccall jl_strtof_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cfloat + end + @check endptr[] == ptr + strsize + if Libc.errno() == Libc.ERANGE + status = abs(x) < 1 ? :underflow : :overflow + end + return (x, status) end + #------------------------------------------------------------------------------- is_indentation(c) = c == ' ' || c == '\t' diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 4684dcadcfbdf..f16fd3acf2cd9 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -18,7 +18,9 @@ using JuliaSyntax: # Float32 @test _parse_float(Float32, "123", 1, 3) === (123.0f0, :ok) @test _parse_float(Float32, "1.3f2", 1, 5) === (1.3f2, :ok) - @test _parse_float(Float32, "1.0f-50", 1, 7) === (0.0f0, :underflow) + if !Sys.iswindows() + @test _parse_float(Float32, "1.0f-50", 1, 7) === (0.0f0, :underflow) + end @test _parse_float(Float32, "1.0f+50", 1, 7) === (Inf32, :overflow) # Assertions From d8fc66e2efa27c20b4ce3e6bc5d4e958b545ab01 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 10:55:07 +0200 Subject: [PATCH 0485/1109] Also handle newline ws --- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl 
index 3f3932413e2c4..8edcbf4c24738 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -637,7 +637,7 @@ function parse_cond(ps::ParseState) # a ? b c ==> (if a b (error) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression") end - t = peek_token(ps) + t = peek_token(ps; skip_newlines = true) if !preceding_whitespace(t) # a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `:` in `?` expression") @@ -648,6 +648,7 @@ function parse_cond(ps::ParseState) # if true; x ? true elseif true end ==> (if true (block (if x true (error-t) (error-t))) (elseif true (block))) # if true; x ? true end ==> (if true (block (if x true (error-t) (error-t)))) + # if true; x ? true\n end ==> (if true (block (if x true (error-t) (error-t)))) # if true; x ? true : elseif true end ==> (if true (block (if x true (error-t))) (elseif true (block))) bump_invisible(ps, K"error", TRIVIA_FLAG, error="unexpected `$(kind(t))`") emit(ps, mark, K"if") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 0758cc60f92e9..dd667a602ded9 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -404,6 +404,7 @@ tests = [ "if a xx else yy end" => "(if a (block xx) (block yy))" "if true; x ? true elseif true end" => "(if true (block (if x true (error-t) (error-t))) (elseif true (block)))" "if true; x ? true end" => "(if true (block (if x true (error-t) (error-t))))" + "if true; x ? true\nend" => "(if true (block (if x true (error-t) (error-t))))" "if true; x ? 
true : elseif true end" => "(if true (block (if x true (error-t))) (elseif true (block)))" ], JuliaSyntax.parse_const_local_global => [ From 2ab932d97971fcc86be06b7cd3878b377fb6f457 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 09:38:42 +0000 Subject: [PATCH 0486/1109] improve diagnostics script --- JuliaSyntax/.gitignore | 2 + JuliaSyntax/tools/check_all_packages.jl | 64 +++++++++++++++++++++---- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore index b067eddee4ee0..e3f57ade45012 100644 --- a/JuliaSyntax/.gitignore +++ b/JuliaSyntax/.gitignore @@ -1 +1,3 @@ /Manifest.toml +/tools/pkgs +/tools/logs.txt \ No newline at end of file diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 2a52a9477816c..29e32922692a8 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -5,6 +5,23 @@ using JuliaSyntax, Logging +# like Meta.parseall, but throws +function parseall(str) + pos = firstindex(str) + exs = [] + while pos <= lastindex(str) + ex, pos = Meta.parse(str, pos) + push!(exs, ex) + end + if length(exs) == 0 + throw(Meta.ParseError("end of input")) + elseif length(exs) == 1 + return exs[1] + else + return Expr(:toplevel, exs...) 
+ end +end + logio = open(joinpath(@__DIR__, "logs.txt"), "w") logger = Logging.ConsoleLogger(logio) @@ -35,18 +52,44 @@ Logging.with_logger(logger) do t = time() i = 0 iob = IOBuffer() + ex_count = 0 for (r, _, files) in walkdir(pkgspath) for f in files endswith(f, ".jl") || continue fpath = joinpath(r, f) - try - JuliaSyntax.parse(Expr, read(fpath, String)) - catch err - err isa InterruptException && rethrow() - ex = (err, catch_backtrace()) - push!(exceptions, ex) - @error "parsing failed for $(fpath)" ex - flush(logio) + if isfile(fpath) + file = read(fpath, String) + try + e1 = JuliaSyntax.parse(Expr, file) + catch err + err isa InterruptException && rethrow() + ex_count += 1 + ex = (err, catch_backtrace()) + push!(exceptions, ex) + meta_parse = "success" + try + parseall(file) + catch err2 + meta_parse = "fail" + ex_count -= 1 + end + parse_to_syntax = "success" + try + JuliaSyntax.parse(JuliaSyntax.SyntaxNode, file) + catch err2 + parse_to_syntax = "fail" + end + severity = parse_to_syntax == "fail" ? "error" : + meta_parse == "fail" ? 
"warn" : "error" + println(logio, """ + [$(severity)] $(fpath) + parse-to-expr: fail + parse-to-syntaxtree: $(parse_to_syntax) + reference: $(meta_parse) + """) + @error "" exception = ex + flush(logio) + end end i += 1 if i % 100 == 0 @@ -54,8 +97,9 @@ Logging.with_logger(logger) do avg = round(runtime/i*1000, digits = 2) print(iob, "\e[2J\e[0;0H") println(iob, "$i files parsed") - println(iob, " $(length(exceptions)) failures") - println(iob, " $(avg)ms per file, $(round(Int, runtime))s in total") + println(iob, "> $(ex_count) failures compared to Meta.parse") + println(iob, "> $(length(exceptions)) errors in total") + println(iob, "> $(avg)ms per file, $(round(Int, runtime))s in total") println(stderr, String(take!(iob))) end end From ac6f25ec89b40da6155b062c42bf93766306d9fc Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 10:15:52 +0000 Subject: [PATCH 0487/1109] Allow newline in range expr after : in parens --- JuliaSyntax/src/parser.jl | 5 +++-- JuliaSyntax/test/parser.jl | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4203c40e9a2f8..679c1960151a5 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -822,9 +822,10 @@ function parse_range(ps::ParseState) emit_diagnostic(ps, error="found unexpected closing token") return end - if had_newline + if had_newline && !ps.whitespace_newline # Error message for people coming from python - # 1:\n2 ==> (call-i 1 : (error)) + # 1:\n2 ==> (call-i 1 : (error)) + # (1:\n2) ==> (call-i 1 : 2) emit_diagnostic(ps, whitespace=true, error="line break after `:` in range expression") bump_invisible(ps, K"error") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4a50b0c8dca38..be7ed72d1452d 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -101,6 +101,7 @@ tests = [ "1:2:3" => "(call-i 1 : 2 3)" "a:b:c:d:e" => "(call-i (call-i a : b c) : d e)" "a :< b" => 
"(call-i a (error : <) b)" + "1:\n2" => "(call-i 1 : (error))" ], JuliaSyntax.parse_range => [ "a..b" => "(call-i a .. b)" @@ -561,6 +562,9 @@ tests = [ # Generators "(x for a in as)" => "(generator x (= a as))" "(x \n\n for a in as)" => "(generator x (= a as))" + # Range parsing in parens + "(1:\n2)" => "(call-i 1 : 2)" + "(1:2)" => "(call-i 1 : 2)" ], JuliaSyntax.parse_atom => [ ":foo" => "(quote foo)" From 7eb87a1aca929b84c2d8858e80d13ceb43aaf056 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 13:25:00 +0200 Subject: [PATCH 0488/1109] Correctly handle begin as kw in typed comprehension --- JuliaSyntax/src/parser.jl | 4 +++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4203c40e9a2f8..904cac7c50560 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2525,6 +2525,7 @@ end # then reconstructing the nested flattens and generators when converting to Expr. 
# # [x for a = as for b = bs if cond1 for c = cs if cond2] ==> (comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2))) +# [x for a = as if begin cond2 end] => (comprehension (generator x (filter (= a as) (block cond2)))) # # flisp: parse-generator function parse_generator(ps::ParseState, mark, flatten=false) @@ -2561,7 +2562,8 @@ end # flisp: parse-comprehension function parse_comprehension(ps::ParseState, mark, closer) ps = ParseState(ps, whitespace_newline=true, - space_sensitive=false) + space_sensitive=false, + end_symbol=false) parse_generator(ps, mark) bump_closing_token(ps, closer) return (K"comprehension", EMPTY_FLAGS) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4a50b0c8dca38..14a60f1da69e2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -614,6 +614,7 @@ tests = [ "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" # parse_generator "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" + "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" "[(x)for x in xs]" => "(comprehension (generator x (error-t) (= x xs)))" "(a for x in xs if cond)" => "(generator a (filter (= x xs) cond))" "(xy for x in xs for y in ys)" => "(flatten xy (= x xs) (= y ys))" From a192fc43eaa36e47b555651fe86c67bd0bc401ca Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 12:23:08 +0000 Subject: [PATCH 0489/1109] never throw in unescape_julia_string --- JuliaSyntax/src/syntax_tree.jl | 22 ++++++++++++++++---- JuliaSyntax/src/value_parsing.jl | 20 ++++++++++-------- JuliaSyntax/test/parser.jl | 1 + JuliaSyntax/test/value_parsing.jl | 34 +++++++++++++++---------------- 4 files changed, 47 insertions(+), 30 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 5788992c51e35..5dc154daaa4d7 
100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -36,11 +36,20 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"false" false elseif k == K"Char" - unescape_julia_string(val_str, false, false)[2] + v, err, _ = unescape_julia_string(val_str, false, false) + if err + ErrorVal() + else + v[2] + end elseif k == K"Identifier" if has_flags(head(raw), RAW_STRING_FLAG) - s = unescape_julia_string(val_str, false, true) - Symbol(normalize_identifier(s)) + s, err, _ = unescape_julia_string(val_str, false, true) + if err + ErrorVal() + else + Symbol(normalize_identifier(s)) + end else Symbol(normalize_identifier(val_str)) end @@ -50,7 +59,12 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k in KSet"String CmdString" is_cmd = k == K"CmdString" is_raw = has_flags(head(raw), RAW_STRING_FLAG) - unescape_julia_string(val_str, is_cmd, is_raw) + s, err, _ = unescape_julia_string(val_str, is_cmd, is_raw) + if err + ErrorVal() + else + s + end elseif is_operator(k) isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 53e6576bccc13..51e674f68c2a5 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -77,7 +77,7 @@ Parse a Float64. str[firstind:lastind] must be a valid floating point literal string. If the value is outside Float64 range. """ function _parse_float(::Type{T}, str::String, - firstind::Integer, lastind::Integer) where {T} # force specialize with where {T} + firstind::Integer, lastind::Integer) where {T} # force specialize with where {T} strsize = lastind - firstind + 1 bufsz = 50 if strsize < bufsz @@ -207,7 +207,7 @@ end Process Julia source code escape sequences for non-raw strings. `str` should be passed without delimiting quotes. 
""" -function unescape_julia_string(io::IO, str::AbstractString) +function unescape_julia_string(io::IO, str::AbstractString)::Tuple{Bool, String} i = firstindex(str) lastidx = lastindex(str) while i <= lastidx @@ -244,8 +244,7 @@ function unescape_julia_string(io::IO, str::AbstractString) end if k == 1 || n > 0x10ffff u = m == 4 ? 'u' : 'U' - throw(ArgumentError("invalid $(m == 2 ? "hex (\\x)" : - "unicode (\\$u)") escape sequence")) + return true, "invalid $(m == 2 ? "hex (\\x)" : "unicode (\\$u)") escape sequence" end if m == 2 # \x escape sequence write(io, UInt8(n)) @@ -261,7 +260,7 @@ function unescape_julia_string(io::IO, str::AbstractString) i += 1 end if n > 255 - throw(ArgumentError("octal escape sequence out of range")) + return true, "octal escape sequence out of range" end write(io, UInt8(n)) else @@ -280,21 +279,24 @@ function unescape_julia_string(io::IO, str::AbstractString) c == '"' ? '"' : c == '$' ? '$' : c == '`' ? '`' : - throw(ArgumentError("Invalid escape sequence \\$c")) + return true, "Invalid escape sequence \\$c" write(io, u) end i = nextind(str, i) end + return false, "" end -function unescape_julia_string(str::AbstractString, is_cmd::Bool, is_raw::Bool) +function unescape_julia_string(str::AbstractString, is_cmd::Bool, is_raw::Bool)::Tuple{String, Bool, String} io = IOBuffer() + error = false + msg = "" if is_raw unescape_raw_string(io, str, is_cmd) else - unescape_julia_string(io, str) + error, msg = unescape_julia_string(io, str) end - String(take!(io)) + String(take!(io)), error, msg end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4a50b0c8dca38..c5ed36e26572a 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -704,6 +704,7 @@ tests = [ "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" "\"\$in\"" => "(string in)" + raw"\"\xqqq\"" => "✘" # Triple-quoted dedenting: "\"\"\"\nx\"\"\"" => 
"\"x\"" "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index f16fd3acf2cd9..24c0671856add 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -151,8 +151,8 @@ octint(s) = julia_string_to_number(s, K"OctInt") end end +unesc(str, is_cmd = false, is_raw = false) = unescape_julia_string(str, is_cmd, is_raw)[1] @testset "String unescaping" begin - unesc(str) = unescape_julia_string(str, false, false) # Allowed escapes of delimiters and dollar sign @test only(unesc("\\\\")) == '\\' @test only(unesc("\\\"")) == '"' @@ -164,8 +164,8 @@ end @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd" # Invalid escapes - @test_throws ArgumentError unesc("\\.") - @test_throws ArgumentError unesc("\\z") + @test unescape_julia_string("\\.", false, false)[2] + @test unescape_julia_string("\\z", false, false)[2] # Standard C escape sequences @test codeunits(unesc("\\n\\t\\r\\e\\b\\f\\v\\a")) == @@ -177,44 +177,44 @@ end @test unesc("x\\U001F604x") == "x😄x" # Maximum unicode code point @test unesc("x\\U10ffffx") == "x\U10ffffx" - @test_throws ArgumentError unesc("x\\U110000x") + @test unescape_julia_string("x\\U110000x", false, false)[2] # variable-length octal @test unesc("x\\7x") == "x\ax" @test unesc("x\\77x") == "x?x" @test unesc("x\\141x") == "xax" @test unesc("x\\377x") == "x\xffx" - @test_throws ArgumentError unesc("x\\400x") + @test unescape_julia_string("x\\400x", false, false)[2] end @testset "Raw string unescaping" begin # " delimited # x\"x ==> x"x - @test unescape_julia_string("x\\\"x", false, true) == "x\"x" + @test unesc("x\\\"x", false, true) == "x\"x" # x\`x ==> x\`x - @test unescape_julia_string("x\\`x", false, true) == "x\\`x" + @test unesc("x\\`x", false, true) == "x\\`x" # x\\\"x ==> x\"x - @test unescape_julia_string("x\\\\\\\"x", false, true) == "x\\\"x" + @test unesc("x\\\\\\\"x", false, true) == "x\\\"x" # x\\\`x ==> x\\\`x - @test 
unescape_julia_string("x\\\\\\`x", false, true) == "x\\\\\\`x" + @test unesc("x\\\\\\`x", false, true) == "x\\\\\\`x" # '\\ ' ==> '\\ ' - @test unescape_julia_string("\\\\ ", false, true) == "\\\\ " + @test unesc("\\\\ ", false, true) == "\\\\ " # '\\' ==> '\' - @test unescape_julia_string("\\\\", false, true) == "\\" + @test unesc("\\\\", false, true) == "\\" # '\\\\' ==> '\\' - @test unescape_julia_string("\\\\\\\\", false, true) == "\\\\" + @test unesc("\\\\\\\\", false, true) == "\\\\" # ` delimited # x\"x ==> x\"x - @test unescape_julia_string("x\\\"x", true, true) == "x\\\"x" + @test unesc("x\\\"x", true, true) == "x\\\"x" # x\`x ==> x`x - @test unescape_julia_string("x\\`x", true, true) == "x`x" + @test unesc("x\\`x", true, true) == "x`x" # x\\\"x ==> x\"x - @test unescape_julia_string("x\\\\\\\"x", true, true) == "x\\\\\\\"x" + @test unesc("x\\\\\\\"x", true, true) == "x\\\\\\\"x" # x\\\`x ==> x\`x - @test unescape_julia_string("x\\\\\\`x", true, true) == "x\\`x" + @test unesc("x\\\\\\`x", true, true) == "x\\`x" # '\\ ' ==> '\\ ' - @test unescape_julia_string("\\\\ ", true, true) == "\\\\ " + @test unesc("\\\\ ", true, true) == "\\\\ " end @testset "Normalization of identifiers" begin From 50b6bf13fbd599c9074422a8bc2c10381b958812 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 12:34:29 +0000 Subject: [PATCH 0490/1109] non-throwing is_identifier_start_char --- JuliaSyntax/src/tokenize_utils.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 720fff4c6f8e7..7976f1c348be2 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -9,7 +9,11 @@ end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false - return Base.is_id_start_char(c) + return try + Base.is_id_start_char(c) + catch _ + false + end end # Chars that we will never allow to be part of a valid non-operator identifier From 
114ca886f7e4dd61c7d00f0329548d2c1a909180 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 30 Aug 2022 12:35:32 +0000 Subject: [PATCH 0491/1109] non-throwing is_identifier_char --- JuliaSyntax/src/tokenize_utils.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 7976f1c348be2..8ad6792c11a9e 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -4,14 +4,19 @@ const EOF_CHAR = typemax(Char) function is_identifier_char(c::Char) c == EOF_CHAR && return false - return Base.is_id_char(c) + return try + Base.is_id_char(c) + catch + false + end + end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false return try Base.is_id_start_char(c) - catch _ + catch false end end From 890ced3194ad7b7a6ead3c1dc133d3ed8ea8dc57 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 31 Aug 2022 09:33:01 +0200 Subject: [PATCH 0492/1109] Apply suggestions from code review Co-authored-by: c42f --- JuliaSyntax/src/syntax_tree.jl | 1 + JuliaSyntax/test/value_parsing.jl | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 5dc154daaa4d7..a9cf33e48054f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -61,6 +61,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In is_raw = has_flags(head(raw), RAW_STRING_FLAG) s, err, _ = unescape_julia_string(val_str, is_cmd, is_raw) if err + # TODO: communicate the unescaping error somehow ErrorVal() else s diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 24c0671856add..634963e9edc77 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -151,7 +151,11 @@ octint(s) = julia_string_to_number(s, K"OctInt") end end -unesc(str, is_cmd = false, is_raw = false) = 
unescape_julia_string(str, is_cmd, is_raw)[1] +function unesc(str, is_cmd = false, is_raw = false) + str, iserror, _ = unescape_julia_string(str, is_cmd, is_raw) + @test !iserorr + return str +end @testset "String unescaping" begin # Allowed escapes of delimiters and dollar sign @test only(unesc("\\\\")) == '\\' From fb560decb856bf2e81f804198d9a56ce79c4b272 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 31 Aug 2022 10:48:54 +0200 Subject: [PATCH 0493/1109] Fix typo --- JuliaSyntax/test/value_parsing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 634963e9edc77..8b983131b6b97 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -153,7 +153,7 @@ end function unesc(str, is_cmd = false, is_raw = false) str, iserror, _ = unescape_julia_string(str, is_cmd, is_raw) - @test !iserorr + @test !iserror return str end @testset "String unescaping" begin From 653b00ce8f12c84b66b5f6bd6878b673159c67da Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 31 Aug 2022 10:57:03 +0200 Subject: [PATCH 0494/1109] Remove skip_newlines instead --- JuliaSyntax/src/parser.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 679c1960151a5..e02b212700167 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -811,7 +811,7 @@ function parse_range(ps::ParseState) end n_colons += 1 bump(ps, n_colons == 1 ? 
EMPTY_FLAGS : TRIVIA_FLAG) - had_newline = peek(ps, skip_newlines=false) == K"NewlineWs" + had_newline = peek(ps) == K"NewlineWs" t = peek_token(ps) if is_closing_token(ps, kind(t)) # 1: } ==> (call-i 1 : (error)) @@ -822,7 +822,7 @@ function parse_range(ps::ParseState) emit_diagnostic(ps, error="found unexpected closing token") return end - if had_newline && !ps.whitespace_newline + if had_newline # Error message for people coming from python # 1:\n2 ==> (call-i 1 : (error)) # (1:\n2) ==> (call-i 1 : 2) From 43304eb860374036ba297d47bcc2e190f9a5a9ec Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 31 Aug 2022 11:25:55 +0200 Subject: [PATCH 0495/1109] Add FIXME --- JuliaSyntax/src/parser.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8edcbf4c24738..8f232df54d4ba 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -642,6 +642,9 @@ function parse_cond(ps::ParseState) # a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `:` in `?` expression") end + + # FIXME: This is a very specific case. Error recovery should be handled mor + # generally elsewhere. 
if is_block_continuation_keyword(kind(t)) # a "continuaton keyword" is likely to belong to the surrounding code, so # we abort early From 3397e4d1b4e8d01ab7f44453a6923288ee3605d6 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 31 Aug 2022 11:18:22 +0200 Subject: [PATCH 0496/1109] Check Base.ismalformed instead and add a test --- JuliaSyntax/src/tokenize_utils.jl | 16 +++++----------- JuliaSyntax/test/tokenize.jl | 7 +++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 8ad6792c11a9e..1b50536200af2 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -4,25 +4,19 @@ const EOF_CHAR = typemax(Char) function is_identifier_char(c::Char) c == EOF_CHAR && return false - return try - Base.is_id_char(c) - catch - false - end - + Base.ismalformed(c) && return false + return Base.is_id_char(c) end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false - return try - Base.is_id_start_char(c) - catch - false - end + Base.ismalformed(c) && return false + return Base.is_id_start_char(c) end # Chars that we will never allow to be part of a valid non-operator identifier function is_never_id_char(ch::Char) + Base.ismalformed(ch) && return true cat = Unicode.category_code(ch) c = UInt32(ch) return ( diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 2d0e9a91eb94b..2d51938379e44 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -909,4 +909,11 @@ end @test strtok("a .&&₁ b") == ["a", " ", ".&&", "₁", " ", "b", ""] end +@testset "is_identifier[_start]_char" begin + malformed = first("\xe2") + @test Tokenize.is_identifier_char(malformed) == false + @test Tokenize.is_identifier_start_char(malformed) == false + @test Tokenize.is_never_id_char(malformed) == true +end + end From 8e9925911838e0e44d2e14bf5b87a6d522804d06 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: 
Wed, 31 Aug 2022 11:33:05 +0200 Subject: [PATCH 0497/1109] Update src/parser.jl Co-authored-by: Kristoffer Carlsson --- JuliaSyntax/src/parser.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8f232df54d4ba..b943a39760546 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -643,7 +643,7 @@ function parse_cond(ps::ParseState) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `:` in `?` expression") end - # FIXME: This is a very specific case. Error recovery should be handled mor + # FIXME: This is a very specific case. Error recovery should be handled more # generally elsewhere. if is_block_continuation_keyword(kind(t)) # a "continuaton keyword" is likely to belong to the surrounding code, so From 6bb483519baa7951e7906f0426b35ce3151fd6dc Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 1 Sep 2022 08:06:40 +1000 Subject: [PATCH 0498/1109] Use K"?" for head of ternary conditional (JuliaLang/JuliaSyntax.jl#85) This allows `a ? b : c` syntax to be clearly distinguished from normal `if a b else c end` without reparsing syntax trivia. For compatibility we convert to :if for normal Expr --- JuliaSyntax/src/expr.jl | 10 +++++++--- JuliaSyntax/src/parser.jl | 14 +++++++------- JuliaSyntax/test/parser.jl | 18 +++++++++--------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 69b3e5e05160d..10ef225ec36ac 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -24,9 +24,13 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) return val end end - headstr = untokenize(head(node), include_flag_suff=false) - headsym = !isnothing(headstr) ? Symbol(headstr) : - error("Can't untokenize head of kind $(kind(node))") + if kind(node) == K"?" + headsym = :if + else + headstr = untokenize(head(node), include_flag_suff=false) + headsym = !isnothing(headstr) ? 
Symbol(headstr) : + error("Can't untokenize head of kind $(kind(node))") + end node_args = children(node) insert_linenums = (headsym == :block || headsym == :toplevel) && need_linenodes args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1)) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e02b212700167..b538f744fc053 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -602,7 +602,7 @@ function parse_pair(ps::ParseState) end # Parse short form conditional expression -# a ? b : c ==> (if a b c) +# a ? b : c ==> (? a b c) # # flisp: parse-cond function parse_cond(ps::ParseState) @@ -613,38 +613,38 @@ function parse_cond(ps::ParseState) return end if !preceding_whitespace(t) - # a? b : c => (if a (error-t) b c) + # a? b : c => (? a (error-t) b c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required before `?` operator") end bump(ps, TRIVIA_FLAG) # ? t = peek_token(ps) if !preceding_whitespace(t) - # a ?b : c + # a ?b : c ==> (? a (error-t) b c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `?` operator") end parse_eq_star(ParseState(ps, range_colon_enabled=false)) t = peek_token(ps) if !preceding_whitespace(t) - # a ? b: c ==> (if a [ ] [?] [ ] b (error-t) [:] [ ] c) + # a ? b: c ==> (? a b (error-t) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required before `:` in `?` expression") end if kind(t) == K":" bump(ps, TRIVIA_FLAG) else - # a ? b c ==> (if a b (error) c) + # a ? b c ==> (? a b (error-t) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression") end t = peek_token(ps) if !preceding_whitespace(t) - # a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c) + # a ? b :c ==> (? a b (error-t) c) bump_invisible(ps, K"error", TRIVIA_FLAG, error="space required after `:` in `?` expression") end parse_eq_star(ps) - emit(ps, mark, K"if") + emit(ps, mark, K"?") end # Parse arrows. 
Like parse_RtoL, but specialized for --> syntactic operator diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 0b59ccec89834..dfd5bbc50d9fb 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -52,16 +52,16 @@ tests = [ "a => b" => "(call-i a => b)" ], JuliaSyntax.parse_cond => [ - "a ? b : c" => "(if a b c)" - "a ?\nb : c" => "(if a b c)" - "a ? b :\nc" => "(if a b c)" - "a ? b : c:d" => "(if a b (call-i c : d))" + "a ? b : c" => "(? a b c)" + "a ?\nb : c" => "(? a b c)" + "a ? b :\nc" => "(? a b c)" + "a ? b : c:d" => "(? a b (call-i c : d))" # Following are errors but should recover - "a? b : c" => "(if a (error-t) b c)" - "a ?b : c" => "(if a (error-t) b c)" - "a ? b: c" => "(if a b (error-t) c)" - "a ? b :c" => "(if a b (error-t) c)" - "a ? b c" => "(if a b (error-t) c)" + "a? b : c" => "(? a (error-t) b c)" + "a ?b : c" => "(? a (error-t) b c)" + "a ? b: c" => "(? a b (error-t) c)" + "a ? b :c" => "(? a b (error-t) c)" + "a ? b c" => "(? 
a b (error-t) c)" ], JuliaSyntax.parse_arrow => [ "x → y" => "(call-i x → y)" From 567e82747faa9b3f90bd52f9ff19fb5288911209 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 1 Sep 2022 14:00:46 +1000 Subject: [PATCH 0499/1109] More notes about where AST forms differ --- JuliaSyntax/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index f4ed6c98be0a5..4aa858135ac96 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -435,6 +435,22 @@ julia> text = "x = \"\"\"\n \$a\n b\"\"\"" 21:23 │ """ "\"\"\"" ``` +### Less redundant `block`s + +Sometimes `Expr` needs to contain redundant block constructs in order to have a +place to store `LineNumberNode`s, but we don't need these and avoid adding them +in several cases: +* The right hand side of short form function syntax +* The conditional in `elseif` +* The body of anonymous functions after the `->` + +### Distinct conditional ternary expression + +The syntax `a ? b : c` is the same as `if a b else c` in `Expr` so macros can't +distinguish these cases. Instead, we use a distinct expression head `K"?"` and +lower to `Expr(:if)` during `Expr` conversion. 
+ + ## More about syntax kinds We generally track the type of syntax nodes with a syntax "kind", stored From 9463bcac74e453fd2021542faedc463c24c4fab0 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 1 Sep 2022 14:36:22 +1000 Subject: [PATCH 0500/1109] Minor fix to test case comment + unify recovery for parsing : --- JuliaSyntax/src/parser.jl | 16 ++++------------ JuliaSyntax/test/parser.jl | 4 +++- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b538f744fc053..a8eb3767af035 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3244,7 +3244,6 @@ function parse_atom(ps::ParseState, check_identifiers=true) if leading_kind == K":" # symbol/expression quote # :foo ==> (quote foo) - # : foo ==> (quote (error-t) foo) t = peek_token(ps, 2) k = kind(t) if is_closing_token(ps, k) && (!is_keyword(k) || preceding_whitespace(t)) @@ -3256,19 +3255,12 @@ function parse_atom(ps::ParseState, check_identifiers=true) end bump(ps, TRIVIA_FLAG) # K":" if preceding_whitespace(t) - # : a ==> (quote (error-t) a)) - # === - # : - # a - # ==> (quote (error)) - bump_trivia(ps, TRIVIA_FLAG, + # : foo ==> (quote (error-t) foo) + # :\nfoo ==> (quote (error-t) foo) + bump_trivia(ps, TRIVIA_FLAG, skip_newlines=true, error="whitespace not allowed after `:` used for quoting") # Heuristic recovery - if kind(t) == K"NewlineWs" - bump_invisible(ps, K"error") - else - bump(ps) - end + bump(ps) else # Being inside quote makes keywords into identifiers at at the # first level of nesting diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index dfd5bbc50d9fb..f65397f4b602e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -568,10 +568,12 @@ tests = [ ], JuliaSyntax.parse_atom => [ ":foo" => "(quote foo)" - ": foo" => "(quote (error-t) foo)" # Literal colons ":)" => ":" ": end" => ":" + # Whitespace after quoting colon + ": foo" => "(quote (error-t) foo)" + ":\nfoo" => 
"(quote (error-t) foo)" # plain equals "=" => "(error =)" # Identifiers From 8f7dbba076d8c8b9f408ce042e3400f4c152ea96 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Thu, 1 Sep 2022 09:59:14 +0200 Subject: [PATCH 0501/1109] Add note about weird refparser behaviour --- JuliaSyntax/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index f4ed6c98be0a5..e8eb88c3b8184 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -549,6 +549,25 @@ name of compatibility, perhaps with a warning.) arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent digits, all of which are detected as invalid except for a trailing `f` when processed by `isnumtok_base`. +* `begin` and `end` are not parsed as keywords when indexing. Typed comprehensions + initially look the same, but can be distinguished from indexing once we handle + a `for` token; it is safe to treat `begin` and `end` as keywords afterwards. The + reference parser *only* handles this well when there's a newline before `for`: + ```julia + Any[foo(i) + for i in x if begin + true + end + ] + ``` + works, while + ```julia + Any[foo(i) for i in x if begin + true + end + ] + ``` + does not. JuliaSyntax handles both cases. ## Parsing / AST oddities and warts From cdc42df393e042faa462062c40ea5e8f5e9bba60 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 7 Sep 2022 16:11:46 +1000 Subject: [PATCH 0502/1109] Always encapsulate strings in a string node (JuliaLang/JuliaSyntax.jl#94) This change ensures that strings are always encapsulated within a `K"string"` internal node of the parse tree, giving a place to include the delimiters and unifying interpolated strings / strings with internal whitespace with plain string literals. Also wrap backtick delimited strings with a `K"cmdstring"` head for the same reasons. 
--- JuliaSyntax/README.md | 11 ++++ JuliaSyntax/src/expr.jl | 85 +++++++++++++++---------- JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/parser.jl | 125 +++++++++++++++++-------------------- JuliaSyntax/test/expr.jl | 13 ++++ JuliaSyntax/test/parser.jl | 75 +++++++++++----------- 6 files changed, 176 insertions(+), 134 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 524346e865401..a19b082882746 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -450,6 +450,17 @@ The syntax `a ? b : c` is the same as `if a b else c` in `Expr` so macros can't distinguish these cases. Instead, we use a distinct expression head `K"?"` and lower to `Expr(:if)` during `Expr` conversion. +### String nodes always wrapped in `K"string"` or `K"cmdstring"` + +All strings are surrounded by a node of kind `K"string"`, even non-interpolated +literals, so `"x"` parses as `(string "x")`. This makes string handling simpler +and more systematic because interpolations and triple strings with embedded +trivia don't need to be treated differently. It also gives a container in which +to attach the delimiting quotes. + +The same goes for command strings which are always wrapped in `K"cmdstring"` +regardless of whether they have multiple pieces (due to triple-quoted +dedenting) or otherwise. 
## More about syntax kinds diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 10ef225ec36ac..9aeb248bf1a07 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -6,6 +6,11 @@ function is_eventually_call(ex) is_eventually_call(ex.args[1])) end +function is_stringchunk(node) + k = kind(node) + return k == K"String" || k == K"CmdString" +end + function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) if !haschildren(node) val = node.val @@ -32,6 +37,54 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) error("Can't untokenize head of kind $(kind(node))") end node_args = children(node) + if headsym == :string || headsym == :cmdstring + # Julia string literals may be interspersed with trivia in two situations: + # 1. Triple quoted string indentation is trivia + # 2. An \ before newline removes the newline and any following indentation + # + # Such trivia is eagerly removed by the reference parser, so here we + # concatenate adjacent string chunks together for compatibility. + args = Vector{Any}() + i = 1 + while i <= length(node_args) + if is_stringchunk(node_args[i]) + if i < length(node_args) && is_stringchunk(node_args[i+1]) + buf = IOBuffer() + while i <= length(node_args) && is_stringchunk(node_args[i]) + write(buf, node_args[i].val) + i += 1 + end + push!(args, String(take!(buf))) + else + push!(args, node_args[i].val) + i += 1 + end + else + e = _to_expr(node_args[i]) + if e isa String && headsym == :string + # Wrap interpolated literal strings in (string) so we can + # distinguish them from the surrounding text (issue #38501) + # Ie, "$("str")" vs "str" + # https://github.com/JuliaLang/julia/pull/38692 + e = Expr(:string, e) + end + push!(args, e) + i += 1 + end + end + if length(args) == 1 && args[1] isa String + # If there's a single string remaining after joining, we unwrap + # to give a string literal. 
+ # """\n a\n b""" ==> "a\nb" + # headsym === :cmdstring follows this branch + return only(args) + else + @check headsym === :string + return Expr(headsym, args...) + end + end + + # Convert children insert_linenums = (headsym == :block || headsym == :toplevel) && need_linenodes args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1)) if headsym == :for && length(node_args) == 2 @@ -125,38 +178,6 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) pushfirst!(args, numeric_flags(flags(node))) elseif headsym == :typed_ncat insert!(args, 2, numeric_flags(flags(node))) - elseif headsym == :string && length(args) > 1 - # Julia string literals may be interspersed with trivia in two situations: - # 1. Triple quoted string indentation is trivia - # 2. An \ before newline removes the newline and any following indentation - # - # Such trivia is eagerly removed by the reference parser, so here we - # concatenate adjacent string chunks together for compatibility. - # - # TODO: Manage the non-interpolation cases with String and CmdString - # kinds instead? - args2 = Vector{Any}() - i = 1 - while i <= length(args) - if args[i] isa String && i < length(args) && args[i+1] isa String - buf = IOBuffer() - while i <= length(args) && args[i] isa String - write(buf, args[i]) - i += 1 - end - push!(args2, String(take!(buf))) - else - push!(args2, args[i]) - i += 1 - end - end - args = args2 - if length(args2) == 1 && args2[1] isa String - # If there's a single string remaining after joining we unwrap to - # give a string literal. 
- # """\n a\n b""" ==> "a\nb" - return args2[1] - end # elseif headsym == :string && length(args) == 1 && version <= (1,5) # Strip string from interpolations in 1.5 and lower to preserve # "hi$("ho")" ==> (string "hi" "ho") diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 9f461820dab75..7d7dce9eda20e 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -871,6 +871,7 @@ const _kind_names = "curly" "inert" # QuoteNode; not quasiquote "string" # A string interior node (possibly containing interpolations) + "cmdstring" # A cmd string node (containing delimiters plus string) "macrocall" "kw" # the = in f(a=1) "parameters" # the list after ; in f(; a=1) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index bb80a19b2a0b6..cc16480dfa646 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -468,28 +468,28 @@ function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) - if peek_behind(ps).kind in KSet"String string" + if peek_behind(ps).kind == K"string" is_doc = true k = peek(ps) if is_closing_token(ps, k) - # "notdoc" ] ==> "notdoc" + # "notdoc" ] ==> (string "notdoc") is_doc = false elseif k == K"NewlineWs" k2 = peek(ps, 2) if is_closing_token(ps, k2) || k2 == K"NewlineWs" - # "notdoc" \n] ==> "notdoc" - # "notdoc" \n\n foo ==> "notdoc" + # "notdoc" \n] ==> (string "notdoc") + # "notdoc" \n\n foo ==> (string "notdoc") is_doc = false else # Allow a single newline - # "doc" \n foo ==> (macrocall core_@doc "doc" foo) + # "doc" \n foo ==> (macrocall core_@doc (string "doc") foo) bump(ps, TRIVIA_FLAG) # NewlineWs end else - # "doc" foo ==> (macrocall core_@doc "doc" foo) + # "doc" foo ==> (macrocall core_@doc (string "doc") foo) # "doc $x" foo ==> (macrocall core_@doc (string "doc " x) foo) # Allow docstrings with embedded trailing whitespace trivia - # """\n doc\n """ foo ==> (macrocall core_@doc "doc\n" foo) + # """\n doc\n 
""" foo ==> (macrocall core_@doc (string-s "doc\n") foo) end if is_doc reset_node!(ps, atdoc_mark, kind=K"core_@doc") @@ -1048,11 +1048,12 @@ function parse_juxtapose(ps::ParseState) if n_terms == 1 bump_invisible(ps, K"*") end - if prev_kind == K"String" || is_string_delim(t) + if prev_kind == K"string" || is_string_delim(t) # issue #20575 # - # "a""b" ==> (call-i "a" * (error) "b") - # "a"x ==> (call-i "a" * (error) x) + # "a""b" ==> (call-i (string "a") * (error-t) (string "b")) + # "a"x ==> (call-i (string "a") * (error-t) x) + # "$y"x ==> (call-i (string (string y)) * (error-t) x) bump_invisible(ps, K"error", TRIVIA_FLAG, error="cannot juxtapose string literal") end @@ -1389,7 +1390,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @foo (x) ==> (macrocall @foo x) # @foo (x,y) ==> (macrocall @foo (tuple x y)) # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) - # [@foo "x"] ==> (vect (macrocall @foo "x")) + # [@foo x] ==> (vect (macrocall @foo x)) finish_macroname(ps, mark, valid_macroname, macro_name_position) let ps = with_space_sensitive(ps) # Space separated macro arguments @@ -1420,7 +1421,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif (ps.space_sensitive && preceding_whitespace(t) && k in KSet"( [ { \ Char \" \"\"\" ` ```") # [f (x)] ==> (hcat f x) - # [f "x"] ==> (hcat f "x") + # [f x] ==> (hcat f x) break elseif k == K"(" if is_macrocall @@ -1597,12 +1598,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k in KSet" \" \"\"\" ` ``` " && !preceding_whitespace(t) && valid_macroname # Custom string and command literals - # x"str" ==> (macrocall @x_str "str") - # x`str` ==> (macrocall @x_cmd "str") - # x"" ==> (macrocall @x_str "") - # x`` ==> (macrocall @x_cmd "") + # x"str" ==> (macrocall @x_str (string-r "str")) + # x`str` ==> (macrocall @x_cmd (cmdstring-r "str")) + # x"" ==> (macrocall @x_str (string-r "")) + # x`` ==> (macrocall @x_cmd (cmdstring-r "")) # 
Triple quoted procesing for custom strings - # r"""\nx""" ==> (macrocall @r_str "x") + # r"""\nx""" ==> (macrocall @r_str (string-sr "x")) # r"""\n x\n y""" ==> (macrocall @r_str (string-sr "x\n" "y")) # r"""\n x\\n y""" ==> (macrocall @r_str (string-sr "x\\\n" "y")) # @@ -1615,11 +1616,11 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) k = kind(t) if !preceding_whitespace(t) && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)) # Macro sufficies can include keywords and numbers - # x"s"y ==> (macrocall @x_str "s" "y") - # x"s"end ==> (macrocall @x_str "s" "end") - # x"s"in ==> (macrocall @x_str "s" "in") - # x"s"2 ==> (macrocall @x_str "s" 2) - # x"s"10.0 ==> (macrocall @x_str "s" 10.0) + # x"s"y ==> (macrocall @x_str (string-r "s") "y") + # x"s"end ==> (macrocall @x_str (string-r "s") "end") + # x"s"in ==> (macrocall @x_str (string-r "s") "in") + # x"s"2 ==> (macrocall @x_str (string-r "s") 2) + # x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0) suffix_kind = (k == K"Identifier" || is_keyword(k) || is_word_operator(k)) ? K"String" : k bump(ps, remap_kind=suffix_kind) @@ -1813,7 +1814,7 @@ function parse_resword(ps::ParseState) parse_unary_prefix(ps) end # module A \n a \n b \n end ==> (module true A (block a b)) - # module A \n "x"\na \n end ==> (module true A (block (core_@doc "x" a))) + # module A \n "x"\na \n end ==> (module true A (block (core_@doc (string "x") a))) parse_block(ps, parse_docstring) bump_closing_token(ps, K"end") emit(ps, mark, K"module") @@ -3032,8 +3033,7 @@ function parse_string(ps::ParseState, raw::Bool) indent_ref_len = typemax(Int) indent_chunks = acquire_positions(ps.stream) buf = textbuf(ps) - str_flags = (triplestr ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) | - (raw ? RAW_STRING_FLAG : EMPTY_FLAGS) + chunk_flags = raw ? 
RAW_STRING_FLAG : EMPTY_FLAGS bump(ps, TRIVIA_FLAG) first_chunk = true n_valid_chunks = 0 @@ -3048,18 +3048,9 @@ function parse_string(ps::ParseState, raw::Bool) bump(ps, TRIVIA_FLAG) k = peek(ps) if k == K"(" - # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") - m = position(ps) + # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") + # "hi$("ho")" ==> (string "hi" (string "ho")) parse_atom(ps) - # https://github.com/JuliaLang/julia/pull/38692 - prev = peek_behind(ps) - if prev.kind == string_chunk_kind - # Wrap interpolated literal strings in (string) so we can - # distinguish them from the surrounding text (issue #38501) - # "hi$("ho")" ==> (string "hi" (string "ho")) - # "hi$("""ho""")" ==> (string "hi" (string-s "ho")) - emit(ps, m, K"string", prev.flags) - end elseif k == K"var" # var identifiers disabled in strings # "$var" ==> (string var) @@ -3087,7 +3078,7 @@ function parse_string(ps::ParseState, raw::Bool) (s == 2 && (buf[first_byte(t)] == UInt8('\r') && b == UInt8('\n'))) end # First line of triple string is a newline only: mark as trivia. 
- # """\nx""" ==> "x" + # """\nx""" ==> (string-s "x") # """\n\nx""" ==> (string-s "\n" "x") bump(ps, TRIVIA_FLAG) first_chunk = false @@ -3097,6 +3088,7 @@ function parse_string(ps::ParseState, raw::Bool) # Triple-quoted dedenting: # Various newlines (\n \r \r\n) and whitespace (' ' \t) # """\n x\n y""" ==> (string-s "x\n" "y") + # ```\n x\n y``` ==> (macrocall :(Core.var"@cmd") (cmdstring-sr "x\n" "y")) # """\r x\r y""" ==> (string-s "x\n" "y") # """\r\n x\r\n y""" ==> (string-s "x\n" "y") # Spaces or tabs or mixtures acceptable @@ -3158,7 +3150,7 @@ function parse_string(ps::ParseState, raw::Bool) b = buf[last_byte(t)] prev_chunk_newline = b == UInt8('\n') || b == UInt8('\r') end - bump(ps, str_flags) + bump(ps, chunk_flags) first_chunk = false n_valid_chunks += 1 end @@ -3187,36 +3179,37 @@ function parse_string(ps::ParseState, raw::Bool) if had_end_delim if n_valid_chunks == 0 # Empty strings, or empty after triple quoted processing - # "" ==> "" - # """\n """ ==> "" - bump_invisible(ps, string_chunk_kind, str_flags) + # "" ==> (string "") + # """\n """ ==> (string-s "") + bump_invisible(ps, string_chunk_kind, chunk_flags) end bump(ps, TRIVIA_FLAG) else # Missing delimiter recovery - # "str ==> "str" (error) + # "str ==> (string "str" (error-t)) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Unterminated string literal") end - if n_valid_chunks > 1 || had_interpolation - # String interpolations - # "$x$y$z" ==> (string x y z) - # "$(x)" ==> (string x) - # "$x" ==> (string x) - # """$x""" ==> (string-s x) - # - # Strings with embedded whitespace trivia - # "a\\\nb" ==> (string "a" "b") - # "a\\\rb" ==> (string "a" "b") - # "a\\\r\nb" ==> (string "a" "b") - # "a\\\n \tb" ==> (string "a" "b") - emit(ps, mark, K"string", str_flags) - else - # Strings with only a single valid string chunk - # "str" ==> "str" - # "a\\\n" ==> "a" - # "a\\\r" ==> "a" - # "a\\\r\n" ==> "a" - end + # String interpolations + # "$x$y$z" ==> (string x y z) + # "$(x)" ==> (string x) + # 
"$x" ==> (string x) + # """$x""" ==> (string-s x) + # + # Strings with embedded whitespace trivia + # "a\\\nb" ==> (string "a" "b") + # "a\\\rb" ==> (string "a" "b") + # "a\\\r\nb" ==> (string "a" "b") + # "a\\\n \tb" ==> (string "a" "b") + # + # Strings with only a single valid string chunk + # "str" ==> (string "str") + # "a\\\n" ==> (string "a") + # "a\\\r" ==> (string "a") + # "a\\\r\n" ==> (string "a") + string_kind = delim_k in KSet"\" \"\"\"" ? K"string" : K"cmdstring" + str_flags = (triplestr ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) | + (raw ? RAW_STRING_FLAG : EMPTY_FLAGS) + emit(ps, mark, string_kind, str_flags) end function emit_braces(ps, mark, ckind, cflags) @@ -3264,7 +3257,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # Heuristic recovery bump(ps) else - # Being inside quote makes keywords into identifiers at at the + # Being inside quote makes keywords into identifiers at the # first level of nesting # :end ==> (quote end) # :(end) ==> (quote (error (end))) @@ -3366,9 +3359,9 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif is_string_delim(leading_kind) parse_string(ps, false) elseif leading_kind in KSet"` ```" - # `` ==> (macrocall core_@cmd "") - # `cmd` ==> (macrocall core_@cmd "cmd") - # ```cmd``` ==> (macrocall core_@cmd "cmd"-s) + # `` ==> (macrocall core_@cmd (cmdstring-r "")) + # `cmd` ==> (macrocall core_@cmd (cmdstring-r "cmd")) + # ```cmd``` ==> (macrocall core_@cmd (cmdstring-sr "cmd")) bump_invisible(ps, K"core_@cmd") parse_string(ps, true) emit(ps, mark, K"macrocall") diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index da2704af403f8..2995b2389ac94 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -144,4 +144,17 @@ LineNumberNode(2), :body)) end + + @testset "String conversions" begin + # String unwrapping / wrapping + @test parseall(Expr, "\"str\"", rule=:statement) == "str" + @test parseall(Expr, "\"\$(\"str\")\"", rule=:statement) == + Expr(:string, 
Expr(:string, "str")) + # Concatenation of string chunks in triple quoted cases + @test parseall(Expr, "```\n a\n b```", rule=:statement) == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), + "a\nb") + @test parseall(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"", rule=:statement) == + Expr(:string, "a\n", :x, "\nb\nc") + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b7af890c68b5c..e567a72a4f195 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -33,7 +33,7 @@ tests = [ "a;b;c" => "(toplevel a b c)" "a;;;b;;" => "(toplevel a b)" """ "x" a ; "y" b """ => - """(toplevel (macrocall :(Core.var"@doc") "x" a) (macrocall :(Core.var"@doc") "y" b))""" + """(toplevel (macrocall :(Core.var"@doc") (string "x") a) (macrocall :(Core.var"@doc") (string "y") b))""" "x y" => "x (error-t y)" ], JuliaSyntax.parse_eq => [ @@ -140,8 +140,8 @@ tests = [ "(x-1)y" => "(call-i (call-i x - 1) * y)" "x'y" => "(call-i (' x) * y)" # errors - "\"a\"\"b\"" => "(call-i \"a\" * (error-t) \"b\")" - "\"a\"x" => "(call-i \"a\" * (error-t) x)" + "\"a\"\"b\"" => "(call-i (string \"a\") * (error-t) (string \"b\"))" + "\"a\"x" => "(call-i (string \"a\") * (error-t) x)" # Not juxtaposition - parse_juxtapose will consume only the first token. "x.3" => "x" "sqrt(2)2" => "(call sqrt 2)" @@ -254,9 +254,9 @@ tests = [ "@foo (x,y)" => "(macrocall @foo (tuple x y))" "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" - "[@foo \"x\"]" => "(vect (macrocall @foo \"x\"))" - "[f (x)]" => "(hcat f x)" - "[f \"x\"]" => "(hcat f \"x\")" + "[@foo x]" => "(vect (macrocall @foo x))" + "[f (x)]" => "(hcat f x)" + "[f x]" => "(hcat f x)" # Macro names "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. 
x)" @@ -309,20 +309,20 @@ tests = [ "S{a,b}" => "(curly S a b)" "S {a}" => "(curly S (error-t) a)" # String macros - "x\"str\"" => """(macrocall @x_str "str")""" - "x`str`" => """(macrocall @x_cmd "str")""" - "x\"\"" => """(macrocall @x_str "")""" - "x``" => """(macrocall @x_cmd "")""" + "x\"str\"" => """(macrocall @x_str (string-r "str"))""" + "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" + "x\"\"" => """(macrocall @x_str (string-r ""))""" + "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" # Triple quoted procesing for custom strings - "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str "x")""" + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-sr "x"))""" "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\n" "y"))""" "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\\\n" "y"))""" # Macro sufficies can include keywords and numbers - "x\"s\"y" => """(macrocall @x_str "s" "y")""" - "x\"s\"end" => """(macrocall @x_str "s" "end")""" - "x\"s\"in" => """(macrocall @x_str "s" "in")""" - "x\"s\"2" => """(macrocall @x_str "s" 2)""" - "x\"s\"10.0" => """(macrocall @x_str "s" 10.0)""" + "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" + "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" + "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" + "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" + "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" ], JuliaSyntax.parse_resword => [ # In normal_context @@ -381,7 +381,7 @@ tests = [ "module do \n end" => "(module true (error (do)) (block))" "module \$A end" => "(module true (\$ A) (block))" "module A \n a \n b \n end" => "(module true A (block a b))" - """module A \n "x"\na\n end""" => """(module true A (block (macrocall :(Core.var"@doc") "x" a)))""" + """module A \n "x"\na\n end""" => """(module true A (block (macrocall :(Core.var"@doc") (string "x") a)))""" # export "export a" => "(export a)" "export @a" => "(export @a)" @@ -647,9 +647,9 @@ tests = 
[ # __dot__ macro "@. x y" => "(macrocall @__dot__ x y)" # cmd strings - "``" => "(macrocall :(Core.var\"@cmd\") \"\")" - "`cmd`" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" - "```cmd```" => "(macrocall :(Core.var\"@cmd\") \"cmd\")" + "``" => "(macrocall :(Core.var\"@cmd\") (cmdstring-r \"\"))" + "`cmd`" => "(macrocall :(Core.var\"@cmd\") (cmdstring-r \"cmd\"))" + "```cmd```" => "(macrocall :(Core.var\"@cmd\") (cmdstring-sr \"cmd\"))" # literals "42" => "42" "1.0e-1000" => "0.0" @@ -706,15 +706,15 @@ tests = [ # parse_string "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" - "\"hi\$(\"\"\"ho\"\"\")\"" => "(string \"hi\" (string-s \"ho\"))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" "\"\$in\"" => "(string in)" - raw"\"\xqqq\"" => "✘" + raw"\"\xqqq\"" => "(string ✘)" # Triple-quoted dedenting: - "\"\"\"\nx\"\"\"" => "\"x\"" + "\"\"\"\nx\"\"\"" => raw"""(string-s "x")""" "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" + "```\n x\n y```" => raw"""(macrocall :(Core.var"@cmd") (cmdstring-sr "x\n" "y"))""" # Various newlines (\n \r \r\n) and whitespace (' ' \t) "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")""" "\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")""" @@ -738,14 +738,14 @@ tests = [ "\"\"\"\n \$a \n \$b\"\"\"" => raw"""(string-s a " \n" b)""" "\"\"\"\n \$a\n \$b\n\"\"\"" => raw"""(string-s " " a "\n" " " b "\n")""" # Empty chunks after dedent are removed - "\"\"\"\n \n \"\"\"" => "\"\\n\"" + "\"\"\"\n \n \"\"\"" => "(string-s \"\\n\")" # Newline at end of string "\"\"\"\n x\n y\n\"\"\"" => raw"""(string-s " x\n" " y\n")""" # Empty strings, or empty after triple quoted processing - "\"\"" => "\"\"" - "\"\"\"\n \"\"\"" => "\"\"" + "\"\"" => "(string \"\")" + "\"\"\"\n \"\"\"" => "(string-s \"\")" # Missing delimiter - "\"str" => "\"str\" (error-t)" + "\"str" => "(string \"str\" (error-t))" # String interpolations 
"\"\$x\$y\$z\"" => "(string x y z)" "\"\$(x)\"" => "(string x)" @@ -756,17 +756,20 @@ tests = [ "\"a\\\r\nb\"" => raw"""(string "a" "b")""" "\"a\\\n \tb\"" => raw"""(string "a" "b")""" # Strings with only a single valid string chunk - "\"str\"" => "\"str\"" + "\"str\"" => "(string \"str\")" + "\"a\\\n\"" => "(string \"a\")" + "\"a\\\r\"" => "(string \"a\")" + "\"a\\\r\n\"" => "(string \"a\")" ], JuliaSyntax.parse_docstring => [ - """ "notdoc" ] """ => "\"notdoc\"" - """ "notdoc" \n] """ => "\"notdoc\"" - """ "notdoc" \n\n foo """ => "\"notdoc\"" - """ "doc" \n foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" - """ "doc" foo """ => """(macrocall :(Core.var"@doc") "doc" foo)""" + """ "notdoc" ] """ => "(string \"notdoc\")" + """ "notdoc" \n] """ => "(string \"notdoc\")" + """ "notdoc" \n\n foo """ => "(string \"notdoc\")" + """ "doc" \n foo """ => """(macrocall :(Core.var"@doc") (string "doc") foo)""" + """ "doc" foo """ => """(macrocall :(Core.var"@doc") (string "doc") foo)""" """ "doc \$x" foo """ => """(macrocall :(Core.var"@doc") (string "doc " x) foo)""" # Allow docstrings with embedded trailing whitespace trivia - "\"\"\"\n doc\n \"\"\" foo" => """(macrocall :(Core.var"@doc") "doc\\n" foo)""" + "\"\"\"\n doc\n \"\"\" foo" => """(macrocall :(Core.var"@doc") (string-s "doc\\n") foo)""" ], ] @@ -842,8 +845,8 @@ end # ɛµ normalizes to εμ @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" @test test_parse(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall @\u03B5\u03BC)" - @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str \"\")" - @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd \"\")" + @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" + @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" # · and · normalize to ⋅ @test 
test_parse(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" @test test_parse(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" From 428b04a7a381966b63e7a51a2297d2f86b1f85ff Mon Sep 17 00:00:00 2001 From: Olivier MATHIEU Date: Fri, 9 Sep 2022 08:36:29 +0200 Subject: [PATCH 0503/1109] Update link to last known work on the lowerer (JuliaLang/JuliaSyntax.jl#97) Updates README with a link to last lowerer proto (circa 2019), since the branch has been deleted on julia . It resolves directly to the last known change / cumulative known work on the lowerer --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index a19b082882746..ab426bfe84084 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -821,7 +821,7 @@ Here's a few links to relevant Julia issues. #### Lowering * A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201 — - some of this should be ported. + some of this should be ported. (Last commit at https://github.com/JuliaLang/julia/tree/df61138fcf97d03dcbbba10e962571af9700db56/ ) * The closure capture problem https://github.com/JuliaLang/julia/issues/15276 — would be interesting to see whether we can tackle some of the harder cases in a new implementation. From 9e5f2c42a055c226037334edc99f03a5cc57eb95 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 9 Sep 2022 16:41:13 +1000 Subject: [PATCH 0504/1109] Remove many unnecessary allocations (JuliaLang/JuliaSyntax.jl#96) * Eliminate the use of Ref to communicate values out of the anonymous function passed to parse_brackets(). It seems this forces unnecessary allocation of the Ref captured by the closure. * Force specialization on `down` in `parse_assignment_with_initial_ex` to avoid an unnecessary generic dispatch there. 
This should probably bring down the total number of allocations during parsing to around O(log(N) + M) in the length of the file N and depth, M, of the AST. --- JuliaSyntax/src/parser.jl | 53 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index cc16480dfa646..0c4d24dafd0ad 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -540,7 +540,7 @@ function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) parse_assignment_with_initial_ex(ps, mark, down, equals_is_kw) end -function parse_assignment_with_initial_ex(ps::ParseState, mark, down, equals_is_kw::Bool) +function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T, equals_is_kw::Bool) where {T} # where => specialize on `down` t = peek_token(ps) k = kind(t) if !is_prec_assignment(k) @@ -1169,19 +1169,18 @@ function parse_unary_call(ps::ParseState) mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( initial_semi = peek(ps) == K";" - is_call = Ref(false) - is_block = Ref(false) - parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_call[] = had_commas || had_splat || initial_semi - is_block[] = !is_call[] && num_semis > 0 - return (needs_parameters=is_call[], - eq_is_kw_before_semi=is_call[], - eq_is_kw_after_semi=is_call[]) + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + is_call = had_commas || had_splat || initial_semi + return (needs_parameters=is_call, + eq_is_kw_before_semi=is_call, + eq_is_kw_after_semi=is_call, + is_call=is_call, + is_block=!is_call && num_semis > 0) end # The precedence between unary + and any following infix ^ depends on # whether the parens are a function call or not - if is_call[] + if opts.is_call if preceding_whitespace(t2) # Whitespace not allowed before prefix function call bracket # + (a,b) ==> (call + (error) a b) @@ -1203,7 +1202,7 @@ function 
parse_unary_call(ps::ParseState) parse_factor_with_initial_ex(ps, mark) else # Unary function calls with brackets as grouping, not an arglist - if is_block[] + if opts.is_block # +(a;b) ==> (call + (block a b)) emit(ps, mark_before_paren, K"block") end @@ -1995,14 +1994,14 @@ function parse_function(ps::ParseState) # distinguish the cases here. bump(ps, TRIVIA_FLAG) is_empty_tuple = peek(ps, skip_newlines=true) == K")" - _is_anon_func = Ref(is_anon_func) - parse_brackets(ps, K")") do _, _, _, _ - _is_anon_func[] = peek(ps, 2) != K"(" - return (needs_parameters = _is_anon_func[], - eq_is_kw_before_semi = _is_anon_func[], - eq_is_kw_after_semi = _is_anon_func[]) + opts = parse_brackets(ps, K")") do _, _, _, _ + _is_anon_func = peek(ps, 2) != K"(" + return (needs_parameters = _is_anon_func, + eq_is_kw_before_semi = _is_anon_func, + eq_is_kw_after_semi = _is_anon_func, + is_anon_func=_is_anon_func) end - is_anon_func = _is_anon_func[] + is_anon_func = opts.is_anon_func if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) # function (x,y) end ==> (function (tuple x y) (block)) @@ -2859,17 +2858,16 @@ function parse_paren(ps::ParseState, check_identifiers=true) # Deal with all other cases of tuple or block syntax via the generic # parse_brackets initial_semi = peek(ps) == K";" - is_tuple = Ref(false) - is_block = Ref(false) - parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_tuple[] = had_commas || (had_splat && num_semis >= 1) || + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + is_tuple = had_commas || (had_splat && num_semis >= 1) || (initial_semi && (num_semis == 1 || num_subexprs > 0)) - is_block[] = num_semis > 0 - return (needs_parameters=is_tuple[], + return (needs_parameters=is_tuple, eq_is_kw_before_semi=false, - eq_is_kw_after_semi=is_tuple[]) + eq_is_kw_after_semi=is_tuple, + is_tuple=is_tuple, + is_block=num_semis > 0) end - if is_tuple[] + if opts.is_tuple # Tuple 
syntax with commas # (x,) ==> (tuple x) # (x,y) ==> (tuple x y) @@ -2886,7 +2884,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) # (a; b; c,d) ==> (tuple a (parameters b (parameters c d))) # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3))) emit(ps, mark, K"tuple") - elseif is_block[] + elseif opts.is_block # Blocks # (;;) ==> (block) # (a=1;) ==> (block (= a 1)) @@ -3016,6 +3014,7 @@ function parse_brackets(after_parse::Function, release_positions(ps.stream, params_marks) release_positions(ps.stream, eq_positions) bump_closing_token(ps, closing_kind) + return actions end is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) From d8b11f46f3006a7712a832d9bf860fafe46af7d9 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Wed, 14 Sep 2022 12:46:48 +0200 Subject: [PATCH 0505/1109] rename parseall to parseall_throws --- JuliaSyntax/tools/check_all_packages.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 29e32922692a8..40f55bfaa5744 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -6,7 +6,7 @@ using JuliaSyntax, Logging # like Meta.parseall, but throws -function parseall(str) +function parseall_throws(str) pos = firstindex(str) exs = [] while pos <= lastindex(str) @@ -68,7 +68,7 @@ Logging.with_logger(logger) do push!(exceptions, ex) meta_parse = "success" try - parseall(file) + parseall_throws(file) catch err2 meta_parse = "fail" ex_count -= 1 From 7927445e223342b1a0538a96672f624b29faed59 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 15 Sep 2022 05:53:40 +1000 Subject: [PATCH 0506/1109] Parse `do` blocks without desugaring the closure (JuliaLang/JuliaSyntax.jl#98) The reference parser represents `do` syntax with a closure for the second argument. 
However, the nested closure with `->` head is implied rather than present in the surface syntax, which suggests this is a premature desugaring step. Instead we emit a flatter three-argument form for `do`. --- JuliaSyntax/README.md | 17 +++++++++++++++++ JuliaSyntax/src/expr.jl | 9 +++++---- JuliaSyntax/src/parser.jl | 24 +++++++++++------------- JuliaSyntax/test/parser.jl | 8 ++++---- 4 files changed, 37 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index ab426bfe84084..99acad9d8b69b 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -462,6 +462,23 @@ The same goes for command strings which are always wrapped in `K"cmdstring"` regardless of whether they have multiple pieces (due to triple-quoted dedenting) or otherwise. +### No desugaring of the closure in do blocks + +The reference parser represents `do` syntax with a closure for the second +argument. That is, + +```julia +f(x) do y + body +end +``` + +becomes `(do (call f x) (-> (tuple y) (block body)))` in the reference parser. + +However, the nested closure with `->` head is implied here rather than present +in the surface syntax, which suggests this is a premature desugaring step. +Instead we emit the flatter structure `(do (call f x) (tuple y) (block body))`. 
+ ## More about syntax kinds We generally track the type of syntax nodes with a syntax "kind", stored diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 9aeb248bf1a07..814a141d36a5d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -218,15 +218,16 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) end elseif headsym == :module pushfirst!(args[3].args, loc) - end - if headsym == :inert || (headsym == :quote && length(args) == 1 && + elseif headsym == :inert || (headsym == :quote && length(args) == 1 && !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || a1 isa Bool # <- compat hack, Julia 1.4+ )) return QuoteNode(only(args)) - else - return Expr(headsym, args...) + elseif headsym == :do + @check length(args) == 3 + return Expr(:do, args[1], Expr(:->, args[2], args[3])) end + return Expr(headsym, args...) end Base.Expr(node::SyntaxNode) = _to_expr(node) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 86fe2e929f6fc..2d754e71b19d6 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -423,8 +423,7 @@ end # a \n b ==> (block a b) # # flisp: parse-block -function parse_block(ps::ParseState, down=parse_eq, mark=position(ps), - consume_end=false) +function parse_block(ps::ParseState, down=parse_eq, mark=position(ps)) parse_block_inner(ps::ParseState, down) emit(ps, mark, K"block") end @@ -1452,10 +1451,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_call_arglist(ps, K")", is_macrocall) emit(ps, mark, is_macrocall ? 
K"macrocall" : K"call") if peek(ps) == K"do" - # f(x) do y body end ==> (do (call :f :x) (-> (tuple :y) (block :body))) - bump(ps, TRIVIA_FLAG) - parse_do(ps) - emit(ps, mark, K"do") + # f(x) do y body end ==> (do (call :f :x) (tuple :y) (block :body)) + parse_do(ps, mark) end if is_macrocall break @@ -2179,23 +2176,24 @@ function parse_catch(ps::ParseState) end # flisp: parse-do -function parse_do(ps::ParseState) +function parse_do(ps::ParseState, mark) + bump(ps, TRIVIA_FLAG) # do ps = normal_context(ps) - mark = position(ps) + m = position(ps) if peek(ps) in KSet"NewlineWs ;" - # f() do\nend ==> (do (call f) (-> (tuple) (block))) - # f() do ; body end ==> (do (call f) (-> (tuple) (block body))) + # f() do\nend ==> (do (call f) (tuple) (block)) + # f() do ; body end ==> (do (call f) (tuple) (block body)) # this trivia needs to go into the tuple due to the way position() # works. bump(ps, TRIVIA_FLAG) else - # f() do x, y\n body end ==> (do (call f) (-> (tuple x y) (block body))) + # f() do x, y\n body end ==> (do (call f) (tuple x y) (block body)) parse_comma_separated(ps, parse_range) end - emit(ps, mark, K"tuple") + emit(ps, m, K"tuple") parse_block(ps) bump_closing_token(ps, K"end") - emit(ps, mark, K"->") + emit(ps, mark, K"do") end function macro_name_kind(k) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index dbd56c27569a0..24383a25ca538 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -242,10 +242,10 @@ tests = [ "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. 
(\$ A) (quote @x)))" # do - "f() do\nend" => "(do (call f) (-> (tuple) (block)))" - "f() do ; body end" => "(do (call f) (-> (tuple) (block body)))" - "f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))" - "f(x) do y body end" => "(do (call f x) (-> (tuple y) (block body)))" + "f() do\nend" => "(do (call f) (tuple) (block))" + "f() do ; body end" => "(do (call f) (tuple) (block body))" + "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" + "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" # Keyword arguments depend on call vs macrocall "foo(a=1)" => "(call foo (kw a 1))" "@foo(a=1)" => "(macrocall @foo (= a 1))" From a699a4e3c7cc1acb63c8282e1b36aa1fac9a2193 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 17 Sep 2022 12:13:03 +1000 Subject: [PATCH 0507/1109] Remove Mmap as a dependency (JuliaLang/JuliaSyntax.jl#100) We can use Mmap for mapping IOStream but * It's not clear that this is necessary * Removing it avoids stdlib dependencies, making Base integration easier. 
--- JuliaSyntax/Project.toml | 1 - JuliaSyntax/src/JuliaSyntax.jl | 2 -- JuliaSyntax/src/parse_stream.jl | 4 ---- 3 files changed, 7 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index d5920354fd2c3..9b175d9d4a5b6 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -7,7 +7,6 @@ version = "0.1.0" julia = "1.6" [deps] -Mmap = "a63ad114-7e13-5084-954f-fe012c677804" [extras] Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index da57cead8874a..6b4f93d4a2fe9 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,7 +1,5 @@ module JuliaSyntax -using Mmap - # Helper utilities include("utils.jl") diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index be4a49605803a..6665c3fa5f0f8 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -250,10 +250,6 @@ function ParseStream(io::Base.GenericIOBuffer; version=VERSION) textbuf = unsafe_wrap(Vector{UInt8}, pointer(io.data), length(io.data)) ParseStream(textbuf, io, position(io)+1, version) end -function ParseStream(io::IOStream; version=VERSION) - textbuf = Mmap.mmap(io) - ParseStream(textbuf, io, position(io)+1, version) -end function ParseStream(io::IO; version=VERSION) textbuf = read(io) ParseStream(textbuf, textbuf, 1, version) From 9358462ffb8fb0821ecfc670be40f4a4252eefbb Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 19 Sep 2022 14:34:04 +1000 Subject: [PATCH 0508/1109] Fix lineno type in calling Core.Compiler.fl_parse --- JuliaSyntax/src/hooks.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 2b024de79aec6..1a183a09fca76 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -46,7 +46,7 @@ end # https://github.com/JuliaLang/julia/pull/43876 # Prior to this, the following signature was needed: function 
core_parser_hook(code, filename, offset, options) - core_parser_hook(code, filename, LineNumberNode(0), offset, options) + core_parser_hook(code, filename, 1, offset, options) end # Debug log file for dumping parsed code @@ -96,6 +96,7 @@ function _core_parser_hook(code, filename, lineno, offset, options) e = Expr(:error, ParseError(SourceFile(code, filename=filename), stream.diagnostics)) ex = options === :all ? Expr(:toplevel, e) : e else + # FIXME: Add support to lineno to this tree build (via SourceFile?) ex = build_tree(Expr, stream, filename=filename, wrap_toplevel_as_kind=K"None") if Meta.isexpr(ex, :None) # The None wrapping is only to give somewhere for trivia to be From 83cfaf87f2c7b6e0ca3b09cf629bfeab14cd3e99 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 19 Sep 2022 15:28:44 +1000 Subject: [PATCH 0509/1109] Fix initialization of character byte offsets in lexer --- JuliaSyntax/src/tokenize.jl | 6 +++--- JuliaSyntax/test/tokenize.jl | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index df7d8e6243fa4..9c2bfd9336a7f 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -98,13 +98,13 @@ function Lexer(io::IO) c2 = read(io, Char) p2 = position(io) if eof(io) - c3, p3 = EOF_CHAR, p1 - c4, p4 = EOF_CHAR, p1 + c3, p3 = EOF_CHAR, p2 + c4, p4 = EOF_CHAR, p2 else c3 = read(io, Char) p3 = position(io) if eof(io) - c4, p4 = EOF_CHAR, p1 + c4, p4 = EOF_CHAR, p3 else c4 = read(io, Char) p4 = position(io) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 2d51938379e44..43e87389ee699 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -897,6 +897,12 @@ end ] end +@testset "lexer initialization" begin + # Ranges of EndMarker + @test (t = last(collect(tokenize("+"))); (t.startbyte, t.endbyte)) == (1,0) + @test (t = last(collect(tokenize("+*"))); (t.startbyte, t.endbyte)) == (2,1) +end + @testset "dotop 
miscellanea" begin @test strtok("a .-> b") == ["a", " ", ".-", ">", " ", "b", ""] @test strtok(".>: b") == [".>:", " ", "b", ""] From 440f4e1bfbc854cab17c2fa8216e18f2e20db8d4 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 21 Sep 2022 10:53:01 +1000 Subject: [PATCH 0510/1109] Generate Expr(:incomplete) for errors which hit EOF (JuliaLang/JuliaSyntax.jl#102) This allows REPL completion to work correctly. It works by pattern matching the parse tree, rather than hard coding incomplete expression detection into the parser itself. There's still more changes from JuliaLang/JuliaSyntax.jl#88 which would help make this nicer but for now it works. The tests here are somewhat derived from Base, but had to be reviewed and tweaked because they turn out to not really be consistent. For example, "begin;" and "begin" are both the prefix of a block construct, one of them shouldn't come out as `:other`. --- JuliaSyntax/src/hooks.jl | 94 ++++++++++++++++++++++++++++++++-- JuliaSyntax/src/parser_api.jl | 8 ++- JuliaSyntax/src/syntax_tree.jl | 5 +- JuliaSyntax/test/hooks.jl | 63 ++++++++++++++++++++++- 4 files changed, 163 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 1a183a09fca76..41499061d9fdb 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,6 +1,76 @@ # This file provides an adaptor to match the API expected by the Julia runtime # code in the binding Core._parse +# Find the first error in a SyntaxNode tree, returning the index of the error +# within its parent and the node itself. +function _first_error(t::SyntaxNode) + if is_error(t) + return 0,t + end + if haschildren(t) + for (i,c) in enumerate(children(t)) + if is_error(c) + return i,c + else + x = _first_error(c) + if x != (0,nothing) + return x + end + end + end + end + return 0,nothing +end + +# Classify an incomplete expression, returning a Symbol compatible with +# Base.incomplete_tag(). 
+# +# Roughly, the intention here is to classify which expression head is expected +# next if the incomplete stream was to continue. (Though this is just rough. In +# practice several categories are combined for the purposes of the REPL - +# perhaps we can/should do something more precise in the future.) +function _incomplete_tag(n::SyntaxNode) + i,c = _first_error(n) + if isnothing(c) + return :none + end + # TODO: Check error hits last character + if kind(c) == K"error" && begin + cs = children(c) + length(cs) > 0 + end + k1 = kind(cs[1]) + if k1 == K"ErrorEofMultiComment" + return :comment + elseif k1 == K"ErrorEofChar" + # TODO: Make this case into an internal node + return :char + end + for cc in cs + if kind(cc) == K"error" + return :other + end + end + end + kp = kind(c.parent) + if kp == K"string" + return :string + elseif kp == K"cmdstring" + return :cmd + elseif kp in KSet"block quote let try" + return :block + elseif kp in KSet"for while function if" + return i == 1 ? :other : :block + elseif kp in KSet"module struct" + return i == 2 ? :other : :block + elseif kp == K"do" + return i < 3 ? :other : :block + else + return :other + end +end + +#------------------------------------------------------------------------------- @static if isdefined(Core, :_setparser!) const _set_core_parse_hook = Core._setparser! elseif isdefined(Core, :set_parser) @@ -93,11 +163,29 @@ function _core_parser_hook(code, filename, lineno, offset, options) end if any_error(stream) - e = Expr(:error, ParseError(SourceFile(code, filename=filename), stream.diagnostics)) - ex = options === :all ? Expr(:toplevel, e) : e + tree = build_tree(SyntaxNode, stream, wrap_toplevel_as_kind=K"None") + _,err = _first_error(tree) + # In the flisp parser errors are normally `Expr(:error, msg)` where + # `msg` is a String. By using a ParseError for msg we can do fancy + # error reporting instead. 
+ if last_byte(err) == lastindex(code) + tag = _incomplete_tag(tree) + # Here we replicate the particular messages + msg = + tag === :string ? "incomplete: invalid string syntax" : + tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" : + tag === :block ? "incomplete: construct requires end" : + tag === :cmd ? "incomplete: invalid \"`\" syntax" : + tag === :char ? "incomplete: invalid character literal" : + "incomplete: premature end of input" + error_ex = Expr(:incomplete, msg) + else + error_ex = Expr(:error, ParseError(stream, filename=filename)) + end + ex = options === :all ? Expr(:toplevel, error_ex) : error_ex else # FIXME: Add support to lineno to this tree build (via SourceFile?) - ex = build_tree(Expr, stream, filename=filename, wrap_toplevel_as_kind=K"None") + ex = build_tree(Expr, stream; filename=filename, wrap_toplevel_as_kind=K"None") if Meta.isexpr(ex, :None) # The None wrapping is only to give somewhere for trivia to be # attached; unwrap! diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index e70c62da43254..c05122be8e3a9 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -46,6 +46,11 @@ struct ParseError <: Exception diagnostics::Vector{Diagnostic} end +function ParseError(stream::ParseStream; filename=nothing) + source = SourceFile(sourcetext(stream), filename=filename) + ParseError(source, stream.diagnostics) +end + function Base.showerror(io::IO, err::ParseError, bt; backtrace=false) println(io, "ParseError:") show_diagnostics(io, err.diagnostics, err.source) @@ -156,8 +161,7 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, emit_diagnostic(stream, error="unexpected text after parsing $rule") end if any_error(stream.diagnostics) - source = SourceFile(sourcetext(stream, steal_textbuf=true), filename=filename) - throw(ParseError(source, stream.diagnostics)) + throw(ParseError(stream, filename=filename)) end # TODO: Figure out a more satisfying 
solution to the wrap_toplevel_as_kind # mess that we've got here. diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index a9cf33e48054f..6ec97e6b49f3f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -120,6 +120,9 @@ children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : span(node::SyntaxNode) = span(node.raw) +first_byte(node::SyntaxNode) = node.position +last_byte(node::SyntaxNode) = node.position + span(node) - 1 + """ sourcetext(node) @@ -138,7 +141,7 @@ end function _show_syntax_node(io, current_filename, node::SyntaxNode, indent) fname = node.source.filename line, col = source_location(node.source, node.position) - posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(node.position,6)):$(rpad(node.position+span(node)-1,6))│" + posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" val = node.val nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : isa(val, Symbol) ? string(val) : repr(val) diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index b93c0a17f8a54..2604e9f2fa2f1 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -31,8 +31,69 @@ # Check that Meta.parse throws the JuliaSyntax.ParseError rather than # Meta.ParseError when Core integration is enabled. 
- @test_throws JuliaSyntax.ParseError Meta.parse("[x") + @test_throws JuliaSyntax.ParseError Meta.parse("[x)") JuliaSyntax.enable_in_core!(false) end + + @testset "Expr(:incomplete)" begin + JuliaSyntax.enable_in_core!() + + @test Meta.isexpr(Meta.parse("[x"), :incomplete) + + for (str, tag) in [ + "" => :none + "\"" => :string + "\"\$foo" => :string + "#=" => :comment + "'" => :char + "'a" => :char + "`" => :cmd + "(" => :other + "[" => :other + "begin" => :block + "quote" => :block + "let" => :block + "let;" => :block + "for" => :other + "for x=xs" => :block + "function" => :other + "function f()" => :block + "macro" => :other + "macro f()" => :block + "f() do" => :other + "f() do x" => :block + "module" => :other + "module X" => :block + "baremodule" => :other + "baremodule X" => :block + "mutable struct" => :other + "mutable struct X" => :block + "struct" => :other + "struct X" => :block + "if" => :other + "if x" => :block + "while" => :other + "while x" => :block + "try" => :block + # could be `try x catch exc body end` or `try x catch ; body end` + "try x catch" => :block + "using" => :other + "import" => :other + "local" => :other + "global" => :other + + "1 == 2 ?" => :other + "1 == 2 ? 
3 :" => :other + "1," => :other + "1, " => :other + "1,\n" => :other + "1, \n" => :other + ] + @testset "$(repr(str))" begin + @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag + end + end + JuliaSyntax.enable_in_core!(false) + end end From 347f9739bf9d9d3e20147db8304b0758435cf04d Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 22 Sep 2022 18:40:03 +1000 Subject: [PATCH 0511/1109] Fix project status in README + bump to 0.2 (JuliaLang/JuliaSyntax.jl#104) --- JuliaSyntax/Project.toml | 2 +- JuliaSyntax/README.md | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 9b175d9d4a5b6..1d6774fe98064 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] -version = "0.1.0" +version = "0.2.0" [compat] julia = "1.6" diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 99acad9d8b69b..7ef4dbcd84e8a 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -27,10 +27,13 @@ A Julia frontend, written in Julia. ### Status -The library is in pre-0.1 stage, but parses all of Base correctly with only a -handful of failures remaining in the Base tests and standard library. -The tree data structures should be somewhat usable but will evolve as we try -out various use cases. +JuliaSyntax.jl is highly compatible with the Julia reference parser: It parses +all of Base and the standard libraries correctly and most of the General +registry. There's still a few known incompatibilities in the Base tests. + +The tree data structures are usable but their APIs will evolve as we try out +various use cases. Converting to `Expr` is always be possible and will be +stable if that helps for your use case. A talk from JuliaCon 2022 covered some aspects of this package. 
From b9d1908c9b9202708b763076b03b914a4823d817 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 30 Sep 2022 10:28:04 +1000 Subject: [PATCH 0512/1109] Parse keyword args with `=` head rather than `kw` (JuliaLang/JuliaSyntax.jl#103) For Expr there's various cases where = is parsed into a kw head, but this is inconsistent especially when named tuples come into play. That is, the `=` parses to different heads: * as `=` in `(a=1, b=2)` * as `kw` in `f(a=1, b=2)` This causes extra complexity in the parser, and is visually confusing for macro writers. Instead, this change always parses `=` to the K"=" head, converting to the :kw head when lowering to Expr. This all seems to work fairly well, with one fairly obscure but awkward exception - in infix notation such as `(x = 1) != y` the equality is assignment, not a keyword argument. --- JuliaSyntax/src/expr.jl | 43 ++++++++----- JuliaSyntax/src/kinds.jl | 1 - JuliaSyntax/src/parser.jl | 126 +++++++++++-------------------------- JuliaSyntax/test/expr.jl | 96 +++++++++++++++++++++++----- JuliaSyntax/test/parser.jl | 21 ++----- 5 files changed, 148 insertions(+), 139 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 814a141d36a5d..48da0dbe8ed16 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -11,7 +11,8 @@ function is_stringchunk(node) return k == K"String" || k == K"CmdString" end -function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) +function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, + eq_to_kw=false, inside_dot_expr=false, inside_vect_or_braces=false) if !haschildren(node) val = node.val if val isa Union{Int128,UInt128,BigInt} @@ -20,7 +21,6 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) # representation of these. str = replace(sourcetext(node), '_'=>"") headsym = :macrocall - k = kind(node) macname = val isa Int128 ? Symbol("@int128_str") : val isa UInt128 ? 
Symbol("@uint128_str") : Symbol("@big_str") @@ -29,12 +29,15 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) return val end end - if kind(node) == K"?" + nodekind = kind(node) + if nodekind == K"?" headsym = :if + elseif nodekind == K"=" && !is_decorated(node) && eq_to_kw + headsym = :kw else headstr = untokenize(head(node), include_flag_suff=false) headsym = !isnothing(headstr) ? Symbol(headstr) : - error("Can't untokenize head of kind $(kind(node))") + error("Can't untokenize head of kind $(nodekind)") end node_args = children(node) if headsym == :string || headsym == :cmdstring @@ -89,13 +92,19 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1)) if headsym == :for && length(node_args) == 2 # No line numbers in for loop iteration spec - args[1] = _to_expr(node_args[1], true, false) + args[1] = _to_expr(node_args[1], iteration_spec=true, need_linenodes=false) args[2] = _to_expr(node_args[2]) elseif headsym == :let && length(node_args) == 2 # No line numbers in let statement binding list - args[1] = _to_expr(node_args[1], false, false) + args[1] = _to_expr(node_args[1], need_linenodes=false) args[2] = _to_expr(node_args[2]) else + eq_to_kw = headsym == :call && !has_flags(node, INFIX_FLAG) || + headsym == :ref || + (headsym == :parameters && !inside_vect_or_braces) || + (headsym == :tuple && inside_dot_expr) + in_dot = headsym == :. 
+ in_vb = headsym == :vect || headsym == :braces if insert_linenums if isempty(node_args) push!(args, source_location(LineNumberNode, node.source, node.position)) @@ -103,12 +112,18 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) for i in 1:length(node_args) n = node_args[i] args[2*i-1] = source_location(LineNumberNode, n.source, n.position) - args[2*i] = _to_expr(n) + args[2*i] = _to_expr(n, + eq_to_kw=eq_to_kw, + inside_dot_expr=in_dot, + inside_vect_or_braces=in_vb) end end else for i in 1:length(node_args) - args[i] = _to_expr(node_args[i]) + args[i] = _to_expr(node_args[i], + eq_to_kw=eq_to_kw, + inside_dot_expr=in_dot, + inside_vect_or_braces=in_vb) end end end @@ -118,8 +133,9 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) if is_infix(node.raw) args[2], args[1] = args[1], args[2] end + + # Special cases for various expression heads loc = source_location(LineNumberNode, node.source, node.position) - # Convert elements if headsym == :macrocall insert!(args, 2, loc) elseif headsym in (:call, :ref) @@ -128,7 +144,7 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) insert!(args, 2, args[end]) pop!(args) end - elseif headsym in (:tuple, :parameters, :vect) + elseif headsym in (:tuple, :parameters, :vect, :braces) # Move parameters blocks to args[1] if length(args) > 1 && Meta.isexpr(args[end], :parameters) pushfirst!(args, args[end]) @@ -181,7 +197,7 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) # elseif headsym == :string && length(args) == 1 && version <= (1,5) # Strip string from interpolations in 1.5 and lower to preserve # "hi$("ho")" ==> (string "hi" "ho") - elseif headsym == :(=) + elseif headsym == :(=) && !is_decorated(node) if is_eventually_call(args[1]) && !iteration_spec && !Meta.isexpr(args[2], :block) # Add block for short form function locations args[2] = Expr(:block, loc, args[2]) @@ -191,10 +207,7 @@ function 
_to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true) args[1] = Expr(:block, loc, args[1]) elseif headsym == :(->) if Meta.isexpr(args[2], :block) - parent = node.parent - if parent isa SyntaxNode && kind(parent) != K"do" - pushfirst!(args[2].args, loc) - end + pushfirst!(args[2].args, loc) else # Add block for source locations args[2] = Expr(:block, loc, args[2]) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 06a77c98da866..476ddc1e18dc2 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -875,7 +875,6 @@ const _kind_names = "string" # A string interior node (possibly containing interpolations) "cmdstring" # A cmd string node (containing delimiters plus string) "macrocall" - "kw" # the = in f(a=1) "parameters" # the list after ; in f(; a=1) "toplevel" "tuple" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2d754e71b19d6..7e62fd8033ef4 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -508,64 +508,57 @@ end # # flisp: parse-eq function parse_eq(ps::ParseState) - parse_assignment(ps, parse_comma, false) + parse_assignment(ps, parse_comma) end # parse_eq_star is used where commas are special, for example in an argument list # -# If an `(= x y)` node was emitted, returns the position of that node in the -# output list so that it can be changed to `(kw x y)` later if necessary. 
-# # flisp: parse-eq* -function parse_eq_star(ps::ParseState, equals_is_kw=false) +function parse_eq_star(ps::ParseState) k = peek(ps) k2 = peek(ps,2) if (is_literal(k) || k == K"Identifier") && k2 in KSet", ) } ]" # optimization: skip checking the whole precedence stack if we have a # simple token followed by a common closing token bump(ps) - return NO_POSITION else - return parse_assignment(ps, parse_pair, equals_is_kw) + parse_assignment(ps, parse_pair) end end # a = b ==> (= a b) # # flisp: parse-assignment -function parse_assignment(ps::ParseState, down, equals_is_kw::Bool) +function parse_assignment(ps::ParseState, down) mark = position(ps) down(ps) - parse_assignment_with_initial_ex(ps, mark, down, equals_is_kw) + parse_assignment_with_initial_ex(ps, mark, down) end -function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T, equals_is_kw::Bool) where {T} # where => specialize on `down` +function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {T} # where => specialize on `down` t = peek_token(ps) k = kind(t) if !is_prec_assignment(k) - return NO_POSITION + return end if k == K"~" if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2)) # Unary ~ in space sensitive context is not assignment precedence # [a ~b] ==> (hcat a (call ~ b)) - return NO_POSITION + return end # ~ is the only non-syntactic assignment-precedence operator. # a ~ b ==> (call-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) bump(ps) - parse_assignment(ps, down, equals_is_kw) + parse_assignment(ps, down) emit(ps, mark, K"call", INFIX_FLAG) - return NO_POSITION else # a += b ==> (+= a b) # a .= b ==> (.= a b) bump(ps, TRIVIA_FLAG) - parse_assignment(ps, down, equals_is_kw) - plain_eq = is_plain_equals(t) - equals_pos = emit(ps, mark, plain_eq && equals_is_kw ? K"kw" : k, flags(t)) - return plain_eq ? 
equals_pos : NO_POSITION + parse_assignment(ps, down) + emit(ps, mark, k, flags(t)) end end @@ -1186,8 +1179,6 @@ function parse_unary_call(ps::ParseState) opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs is_call = had_commas || had_splat || initial_semi return (needs_parameters=is_call, - eq_is_kw_before_semi=is_call, - eq_is_kw_after_semi=is_call, is_call=is_call, is_block=!is_call && num_semis > 0) end @@ -1204,7 +1195,7 @@ function parse_unary_call(ps::ParseState) end # Prefix function calls for operators which are both binary and unary # +(a,b) ==> (call + a b) - # +(a=1,) ==> (call + (kw a 1)) + # +(a=1,) ==> (call + (= a 1)) # +(a...) ==> (call + (... a)) # +(a;b,c) ==> (call + a (parameters b c)) # +(;a) ==> (call + (parameters a)) @@ -1445,10 +1436,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f (a) ==> (call f (error-t) a b) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - # Keyword arguments depends on call vs macrocall - # foo(a=1) ==> (call foo (kw a 1)) - # @foo(a=1) ==> (macrocall @foo (= a 1)) - parse_call_arglist(ps, K")", is_macrocall) + parse_call_arglist(ps, K")") emit(ps, mark, is_macrocall ? K"macrocall" : K"call") if peek(ps) == K"do" # f(x) do y body end ==> (do (call :f :x) (tuple :y) (block :body)) @@ -1517,14 +1505,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit_diagnostic(ps, mark, error="dot call syntax not supported for macros") end - # Keyword params always use kw inside tuple in dot calls # f.(a,b) ==> (. f (tuple a b)) - # f.(a=1) ==> (. f (tuple (kw a 1))) # f. (x) ==> (. 
f (error-t) (tuple x)) bump_disallowed_space(ps) m = position(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")", is_macrocall) + parse_call_arglist(ps, K")") emit(ps, m, K"tuple") emit(ps, mark, K".") elseif k == K":" @@ -1595,7 +1581,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # S {a} ==> (curly S (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K"}", is_macrocall) + parse_call_arglist(ps, K"}") if is_macrocall # @S{a,b} ==> (macrocall S (braces a b)) emit(ps, m, K"braces") @@ -1944,7 +1930,7 @@ function parse_const_local_global(ps) # const x = 1 ==> (const (= x 1)) # global x ~ 1 ==> (global (call-i x ~ 1)) # global x += 1 ==> (global (+= x 1)) - parse_assignment_with_initial_ex(ps, beforevar_mark, parse_comma, false) + parse_assignment_with_initial_ex(ps, beforevar_mark, parse_comma) else # global x ==> (global x) # local x ==> (local x) @@ -2008,17 +1994,15 @@ function parse_function(ps::ParseState) is_empty_tuple = peek(ps, skip_newlines=true) == K")" opts = parse_brackets(ps, K")") do _, _, _, _ _is_anon_func = peek(ps, 2) != K"(" - return (needs_parameters = _is_anon_func, - eq_is_kw_before_semi = _is_anon_func, - eq_is_kw_after_semi = _is_anon_func, - is_anon_func=_is_anon_func) + return (needs_parameters = _is_anon_func, + is_anon_func = _is_anon_func) end is_anon_func = opts.is_anon_func if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) # function (x,y) end ==> (function (tuple x y) (block)) - # function (x=1) end ==> (function (tuple (kw x 1)) (block)) - # function (;x=1) end ==> (function (tuple (parameters (kw x 1))) (block)) + # function (x=1) end ==> (function (tuple (= x 1)) (block)) + # function (;x=1) end ==> (function (tuple (parameters (= x 1))) (block)) emit(ps, def_mark, K"tuple") elseif is_empty_tuple # Weird case which is consistent with parse_paren but will be @@ -2491,13 +2475,11 @@ end # like parse-arglist, but with `for` parsed as a 
generator # # flisp: parse-call-arglist -function parse_call_arglist(ps::ParseState, closer, is_macrocall) +function parse_call_arglist(ps::ParseState, closer) ps = ParseState(ps, for_generator=true) parse_brackets(ps, closer) do _, _, _, _ - return (needs_parameters=true, - eq_is_kw_before_semi=!is_macrocall, - eq_is_kw_after_semi=true) + return (needs_parameters=true,) end end @@ -2513,9 +2495,7 @@ function parse_vect(ps::ParseState, closer) # [x=1, y=2] ==> (vect (= x 1) (= y 2)) # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) parse_brackets(ps, closer) do _, _, _, _ - return (needs_parameters=true, - eq_is_kw_before_semi=false, - eq_is_kw_after_semi=false) + return (needs_parameters=true,) end return (K"vect", EMPTY_FLAGS) end @@ -2875,8 +2855,6 @@ function parse_paren(ps::ParseState, check_identifiers=true) is_tuple = had_commas || (had_splat && num_semis >= 1) || (initial_semi && (num_semis == 1 || num_subexprs > 0)) return (needs_parameters=is_tuple, - eq_is_kw_before_semi=false, - eq_is_kw_after_semi=is_tuple, is_tuple=is_tuple, is_block=num_semis > 0) end @@ -2888,14 +2866,14 @@ function parse_paren(ps::ParseState, check_identifiers=true) # # Named tuple with initial semicolon # (;) ==> (tuple (parameters)) - # (; a=1) ==> (tuple (parameters (kw a 1))) + # (; a=1) ==> (tuple (parameters (= a 1))) # # Extra credit: nested parameters and frankentuples # (x...;) ==> (tuple (... x) (parameters)) # (x...; y) ==> (tuple (... 
x) (parameters y)) - # (; a=1; b=2) ==> (tuple (parameters (kw a 1) (parameters (kw b 2)))) + # (; a=1; b=2) ==> (tuple (parameters (= a 1) (parameters (= b 2)))) # (a; b; c,d) ==> (tuple a (parameters b (parameters c d))) - # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (kw c 3))) + # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (= c 3))) emit(ps, mark, K"tuple") elseif opts.is_block # Blocks @@ -2917,28 +2895,17 @@ end # Handle bracketed syntax inside any of () [] or {} where there's a mixture # of commas and semicolon delimiters. # -# For parentheses this is hard because there's various ambiguities depending on -# context. In general (X; Y) is difficult when X and Y are subexpressions -# possibly containing `,` and `=`. -# -# For example, (a=1; b=2) could be seen to parse four different ways! -# -# Function args: (kw a 1) (parameters (kw b 2)) -# Tuple-like: (= a 1) (parameters (kw b 2)) -# Block: (= a 1) (= b 2) -# [] vect-like: (= a 1) (parameters (= b 2)) +# For parentheses this is tricky because there's various cases to disambiguate, +# depending on outside context and the content of the brackets (number of +# semicolons, presence of commas or splats). The `after_parse` function must be +# provided by the caller to disambiguate these cases. # # Expressions (X; Y; Z) with more semicolons are also allowed by the flisp # parser and generally parse as nested parameters blocks. This is invalid Julia # syntax so the parse tree is pretty strange in these cases! Some macros # probably use it though. Example: # -# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (kw d 2) (parameters e (kw f 3)))) -# -# Deciding which of these representations to use depends on both the prefix -# context and the contained expressions. To distinguish between blocks vs -# tuples we use the presence of `,` within the `;`-delimited sections: If -# there's commas, it's a tuple, otherwise a block. 
+# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (= d 2) (parameters e (= f 3)))) # # flisp: parts of parse-paren- and parse-arglist function parse_brackets(after_parse::Function, @@ -2948,7 +2915,6 @@ function parse_brackets(after_parse::Function, where_enabled=true, whitespace_newline=true) params_marks = acquire_positions(ps.stream) - eq_positions = acquire_positions(ps.stream) last_eq_before_semi = 0 num_subexprs = 0 num_semis = 0 @@ -2963,9 +2929,6 @@ function parse_brackets(after_parse::Function, # Start of parameters list # a, b; c d ==> a b (parameters c d) push!(params_marks, position(ps)) - if num_semis == 0 - last_eq_before_semi = length(eq_positions) - end num_semis += 1 bump(ps, TRIVIA_FLAG) bump_trivia(ps) @@ -2974,14 +2937,11 @@ function parse_brackets(after_parse::Function, break else mark = position(ps) - eq_pos = parse_eq_star(ps) + parse_eq_star(ps) num_subexprs += 1 if num_subexprs == 1 had_splat = peek_behind(ps).kind == K"..." end - if eq_pos != NO_POSITION - push!(eq_positions, eq_pos) - end t = peek_token(ps, skip_newlines=true) k = kind(t) bump_trivia(ps) @@ -3002,32 +2962,16 @@ function parse_brackets(after_parse::Function, end end end - actions = after_parse(had_commas, had_splat, num_semis, num_subexprs) - if num_semis == 0 - last_eq_before_semi = length(eq_positions) - end - # Turn any K"=" into K"kw" as necessary - if actions.eq_is_kw_before_semi - # f(a=1) ==> (call f (kw a 1)) - for i=1:last_eq_before_semi - reset_node!(ps, eq_positions[i], kind=K"kw") - end - end - if actions.eq_is_kw_after_semi - for i = last_eq_before_semi+1:length(eq_positions) - reset_node!(ps, eq_positions[i], kind=K"kw") - end - end + opts = after_parse(had_commas, had_splat, num_semis, num_subexprs) # Emit nested parameter nodes if necessary - if actions.needs_parameters + if opts.needs_parameters for mark in Iterators.reverse(params_marks) emit(ps, mark, K"parameters") end end release_positions(ps.stream, params_marks) - 
release_positions(ps.stream, eq_positions) bump_closing_token(ps, closing_kind) - return actions + return opts end is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 2995b2389ac94..35d84de5116c0 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -1,3 +1,6 @@ +function parse_Expr(str) + parseall(Expr, str, rule=:statement) +end @testset "Expr conversion" begin @testset "Quote nodes" begin @@ -11,7 +14,7 @@ @testset "Line numbers" begin @testset "Blocks" begin - @test parseall(Expr, "begin a\nb\n\nc\nend", rule=:statement) == + @test parse_Expr("begin a\nb\n\nc\nend") == Expr(:block, LineNumberNode(1), :a, @@ -20,7 +23,7 @@ LineNumberNode(4), :c, ) - @test parseall(Expr, "begin end", rule=:statement) == + @test parse_Expr("begin end") == Expr(:block, LineNumberNode(1) ) @@ -33,7 +36,7 @@ :b, ) - @test parseall(Expr, "module A\n\nbody\nend", rule=:statement) == + @test parse_Expr("module A\n\nbody\nend") == Expr(:module, true, :A, @@ -46,7 +49,7 @@ end @testset "Function definition lines" begin - @test parseall(Expr, "function f()\na\n\nb\nend", rule=:statement) == + @test parse_Expr("function f()\na\n\nb\nend") == Expr(:function, Expr(:call, :f), Expr(:block, @@ -57,7 +60,7 @@ :b, ) ) - @test parseall(Expr, "f() = 1", rule=:statement) == + @test parse_Expr("f() = 1") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -67,14 +70,14 @@ ) # function/macro without methods - @test parseall(Expr, "function f end", rule=:statement) == + @test parse_Expr("function f end") == Expr(:function, :f) - @test parseall(Expr, "macro f end", rule=:statement) == + @test parse_Expr("macro f end") == Expr(:macro, :f) end @testset "elseif" begin - @test parseall(Expr, "if a\nb\nelseif c\n d\nend", rule=:statement) == + @test parse_Expr("if a\nb\nelseif c\n d\nend") == Expr(:if, :a, Expr(:block, @@ -92,7 +95,7 @@ end @testset "No line numbers in for/let bindings" begin - @test 
parseall(Expr, "for i=is, j=js\nbody\nend", rule=:statement) == + @test parse_Expr("for i=is, j=js\nbody\nend") == Expr(:for, Expr(:block, Expr(:(=), :i, :is), @@ -103,7 +106,7 @@ :body ) ) - @test parseall(Expr, "let i=is, j=js\nbody\nend", rule=:statement) == + @test parse_Expr("let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, Expr(:(=), :i, :is), @@ -119,7 +122,7 @@ @testset "Short form function line numbers" begin # A block is added to hold the line number node - @test parseall(Expr, "f() = xs", rule=:statement) == + @test parse_Expr("f() = xs") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -127,7 +130,7 @@ :xs)) # flisp parser quirk: In a for loop the block is not added, despite # this defining a short-form function. - @test parseall(Expr, "for f() = xs\nend", rule=:statement) == + @test parse_Expr("for f() = xs\nend") == Expr(:for, Expr(:(=), Expr(:call, :f), :xs), Expr(:block, @@ -136,7 +139,7 @@ end @testset "Long form anonymous functions" begin - @test parseall(Expr, "function (xs...)\nbody end", rule=:statement) == + @test parse_Expr("function (xs...)\nbody end") == Expr(:function, Expr(:..., :xs), Expr(:block, @@ -147,14 +150,73 @@ @testset "String conversions" begin # String unwrapping / wrapping - @test parseall(Expr, "\"str\"", rule=:statement) == "str" - @test parseall(Expr, "\"\$(\"str\")\"", rule=:statement) == + @test parse_Expr("\"str\"") == "str" + @test parse_Expr("\"\$(\"str\")\"") == Expr(:string, Expr(:string, "str")) # Concatenation of string chunks in triple quoted cases - @test parseall(Expr, "```\n a\n b```", rule=:statement) == + @test parse_Expr("```\n a\n b```") == Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "a\nb") - @test parseall(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"", rule=:statement) == + @test parse_Expr("\"\"\"\n a\n \$x\n b\n c\"\"\"") == Expr(:string, "a\n", :x, "\nb\nc") end + + @testset "do block conversion" begin + @test parse_Expr("f(x) do y\n body end") == + Expr(:do, Expr(:call, :f, :x), 
+ Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(2), + :body))) + end + + @testset "= to Expr(:kw) conversion" begin + # Call + @test parse_Expr("f(a=1)") == + Expr(:call, :f, Expr(:kw, :a, 1)) + @test parse_Expr("f(; b=2)") == + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) + @test parse_Expr("f(a=1; b=2)") == + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) + + # Infix call = is not :kw + @test parse_Expr("(x=1) != 2") == + Expr(:call, :!=, Expr(:(=), :x, 1), 2) + + # Dotcall + @test parse_Expr("f.(a=1; b=2)") == + Expr(:., :f, Expr(:tuple, + Expr(:parameters, Expr(:kw, :b, 2)), + Expr(:kw, :a, 1))) + + # Named tuples + @test parse_Expr("(a=1,)") == + Expr(:tuple, Expr(:(=), :a, 1)) + @test parse_Expr("(a=1,; b=2)") == + Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) + @test parse_Expr("(a=1,; b=2; c=3)") == + Expr(:tuple, + Expr(:parameters, + Expr(:parameters, Expr(:kw, :c, 3)), + Expr(:kw, :b, 2)), + Expr(:(=), :a, 1)) + + # ref + @test parse_Expr("x[i=j]") == + Expr(:ref, :x, Expr(:kw, :i, :j)) + + # vect/braces + @test parse_Expr("[a=1,; b=2]") == + Expr(:vect, + Expr(:parameters, Expr(:(=), :b, 2)), + Expr(:(=), :a, 1)) + @test parse_Expr("{a=1,; b=2}") == + Expr(:braces, + Expr(:parameters, Expr(:(=), :b, 2)), + Expr(:(=), :a, 1)) + + # dotted = is not :kw + @test parse_Expr("f(a .= 1)") == + Expr(:call, :f, Expr(:.=, :a, 1)) + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 24383a25ca538..407a57b7d5c7c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -175,7 +175,7 @@ tests = [ "*(x)" => "(call * x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" - "+(a=1,)" => "(call + (kw a 1))" + "+(a=1,)" => "(call + (= a 1))" "+(a...)" => "(call + (... 
a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" @@ -246,9 +246,6 @@ tests = [ "f() do ; body end" => "(do (call f) (tuple) (block body))" "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" - # Keyword arguments depend on call vs macrocall - "foo(a=1)" => "(call foo (kw a 1))" - "@foo(a=1)" => "(macrocall @foo (= a 1))" "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo x)" "@foo (x,y)" => "(macrocall @foo (tuple x y))" @@ -288,9 +285,7 @@ tests = [ "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" - # Keyword params always use kw inside tuple in dot calls "f.(a,b)" => "(. f (tuple a b))" - "f.(a=1)" => "(. f (tuple (kw a 1)))" "f. (x)" => "(. f (error-t) (tuple x))" # Other dotted syntax "A.:+" => "(. 
A (quote +))" @@ -438,8 +433,8 @@ tests = [ "macro (\$f)() end" => "(macro (call (\$ f)) (block))" "function (x) body end"=> "(function (tuple x) (block body))" "function (x,y) end" => "(function (tuple x y) (block))" - "function (x=1) end" => "(function (tuple (kw x 1)) (block))" - "function (;x=1) end" => "(function (tuple (parameters (kw x 1))) (block))" + "function (x=1) end" => "(function (tuple (= x 1)) (block))" + "function (;x=1) end" => "(function (tuple (parameters (= x 1))) (block))" "function ()(x) end" => "(function (call (tuple) x) (block))" "function (:)() end" => "(function (call :) (block))" "function (x::T)() end"=> "(function (call (:: x T)) (block))" @@ -546,13 +541,13 @@ tests = [ "(x=1, y=2)" => "(tuple (= x 1) (= y 2))" # Named tuples with initial semicolon "(;)" => "(tuple (parameters))" - "(; a=1)" => "(tuple (parameters (kw a 1)))" + "(; a=1)" => "(tuple (parameters (= a 1)))" # Extra credit: nested parameters and frankentuples "(x...; y)" => "(tuple (... x) (parameters y))" "(x...;)" => "(tuple (... 
x) (parameters))" - "(; a=1; b=2)" => "(tuple (parameters (kw a 1) (parameters (kw b 2))))" + "(; a=1; b=2)" => "(tuple (parameters (= a 1) (parameters (= b 2))))" "(a; b; c,d)" => "(tuple a (parameters b (parameters c d)))" - "(a=1, b=2; c=3)" => "(tuple (= a 1) (= b 2) (parameters (kw c 3)))" + "(a=1, b=2; c=3)" => "(tuple (= a 1) (= b 2) (parameters (= c 3)))" # Block syntax "(;;)" => "(block)" "(a=1;)" => "(block (= a 1))" @@ -798,10 +793,6 @@ broken_tests = [ "'\\x80'" => "(error '\\x80')" "'ab'" => "(error 'ab')" ] - JuliaSyntax.parse_call => [ - # kw's in ref - "x[i=y]" => "(ref x (kw i y))" - ] JuliaSyntax.parse_juxtapose => [ # Want: "numeric constant \"10.\" cannot be implicitly multiplied because it ends with \".\"" "10.x" => "(error (call * 10.0 x))" From 925503116487eeb4fc5693dc6900aaa6c71bf5ac Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 30 Sep 2022 12:25:14 +1000 Subject: [PATCH 0513/1109] Stricter parsing of exception names in `catch $excname` (JuliaLang/JuliaSyntax.jl#106) --- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/parser.jl | 7 ++++++- JuliaSyntax/test/parser.jl | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 41499061d9fdb..8011f60271b73 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -157,7 +157,7 @@ function _core_parser_hook(code, filename, lineno, offset, options) return Core.svec(nothing, last_byte(stream)) end end - JuliaSyntax.parse(stream; rule=rule) + parse(stream; rule=rule) if rule === :statement bump_trivia(stream) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7e62fd8033ef4..b3b6d7f3c72f8 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2154,7 +2154,12 @@ function parse_catch(ps::ParseState) else # try x catch e y end ==> (try (block x) e (block y) false false) # try x catch $e y end ==> (try (block x) ($ e) (block y) false false) - parse_identifier_or_interpolate(ps) 
+ mark = position(ps) + parse_eq_star(ps) + if !(peek_behind(ps).kind in KSet"Identifier $") + # try x catch e+3 y end ==> (try (block x) (error (call-i e + 3)) (block y) false false) + emit(ps, mark, K"error", error="a variable name is expected after `catch`") + end end parse_block(ps) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 407a57b7d5c7c..87b39368c5e37 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -475,6 +475,7 @@ tests = [ "try x catch \n y end" => "(try (block x) false (block y) false false)" "try x catch e y end" => "(try (block x) e (block y) false false)" "try x catch \$e y end" => "(try (block x) (\$ e) (block y) false false)" + "try x catch e+3 y end" => "(try (block x) (error (call-i e + 3)) (block y) false false)" "try x finally y end" => "(try (block x) false false false (block y))" # v1.8 only ((v=v"1.8",), "try catch ; else end") => "(try (block) false (block) (block) false)" From 795ef850005c400756e9dceefb7e6a5f01ac0a6e Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 4 Oct 2022 14:28:42 +1000 Subject: [PATCH 0514/1109] Rework JuliaSyntax.parse() public API Rework JuliaSyntax.parse() public API `parse()` and `parseall()` were generally pretty inconvenient to use. This change reworks what I had called `parseall()` to be more similar to `Meta.parse()` and adds `parseall()` and `parseatom()` in analogy to the `Base.Meta` versions of these functions. The lower level function `parse!()` is provided to work with `ParseStream` for cases where more control is required. 
--- JuliaSyntax/README.md | 75 ++++++------ JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 26 ++++- JuliaSyntax/src/parser_api.jl | 188 ++++++++++++------------------- JuliaSyntax/test/expr.jl | 72 ++++++------ JuliaSyntax/test/parse_stream.jl | 40 +++++-- JuliaSyntax/test/parser_api.jl | 95 ++++++++-------- JuliaSyntax/test/test_utils.jl | 13 ++- 8 files changed, 258 insertions(+), 253 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 7ef4dbcd84e8a..ecce2b3a8e4bb 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -42,23 +42,24 @@ A talk from JuliaCon 2022 covered some aspects of this package. # Examples Here's what parsing of a small piece of code currently looks like in various -forms. We'll use the `parseall` convenience function to demonstrate, but -there's also a more flexible parsing interface with `JuliaSyntax.parse()`. +forms. We'll use the `JuliaSyntax.parse` function to demonstrate, there's also +`JuliaSyntax.parse!` offering more fine-grained control. First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means the `call` has the infix `-i` flag): ```julia -julia> parseall(SyntaxNode, "(x + y)*z", filename="foo.jl") +julia> using JuliaSyntax: JuliaSyntax, SyntaxNode, GreenNode + +julia> JuliaSyntax.parse(SyntaxNode, "(x + y)*z", filename="foo.jl") line:col│ byte_range │ tree │ file_name - 1:1 │ 1:9 │[toplevel] │foo.jl - 1:1 │ 1:9 │ [call-i] - 1:2 │ 2:6 │ [call-i] - 1:2 │ 2:2 │ x - 1:4 │ 4:4 │ + - 1:6 │ 6:6 │ y - 1:8 │ 8:8 │ * - 1:9 │ 9:9 │ z + 1:1 │ 1:9 │[call-i] │foo.jl + 1:2 │ 2:6 │ [call-i] + 1:2 │ 2:2 │ x + 1:4 │ 4:4 │ + + 1:6 │ 6:6 │ y + 1:8 │ 8:8 │ * + 1:9 │ 9:9 │ z ``` Internally this has a full representation of all syntax trivia (whitespace and @@ -69,19 +70,18 @@ despite being important for parsing. 
```julia julia> text = "(x + y)*z" - greentree = parseall(GreenNode, text) - 1:9 │[toplevel] - 1:9 │ [call] - 1:1 │ ( - 2:6 │ [call] - 2:2 │ Identifier ✔ - 3:3 │ Whitespace - 4:4 │ + ✔ - 5:5 │ Whitespace - 6:6 │ Identifier ✔ - 7:7 │ ) - 8:8 │ * ✔ - 9:9 │ Identifier ✔ + greentree = JuliaSyntax.parse(GreenNode, text) + 1:9 │[call] + 1:1 │ ( + 2:6 │ [call] + 2:2 │ Identifier ✔ + 3:3 │ Whitespace + 4:4 │ + ✔ + 5:5 │ Whitespace + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ * ✔ + 9:9 │ Identifier ✔ ``` `GreenNode` stores only byte ranges, but the token strings can be shown by @@ -89,25 +89,24 @@ supplying the source text string: ```julia julia> show(stdout, MIME"text/plain"(), greentree, text) - 1:9 │[toplevel] - 1:9 │ [call] - 1:1 │ ( "(" - 2:6 │ [call] - 2:2 │ Identifier ✔ "x" - 3:3 │ Whitespace " " - 4:4 │ + ✔ "+" - 5:5 │ Whitespace " " - 6:6 │ Identifier ✔ "y" - 7:7 │ ) ")" - 8:8 │ * ✔ "*" - 9:9 │ Identifier ✔ "z" + 1:9 │[call] + 1:1 │ ( "(" + 2:6 │ [call] + 2:2 │ Identifier ✔ "x" + 3:3 │ Whitespace " " + 4:4 │ + ✔ "+" + 5:5 │ Whitespace " " + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ * ✔ "*" + 9:9 │ Identifier ✔ "z" ``` Julia `Expr` can also be produced: ```julia -julia> parseall(Expr, "(x + y)*z") -:($(Expr(:toplevel, :((x + y) * z)))) +julia> JuliaSyntax.parse(Expr, "(x + y)*z") +:((x + y) * z) ``` # Using JuliaSyntax as the default parser diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 8011f60271b73..9d65983cb632e 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -157,7 +157,7 @@ function _core_parser_hook(code, filename, lineno, offset, options) return Core.svec(nothing, last_byte(stream)) end end - parse(stream; rule=rule) + parse!(stream; rule=rule) if rule === :statement bump_trivia(stream) end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 6665c3fa5f0f8..c62e64b27eec7 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -155,10 +155,28 @@ const 
NO_POSITION = ParseStreamPosition(0, 0) #------------------------------------------------------------------------------- """ -ParseStream provides an IO interface for the parser. It -- Wraps the lexer with a lookahead buffer -- Removes insignificant whitespace and comment tokens, shifting them into the - output implicitly (newlines may be significant depending on `skip_newlines`) + ParseStream(text::AbstractString, index::Integer=1; version=VERSION) + ParseStream(text::IO; version=VERSION) + ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION) + ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION) + +Construct a `ParseStream` from input which may come in various forms: +* An string (zero copy for `String` and `SubString`) +* An `IO` object (zero copy for `IOBuffer`). The `IO` object must be seekable. +* A buffer of bytes (zero copy). The caller is responsible for preserving + buffers passed as `(ptr,len)`. + +A byte `index` may be provided as the position to start parsing. + +ParseStream provides an IO interface for the parser which provides lexing of +the source text input into tokens, manages insignificant whitespace tokens on +behalf of the parser, and stores output tokens and tree nodes in a pair of +output arrays. + +`version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`version`. """ mutable struct ParseStream # `textbuf` is a buffer of UTF-8 encoded text of the source code. 
This is a diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index c05122be8e3a9..9904e84a72b93 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -3,43 +3,6 @@ # This is defined separately from parser.jl so that: # * parser.jl doesn't need to refer to any tree data structures # * It's clear which parts are the public API -# -# What should the general parsing API look like? Some points to consider: -# -# * After parsing atoms or statements or most other internal rules, it's -# usual to start in the middle of the input text and end somewhere else in -# the middle of the input text. So we should taken an index for the start of -# parsing and supply an index back to the caller after parsing. -# -# * `parseall` is a special case where we expect to consume all the input. -# Perhaps this is the API which throws an error if we don't consume it all, -# and doesn't accept an index as input? -# -# * The ParseStream is the fundamental interface which wraps the code string -# and index up together for input and contains the output events, diagnostics -# and current stream position after parsing. The user should potentially be -# able to use this directly. It does, however assume a Julia-compatible token -# stream. -# -# * It could be useful to support an IO-based interface so that users can parse -# Julia code intermixed with other DSLs. Documenter.jl and string macros come -# to mind as examples which could use this. A tricky part is deciding where -# the input ends: For string macros this is done by the parser, but for -# Documenter it's probably just done beforehand according to the Markdown -# code block rules. -# -# * The API should have an interface where a simple string is passed in. How -# does SourceFile relate to this? -# -# * It's neat for `parse` to be overloadable to produce various output data -# structures; GreenNode, SyntaxNode, Expr, (etc?) in the same way that -# Base.parse can be used for non-Julia code. 
(Heh... though -# `Base.parse(Expr, "...")` would also make a certain amount of sense.) -# -# * What's the no-copy API look like? A String can be put into an IOBuffer via -# unsafe_wrap(Vector{UInt8}, str) ... A SubString likewise. Also there's the -# `codeunits` function to hold a GC-safe view of string data as an array (but -# we can't use a Vector{UInt8}) struct ParseError <: Exception source::SourceFile @@ -65,39 +28,19 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) """ - # Input and output: - stream = parse(stream::ParseStream; kws...) - (tree, diagnostics) = parse(TreeType, io::IOBuffer; kws...) - (tree, diagnostics, index) = parse(TreeType, str::AbstractString, [index::Integer]; kws...) - # Keywords - parse(...; rule=:toplevel, version=VERSION, ignore_trivia=true) - -Parse Julia source code from `input`, returning the output in a format -compatible with `input`: - -* When `input` is a `ParseStream`, the stream itself is returned and the - `ParseStream` interface can be used to process the output. -* When `input` is a seekable `IO` subtype, the output is `(tree, diagnostics)`. - The buffer `position` will be set to the next byte of input. -* When `input` is an `AbstractString, Integer`, or `Vector{UInt8}, Integer` the - output is `(tree, diagnostics, index)`, where `index` (default 1) is the next - byte of input. + parse!(stream::ParseStream; rule=:toplevel) + +Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data +structures may be extracted from `stream` with the [`build_tree`](@ref) function. `rule` may be any of -* `toplevel` (default) — parse a whole "file" of top level statements. In this +* `:toplevel` (default) — parse a whole "file" of top level statements. In this mode, the parser expects to fully consume the input. -* `statement` — parse a single statement, or statements separated by semicolons. 
-* `atom` — parse a single syntax "atom": a literal, identifier, or +* `:statement` — parse a single statement, or statements separated by semicolons. +* `:atom` — parse a single syntax "atom": a literal, identifier, or parenthesized expression. - -`version` (default `VERSION`) may be used to set the syntax version to -any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been -added after v"1.0", emitting an error if it's not compatible with the requested -`version`. - -See also [`parseall`](@ref) for a simpler but less powerful interface. """ -function parse(stream::ParseStream; rule::Symbol=:toplevel) +function parse!(stream::ParseStream; rule::Symbol=:toplevel) ps = ParseState(stream) if rule === :toplevel parse_toplevel(ps) @@ -111,56 +54,37 @@ function parse(stream::ParseStream; rule::Symbol=:toplevel) stream end -function parse(::Type{T}, io::IO; - rule::Symbol=:toplevel, version=VERSION, kws...) where {T} +""" + parse!(TreeType, io::IO; rule=:toplevel, version=VERSION) + +Parse Julia source code from a seekable `IO` object. The output is a tuple +`(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned +directly after the last byte which was consumed during parsing. +""" +function parse!(::Type{TreeType}, io::IO; + rule::Symbol=:toplevel, version=VERSION, kws...) where {TreeType} stream = ParseStream(io; version=version) - parse(stream; rule=rule) - tree = build_tree(T, stream; kws...) + parse!(stream; rule=rule) + tree = build_tree(TreeType, stream; kws...) seek(io, last_byte(stream)) tree, stream.diagnostics end -# Generic version of parse for all other cases where an index must be passed -# back - ie strings and buffers -function parse(::Type{T}, input...; - rule::Symbol=:toplevel, version=VERSION, kws...) where {T} - stream = ParseStream(input...; version=version) - parse(stream; rule=rule) - tree = build_tree(T, stream; kws...) 
- tree, stream.diagnostics, last_byte(stream) + 1 -end - - -""" - parseall(TreeType, input...; - rule=:toplevel, - version=VERSION, - ignore_trivia=true) - -Experimental convenience interface to parse `input` as Julia code, emitting an -error if the entire input is not consumed. `input` can be a string or any other -valid input to the `ParseStream` constructor. By default `parseall` will ignore -whitespace and comments before and after valid code but you can turn this off -by setting `ignore_trivia=false`. - -A `ParseError` will be thrown if any errors occurred during parsing. - -See [`parse`](@ref) for a more complete and powerful interface to the parser, -as well as a description of the `version` and `rule` keywords. -""" -function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, - ignore_trivia=true, filename=nothing) where {T} - stream = ParseStream(input...; version=version) +function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, + ignore_trivia=true, filename=nothing, ignore_warnings=false) where {T} + stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) empty!(stream) end - parse(stream; rule=rule) - if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || - (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) - emit_diagnostic(stream, error="unexpected text after parsing $rule") + parse!(stream; rule=rule) + if need_eof + if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || + (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) + emit_diagnostic(stream, error="unexpected text after parsing $rule") + end end - if any_error(stream.diagnostics) + if any_error(stream.diagnostics) || (!ignore_warnings && !isempty(stream.diagnostics)) throw(ParseError(stream, filename=filename)) end # TODO: Figure out a more satisfying 
solution to the wrap_toplevel_as_kind @@ -169,13 +93,51 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, # not absolute positions. # * Dropping it would be ok for SyntaxNode and Expr... tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename) - if !isempty(stream.diagnostics) - # Crudely format any warnings to the current logger. - buf = IOBuffer() - show_diagnostics(IOContext(buf, stdout), stream, - SourceFile(sourcetext(stream, steal_textbuf=true), filename=filename)) - @warn Text(String(take!(buf))) - end - tree + tree, last_byte(stream) + 1 end +""" + parse(TreeType, text, [index]; + version=VERSION, + ignore_trivia=true, + filename=nothing, + ignore_warnings=false) + + # Or, with the same arguments + parseall(...) + parseatom(...) + +Parse Julia source code string `text` into a data structure of type `TreeType`. +`parse` parses a single Julia statement, `parseall` parses top level statements +at file scope and `parseatom` parses a single Julia identifier or other "syntax +atom". + +If `text` is passed without `index`, all the input text must be consumed and a +tree data structure is returned. When an integer byte `index` is passed, a +tuple `(tree, next_index)` will be returned containing the next index in `text` +to resume parsing. By default whitespace and comments before and after valid +code are ignored but you can turn this off by setting `ignore_trivia=false`. + +`version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`version`. + +Pass `filename` to set any file name information embedded within the output +tree, if applicable. This will also annotate errors and warnings with the +source file name. + +A `ParseError` will be thrown if any errors or warnings occurred during +parsing. 
To avoid exceptions due to warnings, use `ignore_warnings=true`. +""" +parse(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1] +parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:toplevel, true, T, text; kws...)[1] +parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1] + +@eval @doc $(@doc parse) parseall +@eval @doc $(@doc parse) parseatom + +parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...) +parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...) +parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...) + diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 35d84de5116c0..850853f5cbc10 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -1,20 +1,16 @@ -function parse_Expr(str) - parseall(Expr, str, rule=:statement) -end - @testset "Expr conversion" begin @testset "Quote nodes" begin - @test parseall(Expr, ":(a)", rule=:atom) == QuoteNode(:a) - @test parseall(Expr, ":(:a)", rule=:atom) == Expr(:quote, QuoteNode(:a)) - @test parseall(Expr, ":(1+2)", rule=:atom) == Expr(:quote, Expr(:call, :+, 1, 2)) + @test parseatom(Expr, ":(a)") == QuoteNode(:a) + @test parseatom(Expr, ":(:a)") == Expr(:quote, QuoteNode(:a)) + @test parseatom(Expr, ":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 - @test parseall(Expr, ":true", rule=:atom) == Expr(:quote, true) + @test parseatom(Expr, ":true") == Expr(:quote, true) end @testset "Line numbers" begin @testset "Blocks" begin - @test parse_Expr("begin a\nb\n\nc\nend") == + @test parse(Expr, "begin a\nb\n\nc\nend") == Expr(:block, LineNumberNode(1), :a, @@ -23,7 +19,7 @@ end 
LineNumberNode(4), :c, ) - @test parse_Expr("begin end") == + @test parse(Expr, "begin end") == Expr(:block, LineNumberNode(1) ) @@ -36,7 +32,7 @@ end :b, ) - @test parse_Expr("module A\n\nbody\nend") == + @test parse(Expr, "module A\n\nbody\nend") == Expr(:module, true, :A, @@ -49,7 +45,7 @@ end end @testset "Function definition lines" begin - @test parse_Expr("function f()\na\n\nb\nend") == + @test parse(Expr, "function f()\na\n\nb\nend") == Expr(:function, Expr(:call, :f), Expr(:block, @@ -60,7 +56,7 @@ end :b, ) ) - @test parse_Expr("f() = 1") == + @test parse(Expr, "f() = 1") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -70,14 +66,14 @@ end ) # function/macro without methods - @test parse_Expr("function f end") == + @test parse(Expr, "function f end") == Expr(:function, :f) - @test parse_Expr("macro f end") == + @test parse(Expr, "macro f end") == Expr(:macro, :f) end @testset "elseif" begin - @test parse_Expr("if a\nb\nelseif c\n d\nend") == + @test parse(Expr, "if a\nb\nelseif c\n d\nend") == Expr(:if, :a, Expr(:block, @@ -95,7 +91,7 @@ end end @testset "No line numbers in for/let bindings" begin - @test parse_Expr("for i=is, j=js\nbody\nend") == + @test parse(Expr, "for i=is, j=js\nbody\nend") == Expr(:for, Expr(:block, Expr(:(=), :i, :is), @@ -106,7 +102,7 @@ end :body ) ) - @test parse_Expr("let i=is, j=js\nbody\nend") == + @test parse(Expr, "let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, Expr(:(=), :i, :is), @@ -122,7 +118,7 @@ end @testset "Short form function line numbers" begin # A block is added to hold the line number node - @test parse_Expr("f() = xs") == + @test parse(Expr, "f() = xs") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -130,7 +126,7 @@ end :xs)) # flisp parser quirk: In a for loop the block is not added, despite # this defining a short-form function. 
- @test parse_Expr("for f() = xs\nend") == + @test parse(Expr, "for f() = xs\nend") == Expr(:for, Expr(:(=), Expr(:call, :f), :xs), Expr(:block, @@ -139,7 +135,7 @@ end end @testset "Long form anonymous functions" begin - @test parse_Expr("function (xs...)\nbody end") == + @test parse(Expr, "function (xs...)\nbody end") == Expr(:function, Expr(:..., :xs), Expr(:block, @@ -150,19 +146,19 @@ end @testset "String conversions" begin # String unwrapping / wrapping - @test parse_Expr("\"str\"") == "str" - @test parse_Expr("\"\$(\"str\")\"") == + @test parse(Expr, "\"str\"") == "str" + @test parse(Expr, "\"\$(\"str\")\"") == Expr(:string, Expr(:string, "str")) # Concatenation of string chunks in triple quoted cases - @test parse_Expr("```\n a\n b```") == + @test parse(Expr, "```\n a\n b```") == Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "a\nb") - @test parse_Expr("\"\"\"\n a\n \$x\n b\n c\"\"\"") == + @test parse(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"") == Expr(:string, "a\n", :x, "\nb\nc") end @testset "do block conversion" begin - @test parse_Expr("f(x) do y\n body end") == + @test parse(Expr, "f(x) do y\n body end") == Expr(:do, Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, @@ -172,29 +168,29 @@ end @testset "= to Expr(:kw) conversion" begin # Call - @test parse_Expr("f(a=1)") == + @test parse(Expr, "f(a=1)") == Expr(:call, :f, Expr(:kw, :a, 1)) - @test parse_Expr("f(; b=2)") == + @test parse(Expr, "f(; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) - @test parse_Expr("f(a=1; b=2)") == + @test parse(Expr, "f(a=1; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) # Infix call = is not :kw - @test parse_Expr("(x=1) != 2") == + @test parse(Expr, "(x=1) != 2") == Expr(:call, :!=, Expr(:(=), :x, 1), 2) # Dotcall - @test parse_Expr("f.(a=1; b=2)") == + @test parse(Expr, "f.(a=1; b=2)") == Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) # Named tuples - 
@test parse_Expr("(a=1,)") == + @test parse(Expr, "(a=1,)") == Expr(:tuple, Expr(:(=), :a, 1)) - @test parse_Expr("(a=1,; b=2)") == + @test parse(Expr, "(a=1,; b=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) - @test parse_Expr("(a=1,; b=2; c=3)") == + @test parse(Expr, "(a=1,; b=2; c=3)") == Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :c, 3)), @@ -202,21 +198,21 @@ end Expr(:(=), :a, 1)) # ref - @test parse_Expr("x[i=j]") == + @test parse(Expr, "x[i=j]") == Expr(:ref, :x, Expr(:kw, :i, :j)) # vect/braces - @test parse_Expr("[a=1,; b=2]") == + @test parse(Expr, "[a=1,; b=2]") == Expr(:vect, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) - @test parse_Expr("{a=1,; b=2}") == + @test parse(Expr, "{a=1,; b=2}") == Expr(:braces, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) # dotted = is not :kw - @test parse_Expr("f(a .= 1)") == + @test parse(Expr, "f(a .= 1)") == Expr(:call, :f, Expr(:.=, :a, 1)) end end diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 10c021f775df4..315b59c81eb9f 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -71,16 +71,34 @@ st = ParseStream(code) @test peek(st) == K"NewlineWs" bump(st, TRIVIA_FLAG) emit(st, p1, K"toplevel") -end - -@test JuliaSyntax.build_tree(GreenNode, st) isa JuliaSyntax.GreenNode -# ## Input code -#= -println("-----------------------") -print(code) -println() + @test build_tree(GreenNode, st) isa JuliaSyntax.GreenNode +end -# ## Output tree -show(stdout, MIME"text/plain"(), t, code, show_trivia=true) -=# +@testset "ParseStream constructors" begin + @testset "Byte buffer inputs" begin + # Vector{UInt8} + let + st = ParseStream(Vector{UInt8}("x+y")) + bump(st) + @test build_tree(Expr, st) == :x + @test JuliaSyntax.last_byte(st) == 1 + end + let + st = ParseStream(Vector{UInt8}("x+y"), 3) + bump(st) + @test build_tree(Expr, st) == :y + @test JuliaSyntax.last_byte(st) == 3 + end + # 
Ptr{UInt8}, len + code = "x+y" + GC.@preserve code begin + let + st = ParseStream(pointer(code), 3) + bump(st) + @test build_tree(Expr, st) == :x + @test JuliaSyntax.last_byte(st) == 1 + end + end + end +end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 0a3c49ecf8db0..1af46fbabc750 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -1,52 +1,56 @@ @testset "parser API" begin - @testset "String and buffer input" begin - # String - let - ex,diag,pos = parse(Expr, "x+y\nz") - @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) - @test diag == [] - @test pos == 6 - end - @test parse(Expr, "x+y\nz", rule=:statement) == (:(x+y), [], 4) - @test parse(Expr, "x+y\nz", rule=:atom) == (:x, [], 2) - @test parse(Expr, "x+y\nz", 5, rule=:atom) == (:z, [], 6) - - # Vector{UInt8} - @test parse(Expr, Vector{UInt8}("x+y"), rule=:statement) == (:(x+y), [], 4) - @test parse(Expr, Vector{UInt8}("x+y"), 3, rule=:statement) == (:y, [], 4) - # Ptr{UInt8}, len - code = "x+y" - GC.@preserve code begin - stream = ParseStream(pointer(code), 3) - parse(stream, rule=:statement) - @test JuliaSyntax.build_tree(Expr, stream) == :(x+y) - @test JuliaSyntax.last_byte(stream) == 3 - end + @testset "parse with String input" begin + @test parse(Expr, " x ") == :x + @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) + @test parseatom(Expr, " x ") == :x + # TODO: Fix this situation with trivia here; the brackets are trivia, but + # must be parsed to discover the atom inside. But in GreenTree we only + # place trivia as siblings of the leaf node with identifier `x`, not as + # children. 
+ @test_broken parseatom(Expr, "(x)") == :x # SubString - @test parse(Expr, SubString("x+y"), rule=:statement) == (:(x+y), [], 4) - @test parse(Expr, SubString("x+y"), 1, rule=:atom) == (:x, [], 2) - @test parse(Expr, SubString("x+y"), 3, rule=:atom) == (:y, [], 4) - @test parse(Expr, SubString("x+y",3,3), 1, rule=:atom) == (:y, [], 2) - @test parse(Expr, SubString("α+x"), rule=:statement) == (:(α+x), [], 5) + @test parse(Expr, SubString("x+y")) == :(x+y) + @test parse(Expr, SubString("α+x")) == :(α+x) + @test parseatom(Expr, SubString("x+y",3,3)) == :y + + # Exceptions due to extra trailing syntax + @test_throws JuliaSyntax.ParseError parseatom(Expr, "x+y") + @test_throws JuliaSyntax.ParseError parse(Expr, "x+y\nz") + + # ignore_warnings flag + @test_throws JuliaSyntax.ParseError parse(Expr, "import . .A") + @test parse(Expr, "import . .A", ignore_warnings=true) == :(import ..A) + + # version selection + @test_throws JuliaSyntax.ParseError parse(Expr, "[a ;; b]", version=v"1.6") + @test parse(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b) + + # filename + @test JuliaSyntax.parse(Expr, "begin\na\nend", filename="foo.jl") == + Expr(:block, LineNumberNode(2, Symbol("foo.jl")), :a) + + # ignore_trivia + @test parseatom(Expr, " x ", ignore_trivia=true) == :x + @test_throws JuliaSyntax.ParseError parseatom(Expr, " x ", ignore_trivia=false) end @testset "IO input" begin # IOBuffer io = IOBuffer("x+y") - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 io = IOBuffer("x+y") seek(io, 2) - @test parse(Expr, io, rule=:atom) == (:y, []) + @test parse!(Expr, io, rule=:atom) == (:y, []) @test position(io) == 3 # A GenericIOBuffer, not actually IOBuffer io = IOBuffer(SubString("x+y")) - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 # Another type of GenericIOBuffer io = 
IOBuffer(codeunits("x+y")) - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 # IOStream mktemp() do path, io @@ -54,23 +58,26 @@ close(io) open(path, "r") do io - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 end end end - @testset "parseall" begin - @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) - @test parseall(Expr, " x ", rule=:statement) == :x - @test parseall(Expr, " x ", rule=:atom) == :x - # TODO: Fix this situation with trivia here; the brackets are trivia, but - # must be parsed to discover the atom inside. But in GreenTree we only - # place trivia as siblings of the leaf node with identifier `x`, not as - # children. - @test_broken parseall(Expr, "(x)", rule=:atom) == :x + @testset "parse with String and index input" begin + # String + let + ex,pos = parseall(Expr, "x+y\nz", 1) + @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) + @test pos == 6 + end + @test parse(Expr, "x+y\nz", 1) == (:(x+y), 4) + @test parseatom(Expr, "x+y\nz", 1) == (:x, 2) + @test parseatom(Expr, "x+y\nz", 5) == (:z, 6) - @test_throws JuliaSyntax.ParseError parseall(Expr, "x+y", rule=:atom) - @test_throws JuliaSyntax.ParseError parseall(Expr, "x+y\nz", rule=:statement) + # SubString + @test parse(Expr, SubString("α+x\ny"), 1) == (:(α+x), 5) + @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2) + @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4) end end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index ced5912ce6a6d..d8b52436b438b 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -7,8 +7,11 @@ using JuliaSyntax: # Parsing ParseStream, SourceFile, + parse!, parse, parseall, + parseatom, + build_tree, @K_str, # Nodes GreenNode, @@ -72,11 +75,13 @@ function 
parsers_agree_on_file(filename; show_diff=false) return true end try - ex, diagnostics, _ = parse(Expr, text, filename=filename) + stream = ParseStream(text) + parse!(stream) + ex = build_tree(Expr, stream, filename=filename) if show_diff && ex != fl_ex show_expr_text_diff(show, ex, fl_ex) end - return !JuliaSyntax.any_error(diagnostics) && + return !JuliaSyntax.any_error(stream) && JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) catch exc @@ -111,7 +116,7 @@ function equals_flisp_parse(tree) node_text = sourcetext(tree) # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing # some context from the parent node. - ex,_,_ = parse(Expr, node_text) + ex = parseall(Expr, node_text) fl_ex = fl_parseall(node_text) if Meta.isexpr(fl_ex, :error) return true # Something went wrong in reduction; ignore these cases 😬 @@ -156,7 +161,7 @@ function reduce_test(tree::SyntaxNode) end function reduce_test(text::AbstractString) - tree, _, _ = parse(SyntaxNode, text) + tree, = parseall(SyntaxNode, text) reduce_test(tree) end From 98d1d8ca185b0abd6cea2024db17f8dac21f4174 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 6 Oct 2022 13:33:41 +1000 Subject: [PATCH 0515/1109] Tweaks to allow vendoring into Base (JuliaLang/JuliaSyntax.jl#119) * Simplify `@doc` usage for use in Base bootstrap. * Separate sysimage precompile into part which can be used in Base - when vendoring into base, pkgdir doesn't exist - we need to pass a path to where it's vendored instead. * Relative include for JuliaSyntax tests. This allows these files to be run during precompilation even when JuliaSyntax isn't a standalone module. 
--- JuliaSyntax/src/parser_api.jl | 8 +++++--- JuliaSyntax/sysimage/precompile.jl | 6 ++++++ JuliaSyntax/sysimage/precompile_exec.jl | 6 ++---- JuliaSyntax/test/parser.jl | 6 +++--- JuliaSyntax/test/test_utils.jl | 7 +++---- 5 files changed, 19 insertions(+), 14 deletions(-) create mode 100644 JuliaSyntax/sysimage/precompile.jl diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 9904e84a72b93..50a7d8ed815c5 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -96,7 +96,7 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= tree, last_byte(stream) + 1 end -""" +_parse_docs = """ parse(TreeType, text, [index]; version=VERSION, ignore_trivia=true, @@ -130,12 +130,14 @@ source file name. A `ParseError` will be thrown if any errors or warnings occurred during parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. """ + parse(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1] parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:toplevel, true, T, text; kws...)[1] parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1] -@eval @doc $(@doc parse) parseall -@eval @doc $(@doc parse) parseatom +@eval @doc $_parse_docs parse +@eval @doc $_parse_docs parseall +@eval @doc $_parse_docs parseatom parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...) parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...) 
diff --git a/JuliaSyntax/sysimage/precompile.jl b/JuliaSyntax/sysimage/precompile.jl new file mode 100644 index 0000000000000..a1ae9555f33a2 --- /dev/null +++ b/JuliaSyntax/sysimage/precompile.jl @@ -0,0 +1,6 @@ +function precompile_JuliaSyntax(mod, juliasyntax_path) + Base.include(mod, joinpath(juliasyntax_path, "test", "test_utils.jl")) + Base.include(mod, joinpath(juliasyntax_path, "test", "parser.jl")) + JuliaSyntax.enable_in_core!() + Meta.parse("x+y+z-w .+ [a b c]") +end diff --git a/JuliaSyntax/sysimage/precompile_exec.jl b/JuliaSyntax/sysimage/precompile_exec.jl index 567d3d02ddbcd..99c8069341275 100644 --- a/JuliaSyntax/sysimage/precompile_exec.jl +++ b/JuliaSyntax/sysimage/precompile_exec.jl @@ -1,5 +1,3 @@ import JuliaSyntax -Base.include(@__MODULE__(), joinpath(pkgdir(JuliaSyntax), "test", "test_utils.jl")) -Base.include(@__MODULE__(), joinpath(pkgdir(JuliaSyntax), "test", "parser.jl")) -JuliaSyntax.enable_in_core!() -Meta.parse("x+y+z-w .+ [a b c]") +include("precompile.jl") +precompile_JuliaSyntax(@__MODULE__(), pkgdir(JuliaSyntax)) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 87b39368c5e37..543cad0b03f3e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,10 +1,10 @@ function test_parse(production, code; v=v"1.6") stream = ParseStream(code, version=v) - production(JuliaSyntax.ParseState(stream)) - t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") + production(ParseState(stream)) + t = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") source = SourceFile(code) s = SyntaxNode(source, t) - if JuliaSyntax.kind(s) == K"None" + if kind(s) == K"None" join([sprint(show, MIME("text/x.sexpression"), c) for c in children(s)], ' ') else sprint(show, MIME("text/x.sexpression"), s) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index d8b52436b438b..a3ab3bbd434b3 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl 
@@ -1,11 +1,10 @@ using Test -using JuliaSyntax -using Base.Meta: @dump - -using JuliaSyntax: +# We need a relative include here as JuliaSyntax my come from Base. +using .JuliaSyntax: # Parsing ParseStream, + ParseState, SourceFile, parse!, parse, From 5117c6d465e33a7e668193327f6e33801b619939 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 12 Oct 2022 10:33:41 +1000 Subject: [PATCH 0516/1109] Split char delimiters early and emit K"char" node (JuliaLang/JuliaSyntax.jl#121) Here we split off char delimiters in the tokenizer rather than re-parsing them later during value conversion. Also add a K"char" internal node to cover the delimiters and the literal char content in the green tree. This allows us to remove another special case token error kind (ErrorEofChar) and makes char representation in the tree similar to string representation. --- JuliaSyntax/README.md | 3 ++ JuliaSyntax/src/expr.jl | 3 ++ JuliaSyntax/src/hooks.jl | 5 ++- JuliaSyntax/src/kinds.jl | 3 +- JuliaSyntax/src/parser.jl | 33 +++++++++++++++--- JuliaSyntax/src/syntax_tree.jl | 4 +-- JuliaSyntax/src/tokenize.jl | 64 ++++++++++++++-------------------- JuliaSyntax/test/expr.jl | 8 +++++ JuliaSyntax/test/parser.jl | 8 +++++ JuliaSyntax/test/tokenize.jl | 35 ++++++++++++------- 10 files changed, 105 insertions(+), 61 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index ecce2b3a8e4bb..c66ccd1532332 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -741,6 +741,9 @@ parsing `key=val` pairs inside parentheses. :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))]) ``` +* The character `'` may be written without escaping as `'''` rather than + requiring the form `'\''`. 
+ # Comparisons to other packages ### Official Julia compiler diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 48da0dbe8ed16..2766c736566f1 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -239,6 +239,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym == :do @check length(args) == 3 return Expr(:do, args[1], Expr(:->, args[2], args[3])) + elseif headsym == :char + @check length(args) == 1 + return args[1] end return Expr(headsym, args...) end diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 9d65983cb632e..6f0ecfe85b8c5 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -42,9 +42,6 @@ function _incomplete_tag(n::SyntaxNode) k1 = kind(cs[1]) if k1 == K"ErrorEofMultiComment" return :comment - elseif k1 == K"ErrorEofChar" - # TODO: Make this case into an internal node - return :char end for cc in cs if kind(cc) == K"error" @@ -57,6 +54,8 @@ function _incomplete_tag(n::SyntaxNode) return :string elseif kp == K"cmdstring" return :cmd + elseif kp == K"char" + return :char elseif kp in KSet"block quote let try" return :block elseif kp in KSet"for while function if" diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 476ddc1e18dc2..86f2a931060d2 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -15,7 +15,6 @@ const _kind_names = "BEGIN_ERRORS" # Tokenization errors "ErrorEofMultiComment" - "ErrorEofChar" "ErrorInvalidNumericConstant" "ErrorInvalidOperator" "ErrorInvalidInterpolationTerminator" @@ -874,6 +873,7 @@ const _kind_names = "inert" # QuoteNode; not quasiquote "string" # A string interior node (possibly containing interpolations) "cmdstring" # A cmd string node (containing delimiters plus string) + "char" # A char string node (containing delims + char data) "macrocall" "parameters" # the list after ; in f(; a=1) "toplevel" @@ -1004,7 +1004,6 @@ const _nonunique_kind_names = Set([ K"Identifier" 
K"ErrorEofMultiComment" - K"ErrorEofChar" K"ErrorInvalidNumericConstant" K"ErrorInvalidOperator" K"ErrorInvalidInterpolationTerminator" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b3b6d7f3c72f8..f5e202a6804b7 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1561,7 +1561,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K".") this_iter_valid_macroname = true end - elseif k == K"'" + elseif k == K"'" && !preceding_whitespace(t) if !is_suffixed(t) # f' ==> (' f) bump(ps, TRIVIA_FLAG) @@ -3148,7 +3148,7 @@ function parse_string(ps::ParseState, raw::Bool) else # Missing delimiter recovery # "str ==> (string "str" (error-t)) - bump_invisible(ps, K"error", TRIVIA_FLAG, error="Unterminated string literal") + bump_invisible(ps, K"error", TRIVIA_FLAG, error="unterminated string literal") end # String interpolations # "$x$y$z" ==> (string x y z) @@ -3197,7 +3197,32 @@ function parse_atom(ps::ParseState, check_identifiers=true) mark = position(ps) leading_kind = peek(ps) # todo: Reorder to put most likely tokens first? 
- if leading_kind == K":" + if leading_kind == K"'" + # char literal + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"Char" + bump(ps) + if peek(ps) == K"'" + # 'a' ==> (char 'a') + # 'α' ==> (char 'α') + # '\xce\xb1' ==> (char 'α') + bump(ps, TRIVIA_FLAG) + else + # 'a ==> (char 'a' (error-t)) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="unterminated character literal") + end + elseif k == K"'" + # '' ==> (char (error)) + bump_invisible(ps, K"error", error="empty character literal") + else + # ' ==> (char (error)) + @check k == K"EndMarker" + bump_invisible(ps, K"error", error="unterminated character literal") + end + emit(ps, mark, K"char") + elseif leading_kind == K":" # symbol/expression quote # :foo ==> (quote foo) t = peek_token(ps, 2) @@ -3275,7 +3300,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) else bump_invisible(ps, K"error", TRIVIA_FLAG, - error="Unterminated string literal") + error="unterminated string literal") end t = peek_token(ps) k = kind(t) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 6ec97e6b49f3f..002caa555fd7a 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -37,10 +37,10 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In false elseif k == K"Char" v, err, _ = unescape_julia_string(val_str, false, false) - if err + if err || length(v) != 1 ErrorVal() else - v[2] + only(v) end elseif k == K"Identifier" if has_flags(head(raw), RAW_STRING_FLAG) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 9c2bfd9336a7f..9bbcb7d7db25a 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -15,7 +15,6 @@ include("tokenize_utils.jl") # Error kind => description TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... 
=#", - K"ErrorEofChar" => "unterminated character literal", K"ErrorInvalidNumericConstant" => "invalid numeric constant", K"ErrorInvalidOperator" => "invalid operator", K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", @@ -263,9 +262,9 @@ end Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. """ -function emit(l::Lexer, kind::Kind) +function emit(l::Lexer, kind::Kind, maybe_op=true) suffix = false - if optakessuffix(kind) + if optakessuffix(kind) && maybe_op while isopsuffix(peekchar(l)) readchar(l) suffix = true @@ -448,6 +447,11 @@ function lex_string_chunk(l) end return emit(l, K"Whitespace") elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) + if state.delim == '\'' && l.last_token == K"'" && dpeekchar(l)[2] == '\'' + # Handle ''' + readchar(l) + return emit(l, K"Char") + end # Terminate string pop!(l.string_states) readchar(l) @@ -456,7 +460,8 @@ function lex_string_chunk(l) return emit(l, state.delim == '"' ? K"\"\"\"" : K"```") else - return emit(l, state.delim == '"' ? K"\"" : K"`") + return emit(l, state.delim == '"' ? K"\"" : + state.delim == '`' ? K"`" : K"'", false) end end # Read a chunk of string characters @@ -516,7 +521,8 @@ function lex_string_chunk(l) end end end - return emit(l, state.delim == '"' ? K"String" : K"CmdString") + return emit(l, state.delim == '"' ? K"String" : + state.delim == '`' ? K"CmdString" : K"Char") end # Lex whitespace, a whitespace char `c` has been consumed @@ -859,41 +865,23 @@ function lex_digit(l::Lexer, kind) return emit(l, kind) end -function lex_prime(l, doemit = true) - if l.last_token == K"Identifier" || - is_contextual_keyword(l.last_token) || - is_word_operator(l.last_token) || - l.last_token == K"." 
|| - l.last_token == K")" || - l.last_token == K"]" || - l.last_token == K"}" || - l.last_token == K"'" || - l.last_token == K"end" || is_literal(l.last_token) +function lex_prime(l) + if l.last_token == K"Identifier" || + is_contextual_keyword(l.last_token) || + is_word_operator(l.last_token) || + l.last_token == K"." || + l.last_token == K")" || + l.last_token == K"]" || + l.last_token == K"}" || + l.last_token == K"'" || + l.last_token == K"end" || + is_literal(l.last_token) + # FIXME ^ This doesn't cover all cases - probably needs involvement + # from the parser state. return emit(l, K"'") else - if accept(l, '\'') - if accept(l, '\'') - return doemit ? emit(l, K"Char") : EMPTY_TOKEN - else - # Empty char literal - # Arguably this should be an error here, but we generally - # look at the contents of the char literal in the parser, - # so we defer erroring until there. - return doemit ? emit(l, K"Char") : EMPTY_TOKEN - end - end - while true - c = readchar(l) - if c == EOF_CHAR - return doemit ? emit_error(l, K"ErrorEofChar") : EMPTY_TOKEN - elseif c == '\\' - if readchar(l) == EOF_CHAR - return doemit ? emit_error(l, K"ErrorEofChar") : EMPTY_TOKEN - end - elseif c == '\'' - return doemit ? 
emit(l, K"Char") : EMPTY_TOKEN - end - end + push!(l.string_states, StringState(false, true, '\'', 0)) + return emit(l, K"'", false) end end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 850853f5cbc10..9202c22b55e1c 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -157,6 +157,14 @@ Expr(:string, "a\n", :x, "\nb\nc") end + @testset "Char conversions" begin + @test parse(Expr, "'a'") == 'a' + @test parse(Expr, "'α'") == 'α' + @test parse(Expr, "'\\xce\\xb1'") == 'α' + # FIXME + # @test_throws ParseError parse(Expr, "'abcde'") + end + @testset "do block conversion" begin @test parse(Expr, "f(x) do y\n body end") == Expr(:do, Expr(:call, :f, :x), diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 543cad0b03f3e..f523b6d3ed44c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -567,6 +567,14 @@ tests = [ "(1:2)" => "(call-i 1 : 2)" ], JuliaSyntax.parse_atom => [ + # char literal + "'a'" => "(char 'a')" + "'α'" => "(char 'α')" + "'\\xce\\xb1'" => "(char 'α')" + "'a" => "(char 'a' (error-t))" + "''" => "(char (error))" + "'" => "(char (error))" + # symbol/expression quote ":foo" => "(quote foo)" # Literal colons ":)" => ":" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 43e87389ee699..c6aaccb7bcd4d 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -109,7 +109,7 @@ end # testset K"NewlineWs",K"[",K"Integer",K"*",K"Integer",K",",K"Integer", K";",K"Integer",K",",K"Integer",K"]", - K"NewlineWs",K"\"",K"String",K"\"",K";",K"Whitespace",K"Char", + K"NewlineWs",K"\"",K"String",K"\"",K";",K"Whitespace",K"'",K"Char",K"'", K"NewlineWs",K"(",K"Identifier",K"&&",K"Identifier",K")",K"||", K"(",K"Identifier",K"||",K"Identifier",K")", @@ -130,7 +130,7 @@ end # testset K"NewlineWs",K"{",K"}", - K"NewlineWs",K"ErrorEofChar",K"EndMarker"] + K"NewlineWs",K"'",K"Char",K"EndMarker"] for (i, n) in enumerate(tokenize(str)) @test kind(n) == 
kinds[i] @@ -190,6 +190,8 @@ function test_roundtrip(str, kind, val) @test untokenize(t, str) == val end +roundtrip(str) = join(untokenize.(collect(tokenize(str)), str)) + @testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin test_roundtrip("1234 .+1", K"Integer", "1234") test_roundtrip("1234.0+1", K"Float", "1234.0") @@ -228,15 +230,24 @@ end D = ImageMagick.load(fn) """ tokens = collect(tokenize(str)) - @test string(untokenize(tokens[16], str))==string(untokenize(tokens[17], str))=="'" + @test string(untokenize(tokens[16], str)) == string(untokenize(tokens[17], str))=="'" + + @test roundtrip("'a'") == "'a'" + @test kind.(collect(tokenize("'a'"))) == [K"'", K"Char", K"'", K"EndMarker"] + + # ' is not an operator here, so doesn't consume the suffix ᵀ + @test roundtrip("'ᵀ'") == "'ᵀ'" + @test kind.(collect(tokenize("'₁'"))) == [K"'", K"Char", K"'", K"EndMarker"] + + @test roundtrip("''") == "''" + @test kind.(collect(tokenize("''"))) == [K"'", K"'", K"EndMarker"] + + @test roundtrip("'''") == "'''" + @test kind.(collect(tokenize("'''"))) == [K"'", K"Char", K"'", K"EndMarker"] - test_roundtrip("'a'", K"Char", "'a'") - test_roundtrip("''", K"Char", "''") - test_roundtrip("'''", K"Char", "'''") - test_roundtrip("''''", K"Char", "'''") + @test roundtrip("''''") == "''''" + @test kind.(collect(tokenize("''''"))) == [K"'", K"Char", K"'", K"'", K"EndMarker"] - @test tok("''''", 1).kind == K"Char" - @test tok("''''", 2).kind == K"'" @test tok("()'", 3).kind == K"'" @test tok("{}'", 3).kind == K"'" @test tok("[]'", 3).kind == K"'" @@ -244,6 +255,7 @@ end @test tok("mutable'", 2).kind == K"'" @test tok("as'", 2).kind == K"'" @test tok("isa'", 2).kind == K"'" + @test untokenize.(collect(tokenize("a'ᵀ")), "a'ᵀ") == ["a", "'ᵀ", ""] end @testset "keywords" begin @@ -293,9 +305,8 @@ end end @testset "errors" begin - @test tok("#= #= =#", 1).kind == K"ErrorEofMultiComment" - @test tok("'dsadsa", 1).kind == K"ErrorEofChar" - @test tok("aa **", 3).kind == 
K"ErrorInvalidOperator" + @test tok("#= #= =#", 1).kind == K"ErrorEofMultiComment" + @test tok("aa **", 3).kind == K"ErrorInvalidOperator" end @testset "xor_eq" begin From e42c8cf87aa463b077dc76e2518d71fdf3467c36 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 12 Oct 2022 11:28:42 +1000 Subject: [PATCH 0517/1109] Fix incomplete detection for tree with no parents (JuliaLang/JuliaSyntax.jl#122) --- JuliaSyntax/src/hooks.jl | 3 +++ JuliaSyntax/test/hooks.jl | 17 ++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 6f0ecfe85b8c5..ca4c9974020ef 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -49,6 +49,9 @@ function _incomplete_tag(n::SyntaxNode) end end end + if isnothing(c.parent) + return :other + end kp = kind(c.parent) if kp == K"string" return :string diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 2604e9f2fa2f1..953df0aeea326 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -1,17 +1,17 @@ @testset "Hooks for Core integration" begin @testset "whitespace parsing" begin - @test JuliaSyntax.core_parser_hook("", "somefile", 0, :statement) == Core.svec(nothing, 0) - @test JuliaSyntax.core_parser_hook("", "somefile", 0, :statement) == Core.svec(nothing, 0) + @test JuliaSyntax._core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) + @test JuliaSyntax._core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) - @test JuliaSyntax.core_parser_hook(" ", "somefile", 2, :statement) == Core.svec(nothing,2) - @test JuliaSyntax.core_parser_hook(" #==# ", "somefile", 6, :statement) == Core.svec(nothing,6) + @test JuliaSyntax._core_parser_hook(" ", "somefile", 1, 2, :statement) == Core.svec(nothing,2) + @test JuliaSyntax._core_parser_hook(" #==# ", "somefile", 1, 6, :statement) == Core.svec(nothing,6) - @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 0, :statement) == 
Core.svec(:x,4) - @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 0, :atom) == Core.svec(:x,2) + @test JuliaSyntax._core_parser_hook(" x \n", "somefile", 1, 0, :statement) == Core.svec(:x,4) + @test JuliaSyntax._core_parser_hook(" x \n", "somefile", 1, 0, :atom) == Core.svec(:x,2) end @testset "filename is used" begin - ex = JuliaSyntax.core_parser_hook("@a", "somefile", 0, :statement)[1] + ex = JuliaSyntax._core_parser_hook("@a", "somefile", 1, 0, :statement)[1] @test Meta.isexpr(ex, :macrocall) @test ex.args[2] == LineNumberNode(1, "somefile") end @@ -95,5 +95,8 @@ end end JuliaSyntax.enable_in_core!(false) + + # Should not throw + @test JuliaSyntax._core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr end end From 45c43fdbadb8e87dcf79916108a0e60c19d2ae7d Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 14 Oct 2022 15:45:05 +1000 Subject: [PATCH 0518/1109] Remove TRY_CATCH_FINALLY_FLAG (JuliaLang/JuliaSyntax.jl#123) A flag is a waste for this; we can just use a different kind for this horrible edge case. 
--- JuliaSyntax/src/expr.jl | 7 ++++--- JuliaSyntax/src/kinds.jl | 2 ++ JuliaSyntax/src/parse_stream.jl | 3 --- JuliaSyntax/src/parser.jl | 7 ++++--- JuliaSyntax/test/parser.jl | 22 ++++++++++++++++------ 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 2766c736566f1..748f351601e85 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -150,19 +150,19 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, pushfirst!(args, args[end]) pop!(args) end - elseif headsym == :try + elseif headsym in (:try, :try_finally_catch) # Try children in source order: # try_block catch_var catch_block else_block finally_block # Expr ordering: # try_block catch_var catch_block [finally_block] [else_block] catch_ = nothing - if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + if headsym === :try_finally_catch catch_ = pop!(args) catch_var = pop!(args) end finally_ = pop!(args) else_ = pop!(args) - if has_flags(node, TRY_CATCH_AFTER_FINALLY_FLAG) + if headsym === :try_finally_catch pop!(args) pop!(args) push!(args, catch_var) @@ -176,6 +176,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, if else_ !== false push!(args, else_) end + headsym = :try elseif headsym == :filter pushfirst!(args, last(args)) pop!(args) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 86f2a931060d2..26ab76a90400c 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -897,6 +897,8 @@ const _kind_names = "flatten" "comprehension" "typed_comprehension" + # Special kind for compatibility with the ever-ugly try-finally-catch ordering + "try_finally_catch" "END_SYNTAX_KINDS" ] diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index c62e64b27eec7..00790f1d9def9 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -15,8 +15,6 @@ const DOTOP_FLAG = RawFlags(1<<2) const 
TRIPLE_STRING_FLAG = RawFlags(1<<3) # Set when a string or identifier needs "raw string" unescaping const RAW_STRING_FLAG = RawFlags(1<<4) -# try-finally-catch -const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5) # Record whether operator has a suffix const SUFFIXED_FLAG = RawFlags(1<<6) @@ -75,7 +73,6 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_infix(head) && (str = str*"i") has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") has_flags(head, RAW_STRING_FLAG) && (str = str*"r") - has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f") is_suffixed(head) && (str = str*"S") n = numeric_flags(head) n != 0 && (str = str*string(n)) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f5e202a6804b7..9f968d03a4403 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2085,6 +2085,7 @@ end # # flisp: embedded in parse_resword function parse_try(ps) + out_kind = K"try" mark = position(ps) bump(ps, TRIVIA_FLAG) parse_block(ps) @@ -2132,15 +2133,15 @@ function parse_try(ps) # in which these blocks execute. 
bump_trivia(ps) if !has_catch && peek(ps) == K"catch" - # try x finally y catch e z end ==> (try-f (block x) false false false (block y) e (block z)) - flags |= TRY_CATCH_AFTER_FINALLY_FLAG + # try x finally y catch e z end ==> (try_finally_catch (block x) false false false (block y) e (block z)) + out_kind = K"try_finally_catch" m = position(ps) parse_catch(ps) emit_diagnostic(ps, m, position(ps), warning="`catch` after `finally` will execute out of order") end bump_closing_token(ps, K"end") - emit(ps, mark, K"try", flags) + emit(ps, mark, out_kind, flags) end function parse_catch(ps::ParseState) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f523b6d3ed44c..082198fa4740e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,13 +1,17 @@ -function test_parse(production, code; v=v"1.6") +function test_parse(production, code; v=v"1.6", expr=false) stream = ParseStream(code, version=v) production(ParseState(stream)) t = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") source = SourceFile(code) s = SyntaxNode(source, t) - if kind(s) == K"None" - join([sprint(show, MIME("text/x.sexpression"), c) for c in children(s)], ' ') + if expr + JuliaSyntax.remove_linenums!(Expr(s)) else - sprint(show, MIME("text/x.sexpression"), s) + if kind(s) == K"None" + join([sprint(show, MIME("text/x.sexpression"), c) for c in children(s)], ' ') + else + sprint(show, MIME("text/x.sexpression"), s) + end end end @@ -482,7 +486,8 @@ tests = [ ((v=v"1.8",), "try else end") => "(try (block) false false (error (block)) false)" ((v=v"1.7",), "try catch ; else end") => "(try (block) false (block) (error (block)) false)" # finally before catch :-( - "try x finally y catch e z end" => "(try-f (block x) false false false (block y) e (block z))" + "try x finally y catch e z end" => "(try_finally_catch (block x) false false false (block y) e (block z))" => + Expr(:try, Expr(:block, :x), :e, Expr(:block, :z), Expr(:block, :y)) ], 
JuliaSyntax.parse_imports => [ "import A as B: x" => "(import (: (error (as (. A) B)) (. x)))" @@ -816,7 +821,12 @@ broken_tests = [ else opts = NamedTuple() end - @test test_parse(production, input; opts...) == output + if output isa Pair + @test test_parse(production, input; opts...) == output[1] + @test test_parse(production, input; opts..., expr=true) == output[2] + else + @test test_parse(production, input; opts...) == output + end end end @testset "Broken $production" for (production, test_specs) in broken_tests From 9340cfb59017ee3557bc2e69113aab1f70c6bbfd Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 15 Oct 2022 06:03:54 +1000 Subject: [PATCH 0519/1109] Record fixity of call type in flags (JuliaLang/JuliaSyntax.jl#124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now record which precise call syntax was used out of the four options: * Prefix calls with parens * Prefix operator calls * Infix operator calls * Postfix operator calls This allows us to distinguish keyword arguments from assignment, fixing several bugs with = to kw conversion. Also, change to emit unadorned postfix adjoint as `(call-post x ')` rather than as a syntactic operator `(' x)`, for consistency with suffixed versions like `x'ᵀ`. 
--- JuliaSyntax/src/expr.jl | 54 ++++++++++++------------ JuliaSyntax/src/parse_stream.jl | 40 +++++++++++++----- JuliaSyntax/src/parser.jl | 74 ++++++++++++++++----------------- JuliaSyntax/src/tokenize.jl | 1 - JuliaSyntax/test/parser.jl | 58 +++++++++++++++----------- 5 files changed, 125 insertions(+), 102 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 748f351601e85..4d536ef2d73ba 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -99,46 +99,46 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = _to_expr(node_args[1], need_linenodes=false) args[2] = _to_expr(node_args[2]) else - eq_to_kw = headsym == :call && !has_flags(node, INFIX_FLAG) || - headsym == :ref || - (headsym == :parameters && !inside_vect_or_braces) || - (headsym == :tuple && inside_dot_expr) + eq_to_kw_in_call = + headsym == :call && is_prefix_call(node) || + headsym == :ref + eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces || + (headsym == :tuple && inside_dot_expr) in_dot = headsym == :. in_vb = headsym == :vect || headsym == :braces - if insert_linenums - if isempty(node_args) - push!(args, source_location(LineNumberNode, node.source, node.position)) - else - for i in 1:length(node_args) - n = node_args[i] - args[2*i-1] = source_location(LineNumberNode, n.source, n.position) - args[2*i] = _to_expr(n, - eq_to_kw=eq_to_kw, - inside_dot_expr=in_dot, - inside_vect_or_braces=in_vb) - end - end + if insert_linenums && isempty(node_args) + push!(args, source_location(LineNumberNode, node.source, node.position)) else for i in 1:length(node_args) - args[i] = _to_expr(node_args[i], - eq_to_kw=eq_to_kw, - inside_dot_expr=in_dot, - inside_vect_or_braces=in_vb) + n = node_args[i] + if insert_linenums + args[2*i-1] = source_location(LineNumberNode, n.source, n.position) + end + eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all + args[insert_linenums ? 
2*i : i] = + _to_expr(n, eq_to_kw=eq_to_kw, + inside_dot_expr=in_dot, + inside_vect_or_braces=in_vb) end end end - # Julia's standard `Expr` ASTs have children stored in a canonical - # order which is often not always source order. We permute the children - # here as necessary to get the canonical order. - if is_infix(node.raw) - args[2], args[1] = args[1], args[2] - end # Special cases for various expression heads loc = source_location(LineNumberNode, node.source, node.position) if headsym == :macrocall insert!(args, 2, loc) elseif headsym in (:call, :ref) + # Julia's standard `Expr` ASTs have children stored in a canonical + # order which is often not always source order. We permute the children + # here as necessary to get the canonical order. + if is_infix_op_call(node) || is_postfix_op_call(node) + args[2], args[1] = args[1], args[2] + end + # Lower (call x ') to special ' head + if is_postfix_op_call(node) && args[1] == Symbol("'") + popfirst!(args) + headsym = Symbol("'") + end # Move parameters block to args[2] if length(args) > 1 && Meta.isexpr(args[end], :parameters) insert!(args, 2, args[end]) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 00790f1d9def9..ec50ef766690d 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -5,18 +5,25 @@ # TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias? const RawFlags = UInt16 const EMPTY_FLAGS = RawFlags(0) +# Applied to tokens which are syntax trivia after parsing const TRIVIA_FLAG = RawFlags(1<<0) -# Some of the following flags are head-specific and could probably be allowed -# to cover the same bits... 
-const INFIX_FLAG = RawFlags(1<<1) -# Record whether syntactic operators were dotted -const DOTOP_FLAG = RawFlags(1<<2) + +# Record whether operators are dotted +const DOTOP_FLAG = RawFlags(1<<1) +# Record whether operator has a suffix +const SUFFIXED_FLAG = RawFlags(1<<2) + +# Distinguish various syntaxes which are mapped to K"call" +const PREFIX_CALL_FLAG = RawFlags(0<<3) +const INFIX_FLAG = RawFlags(1<<3) +const PREFIX_OP_FLAG = RawFlags(2<<3) +const POSTFIX_OP_FLAG = RawFlags(3<<3) + +# The next two bits could overlap with the previous two if necessary # Set when kind == K"String" was triple-delimited as with """ or ``` -const TRIPLE_STRING_FLAG = RawFlags(1<<3) +const TRIPLE_STRING_FLAG = RawFlags(1<<5) # Set when a string or identifier needs "raw string" unescaping -const RAW_STRING_FLAG = RawFlags(1<<4) -# Record whether operator has a suffix -const SUFFIXED_FLAG = RawFlags(1<<6) +const RAW_STRING_FLAG = RawFlags(1<<6) # Token-only flag # Record whether a token had preceding whitespace @@ -34,6 +41,10 @@ function set_numeric_flags(n::Integer) f end +function call_type_flags(f::RawFlags) + f & 0b11000 +end + function numeric_flags(f::RawFlags) Int((f >> 8) % UInt8) end @@ -70,7 +81,9 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) if include_flag_suff && suffix_flags != EMPTY_FLAGS str = str*"-" is_trivia(head) && (str = str*"t") - is_infix(head) && (str = str*"i") + is_infix_op_call(head) && (str = str*"i") + is_prefix_op_call(head) && (str = str*"pre") + is_postfix_op_call(head) && (str = str*"post") has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") has_flags(head, RAW_STRING_FLAG) && (str = str*"r") is_suffixed(head) && (str = str*"S") @@ -90,8 +103,13 @@ flags(x) = flags(head(x)) # Predicates based on flags() has_flags(x, test_flags) = has_flags(flags(x), test_flags) +call_type_flags(x) = call_type_flags(flags(x)) + is_trivia(x) = has_flags(x, TRIVIA_FLAG) -is_infix(x) = has_flags(x, INFIX_FLAG) +is_prefix_call(x) = 
call_type_flags(x) == PREFIX_CALL_FLAG +is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG +is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG +is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG is_dotted(x) = has_flags(x, DOTOP_FLAG) is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) is_decorated(x) = is_dotted(x) || is_suffixed(x) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9f968d03a4403..fa7e8f6eee087 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -544,7 +544,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { if k == K"~" if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2)) # Unary ~ in space sensitive context is not assignment precedence - # [a ~b] ==> (hcat a (call ~ b)) + # [a ~b] ==> (hcat a (call-pre ~ b)) return end # ~ is the only non-syntactic assignment-precedence operator. @@ -885,8 +885,8 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) is_both_unary_and_binary(t) && !preceding_whitespace(peek_token(ps, 2)) # The following is two elements of a hcat - # [x +y] ==> (hcat x (call + y)) - # [x+y +z] ==> (hcat (call-i x + y) (call + z)) + # [x +y] ==> (hcat x (call-pre + y)) + # [x+y +z] ==> (hcat (call-i x + y) (call-pre + z)) # Conversely the following are infix calls # [x +₁y] ==> (vect (call-i x +₁ y)) # [x+y+z] ==> (vect (call-i x + y z)) @@ -914,7 +914,7 @@ function parse_chain(ps::ParseState, down, op_kind) if ps.space_sensitive && preceding_whitespace(t) && is_both_unary_and_binary(t) && !preceding_whitespace(peek_token(ps, 2)) - # [x +y] ==> (hcat x (call + y)) + # [x +y] ==> (hcat x (call-pre + y)) break end bump(ps, TRIVIA_FLAG) @@ -948,16 +948,16 @@ function parse_unary_subtype(ps::ParseState) elseif k2 in KSet"{ (" # parse <:{T}(x::T) or <:(x::T) like other unary operators # <:{T}(x::T) ==> (call (curly <: T) (:: x T)) - # <:(x::T) ==> (<: (:: x T)) + # <:(x::T) ==> (<:-pre (:: x T)) parse_where(ps, 
parse_juxtapose) else - # <: A where B ==> (<: (where A B)) + # <: A where B ==> (<:-pre (where A B)) mark = position(ps) bump(ps, TRIVIA_FLAG) parse_where(ps, parse_juxtapose) # Flisp parser handled this, but I don't know how it can happen... @check peek_behind(ps).kind != K"tuple" - emit(ps, mark, k) + emit(ps, mark, k, PREFIX_OP_FLAG) end else parse_where(ps, parse_juxtapose) @@ -1015,7 +1015,7 @@ function is_juxtapose(ps, prev_k, t) # Not juxtaposition - parse_juxtapose will consume only the first token. # x.3 ==> x # sqrt(2)2 ==> (call sqrt 2) - # x' y ==> x + # x' y ==> (call-post x ') # x 'y ==> x return !preceding_whitespace(t) && @@ -1039,7 +1039,7 @@ end # 2(x) ==> (call-i 2 * x) # (2)(3)x ==> (call-i 2 * 3 x) # (x-1)y ==> (call-i (call-i x - 1) * y) -# x'y ==> x +# x'y ==> (call-i (call-post x ') * y) # # flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) @@ -1098,11 +1098,11 @@ function parse_unary(ps::ParseState) if is_prec_power(k3) || k3 in KSet"[ {" # `[`, `{` (issue #18851) and `^` have higher precedence than # unary negation - # -2^x ==> (call - (call-i 2 ^ x)) - # -2[1, 3] ==> (call - (ref 2 1 3)) + # -2^x ==> (call-pre - (call-i 2 ^ x)) + # -2[1, 3] ==> (call-pre - (ref 2 1 3)) bump(ps) parse_factor(ps) - emit(ps, mark, K"call") + emit(ps, mark, K"call", PREFIX_OP_FLAG) else # We have a signed numeric literal. 
Glue the operator to the # next token to create a signed literal: @@ -1115,17 +1115,17 @@ function parse_unary(ps::ParseState) end end # Things which are not quite negative literals result in a unary call instead - # -0x1 ==> (call - 0x01) - # - 2 ==> (call - 2) - # .-2 ==> (call .- 2) + # -0x1 ==> (call-pre - 0x01) + # - 2 ==> (call-pre - 2) + # .-2 ==> (call-pre .- 2) parse_unary_call(ps) end # Parse calls to unary operators and prefix calls involving arbitrary operators # with bracketed arglists (as opposed to infix notation) # -# +a ==> (call + a) -# +(a,b) ==> (call + a b) +# +a ==> (call-pre + a) +# +(a,b) ==> (call-pre + a b) # # flisp: parse-unary-call function parse_unary_call(ps::ParseState) @@ -1208,33 +1208,33 @@ function parse_unary_call(ps::ParseState) else # Unary function calls with brackets as grouping, not an arglist if opts.is_block - # +(a;b) ==> (call + (block a b)) + # +(a;b) ==> (call-pre + (block a b)) emit(ps, mark_before_paren, K"block") end # Not a prefix operator call but a block; `=` is not `kw` - # +(a=1) ==> (call + (= a 1)) + # +(a=1) ==> (call-pre + (= a 1)) # Unary operators have lower precedence than ^ - # +(a)^2 ==> (call + (call-i a ^ 2)) - # +(a)(x,y)^2 ==> (call + (call-i (call a x y) ^ 2)) + # +(a)^2 ==> (call-pre + (call-i a ^ 2)) + # +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2)) parse_call_chain(ps, mark_before_paren) parse_factor_with_initial_ex(ps, mark_before_paren) - emit(ps, mark, op_node_kind) + emit(ps, mark, op_node_kind, PREFIX_OP_FLAG) end else if is_unary_op(op_t) # Normal unary calls - # +x ==> (call + x) - # √x ==> (call √ x) - # ±x ==> (call ± x) + # +x ==> (call-pre + x) + # √x ==> (call-pre √ x) + # ±x ==> (call-pre ± x) bump(ps, op_tok_flags) else - # /x ==> (call (error /) x) - # +₁ x ==> (call (error +₁) x) - # .<: x ==> (call (error .<:) x) + # /x ==> (call-pre (error /) x) + # +₁ x ==> (call-pre (error +₁) x) + # .<: x ==> (call-pre (error .<:) x) bump(ps, error="not a unary operator") end 
parse_unary(ps) - emit(ps, mark, op_node_kind) + emit(ps, mark, op_node_kind, PREFIX_OP_FLAG) end end @@ -1433,6 +1433,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) finish_macroname(ps, mark, valid_macroname, macro_name_position) end # f(a,b) ==> (call f a b) + # f(a; b=1) ==> (call f a (parameters (b 1))) + # (a=1)() ==> (call (= a 1)) # f (a) ==> (call f (error-t) a b) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) @@ -1457,6 +1459,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) K"]", ps.end_symbol) # a[i] ==> (ref a i) # a[i,j] ==> (ref a i j) + # (a=1)[] ==> (ref (= a 1)) # T[x y] ==> (typed_hcat T x y) # T[x ; y] ==> (typed_vcat T x y) # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) @@ -1562,15 +1565,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) this_iter_valid_macroname = true end elseif k == K"'" && !preceding_whitespace(t) - if !is_suffixed(t) - # f' ==> (' f) - bump(ps, TRIVIA_FLAG) - emit(ps, mark, k) - else - # f'ᵀ ==> (call 'ᵀ f) - bump(ps) - emit(ps, mark, K"call", INFIX_FLAG) - end + # f' ==> (call-post f ') + # f'ᵀ ==> (call-post f 'ᵀ) + bump(ps) + emit(ps, mark, K"call", POSTFIX_OP_FLAG) elseif k == K"{" # Type parameter curlies and macro calls if is_macrocall diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 9bbcb7d7db25a..ba3c782b05bce 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -945,7 +945,6 @@ function lex_backslash(l::Lexer) return emit(l, K"\\") end -# TODO .op function lex_dot(l::Lexer) if accept(l, '.') if accept(l, '.') diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 082198fa4740e..1a050dadeaccf 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -48,7 +48,7 @@ tests = [ "a .+= b" => "(.+= a b)" "a, b = c, d" => "(= (tuple a b) (tuple c d))" "x, = xs" => "(= (tuple x) xs)" - "[a ~b]" => "(hcat a (call ~ b))" + "[a ~b]" => "(hcat a (call-pre ~ 
b))" "a ~ b" => "(call-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" ], @@ -122,8 +122,8 @@ tests = [ "a + b .+ c" => "(call-i (call-i a + b) .+ c)" # parse_with_chains: # The following is two elements of a hcat - "[x +y]" => "(hcat x (call + y))" - "[x+y +z]" => "(hcat (call-i x + y) (call + z))" + "[x +y]" => "(hcat x (call-pre + y))" + "[x+y +z]" => "(hcat (call-i x + y) (call-pre + z))" # Conversely the following are infix calls "[x +₁y]" => "(vect (call-i x +₁ y))" "[x+y+z]" => "(vect (call-i x + y z))" @@ -142,14 +142,14 @@ tests = [ "2(x)" => "(call-i 2 * x)" "(2)(3)x" => "(call-i 2 * 3 x)" "(x-1)y" => "(call-i (call-i x - 1) * y)" - "x'y" => "(call-i (' x) * y)" + "x'y" => "(call-i (call-post x ') * y)" # errors "\"a\"\"b\"" => "(call-i (string \"a\") * (error-t) (string \"b\"))" "\"a\"x" => "(call-i (string \"a\") * (error-t) x)" # Not juxtaposition - parse_juxtapose will consume only the first token. "x.3" => "x" "sqrt(2)2" => "(call sqrt 2)" - "x' y" => "(' x)" + "x' y" => "(call-post x ')" "x 'y" => "x" "0xenomorph" => "0x0e" ], @@ -157,13 +157,13 @@ tests = [ ":T" => "(quote T)" "in::T" => "(:: in T)" "isa::T" => "(:: isa T)" - "-2^x" => "(call - (call-i 2 ^ x))" - "-2[1, 3]" => "(call - (ref 2 1 3))" + "-2^x" => "(call-pre - (call-i 2 ^ x))" + "-2[1, 3]" => "(call-pre - (ref 2 1 3))" "-2" => "-2" "+2.0" => "2.0" - "-0x1" => "(call - 0x01)" - "- 2" => "(call - 2)" - ".-2" => "(call .- 2)" + "-0x1" => "(call-pre - 0x01)" + "- 2" => "(call-pre - 2)" + ".-2" => "(call-pre .- 2)" ], JuliaSyntax.parse_unary_call => [ # Standalone dotted operators are parsed as (|.| op) @@ -179,7 +179,7 @@ tests = [ "*(x)" => "(call * x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" - "+(a=1,)" => "(call + (= a 1))" + "+(a=1,)" => "(call + (= a 1))" => Expr(:call, :+, Expr(:kw, :a, 1)) "+(a...)" => "(call + (... 
a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" @@ -189,19 +189,19 @@ tests = [ "+(a,b)^2" => "(call-i (call + a b) ^ 2)" "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" # Unary function calls with brackets as grouping, not an arglist - "+(a;b)" => "(call + (block a b))" - "+(a=1)" => "(call + (= a 1))" + "+(a;b)" => "(call-pre + (block a b))" + "+(a=1)" => "(call-pre + (= a 1))" => Expr(:call, :+, Expr(:(=), :a, 1)) # Unary operators have lower precedence than ^ - "+(a)^2" => "(call + (call-i a ^ 2))" - "+(a)(x,y)^2" => "(call + (call-i (call a x y) ^ 2))" + "+(a)^2" => "(call-pre + (call-i a ^ 2))" + "+(a)(x,y)^2" => "(call-pre + (call-i (call a x y) ^ 2))" # Normal unary calls (see parse_unary) - "+x" => "(call + x)" - "√x" => "(call √ x)" - "±x" => "(call ± x)" + "+x" => "(call-pre + x)" + "√x" => "(call-pre √ x)" + "±x" => "(call-pre ± x)" # Not a unary operator - "/x" => "(call (error /) x)" - "+₁ x" => "(call (error +₁) x)" - ".<: x" => "(call (error .<:) x)" + "/x" => "(call-pre (error /) x)" + "+₁ x" => "(call-pre (error +₁) x)" + ".<: x" => "(call-pre (error .<:) x)" ], JuliaSyntax.parse_factor => [ "x^y" => "(call-i x ^ y)" @@ -218,8 +218,8 @@ tests = [ "<: \n" => "<:" "<: =" => "<:" "<:{T}(x::T)" => "(call (curly <: T) (:: x T))" - "<:(x::T)" => "(<: (:: x T))" - "<: A where B" => "(<: (where A B))" + "<:(x::T)" => "(<:-pre (:: x T))" + "<: A where B" => "(<:-pre (where A B))" # Really for parse_where "x where \n {T}" => "(where x T)" "x where {T,S}" => "(where x T S)" @@ -242,6 +242,9 @@ tests = [ "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" "f(a,b)" => "(call f a b)" + "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" => + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) + "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) "f (a)" => "(call f (error-t) a)" "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. 
(\$ A) (quote @x)))" @@ -284,12 +287,16 @@ tests = [ "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" + "(a=1)[]" => "(ref (= a 1))" => Expr(:ref, Expr(:(=), :a, 1)) "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" => "(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" "f.(a,b)" => "(. f (tuple a b))" + "f.(a=1; b=2)" => "(. f (tuple (= a 1) (parameters (= b 2))))" => + Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + "(a=1).()" => "(. (= a 1) (tuple))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) "f. (x)" => "(. f (error-t) (tuple x))" # Other dotted syntax "A.:+" => "(. A (quote +))" @@ -301,8 +308,8 @@ tests = [ "f.x.y" => "(. (. f (quote x)) (quote y))" "x .y" => "(. x (error-t) (quote y))" # Adjoint - "f'" => "(' f)" - "f'ᵀ" => "(call-i f 'ᵀ)" + "f'" => "(call-post f ')" + "f'ᵀ" => "(call-post f 'ᵀ)" # Curly calls "@S{a,b}" => "(macrocall @S (braces a b))" "S{a,b}" => "(curly S a b)" @@ -322,6 +329,7 @@ tests = [ "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" + # ], JuliaSyntax.parse_resword => [ # In normal_context From 091295649f73d83a0e91dd48f02a6fc41b895699 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 21 Oct 2022 20:04:43 +1000 Subject: [PATCH 0520/1109] Bug fix for macro call square bracket whitespace (JuliaLang/JuliaSyntax.jl#125) Ensure things like `@S[a,b]` where there's no whitespace between the `@S` and opening `[` are parsed correctly. 
--- JuliaSyntax/src/parser.jl | 43 +++++++++++++++++++++++--------------- JuliaSyntax/test/parser.jl | 18 +++++++++------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index fa7e8f6eee087..39512ac47e8e3 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1449,33 +1449,42 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end elseif k == K"[" if is_macrocall - # a().@x[1] ==> (macrocall (ref (error (. (call a) (quote x))) 1)) + # a().@x[1] ==> (macrocall (error (. (call a) (quote x))) (vect 1)) finish_macroname(ps, mark, valid_macroname, macro_name_position) end + m = position(ps) # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ParseState(ps, end_symbol=true), K"]", ps.end_symbol) - # a[i] ==> (ref a i) - # a[i,j] ==> (ref a i j) - # (a=1)[] ==> (ref (= a 1)) - # T[x y] ==> (typed_hcat T x y) - # T[x ; y] ==> (typed_vcat T x y) - # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) - # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) - #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d)) - outk = ckind == K"vect" ? K"ref" : - ckind == K"hcat" ? K"typed_hcat" : - ckind == K"vcat" ? K"typed_vcat" : - ckind == K"comprehension" ? K"typed_comprehension" : - ckind == K"ncat" ? 
K"typed_ncat" : - internal_error("unrecognized kind in parse_cat ", ckind) - emit(ps, mark, outk, cflags) - check_ncat_compat(ps, mark, ckind) if is_macrocall + # @S[a,b] ==> (macrocall @S (vect a b)) + # @S[a b] ==> (macrocall @S (hcat a b)) + # @S[a; b] ==> (macrocall @S (vcat a b)) + #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) + #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) + emit(ps, m, ckind, cflags) + check_ncat_compat(ps, m, ckind) emit(ps, mark, K"macrocall") break + else + # a[i] ==> (ref a i) + # a[i,j] ==> (ref a i j) + # (a=1)[] ==> (ref (= a 1)) + # T[x y] ==> (typed_hcat T x y) + # T[x ; y] ==> (typed_vcat T x y) + # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) + # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) + #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d)) + outk = ckind == K"vect" ? K"ref" : + ckind == K"hcat" ? K"typed_hcat" : + ckind == K"vcat" ? K"typed_vcat" : + ckind == K"comprehension" ? K"typed_comprehension" : + ckind == K"ncat" ? K"typed_ncat" : + internal_error("unrecognized kind in parse_cat ", ckind) + emit(ps, mark, outk, cflags) + check_ncat_compat(ps, mark, ckind) end elseif k == K"." # x .y ==> (. x (error-t) (quote y)) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1a050dadeaccf..b259915f691f3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -282,8 +282,17 @@ tests = [ "a().@x(y)" => "(macrocall (error (. (call a) (quote x))) y)" "a().@x y" => "(macrocall (error (. (call a) (quote x))) y)" "a().@x{y}" => "(macrocall (error (. (call a) (quote x))) (braces y))" - # array indexing, typed comprehension, etc - "a().@x[1]" => "(macrocall (ref (error (. (call a) (quote x))) 1))" + # square brackets + "a().@x[1]" => "(macrocall (error (. 
(call a) (quote x))) (vect 1))" + "@S[a,b]" => "(macrocall @S (vect a b))" => + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b)) + "@S[a b]" => "(macrocall @S (hcat a b))" => + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b)) + "@S[a; b]" => "(macrocall @S (vcat a b))" => + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b)) + ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" => + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) + ((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))" "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" @@ -805,11 +814,6 @@ broken_tests = [ # Invalid numeric literals, not juxtaposition "0b12" => "(error \"0b12\")" "0xex" => "(error \"0xex\")" - # Square brackets without space in macrocall - "@S[a,b]" => "(macrocall S (vect a b))" - "@S[a b]" => "(macrocall S (hcat a b))" - "@S[a; b]" => "(macrocall S (vcat a b))" - "@S[a; b ;; c; d]" => "(macrocall S (ncat-2 (nrow-1 a b) (nrow-1 c d)))" # Bad character literals "'\\xff'" => "(error '\\xff')" "'\\x80'" => "(error '\\x80')" From 86ea921ec7a88d128b9485b98ef53dcc22a8814f Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 22 Oct 2022 08:02:10 +1000 Subject: [PATCH 0521/1109] Always emit a block for `let` binding lists (JuliaLang/JuliaSyntax.jl#126) This moves the unnecessary special cases for lists of bindings in `let` out of the parser and into Expr lowering. 
--- JuliaSyntax/src/expr.jl | 10 ++++++++++ JuliaSyntax/src/parser.jl | 17 +++++++---------- JuliaSyntax/test/parser.jl | 18 +++++++++--------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 4d536ef2d73ba..8f13b5fadeadf 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -243,6 +243,16 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym == :char @check length(args) == 1 return args[1] + elseif headsym == :let + @check Meta.isexpr(args[1], :block) + a1 = args[1].args + # Ugly logic to strip the Expr(:block) in certian cases for compatibility + if length(a1) == 1 + a = a1[1] + if a isa Symbol || Meta.isexpr(a, (:(=), :(::))) + args[1] = a + end + end end return Expr(headsym, args...) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 39512ac47e8e3..9d5318385413c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1696,19 +1696,16 @@ function parse_resword(ps::ParseState) elseif word == K"let" bump(ps, TRIVIA_FLAG) if peek(ps) ∉ KSet"NewlineWs ;" - # let x=1\n end ==> (let (= x 1) (block)) + # let x=1\n end ==> (let (block (= x 1)) (block)) + # let x=1 ; end ==> (let (block (= x 1)) (block)) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_eq_star) kb = peek_behind(ps).kind - # Wart: This ugly logic seems unfortunate. Why not always emit a block? 
- # let x=1 ; end ==> (let (= x 1) (block)) - # let x::1 ; end ==> (let (:: x 1) (block)) - # let x ; end ==> (let x (block)) - if n_subexprs > 1 || !(kb in KSet"Identifier = ::") - # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) - # let x+=1 ; end ==> (let (block (+= x 1)) (block)) - emit(ps, m, K"block") - end + # let x::1 ; end ==> (let (block (:: x 1)) (block)) + # let x ; end ==> (let (block x) (block)) + # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + # let x+=1 ; end ==> (let (block (+= x 1)) (block)) + emit(ps, m, K"block") else # let end ==> (let (block) (block)) # let ; end ==> (let (block) (block)) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b259915f691f3..553da01bfc8c2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -357,15 +357,15 @@ tests = [ "for x in xs end" => "(for (= x xs) (block))" "for x in xs, y in ys \n a \n end" => "(for (block (= x xs) (= y ys)) (block a))" # let - "let x=1\n end" => "(let (= x 1) (block))" - "let x ; end" => "(let x (block))" - "let x=1 ; end" => "(let (= x 1) (block))" - "let x::1 ; end" => "(let (:: x 1) (block))" - "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" - "let x+=1 ; end" => "(let (block (+= x 1)) (block))" - "let ; end" => "(let (block) (block))" - "let ; body end" => "(let (block) (block body))" - "let\na\nb\nend" => "(let (block) (block a b))" + "let x=1\n end" => "(let (block (= x 1)) (block))" => Expr(:let, Expr(:(=), :x, 1), Expr(:block)) + "let x=1 ; end" => "(let (block (= x 1)) (block))" => Expr(:let, Expr(:(=), :x, 1), Expr(:block)) + "let x ; end" => "(let (block x) (block))" => Expr(:let, :x, Expr(:block)) + "let x::1 ; end" => "(let (block (:: x 1)) (block))" => Expr(:let, Expr(:(::), :x, 1), Expr(:block)) + "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" => Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block)) + "let x+=1 ; end" => "(let (block (+= x 1)) 
(block))" => Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block)) + "let ; end" => "(let (block) (block))" => Expr(:let, Expr(:block), Expr(:block)) + "let ; body end" => "(let (block) (block body))" => Expr(:let, Expr(:block), Expr(:block, :body)) + "let\na\nb\nend" => "(let (block) (block a b))" => Expr(:let, Expr(:block), Expr(:block, :a, :b)) # abstract type "abstract type A end" => "(abstract A)" "abstract type A ; end" => "(abstract A)" From d84ec85c5d6ab30d7a1a9e022c7659f1363e9036 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 26 Oct 2022 13:05:45 +1000 Subject: [PATCH 0522/1109] Fix const struct field errors + cleanup `global const` AST (JuliaLang/JuliaSyntax.jl#130) Here I've replicated the fix from JuliaLang/julia#45024 so that `const x` (ie, without an assignment) is only valid within a `struct` and is otherwise an error. Also avoid lowering the syntax `global const` into `const global` within the parser; do this in `Expr` conversion instead. This more closely reflects the structure of the source, allowing trivia attachment to be more natural. --- JuliaSyntax/src/expr.jl | 6 ++ JuliaSyntax/src/parser.jl | 134 ++++++++++++++++++++----------------- JuliaSyntax/test/parser.jl | 45 ++++++------- 3 files changed, 99 insertions(+), 86 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 8f13b5fadeadf..ed112a0a84537 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -253,6 +253,12 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = a end end + elseif headsym == :local || headsym == :global + if length(args) == 1 && Meta.isexpr(args[1], :const) + # Normalize `local const` to `const local` + args[1] = Expr(headsym, args[1].args...) + headsym = :const + end end return Expr(headsym, args...) 
end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9d5318385413c..5a531578b43f2 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1647,6 +1647,23 @@ function parse_subtype_spec(ps::ParseState) parse_comparison(ps, true) end +# flisp: parse-struct-field +function parse_struct_field(ps::ParseState) + mark = position(ps) + const_field = peek(ps) == K"const" + if const_field + bump(ps, TRIVIA_FLAG) + end + parse_eq(ps) + if const_field + # Const fields https://github.com/JuliaLang/julia/pull/43305 + #v1.8: struct A const a end ==> (struct false A (block (const x))) + #v1.7: struct A const a end ==> (struct false A (block (error (const x)))) + emit(ps, mark, K"const") + min_supported_version(v"1.8", ps, mark, "`const` struct field") + end +end + # parse expressions or blocks introduced by syntactic reserved words. # # The caller should use peek_initial_reserved_words to determine whether @@ -1727,8 +1744,47 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"let") elseif word == K"if" parse_if_elseif(ps) - elseif word in KSet"const global local" - parse_const_local_global(ps) + elseif word in KSet"global local" + # global x ==> (global x) + # local x ==> (local x) + # global x,y ==> (global x y) + bump(ps, TRIVIA_FLAG) + const_mark = nothing + if peek(ps) == K"const" + const_mark = position(ps) + bump(ps, TRIVIA_FLAG) + end + had_assignment = parse_global_local_const_vars(ps) + if !isnothing(const_mark) + # global const x = 1 ==> (global (const (= x 1))) + # local const x = 1 ==> (local (const (= x 1))) + emit(ps, const_mark, K"const") + if !had_assignment + # global const x ==> (global (error (const x))) + emit(ps, mark, K"error", error="expected assignment after `const`") + end + end + emit(ps, mark, word) + elseif word == K"const" + # const x = 1 ==> (const (= x 1)) + bump(ps, TRIVIA_FLAG) + scope_mark = nothing + scope_k = peek(ps) + if scope_k in KSet"local global" + scope_mark = position(ps) + bump(ps, 
TRIVIA_FLAG) + end + had_assignment = parse_global_local_const_vars(ps) + if !isnothing(scope_mark) + # const global x = 1 ==> (const (global (= x 1))) + # const local x = 1 ==> (const (local (= x 1))) + emit(ps, scope_mark, scope_k) + end + emit(ps, mark, K"const") + if !had_assignment + # const x .= 1 ==> (error (const (.= x 1))) + emit(ps, mark, K"error", error="expected assignment after `const`") + end elseif word in KSet"function macro" parse_function(ps) elseif word == K"abstract" @@ -1749,6 +1805,9 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"abstract") elseif word in KSet"struct mutable" # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (:: a X))) + # struct A \n a \n b \n end ==> (struct false A (block a b)) + #v1.7: struct A const a end ==> (struct false A (block (error (const a)))) + #v1.8: struct A const a end ==> (struct false A (block (const a))) if word == K"mutable" # mutable struct A end ==> (struct true A (block)) bump(ps, TRIVIA_FLAG) @@ -1760,7 +1819,7 @@ function parse_resword(ps::ParseState) @check peek(ps) == K"struct" bump(ps, TRIVIA_FLAG) parse_subtype_spec(ps) - parse_block(ps) + parse_block(ps, parse_struct_field) bump_closing_token(ps, K"end") emit(ps, mark, K"struct") elseif word == K"primitive" @@ -1888,75 +1947,24 @@ function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) emit(ps, mark, word) end -function parse_const_local_global(ps) +# Like parse_assignment, but specialized so that we can omit the +# tuple when there's commas but no assignment. 
+function parse_global_local_const_vars(ps) mark = position(ps) - scope_mark = mark - has_const = false - scope_k = K"None" - k = peek(ps) - if k in KSet"global local" - # global x ==> (global x) - # local x ==> (local x) - scope_k = k - bump(ps, TRIVIA_FLAG) - if peek(ps) == K"const" - # global const x = 1 ==> (const (global (= x 1))) - # local const x = 1 ==> (const (local (= x 1))) - has_const = true - bump(ps, TRIVIA_FLAG) - end - else - has_const = true - # const x = 1 ==> (const (= x 1)) - bump(ps, TRIVIA_FLAG) - k = peek(ps) - if k in KSet"global local" - # const global x = 1 ==> (const (global (= x 1))) - # const local x = 1 ==> (const (local (= x 1))) - scope_k = k - scope_mark = position(ps) - bump(ps, TRIVIA_FLAG) - end - end - # Like parse_assignment, but specialized so that we can omit the - # tuple when there's commas but no assignment. - beforevar_mark = position(ps) n_commas = parse_comma(ps, false) t = peek_token(ps) - has_assignment = is_prec_assignment(t) - if n_commas >= 1 && (has_assignment || has_const) + assign_prec = is_prec_assignment(t) + if n_commas >= 1 && assign_prec # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) - # Maybe nonsensical? But this is what the flisp parser does. 
- #v1.8: const x,y ==> (const (tuple x y)) - emit(ps, beforevar_mark, K"tuple") + emit(ps, mark, K"tuple") end - if has_assignment + if assign_prec # const x = 1 ==> (const (= x 1)) # global x ~ 1 ==> (global (call-i x ~ 1)) # global x += 1 ==> (global (+= x 1)) - parse_assignment_with_initial_ex(ps, beforevar_mark, parse_comma) - else - # global x ==> (global x) - # local x ==> (local x) - # global x,y ==> (global x y) - end - if has_const && (!has_assignment || is_dotted(t)) - # Const fields https://github.com/JuliaLang/julia/pull/43305 - #v1.8: const x ==> (const x) - #v1.8: const x::T ==> (const (:: x T)) - # Disallowed const forms on <= 1.7 - #v1.7: const x ==> (const (error x)) - #v1.7: const x .= 1 ==> (const (error (.= x 1))) - min_supported_version(v"1.8", ps, beforevar_mark, - "`const` struct field without assignment") - end - if scope_k != K"None" - emit(ps, scope_mark, scope_k) - end - if has_const - # TODO: Normalize `global const` during Expr conversion rather than here? - emit(ps, mark, K"const") + parse_assignment_with_initial_ex(ps, mark, parse_comma) end + return kind(t) == K"=" && !is_dotted(t) end # Parse function and macro definitions diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 553da01bfc8c2..992ade3b8c6c4 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -379,9 +379,12 @@ tests = [ "primitive type A \$N end" => "(primitive A (\$ N))" "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct - "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (:: a X)))" - "mutable struct A end" => "(struct true A (block))" - "struct A end" => "(struct false A (block))" + "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (:: a X)))" => Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, Expr(:(::), :a, :X))) + "struct A \n a \n b \n end" => "(struct false A (block a b))" => Expr(:struct, false, :A, Expr(:block, :a, :b)) + "mutable struct A end" => "(struct 
true A (block))" + ((v=v"1.8",), "struct A const a end") => "(struct false A (block (const a)))" => Expr(:struct, false, :A, Expr(:block, Expr(:const, :a))) + ((v=v"1.7",), "struct A const a end") => "(struct false A (block (error (const a))))" + "struct A end" => "(struct false A (block))" => Expr(:struct, false, :A, Expr(:block)) "struct try end" => "(struct false (error (try)) (block))" # return "return\nx" => "(return nothing)" @@ -424,26 +427,22 @@ tests = [ "if true; x ? true\nend" => "(if true (block (if x true (error-t) (error-t))))" "if true; x ? true : elseif true end" => "(if true (block (if x true (error-t))) (elseif true (block)))" ], - JuliaSyntax.parse_const_local_global => [ - "global x" => "(global x)" - "local x" => "(local x)" - "global const x = 1" => "(const (global (= x 1)))" - "local const x = 1" => "(const (local (= x 1)))" - "const x = 1" => "(const (= x 1))" - "const global x = 1" => "(const (global (= x 1)))" - "const local x = 1" => "(const (local (= x 1)))" - "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" - ((v=v"1.8",), "const x,y") => "(const (tuple x y))" - "const x = 1" => "(const (= x 1))" - "global x ~ 1" => "(global (call-i x ~ 1))" - "global x += 1" => "(global (+= x 1))" - "global x" => "(global x)" - "local x" => "(local x)" - "global x,y" => "(global x y)" - ((v=v"1.8",), "const x") => "(const x)" - ((v=v"1.8",), "const x::T") => "(const (:: x T))" - ((v=v"1.7",), "const x") => "(const (error x))" - ((v=v"1.7",), "const x .= 1") => "(const (error (.= x 1)))" + JuliaSyntax.parse_resword => [ + "global x" => "(global x)" => Expr(:global, :x) + "local x" => "(local x)" => Expr(:local, :x) + "global x,y" => "(global x y)" => Expr(:global, :x, :y) + "global const x = 1" => "(global (const (= x 1)))" => Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + "local const x = 1" => "(local (const (= x 1)))" => Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + "const global x = 1" => "(const (global (= x 1)))" => Expr(:const, 
Expr(:global, Expr(:(=), :x, 1))) + "const local x = 1" => "(const (local (= x 1)))" => Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" => Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) + "const x = 1" => "(const (= x 1))" => Expr(:const, Expr(:(=), :x, 1)) + "const x .= 1" => "(error (const (.= x 1)))" + "global x ~ 1" => "(global (call-i x ~ 1))" => Expr(:global, Expr(:call, :~, :x, 1)) + "global x += 1" => "(global (+= x 1))" => Expr(:global, Expr(:+=, :x, 1)) + "const x" => "(error (const x))" + "global const x" => "(global (error (const x)))" + "const global x" => "(error (const (global x)))" ], JuliaSyntax.parse_function => [ "macro while(ex) end" => "(macro (call (error while) ex) (block))" From 78f54e6a6f071e4226bdf97cbe66175be694a3fd Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 29 Oct 2022 11:19:14 +1000 Subject: [PATCH 0523/1109] Permit parens in function call signatures (JuliaLang/JuliaSyntax.jl#131) This permits the extra parentheses in things like function (funcname(some, long, argument, list) where {Type,Params}) body end This syntax "works" in the reference parser and has been seen in the wild so we need to support it for compatibility. (However, the precedence of `where` and `::` is broken when used inside the parens which suggests this is more of a syntactic aberration rather than an intentional feature. Perhaps we can warn in future.) 
--- JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/parse_stream.jl | 40 ++++++++++++ JuliaSyntax/src/parser.jl | 103 ++++++++++++++++++++----------- JuliaSyntax/test/parse_stream.jl | 54 ++++++++++++---- JuliaSyntax/test/parser.jl | 5 +- JuliaSyntax/test/test_utils.jl | 38 ++++++++++++ 6 files changed, 193 insertions(+), 48 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 26ab76a90400c..4f403e6e2e3f6 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -880,6 +880,7 @@ const _kind_names = "tuple" "ref" "vect" + "parens" # Concatenation syntax "braces" "bracescat" diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ec50ef766690d..80dfed640f101 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -528,6 +528,46 @@ function peek_behind(stream::ParseStream, pos::ParseStreamPosition) end end +function first_child_position(stream::ParseStream, pos::ParseStreamPosition) + # Find the first nontrivia range which is a child of this range but not a + # child of the child + c = 0 + @assert pos.range_index > 0 + parent = stream.ranges[pos.range_index] + i = pos.range_index-1 + while i >= 1 + if stream.ranges[i].first_token >= parent.first_token && + (c == 0 || stream.ranges[i].first_token < stream.ranges[c].first_token) && + !is_trivia(stream.ranges[i]) + c = i + end + i -= 1 + end + + # Find first nontrivia token + t = 0 + for i = parent.first_token:parent.last_token + if !is_trivia(stream.tokens[i]) + t = i + break + end + end + + if c != 0 + if t != 0 + if stream.ranges[c].first_token > t + return ParseStreamPosition(t, c-1) + else + return ParseStreamPosition(stream.ranges[c].last_token, c) + end + else + return ParseStreamPosition(stream.ranges[c].last_token, c) + end + else + return ParseStreamPosition(t, c) + end +end + function peek_behind(stream::ParseStream; skip_trivia::Bool=true) pos = position(stream) if !skip_trivia || !token_is_last(stream, pos) 
diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 5a531578b43f2..caab4221074d9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -306,6 +306,22 @@ function is_valid_identifier(k) !(is_syntactic_operator(k) || k in KSet"? .'") end +# The expression is a call after stripping `where` and `::` +function was_eventually_call(ps::ParseState) + stream = ps.stream + p = position(ps) + while true + kb = peek_behind(stream, p).kind + if kb == K"call" + return true + elseif kb == K"where" || kb == K"::" + p = first_child_position(ps.stream, p) + else + return false + end + end +end + #------------------------------------------------------------------------------- # Parser # @@ -1786,7 +1802,26 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"error", error="expected assignment after `const`") end elseif word in KSet"function macro" - parse_function(ps) + bump(ps, TRIVIA_FLAG) + bump_trivia(ps) + has_body = parse_function_signature(ps, word == K"function") + if has_body + # The function body + # function f() \n a \n b end ==> (function (call f) (block a b)) + # function f() end ==> (function (call f) (block)) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, word) + else + # Function/macro definition with no methods + # function f end ==> (function f) + # (function f \n end) ==> (function f) + # function f \n\n end ==> (function f) + # function $f end ==> (function ($ f)) + # macro f end ==> (macro f) + bump(ps, TRIVIA_FLAG, skip_newlines=true) + emit(ps, mark, word) + end elseif word == K"abstract" # Abstract type definitions # abstract type A end ==> (abstract A) @@ -1968,23 +2003,17 @@ function parse_global_local_const_vars(ps) end # Parse function and macro definitions -function parse_function(ps::ParseState) - mark = position(ps) - word = peek(ps) - @check word in KSet"macro function" - is_function = word == K"function" +function parse_function_signature(ps::ParseState, is_function::Bool) 
is_anon_func = false - bump(ps, TRIVIA_FLAG) - bump_trivia(ps) - def_mark = position(ps) + mark = position(ps) if !is_function # Parse macro name parse_identifier_or_interpolate(ps) kb = peek_behind(ps).orig_kind if is_initial_reserved_word(ps, kb) # macro while(ex) end ==> (macro (call (error while) ex) (block)) - emit(ps, def_mark, K"error", error="Invalid macro name") + emit(ps, mark, K"error", error="Invalid macro name") else # macro f() end ==> (macro (call f) (block)) # macro (:)(ex) end ==> (macro (call : ex) (block)) @@ -1997,6 +2026,7 @@ function parse_function(ps::ParseState) # When an initial parenthesis is present, we might either have # * the function name in parens, followed by (args...) # * an anonymous function argument list in parens + # * the whole function declaration in parens # # This should somewhat parse as in parse_paren() (this is what # the flisp parser does), but that results in weird parsing of @@ -2005,22 +2035,36 @@ function parse_function(ps::ParseState) bump(ps, TRIVIA_FLAG) is_empty_tuple = peek(ps, skip_newlines=true) == K")" opts = parse_brackets(ps, K")") do _, _, _, _ - _is_anon_func = peek(ps, 2) != K"(" + _parsed_call = was_eventually_call(ps) + _is_anon_func = peek(ps, 2) != K"(" && !_parsed_call return (needs_parameters = _is_anon_func, - is_anon_func = _is_anon_func) + is_anon_func = _is_anon_func, + parsed_call = _parsed_call) end is_anon_func = opts.is_anon_func + if opts.parsed_call + # Compat: Ugly case where extra parentheses existed and we've + # already parsed the whole signature. + # function (f() where T) end ==> (function (where (call f) T) (block)) + # function (f()::S) end ==> (function (:: (call f) S) (block)) + # + # TODO: Warn for use of parens? The precedence of `::` and + # `where` don't work inside parens so this is a bit of a syntax + # oddity/aberration. 
+ return true + end if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) + # function (x::f()) end ==> (function (tuple (:: x (call f))) (block)) # function (x,y) end ==> (function (tuple x y) (block)) # function (x=1) end ==> (function (tuple (= x 1)) (block)) # function (;x=1) end ==> (function (tuple (parameters (= x 1))) (block)) - emit(ps, def_mark, K"tuple") + emit(ps, mark, K"tuple") elseif is_empty_tuple # Weird case which is consistent with parse_paren but will be # rejected in lowering # function ()(x) end ==> (function (call (tuple) x) (block)) - emit(ps, def_mark, K"tuple") + emit(ps, mark, K"tuple") else # function (:)() end ==> (function (call :) (block)) # function (x::T)() end ==> (function (call (:: x T)) (block)) @@ -2033,7 +2077,7 @@ function parse_function(ps::ParseState) kb = peek_behind(ps).orig_kind if is_reserved_word(kb) # function begin() end ==> (function (call (error begin)) (block)) - emit(ps, def_mark, K"error", error="Invalid function name") + emit(ps, mark, K"error", error="Invalid function name") else # function f() end ==> (function (call f) (block)) # function type() end ==> (function (call type) (block)) @@ -2045,26 +2089,18 @@ function parse_function(ps::ParseState) end end if peek(ps, skip_newlines=true) == K"end" && !is_anon_func - # Function/macro definition with no methods - # function f end ==> (function f) - # (function f \n end) ==> (function f) - # function f \n\n end ==> (function f) - # function $f end ==> (function ($ f)) - # macro f end ==> (macro f) - bump(ps, TRIVIA_FLAG, skip_newlines=true) - emit(ps, mark, word) - return + return false end if !is_anon_func # Parse function argument list # function f(x,y) end ==> (function (call f x y) (block)) # function f{T}() end ==> (function (call (curly f T)) (block)) # function A.f() end ==> (function (call (. 
A (quote f))) (block)) - parse_call_chain(ps, def_mark) + parse_call_chain(ps, mark) if peek_behind(ps).kind != K"call" # function f body end ==> (function (error f) (block body)) - emit(ps, def_mark, K"error", - error="Invalid signature in $(untokenize(word)) definition") + emit(ps, mark, K"error", + error="Invalid signature in $(is_function ? "function" : "macro") definition") end end if is_function && peek(ps) == K"::" @@ -2073,21 +2109,16 @@ function parse_function(ps::ParseState) # function f()::g(T) end ==> (function (:: (call f) (call g T)) (block)) bump(ps, TRIVIA_FLAG) parse_call(ps) - emit(ps, def_mark, K"::") + emit(ps, mark, K"::") end if peek(ps) == K"where" # Function signature where syntax # function f() where {T} end ==> (function (where (call f) T) (block)) # function f() where T end ==> (function (where (call f) T) (block)) - parse_where_chain(ps, def_mark) + parse_where_chain(ps, mark) end - - # The function body - # function f() \n a \n b end ==> (function (call f) (block a b)) - # function f() end ==> (function (call f) (block)) - parse_block(ps) - bump_closing_token(ps, K"end") - emit(ps, mark, word) + # function f()::S where T end ==> (function (where (:: (call f) S) T) (block)) + return true end # Parse a try block diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 315b59c81eb9f..037e025e3fca0 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -6,21 +6,22 @@ using JuliaSyntax: ParseStream, peek, peek_token, bump, bump_trivia, bump_invisible, - emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG - -code = """ -for i = 1:10 - xx[i] + 2 - # hi - yy -end -""" - -st = ParseStream(code) + emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG, + ParseStreamPosition, first_child_position # Here we manually issue parse events in the order the Julia parser would issue # them @testset "ParseStream" begin + code = """ + for i = 1:10 + xx[i] + 2 + # hi + yy + end + """ + + st = 
ParseStream(code) + p1 = position(st) @test peek(st) == K"for" bump(st, TRIVIA_FLAG) @@ -102,3 +103,34 @@ end end end end + +@testset "ParseStream tree traversal" begin + # NB: ParseStreamPosition.token_index includes an initial sentinel token so + # indices here are one more than "might be expected". + st = parse_sexpr("((a b) c)") + child1_pos = first_child_position(st, position(st)) + @test child1_pos == ParseStreamPosition(7, 1) + child2_pos = first_child_position(st, child1_pos) + @test child2_pos == ParseStreamPosition(4, 0) + + st = parse_sexpr("( (a b) c)") + child1_pos = first_child_position(st, position(st)) + @test child1_pos == ParseStreamPosition(8, 1) + child2_pos = first_child_position(st, child1_pos) + @test child2_pos == ParseStreamPosition(5, 0) + + st = parse_sexpr("(a (b c))") + @test first_child_position(st, position(st)) == ParseStreamPosition(3, 0) + + st = parse_sexpr("( a (b c))") + @test first_child_position(st, position(st)) == ParseStreamPosition(4, 0) + + st = parse_sexpr("a (b c)") + @test first_child_position(st, position(st)) == ParseStreamPosition(5, 0) + + st = parse_sexpr("(a) (b c)") + @test first_child_position(st, position(st)) == ParseStreamPosition(7, 0) + + st = parse_sexpr("(() ())") + @test first_child_position(st, position(st)) == ParseStreamPosition(4, 1) +end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 992ade3b8c6c4..25c5e182cdffe 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -444,13 +444,16 @@ tests = [ "global const x" => "(global (error (const x)))" "const global x" => "(error (const (global x)))" ], - JuliaSyntax.parse_function => [ + JuliaSyntax.parse_resword => [ + # Macros and functions "macro while(ex) end" => "(macro (call (error while) ex) (block))" "macro f() end" => "(macro (call f) (block))" "macro (:)(ex) end" => "(macro (call : ex) (block))" "macro (type)(ex) end" => "(macro (call type ex) (block))" "macro \$f() end" => "(macro (call (\$ f)) 
(block))" "macro (\$f)() end" => "(macro (call (\$ f)) (block))" + "function (f() where T) end" => "(function (where (call f) T) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) + "function (f()::S) end"=> "(function (:: (call f) S) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) "function (x) body end"=> "(function (tuple x) (block body))" "function (x,y) end" => "(function (tuple x y) (block))" "function (x=1) end" => "(function (tuple (= x 1)) (block))" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index a3ab3bbd434b3..273eb003889d8 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -259,3 +259,41 @@ function show_green_tree(code; version::VersionNumber=v"1.6") t = JuliaSyntax.parseall(GreenNode, code, version=version) sprint(show, MIME"text/plain"(), t, code) end + +#------------------------------------------------------------------------------- +# Parse s-expressions +function parse_sexpr(code) + st = ParseStream(code) + pos_stack = ParseStreamPosition[] + while true + k = peek(st) + if k == K"(" + push!(pos_stack, position(st)) + bump(st, TRIVIA_FLAG) + elseif k == K")" + if isempty(pos_stack) + bump(st, error="Mismatched `)` with no opening `(`") + break + else + bump(st, TRIVIA_FLAG) + end + emit(st, pop!(pos_stack), K"parens") + elseif k == K"Identifier" || k == K"Integer" + bump(st) + elseif k == K"NewlineWs" + bump(st, TRIVIA_FLAG) + elseif k == K"EndMarker" + if !isempty(pos_stack) + bump_invisible(st, K"error", error="Mismatched `)`") + end + break + else + bump(st, error="Unexpected token") + end + end + if JuliaSyntax.any_error(st) + throw(JuliaSyntax.ParseError(st)) + end + st +end + From 8dcab26bc60000161ff44405737a59f17003e807 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 30 Oct 2022 16:28:41 +1000 Subject: [PATCH 0524/1109] Wrap var"" nonstandard identifiers in var nodes (JuliaLang/JuliaSyntax.jl#127) Useful to hold 
associated trivia (delimiter and var prefix) and indicate in a clean way that var"" was used. --- JuliaSyntax/src/expr.jl | 13 +++++--- JuliaSyntax/src/parser.jl | 65 +++++++++++++++++++++++--------------- JuliaSyntax/test/parser.jl | 44 ++++++++++++++------------ 3 files changed, 72 insertions(+), 50 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index ed112a0a84537..9da2afa9b7990 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -30,7 +30,14 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end end nodekind = kind(node) - if nodekind == K"?" + node_args = children(node) + if nodekind == K"var" + @check length(node_args) == 1 + return _to_expr(node_args[1]) + elseif nodekind == K"char" + @check length(node_args) == 1 + return _to_expr(node_args[1]) + elseif nodekind == K"?" headsym = :if elseif nodekind == K"=" && !is_decorated(node) && eq_to_kw headsym = :kw @@ -39,7 +46,6 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, headsym = !isnothing(headstr) ? Symbol(headstr) : error("Can't untokenize head of kind $(nodekind)") end - node_args = children(node) if headsym == :string || headsym == :cmdstring # Julia string literals may be interspersed with trivia in two situations: # 1. 
Triple quoted string indentation is trivia @@ -240,9 +246,6 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym == :do @check length(args) == 3 return Expr(:do, args[1], Expr(:->, args[2], args[3])) - elseif headsym == :char - @check length(args) == 1 - return args[1] elseif headsym == :let @check Meta.isexpr(args[1], :block) a1 = args[1].args diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index caab4221074d9..f1beb9134642e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -125,6 +125,10 @@ function textbuf(ps::ParseState) textbuf(ps.stream) end +function first_child_position(ps::ParseState, pos::ParseStreamPosition) + first_child_position(ps.stream, pos) +end + #------------------------------------------------------------------------------- # Parser Utils @@ -315,7 +319,7 @@ function was_eventually_call(ps::ParseState) if kb == K"call" return true elseif kb == K"where" || kb == K"::" - p = first_child_position(ps.stream, p) + p = first_child_position(ps, p) else return false end @@ -1363,7 +1367,7 @@ function parse_identifier_or_interpolate(ps::ParseState) # export outer ==> (export outer) # export ($f) ==> (export ($ f)) ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || - (!b.is_leaf && b.kind == K"$") + (!b.is_leaf && b.kind in KSet"$ var") if !ok emit(ps, mark, K"error", error="Expected identifier") end @@ -1372,10 +1376,7 @@ end function finish_macroname(ps, mark, valid_macroname, macro_name_position, name_kind=nothing) if valid_macroname - if isnothing(name_kind) - name_kind = macro_name_kind(peek_behind(ps, macro_name_position).kind) - end - reset_node!(ps, macro_name_position, kind = name_kind) + fix_macro_name_kind!(ps, macro_name_position, name_kind) else emit(ps, mark, K"error", error="not a valid macro name or macro module path") end @@ -1396,7 +1397,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # source range of the 
@-prefixed part of a macro macro_atname_range = nothing # $A.@x ==> (macrocall (. ($ A) (quote @x))) - valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet"Identifier . $" + # A.@var"#" ==> (macrocall (. A (quote @x))) + valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet"Identifier var . $" # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. macro_name_position = position(ps) # points to same output span as peek_behind @@ -1411,6 +1413,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @foo (x,y) ==> (macrocall @foo (tuple x y)) # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) # [@foo x] ==> (vect (macrocall @foo x)) + # @var"#" a ==> (macrocall (var @#) a) + # A.@var"#" a ==> (macrocall (. A (quote (var @#))) a) finish_macroname(ps, mark, valid_macroname, macro_name_position) let ps = with_space_sensitive(ps) # Space separated macro arguments @@ -2229,10 +2233,18 @@ function parse_do(ps::ParseState, mark) emit(ps, mark, K"do") end -function macro_name_kind(k) - return k == K"Identifier" ? K"MacroName" : - k == K"." ? K"@." : - internal_error("unrecognized source kind for macro name ", k) +function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=nothing) + k = peek_behind(ps, macro_name_position).kind + if k == K"var" + macro_name_position = first_child_position(ps, macro_name_position) + k = peek_behind(ps, macro_name_position).kind + end + if isnothing(name_kind) + name_kind = k == K"Identifier" ? K"MacroName" : + k == K"." ? K"@." : + internal_error("unrecognized source kind for macro name ", k) + end + reset_node!(ps, macro_name_position, kind=name_kind) end # If remap_kind is false, the kind will be remapped by parse_call_chain after @@ -2250,23 +2262,25 @@ function parse_macro_name(ps::ParseState) # @! x ==> (macrocall @! x) # @.. x ==> (macrocall @.. 
x) # @$ x ==> (macrocall @$ x) + # @var"#" x ==> (macrocall (var #) @$ x) let ps = with_space_sensitive(ps) parse_atom(ps, false) end end end -# Parse an identifier, interpolation of @-prefixed symbol +# Parse an identifier, interpolation or @-prefixed symbol # # flisp: parse-atsym function parse_atsym(ps::ParseState) bump_trivia(ps) if peek(ps) == K"@" - # export @a ==> (export @a) - # export a, \n @b ==> (export a @b) + # export @a ==> (export @a) + # export @var"'" ==> (export (var @')) + # export a, \n @b ==> (export a @b) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) - reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps).kind)) + fix_macro_name_kind!(ps, position(ps)) else # export a ==> (export a) # export \n a ==> (export a) @@ -3322,12 +3336,12 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); kind(t) == K"\"" && !preceding_whitespace(t)) - # var"x" ==> x + # var"x" ==> (var x) # Raw mode unescaping - # var"" ==> - # var"\"" ==> " - # var"\\"" ==> \" - # var"\\x" ==> \\x + # var"" ==> (var ) + # var"\"" ==> (var ") + # var"\\"" ==> (var \") + # var"\\x" ==> (var \\x) # # NB: Triple quoted var identifiers are not implemented, but with # the complex deindentation rules they seem like a misfeature @@ -3344,7 +3358,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, TRIVIA_FLAG) else bump_invisible(ps, K"error", TRIVIA_FLAG, - error="unterminated string literal") + error="unterminated `var\"\"` identifier") end t = peek_token(ps) k = kind(t) @@ -3354,11 +3368,12 @@ function parse_atom(ps::ParseState, check_identifiers=true) # var"x") ==> x # var"x"( ==> x else - # var"x"end ==> (error (end)) - # var"x"1 ==> (error 1) - # var"x"y ==> (error y) - bump(ps, error="suffix not allowed after var\"...\" syntax") + # var"x"end ==> (var x (error-t)) + # var"x"1 ==> (var x (error-t)) + # var"x"y ==> (var x (error-t)) + bump(ps, TRIVIA_FLAG, 
error="suffix not allowed after var\"...\" syntax") end + emit(ps, mark, K"var") elseif check_identifiers && is_closing_token(ps, leading_kind) # :(end) ==> (quote (error end)) bump(ps, error="invalid identifier") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 25c5e182cdffe..5a080e498e007 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -258,7 +258,9 @@ tests = [ "@foo (x,y)" => "(macrocall @foo (tuple x y))" "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" - "[@foo x]" => "(vect (macrocall @foo x))" + "[@foo x]" => "(vect (macrocall @foo x))" + "@var\"#\" a" => "(macrocall (var @#) a)" => Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" => Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) "[f (x)]" => "(hcat f x)" "[f x]" => "(hcat f x)" # Macro names @@ -402,15 +404,16 @@ tests = [ "module A \n a \n b \n end" => "(module true A (block a b))" """module A \n "x"\na\n end""" => """(module true A (block (macrocall :(Core.var"@doc") (string "x") a)))""" # export - "export a" => "(export a)" - "export @a" => "(export @a)" - "export a, \n @b" => "(export a @b)" - "export +, ==" => "(export + ==)" - "export \n a" => "(export a)" - "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" + "export a" => "(export a)" => Expr(:export, :a) + "export @a" => "(export @a)" => Expr(:export, Symbol("@a")) + "export @var\"'\"" => "(export (var @'))" => Expr(:export, Symbol("@'")) + "export a, \n @b" => "(export a @b)" => Expr(:export, :a, Symbol("@b")) + "export +, ==" => "(export + ==)" => Expr(:export, :+, :(==)) + "export \n a" => "(export a)" => Expr(:export, :a) + "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" => Expr(:export, Expr(:$, :a), Expr(:$, Expr(:call, :*, :a, :b))) "export (x::T)" => "(export (error (:: x T)))" - "export outer" => "(export 
outer)" - "export (\$f)" => "(export (\$ f))" + "export outer" => "(export outer)" => Expr(:export, :outer) + "export (\$f)" => "(export (\$ f))" => Expr(:export, Expr(:$, :f)) ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif b (block yy) (block zz)))" @@ -612,18 +615,19 @@ tests = [ "xx" => "xx" "x₁" => "x₁" # var syntax - """var"x" """ => "x" - """var"x"+""" => "x" - """var"x")""" => "x" - """var"x"(""" => "x" - """var"x"end""" => "x (error (end))" - """var"x"1""" => "x (error 1)" - """var"x"y""" => "x (error y)" + """var"x" """ => "(var x)" => :x # var syntax raw string unescaping - "var\"\"" => "" - "var\"\\\"\"" => "\"" - "var\"\\\\\\\"\"" => "\\\"" - "var\"\\\\x\"" => "\\\\x" + "var\"\"" => "(var )" => Symbol("") + "var\"\\\"\"" => "(var \")" => Symbol("\"") + "var\"\\\\\\\"\"" => "(var \\\")" => Symbol("\\\"") + "var\"\\\\x\"" => "(var \\\\x)" => Symbol("\\\\x") + # trailing syntax after var + """var"x"+""" => "(var x)" => :x + """var"x")""" => "(var x)" => :x + """var"x"(""" => "(var x)" => :x + """var"x"end""" => "(var x (error-t))" + """var"x"1""" => "(var x (error-t))" + """var"x"y""" => "(var x (error-t))" # Syntactic operators "+=" => "(error +=)" ".+=" => "(error .+=)" From e3eeccfce1f915fcebce151d8ad3cb1e62b3e55c Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 31 Oct 2022 11:50:47 +1000 Subject: [PATCH 0525/1109] Add docs on how to use JuliaSyntax inside VSCode (JuliaLang/JuliaSyntax.jl#132) --- JuliaSyntax/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index c66ccd1532332..50cb8e1458f5b 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -125,6 +125,25 @@ system image by running the code in `./sysimage/compile.jl` as a Julia script Using a custom sysimage has the advantage that package precompilation will also go through the JuliaSyntax parser. 
+### VSCode + +To use JuliaSyntax as the default parser for Julia within VSCode, add the +following to your `startup.jl` file: + +```julia +atreplinit() do repl + @eval begin + import JuliaSyntax + JuliaSyntax.enable_in_core!(true) + end +end +``` + +To reduce startup latency you can combine with a custom system as described in +the [Julia VScode docs](https://www.julia-vscode.org/docs/dev/userguide/compilesysimage/#Creating-a-sysimage-for-the-active-environment), +combined with the precompile execution file in [sysimage/precompile_exec.jl](sysimage/precompile_exec.jl). +For additional detail see the discussion in [issue #128](https://github.com/JuliaLang/JuliaSyntax.jl/issues/128). + # Parser implementation Our goal is to losslessly represent the source text with a tree; this may be From e910b064969352d4ce5bb13fdc8e39c3f00dbdd9 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 1 Nov 2022 14:42:57 +1000 Subject: [PATCH 0526/1109] More flattened form for multiple frakentuple parameters (JuliaLang/JuliaSyntax.jl#133) Here we emit `(a, b; c, d; e, f)` as (tuple a b (parameters c d) (parameters e f)) which should make these simpler to process, rather than using the nested form (tuple a b (parameters c d parameters e f))) --- JuliaSyntax/src/expr.jl | 33 +++++++++++++++++++++++---------- JuliaSyntax/src/parser.jl | 23 +++++++++++++++-------- JuliaSyntax/test/parser.jl | 14 +++++++++----- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 9da2afa9b7990..13d91d2be3045 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -11,6 +11,25 @@ function is_stringchunk(node) return k == K"String" || k == K"CmdString" end +function reorder_parameters!(args, params_pos) + p = 0 + for i = length(args):-1:1 + if !Meta.isexpr(args[i], :parameters) + break + end + p = i + end + if p == 0 + return + end + # nest frankentuples parameters sections + for i = length(args)-1:-1:p + pushfirst!(args[i].args, 
pop!(args)) + end + # Move parameters to args[params_pos] + insert!(args, params_pos, pop!(args)) +end + function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, eq_to_kw=false, inside_dot_expr=false, inside_vect_or_braces=false) if !haschildren(node) @@ -145,17 +164,11 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, popfirst!(args) headsym = Symbol("'") end - # Move parameters block to args[2] - if length(args) > 1 && Meta.isexpr(args[end], :parameters) - insert!(args, 2, args[end]) - pop!(args) - end - elseif headsym in (:tuple, :parameters, :vect, :braces) + # Move parameters blocks to args[2] + reorder_parameters!(args, 2) + elseif headsym in (:tuple, :vect, :braces) # Move parameters blocks to args[1] - if length(args) > 1 && Meta.isexpr(args[end], :parameters) - pushfirst!(args, args[end]) - pop!(args) - end + reorder_parameters!(args, 1) elseif headsym in (:try, :try_finally_catch) # Try children in source order: # try_block catch_var catch_block else_block finally_block diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f1beb9134642e..40d3af6fbb0a9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2934,8 +2934,8 @@ function parse_paren(ps::ParseState, check_identifiers=true) # Extra credit: nested parameters and frankentuples # (x...;) ==> (tuple (... x) (parameters)) # (x...; y) ==> (tuple (... x) (parameters y)) - # (; a=1; b=2) ==> (tuple (parameters (= a 1) (parameters (= b 2)))) - # (a; b; c,d) ==> (tuple a (parameters b (parameters c d))) + # (; a=1; b=2) ==> (tuple (parameters (= a 1)) (parameters (= b 2))) + # (a; b; c,d) ==> (tuple a (parameters b) (parameters c d)) # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (= c 3))) emit(ps, mark, K"tuple") elseif opts.is_block @@ -2968,7 +2968,7 @@ end # syntax so the parse tree is pretty strange in these cases! Some macros # probably use it though. 
Example: # -# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (= d 2) (parameters e (= f 3)))) +# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (= d 2)) (parameters e (= f 3))) # # flisp: parts of parse-paren- and parse-arglist function parse_brackets(after_parse::Function, @@ -2977,12 +2977,13 @@ function parse_brackets(after_parse::Function, space_sensitive=false, where_enabled=true, whitespace_newline=true) - params_marks = acquire_positions(ps.stream) + params_positions = acquire_positions(ps.stream) last_eq_before_semi = 0 num_subexprs = 0 num_semis = 0 had_commas = false had_splat = false + param_start = nothing while true bump_trivia(ps) k = peek(ps) @@ -2991,8 +2992,11 @@ function parse_brackets(after_parse::Function, elseif k == K";" # Start of parameters list # a, b; c d ==> a b (parameters c d) - push!(params_marks, position(ps)) + if !isnothing(param_start) + push!(params_positions, emit(ps, param_start, K"TOMBSTONE")) + end num_semis += 1 + param_start = position(ps) bump(ps, TRIVIA_FLAG) bump_trivia(ps) elseif is_closing_token(ps, k) @@ -3025,14 +3029,17 @@ function parse_brackets(after_parse::Function, end end end + if !isnothing(param_start) && position(ps) != param_start + push!(params_positions, emit(ps, param_start, K"TOMBSTONE")) + end opts = after_parse(had_commas, had_splat, num_semis, num_subexprs) # Emit nested parameter nodes if necessary if opts.needs_parameters - for mark in Iterators.reverse(params_marks) - emit(ps, mark, K"parameters") + for pos in params_positions + reset_node!(ps, pos, kind=K"parameters") end end - release_positions(ps.stream, params_marks) + release_positions(ps.stream, params_positions) bump_closing_token(ps, closing_kind) return opts end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 5a080e498e007..aa174b58d09f2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -244,6 +244,8 @@ tests = [ "f(a,b)" => "(call f a b)" "f(a=1; b=2)" => "(call f (= 
a 1) (parameters (= b 2)))" => Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) + "f(a; b; c)" => "(call f a (parameters b) (parameters c))" => + Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) "f (a)" => "(call f (error-t) a)" "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" @@ -568,13 +570,15 @@ tests = [ "(x,y)" => "(tuple x y)" "(x=1, y=2)" => "(tuple (= x 1) (= y 2))" # Named tuples with initial semicolon - "(;)" => "(tuple (parameters))" - "(; a=1)" => "(tuple (parameters (= a 1)))" + "(;)" => "(tuple (parameters))" => Expr(:tuple, Expr(:parameters)) + "(; a=1)" => "(tuple (parameters (= a 1)))" => Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) # Extra credit: nested parameters and frankentuples "(x...; y)" => "(tuple (... x) (parameters y))" "(x...;)" => "(tuple (... x) (parameters))" - "(; a=1; b=2)" => "(tuple (parameters (= a 1) (parameters (= b 2))))" - "(a; b; c,d)" => "(tuple a (parameters b (parameters c d)))" + "(; a=1; b=2)" => "(tuple (parameters (= a 1)) (parameters (= b 2)))" => + Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + "(a; b; c,d)" => "(tuple a (parameters b) (parameters c d))" => + Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a) "(a=1, b=2; c=3)" => "(tuple (= a 1) (= b 2) (parameters (= c 3)))" # Block syntax "(;;)" => "(block)" @@ -668,7 +672,7 @@ tests = [ "[x,\n y]" => "(vect x y)" "[x\n, y]" => "(vect x y)" "[x\n,, y]" => "(vect x (error-t ✘ y))" - "[x,y ; z]" => "(vect x y (parameters z))" + "[x,y ; z]" => "(vect x y (parameters z))" => Expr(:vect, Expr(:parameters, :z), :x, :y) "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" # parse_paren From 477a59cbdd9888dbd5bcb46bc8fd0fa7a14f6a68 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 2 Nov 2022 08:13:10 +1000 Subject: [PATCH 0527/1109] Fixes for parse check of General 
registry (JuliaLang/JuliaSyntax.jl#135) * Fix for new `parseall()` API * Reduce false positives by not reporting cases where reference parser also fails. * Ensure we use the reference parser via `fl_parseall`, as Meta.parse may have been substituted * Separate out untarring * Also delete old hacky parser conversion tool --- JuliaSyntax/tools/check_all_packages.jl | 73 +++++--------------- JuliaSyntax/tools/flisp_defines_to_julia.jl | 74 --------------------- JuliaSyntax/tools/untar_packages.jl | 22 ++++++ 3 files changed, 38 insertions(+), 131 deletions(-) delete mode 100644 JuliaSyntax/tools/flisp_defines_to_julia.jl create mode 100644 JuliaSyntax/tools/untar_packages.jl diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 40f55bfaa5744..57e7399d7424d 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -5,50 +5,13 @@ using JuliaSyntax, Logging -# like Meta.parseall, but throws -function parseall_throws(str) - pos = firstindex(str) - exs = [] - while pos <= lastindex(str) - ex, pos = Meta.parse(str, pos) - push!(exs, ex) - end - if length(exs) == 0 - throw(Meta.ParseError("end of input")) - elseif length(exs) == 1 - return exs[1] - else - return Expr(:toplevel, exs...) 
- end -end - logio = open(joinpath(@__DIR__, "logs.txt"), "w") logger = Logging.ConsoleLogger(logio) pkgspath = joinpath(@__DIR__, "pkgs") -parallel = 50 exceptions = [] Logging.with_logger(logger) do - for tars in Iterators.partition(readdir(pkgspath), parallel) - @sync for tar in tars - endswith(tar, ".tgz") || continue - @async begin - dir = joinpath(@__DIR__, "pkgs", replace(tar, r"\.tgz$" => "")) - if !isdir(dir) || !isdir(joinpath(dir, "src")) - rm(dir; recursive=true, force=true) - mkpath(dir) - tar_path = joinpath(@__DIR__, "pkgs", tar) - try - run(`tar -xf $tar_path -C $dir`) - catch err - @error "could not untar $tar_path" - end - end - end - end - end - t = time() i = 0 iob = IOBuffer() @@ -60,35 +23,31 @@ Logging.with_logger(logger) do if isfile(fpath) file = read(fpath, String) try - e1 = JuliaSyntax.parse(Expr, file) + e1 = JuliaSyntax.parseall(Expr, file) catch err err isa InterruptException && rethrow() - ex_count += 1 ex = (err, catch_backtrace()) push!(exceptions, ex) - meta_parse = "success" - try - parseall_throws(file) - catch err2 - meta_parse = "fail" - ex_count -= 1 + ref_parse = "success" + e2 = JuliaSyntax.fl_parseall(file) + @assert Meta.isexpr(e2, :toplevel) + if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) + ref_parse = "fail" + if err isa JuliaSyntax.ParseError + # Both parsers agree that there's an error, and + # JuliaSyntax didn't have an internal error. + continue + end end + + ex_count += 1 parse_to_syntax = "success" try - JuliaSyntax.parse(JuliaSyntax.SyntaxNode, file) + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, file) catch err2 parse_to_syntax = "fail" end - severity = parse_to_syntax == "fail" ? "error" : - meta_parse == "fail" ? 
"warn" : "error" - println(logio, """ - [$(severity)] $(fpath) - parse-to-expr: fail - parse-to-syntaxtree: $(parse_to_syntax) - reference: $(meta_parse) - """) - @error "" exception = ex - flush(logio) + @error "Parse failed" fpath exception=ex parse_to_syntax end end i += 1 @@ -97,7 +56,7 @@ Logging.with_logger(logger) do avg = round(runtime/i*1000, digits = 2) print(iob, "\e[2J\e[0;0H") println(iob, "$i files parsed") - println(iob, "> $(ex_count) failures compared to Meta.parse") + println(iob, "> $(ex_count) failures compared to reference parser") println(iob, "> $(length(exceptions)) errors in total") println(iob, "> $(avg)ms per file, $(round(Int, runtime))s in total") println(stderr, String(take!(iob))) diff --git a/JuliaSyntax/tools/flisp_defines_to_julia.jl b/JuliaSyntax/tools/flisp_defines_to_julia.jl deleted file mode 100644 index a8e45dc23da2a..0000000000000 --- a/JuliaSyntax/tools/flisp_defines_to_julia.jl +++ /dev/null @@ -1,74 +0,0 @@ -function _replace(s, pairs::Pair...) - for p in pairs - s = replace(s, p) - end - return s -end - -# Convert flisp definitions and comments to psuedo-Julia to reflect the -# structure of the existing flisp parser. -# -# Surrounded with all this compiler technology, but still resorting to a pile -# of regexs? 😂😱 -function juliafy_flisp(fl_input, jl_output) - prev_newline = false - had_comment = false - for line in readlines(fl_input) - if occursin(r"^\(define *\(", line) - had_comment && println(jl_output, "#") - println(jl_output, "# flisp: $line") - m = match(r"\(define *\(([-a-zA-Z?_=0-9*><:!]+) *([^)]*)", replace(line, "-"=>"_")) - isnothing(m) && @error "no match for line" line - funcname = m[1] - funcname = _replace(funcname, - r"(.*)\?"=>s"is_\1", - "=" => "equals", - "*" => "_star", - ">" => "_gt", - "<" => "_lt", - ":" => "_", - ) - funcargs = _replace(m[2], - r" *\(" => ";", - r" +" => ", ", - "." => "_", - r"([-a-zA-Z?_=]+)\?" 
=> s"is_\1", - r", *#t" => "=true", - r", *#f" => "=false", - ";" => "; ", - ) - if startswith(funcname, "parse_") - funcargs = replace(funcargs, r"^ *s\b"=>"ps::ParseState") - end - text = """ - function $funcname($funcargs) - TODO("$funcname unimplemented") - end - """ - ex = Meta.parse(text, raise=false) - if Meta.isexpr(ex, :error) - @warn "Generated bad code" message=ex.args[1] code=Text(text) - end - print(jl_output, text) - prev_newline = false - had_comment = false - elseif occursin(r"^;", line) - println(jl_output, replace(line, r"^;+" => "#")) - prev_newline = false - had_comment = true - elseif line == "" - if !prev_newline - println(jl_output) - end - prev_newline = true - had_comment = false - end - end -end - -open("/home/chris/dev/julia/src/julia-parser.scm", "r") do fl_input - open(joinpath(@__DIR__, "julia_parser_scm.jl"), "w") do jl_output - juliafy_flisp(fl_input, jl_output) - end -end - diff --git a/JuliaSyntax/tools/untar_packages.jl b/JuliaSyntax/tools/untar_packages.jl new file mode 100644 index 0000000000000..4c89ed024e33c --- /dev/null +++ b/JuliaSyntax/tools/untar_packages.jl @@ -0,0 +1,22 @@ + +pkgspath = joinpath(@__DIR__, "pkgs") + +for tars in Iterators.partition(readdir(pkgspath), 50) + @sync for tar in tars + endswith(tar, ".tgz") || continue + @async begin + dir = joinpath(@__DIR__, "pkgs", replace(tar, r"\.tgz$" => "")) + if !isdir(dir) || !isdir(joinpath(dir, "src")) + rm(dir; recursive=true, force=true) + mkpath(dir) + tar_path = joinpath(@__DIR__, "pkgs", tar) + try + run(`tar -xf $tar_path -C $dir`) + catch err + @error "could not untar $tar_path" + end + end + end + end +end + From 42e182bab23d3a596cf88e77c910af337fd630ac Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 2 Nov 2022 08:13:21 +1000 Subject: [PATCH 0528/1109] Fix for parsing `end` in `A[x ? y : end]` (JuliaLang/JuliaSyntax.jl#136) Failure due to overeager error detection of `end` keyword. 
--- JuliaSyntax/src/parser.jl | 8 +++++++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 40d3af6fbb0a9..c76a839361eca 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -220,6 +220,10 @@ function is_closing_token(ps::ParseState, k) (k == K"end" && !ps.end_symbol) end +function is_block_continuation_keyword(ps::ParseState, k) + is_block_continuation_keyword(k) && !(ps.end_symbol && k == K"end") +end + function is_closer_or_newline(ps::ParseState, k) is_closing_token(ps,k) || k == K"NewlineWs" end @@ -658,7 +662,7 @@ function parse_cond(ps::ParseState) # FIXME: This is a very specific case. Error recovery should be handled more # generally elsewhere. - if is_block_continuation_keyword(kind(t)) + if is_block_continuation_keyword(ps, kind(t)) # a "continuaton keyword" is likely to belong to the surrounding code, so # we abort early @@ -669,6 +673,8 @@ function parse_cond(ps::ParseState) bump_invisible(ps, K"error", TRIVIA_FLAG, error="unexpected `$(kind(t))`") emit(ps, mark, K"if") return + else + # A[x ? y : end] ==> (ref A (? x y end)) end parse_eq_star(ps) emit(ps, mark, K"?") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index aa174b58d09f2..624c388cb11e3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -66,6 +66,7 @@ tests = [ "a ? b: c" => "(? a b (error-t) c)" "a ? b :c" => "(? a b (error-t) c)" "a ? b c" => "(? a b (error-t) c)" + "A[x ? y : end]" => "(ref A (? x y end))" ], JuliaSyntax.parse_arrow => [ "x → y" => "(call-i x → y)" From 13ceb35cda5df5746e22225cb79b03ed6df7a3ec Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 2 Nov 2022 08:13:36 +1000 Subject: [PATCH 0529/1109] Fix parsing of `[a~b]` (JuliaLang/JuliaSyntax.jl#137) In space sensitive contexts, `~` is parsed as unary or binary depending on whitespace to the left and right of the `~`. 
In this case it should be binary and parse as `(ref (call-i ~ a b))` --- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c76a839361eca..c6f70712b9f29 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -566,7 +566,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { return end if k == K"~" - if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2)) + if ps.space_sensitive && preceding_whitespace(t) && !preceding_whitespace(peek_token(ps, 2)) # Unary ~ in space sensitive context is not assignment precedence # [a ~b] ==> (hcat a (call-pre ~ b)) return @@ -574,6 +574,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # ~ is the only non-syntactic assignment-precedence operator. # a ~ b ==> (call-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) + # [a~b] ==> (vect (call-i a ~ b)) bump(ps) parse_assignment(ps, down) emit(ps, mark, K"call", INFIX_FLAG) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 624c388cb11e3..7e477a8c74a91 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -51,6 +51,7 @@ tests = [ "[a ~b]" => "(hcat a (call-pre ~ b))" "a ~ b" => "(call-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" + "[a~b]" => "(vect (call-i a ~ b))" ], JuliaSyntax.parse_pair => [ "a => b" => "(call-i a => b)" From 989d7bb61c22614740ec5610d4d5a905482739d5 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 2 Nov 2022 21:31:54 +1000 Subject: [PATCH 0530/1109] Fix for function signatures with grouping parens (JuliaLang/JuliaSyntax.jl#140) Cater for cases where some parts of the signature are inside and some outside of the parentheses. For example the reference parser allows an extra set of parens such as function (f()) where T body end and unfortunately this syntax is used in several packages. 
--- JuliaSyntax/src/parser.jl | 30 +++++++++++++++++------------- JuliaSyntax/test/parser.jl | 15 +++++++++++---- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c6f70712b9f29..6df30c5b56499 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2016,6 +2016,7 @@ end # Parse function and macro definitions function parse_function_signature(ps::ParseState, is_function::Bool) is_anon_func = false + parsed_call = false mark = position(ps) if !is_function @@ -2053,17 +2054,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) parsed_call = _parsed_call) end is_anon_func = opts.is_anon_func - if opts.parsed_call - # Compat: Ugly case where extra parentheses existed and we've - # already parsed the whole signature. - # function (f() where T) end ==> (function (where (call f) T) (block)) - # function (f()::S) end ==> (function (:: (call f) S) (block)) - # - # TODO: Warn for use of parens? The precedence of `::` and - # `where` don't work inside parens so this is a bit of a syntax - # oddity/aberration. 
- return true - end + parsed_call = opts.parsed_call if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) # function (x::f()) end ==> (function (tuple (:: x (call f))) (block)) @@ -2099,10 +2090,10 @@ function parse_function_signature(ps::ParseState, is_function::Bool) end end end - if peek(ps, skip_newlines=true) == K"end" && !is_anon_func + if peek(ps, skip_newlines=true) == K"end" && !is_anon_func && !parsed_call return false end - if !is_anon_func + if !is_anon_func && !parsed_call # Parse function argument list # function f(x,y) end ==> (function (call f x y) (block)) # function f{T}() end ==> (function (call (curly f T)) (block)) @@ -2129,6 +2120,19 @@ function parse_function_signature(ps::ParseState, is_function::Bool) parse_where_chain(ps, mark) end # function f()::S where T end ==> (function (where (:: (call f) S) T) (block)) + # + # Ugly cases for compat where extra parentheses existed and we've + # already parsed at least the call part of the signature + # + # function (f() where T) end ==> (function (where (call f) T) (block)) + # function (f()) where T end ==> (function (where (call f) T) (block)) + # function (f() where T) where U end ==> (function (where (where (call f) T) U) (block)) + # function (f()::S) end ==> (function (:: (call f) S) (block)) + # function ((f()::S) where T) end ==> (function (where (:: (call f) S) T) (block)) + # + # TODO: Warn for use of parens? The precedence of `::` and + # `where` don't work inside parens so this is a bit of a syntax + # oddity/aberration. 
return true end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 7e477a8c74a91..d6f94f9dfccac 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -459,8 +459,6 @@ tests = [ "macro (type)(ex) end" => "(macro (call type ex) (block))" "macro \$f() end" => "(macro (call (\$ f)) (block))" "macro (\$f)() end" => "(macro (call (\$ f)) (block))" - "function (f() where T) end" => "(function (where (call f) T) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) - "function (f()::S) end"=> "(function (:: (call f) S) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) "function (x) body end"=> "(function (tuple x) (block body))" "function (x,y) end" => "(function (tuple x y) (block))" "function (x=1) end" => "(function (tuple (= x 1)) (block))" @@ -488,8 +486,17 @@ tests = [ "function f body end" => "(function (error f) (block body))" "function f()::T end" => "(function (:: (call f) T) (block))" "function f()::g(T) end" => "(function (:: (call f) (call g T)) (block))" - "function f() where {T} end" => "(function (where (call f) T) (block))" - "function f() where T end" => "(function (where (call f) T) (block))" + "function f() where {T} end" => "(function (where (call f) T) (block))" + "function f() where T end" => "(function (where (call f) T) (block))" + "function f()::S where T end" => "(function (where (:: (call f) S) T) (block))" + # Ugly cases for compat where extra parentheses existed and we've + # already parsed at least the call part of the signature + "function (f() where T) end" => "(function (where (call f) T) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) + "function (f()) where T end" => "(function (where (call f) T) (block))" + "function (f() where T) where U end" => "(function (where (where (call f) T) U) (block))" + "function (f()::S) end"=> "(function (:: (call f) S) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), 
Expr(:block)) + "function ((f()::S) where T) end" => "(function (where (:: (call f) S) T) (block))" + # body "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" # Errors From 3272d8d63ed9e7045fc6651f0dae409dd17152bf Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 3 Nov 2022 06:51:18 +1000 Subject: [PATCH 0531/1109] Fix tokenization of `1.#` (JuliaLang/JuliaSyntax.jl#142) --- JuliaSyntax/src/tokenize.jl | 1 + JuliaSyntax/test/tokenize.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index ba3c782b05bce..18f46fff2e055 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -788,6 +788,7 @@ function lex_digit(l::Lexer, kind) || ppc == '"' || ppc == ':' || ppc == '?' + || ppc == '#' || ppc == EOF_CHAR)) kind = K"Integer" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index c6aaccb7bcd4d..7ff38c69413b8 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -565,6 +565,7 @@ end @test tok("1.,").kind == K"Float" @test tok("1.;").kind == K"Float" @test tok("1.@").kind == K"Float" + @test tok("1.#").kind == K"Float" @test tok("1.").kind == K"Float" @test tok("1.\"text\" ").kind == K"Float" From 32c080f002231f9ad2f89d83e90341e9850c754f Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 3 Nov 2022 09:59:55 +1000 Subject: [PATCH 0532/1109] Fix tokenization of numbers followed by `..` (JuliaLang/JuliaSyntax.jl#143) * `.1..` is `.1` followed by `..` * `0x01..` is `0x01` followed by `..` Also fixes these cases when followed by `...` --- JuliaSyntax/src/tokenize.jl | 11 +++++++---- JuliaSyntax/test/tokenize.jl | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 18f46fff2e055..c1e86842c8ac8 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -763,12 +763,13 @@ function 
lex_digit(l::Lexer, kind) accept_number(l, isdigit) pc,ppc = dpeekchar(l) if pc == '.' - if kind === K"Float" + if ppc == '.' + # Number followed by K".." or K"..." + return emit(l, kind) + elseif kind === K"Float" # If we enter the function with kind == K"Float" then a '.' has been parsed. readchar(l) return emit_error(l, K"ErrorInvalidNumericConstant") - elseif ppc == '.' - return emit(l, kind) elseif is_operator_start_char(ppc) && ppc !== ':' readchar(l) return emit_error(l) @@ -838,7 +839,9 @@ function lex_digit(l::Lexer, kind) readchar(l) !(ishex(ppc) || ppc == '.') && return emit_error(l, K"ErrorInvalidNumericConstant") accept_number(l, ishex) - if accept(l, '.') + pc,ppc = dpeekchar(l) + if pc == '.' && ppc != '.' + readchar(l) accept_number(l, ishex) isfloat = true end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 7ff38c69413b8..30b4758de4e51 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -21,6 +21,13 @@ tok(str, i = 1) = collect(tokenize(str))[i] strtok(str) = untokenize.(collect(tokenize(str)), str) +function toks(str) + ts = [untokenize(t, str)=>kind(t) for t in tokenize(str)] + @test ts[end] == (""=>K"EndMarker") + pop!(ts) + ts +end + @testset "tokens" begin for s in ["a", IOBuffer("a")] l = tokenize(s) @@ -553,7 +560,7 @@ end @test kind(tok("1234x", 2)) == K"Identifier" end -@testset "floats with trailing `.` " begin +@testset "numbers with trailing `.` " begin @test tok("1.0").kind == K"Float" @test tok("1.a").kind == K"Float" @test tok("1.(").kind == K"Float" @@ -569,7 +576,10 @@ end @test tok("1.").kind == K"Float" @test tok("1.\"text\" ").kind == K"Float" - @test tok("1..").kind == K"Integer" + @test toks("1..") == ["1"=>K"Integer", ".."=>K".."] + @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] + @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] + @test kind.(collect(tokenize("1f0./1"))) == [K"Float", K"/", K"Integer", K"EndMarker"] end From 
5a02c6985ddf10d6b7568d68362fb29138c74eb5 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 3 Nov 2022 14:58:34 +1000 Subject: [PATCH 0533/1109] Fixes for macro paths and call chain parsing (JuliaLang/JuliaSyntax.jl#144) Correctly parse dots and other same-precedence chaining after macro calls: @A().x @A{a}.x @A[a].x Also allow general forms in macro paths, such as calls: f().@x This is allowed by the reference parser but seemed weird so it was disallowed here. But it turns out this doesn't prevent computed macro paths as the `A` in `A.@x` is looked up dynamically - so just simplify things and allow this. --- JuliaSyntax/src/parser.jl | 85 ++++++++++++++++---------------------- JuliaSyntax/test/parser.jl | 70 +++++++++++++++++++------------ 2 files changed, 79 insertions(+), 76 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 6df30c5b56499..ca2bb380c06a5 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1380,18 +1380,8 @@ function parse_identifier_or_interpolate(ps::ParseState) end end -function finish_macroname(ps, mark, valid_macroname, macro_name_position, - name_kind=nothing) - if valid_macroname - fix_macro_name_kind!(ps, macro_name_position, name_kind) - else - emit(ps, mark, K"error", error="not a valid macro name or macro module path") - end -end - # Parses a chain of sufficies at function call precedence, leftmost binding # tightest. -# f(a,b) ==> (call f a b) # f(a).g(b) ==> (call (. (call f a) (quote g)) b) # # flisp: parse-call-chain, parse-call-with-initial-ex @@ -1404,13 +1394,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # source range of the @-prefixed part of a macro macro_atname_range = nothing # $A.@x ==> (macrocall (. ($ A) (quote @x))) - # A.@var"#" ==> (macrocall (. A (quote @x))) - valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet"Identifier var . 
$" + maybe_strmac = true # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. macro_name_position = position(ps) # points to same output span as peek_behind while true - this_iter_valid_macroname = false + maybe_strmac_1 = false t = peek_token(ps) k = kind(t) if is_macrocall && (preceding_whitespace(t) || is_closing_token(ps, k)) @@ -1418,11 +1407,11 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @foo a b ==> (macrocall @foo a b) # @foo (x) ==> (macrocall @foo x) # @foo (x,y) ==> (macrocall @foo (tuple x y)) - # a().@x y ==> (macrocall (error (. (call a) (quote x))) y) # [@foo x] ==> (vect (macrocall @foo x)) # @var"#" a ==> (macrocall (var @#) a) + # A.@x y ==> (macrocall (. A (quote @x)) y) # A.@var"#" a ==> (macrocall (. A (quote (var @#))) a) - finish_macroname(ps, mark, valid_macroname, macro_name_position) + fix_macro_name_kind!(ps, macro_name_position) let ps = with_space_sensitive(ps) # Space separated macro arguments # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) @@ -1455,12 +1444,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # [f x] ==> (hcat f x) break elseif k == K"(" - if is_macrocall - # a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y) - finish_macroname(ps, mark, valid_macroname, macro_name_position) - end # f(a,b) ==> (call f a b) - # f(a; b=1) ==> (call f a (parameters (b 1))) + # f(a=1; b=2) ==> (call f (= a 1) (parameters (= b 2))) + # f(a; b; c) ==> (call f a (parameters b) (parameters c)) # (a=1)() ==> (call (= a 1)) # f (a) ==> (call f (error-t) a b) bump_disallowed_space(ps) @@ -1472,13 +1458,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_do(ps, mark) end if is_macrocall - break + # A.@x(y) ==> (macrocall (. A (quote @x)) y) + # A.@x(y).z ==> (. (macrocall (. 
A (quote @x)) y) (quote z)) + fix_macro_name_kind!(ps, macro_name_position) + is_macrocall = false + macro_atname_range = nothing end elseif k == K"[" - if is_macrocall - # a().@x[1] ==> (macrocall (error (. (call a) (quote x))) (vect 1)) - finish_macroname(ps, mark, valid_macroname, macro_name_position) - end m = position(ps) # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) @@ -1489,12 +1475,16 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @S[a,b] ==> (macrocall @S (vect a b)) # @S[a b] ==> (macrocall @S (hcat a b)) # @S[a; b] ==> (macrocall @S (vcat a b)) + # A.@S[a] ==> (macrocall (. A (quote @S)) (vect a)) + # @S[a].b ==> (. (macrocall @S (vect a)) (quote b)) #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) + fix_macro_name_kind!(ps, macro_name_position) emit(ps, m, ckind, cflags) check_ncat_compat(ps, m, ckind) emit(ps, mark, K"macrocall") - break + is_macrocall = false + macro_atname_range = nothing else # a[i] ==> (ref a i) # a[i,j] ==> (ref a i j) @@ -1516,16 +1506,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k == K"." # x .y ==> (. x (error-t) (quote y)) bump_disallowed_space(ps) - if peek(ps, 2) == K"'" - # f.' => f (error-t . ') - emark = position(ps) - bump(ps) - bump(ps) - emit(ps, emark, K"error", TRIVIA_FLAG, - error="the .' operator for transpose is discontinued") - valid_macroname = false - continue - end + emark = position(ps) if !isnothing(macro_atname_range) # Allow `@` in macrocall only in first and last position # A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x))) @@ -1564,21 +1545,20 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k == K"$" # f.$x ==> (. f (inert ($ x))) # f.$(x+y) ==> (. f (inert ($ (call + x y)))) + # A.$B.@x ==> (macrocall (. (. 
A (inert ($ B))) (quote @x))) m = position(ps) bump(ps, TRIVIA_FLAG) parse_atom(ps) emit(ps, m, K"$") emit(ps, m, K"inert") emit(ps, mark, K".") - # A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x))) - this_iter_valid_macroname = true elseif k == K"@" # A macro call after some prefix A has been consumed # A.@x ==> (macrocall (. A (quote @x))) # A.@x a ==> (macrocall (. A (quote @x)) a) m = position(ps) if is_macrocall - # @A.B.@x a ==> (macrocall (error (. A (quote x))) a) + # @A.B.@x a ==> (macrocall (. (. A (quote B)) (quote (error-t) @x)) a) bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path") else bump(ps, TRIVIA_FLAG) @@ -1589,16 +1569,21 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = (m, macro_name_position) emit(ps, m, K"quote") emit(ps, mark, K".") - this_iter_valid_macroname = true + elseif k == K"'" + # TODO: Reclaim dotted postfix operators :-) + # f.' => f (error-t ') + bump(ps) + emit(ps, emark, K"error", TRIVIA_FLAG, + error="the .' operator for transpose is discontinued") else # Field/property syntax # f.x.y ==> (. (. f (quote x)) (quote y)) m = position(ps) parse_atom(ps, false) macro_name_position = position(ps) + maybe_strmac_1 = true emit(ps, m, K"quote") emit(ps, mark, K".") - this_iter_valid_macroname = true end elseif k == K"'" && !preceding_whitespace(t) # f' ==> (call-post f ') @@ -1607,10 +1592,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"call", POSTFIX_OP_FLAG) elseif k == K"{" # Type parameter curlies and macro calls - if is_macrocall - # a().@x{y} ==> (macrocall (error (. 
(call a) (quote x))) (braces y)) - finish_macroname(ps, mark, valid_macroname, macro_name_position) - end m = position(ps) # S {a} ==> (curly S (error-t) a) bump_disallowed_space(ps) @@ -1618,16 +1599,21 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_call_arglist(ps, K"}") if is_macrocall # @S{a,b} ==> (macrocall S (braces a b)) + # A.@S{a} ==> (macrocall (. A (quote @S)) (braces a)) + # @S{a}.b ==> (. (macrocall @S (braces a)) (quote b)) + fix_macro_name_kind!(ps, macro_name_position) emit(ps, m, K"braces") emit(ps, mark, K"macrocall") min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`") - break + is_macrocall = false + macro_atname_range = nothing else # S{a,b} ==> (curly S a b) emit(ps, mark, K"curly") end elseif k in KSet" \" \"\"\" ` ``` " && - !preceding_whitespace(t) && valid_macroname + !preceding_whitespace(t) && + maybe_strmac && peek_behind(ps, macro_name_position).kind == K"Identifier" # Custom string and command literals # x"str" ==> (macrocall @x_str (string-r "str")) # x`str` ==> (macrocall @x_cmd (cmdstring-r "str")) @@ -1641,7 +1627,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. outk = is_string_delim(k) ? K"StringMacroName" : K"CmdMacroName" - finish_macroname(ps, mark, valid_macroname, macro_name_position, outk) + fix_macro_name_kind!(ps, macro_name_position, outk) parse_string(ps, true) t = peek_token(ps) k = kind(t) @@ -1660,7 +1646,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) else break end - valid_macroname &= this_iter_valid_macroname + maybe_strmac = maybe_strmac_1 end end @@ -2267,6 +2253,7 @@ function parse_macro_name(ps::ParseState) mark = position(ps) k = peek(ps) if k == K"." + # TODO: deal with __dot__ lowering in Expr conversion? # @. 
y ==> (macrocall @__dot__ y) bump(ps) else diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d6f94f9dfccac..47b42a2e507f7 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -240,23 +240,14 @@ tests = [ "\$\$a" => "(\$ (\$ a))" ], JuliaSyntax.parse_call => [ - # Mostly parse_call_chain + # parse_call "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" - "f(a,b)" => "(call f a b)" - "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" => - Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) - "f(a; b; c)" => "(call f a (parameters b) (parameters c))" => - Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) - "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) - "f (a)" => "(call f (error-t) a)" + # parse_call_chain "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" - # do - "f() do\nend" => "(do (call f) (tuple) (block))" - "f() do ; body end" => "(do (call f) (tuple) (block body))" - "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" - "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" + + # space separated macro calls "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo x)" "@foo (x,y)" => "(macrocall @foo (tuple x y))" @@ -264,9 +255,8 @@ tests = [ "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" "[@foo x]" => "(vect (macrocall @foo x))" "@var\"#\" a" => "(macrocall (var @#) a)" => Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + "A.@x y" => "(macrocall (. A (quote @x)) y)" "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" => Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) - "[f (x)]" => "(hcat f x)" - "[f x]" => "(hcat f x)" # Macro names "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. 
x)" @@ -278,24 +268,35 @@ tests = [ "@doc x y\nz" => "(macrocall @doc x y)" "@doc x\n\ny" => "(macrocall @doc x)" "@doc x\nend" => "(macrocall @doc x)" - # .' discontinued - "f.'" => "f (error-t . ')" - # Allow `@` in macrocall only in first and last position - "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" - "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" - "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" - "A.@. y" => "(macrocall (. A (quote @__dot__)) y)" - "a().@x(y)" => "(macrocall (error (. (call a) (quote x))) y)" - "a().@x y" => "(macrocall (error (. (call a) (quote x))) y)" - "a().@x{y}" => "(macrocall (error (. (call a) (quote x))) (braces y))" + + # non-errors in space sensitive contexts + "[f (x)]" => "(hcat f x)" + "[f x]" => "(hcat f x)" + # calls with brackets + "f(a,b)" => "(call f a b)" + "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" => + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) + "f(a; b; c)" => "(call f a (parameters b) (parameters c))" => + Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) + "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) + "f (a)" => "(call f (error-t) a)" + "A.@x(y)" => "(macrocall (. A (quote @x)) y)" + "A.@x(y).z" => "(. (macrocall (. A (quote @x)) y) (quote z))" + # do + "f() do\nend" => "(do (call f) (tuple) (block))" + "f() do ; body end" => "(do (call f) (tuple) (block body))" + "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" + "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" + # square brackets - "a().@x[1]" => "(macrocall (error (. 
(call a) (quote x))) (vect 1))" "@S[a,b]" => "(macrocall @S (vect a b))" => Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b)) "@S[a b]" => "(macrocall @S (hcat a b))" => Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b)) "@S[a; b]" => "(macrocall @S (vcat a b))" => Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b)) + "A.@S[a]" => "(macrocall (. A (quote @S)) (vect a))" + "@S[a].b" => "(. (macrocall @S (vect a)) (quote b))" ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" => Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) ((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))" @@ -308,6 +309,13 @@ tests = [ "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" + + # Dotted forms + # Allow `@` in macrocall only in first and last position + "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" + "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" + "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" + "@. y" => "(macrocall @__dot__ y)" "f.(a,b)" => "(. f (tuple a b))" "f.(a=1; b=2)" => "(. f (tuple (= a 1) (parameters (= b 2))))" => Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) @@ -319,6 +327,11 @@ tests = [ "f.\$x" => "(. f (inert (\$ x)))" "f.\$(x+y)" => "(. f (inert (\$ (call-i x + y))))" "A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))" + "A.@x" => "(macrocall (. A (quote @x)))" + "A.@x a" => "(macrocall (. A (quote @x)) a)" + "@A.B.@x a" => "(macrocall (. (. A (quote B)) (quote (error-t) @x)) a)" + # .' discontinued + "f.'" => "f (error-t ')" # Field/property syntax "f.x.y" => "(. (. f (quote x)) (quote y))" "x .y" => "(. 
x (error-t) (quote y))" @@ -326,9 +339,12 @@ tests = [ "f'" => "(call-post f ')" "f'ᵀ" => "(call-post f 'ᵀ)" # Curly calls + "S {a}" => "(curly S (error-t) a)" + "A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))" "@S{a,b}" => "(macrocall @S (braces a b))" + "A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))" + "@S{a}.b" => "(. (macrocall @S (braces a)) (quote b))" "S{a,b}" => "(curly S a b)" - "S {a}" => "(curly S (error-t) a)" # String macros "x\"str\"" => """(macrocall @x_str (string-r "str"))""" "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" From 9ccf22640d82b77647b21263127b3dabbf0b189b Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 4 Nov 2022 17:47:16 +1000 Subject: [PATCH 0534/1109] Lower `@ .` to `@ __dot__` not in parser but in Expr conversion (JuliaLang/JuliaSyntax.jl#146) The reason to use `__dot__` is to allow `macro __dot__` to be defined in normal Julia source. But doing this in the parser is an awkward special case - better to do it later during some lowering step. Here done in Expr conversion. --- JuliaSyntax/src/expr.jl | 3 +++ JuliaSyntax/src/kinds.jl | 1 - JuliaSyntax/src/parser.jl | 22 ++++++---------------- JuliaSyntax/src/syntax_tree.jl | 2 -- JuliaSyntax/test/parser.jl | 3 +-- 5 files changed, 10 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 13d91d2be3045..f7caadbca78e2 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -152,6 +152,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, loc = source_location(LineNumberNode, node.source, node.position) if headsym == :macrocall insert!(args, 2, loc) + if args[1] == Symbol("@.") + args[1] = Symbol("@__dot__") + end elseif headsym in (:call, :ref) # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. 
We permute the children diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 4f403e6e2e3f6..a45e7a0d43ed8 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -852,7 +852,6 @@ const _kind_names = # like CORE_DOC_MACRO_NAME) "BEGIN_MACRO_NAMES" "MacroName" - "@." "StringMacroName" "CmdMacroName" "core_@doc" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ca2bb380c06a5..7134b49a458cd 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2238,7 +2238,6 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not end if isnothing(name_kind) name_kind = k == K"Identifier" ? K"MacroName" : - k == K"." ? K"@." : internal_error("unrecognized source kind for macro name ", k) end reset_node!(ps, macro_name_position, kind=name_kind) @@ -2249,21 +2248,13 @@ end # # flisp: parse-macro-name function parse_macro_name(ps::ParseState) + # @! x ==> (macrocall @! x) + # @.. x ==> (macrocall @.. x) + # @$ x ==> (macrocall @$ x) + # @var"#" x ==> (macrocall (var #) @$ x) bump_disallowed_space(ps) - mark = position(ps) - k = peek(ps) - if k == K"." - # TODO: deal with __dot__ lowering in Expr conversion? - # @. y ==> (macrocall @__dot__ y) - bump(ps) - else - # @! x ==> (macrocall @! x) - # @.. x ==> (macrocall @.. x) - # @$ x ==> (macrocall @$ x) - # @var"#" x ==> (macrocall (var #) @$ x) - let ps = with_space_sensitive(ps) - parse_atom(ps, false) - end + let ps = with_space_sensitive(ps) + parse_atom(ps, false) end end @@ -3402,7 +3393,6 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif leading_kind == K"@" # macro call # Macro names can be keywords # @end x ==> (macrocall @end x) - # @. 
x y ==> (macrocall @__dot__ x y) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) parse_call_chain(ps, mark, true) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 002caa555fd7a..aceb524b0f43e 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -74,8 +74,6 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In nothing elseif k == K"error" ErrorVal() - elseif k == K"@." - :var"@__dot__" elseif k == K"MacroName" Symbol("@$(normalize_identifier(val_str))") elseif k == K"StringMacroName" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 47b42a2e507f7..3e192fb086315 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -315,7 +315,6 @@ tests = [ "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" - "@. y" => "(macrocall @__dot__ y)" "f.(a,b)" => "(. f (tuple a b))" "f.(a=1; b=2)" => "(. f (tuple (= a 1) (parameters (= b 2))))" => Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) @@ -710,7 +709,7 @@ tests = [ # Macro names can be keywords "@end x" => "(macrocall @end x)" # __dot__ macro - "@. x y" => "(macrocall @__dot__ x y)" + "@. x" => "(macrocall @. x)" => Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1), :x) # cmd strings "``" => "(macrocall :(Core.var\"@cmd\") (cmdstring-r \"\"))" "`cmd`" => "(macrocall :(Core.var\"@cmd\") (cmdstring-r \"cmd\"))" From 7a6dcf54025b7d5c6edbc56c8df95b5f8b34d8ff Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 8 Nov 2022 15:11:38 +1000 Subject: [PATCH 0535/1109] Report bad macro names more clearly (JuliaLang/JuliaSyntax.jl#147) This change prevents a crash for various cases of bad syntax involving macro names. In particular things like `@[x] a` and `@A.x a`. 
The latter case where the @ is separated from the actual macro name is a pain to deal with and is only approximately correct in this PR. But I'm not sure why we even support this syntax and maybe we could warn about it or deprecate it. So unclear whether going to greater lengths emitting K"TOMBSTONE" to be precise about error reporting in that case is worth it. --- JuliaSyntax/src/hooks.jl | 3 +++ JuliaSyntax/src/parser.jl | 37 +++++++++++++++++++++++++++++++++---- JuliaSyntax/test/parser.jl | 2 ++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index ca4c9974020ef..0c499674214e5 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -186,6 +186,9 @@ function _core_parser_hook(code, filename, lineno, offset, options) end ex = options === :all ? Expr(:toplevel, error_ex) : error_ex else + # FIXME: Unilaterally showing any warnings to stdout here is far + # from ideal. But Meta.parse() has no API for communicating this. + show_diagnostics(stdout, stream.diagnostics, code) # FIXME: Add support to lineno to this tree build (via SourceFile?) ex = build_tree(Expr, stream; filename=filename, wrap_toplevel_as_kind=K"None") if Meta.isexpr(ex, :None) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7134b49a458cd..378e57437c3bd 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1546,10 +1546,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f.$x ==> (. f (inert ($ x))) # f.$(x+y) ==> (. f (inert ($ (call + x y)))) # A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x))) + # @A.$x a ==> (macrocall (. 
A (inert (error x))) a) m = position(ps) bump(ps, TRIVIA_FLAG) parse_atom(ps) emit(ps, m, K"$") + macro_name_position = position(ps) emit(ps, m, K"inert") emit(ps, mark, K".") elseif k == K"@" @@ -2235,10 +2237,31 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not if k == K"var" macro_name_position = first_child_position(ps, macro_name_position) k = peek_behind(ps, macro_name_position).kind + elseif k == K")" + # @(A) x => (macrocall @A x) + # TODO: Clean this up when K"parens" is implemented + while true + macro_name_position = ParseStreamPosition(macro_name_position.token_index-1, + macro_name_position.range_index) + b = peek_behind(ps, macro_name_position) + k = b.kind + if !has_flags(b.flags, TRIVIA_FLAG) + break + end + end + elseif k == K"error" + # Error already reported in parse_macro_name + return end if isnothing(name_kind) - name_kind = k == K"Identifier" ? K"MacroName" : - internal_error("unrecognized source kind for macro name ", k) + name_kind = (k == K"Identifier") ? 
K"MacroName" : K"error" + if name_kind == K"error" + # Hack to handle bad but unusual syntax like `@A.$x a` + ri = macro_name_position.range_index + startpos = ParseStreamPosition(ps.stream.ranges[ri].first_token, ri) + # This isn't quite accurate + emit_diagnostic(ps, startpos, macro_name_position, error="Invalid macro name") + end end reset_node!(ps, macro_name_position, kind=name_kind) end @@ -2253,8 +2276,14 @@ function parse_macro_name(ps::ParseState) # @$ x ==> (macrocall @$ x) # @var"#" x ==> (macrocall (var #) @$ x) bump_disallowed_space(ps) - let ps = with_space_sensitive(ps) - parse_atom(ps, false) + mark = position(ps) + k = peek(ps) + parse_atom(ps, false) + if k == K"(" + emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary") + elseif !(peek_behind(ps).kind in KSet"Identifier var") + # @[x] y z ==> (macrocall (error (vect x)) y z) + emit(ps, mark, K"error", error="Invalid macro name") end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 3e192fb086315..5027ed92f1ecc 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -261,6 +261,7 @@ tests = [ "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. x)" "@\$ y" => "(macrocall @\$ y)" + "@[x] y z" => "(macrocall (error (vect x)) y z)" # Special @doc parsing rules "@doc x\ny" => "(macrocall @doc x y)" "A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)" @@ -326,6 +327,7 @@ tests = [ "f.\$x" => "(. f (inert (\$ x)))" "f.\$(x+y)" => "(. f (inert (\$ (call-i x + y))))" "A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))" + "@A.\$x a" => "(macrocall (. A (inert (error x))) a)" "A.@x" => "(macrocall (. A (quote @x)))" "A.@x a" => "(macrocall (. A (quote @x)) a)" "@A.B.@x a" => "(macrocall (. (. 
A (quote B)) (quote (error-t) @x)) a)" From 73ff2ed4d2ff026604dc20c4d079fab54e62db26 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 18 Nov 2022 18:11:53 +1000 Subject: [PATCH 0536/1109] Validate literals at parse time (JuliaLang/JuliaSyntax.jl#149) Here we unconditionally validate all literals after `parse!()` - ie processing String and Char escape sequences and detecting numeric overflow. Doing this up front in `parse!()` means it'll be redone later during conversion to SyntaxNode. The time cost of this in parsing to Expr seems to be about 20% in the worst case where the code consists of a large array of Float64 literals. Which isn't great but is probably acceptable for now. --- JuliaSyntax/src/diagnostics.jl | 5 + JuliaSyntax/src/hooks.jl | 10 +- JuliaSyntax/src/kinds.jl | 4 +- JuliaSyntax/src/parse_stream.jl | 84 +++++++++++++-- JuliaSyntax/src/parser.jl | 9 +- JuliaSyntax/src/parser_api.jl | 1 + JuliaSyntax/src/syntax_tree.jl | 55 ++++++---- JuliaSyntax/src/tokenize.jl | 4 +- JuliaSyntax/src/value_parsing.jl | 164 ++++++++++++++++-------------- JuliaSyntax/test/diagnostics.jl | 54 ++++++++++ JuliaSyntax/test/parser.jl | 13 ++- JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/test_utils.jl | 2 + JuliaSyntax/test/tokenize.jl | 12 +-- JuliaSyntax/test/value_parsing.jl | 157 +++++++++++++++------------- 15 files changed, 381 insertions(+), 194 deletions(-) create mode 100644 JuliaSyntax/test/diagnostics.jl diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 092571e21c571..ca98b3684303a 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -121,6 +121,11 @@ function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text: end end +function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, + fbyte::Integer, lbyte::Integer; kws...) 
+ push!(diagnostics, Diagnostic(fbyte, lbyte; kws...)) +end + function any_error(diagnostics::AbstractVector{Diagnostic}) any(is_error(d) for d in diagnostics) end diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 0c499674214e5..c3bc0c563d96d 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -186,9 +186,13 @@ function _core_parser_hook(code, filename, lineno, offset, options) end ex = options === :all ? Expr(:toplevel, error_ex) : error_ex else - # FIXME: Unilaterally showing any warnings to stdout here is far - # from ideal. But Meta.parse() has no API for communicating this. - show_diagnostics(stdout, stream.diagnostics, code) + # TODO: Figure out a way to show warnings. Meta.parse() has no API + # to communicate this, and we also can't show them to stdout as + # this is too side-effectful and can result in double-reporting in + # the REPL. + # + # show_diagnostics(stdout, stream.diagnostics, code) + # # FIXME: Add support to lineno to this tree build (via SourceFile?) 
ex = build_tree(Expr, stream; filename=filename, wrap_toplevel_as_kind=K"None") if Meta.isexpr(ex, :None) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index a45e7a0d43ed8..2ceabb3ee58d9 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -71,6 +71,7 @@ const _kind_names = "HexInt" "OctInt" "Float" + "Float32" "String" "Char" "CmdString" @@ -1015,6 +1016,7 @@ const _nonunique_kind_names = Set([ K"HexInt" K"OctInt" K"Float" + K"Float32" K"String" K"Char" K"CmdString" @@ -1088,7 +1090,7 @@ is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS" < kind(x) < K"END_SYNTAX_KINDS" is_macro_name(x) = K"BEGIN_MACRO_NAMES" < kind(x) < K"END_MACRO_NAMES" function is_number(x) - kind(x) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float") + kind(x) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float", K"Float32") end function is_string_delim(x) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 80dfed640f101..5fedfbfda5318 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -25,13 +25,15 @@ const TRIPLE_STRING_FLAG = RawFlags(1<<5) # Set when a string or identifier needs "raw string" unescaping const RAW_STRING_FLAG = RawFlags(1<<6) +# TODO? +# const ERROR_FLAG = RawFlags(1<<7) + # Token-only flag # Record whether a token had preceding whitespace const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7) # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) -# Todo ERROR_FLAG = 0x8000 ? 
function set_numeric_flags(n::Integer) f = RawFlags((n << 8) & NUMERIC_FLAGS) @@ -673,7 +675,7 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; h = SyntaxHead(kind, flags) push!(stream.tokens, SyntaxToken(h, b)) if !isnothing(error) - _emit_diagnostic(stream, b, b-1, error=error) + emit_diagnostic(stream, b, b-1, error=error) end stream.peek_count = 0 return position(stream) @@ -796,14 +798,14 @@ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, # nested. fbyte = token_first_byte(stream, first_token) lbyte = token_last_byte(stream, lastindex(stream.tokens)) - _emit_diagnostic(stream, fbyte, lbyte, error=error) + emit_diagnostic(stream, fbyte, lbyte, error=error) end push!(stream.ranges, range) return position(stream) end -function _emit_diagnostic(stream::ParseStream, fbyte, lbyte; kws...) - push!(stream.diagnostics, Diagnostic(fbyte, lbyte; kws...)) +function emit_diagnostic(stream::ParseStream, fbyte::Integer, lbyte::Integer; kws...) + emit_diagnostic(stream.diagnostics, fbyte, lbyte; kws...) return nothing end @@ -812,8 +814,6 @@ Emit a diagnostic at the position of the next token If `whitespace` is true, the diagnostic is positioned on the whitespace before the next token. Otherwise it's positioned at the next token as returned by `peek()`. - -TODO: Rename? This doesn't emit normal tokens into the output event list! """ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) i = _lookahead_index(stream, 1, true) @@ -828,12 +828,12 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) end fbyte = lookahead_token_first_byte(stream, begin_tok_i) lbyte = lookahead_token_last_byte(stream, end_tok_i) - _emit_diagnostic(stream, fbyte, lbyte; kws...) + emit_diagnostic(stream, fbyte, lbyte; kws...) return nothing end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...) 
- _emit_diagnostic(stream, token_first_byte(stream, mark.token_index), + emit_diagnostic(stream, token_first_byte(stream, mark.token_index), _next_byte(stream) - 1; kws...) end @@ -845,10 +845,68 @@ function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, end_mark::ParseStreamPosition; kws...) fbyte = token_first_byte(stream, mark.token_index) lbyte = token_first_byte(stream, end_mark.token_index) - 1 - _emit_diagnostic(stream, fbyte, lbyte; kws...) + emit_diagnostic(stream, fbyte, lbyte; kws...) end #------------------------------------------------------------------------------- +# ParseStream Post-processing + +function validate_literal_tokens(stream::ParseStream) + text = sourcetext(stream) + toks = stream.tokens + charbuf = IOBuffer() + for i = 2:length(toks) + t = toks[i] + k = kind(t) + fbyte = toks[i-1].next_byte + nbyte = t.next_byte + lbyte = prevind(text, t.next_byte) + had_error = false + if k in KSet"Integer BinInt OctInt HexInt" + # The following shouldn't be able to error... 
+ # parse_int_literal + # parse_uint_literal + elseif k == K"Float" || k == K"Float32" + if k == K"Float" + _, code = parse_float_literal(Float64, text, fbyte, nbyte) + else + _, code = parse_float_literal(Float32, text, fbyte, nbyte) + end + if code == :ok + # pass + elseif code == :overflow + emit_diagnostic(stream, fbyte, lbyte, + error="overflow in floating point literal") + had_error = true + elseif code == :underflow + emit_diagnostic(stream, fbyte, lbyte, + warning="underflow in floating point literal") + end + elseif k == K"Char" + @assert fbyte < nbyte # Already handled in the parser + truncate(charbuf, 0) + had_error = unescape_julia_string(charbuf, text, fbyte, + nbyte, stream.diagnostics) + if !had_error + seek(charbuf,0) + read(charbuf, Char) + if !eof(charbuf) + had_error = true + emit_diagnostic(stream, fbyte, lbyte, + error="character literal contains multiple characters") + end + end + elseif k == K"String" && !has_flags(t, RAW_STRING_FLAG) + had_error = unescape_julia_string(devnull, text, fbyte, + nbyte, stream.diagnostics) + end + if had_error + toks[i] = SyntaxToken(SyntaxHead(K"error", EMPTY_FLAGS), + t.orig_kind, t.next_byte) + end + end +end + # Tree construction from the list of text ranges held by ParseStream # API for extracting results from ParseStream @@ -942,7 +1000,11 @@ state for further parsing. """ function sourcetext(stream::ParseStream; steal_textbuf=false) root = stream.text_root - if root isa AbstractString && codeunit(root) == UInt8 + # The following works for SubString but makes the return type of this + # method type unstable. 
+ # if root isa AbstractString && codeunit(root) == UInt8 + # return root + if root isa String return root elseif steal_textbuf return String(stream.textbuf) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 378e57437c3bd..9e565225979d5 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1120,7 +1120,7 @@ function parse_unary(ps::ParseState) end if k in KSet"- +" && !is_decorated(t) t2 = peek_token(ps, 2) - if !preceding_whitespace(t2) && kind(t2) in KSet"Integer Float" + if !preceding_whitespace(t2) && kind(t2) in KSet"Integer Float Float32" k3 = peek(ps, 3) if is_prec_power(k3) || k3 in KSet"[ {" # `[`, `{` (issue #18851) and `^` have higher precedence than @@ -1133,9 +1133,10 @@ function parse_unary(ps::ParseState) else # We have a signed numeric literal. Glue the operator to the # next token to create a signed literal: - # -2 ==> -2 - # +2.0 ==> 2.0 - # -2*x ==> (call-i -2 * x) + # -2 ==> -2 + # +2.0 ==> 2.0 + # -1.0f0 ==> -1.0f0 + # -2*x ==> (call-i -2 * x) bump_glue(ps, kind(t2), EMPTY_FLAGS, 2) end return diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 50a7d8ed815c5..c693a7bf9c0e0 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -51,6 +51,7 @@ function parse!(stream::ParseStream; rule::Symbol=:toplevel) else throw(ArgumentError("Unknown grammar rule $rule")) end + validate_literal_tokens(stream) stream end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index aceb524b0f43e..18ada54798d74 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -29,27 +29,39 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val_str = view(source, val_range) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. 
- val = if k in KSet"Integer Float BinInt OctInt HexInt" - julia_string_to_number(val_str, k) + val = if k == K"Integer" + parse_int_literal(val_str) + elseif k == K"Float" + v, code = parse_float_literal(Float64, source.code, position, + position+span(raw)) + @check code == :ok || code == :underflow + v + elseif k == K"Float32" + v, code = parse_float_literal(Float32, source.code, position, + position+span(raw)) + @check code == :ok || code == :underflow + v + elseif k in KSet"BinInt OctInt HexInt" + parse_uint_literal(val_str, k) elseif k == K"true" true elseif k == K"false" false elseif k == K"Char" - v, err, _ = unescape_julia_string(val_str, false, false) - if err || length(v) != 1 - ErrorVal() - else - only(v) - end + io = IOBuffer() + ds = Diagnostic[] + had_error = unescape_julia_string(io, source.code, position, + position+span(raw), ds) + @check !had_error && isempty(ds) + seek(io, 0) + c = read(io, Char) + @check eof(io) + c elseif k == K"Identifier" if has_flags(head(raw), RAW_STRING_FLAG) - s, err, _ = unescape_julia_string(val_str, false, true) - if err - ErrorVal() - else - Symbol(normalize_identifier(s)) - end + io = IOBuffer() + unescape_raw_string(io, val_str, false) + Symbol(normalize_identifier(String(take!(io)))) else Symbol(normalize_identifier(val_str)) end @@ -57,15 +69,16 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In # This should only happen for tokens nested inside errors Symbol(val_str) elseif k in KSet"String CmdString" - is_cmd = k == K"CmdString" - is_raw = has_flags(head(raw), RAW_STRING_FLAG) - s, err, _ = unescape_julia_string(val_str, is_cmd, is_raw) - if err - # TODO: communicate the unescaping error somehow - ErrorVal() + io = IOBuffer() + if has_flags(head(raw), RAW_STRING_FLAG) + unescape_raw_string(io, val_str, k == K"CmdString") else - s + ds = Diagnostic[] + had_error = unescape_julia_string(io, source.code, position, + position+span(raw), ds) + @check !had_error && isempty(ds) end + 
String(take!(io)) elseif is_operator(k) isempty(val_range) ? Symbol(untokenize(k)) : # synthetic invisible tokens diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index c1e86842c8ac8..8f7ad5ff57621 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -801,7 +801,7 @@ function lex_digit(l::Lexer, kind) accept_number(l, isdigit) pc, ppc = dpeekchar(l) if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') - kind = K"Float" + kind = pc == 'f' ? K"Float32" : K"Float" readchar(l) accept(l, "+-−") if accept_batch(l, isdigit) @@ -819,7 +819,7 @@ function lex_digit(l::Lexer, kind) end elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') - kind = K"Float" + kind = pc == 'f' ? K"Float32" : K"Float" readchar(l) accept(l, "+-−") if accept_batch(l, isdigit) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 51e674f68c2a5..042cca0a01037 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -2,43 +2,35 @@ # This file contains utility functions for converting undecorated source # strings into Julia values. For example, string->number, string unescaping, etc. -""" -Convert a Julia source code string into a number. 
-""" -function julia_string_to_number(str::AbstractString, kind) +function parse_int_literal(str::AbstractString) + # TODO: A specialized code path here can be a lot faster and also + # allocation free str = replace(replace(str, '_'=>""), '−'=>'-') - if kind == K"Integer" - x = Base.tryparse(Int, str) - if Int === Int32 && isnothing(x) - x = Base.tryparse(Int64, str) - end + x = Base.tryparse(Int, str) + if Int === Int32 && isnothing(x) + x = Base.tryparse(Int64, str) + end + if isnothing(x) + x = Base.tryparse(Int128, str) if isnothing(x) - x = Base.tryparse(Int128, str) - if isnothing(x) - x = Base.parse(BigInt, str) - end - end - return x - elseif kind == K"Float" - if !startswith(str,"0x") && 'f' in str && !('p' in str) - # TODO: re-detecting Float32 here is kind of awful. Should have a - # separate Float32 literal type produced by the lexer? - x, code = _parse_float(Float32, str) - else - x, code = _parse_float(Float64, str) + x = Base.parse(BigInt, str) end - return code === :ok ? x : - code === :underflow ? x : # < TODO: emit warning somehow? - #=code === :overflow=# ErrorVal() - elseif kind == K"HexInt" - ndigits = length(str)-2 + end + return x +end + +function parse_uint_literal(str::AbstractString, k) + str = replace(replace(str, '_'=>""), '−'=>'-') + ndigits = length(str)-2 + if k == K"HexInt" return ndigits <= 2 ? Base.parse(UInt8, str) : ndigits <= 4 ? Base.parse(UInt16, str) : ndigits <= 8 ? Base.parse(UInt32, str) : ndigits <= 16 ? Base.parse(UInt64, str) : ndigits <= 32 ? Base.parse(UInt128, str) : Base.parse(BigInt, str) - elseif kind == K"BinInt" + elseif k == K"BinInt" + str = replace(replace(str, '_'=>""), '−'=>'-') ndigits = length(str)-2 return ndigits <= 8 ? Base.parse(UInt8, str) : ndigits <= 16 ? Base.parse(UInt16, str) : @@ -46,8 +38,7 @@ function julia_string_to_number(str::AbstractString, kind) ndigits <= 64 ? Base.parse(UInt64, str) : ndigits <= 128 ? 
Base.parse(UInt128, str) : Base.parse(BigInt, str) - elseif kind == K"OctInt" - ndigits = length(str)-2 + elseif k == K"OctInt" x = Base.tryparse(UInt64, str) if isnothing(x) x = Base.tryparse(UInt128, str) @@ -68,7 +59,6 @@ function julia_string_to_number(str::AbstractString, kind) end end - #------------------------------------------------------------------------------- """ Like `Base.parse(Union{Float64,Float32}, str)`, but permits float underflow @@ -76,33 +66,54 @@ Like `Base.parse(Union{Float64,Float32}, str)`, but permits float underflow Parse a Float64. str[firstind:lastind] must be a valid floating point literal string. If the value is outside Float64 range. """ -function _parse_float(::Type{T}, str::String, - firstind::Integer, lastind::Integer) where {T} # force specialize with where {T} - strsize = lastind - firstind + 1 +function parse_float_literal(::Type{T}, str::String, + firstind::Integer, endind::Integer) where {T} # force specialize with where {T} + strsize = endind - firstind bufsz = 50 if strsize < bufsz buf = Ref{NTuple{bufsz, UInt8}}() ptr = Base.unsafe_convert(Ptr{UInt8}, pointer_from_objref(buf)) GC.@preserve str buf begin - unsafe_copyto!(ptr, pointer(str, firstind), strsize) - # Ensure ptr is null terminated - unsafe_store!(ptr, UInt8(0), strsize + 1) - _unsafe_parse_float(T, ptr, strsize) + n = _copy_normalize_number!(ptr, pointer(str, firstind), strsize) + _unsafe_parse_float(T, ptr, n) end else - # Slow path with allocation. - buf = Vector{UInt8}(str[firstind:lastind]) - push!(buf, 0x00) + # Slower path with allocation. 
+ buf = Vector{UInt8}(undef, strsize+1) ptr = pointer(buf) - GC.@preserve buf _unsafe_parse_float(T, ptr, strsize) + GC.@preserve str buf begin + n = _copy_normalize_number!(ptr, pointer(str, firstind), strsize) + _unsafe_parse_float(T, ptr, n) + end end end -function _parse_float(T, str::String) - _parse_float(T, str, firstindex(str), lastindex(str)) +# Like replace(replace(str, '_'=>""), '−'=>'-') +# dest must be of size at least srcsize+1 +function _copy_normalize_number!(dest, src, srcsize) + i = 0 + j = 0 + while i < srcsize + b = unsafe_load(src + i) + if b == UInt8('_') + i += 1 + continue + elseif b == 0xe2 && i+2 < srcsize && + unsafe_load(src + i + 1) == 0x88 && + unsafe_load(src + i + 2) == 0x92 + # src at i,i+1,i+2 is UTF-8 code for unicode minus sign '−' + b = UInt8('-') + i += 2 + end + unsafe_store!(dest+j, b) + i += 1 + j += 1 + end + unsafe_store!(dest+j, 0x00) + return j end -# Internals of _parse_float, split into a separate function to avoid some +# Internals of parse_float_literal, split into a separate function to avoid some # apparent codegen issues https://github.com/JuliaLang/julia/issues/46509 # (perhaps we don't want the `buf` in `GC.@preserve buf` to be stack allocated # on one branch and heap allocated in another?) @@ -207,15 +218,16 @@ end Process Julia source code escape sequences for non-raw strings. `str` should be passed without delimiting quotes. 
""" -function unescape_julia_string(io::IO, str::AbstractString)::Tuple{Bool, String} - i = firstindex(str) - lastidx = lastindex(str) - while i <= lastidx +function unescape_julia_string(io::IO, str::AbstractString, + firstind, endind, diagnostics) + had_error = false + i = firstind + while i < endind c = str[i] if c != '\\' if c == '\r' # convert literal \r and \r\n in strings to \n (issue #11988) - if i+1 <= lastidx && str[i+1] == '\n' + if i+1 < endind && str[i+1] == '\n' i += 1 end c = '\n' @@ -226,8 +238,9 @@ function unescape_julia_string(io::IO, str::AbstractString)::Tuple{Bool, String} end # Process \ escape sequences. See also Base.unescape_string which some # of this code derives from (but which disallows \` \' \$) + escstart = i i += 1 - if i > lastidx + if i >= endind break end c = str[i] @@ -235,7 +248,7 @@ function unescape_julia_string(io::IO, str::AbstractString)::Tuple{Bool, String} n = k = 0 m = c == 'x' ? 2 : c == 'u' ? 4 : 8 - while (k += 1) <= m && i+1 <= lastidx + while (k += 1) <= m && i+1 < endind nc = str[i+1] n = '0' <= nc <= '9' ? n<<4 + (nc-'0') : 'a' <= nc <= 'f' ? n<<4 + (nc-'a'+10) : @@ -244,25 +257,32 @@ function unescape_julia_string(io::IO, str::AbstractString)::Tuple{Bool, String} end if k == 1 || n > 0x10ffff u = m == 4 ? 'u' : 'U' - return true, "invalid $(m == 2 ? "hex (\\x)" : "unicode (\\$u)") escape sequence" - end - if m == 2 # \x escape sequence - write(io, UInt8(n)) + msg = (m == 2) ? "invalid hex escape sequence" : + "invalid unicode escape sequence" + emit_diagnostic(diagnostics, escstart, i, error=msg) + had_error = true else - print(io, Char(n)) + if m == 2 # \x escape sequence + write(io, UInt8(n)) + else + print(io, Char(n)) + end end elseif '0' <= c <= '7' k = 1 n = c-'0' - while (k += 1) <= 3 && i+1 <= lastidx + while (k += 1) <= 3 && i+1 < endind c = str[i+1] n = ('0' <= c <= '7') ? 
n<<3 + c-'0' : break i += 1 end if n > 255 - return true, "octal escape sequence out of range" + emit_diagnostic(diagnostics, escstart, i, + error="invalid octal escape sequence") + had_error = true + else + write(io, UInt8(n)) end - write(io, UInt8(n)) else u = # C escapes c == 'n' ? '\n' : @@ -279,24 +299,18 @@ function unescape_julia_string(io::IO, str::AbstractString)::Tuple{Bool, String} c == '"' ? '"' : c == '$' ? '$' : c == '`' ? '`' : - return true, "Invalid escape sequence \\$c" - write(io, u) + nothing + if isnothing(u) + emit_diagnostic(diagnostics, escstart, i, + error="invalid escape sequence") + had_error = true + else + write(io, u) + end end i = nextind(str, i) end - return false, "" -end - -function unescape_julia_string(str::AbstractString, is_cmd::Bool, is_raw::Bool)::Tuple{String, Bool, String} - io = IOBuffer() - error = false - msg = "" - if is_raw - unescape_raw_string(io, str, is_cmd) - else - error, msg = unescape_julia_string(io, str) - end - String(take!(io)), error, msg + return had_error end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl new file mode 100644 index 0000000000000..9e3c800091e0f --- /dev/null +++ b/JuliaSyntax/test/diagnostics.jl @@ -0,0 +1,54 @@ +function diagnostic(str; allow_multiple=false) + stream = ParseStream(str) + parse!(stream) + if allow_multiple + stream.diagnostics + else + @test length(stream.diagnostics) == 1 + only(stream.diagnostics) + end +end + +@testset "diagnostics for literal parsing" begin + # Float overflow/underflow + @test diagnostic("x = 10.0e1000;") == + Diagnostic(5, 13, :error, "overflow in floating point literal") + @test diagnostic("x = 10.0f1000;") == + Diagnostic(5, 13, :error, "overflow in floating point literal") + @test diagnostic("x = 10.0e-1000;") == + Diagnostic(5, 14, :warning, "underflow in floating point literal") + @test diagnostic("x = 10.0f-1000;") == + Diagnostic(5, 
14, :warning, "underflow in floating point literal") + + # Char + @test diagnostic("x = ''") == + Diagnostic(6, 5, :error, "empty character literal") + @test diagnostic("x = 'abc'") == + Diagnostic(6, 8, :error, "character literal contains multiple characters") + @test diagnostic("x = '\\xq'") == + Diagnostic(6, 7, :error, "invalid hex escape sequence") + @test diagnostic("x = '\\uq'") == + Diagnostic(6, 7, :error, "invalid unicode escape sequence") + @test diagnostic("x = '\\Uq'") == + Diagnostic(6, 7, :error, "invalid unicode escape sequence") + @test diagnostic("x = '\\777'") == + Diagnostic(6, 9, :error, "invalid octal escape sequence") + @test diagnostic("x = '\\k'") == + Diagnostic(6, 7, :error, "invalid escape sequence") + + # String + @test diagnostic("x = \"abc\\xq\"") == + Diagnostic(9, 10, :error, "invalid hex escape sequence") + @test diagnostic("x = \"abc\\uq\"") == + Diagnostic(9, 10, :error, "invalid unicode escape sequence") + @test diagnostic("x = \"abc\\Uq\"") == + Diagnostic(9, 10, :error, "invalid unicode escape sequence") + @test diagnostic("x = \"abc\\777\"") == + Diagnostic(9, 12, :error, "invalid octal escape sequence") + @test diagnostic("x = \"abc\\k\"") == + Diagnostic(9, 10, :error, "invalid escape sequence") + @test diagnostic("x = \"abc\\k \\k\"", allow_multiple=true) == [ + Diagnostic(9, 10, :error, "invalid escape sequence"), + Diagnostic(12, 13, :error, "invalid escape sequence") + ] +end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 5027ed92f1ecc..8ae2b6688fbfc 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,6 +1,7 @@ function test_parse(production, code; v=v"1.6", expr=false) stream = ParseStream(code, version=v) production(ParseState(stream)) + JuliaSyntax.validate_literal_tokens(stream) t = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") source = SourceFile(code) s = SyntaxNode(source, t) @@ -163,6 +164,7 @@ tests = [ "-2[1, 3]" => "(call-pre - (ref 2 1 
3))" "-2" => "-2" "+2.0" => "2.0" + "-1.0f0" => "-1.0f0" "-0x1" => "(call-pre - 0x01)" "- 2" => "(call-pre - 2)" ".-2" => "(call-pre .- 2)" @@ -628,6 +630,8 @@ tests = [ "'a'" => "(char 'a')" "'α'" => "(char 'α')" "'\\xce\\xb1'" => "(char 'α')" + "'\\u03b1'" => "(char 'α')" + "'\\U1D7DA'" => "(char '𝟚')" "'a" => "(char 'a' (error-t))" "''" => "(char (error))" "'" => "(char (error))" @@ -776,7 +780,6 @@ tests = [ "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" "\"\$in\"" => "(string in)" - raw"\"\xqqq\"" => "(string ✘)" # Triple-quoted dedenting: "\"\"\"\nx\"\"\"" => raw"""(string-s "x")""" "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" @@ -827,6 +830,14 @@ tests = [ "\"a\\\r\"" => "(string \"a\")" "\"a\\\r\n\"" => "(string \"a\")" ], + JuliaSyntax.parse_atom => [ + # errors in literals + "\"\\xqqq\"" => "(string (error))" + "'ab'" => "(char (error))" + "'\\xq'" => "(char (error))" + "10.0e1000'" => "(error)" + "10.0f100'" => "(error)" + ], JuliaSyntax.parse_docstring => [ """ "notdoc" ] """ => "(string \"notdoc\")" """ "notdoc" \n] """ => "(string \"notdoc\")" diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 7d40754459d47..b08da53303274 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -14,6 +14,7 @@ end include("test_utils.jl") include("parse_stream.jl") include("parser.jl") +include("diagnostics.jl") include("parser_api.jl") include("expr.jl") @testset "Parsing values from strings" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 273eb003889d8..1cd7728419945 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -5,6 +5,7 @@ using .JuliaSyntax: # Parsing ParseStream, ParseState, + Diagnostic, SourceFile, parse!, parse, @@ -226,6 +227,7 @@ for debugging. 
function itest_parse(production, code; version::VersionNumber=v"1.6") stream = ParseStream(code; version=version) production(JuliaSyntax.ParseState(stream)) + JuliaSyntax.validate_literal_tokens(stream) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") println(stdout, "# Code:\n$code\n") diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 30b4758de4e51..4ee7859ad7d6b 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -580,7 +580,7 @@ end @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] - @test kind.(collect(tokenize("1f0./1"))) == [K"Float", K"/", K"Integer", K"EndMarker"] + @test kind.(collect(tokenize("1f0./1"))) == [K"Float32", K"/", K"Integer", K"EndMarker"] end @@ -618,15 +618,15 @@ end @test tok("1.0e-0").kind == K"Float" @test tok("1.0E0").kind == K"Float" @test tok("1.0E-0").kind == K"Float" - @test tok("1.0f0").kind == K"Float" - @test tok("1.0f-0").kind == K"Float" + @test tok("1.0f0").kind == K"Float32" + @test tok("1.0f-0").kind == K"Float32" @test tok("0e0").kind == K"Float" @test tok("0e+0").kind == K"Float" @test tok("0E0").kind == K"Float" @test tok("201E+0").kind == K"Float" - @test tok("2f+0").kind == K"Float" - @test tok("2048f0").kind == K"Float" + @test tok("2f+0").kind == K"Float32" + @test tok("2048f0").kind == K"Float32" @test tok("1.:0").kind == K"Float" @test tok("0x00p2").kind == K"Float" @test tok("0x00P2").kind == K"Float" @@ -639,7 +639,7 @@ end # Floating point with \minus rather than - @test tok("1.0e−0").kind == K"Float" - @test tok("1.0f−0").kind == K"Float" + @test tok("1.0f−0").kind == K"Float32" @test tok("0x0p−2").kind == K"Float" end diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 8b983131b6b97..8b552d6c348cd 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -1,55 +1,57 @@ using JuliaSyntax: - 
julia_string_to_number, - unescape_julia_string, - _parse_float + parse_int_literal, + parse_uint_literal, + parse_float_literal, + unescape_julia_string @testset "Float parsing" begin # Float64 - @test _parse_float(Float64, "123", 1, 3) === (123.0, :ok) - @test _parse_float(Float64, "123", 2, 3) === (23.0, :ok) - @test _parse_float(Float64, "123", 2, 2) === (2.0, :ok) - @test _parse_float(Float64, "1.3", 1, 3) === (1.3, :ok) - @test _parse_float(Float64, "1.3e2", 1, 5) === (1.3e2, :ok) - @test _parse_float(Float64, "1.0e-1000", 1, 9) === (0.0, :underflow) - @test _parse_float(Float64, "1.0e+1000", 1, 9) === (Inf, :overflow) + @test parse_float_literal(Float64, "123", 1, 4) === (123.0, :ok) + @test parse_float_literal(Float64, "123", 2, 4) === (23.0, :ok) + @test parse_float_literal(Float64, "123", 2, 3) === (2.0, :ok) + @test parse_float_literal(Float64, "1.3", 1, 4) === (1.3, :ok) + @test parse_float_literal(Float64, "1.3e2", 1, 6) === (1.3e2, :ok) + @test parse_float_literal(Float64, "1.3E2", 1, 6) === (1.3e2, :ok) + @test parse_float_literal(Float64, "1.0e-1000", 1, 10) === (0.0, :underflow) + @test parse_float_literal(Float64, "1.0e+1000", 1, 10) === (Inf, :overflow) # Slow path (exceeds static buffer size) - @test _parse_float(Float64, "0.000000000000000000000000000000000000000000000000000000000001") === (1e-60, :ok) + @test parse_float_literal(Float64, "0.000000000000000000000000000000000000000000000000000000000001", 1, 63) === (1e-60, :ok) + # hexfloat + @test parse_float_literal(Float64, "0x0ap-0", 1, 8) === (Float64(10), :ok) + @test parse_float_literal(Float64, "0xffp-0", 1, 8) === (Float64(255), :ok) # Float32 - @test _parse_float(Float32, "123", 1, 3) === (123.0f0, :ok) - @test _parse_float(Float32, "1.3f2", 1, 5) === (1.3f2, :ok) + @test parse_float_literal(Float32, "123", 1, 4) === (123.0f0, :ok) + @test parse_float_literal(Float32, "1.3f2", 1, 6) === (1.3f2, :ok) if !Sys.iswindows() - @test _parse_float(Float32, "1.0f-50", 1, 7) === (0.0f0, 
:underflow) + @test parse_float_literal(Float32, "1.0f-50", 1, 8) === (0.0f0, :underflow) end - @test _parse_float(Float32, "1.0f+50", 1, 7) === (Inf32, :overflow) + @test parse_float_literal(Float32, "1.0f+50", 1, 8) === (Inf32, :overflow) # Assertions - @test_throws ErrorException _parse_float(Float64, "x", 1, 1) - @test_throws ErrorException _parse_float(Float64, "1x", 1, 2) + @test_throws ErrorException parse_float_literal(Float64, "x", 1, 2) + @test_throws ErrorException parse_float_literal(Float64, "1x", 1, 3) + + # Underscore and \minus allowed + @test parse_float_literal(Float64, "10_000.0_0", 1, 9) === (Float64(10000), :ok) + @test parse_float_literal(Float64, "−10.0", 1, 8) === (Float64(-10), :ok) + @test parse_float_literal(Float64, "10e\u22121", 1, 8) === (Float64(1), :ok) end -hexint(s) = julia_string_to_number(s, K"HexInt") -binint(s) = julia_string_to_number(s, K"BinInt") -octint(s) = julia_string_to_number(s, K"OctInt") +hexint(s) = parse_uint_literal(s, K"HexInt") +binint(s) = parse_uint_literal(s, K"BinInt") +octint(s) = parse_uint_literal(s, K"OctInt") -@testset "Number parsing" begin +@testset "Integer parsing" begin # Integers - @testset "Integers" begin - @test julia_string_to_number("-1", K"Integer") isa Int - @test julia_string_to_number("1", K"Integer") isa Int - @test julia_string_to_number("2147483647", K"Integer") isa Int - @test julia_string_to_number("9223372036854775807", K"Integer") isa Int64 - @test julia_string_to_number("9223372036854775808", K"Integer") isa Int128 - @test julia_string_to_number("170141183460469231731687303715884105727", K"Integer") isa Int128 - @test julia_string_to_number("170141183460469231731687303715884105728", K"Integer") isa BigInt - end - - # Floats - @testset "Floats" begin - @test julia_string_to_number("10e-0", K"Float") === Float64(10) - @test julia_string_to_number("10f-0", K"Float") === Float32(10) - @test julia_string_to_number("0x0ap-0", K"Float") === Float64(10) - @test 
julia_string_to_number("0xffp-0", K"Float") === Float64(255) + @testset "Signed Integers" begin + @test parse_int_literal("-1") isa Int + @test parse_int_literal("1") isa Int + @test parse_int_literal("2147483647") isa Int + @test parse_int_literal("9223372036854775807") isa Int64 + @test parse_int_literal("9223372036854775808") isa Int128 + @test parse_int_literal("170141183460469231731687303715884105727") isa Int128 + @test parse_int_literal("170141183460469231731687303715884105728") isa BigInt end # HexInt @@ -137,39 +139,48 @@ octint(s) = julia_string_to_number(s, K"OctInt") end @testset "Underscore separators" begin - @test julia_string_to_number("10_000", K"Integer") === 10000 - @test julia_string_to_number("10_000.0", K"Float") === Float64(10000) - @test julia_string_to_number("0xff_ff", K"HexInt") === 0xffff - @test julia_string_to_number("0b1111_1111", K"BinInt") === 0xff - @test julia_string_to_number("0o177_777", K"OctInt") === 0xffff + @test parse_int_literal("10_000") === 10000 + @test parse_uint_literal("0xff_ff", K"HexInt") === 0xffff + @test parse_uint_literal("0b1111_1111", K"BinInt") === 0xff + @test parse_uint_literal("0o177_777", K"OctInt") === 0xffff end @testset "\\minus ('\\u2212' / '−') allowed in numbers" begin - @test julia_string_to_number("−10", K"Integer") === -10 - @test julia_string_to_number("−10.0", K"Float") === Float64(-10) - @test julia_string_to_number("10e\u22121", K"Float") === Float64(1) + @test parse_int_literal("−10") === -10 end end -function unesc(str, is_cmd = false, is_raw = false) - str, iserror, _ = unescape_julia_string(str, is_cmd, is_raw) - @test !iserror - return str +function unesc(str, firstind=firstindex(str), endind=lastindex(str)+1; diagnostics=false) + io = IOBuffer() + ds = JuliaSyntax.Diagnostic[] + unescape_julia_string(io, str, firstind, endind, ds) + if diagnostics + ds + else + @test isempty(ds) + String(take!(io)) + end end + @testset "String unescaping" begin + # offsets + @test unesc("abcd", 1, 3) 
== "ab" + @test unesc("abcd", 2, 4) == "bc" + @test unesc("abcd", 3, 5) == "cd" + # Allowed escapes of delimiters and dollar sign - @test only(unesc("\\\\")) == '\\' - @test only(unesc("\\\"")) == '"' - @test only(unesc("\\\$")) == '$' - @test only(unesc("\\'")) == '\'' - @test only(unesc("\\`")) == '`' + @test unesc("\\\\") == "\\" + @test unesc("\\\"") == "\"" + @test unesc("\\\$") == "\$" + @test unesc("\\'") == "\'" + @test unesc("\\`") == "`" # Newline normalization @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd" # Invalid escapes - @test unescape_julia_string("\\.", false, false)[2] - @test unescape_julia_string("\\z", false, false)[2] + @test !isempty(unesc("\\.", diagnostics=true)) + @test !isempty(unesc("\\z", diagnostics=true)) # Standard C escape sequences @test codeunits(unesc("\\n\\t\\r\\e\\b\\f\\v\\a")) == @@ -181,44 +192,50 @@ end @test unesc("x\\U001F604x") == "x😄x" # Maximum unicode code point @test unesc("x\\U10ffffx") == "x\U10ffffx" - @test unescape_julia_string("x\\U110000x", false, false)[2] + @test !isempty(unesc("x\\U110000x", diagnostics=true)) # variable-length octal @test unesc("x\\7x") == "x\ax" @test unesc("x\\77x") == "x?x" @test unesc("x\\141x") == "xax" @test unesc("x\\377x") == "x\xffx" - @test unescape_julia_string("x\\400x", false, false)[2] + @test !isempty(unesc("x\\400x", diagnostics=true)) +end + +function unesc_raw(str, is_cmd) + io = IOBuffer() + JuliaSyntax.unescape_raw_string(io, str, is_cmd) + return String(take!(io)) end @testset "Raw string unescaping" begin # " delimited # x\"x ==> x"x - @test unesc("x\\\"x", false, true) == "x\"x" + @test unesc_raw("x\\\"x", false) == "x\"x" # x\`x ==> x\`x - @test unesc("x\\`x", false, true) == "x\\`x" + @test unesc_raw("x\\`x", false) == "x\\`x" # x\\\"x ==> x\"x - @test unesc("x\\\\\\\"x", false, true) == "x\\\"x" + @test unesc_raw("x\\\\\\\"x", false) == "x\\\"x" # x\\\`x ==> x\\\`x - @test unesc("x\\\\\\`x", false, true) == "x\\\\\\`x" + @test unesc_raw("x\\\\\\`x", false) == 
"x\\\\\\`x" # '\\ ' ==> '\\ ' - @test unesc("\\\\ ", false, true) == "\\\\ " + @test unesc_raw("\\\\ ", false) == "\\\\ " # '\\' ==> '\' - @test unesc("\\\\", false, true) == "\\" + @test unesc_raw("\\\\", false) == "\\" # '\\\\' ==> '\\' - @test unesc("\\\\\\\\", false, true) == "\\\\" + @test unesc_raw("\\\\\\\\", false) == "\\\\" # ` delimited # x\"x ==> x\"x - @test unesc("x\\\"x", true, true) == "x\\\"x" + @test unesc_raw("x\\\"x", true) == "x\\\"x" # x\`x ==> x`x - @test unesc("x\\`x", true, true) == "x`x" + @test unesc_raw("x\\`x", true) == "x`x" # x\\\"x ==> x\"x - @test unesc("x\\\\\\\"x", true, true) == "x\\\\\\\"x" + @test unesc_raw("x\\\\\\\"x", true) == "x\\\\\\\"x" # x\\\`x ==> x\`x - @test unesc("x\\\\\\`x", true, true) == "x\\`x" + @test unesc_raw("x\\\\\\`x", true) == "x\\`x" # '\\ ' ==> '\\ ' - @test unesc("\\\\ ", true, true) == "\\\\ " + @test unesc_raw("\\\\ ", true) == "\\\\ " end @testset "Normalization of identifiers" begin From d9dc5145bfaf475c399039a8475f1bf12e61a93e Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 22 Nov 2022 12:17:44 +1000 Subject: [PATCH 0537/1109] Parse dotted calls with `dotcall` head (JuliaLang/JuliaSyntax.jl#151) Dotted call syntax parses into various forms which aren't really consistent. Especially, Expr is inconsistent about dotted infix calls vs dotted prefix calls. In this change we adopt a more consistent (and hopefully less mysterious!) parsing where dotted calls get their own `dotcall` head which is otherwise like the `call` head: f.(a, b) ==> (dotcall f a b) a .+ b ==> (dotcall-i a + b) .+ b ==> (dotcall-pre + b) .+(b) ==> (dotcall-pre + b) Also, in comparison chains where a dotted operator appears as an atom we split the dot from an operator, so `.+` becomes `(. +)`: a .< b < c ==> (comparison a (. <) b < c) There's other cases where it would also be consistent to split the dot from the operator, but these are more challenging to convert back to a compatible Expr so I've punted on these for now. 
For example, we'd like .*(a,b) ==> (call (. *) a b) .+(a,) ==> (call (. +) a) but these are not yet implemented as we need to be able to distinguish them from the likes of `(.+)(a,)` which the reference parser treats differently from `.+(a,)` --- JuliaSyntax/src/expr.jl | 26 +++- JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/parse_stream.jl | 6 +- JuliaSyntax/src/parser.jl | 226 ++++++++++++++++++++------------ JuliaSyntax/test/expr.jl | 12 ++ JuliaSyntax/test/parser.jl | 55 +++++--- 6 files changed, 213 insertions(+), 113 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index f7caadbca78e2..04f1a979d797d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -31,7 +31,7 @@ function reorder_parameters!(args, params_pos) end function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, - eq_to_kw=false, inside_dot_expr=false, inside_vect_or_braces=false) + eq_to_kw=false, inside_vect_or_braces=false) if !haschildren(node) val = node.val if val isa Union{Int128,UInt128,BigInt} @@ -125,11 +125,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[2] = _to_expr(node_args[2]) else eq_to_kw_in_call = - headsym == :call && is_prefix_call(node) || + ((headsym == :call || headsym == :dotcall) && is_prefix_call(node)) || headsym == :ref - eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces || - (headsym == :tuple && inside_dot_expr) - in_dot = headsym == :. + eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces in_vb = headsym == :vect || headsym == :braces if insert_linenums && isempty(node_args) push!(args, source_location(LineNumberNode, node.source, node.position)) @@ -142,7 +140,6 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all args[insert_linenums ? 
2*i : i] = _to_expr(n, eq_to_kw=eq_to_kw, - inside_dot_expr=in_dot, inside_vect_or_braces=in_vb) end end @@ -155,7 +152,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, if args[1] == Symbol("@.") args[1] = Symbol("@__dot__") end - elseif headsym in (:call, :ref) + elseif headsym in (:dotcall, :call, :ref) # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children # here as necessary to get the canonical order. @@ -169,6 +166,21 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end # Move parameters blocks to args[2] reorder_parameters!(args, 2) + if headsym === :dotcall + if is_prefix_call(node) + return Expr(:., args[1], Expr(:tuple, args[2:end]...)) + else + # operator calls + headsym = :call + args[1] = Symbol(".", args[1]) + end + end + elseif headsym === :comparison + for i in 1:length(args) + if Meta.isexpr(args[i], :., 1) + args[i] = Symbol(".",args[i].args[1]) + end + end elseif headsym in (:tuple, :vect, :braces) # Move parameters blocks to args[1] reorder_parameters!(args, 1) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 2ceabb3ee58d9..9fded1d46e319 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -868,6 +868,7 @@ const _kind_names = "BEGIN_SYNTAX_KINDS" "block" "call" + "dotcall" "comparison" "curly" "inert" # QuoteNode; not quasiquote diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 5fedfbfda5318..e073d90f874a7 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -83,7 +83,7 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) if include_flag_suff && suffix_flags != EMPTY_FLAGS str = str*"-" is_trivia(head) && (str = str*"t") - is_infix_op_call(head) && (str = str*"i") + is_infix_op_call(head) && (str = str*"i") is_prefix_op_call(head) && (str = str*"pre") 
is_postfix_op_call(head) && (str = str*"post") has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") @@ -725,9 +725,7 @@ function bump_split(stream::ParseStream, split_spec...) push!(stream.tokens, SyntaxToken(h, kind(tok), b)) end stream.peek_count = 0 - # Returning position(stream) like the other bump* methods would be - # ambiguous here; return nothing instead. - nothing + return position(stream) end function _reset_node_head(x, k, f) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9e565225979d5..bc3877e7f8bff 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -297,17 +297,6 @@ function is_both_unary_and_binary(t) ) end -# operators handled by parse_unary at the start of an expression -function is_initial_operator(t) - k = kind(t) - # TODO(jb): `?` should probably not be listed here except for the syntax hack in osutils.jl - is_operator(k) && - !is_word_operator(k) && - !(k in KSet": ' .' ?") && - !(is_syntactic_unary_op(k) && !is_dotted(t)) && - !is_syntactic_operator(k) -end - # flisp: invalid-identifier? function is_valid_identifier(k) k = kind(k) @@ -330,6 +319,27 @@ function was_eventually_call(ps::ParseState) end end +function bump_dotsplit(ps, flags=EMPTY_FLAGS; + emit_dot_node::Bool=false, remap_kind::Kind=K"None") + t = peek_token(ps) + if is_dotted(t) + bump_trivia(ps) + mark = position(ps) + k = remap_kind != K"None" ? 
remap_kind : kind(t) + pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (0, k, flags)) + if emit_dot_node + pos = emit(ps, mark, K".") + end + else + if remap_kind != K"None" + pos = bump(ps, remap_kind=remap_kind) + else + pos = bump(ps) + end + end + return pos +end + #------------------------------------------------------------------------------- # Parser # @@ -351,9 +361,10 @@ function parse_LtoR(ps::ParseState, down, is_op) mark = position(ps) down(ps) while is_op(peek(ps)) - bump(ps) + t = peek_token(ps) + bump_dotsplit(ps) down(ps) - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end end @@ -364,11 +375,11 @@ end function parse_RtoL(ps::ParseState, down, is_op, self) mark = position(ps) down(ps) - k = peek(ps) - if is_op(k) - bump(ps) + t = peek_token(ps) + if is_op(kind(t)) + bump_dotsplit(ps) self(ps) - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end end @@ -573,11 +584,12 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { end # ~ is the only non-syntactic assignment-precedence operator. # a ~ b ==> (call-i a ~ b) + # a .~ b ==> (dotcall-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) # [a~b] ==> (vect (call-i a ~ b)) - bump(ps) + bump_dotsplit(ps) parse_assignment(ps, down) - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, is_dotted(t) ? 
K"dotcall" : K"call", INFIX_FLAG) else # a += b ==> (+= a b) # a .= b ==> (.= a b) @@ -614,6 +626,7 @@ end # flisp: parse-pair # a => b ==> (call-i a => b) +# a .=> b ==> (dotcall-i a => b) function parse_pair(ps::ParseState) parse_RtoL(ps, parse_cond, is_prec_pair, parse_pair) end @@ -698,11 +711,11 @@ function parse_arrow(ps::ParseState) else # x → y ==> (call-i x → y) # x <--> y ==> (call-i x <--> y) - # x .--> y ==> (call-i x .--> y) + # x .--> y ==> (dotcall-i x --> y) # x -->₁ y ==> (call-i x -->₁ y) - bump(ps) + bump_dotsplit(ps) parse_arrow(ps) - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end end end @@ -757,10 +770,12 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) end n_comparisons = 0 op_pos = NO_POSITION + op_dotted = false initial_tok = peek_token(ps) - while is_prec_comparison(peek(ps)) + while (t = peek_token(ps); is_prec_comparison(t)) n_comparisons += 1 - op_pos = bump(ps) + op_dotted = is_dotted(t) + op_pos = bump_dotsplit(ps, emit_dot_node=true) parse_pipe_lt(ps) end if n_comparisons == 1 @@ -773,13 +788,19 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) else # Normal binary comparisons # x < y ==> (call-i x < y) - # x .<: y ==> (call-i x .<: y) - emit(ps, mark, K"call", INFIX_FLAG) + # x .< y ==> (dotcall-i x < y) + if op_dotted + # x .<: y ==> (dotcall-i x <: y) + reset_node!(ps, op_pos, kind=K"TOMBSTONE", flags=TRIVIA_FLAG) + end + emit(ps, mark, is_dotted(initial_tok) ? K"dotcall" : K"call", INFIX_FLAG) end elseif n_comparisons > 1 # Comparison chains # x < y < z ==> (comparison x < y < z) # x == y < z ==> (comparison x == y < z) + # x .< y .< z ==> (comparison x (. <) y (. <) z) + # x .< y < z ==> (comparison x (. 
<) y < z) emit(ps, mark, K"comparison") end end @@ -791,6 +812,7 @@ function parse_pipe_lt(ps::ParseState) end # x |> y |> z ==> (call-i (call-i x |> y) |> z) +# x .|> y ==> (dotcall-i x |> y) # flisp: parse-pipe> function parse_pipe_gt(ps::ParseState) parse_LtoR(ps, parse_range, is_prec_pipe_gt) @@ -807,15 +829,17 @@ end function parse_range(ps::ParseState) mark = position(ps) parse_expr(ps) - initial_kind = peek(ps) + initial_tok = peek_token(ps) + initial_kind = kind(initial_tok) if initial_kind != K":" && is_prec_colon(initial_kind) # a..b ==> (call-i a .. b) # a … b ==> (call-i a … b) - bump(ps) + # a .… b ==> (dotcall-i a … b) + bump_dotsplit(ps) parse_expr(ps) - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, is_dotted(initial_tok) ? K"dotcall" : K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled - # a ? b : c:d ==> (if a b (call-i c : d)) + # a ? b : c:d ==> (? a b (call-i c : d)) n_colons = 0 while peek(ps) == K":" if ps.space_sensitive && @@ -888,6 +912,7 @@ end # a - b - c ==> (call-i (call-i a - b) - c) # a + b + c ==> (call-i a + b c) +# a .+ b ==> (dotcall-i a + b) # # flisp: parse-expr function parse_expr(ps::ParseState) @@ -920,16 +945,16 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) # [x+y + z] ==> (vect (call-i x + y z)) break end - bump(ps) + bump_dotsplit(ps) down(ps) if kind(t) in chain_ops && !is_decorated(t) # a + b + c ==> (call-i a + b c) - # a + b .+ c ==> (call-i (call-i a + b) .+ c) + # a + b .+ c ==> (dotcall-i (call-i a + b) + c) parse_chain(ps, down, kind(t)) end # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c) - # a .+ b .+ c ==> (call-i (call-i a .+ b) .+ c) - emit(ps, mark, K"call", INFIX_FLAG) + # a .+ b .+ c ==> (dotcall-i (dotcall-i a + b) + c) + emit(ps, mark, is_dotted(t) ? 
K"dotcall" : K"call", INFIX_FLAG) end end @@ -950,11 +975,13 @@ function parse_chain(ps::ParseState, down, op_kind) end # flisp: parse-rational +# x // y // z ==> (call-i (call-i x // y) // z) function parse_rational(ps::ParseState) parse_LtoR(ps, parse_shift, is_prec_rational) end # flisp: parse-shift +# x >> y >> z ==> (call-i (call-i x >> y) >> z) function parse_shift(ps::ParseState) parse_LtoR(ps, parse_unary_subtype, is_prec_bitshift) end @@ -963,8 +990,8 @@ end # # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) - k = peek(ps, skip_newlines=true) - if k in KSet"<: >:" + t = peek_token(ps, skip_newlines=true) + if is_type_operator(t) k2 = peek(ps, 2) if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" # return operator by itself @@ -978,13 +1005,14 @@ function parse_unary_subtype(ps::ParseState) # <:(x::T) ==> (<:-pre (:: x T)) parse_where(ps, parse_juxtapose) else + # <: x ==> (<:-pre x) # <: A where B ==> (<:-pre (where A B)) mark = position(ps) bump(ps, TRIVIA_FLAG) parse_where(ps, parse_juxtapose) # Flisp parser handled this, but I don't know how it can happen... @check peek_behind(ps).kind != K"tuple" - emit(ps, mark, k, PREFIX_OP_FLAG) + emit(ps, mark, kind(t), PREFIX_OP_FLAG) end else parse_where(ps, parse_juxtapose) @@ -1103,22 +1131,33 @@ function parse_juxtapose(ps::ParseState) end end -# Deal with numeric literal prefixes and unary calls +# Parse numeric literal prefixes, calls to unary operators and prefix +# calls involving arbitrary operators with bracketed arglists (as opposed to +# infix notation) # -# flisp: parse-unary +# flisp: parse-unary, parse-unary-call function parse_unary(ps::ParseState) mark = position(ps) bump_trivia(ps) - t = peek_token(ps) - k = kind(t) - if !is_initial_operator(t) + op_t = peek_token(ps) + op_k = kind(op_t) + if ( + !is_operator(op_k) || + is_word_operator(op_k) || + # TODO(jb): `?` should probably not be listed here + # except for the syntax hack in osutils.jl + (op_k in KSet": ' .' 
?") || + (is_syntactic_unary_op(op_k) && !is_dotted(op_t)) || + is_syntactic_operator(op_k) + ) + # `op_t` is not an initial operator # :T ==> (quote T) # in::T ==> (:: in T) # isa::T ==> (:: isa T) parse_factor(ps) return end - if k in KSet"- +" && !is_decorated(t) + if op_k in KSet"- +" && !is_decorated(op_t) t2 = peek_token(ps, 2) if !preceding_whitespace(t2) && kind(t2) in KSet"Integer Float Float32" k3 = peek(ps, 3) @@ -1142,26 +1181,6 @@ function parse_unary(ps::ParseState) return end end - # Things which are not quite negative literals result in a unary call instead - # -0x1 ==> (call-pre - 0x01) - # - 2 ==> (call-pre - 2) - # .-2 ==> (call-pre .- 2) - parse_unary_call(ps) -end - -# Parse calls to unary operators and prefix calls involving arbitrary operators -# with bracketed arglists (as opposed to infix notation) -# -# +a ==> (call-pre + a) -# +(a,b) ==> (call-pre + a b) -# -# flisp: parse-unary-call -function parse_unary_call(ps::ParseState) - mark = position(ps) - op_t = peek_token(ps) - op_k = kind(op_t) - op_node_kind = is_type_operator(op_t) ? op_k : K"call" - op_tok_flags = is_type_operator(op_t) ? TRIVIA_FLAG : EMPTY_FLAGS t2 = peek_token(ps, 2) k2 = kind(t2) if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" @@ -1172,9 +1191,7 @@ function parse_unary_call(ps::ParseState) # .+ = ==> (. +) # .+) ==> (. +) # .& ==> (. 
&) - bump_trivia(ps) - bump_split(ps, (1, K".", TRIVIA_FLAG), (0, op_k, EMPTY_FLAGS)) - emit(ps, mark, K".") + bump_dotsplit(ps, emit_dot_node=true) else # Standalone non-dotted operators # +) ==> + @@ -1184,6 +1201,7 @@ function parse_unary_call(ps::ParseState) # Call with type parameters or non-unary prefix call # +{T}(x::T) ==> (call (curly + T) (:: x T)) # *(x) ==> (call * x) + # .*(x) ==> (call .* x) parse_factor(ps) elseif k2 == K"(" # Cases like +(a;b) are ambiguous: are they prefix calls to + with b as @@ -1193,7 +1211,7 @@ function parse_unary_call(ps::ParseState) # # (The flisp parser only considers commas before `;` and thus gets this # last case wrong) - bump(ps, op_tok_flags) + op_pos = bump_dotsplit(ps, emit_dot_node=true) # Setup possible whitespace error between operator and ( ws_mark = position(ps) @@ -1205,15 +1223,15 @@ function parse_unary_call(ps::ParseState) bump(ps, TRIVIA_FLAG) # ( initial_semi = peek(ps) == K";" opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_call = had_commas || had_splat || initial_semi - return (needs_parameters=is_call, - is_call=is_call, - is_block=!is_call && num_semis > 0) + is_paren_call = had_commas || had_splat || initial_semi + return (needs_parameters=is_paren_call, + is_paren_call=is_paren_call, + is_block=!is_paren_call && num_semis > 0) end # The precedence between unary + and any following infix ^ depends on # whether the parens are a function call or not - if opts.is_call + if opts.is_paren_call if preceding_whitespace(t2) # Whitespace not allowed before prefix function call bracket # + (a,b) ==> (call + (error) a b) @@ -1230,11 +1248,31 @@ function parse_unary_call(ps::ParseState) # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) - emit(ps, mark, op_node_kind) + if is_type_operator(op_t) + # <:(a,) ==> (<: a) + emit(ps, mark, op_k) + reset_node!(ps, op_pos, flags=TRIVIA_FLAG) + 
else + if is_dotted(op_t) + # Ugly hack to undo the split in bump_dotsplit + # .+(a,) ==> (call .+ a) + reset_node!(ps, op_pos, kind=K"TOMBSTONE") + tb1 = ps.stream.tokens[op_pos.token_index-1] + ps.stream.tokens[op_pos.token_index-1] = + SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), + K"TOMBSTONE", tb1.next_byte-1) + tb0 = ps.stream.tokens[op_pos.token_index] + ps.stream.tokens[op_pos.token_index] = + SyntaxToken(SyntaxHead(kind(tb0), flags(tb0)), + tb0.orig_kind, tb0.next_byte) + end + emit(ps, mark, K"call") + end parse_call_chain(ps, mark) parse_factor_with_initial_ex(ps, mark) else # Unary function calls with brackets as grouping, not an arglist + # .+(a) ==> (dotcall-pre (. +) a) if opts.is_block # +(a;b) ==> (call-pre + (block a b)) emit(ps, mark_before_paren, K"block") @@ -1243,26 +1281,43 @@ function parse_unary_call(ps::ParseState) # +(a=1) ==> (call-pre + (= a 1)) # Unary operators have lower precedence than ^ # +(a)^2 ==> (call-pre + (call-i a ^ 2)) + # .+(a)^2 ==> (dotcall-pre + (call-i a ^ 2)) # +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2)) parse_call_chain(ps, mark_before_paren) parse_factor_with_initial_ex(ps, mark_before_paren) - emit(ps, mark, op_node_kind, PREFIX_OP_FLAG) + if is_type_operator(op_t) + # <:(a) ==> (<:-pre a) + emit(ps, mark, op_k, PREFIX_OP_FLAG) + reset_node!(ps, op_pos, flags=TRIVIA_FLAG) + else + if is_dotted(op_t) + emit(ps, mark, K"dotcall", PREFIX_OP_FLAG) + reset_node!(ps, op_pos, kind=K"TOMBSTONE") + else + emit(ps, mark, K"call", PREFIX_OP_FLAG) + end + end end else + @assert !is_type_operator(op_t) # `<:x` handled in parse_unary_subtype if is_unary_op(op_t) # Normal unary calls # +x ==> (call-pre + x) # √x ==> (call-pre √ x) - # ±x ==> (call-pre ± x) - bump(ps, op_tok_flags) + # .~x ==> (dotcall-pre ~ x) + # Things which are not quite negative literals + # -0x1 ==> (call-pre - 0x01) + # - 2 ==> (call-pre - 2) + # .-2 ==> (dotcall-pre - 2) + bump_dotsplit(ps, EMPTY_FLAGS) else # /x ==> (call-pre (error /) x) 
# +₁ x ==> (call-pre (error +₁) x) - # .<: x ==> (call-pre (error .<:) x) + # .<: x ==> (dotcall-pre (error .<:) x) bump(ps, error="not a unary operator") end parse_unary(ps) - emit(ps, mark, op_node_kind, PREFIX_OP_FLAG) + emit(ps, mark, is_dotted(op_t) ? K"dotcall" : K"call", PREFIX_OP_FLAG) end end @@ -1270,6 +1325,7 @@ end # # x^y ==> (call-i x ^ y) # x^y^z ==> (call-i x ^ (call-i y ^ z)) +# x .^ y ==> (dotcall-i x ^ y) # begin x end::T ==> (:: (block x) T) # # flisp: parse-factor @@ -1282,10 +1338,10 @@ end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) parse_decl_with_initial_ex(ps, mark) - if is_prec_power(peek(ps)) - bump(ps) + if (t = peek_token(ps); is_prec_power(kind(t))) + bump_dotsplit(ps) parse_factor_after(ps) - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end end @@ -1526,14 +1582,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit_diagnostic(ps, mark, error="dot call syntax not supported for macros") end - # f.(a,b) ==> (. f (tuple a b)) - # f. (x) ==> (. f (error-t) (tuple x)) + # f.(a,b) ==> (dotcall f a b) + # f. (x) ==> (dotcall f (error-t) x) bump_disallowed_space(ps) - m = position(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") - emit(ps, m, K"tuple") - emit(ps, mark, K".") + emit(ps, mark, K"dotcall") elseif k == K":" # A.:+ ==> (. A (quote +)) # A.: + ==> (. 
A (error-t) (quote +)) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 9202c22b55e1c..0d0d99e517f38 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -223,4 +223,16 @@ @test parse(Expr, "f(a .= 1)") == Expr(:call, :f, Expr(:.=, :a, 1)) end + + @testset "dotcall" begin + parse(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) + parse(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) + parse(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) + parse(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) + parse(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), + :b, Symbol(".<"), :c) + parse(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) + parse(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) + parse(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8ae2b6688fbfc..82b3e14499c27 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -51,11 +51,13 @@ tests = [ "x, = xs" => "(= (tuple x) xs)" "[a ~b]" => "(hcat a (call-pre ~ b))" "a ~ b" => "(call-i a ~ b)" + "a .~ b" => "(dotcall-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" "[a~b]" => "(vect (call-i a ~ b))" ], JuliaSyntax.parse_pair => [ "a => b" => "(call-i a => b)" + "a .=> b" => "(dotcall-i a => b)" ], JuliaSyntax.parse_cond => [ "a ? b : c" => "(? a b c)" @@ -74,7 +76,7 @@ tests = [ "x → y" => "(call-i x → y)" "x <--> y" => "(call-i x <--> y)" "x --> y" => "(--> x y)" - "x .--> y" => "(call-i x .--> y)" + "x .--> y" => "(dotcall-i x --> y)" "x -->₁ y" => "(call-i x -->₁ y)" ], JuliaSyntax.parse_or => [ @@ -93,15 +95,20 @@ tests = [ "x >: y" => "(>: x y)" # Normal binary comparisons "x < y" => "(call-i x < y)" + "x .< y" => "(dotcall-i x < y)" + "x .<: y" => "(dotcall-i x <: y)" # Comparison chains "x < y < z" => "(comparison x < y < z)" "x == y < z" => "(comparison x == y < z)" + "x .< y .< z" => "(comparison x (. 
<) y (. <) z)" + "x .< y < z" => "(comparison x (. <) y < z)" ], JuliaSyntax.parse_pipe_lt => [ "x <| y <| z" => "(call-i x <| (call-i y <| z))" ], JuliaSyntax.parse_pipe_gt => [ "x |> y |> z" => "(call-i (call-i x |> y) |> z)" + "x .|> y" => "(dotcall-i x |> y)" ], JuliaSyntax.parse_range => [ "1:2" => "(call-i 1 : 2)" @@ -113,6 +120,7 @@ tests = [ JuliaSyntax.parse_range => [ "a..b" => "(call-i a .. b)" "a … b" => "(call-i a … b)" + "a .… b" => "(dotcall-i a … b)" "[1 :a]" => "(hcat 1 (quote a))" "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote a))" "x..." => "(... x)" @@ -122,7 +130,7 @@ tests = [ JuliaSyntax.parse_expr => [ "a - b - c" => "(call-i (call-i a - b) - c)" "a + b + c" => "(call-i a + b c)" - "a + b .+ c" => "(call-i (call-i a + b) .+ c)" + "a + b .+ c" => "(dotcall-i (call-i a + b) + c)" # parse_with_chains: # The following is two elements of a hcat "[x +y]" => "(hcat x (call-pre + y))" @@ -132,13 +140,20 @@ tests = [ "[x+y+z]" => "(vect (call-i x + y z))" "[x+y + z]" => "(vect (call-i x + y z))" # Dotted and normal operators - "a +₁ b +₁ c" => "(call-i (call-i a +₁ b) +₁ c)" - "a .+ b .+ c" => "(call-i (call-i a .+ b) .+ c)" + "a +₁ b +₁ c" => "(call-i (call-i a +₁ b) +₁ c)" + "a .+ b .+ c" => "(dotcall-i (dotcall-i a + b) + c)" ], JuliaSyntax.parse_term => [ "a * b * c" => "(call-i a * b c)" + "a .* b" => "(dotcall-i a * b)" "-2*x" => "(call-i -2 * x)" ], + JuliaSyntax.parse_rational => [ + "x // y // z" => "(call-i (call-i x // y) // z)" + ], + JuliaSyntax.parse_shift => [ + "x >> y >> z" => "(call-i (call-i x >> y) >> z)" + ], JuliaSyntax.parse_juxtapose => [ "2x" => "(call-i 2 * x)" "2x" => "(call-i 2 * x)" @@ -165,11 +180,6 @@ tests = [ "-2" => "-2" "+2.0" => "2.0" "-1.0f0" => "-1.0f0" - "-0x1" => "(call-pre - 0x01)" - "- 2" => "(call-pre - 2)" - ".-2" => "(call-pre .- 2)" - ], - JuliaSyntax.parse_unary_call => [ # Standalone dotted operators are parsed as (|.| op) ".+" => "(. +)" ".+\n" => "(. 
+)" @@ -181,8 +191,11 @@ tests = [ # Call with type parameters or non-unary prefix call "+{T}(x::T)" => "(call (curly + T) (:: x T))" "*(x)" => "(call * x)" + ".*(x)" => "(call .* x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" + ".+(a,)" => "(call .+ a)" + "(.+)(a)" => "(call (. +) a)" "+(a=1,)" => "(call + (= a 1))" => Expr(:call, :+, Expr(:kw, :a, 1)) "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" @@ -192,24 +205,33 @@ tests = [ # Prefix calls have higher precedence than ^ "+(a,b)^2" => "(call-i (call + a b) ^ 2)" "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" + "<:(a,)" => "(<: a)" # Unary function calls with brackets as grouping, not an arglist + ".+(a)" => "(dotcall-pre + a)" "+(a;b)" => "(call-pre + (block a b))" "+(a=1)" => "(call-pre + (= a 1))" => Expr(:call, :+, Expr(:(=), :a, 1)) # Unary operators have lower precedence than ^ "+(a)^2" => "(call-pre + (call-i a ^ 2))" + ".+(a)^2" => "(dotcall-pre + (call-i a ^ 2))" "+(a)(x,y)^2" => "(call-pre + (call-i (call a x y) ^ 2))" - # Normal unary calls (see parse_unary) + "<:(a)" => "(<:-pre a)" + # Normal unary calls "+x" => "(call-pre + x)" "√x" => "(call-pre √ x)" - "±x" => "(call-pre ± x)" + ".~x" => "(dotcall-pre ~ x)" + # Things which are not quite negative literals + "-0x1"=> "(call-pre - 0x01)" + "- 2" => "(call-pre - 2)" + ".-2" => "(dotcall-pre - 2)" # Not a unary operator "/x" => "(call-pre (error /) x)" "+₁ x" => "(call-pre (error +₁) x)" - ".<: x" => "(call-pre (error .<:) x)" + ".<: x" => "(dotcall-pre (error .<:) x)" ], JuliaSyntax.parse_factor => [ "x^y" => "(call-i x ^ y)" "x^y^z" => "(call-i x ^ (call-i y ^ z))" + "x .^ y" => "(dotcall-i x ^ y)" "begin x end::T" => "(:: (block x) T)" # parse_decl_with_initial_ex "a::b" => "(:: a b)" @@ -223,6 +245,7 @@ tests = [ "<: =" => "<:" "<:{T}(x::T)" => "(call (curly <: T) (:: x T))" "<:(x::T)" => "(<:-pre (:: x T))" + "<: x" => "(<:-pre x)" "<: A where B" => 
"(<:-pre (where A B))" # Really for parse_where "x where \n {T}" => "(where x T)" @@ -318,11 +341,11 @@ tests = [ "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" - "f.(a,b)" => "(. f (tuple a b))" - "f.(a=1; b=2)" => "(. f (tuple (= a 1) (parameters (= b 2))))" => + "f.(a,b)" => "(dotcall f a b)" + "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" => Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - "(a=1).()" => "(. (= a 1) (tuple))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) - "f. (x)" => "(. f (error-t) (tuple x))" + "(a=1).()" => "(dotcall (= a 1))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) + "f. (x)" => "(dotcall f (error-t) x)" # Other dotted syntax "A.:+" => "(. A (quote +))" "A.: +" => "(. A (quote (error-t) +))" From 8f6021f6402f13c994986f2422f9f40787678571 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 23 Nov 2022 10:51:00 +1000 Subject: [PATCH 0538/1109] Add ignore_errors keyword to main parser API (JuliaLang/JuliaSyntax.jl#152) This allows extracting the recovered tree (containing error nodes) without using the low-level parse!() API. 
--- JuliaSyntax/src/parser_api.jl | 6 ++++-- JuliaSyntax/test/parser_api.jl | 12 ++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index c693a7bf9c0e0..905f4c2405b54 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -72,7 +72,8 @@ function parse!(::Type{TreeType}, io::IO; end function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, - ignore_trivia=true, filename=nothing, ignore_warnings=false) where {T} + ignore_trivia=true, filename=nothing, ignore_errors=false, + ignore_warnings=ignore_errors) where {T} stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) @@ -85,7 +86,8 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= emit_diagnostic(stream, error="unexpected text after parsing $rule") end end - if any_error(stream.diagnostics) || (!ignore_warnings && !isempty(stream.diagnostics)) + if (!ignore_errors && any_error(stream.diagnostics)) || + (!ignore_warnings && !isempty(stream.diagnostics)) throw(ParseError(stream, filename=filename)) end # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 1af46fbabc750..131bb7fd933d0 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -80,4 +80,16 @@ @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2) @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4) end + + @testset "error/warning handling" begin + # ignore_warnings + parse_sexpr(s;kws...) 
= sprint(show, MIME("text/x.sexpression"), parse(SyntaxNode, s; kws...)) + @test_throws JuliaSyntax.ParseError parse_sexpr("try finally catch ex end") + @test parse_sexpr("try finally catch ex end", ignore_warnings=true) == + "(try_finally_catch (block) false false false (block) ex (block))" + # ignore_errors + @test_throws JuliaSyntax.ParseError parse_sexpr("[a; b, c]") + @test_throws JuliaSyntax.ParseError parse_sexpr("[a; b, c]", ignore_warnings=true) + @test parse_sexpr("[a; b, c]", ignore_errors=true) == "(vcat a b (error-t) c)" + end end From 1ab50fe41925019b67802bc2910c2f6924ff1b76 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 23 Nov 2022 10:51:39 +1000 Subject: [PATCH 0539/1109] Fixes for tokenization of invalid UTF-8 strings (JuliaLang/JuliaSyntax.jl#155) --- JuliaSyntax/src/tokenize_utils.jl | 3 +++ JuliaSyntax/test/tokenize.jl | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 1b50536200af2..8637577c57f0b 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -50,6 +50,7 @@ readchar(io::IO) = eof(io) ? 
EOF_CHAR : read(io, Char) # `a .(op) b` or `.(op)a` and where `length(string(op)) == 1` @inline function dotop1(c1::Char) c1 == EOF_CHAR && return false + Base.ismalformed(c1) && return false c = UInt32(c1) c == 0x00000021 || c == 0x000000a6 || @@ -174,6 +175,7 @@ end @inline function isopsuffix(c1::Char) c1 == EOF_CHAR && return false + Base.ismalformed(c1) && return false c = UInt32(c1) if (c < 0xa1 || c > 0x10ffff) return false @@ -252,6 +254,7 @@ end function is_operator_start_char(c::Char) c == EOF_CHAR && return false + Base.ismalformed(c) && return false is_operator_start_char(UInt32(c)) end is_operator_start_char(u::UInt32) = u == 0x00000021 || (u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b 
|| (u == 0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u == 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || (u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 
|| (u == 0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u == 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || (u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 
|| (u == 0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u == 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || (u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 
|| (u == 0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u == 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || (u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 
|| (u == 0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u == 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || (u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 
|| (u == 0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 4ee7859ad7d6b..9df33cbec066b 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -937,11 +937,14 @@ end @test strtok("a .&&₁ b") == ["a", " ", ".&&", "₁", " ", "b", ""] end -@testset "is_identifier[_start]_char" begin +@testset "malformed strings" begin malformed = first("\xe2") @test Tokenize.is_identifier_char(malformed) == false @test Tokenize.is_identifier_start_char(malformed) == false @test Tokenize.is_never_id_char(malformed) == true + @test Tokenize.dotop1(malformed) == false + @test Tokenize.isopsuffix(malformed) == false + @test Tokenize.is_operator_start_char(malformed) == false end end From 3259d9ff778b4f561468d591b47bcdf1bc566865 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 23 Nov 2022 10:52:25 +1000 Subject: [PATCH 0540/1109] Only warn when float literals underflow to zero (JuliaLang/JuliaSyntax.jl#154) jl_strtod_c can return "underflow" even for valid cases such as `5e-324` where the source string is parsed to an exact Float64 representation. So we can't rely on jl_strtod_c to detect "invalid" underflow. Rather, only warn when underflowing to zero which is probably a programming mistake. 
--- JuliaSyntax/src/parse_stream.jl | 14 ++++++++++---- JuliaSyntax/test/diagnostics.jl | 8 ++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index e073d90f874a7..01ae8ee9369bc 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -865,10 +865,16 @@ function validate_literal_tokens(stream::ParseStream) # parse_int_literal # parse_uint_literal elseif k == K"Float" || k == K"Float32" + underflow0 = false if k == K"Float" - _, code = parse_float_literal(Float64, text, fbyte, nbyte) + x, code = parse_float_literal(Float64, text, fbyte, nbyte) + # jl_strtod_c can return "underflow" even for valid cases such + # as `5e-324` where the source is an exact representation of + # `x`. So only warn when underflowing to zero. + underflow0 = code == :underflow && x == 0 else - _, code = parse_float_literal(Float32, text, fbyte, nbyte) + x, code = parse_float_literal(Float32, text, fbyte, nbyte) + underflow0 = code == :underflow && x == 0 end if code == :ok # pass @@ -876,9 +882,9 @@ function validate_literal_tokens(stream::ParseStream) emit_diagnostic(stream, fbyte, lbyte, error="overflow in floating point literal") had_error = true - elseif code == :underflow + elseif underflow0 emit_diagnostic(stream, fbyte, lbyte, - warning="underflow in floating point literal") + warning="underflow to zero in floating point literal") end elseif k == K"Char" @assert fbyte < nbyte # Already handled in the parser diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 9e3c800091e0f..3324b2b078ce2 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -16,9 +16,13 @@ end @test diagnostic("x = 10.0f1000;") == Diagnostic(5, 13, :error, "overflow in floating point literal") @test diagnostic("x = 10.0e-1000;") == - Diagnostic(5, 14, :warning, "underflow in floating point literal") + Diagnostic(5, 14, :warning, "underflow to 
zero in floating point literal") @test diagnostic("x = 10.0f-1000;") == - Diagnostic(5, 14, :warning, "underflow in floating point literal") + Diagnostic(5, 14, :warning, "underflow to zero in floating point literal") + # Underflow boundary + @test diagnostic("5e-324", allow_multiple=true) == [] + @test diagnostic("2e-324") == + Diagnostic(1, 6, :warning, "underflow to zero in floating point literal") # Char @test diagnostic("x = ''") == From f871a527b0473a5cec863bdbc107f9728e1548b9 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 23 Nov 2022 17:11:31 +1000 Subject: [PATCH 0541/1109] Fix source line reporting at EOF (JuliaLang/JuliaSyntax.jl#156) --- JuliaSyntax/src/source_files.jl | 9 +++++---- JuliaSyntax/test/runtests.jl | 6 ++---- JuliaSyntax/test/source_files.jl | 12 ++++++++++++ JuliaSyntax/test/test_utils.jl | 1 + 4 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 JuliaSyntax/test/source_files.jl diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index fa95794afb2e7..bff823f9347fa 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -23,7 +23,7 @@ function SourceFile(code::AbstractString; filename=nothing) code[i] == '\n' && push!(line_starts, i+1) end if isempty(code) || last(code) != '\n' - push!(line_starts, lastindex(code)+1) + push!(line_starts, ncodeunits(code)+1) end SourceFile(code, filename, line_starts) end @@ -34,14 +34,15 @@ end # Get line number of the given byte within the code function source_line(source::SourceFile, byte_index) - searchsortedlast(source.line_starts, byte_index) + line = searchsortedlast(source.line_starts, byte_index) + return (line < lastindex(source.line_starts)) ? line : line-1 end """ Get line number and character within the line at the given byte index. 
""" function source_location(source::SourceFile, byte_index) - line = searchsortedlast(source.line_starts, byte_index) + line = source_line(source, byte_index) i = source.line_starts[line] column = 1 while i < byte_index @@ -57,7 +58,7 @@ Get byte range of the source line at byte_index, buffered by """ function source_line_range(source::SourceFile, byte_index; context_lines_before=0, context_lines_after=0) - line = searchsortedlast(source.line_starts, byte_index) + line = source_line(source, byte_index) fbyte = source.line_starts[max(line-context_lines_before, 1)] lbyte = source.line_starts[min(line+1+context_lines_after, end)] - 1 fbyte,lbyte diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index b08da53303274..fad4278d425de 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -17,12 +17,10 @@ include("parser.jl") include("diagnostics.jl") include("parser_api.jl") include("expr.jl") -@testset "Parsing values from strings" begin +@testset "Parsing literals from strings" begin include("value_parsing.jl") end include("hooks.jl") include("parse_packages.jl") +include("source_files.jl") -# Prototypes -#include("syntax_interpolation.jl") -#include("simple_parser.jl") diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl new file mode 100644 index 0000000000000..9907192612e12 --- /dev/null +++ b/JuliaSyntax/test/source_files.jl @@ -0,0 +1,12 @@ +@testset "SourceFile lines and column indexing" begin + @test source_location(SourceFile("a"), 1) == (1,1) + @test source_location(SourceFile("a"), 2) == (1,2) + + @test source_location(SourceFile("a\n"), 2) == (1,2) + @test source_location(SourceFile("a\n"), 3) == (1,3) + + @test source_location(SourceFile("a\nb\n"), 2) == (1,2) + @test source_location(SourceFile("a\nb\n"), 3) == (2,1) + @test source_location(SourceFile("a\nb\n"), 4) == (2,2) + @test source_location(SourceFile("a\nb\n"), 5) == (2,3) +end diff --git a/JuliaSyntax/test/test_utils.jl 
b/JuliaSyntax/test/test_utils.jl index 1cd7728419945..03bda337aa327 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -7,6 +7,7 @@ using .JuliaSyntax: ParseState, Diagnostic, SourceFile, + source_location, parse!, parse, parseall, From 0f5cc69e4b95f55878779f4112e97da949c8a866 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 28 Nov 2022 16:19:58 +1000 Subject: [PATCH 0542/1109] Fix parsing of `.&(x,y)` (JuliaLang/JuliaSyntax.jl#157) --- JuliaSyntax/src/parser.jl | 6 ++++-- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index bc3877e7f8bff..ebf3b30ed7266 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1397,8 +1397,9 @@ end # flisp: parse-unary-prefix function parse_unary_prefix(ps::ParseState) mark = position(ps) - k = peek(ps) - if is_syntactic_unary_op(k) + t = peek_token(ps) + k = kind(t) + if is_syntactic_unary_op(k) && !is_dotted(t) k2 = peek(ps, 2) if k in KSet"& $" && (is_closing_token(ps, k2) || k2 == K"NewlineWs") # &) ==> & @@ -1418,6 +1419,7 @@ function parse_unary_prefix(ps::ParseState) emit(ps, mark, k) end else + # .&(x,y) ==> (call .& x y) parse_atom(ps) end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 82b3e14499c27..4956abfae2d95 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -268,6 +268,7 @@ tests = [ # parse_call "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" + ".&(x,y)" => "(call .& x y)" # parse_call_chain "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. 
(\$ A) (quote @x)))" From 1cbb97e78c3caaa4be3fde525ef0d417e7f6f90c Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 28 Nov 2022 16:23:43 +1000 Subject: [PATCH 0543/1109] =?UTF-8?q?Fix=20operator=20category/precedence?= =?UTF-8?q?=20for=20=E2=89=94=20=E2=A9=B4=20=E2=89=95=20=E2=89=82=20(Julia?= =?UTF-8?q?Lang/JuliaSyntax.jl#159)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several unicode operators were in the wrong precedence category; fix this: * Assignment, not comparison: `≔` `⩴` `≕` * Comparison, not plus: `≂` Also some code movement to reflect precedence and the ordering of unicode codepoints as in the reference parser. --- JuliaSyntax/src/kinds.jl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 9fded1d46e319..24fdd97a03d21 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -100,7 +100,7 @@ const _kind_names = "BEGIN_ASSIGNMENTS" "=" "+=" - "-=" + "-=" # Also used for "−=" "*=" "/=" "//=" @@ -117,6 +117,9 @@ const _kind_names = "~" "\$=" "⊻=" + "≔" + "⩴" + "≕" "END_ASSIGNMENTS" "BEGIN_PAIRARROW" @@ -253,7 +256,6 @@ const _kind_names = "⭌" "←" "→" - "↻" "⇜" "⇝" "↜" @@ -279,6 +281,7 @@ const _kind_names = "↷" "↶" "↺" + "↻" "END_ARROW" # Level 4 @@ -331,6 +334,7 @@ const _kind_names = "∾" "≁" "≃" + "≂" "≄" "≅" "≆" @@ -346,8 +350,6 @@ const _kind_names = "≑" "≒" "≓" - "≔" - "≕" "≖" "≗" "≘" @@ -472,7 +474,6 @@ const _kind_names = "⩱" "⩲" "⩳" - "⩴" "⩵" "⩶" "⩷" @@ -580,10 +581,10 @@ const _kind_names = "⫺" "⊢" "⊣" + "⟂" # ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350 "⫪" "⫫" - "⟂" "END_COMPARISON" # Level 7 @@ -608,7 +609,7 @@ const _kind_names = "BEGIN_PLUS" "\$" "+" - "-" + "-" # also used for "−" "++" "⊕" "⊖" @@ -622,7 +623,6 @@ const _kind_names = "∓" "∔" "∸" - "≂" "≏" "⊎" "⊻" @@ -667,19 +667,12 @@ const _kind_names = "END_PLUS" # Level 10 - "BEGIN_BITSHIFTS" - "<<" - ">>" - 
">>>" - "END_BITSHIFTS" - - # Level 11 "BEGIN_TIMES" "*" "/" "÷" "%" - "⋅" + "⋅" # also used for lookalikes "·" and "·" "∘" "×" "\\" @@ -755,11 +748,18 @@ const _kind_names = "⨟" "END_TIMES" - # Level 12 + # Level 11 "BEGIN_RATIONAL" "//" "END_RATIONAL" + # Level 12 + "BEGIN_BITSHIFTS" + "<<" + ">>" + ">>>" + "END_BITSHIFTS" + # Level 13 "BEGIN_POWER" "^" From bfce5f7253a450e865e1feac2e1da443378d8cea Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 28 Nov 2022 16:26:21 +1000 Subject: [PATCH 0544/1109] Make try without catch/finally an error (JuliaLang/JuliaSyntax.jl#161) --- JuliaSyntax/src/parser.jl | 6 +++++- JuliaSyntax/test/parser.jl | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ebf3b30ed7266..89713bc11682e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2215,7 +2215,7 @@ function parse_try(ps) bump(ps, TRIVIA_FLAG) parse_block(ps) if !has_catch - #v1.8: try else end ==> (try (block) false false (error (block)) false) + #v1.8: try else x finally y end ==> (try (block) false false (error (block x)) (block y)) emit(ps, else_mark, K"error", error="Expected `catch` before `else`") end #v1.7: try catch ; else end ==> (try (block) false (block) (error (block)) false) @@ -2243,6 +2243,10 @@ function parse_try(ps) emit_diagnostic(ps, m, position(ps), warning="`catch` after `finally` will execute out of order") end + if !has_catch && !has_finally + # try x end ==> (try (block x) false false false false (error-t)) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="try without catch or finally") + end bump_closing_token(ps, K"end") emit(ps, mark, out_kind, flags) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4956abfae2d95..f8ea3ea083a17 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -559,11 +559,12 @@ tests = [ "try x finally y end" => "(try (block x) false false false (block y))" # v1.8 only 
((v=v"1.8",), "try catch ; else end") => "(try (block) false (block) (block) false)" - ((v=v"1.8",), "try else end") => "(try (block) false false (error (block)) false)" + ((v=v"1.8",), "try else x finally y end") => "(try (block) false false (error (block x)) (block y))" ((v=v"1.7",), "try catch ; else end") => "(try (block) false (block) (error (block)) false)" # finally before catch :-( "try x finally y catch e z end" => "(try_finally_catch (block x) false false false (block y) e (block z))" => Expr(:try, Expr(:block, :x), :e, Expr(:block, :z), Expr(:block, :y)) + "try x end" => "(try (block x) false false false false (error-t))" ], JuliaSyntax.parse_imports => [ "import A as B: x" => "(import (: (error (as (. A) B)) (. x)))" From d05d8c4191c335845068ee109d779b9b49c34c46 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 28 Nov 2022 16:26:30 +1000 Subject: [PATCH 0545/1109] Allow + prefix to be part of unsigned numeric literals (JuliaLang/JuliaSyntax.jl#160) So +0xff is a numeric literal, not `(call-pre + 0xff)` --- JuliaSyntax/src/parser.jl | 10 ++++++---- JuliaSyntax/src/value_parsing.jl | 6 ++++-- JuliaSyntax/test/parser.jl | 8 ++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 89713bc11682e..57bcf2f733032 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1157,9 +1157,12 @@ function parse_unary(ps::ParseState) parse_factor(ps) return end + t2 = peek_token(ps, 2) + k2 = kind(t2) if op_k in KSet"- +" && !is_decorated(op_t) - t2 = peek_token(ps, 2) - if !preceding_whitespace(t2) && kind(t2) in KSet"Integer Float Float32" + if !preceding_whitespace(t2) && (k2 in KSet"Integer Float Float32" || + (op_k == K"+" && k2 in KSet"BinInt HexInt OctInt")) + k3 = peek(ps, 3) if is_prec_power(k3) || k3 in KSet"[ {" # `[`, `{` (issue #18851) and `^` have higher precedence than @@ -1176,13 +1179,12 @@ function parse_unary(ps::ParseState) # +2.0 ==> 2.0 # -1.0f0 ==> -1.0f0 # 
-2*x ==> (call-i -2 * x) + # +0xff ==> 0xff bump_glue(ps, kind(t2), EMPTY_FLAGS, 2) end return end end - t2 = peek_token(ps, 2) - k2 = kind(t2) if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" if is_dotted(op_t) # Standalone dotted operators are parsed as (|.| op) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index 042cca0a01037..f83bc8912eb78 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -20,7 +20,10 @@ function parse_int_literal(str::AbstractString) end function parse_uint_literal(str::AbstractString, k) - str = replace(replace(str, '_'=>""), '−'=>'-') + str = replace(str, '_'=>"") + if startswith(str, '+') + str = str[2:end] + end ndigits = length(str)-2 if k == K"HexInt" return ndigits <= 2 ? Base.parse(UInt8, str) : @@ -30,7 +33,6 @@ function parse_uint_literal(str::AbstractString, k) ndigits <= 32 ? Base.parse(UInt128, str) : Base.parse(BigInt, str) elseif k == K"BinInt" - str = replace(replace(str, '_'=>""), '−'=>'-') ndigits = length(str)-2 return ndigits <= 8 ? Base.parse(UInt8, str) : ndigits <= 16 ? Base.parse(UInt16, str) : diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f8ea3ea083a17..74bd08dc21561 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -177,9 +177,17 @@ tests = [ "isa::T" => "(:: isa T)" "-2^x" => "(call-pre - (call-i 2 ^ x))" "-2[1, 3]" => "(call-pre - (ref 2 1 3))" + # signed literals "-2" => "-2" "+2.0" => "2.0" "-1.0f0" => "-1.0f0" + "-0xf.0p0" => "-15.0" + "+0b10010" => "0x12" + "+0o22" => "0x12" + "+0x12" => "0x12" + "-0b10010" => "(call-pre - 0x12)" + "-0o22" => "(call-pre - 0x12)" + "-0x12" => "(call-pre - 0x12)" # Standalone dotted operators are parsed as (|.| op) ".+" => "(. +)" ".+\n" => "(. 
+)" From a17767acdcd476bfe558e0689d074c5d669ffc3f Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 28 Nov 2022 16:30:02 +1000 Subject: [PATCH 0546/1109] Fix dotcall tests (JuliaLang/JuliaSyntax.jl#162) --- JuliaSyntax/test/expr.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 0d0d99e517f38..853b4e3b7627f 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -225,14 +225,14 @@ end @testset "dotcall" begin - parse(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) - parse(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) - parse(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) - parse(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) - parse(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), - :b, Symbol(".<"), :c) - parse(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) - parse(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) - parse(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) + @test parse(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) + @test parse(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) + @test parse(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) + @test parse(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) + @test parse(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), + :b, Symbol(".<"), :c) + @test parse(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) + @test parse(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) + @test parse(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) end end From bcab571e7035c8d31f1a3382aaeee2fe60cac987 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 28 Nov 2022 16:44:23 +1000 Subject: [PATCH 0547/1109] Fix where parameter conversion as in `A where {X, Y; Z}` (JuliaLang/JuliaSyntax.jl#163) This "frankenwhere" syntax is used by the SimpleTraits.jl `@traitfn` macro. 
--- JuliaSyntax/src/expr.jl | 26 ++++++++++++++------------ JuliaSyntax/test/expr.jl | 4 ++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 04f1a979d797d..5082b29642438 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -147,7 +147,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # Special cases for various expression heads loc = source_location(LineNumberNode, node.source, node.position) - if headsym == :macrocall + if headsym === :macrocall insert!(args, 2, loc) if args[1] == Symbol("@.") args[1] = Symbol("@__dot__") @@ -184,6 +184,8 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym in (:tuple, :vect, :braces) # Move parameters blocks to args[1] reorder_parameters!(args, 1) + elseif headsym === :where + reorder_parameters!(args, 2) elseif headsym in (:try, :try_finally_catch) # Try children in source order: # try_block catch_var catch_block else_block finally_block @@ -211,10 +213,10 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, push!(args, else_) end headsym = :try - elseif headsym == :filter + elseif headsym === :filter pushfirst!(args, last(args)) pop!(args) - elseif headsym == :flatten + elseif headsym === :flatten # The order of nodes inside the generators in Julia's flatten AST # is noncontiguous in the source text, so need to reconstruct # Julia's AST here from our alternative `flatten` expression. 
@@ -227,12 +229,12 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags pushfirst!(args, numeric_flags(flags(node))) - elseif headsym == :typed_ncat + elseif headsym === :typed_ncat insert!(args, 2, numeric_flags(flags(node))) # elseif headsym == :string && length(args) == 1 && version <= (1,5) # Strip string from interpolations in 1.5 and lower to preserve # "hi$("ho")" ==> (string "hi" "ho") - elseif headsym == :(=) && !is_decorated(node) + elseif headsym === :(=) && !is_decorated(node) if is_eventually_call(args[1]) && !iteration_spec && !Meta.isexpr(args[2], :block) # Add block for short form function locations args[2] = Expr(:block, loc, args[2]) @@ -240,14 +242,14 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym == :elseif # Block for conditional's source location args[1] = Expr(:block, loc, args[1]) - elseif headsym == :(->) + elseif headsym === :(->) if Meta.isexpr(args[2], :block) pushfirst!(args[2].args, loc) else # Add block for source locations args[2] = Expr(:block, loc, args[2]) end - elseif headsym == :function + elseif headsym === :function if length(args) > 1 if Meta.isexpr(args[1], :tuple) # Convert to weird Expr forms for long-form anonymous functions. 
@@ -260,21 +262,21 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end pushfirst!(args[2].args, loc) end - elseif headsym == :macro + elseif headsym === :macro if length(args) > 1 pushfirst!(args[2].args, loc) end - elseif headsym == :module + elseif headsym === :module pushfirst!(args[3].args, loc) elseif headsym == :inert || (headsym == :quote && length(args) == 1 && !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || a1 isa Bool # <- compat hack, Julia 1.4+ )) return QuoteNode(only(args)) - elseif headsym == :do + elseif headsym === :do @check length(args) == 3 return Expr(:do, args[1], Expr(:->, args[2], args[3])) - elseif headsym == :let + elseif headsym === :let @check Meta.isexpr(args[1], :block) a1 = args[1].args # Ugly logic to strip the Expr(:block) in certian cases for compatibility @@ -284,7 +286,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = a end end - elseif headsym == :local || headsym == :global + elseif headsym === :local || headsym === :global if length(args) == 1 && Meta.isexpr(args[1], :const) # Normalize `local const` to `const local` args[1] = Expr(headsym, args[1].args...) 
diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 853b4e3b7627f..9461bb7609e65 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -235,4 +235,8 @@ @test parse(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) @test parse(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) end + + @testset "where" begin + @test parse(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) + end end From 6a117ad88dd7afae7e54576a9b29379b24cb21f6 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 30 Nov 2022 11:43:52 +1000 Subject: [PATCH 0548/1109] Fix Expr parameters in parenthesized macro calls (JuliaLang/JuliaSyntax.jl#164) --- JuliaSyntax/src/expr.jl | 3 ++- JuliaSyntax/test/expr.jl | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 5082b29642438..24c732c0a3635 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -148,10 +148,11 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # Special cases for various expression heads loc = source_location(LineNumberNode, node.source, node.position) if headsym === :macrocall - insert!(args, 2, loc) if args[1] == Symbol("@.") args[1] = Symbol("@__dot__") end + reorder_parameters!(args, 2) + insert!(args, 2, loc) elseif headsym in (:dotcall, :call, :ref) # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. 
We permute the children diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 9461bb7609e65..a8a1a07cf09eb 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -239,4 +239,17 @@ @testset "where" begin @test parse(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) end + + @testset "macrocall" begin + # line numbers + @test parse(Expr, "@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1)) + @test parse(Expr, "\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2)) + # parameters + @test parse(Expr, "@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + Expr(:parameters, :a), :x) + @test parse(Expr, "@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) + # @__dot__ + @test parse(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) + end end From 21c95a3fd512ab6d2a396f8bab311ea4ef2a3931 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 1 Dec 2022 06:14:23 +1000 Subject: [PATCH 0549/1109] Avoid use of GlobalRef in SyntaxNode trees (JuliaLang/JuliaSyntax.jl#165) This makes the tree more symbolic, by using unique SyntaxNode head types to refer to core macro names, rather than needing an explicit reference to the Core module included in the tree. 
--- JuliaSyntax/src/expr.jl | 6 +++++- JuliaSyntax/src/syntax_tree.jl | 4 ++-- JuliaSyntax/test/parser.jl | 20 ++++++++++---------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 24c732c0a3635..b9be8133e4614 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -31,7 +31,7 @@ function reorder_parameters!(args, params_pos) end function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, - eq_to_kw=false, inside_vect_or_braces=false) + eq_to_kw=false, inside_vect_or_braces=false, inside_do=false) if !haschildren(node) val = node.val if val isa Union{Int128,UInt128,BigInt} @@ -44,6 +44,10 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, val isa UInt128 ? Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + elseif kind(node) == K"core_@doc" + return GlobalRef(Core, :var"@doc") + elseif kind(node) == K"core_@cmd" + return GlobalRef(Core, :var"@cmd") else return val end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 18ada54798d74..293403e25bd9e 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -94,9 +94,9 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"CmdMacroName" Symbol("@$(normalize_identifier(val_str))_cmd") elseif k == K"core_@doc" - GlobalRef(Core, :var"@doc") + Symbol("core_@doc") elseif k == K"core_@cmd" - GlobalRef(Core, :var"@cmd") + Symbol("core_@cmd") elseif is_syntax_kind(raw) nothing else diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 74bd08dc21561..308d28d4a34f5 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -38,7 +38,7 @@ tests = [ "a;b;c" => "(toplevel a b c)" "a;;;b;;" => "(toplevel a b)" """ "x" a ; "y" b """ => - """(toplevel (macrocall :(Core.var"@doc") (string "x") a) (macrocall 
:(Core.var"@doc") (string "y") b))""" + """(toplevel (macrocall core_@doc (string "x") a) (macrocall core_@doc (string "y") b))""" "x y" => "x (error-t y)" ], JuliaSyntax.parse_eq => [ @@ -457,7 +457,7 @@ tests = [ "module do \n end" => "(module true (error (do)) (block))" "module \$A end" => "(module true (\$ A) (block))" "module A \n a \n b \n end" => "(module true A (block a b))" - """module A \n "x"\na\n end""" => """(module true A (block (macrocall :(Core.var"@doc") (string "x") a)))""" + """module A \n "x"\na\n end""" => """(module true A (block (macrocall core_@doc (string "x") a)))""" # export "export a" => "(export a)" => Expr(:export, :a) "export @a" => "(export @a)" => Expr(:export, Symbol("@a")) @@ -750,9 +750,9 @@ tests = [ # __dot__ macro "@. x" => "(macrocall @. x)" => Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1), :x) # cmd strings - "``" => "(macrocall :(Core.var\"@cmd\") (cmdstring-r \"\"))" - "`cmd`" => "(macrocall :(Core.var\"@cmd\") (cmdstring-r \"cmd\"))" - "```cmd```" => "(macrocall :(Core.var\"@cmd\") (cmdstring-sr \"cmd\"))" + "``" => "(macrocall core_@cmd (cmdstring-r \"\"))" + "`cmd`" => "(macrocall core_@cmd (cmdstring-r \"cmd\"))" + "```cmd```" => "(macrocall core_@cmd (cmdstring-sr \"cmd\"))" # literals "42" => "42" "1.0e-1000" => "0.0" @@ -816,7 +816,7 @@ tests = [ # Triple-quoted dedenting: "\"\"\"\nx\"\"\"" => raw"""(string-s "x")""" "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" - "```\n x\n y```" => raw"""(macrocall :(Core.var"@cmd") (cmdstring-sr "x\n" "y"))""" + "```\n x\n y```" => raw"""(macrocall core_@cmd (cmdstring-sr "x\n" "y"))""" # Various newlines (\n \r \r\n) and whitespace (' ' \t) "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")""" "\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")""" @@ -875,11 +875,11 @@ tests = [ """ "notdoc" ] """ => "(string \"notdoc\")" """ "notdoc" \n] """ => "(string \"notdoc\")" """ "notdoc" \n\n foo """ => "(string \"notdoc\")" - """ "doc" \n foo """ => """(macrocall 
:(Core.var"@doc") (string "doc") foo)""" - """ "doc" foo """ => """(macrocall :(Core.var"@doc") (string "doc") foo)""" - """ "doc \$x" foo """ => """(macrocall :(Core.var"@doc") (string "doc " x) foo)""" + """ "doc" \n foo """ => """(macrocall core_@doc (string "doc") foo)""" + """ "doc" foo """ => """(macrocall core_@doc (string "doc") foo)""" + """ "doc \$x" foo """ => """(macrocall core_@doc (string "doc " x) foo)""" # Allow docstrings with embedded trailing whitespace trivia - "\"\"\"\n doc\n \"\"\" foo" => """(macrocall :(Core.var"@doc") (string-s "doc\\n") foo)""" + "\"\"\"\n doc\n \"\"\" foo" => """(macrocall core_@doc (string-s "doc\\n") foo)""" ], ] From a319a0bc0c9222fc0bdf7cfeb93e1e028e829ff1 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 1 Dec 2022 14:29:10 +1000 Subject: [PATCH 0550/1109] Improve registry testing tools (JuliaLang/JuliaSyntax.jl#158) * Compare Expr in detail against reference parser * Hack for visual expression diffing * Cache result of parsing General with reference parser because it's so much slower. 
--- JuliaSyntax/test/test_utils.jl | 26 +++++++++++++------- JuliaSyntax/tools/check_all_packages.jl | 32 +++++++++++++++++-------- JuliaSyntax/tools/untar_packages.jl | 24 +++++++++++++++++++ 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 03bda337aa327..c0e8dc77e1ef4 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -43,22 +43,30 @@ function remove_all_linenums!(ex) remove_macro_linenums!(ex) end -function show_expr_text_diff(showfunc, ex, f_ex; context=2) +function show_expr_text_diff(io::IO, showfunc, e1, e2; context=2) if Sys.isunix() mktemp() do path1, io1 mktemp() do path2, io2 - showfunc(io1, ex); close(io1) - showfunc(io2, f_ex); close(io2) - run(ignorestatus(`diff -U$context --color=always $path1 $path2`)) + showfunc(io1, e1); close(io1) + showfunc(io2, e2); close(io2) + run(pipeline(ignorestatus(`diff -U$context --color=always $path1 $path2`), io)) end end else - showfunc(stdout, ex) - println("------------------------------------") - showfunc(stdout, f_ex) + showfunc(io, ex) + println(io, "------------------------------------") + showfunc(io, e2) end + nothing end +# Parse text with JuliaSyntax vs reference parser and show a textural diff of +# the resulting expressions +function parse_diff(text, showfunc=dump) + ex = parse(Expr, text, filename="none") + fl_ex = fl_parse(text) + show_expr_text_diff(stdout, showfunc, ex, fl_ex) +end function parsers_agree_on_file(filename; show_diff=false) text = try @@ -80,7 +88,7 @@ function parsers_agree_on_file(filename; show_diff=false) parse!(stream) ex = build_tree(Expr, stream, filename=filename) if show_diff && ex != fl_ex - show_expr_text_diff(show, ex, fl_ex) + show_expr_text_diff(stdout, show, ex, fl_ex) end return !JuliaSyntax.any_error(stream) && JuliaSyntax.remove_linenums!(ex) == @@ -251,7 +259,7 @@ function itest_parse(production, code; version::VersionNumber=v"1.6") show(stdout, 
MIME"text/plain"(), f_ex) printstyled(stdout, "\n\n# Diff of AST dump:\n", color=:red) - show_expr_text_diff(show, ex, f_ex, context=10) + show_expr_text_diff(stdout, show, ex, f_ex, context=10) # return (ex, f_ex) # return (code, stream, t, s, ex) end diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 57e7399d7424d..0fd9a8477e5b3 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -3,7 +3,9 @@ # # Run this after registry_download.jl (so the pkgs directory is populated). -using JuliaSyntax, Logging +using JuliaSyntax, Logging, Serialization + +include("../test/test_utils.jl") logio = open(joinpath(@__DIR__, "logs.txt"), "w") logger = Logging.ConsoleLogger(logio) @@ -15,22 +17,32 @@ Logging.with_logger(logger) do t = time() i = 0 iob = IOBuffer() - ex_count = 0 + exception_count = 0 + mismatch_count = 0 for (r, _, files) in walkdir(pkgspath) for f in files endswith(f, ".jl") || continue fpath = joinpath(r, f) if isfile(fpath) - file = read(fpath, String) + code = read(fpath, String) + expr_cache = fpath*".Expr" + #e2 = JuliaSyntax.fl_parseall(code) + e2 = open(deserialize, fpath*".Expr") + @assert Meta.isexpr(e2, :toplevel) try - e1 = JuliaSyntax.parseall(Expr, file) + e1 = JuliaSyntax.parseall(Expr, code, filename=fpath) + if JuliaSyntax.remove_linenums!(e1) != JuliaSyntax.remove_linenums!(e2) + mismatch_count += 1 + @error("Parsers succeed but disagree", + fpath, + diff=Text(sprint(show_expr_text_diff, show, e1, e2)), + ) + end catch err err isa InterruptException && rethrow() ex = (err, catch_backtrace()) push!(exceptions, ex) ref_parse = "success" - e2 = JuliaSyntax.fl_parseall(file) - @assert Meta.isexpr(e2, :toplevel) if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) ref_parse = "fail" if err isa JuliaSyntax.ParseError @@ -40,10 +52,10 @@ Logging.with_logger(logger) do end end - ex_count += 1 + exception_count += 1 parse_to_syntax = 
"success" try - JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, file) + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code) catch err2 parse_to_syntax = "fail" end @@ -56,8 +68,8 @@ Logging.with_logger(logger) do avg = round(runtime/i*1000, digits = 2) print(iob, "\e[2J\e[0;0H") println(iob, "$i files parsed") - println(iob, "> $(ex_count) failures compared to reference parser") - println(iob, "> $(length(exceptions)) errors in total") + println(iob, "> $(exception_count) failures compared to reference parser") + println(iob, "> $(mismatch_count) Expr mismatches") println(iob, "> $(avg)ms per file, $(round(Int, runtime))s in total") println(stderr, String(take!(iob))) end diff --git a/JuliaSyntax/tools/untar_packages.jl b/JuliaSyntax/tools/untar_packages.jl index 4c89ed024e33c..7d2507dc3e622 100644 --- a/JuliaSyntax/tools/untar_packages.jl +++ b/JuliaSyntax/tools/untar_packages.jl @@ -1,3 +1,5 @@ +using Serialization +using JuliaSyntax pkgspath = joinpath(@__DIR__, "pkgs") @@ -20,3 +22,25 @@ for tars in Iterators.partition(readdir(pkgspath), 50) end end +@info "Parsing files with reference parser" + +let i = 0 + for (r, _, files) in walkdir(pkgspath) + for f in files + endswith(f, ".jl") || continue + fpath = joinpath(r, f) + outpath = joinpath(r, f*".Expr") + if isfile(fpath) + code = read(fpath, String) + fl_ex = JuliaSyntax.fl_parseall(code, filename=fpath) + i += 1 + if i % 100 == 0 + @info "$i files parsed" + end + open(outpath, "w") do io + serialize(io, fl_ex) + end + end + end + end +end From eb49fb9bfe879a1d8ccb4949dfdfba6aed537ceb Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 7 Dec 2022 11:38:33 +1000 Subject: [PATCH 0551/1109] Fix an obscure form of function definition (JuliaLang/JuliaSyntax.jl#166) Fix cases where parens are used around module name such as function (A).f() end See in the wild as things like `function ($M).$J() end` --- JuliaSyntax/src/expr.jl | 2 +- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/test/parser.jl | 1 + 3 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index b9be8133e4614..bef0dec863d89 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -31,7 +31,7 @@ function reorder_parameters!(args, params_pos) end function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, - eq_to_kw=false, inside_vect_or_braces=false, inside_do=false) + eq_to_kw=false, inside_vect_or_braces=false) if !haschildren(node) val = node.val if val isa Union{Int128,UInt128,BigInt} diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 57bcf2f733032..da2d5147308db 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2095,7 +2095,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) is_empty_tuple = peek(ps, skip_newlines=true) == K")" opts = parse_brackets(ps, K")") do _, _, _, _ _parsed_call = was_eventually_call(ps) - _is_anon_func = peek(ps, 2) != K"(" && !_parsed_call + _is_anon_func = peek(ps, 2) ∉ KSet"( ." && !_parsed_call return (needs_parameters = _is_anon_func, is_anon_func = _is_anon_func, parsed_call = _parsed_call) @@ -2115,6 +2115,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function ()(x) end ==> (function (call (tuple) x) (block)) emit(ps, mark, K"tuple") else + # function (A).f() end ==> (function (call (. 
A (quote f))) (block)) # function (:)() end ==> (function (call :) (block)) # function (x::T)() end ==> (function (call (:: x T)) (block)) # function (::T)() end ==> (function (call (:: T)) (block)) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 308d28d4a34f5..cde516d70ba91 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -515,6 +515,7 @@ tests = [ "function (x=1) end" => "(function (tuple (= x 1)) (block))" "function (;x=1) end" => "(function (tuple (parameters (= x 1))) (block))" "function ()(x) end" => "(function (call (tuple) x) (block))" + "function (A).f() end" => "(function (call (. A (quote f))) (block))" "function (:)() end" => "(function (call :) (block))" "function (x::T)() end"=> "(function (call (:: x T)) (block))" "function (::T)() end" => "(function (call (:: T)) (block))" From 8be2501b3f14f5d5ab4ad10a20b71ad3afeb9ce8 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 13 Dec 2022 17:51:57 +1000 Subject: [PATCH 0552/1109] Fixes for obscure function definition forms (JuliaLang/JuliaSyntax.jl#168) Fixes for `is_eventuall_call` so that the following parse correctly function (::g(x))() end function (f::T{g(i)})() end As part of this, we mark `::` expressions with infix or prefix flags depending on the form to make the two forms easy to detect. 
--- JuliaSyntax/src/parse_stream.jl | 15 ++++++----- JuliaSyntax/src/parser.jl | 47 ++++++++++++++++++--------------- JuliaSyntax/test/parser.jl | 44 +++++++++++++++--------------- 3 files changed, 56 insertions(+), 50 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 01ae8ee9369bc..8643695b38bb3 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -536,14 +536,14 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition) c = 0 @assert pos.range_index > 0 parent = stream.ranges[pos.range_index] - i = pos.range_index-1 - while i >= 1 - if stream.ranges[i].first_token >= parent.first_token && - (c == 0 || stream.ranges[i].first_token < stream.ranges[c].first_token) && - !is_trivia(stream.ranges[i]) + for i = pos.range_index-1:-1:1 + if stream.ranges[i].first_token < parent.first_token + break + end + if (c == 0 || stream.ranges[i].first_token < stream.ranges[c].first_token) && + !is_trivia(stream.ranges[i]) c = i end - i -= 1 end # Find first nontrivia token @@ -558,7 +558,8 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition) if c != 0 if t != 0 if stream.ranges[c].first_token > t - return ParseStreamPosition(t, c-1) + # Need a child index strictly before `t`. `c=0` works. 
+ return ParseStreamPosition(t, 0) else return ParseStreamPosition(stream.ranges[c].last_token, c) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index da2d5147308db..e653c43331f5c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -308,10 +308,11 @@ function was_eventually_call(ps::ParseState) stream = ps.stream p = position(ps) while true - kb = peek_behind(stream, p).kind - if kb == K"call" + b = peek_behind(stream, p) + if b.kind == K"call" return true - elseif kb == K"where" || kb == K"::" + elseif b.kind == K"where" || (b.kind == K"::" && + has_flags(b.flags, INFIX_FLAG)) p = first_child_position(ps, p) else return false @@ -1353,20 +1354,20 @@ function parse_factor_after(ps::ParseState) end # Parse type declarations and lambda syntax -# a::b ==> (:: a b) +# a::b ==> (::-i a b) # a->b ==> (-> a b) # # flisp: parse-decl-with-initial-ex function parse_decl_with_initial_ex(ps::ParseState, mark) while peek(ps) == K"::" - # a::b::c ==> (:: (:: a b) c) + # a::b::c ==> (::-i (::-i a b) c) bump(ps, TRIVIA_FLAG) parse_where(ps, parse_call) - emit(ps, mark, K"::") + emit(ps, mark, K"::", INFIX_FLAG) end if peek(ps) == K"->" # x -> y ==> (-> x y) - # a::b->c ==> (-> (:: a b) c) + # a::b->c ==> (-> (::-i a b) c) bump(ps, TRIVIA_FLAG) # -> is unusual: it binds tightly on the left and loosely on the right. parse_eq_star(ps) @@ -1393,7 +1394,7 @@ end # parse syntactic unary operators # # &a ==> (& a) -# ::a ==> (:: a) +# ::a ==> (::-pre a) # $a ==> ($ a) # # flisp: parse-unary-prefix @@ -1418,7 +1419,9 @@ function parse_unary_prefix(ps::ParseState) # $&a ==> ($ (& a)) parse_unary_prefix(ps) end - emit(ps, mark, k) + # Only need PREFIX_OP_FLAG for :: + f = k == K"::" ? 
PREFIX_OP_FLAG : EMPTY_FLAGS + emit(ps, mark, k, f) end else # .&(x,y) ==> (call .& x y) @@ -1431,7 +1434,7 @@ function parse_identifier_or_interpolate(ps::ParseState) mark = position(ps) parse_unary_prefix(ps) b = peek_behind(ps) - # export (x::T) ==> (export (error (:: x T))) + # export (x::T) ==> (export (error (::-i x T))) # export outer ==> (export outer) # export ($f) ==> (export ($ f)) ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || @@ -1792,7 +1795,7 @@ function parse_resword(ps::ParseState) m = position(ps) n_subexprs = parse_comma_separated(ps, parse_eq_star) kb = peek_behind(ps).kind - # let x::1 ; end ==> (let (block (:: x 1)) (block)) + # let x::1 ; end ==> (let (block (::-i x 1)) (block)) # let x ; end ==> (let (block x) (block)) # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) # let x+=1 ; end ==> (let (block (+= x 1)) (block)) @@ -1897,7 +1900,7 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"abstract") elseif word in KSet"struct mutable" - # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (:: a X))) + # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (::-i a X))) # struct A \n a \n b \n end ==> (struct false A (block a b)) #v1.7: struct A const a end ==> (struct false A (block (error (const a)))) #v1.8: struct A const a end ==> (struct false A (block (const a))) @@ -2104,7 +2107,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) parsed_call = opts.parsed_call if is_anon_func # function (x) body end ==> (function (tuple x) (block body)) - # function (x::f()) end ==> (function (tuple (:: x (call f))) (block)) + # function (x::f()) end ==> (function (tuple (::-i x (call f))) (block)) # function (x,y) end ==> (function (tuple x y) (block)) # function (x=1) end ==> (function (tuple (= x 1)) (block)) # function (;x=1) end ==> (function (tuple (parameters (= x 1))) (block)) @@ -2117,8 +2120,8 @@ function 
parse_function_signature(ps::ParseState, is_function::Bool) else # function (A).f() end ==> (function (call (. A (quote f))) (block)) # function (:)() end ==> (function (call :) (block)) - # function (x::T)() end ==> (function (call (:: x T)) (block)) - # function (::T)() end ==> (function (call (:: T)) (block)) + # function (x::T)() end ==> (function (call (::-i x T)) (block)) + # function (::T)() end ==> (function (call (::-pre T)) (block)) end else parse_unary_prefix(ps) @@ -2134,7 +2137,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function \n f() end ==> (function (call f) (block)) # function $f() end ==> (function (call ($ f)) (block)) # function (:)() end ==> (function (call :) (block)) - # function (::Type{T})(x) end ==> (function (call (:: (curly Type T)) x) (block)) + # function (::Type{T})(x) end ==> (function (call (::-pre (curly Type T)) x) (block)) end end end @@ -2155,11 +2158,11 @@ function parse_function_signature(ps::ParseState, is_function::Bool) end if is_function && peek(ps) == K"::" # Function return type - # function f()::T end ==> (function (:: (call f) T) (block)) - # function f()::g(T) end ==> (function (:: (call f) (call g T)) (block)) + # function f()::T end ==> (function (::-i (call f) T) (block)) + # function f()::g(T) end ==> (function (::-i (call f) (call g T)) (block)) bump(ps, TRIVIA_FLAG) parse_call(ps) - emit(ps, mark, K"::") + emit(ps, mark, K"::", INFIX_FLAG) end if peek(ps) == K"where" # Function signature where syntax @@ -2167,7 +2170,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function f() where T end ==> (function (where (call f) T) (block)) parse_where_chain(ps, mark) end - # function f()::S where T end ==> (function (where (:: (call f) S) T) (block)) + # function f()::S where T end ==> (function (where (::-i (call f) S) T) (block)) # # Ugly cases for compat where extra parentheses existed and we've # already parsed at least the call part of the signature @@ 
-2175,8 +2178,8 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (f() where T) end ==> (function (where (call f) T) (block)) # function (f()) where T end ==> (function (where (call f) T) (block)) # function (f() where T) where U end ==> (function (where (where (call f) T) U) (block)) - # function (f()::S) end ==> (function (:: (call f) S) (block)) - # function ((f()::S) where T) end ==> (function (where (:: (call f) S) T) (block)) + # function (f()::S) end ==> (function (::-i (call f) S) (block)) + # function ((f()::S) where T) end ==> (function (where (::-i (call f) S) T) (block)) # # TODO: Warn for use of parens? The precedence of `::` and # `where` don't work inside parens so this is a bit of a syntax diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index cde516d70ba91..8f7203e9f5f7b 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -173,8 +173,8 @@ tests = [ ], JuliaSyntax.parse_unary => [ ":T" => "(quote T)" - "in::T" => "(:: in T)" - "isa::T" => "(:: isa T)" + "in::T" => "(::-i in T)" + "isa::T" => "(::-i isa T)" "-2^x" => "(call-pre - (call-i 2 ^ x))" "-2[1, 3]" => "(call-pre - (ref 2 1 3))" # signed literals @@ -197,7 +197,7 @@ tests = [ # Standalone non-dotted operators "+)" => "+" # Call with type parameters or non-unary prefix call - "+{T}(x::T)" => "(call (curly + T) (:: x T))" + "+{T}(x::T)" => "(call (curly + T) (::-i x T))" "*(x)" => "(call * x)" ".*(x)" => "(call .* x)" # Prefix function calls for operators which are both binary and unary @@ -240,19 +240,19 @@ tests = [ "x^y" => "(call-i x ^ y)" "x^y^z" => "(call-i x ^ (call-i y ^ z))" "x .^ y" => "(dotcall-i x ^ y)" - "begin x end::T" => "(:: (block x) T)" + "begin x end::T" => "(::-i (block x) T)" # parse_decl_with_initial_ex - "a::b" => "(:: a b)" + "a::b" => "(::-i a b)" "a->b" => "(-> a b)" - "a::b::c" => "(:: (:: a b) c)" - "a::b->c" => "(-> (:: a b) c)" + "a::b::c" => "(::-i (::-i a b) c)" + "a::b->c" => "(-> 
(::-i a b) c)" ], JuliaSyntax.parse_unary_subtype => [ "<: )" => "<:" "<: \n" => "<:" "<: =" => "<:" - "<:{T}(x::T)" => "(call (curly <: T) (:: x T))" - "<:(x::T)" => "(<:-pre (:: x T))" + "<:{T}(x::T)" => "(call (curly <: T) (::-i x T))" + "<:(x::T)" => "(<:-pre (::-i x T))" "<: x" => "(<:-pre x)" "<: A where B" => "(<:-pre (where A B))" # Really for parse_where @@ -268,7 +268,7 @@ tests = [ "&)" => "&" "\$\n" => "\$" "&a" => "(& a)" - "::a" => "(:: a)" + "::a" => "(::-pre a)" "\$a" => "(\$ a)" "\$\$a" => "(\$ (\$ a))" ], @@ -417,7 +417,7 @@ tests = [ "let x=1\n end" => "(let (block (= x 1)) (block))" => Expr(:let, Expr(:(=), :x, 1), Expr(:block)) "let x=1 ; end" => "(let (block (= x 1)) (block))" => Expr(:let, Expr(:(=), :x, 1), Expr(:block)) "let x ; end" => "(let (block x) (block))" => Expr(:let, :x, Expr(:block)) - "let x::1 ; end" => "(let (block (:: x 1)) (block))" => Expr(:let, Expr(:(::), :x, 1), Expr(:block)) + "let x::1 ; end" => "(let (block (::-i x 1)) (block))" => Expr(:let, Expr(:(::), :x, 1), Expr(:block)) "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" => Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block)) "let x+=1 ; end" => "(let (block (+= x 1)) (block))" => Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block)) "let ; end" => "(let (block) (block))" => Expr(:let, Expr(:block), Expr(:block)) @@ -436,7 +436,7 @@ tests = [ "primitive type A \$N end" => "(primitive A (\$ N))" "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct - "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (:: a X)))" => Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, Expr(:(::), :a, :X))) + "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (::-i a X)))" => Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, Expr(:(::), :a, :X))) "struct A \n a \n b \n end" => "(struct false A (block a b))" => Expr(:struct, false, :A, Expr(:block, :a, :b)) "mutable struct A end" => "(struct true A 
(block))" ((v=v"1.8",), "struct A const a end") => "(struct false A (block (const a)))" => Expr(:struct, false, :A, Expr(:block, Expr(:const, :a))) @@ -466,7 +466,7 @@ tests = [ "export +, ==" => "(export + ==)" => Expr(:export, :+, :(==)) "export \n a" => "(export a)" => Expr(:export, :a) "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" => Expr(:export, Expr(:$, :a), Expr(:$, Expr(:call, :*, :a, :b))) - "export (x::T)" => "(export (error (:: x T)))" + "export (x::T)" => "(export (error (::-i x T)))" "export outer" => "(export outer)" => Expr(:export, :outer) "export (\$f)" => "(export (\$ f))" => Expr(:export, Expr(:$, :f)) ], @@ -517,15 +517,17 @@ tests = [ "function ()(x) end" => "(function (call (tuple) x) (block))" "function (A).f() end" => "(function (call (. A (quote f))) (block))" "function (:)() end" => "(function (call :) (block))" - "function (x::T)() end"=> "(function (call (:: x T)) (block))" - "function (::T)() end" => "(function (call (:: T)) (block))" + "function (x::T)() end"=> "(function (call (::-i x T)) (block))" + "function (::g(x))() end" => "(function (call (::-pre (call g x))) (block))" + "function (f::T{g(i)})() end" => "(function (call (::-i f (curly T (call g i)))) (block))" + "function (::T)() end" => "(function (call (::-pre T)) (block))" "function begin() end" => "(function (call (error begin)) (block))" "function f() end" => "(function (call f) (block))" "function type() end" => "(function (call type) (block))" "function \n f() end" => "(function (call f) (block))" "function \$f() end" => "(function (call (\$ f)) (block))" "function (:)() end" => "(function (call :) (block))" - "function (::Type{T})(x) end" => "(function (call (:: (curly Type T)) x) (block))" + "function (::Type{T})(x) end" => "(function (call (::-pre (curly Type T)) x) (block))" # Function/macro definition with no methods "function f end" => "(function f)" "function f \n\n end" => "(function f)" @@ -536,18 +538,18 @@ tests = [ "function f{T}() end" => 
"(function (call (curly f T)) (block))" "function A.f() end" => "(function (call (. A (quote f))) (block))" "function f body end" => "(function (error f) (block body))" - "function f()::T end" => "(function (:: (call f) T) (block))" - "function f()::g(T) end" => "(function (:: (call f) (call g T)) (block))" + "function f()::T end" => "(function (::-i (call f) T) (block))" + "function f()::g(T) end" => "(function (::-i (call f) (call g T)) (block))" "function f() where {T} end" => "(function (where (call f) T) (block))" "function f() where T end" => "(function (where (call f) T) (block))" - "function f()::S where T end" => "(function (where (:: (call f) S) T) (block))" + "function f()::S where T end" => "(function (where (::-i (call f) S) T) (block))" # Ugly cases for compat where extra parentheses existed and we've # already parsed at least the call part of the signature "function (f() where T) end" => "(function (where (call f) T) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) "function (f()) where T end" => "(function (where (call f) T) (block))" "function (f() where T) where U end" => "(function (where (where (call f) T) U) (block))" - "function (f()::S) end"=> "(function (:: (call f) S) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) - "function ((f()::S) where T) end" => "(function (where (:: (call f) S) T) (block))" + "function (f()::S) end"=> "(function (::-i (call f) S) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) + "function ((f()::S) where T) end" => "(function (where (::-i (call f) S) T) (block))" # body "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" From 54936e07d5c799566475e6b40f38d1685f7bda4b Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 14 Dec 2022 12:18:23 +1000 Subject: [PATCH 0553/1109] Trailing linebreak is not significant in hcat (JuliaLang/JuliaSyntax.jl#169) In the reference parser, 
`[a b\n\n]` parses as `(hcat a b)`, not vcat with a single row. --- JuliaSyntax/src/parser.jl | 14 ++++++++++---- JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e653c43331f5c..b5ea979e0396a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2882,10 +2882,16 @@ function parse_array_separator(ps, array_order) k = kind(t) if k == K"NewlineWs" bump_trivia(ps) - # Treat a linebreak prior to a value as a semicolon (ie, separator for - # the first dimension) if no previous semicolons observed - # [a \n b] ==> (vcat a b) - return (1, -1) + if peek(ps) == K"]" + # Linebreaks not significant before closing `]` + # [a b\n\n] ==> (hcat a b) + return (typemin(Int), typemin(Int)) + else + # Treat a linebreak prior to a value as a semicolon (ie, separator + # for the first dimension) if no previous semicolons observed + # [a \n b] ==> (vcat a b) + return (1, -1) + end elseif k == K"," # Treat `,` as semicolon for the purposes of recovery # [a; b, c] ==> (vcat a b (error-t) c) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8f7203e9f5f7b..7aca5a7ca9a7e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -799,6 +799,8 @@ tests = [ ((v=v"1.7",), "[a b \n ;; c]") => "(ncat-2 (row a b (error-t)) c)" # Can't mix spaces and multiple ;'s ((v=v"1.7",), "[a b ;; c]") => "(ncat-2 (row a b (error-t)) c)" + # Linebreaks not significant before closing `]` + "[a b\n\n]" => "(hcat a b)" # Treat a linebreak prior to a value as a semicolon (ie, separator for # the first dimension) if no previous semicolons observed "[a \n b]" => "(vcat a b)" From 3b03ac9568c0894d5ecd2a2321a3cd3b831d0215 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 16 Dec 2022 06:57:11 +1000 Subject: [PATCH 0554/1109] Fix for tokenization of consecutive nested multiline comments (JuliaLang/JuliaSyntax.jl#171) --- JuliaSyntax/src/tokenize.jl | 31 
++++++++++++++++++------------- JuliaSyntax/test/tokenize.jl | 14 +++++++------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 8f7ad5ff57621..91b64c8235fa4 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -542,33 +542,38 @@ function lex_whitespace(l::Lexer, c) return emit(l, k) end -function lex_comment(l::Lexer, doemit=true) +function lex_comment(l::Lexer) if peekchar(l) != '=' while true pc = peekchar(l) if pc == '\n' || pc == EOF_CHAR - return doemit ? emit(l, K"Comment") : EMPTY_TOKEN + return emit(l, K"Comment") end readchar(l) end else - pc = '#' c = readchar(l) # consume the '=' - n_start, n_end = 1, 0 + skip = true # true => c was part of the prev comment marker pair + nesting = 1 while true if c == EOF_CHAR - return doemit ? emit_error(l, K"ErrorEofMultiComment") : EMPTY_TOKEN + return emit_error(l, K"ErrorEofMultiComment") end nc = readchar(l) - if c == '#' && nc == '=' - n_start += 1 - elseif c == '=' && nc == '#' && pc != '#' - n_end += 1 - end - if n_start == n_end - return doemit ? 
emit(l, K"Comment") : EMPTY_TOKEN + if skip + skip = false + else + if c == '#' && nc == '=' + nesting += 1 + skip = true + elseif c == '=' && nc == '#' + nesting -= 1 + skip = true + if nesting == 0 + return emit(l, K"Comment") + end + end end - pc = c c = nc end end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 9df33cbec066b..3145d44f10f86 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -215,7 +215,7 @@ end end @testset "comments" begin - toks = collect(tokenize(""" + ts = collect(tokenize(""" # \"\"\" f @@ -227,7 +227,12 @@ end K"\"\"\"", K"String", K"String", K"\"\"\"", K"NewlineWs", K"Integer", K"NewlineWs", K"EndMarker"] - @test kind.(toks) == kinds + @test kind.(ts) == kinds + + @test toks("#=# text=#") == ["#=# text=#"=>K"Comment"] + + @test toks("#=#==#=#") == ["#=#==#=#"=>K"Comment"] + @test toks("#=#==#=") == ["#=#==#="=>K"ErrorEofMultiComment"] end @@ -791,11 +796,6 @@ end @test tok("1.?").kind == K"error" end -@testset "comments" begin - s = "#=# text=#" - @test length(collect(tokenize(s))) == 2 -end - @testset "invalid hexadecimal" begin s = "0x." 
tok(s, 1).kind === K"error" From acec81aa7fd627e6885b088e11cd2c4f77966389 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 19 Dec 2022 14:16:13 +1000 Subject: [PATCH 0555/1109] Fix = to kw conversion in `x[a, b; i=j]` (JuliaLang/JuliaSyntax.jl#173) --- JuliaSyntax/src/expr.jl | 8 ++++---- JuliaSyntax/test/expr.jl | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index bef0dec863d89..5c3c9fe99756d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -31,7 +31,7 @@ function reorder_parameters!(args, params_pos) end function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, - eq_to_kw=false, inside_vect_or_braces=false) + eq_to_kw=false, map_kw_in_params=false) if !haschildren(node) val = node.val if val isa Union{Int128,UInt128,BigInt} @@ -131,8 +131,8 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, eq_to_kw_in_call = ((headsym == :call || headsym == :dotcall) && is_prefix_call(node)) || headsym == :ref - eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces - in_vb = headsym == :vect || headsym == :braces + eq_to_kw_all = headsym == :parameters && !map_kw_in_params + in_vbr = headsym == :vect || headsym == :braces || headsym == :ref if insert_linenums && isempty(node_args) push!(args, source_location(LineNumberNode, node.source, node.position)) else @@ -144,7 +144,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all args[insert_linenums ? 
2*i : i] = _to_expr(n, eq_to_kw=eq_to_kw, - inside_vect_or_braces=in_vb) + map_kw_in_params=in_vbr) end end end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index a8a1a07cf09eb..656f645ab15c6 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -208,6 +208,8 @@ # ref @test parse(Expr, "x[i=j]") == Expr(:ref, :x, Expr(:kw, :i, :j)) + @test parse(Expr, "x[a, b; i=j]") == + Expr(:ref, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) # vect/braces @test parse(Expr, "[a=1,; b=2]") == From c85269926e7d8c7ebe502a449e3b38f3e768a7b6 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 19 Dec 2022 17:14:55 +1000 Subject: [PATCH 0556/1109] Fixes for parameters blocks in Expr(:curly) (JuliaLang/JuliaSyntax.jl#174) Fix Expr conversion for expressions like `x{a, b; i=j}` --- JuliaSyntax/src/expr.jl | 15 +++++++++------ JuliaSyntax/test/expr.jl | 10 +++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 5c3c9fe99756d..83b7fa36bc6ea 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -132,7 +132,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, ((headsym == :call || headsym == :dotcall) && is_prefix_call(node)) || headsym == :ref eq_to_kw_all = headsym == :parameters && !map_kw_in_params - in_vbr = headsym == :vect || headsym == :braces || headsym == :ref + in_vcbr = headsym == :vect || headsym == :curly || headsym == :braces || headsym == :ref if insert_linenums && isempty(node_args) push!(args, source_location(LineNumberNode, node.source, node.position)) else @@ -144,7 +144,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all args[insert_linenums ? 
2*i : i] = _to_expr(n, eq_to_kw=eq_to_kw, - map_kw_in_params=in_vbr) + map_kw_in_params=in_vcbr) end end end @@ -157,7 +157,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end reorder_parameters!(args, 2) insert!(args, 2, loc) - elseif headsym in (:dotcall, :call, :ref) + elseif headsym in (:dotcall, :call) # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children # here as necessary to get the canonical order. @@ -180,15 +180,18 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = Symbol(".", args[1]) end end + elseif headsym in (:ref, :curly) + # Move parameters blocks to args[2] + reorder_parameters!(args, 2) + elseif headsym in (:tuple, :vect, :braces) + # Move parameters blocks to args[1] + reorder_parameters!(args, 1) elseif headsym === :comparison for i in 1:length(args) if Meta.isexpr(args[i], :., 1) args[i] = Symbol(".",args[i].args[1]) end end - elseif headsym in (:tuple, :vect, :braces) - # Move parameters blocks to args[1] - reorder_parameters!(args, 1) elseif headsym === :where reorder_parameters!(args, 2) elseif headsym in (:try, :try_finally_catch) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 656f645ab15c6..3a2f46f19a131 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -208,14 +208,22 @@ # ref @test parse(Expr, "x[i=j]") == Expr(:ref, :x, Expr(:kw, :i, :j)) + @test parse(Expr, "(i=j)[x]") == + Expr(:ref, Expr(:(=), :i, :j), :x) @test parse(Expr, "x[a, b; i=j]") == Expr(:ref, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) + # curly + @test parse(Expr, "(i=j){x}") == + Expr(:curly, Expr(:(=), :i, :j), :x) + @test parse(Expr, "x{a, b; i=j}") == + Expr(:curly, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) - # vect/braces + # vect @test parse(Expr, "[a=1,; b=2]") == Expr(:vect, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) + # braces 
@test parse(Expr, "{a=1,; b=2}") == Expr(:braces, Expr(:parameters, Expr(:(=), :b, 2)), From c3d0df6d519c6dc075d746d7022dc9ebf752de89 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 20 Dec 2022 13:55:41 +1000 Subject: [PATCH 0557/1109] Improve Expr comparison code for registry testing (JuliaLang/JuliaSyntax.jl#175) Add special cases to explicitly allow a few incompatibilities for cases where the reference parser has bugs: * `0x1.8p23f` is a `Float64` literal, with the trailing `f` ignored (also `0x1p1f0`) * The macrocall in `"@f(a=1) do\nend"` is not the same as the call in `@f(a=1)` * `global (x,y)` is the same as `global x,y` * Triple quoted indentation - `"\"\"\"\n a\n \n b\"\"\""` parses to "a\n \nb" --- JuliaSyntax/test/test_utils.jl | 90 +++++++++++++++++++++++++ JuliaSyntax/tools/check_all_packages.jl | 2 +- 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index c0e8dc77e1ef4..42112c0d82714 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -68,6 +68,79 @@ function parse_diff(text, showfunc=dump) show_expr_text_diff(stdout, showfunc, ex, fl_ex) end +function kw_to_eq(ex) + return Meta.isexpr(:kw, ex) ? Expr(:(=), ex.args...) : ex +end + +function triple_string_roughly_equal(str, fl_str) + # Allow some leeway for a bug in the reference parser with + # triple quoted strings + lines = split(str, '\n') + fl_lines = split(fl_str, '\n') + if length(lines) != length(fl_lines) + return false + end + for (line1, line2) in zip(lines, fl_lines) + if !all(c in " \t" for c in line2) && !endswith(line1, line2) + return false + end + end + return true +end + +# Compare Expr from reference parser expression to JuliaSyntax parser, ignoring +# differences due to bugs in the reference parser. 
+function exprs_roughly_equal(fl_ex, ex) + if fl_ex isa Float64 && Meta.isexpr(ex, :call, 3) && + ex.args[1] == :* && + ex.args[2] == fl_ex && + (ex.args[3] == :f || ex.args[3] == :f0) + # 0x1p0f + return true + elseif !(fl_ex isa Expr) || !(ex isa Expr) + if fl_ex isa String && ex isa String + if fl_ex == ex + return true + else + return triple_string_roughly_equal(ex, fl_ex) + end + else + return fl_ex == ex + end + end + if fl_ex.head != ex.head + return false + end + h = ex.head + fl_args = fl_ex.args + args = ex.args + if ex.head in (:block, :quote, :toplevel) + fl_args = filter(x->!(x isa LineNumberNode), fl_args) + args = filter(x->!(x isa LineNumberNode), args) + end + if (h == :global || h == :local) && length(args) == 1 && Meta.isexpr(args[1], :tuple) + # Allow invalid syntax like `global (x, y)` + args = args[1].args + end + if length(fl_args) != length(args) + return false + end + if h == :do && length(args) >= 1 && Meta.isexpr(fl_args[1], :macrocall) + # Macrocalls with do, as in `@f(a=1) do\nend` use :kw in the + # reference parser for the `a=1`, but we regard this as a bug. + fl_args = copy(fl_args) + fl_args[1] = Expr(:macrocall, map(kw_to_eq, args[1].args)...) 
+ end + for i = 1:length(args) + flarg = fl_args[i] + arg = args[i] + if !exprs_roughly_equal(flarg, arg) + return false + end + end + return true +end + function parsers_agree_on_file(filename; show_diff=false) text = try read(filename, String) @@ -93,6 +166,8 @@ function parsers_agree_on_file(filename; show_diff=false) return !JuliaSyntax.any_error(stream) && JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) + # Could alternatively use + # exprs_roughly_equal(fl_ex, ex) catch exc @error "Parsing failed" filename exception=current_exceptions() return false @@ -308,3 +383,18 @@ function parse_sexpr(code) st end + +@testset "Test tools" begin + @test exprs_roughly_equal(Expr(:global, :x, :y), + Expr(:global, Expr(:tuple, :x, :y))) + @test exprs_roughly_equal(Expr(:local, :x, :y), + Expr(:local, Expr(:tuple, :x, :y))) + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f)) + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f0)) + @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), + Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) +end diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 0fd9a8477e5b3..58fc5e4417180 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -31,7 +31,7 @@ Logging.with_logger(logger) do @assert Meta.isexpr(e2, :toplevel) try e1 = JuliaSyntax.parseall(Expr, code, filename=fpath) - if JuliaSyntax.remove_linenums!(e1) != JuliaSyntax.remove_linenums!(e2) + if !exprs_roughly_equal(e2, e1) mismatch_count += 1 @error("Parsers succeed but disagree", fpath, From 9593dd84b8abca3dc2396a9ee13d600387aef20a Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 4 Jan 2023 11:55:25 +1000 Subject: [PATCH 0558/1109] Emit sensible 
errors for invalid operator tokens (JuliaLang/JuliaSyntax.jl#176) Here we emit invalid token errors during the token validation pass. This ensures any invalid token after parsing is guarenteed to have one single error emitted for it, independent from how it's handled by the parser. Add the special K"Error**" kind to allow us to emit a specific error for the `**` operator. Ensure we reject all the invalid operators which are rejected by the reference parser including the following which were missing: * `..+` and similar * `<---` Also add an extra rule for parsing invalid binary operators (at some arbitrarily-chosen precedence) to improve the recovered parse tree. --- JuliaSyntax/src/hooks.jl | 8 +++----- JuliaSyntax/src/kinds.jl | 14 +++++++++++--- JuliaSyntax/src/parse_stream.jl | 26 ++++++++++++++++++-------- JuliaSyntax/src/parser.jl | 29 +++++++++++++++++++++++++---- JuliaSyntax/src/parser_api.jl | 2 +- JuliaSyntax/src/tokenize.jl | 31 ++++++++++++++++++++++--------- JuliaSyntax/src/tokenize_utils.jl | 12 +++++------- JuliaSyntax/test/parser.jl | 15 +++++++++------ JuliaSyntax/test/test_utils.jl | 2 +- JuliaSyntax/test/tokenize.jl | 12 ++++++------ 10 files changed, 101 insertions(+), 50 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index c3bc0c563d96d..5f4ca70d3d836 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -35,14 +35,12 @@ function _incomplete_tag(n::SyntaxNode) return :none end # TODO: Check error hits last character - if kind(c) == K"error" && begin + if kind(c) == K"ErrorEofMultiComment" + return :comment + elseif kind(c) == K"error" && begin cs = children(c) length(cs) > 0 end - k1 = kind(cs[1]) - if k1 == K"ErrorEofMultiComment" - return :comment - end for cc in cs if kind(cc) == K"error" return :other diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 24fdd97a03d21..7228b0b28ca32 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -16,8 +16,10 @@ const 
_kind_names = # Tokenization errors "ErrorEofMultiComment" "ErrorInvalidNumericConstant" - "ErrorInvalidOperator" "ErrorInvalidInterpolationTerminator" + "ErrorNumericOverflow" + "ErrorInvalidEscapeSequence" + "ErrorOverLongCharacter" # Generic error "error" "END_ERRORS" @@ -94,6 +96,9 @@ const _kind_names = "END_DELIMITERS" "BEGIN_OPS" + "ErrorInvalidOperator" + "Error**" + "..." # Level 1 @@ -1009,8 +1014,11 @@ const _nonunique_kind_names = Set([ K"ErrorEofMultiComment" K"ErrorInvalidNumericConstant" - K"ErrorInvalidOperator" K"ErrorInvalidInterpolationTerminator" + K"ErrorNumericOverflow" + K"ErrorInvalidEscapeSequence" + K"ErrorOverLongCharacter" + K"ErrorInvalidOperator" K"Integer" K"BinInt" @@ -1049,7 +1057,7 @@ end #------------------------------------------------------------------------------- # Predicates is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" < k < K"END_CONTEXTUAL_KEYWORDS" -is_error(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" +is_error(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**" is_keyword(k::Kind) = K"BEGIN_KEYWORDS" < k < K"END_KEYWORDS" is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" < k < K"END_BLOCK_CONTINUATION_KEYWORDS" is_literal(k::Kind) = K"BEGIN_LITERAL" < k < K"END_LITERAL" diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8643695b38bb3..f697d2b3ae25c 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -72,7 +72,8 @@ function Base.summary(head::SyntaxHead) end function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) - str = is_error(kind(head)) ? "error" : untokenize(kind(head); unique=unique)::String + str = (is_error(kind(head)) ? 
untokenize(kind(head); unique=false) : + untokenize(kind(head); unique=unique))::String if is_dotted(head) str = "."*str end @@ -850,7 +851,7 @@ end #------------------------------------------------------------------------------- # ParseStream Post-processing -function validate_literal_tokens(stream::ParseStream) +function validate_tokens(stream::ParseStream) text = sourcetext(stream) toks = stream.tokens charbuf = IOBuffer() @@ -860,7 +861,7 @@ function validate_literal_tokens(stream::ParseStream) fbyte = toks[i-1].next_byte nbyte = t.next_byte lbyte = prevind(text, t.next_byte) - had_error = false + error_kind = K"None" if k in KSet"Integer BinInt OctInt HexInt" # The following shouldn't be able to error... # parse_int_literal @@ -882,7 +883,7 @@ function validate_literal_tokens(stream::ParseStream) elseif code == :overflow emit_diagnostic(stream, fbyte, lbyte, error="overflow in floating point literal") - had_error = true + error_kind = K"ErrorNumericOverflow" elseif underflow0 emit_diagnostic(stream, fbyte, lbyte, warning="underflow to zero in floating point literal") @@ -892,11 +893,13 @@ function validate_literal_tokens(stream::ParseStream) truncate(charbuf, 0) had_error = unescape_julia_string(charbuf, text, fbyte, nbyte, stream.diagnostics) - if !had_error + if had_error + error_kind = K"ErrorInvalidEscapeSequence" + else seek(charbuf,0) read(charbuf, Char) if !eof(charbuf) - had_error = true + error_kind = K"ErrorOverLongCharacter" emit_diagnostic(stream, fbyte, lbyte, error="character literal contains multiple characters") end @@ -904,9 +907,16 @@ function validate_literal_tokens(stream::ParseStream) elseif k == K"String" && !has_flags(t, RAW_STRING_FLAG) had_error = unescape_julia_string(devnull, text, fbyte, nbyte, stream.diagnostics) + if had_error + error_kind = K"ErrorInvalidEscapeSequence" + end + elseif is_error(k) && k != K"error" + # Emit messages for non-generic token errors + emit_diagnostic(stream, fbyte, lbyte, + 
error=Tokenize.TOKEN_ERROR_DESCRIPTION[k]) end - if had_error - toks[i] = SyntaxToken(SyntaxHead(K"error", EMPTY_FLAGS), + if error_kind != K"None" + toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS), t.orig_kind, t.next_byte) end end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b5ea979e0396a..952a3e7f0dd7f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -829,7 +829,7 @@ end # flisp: parse-range function parse_range(ps::ParseState) mark = position(ps) - parse_expr(ps) + parse_invalid_ops(ps) initial_tok = peek_token(ps) initial_kind = kind(initial_tok) if initial_kind != K":" && is_prec_colon(initial_kind) @@ -837,7 +837,7 @@ function parse_range(ps::ParseState) # a … b ==> (call-i a … b) # a .… b ==> (dotcall-i a … b) bump_dotsplit(ps) - parse_expr(ps) + parse_invalid_ops(ps) emit(ps, mark, is_dotted(initial_tok) ? K"dotcall" : K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled # a ? b : c:d ==> (? a b (call-i c : d)) @@ -864,7 +864,7 @@ function parse_range(ps::ParseState) bump(ps) # K"<" or K">" emit(ps, emark, K"error", error="Invalid `:$ks` found, maybe replace with `$ks:`") - parse_expr(ps) + parse_invalid_ops(ps) emit(ps, mark, K"call", INFIX_FLAG) break end @@ -891,7 +891,7 @@ function parse_range(ps::ParseState) emit(ps, mark, K"call", INFIX_FLAG) return end - parse_expr(ps) + parse_invalid_ops(ps) if n_colons == 2 emit(ps, mark, K"call", INFIX_FLAG) n_colons = 0 @@ -911,6 +911,23 @@ function parse_range(ps::ParseState) end end +# Parse invalid binary operators +# +# Having this is unnecessary, but it improves error messages and the +# error-containing parse tree. +# +# a--b ==> (call-i a (error) b) +function parse_invalid_ops(ps::ParseState) + mark = position(ps) + parse_expr(ps) + while (t = peek_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**") + bump_trivia(ps) + bump_dotsplit(ps) + parse_expr(ps) + emit(ps, mark, is_dotted(t) ? 
K"dotcall" : K"call", INFIX_FLAG) + end +end + # a - b - c ==> (call-i (call-i a - b) - c) # a + b + c ==> (call-i a + b c) # a .+ b ==> (dotcall-i a + b) @@ -3518,6 +3535,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) "premature end of input" : "unexpected closing token" bump_invisible(ps, K"error", error=msg) + elseif is_error(leading_kind) + # Errors for bad tokens are emitted in validate_tokens() rather than + # here. + bump(ps) else bump(ps, error="invalid syntax atom") end diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 905f4c2405b54..6d918c3c24cf0 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -51,7 +51,7 @@ function parse!(stream::ParseStream; rule::Symbol=:toplevel) else throw(ArgumentError("Unknown grammar rule $rule")) end - validate_literal_tokens(stream) + validate_tokens(stream) stream end diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 91b64c8235fa4..4d6080a02ca20 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -16,9 +16,13 @@ include("tokenize_utils.jl") TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... 
=#", K"ErrorInvalidNumericConstant" => "invalid numeric constant", - K"ErrorInvalidOperator" => "invalid operator", K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", - K"error" => "unknown error", + K"ErrorNumericOverflow"=>"overflow in numeric literal", + K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", + K"ErrorOverLongCharacter"=>"character literal contains multiple characters", + K"ErrorInvalidOperator" => "invalid operator", + K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting", + K"error" => "unknown error token", ) struct Token @@ -618,10 +622,14 @@ function lex_less(l::Lexer) return emit(l, K"<|") elseif dpeekchar(l) == ('-', '-') readchar(l); readchar(l) - if accept(l, '>') - return emit(l, K"<-->") + if accept(l, '-') + return emit_error(l, K"ErrorInvalidOperator") else - return emit(l, K"<--") + if accept(l, '>') + return emit(l, K"<-->") + else + return emit(l, K"<--") + end end else return emit(l, K"<") @@ -713,7 +721,7 @@ end function lex_star(l::Lexer) if accept(l, '*') - return emit_error(l, K"ErrorInvalidOperator") # "**" is an invalid operator use ^ + return emit_error(l, K"Error**") # "**" is an invalid operator use ^ elseif accept(l, '=') return emit(l, K"*=") end @@ -811,7 +819,7 @@ function lex_digit(l::Lexer, kind) accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) - if pc === '.' && !dotop2(ppc, ' ') + if pc === '.' && !dotop2(ppc) accept(l, '.') return emit_error(l, K"ErrorInvalidNumericConstant") end @@ -829,7 +837,7 @@ function lex_digit(l::Lexer, kind) accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) - if pc === '.' && !dotop2(ppc, ' ') + if pc === '.' 
&& !dotop2(ppc) accept(l, '.') return emit_error(l, K"ErrorInvalidNumericConstant") end @@ -959,7 +967,12 @@ function lex_dot(l::Lexer) if accept(l, '.') return emit(l, K"...") else - return emit(l, K"..") + if dotop2(peekchar(l)) + readchar(l) + return emit_error(l, K"ErrorInvalidOperator") + else + return emit(l, K"..") + end end elseif Base.isdigit(peekchar(l)) return lex_digit(l, K"Float") diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 8637577c57f0b..6da41683a271a 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -147,25 +147,23 @@ readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) 0x0000ffe9 <= c <= 0x0000ffec end -function dotop2(pc, dpc) +function dotop2(pc) dotop1(pc) || pc =='+' || pc =='-' || + pc =='−' || pc =='*' || pc =='/' || pc =='\\' || pc =='^' || pc =='<' || pc =='>' || - pc =='&' && dpc === '=' || pc =='&' || pc =='%' || - pc == '=' && dpc != '>' || - pc == '|' && dpc != '|' || - pc == '!' && dpc == '=' || + pc == '=' || + pc == '|' || pc == '⊻' || - pc == '÷' || - pc == '=' && dpc == '>' + pc == '÷' end # suffix operators diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 7aca5a7ca9a7e..1be6bc39cc410 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,7 +1,7 @@ function test_parse(production, code; v=v"1.6", expr=false) stream = ParseStream(code, version=v) production(ParseState(stream)) - JuliaSyntax.validate_literal_tokens(stream) + JuliaSyntax.validate_tokens(stream) t = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") source = SourceFile(code) s = SyntaxNode(source, t) @@ -127,6 +127,9 @@ tests = [ "x:y..." => "(... (call-i x : y))" "x..y..." => "(... (call-i x .. 
y))" ], + JuliaSyntax.parse_invalid_ops => [ + "a--b" => "(call-i a (ErrorInvalidOperator) b)" + ], JuliaSyntax.parse_expr => [ "a - b - c" => "(call-i (call-i a - b) - c)" "a + b + c" => "(call-i a + b c)" @@ -870,11 +873,11 @@ tests = [ ], JuliaSyntax.parse_atom => [ # errors in literals - "\"\\xqqq\"" => "(string (error))" - "'ab'" => "(char (error))" - "'\\xq'" => "(char (error))" - "10.0e1000'" => "(error)" - "10.0f100'" => "(error)" + "\"\\xqqq\"" => "(string (ErrorInvalidEscapeSequence))" + "'\\xq'" => "(char (ErrorInvalidEscapeSequence))" + "'ab'" => "(char (ErrorOverLongCharacter))" + "10.0e1000'" => "(ErrorNumericOverflow)" + "10.0f100'" => "(ErrorNumericOverflow)" ], JuliaSyntax.parse_docstring => [ """ "notdoc" ] """ => "(string \"notdoc\")" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 42112c0d82714..0c9df2952b4cd 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -311,7 +311,7 @@ for debugging. function itest_parse(production, code; version::VersionNumber=v"1.6") stream = ParseStream(code; version=version) production(JuliaSyntax.ParseState(stream)) - JuliaSyntax.validate_literal_tokens(stream) + JuliaSyntax.validate_tokens(stream) t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") println(stdout, "# Code:\n$code\n") diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 3145d44f10f86..297394dd13ba1 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -231,6 +231,7 @@ end @test toks("#=# text=#") == ["#=# text=#"=>K"Comment"] + @test toks("#= #= =#") == ["#= #= =#"=>K"ErrorEofMultiComment"] @test toks("#=#==#=#") == ["#=#==#=#"=>K"Comment"] @test toks("#=#==#=") == ["#=#==#="=>K"ErrorEofMultiComment"] end @@ -316,11 +317,6 @@ end @test length(collect(tokenize("x)"))) == 3 end -@testset "errors" begin - @test tok("#= #= =#", 1).kind == K"ErrorEofMultiComment" - @test tok("aa **", 3).kind == 
K"ErrorInvalidOperator" -end - @testset "xor_eq" begin @test tok("1 ⊻= 2", 3).kind==K"⊻=" end @@ -772,7 +768,11 @@ end test_error(tok("0b3",1), K"ErrorInvalidNumericConstant") test_error(tok("0op",1), K"ErrorInvalidNumericConstant") test_error(tok("--",1), K"ErrorInvalidOperator") - test_error(tok("1**2",2), K"ErrorInvalidOperator") + + @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"] + @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] end @testset "hat suffix" begin From 6592f1a8bcd7e70ff80013346d48816dfdcde9f7 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 5 Jan 2023 17:15:51 +1000 Subject: [PATCH 0559/1109] Allow the syntax `import A.:x` for `import A.x` (JuliaLang/JuliaSyntax.jl#177) This syntax is unnecessary but does occur ~10 times in the General registry so we should support it. --- JuliaSyntax/src/parser.jl | 10 ++++++---- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 952a3e7f0dd7f..863129ee4ebce 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2520,6 +2520,12 @@ function parse_import_path(ps::ParseState) # import A.B.C ==> (import (. A B C)) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) + if peek(ps) == K":" + # import A.:+ ==> (import (. A +)) + bump_disallowed_space(ps) + emit_diagnostic(ps, warning="quoting with `:` in import is unnecessary") + bump(ps, TRIVIA_FLAG) + end parse_atsym(ps) elseif is_dotted(t) # Resolve tokenization ambiguity: In imports, dots are part of the @@ -2534,10 +2540,6 @@ function parse_import_path(ps::ParseState) end bump_trivia(ps) bump_split(ps, (1,K".",TRIVIA_FLAG), (1,k,EMPTY_FLAGS)) - # elseif k == K".." 
- # # The flisp parser does this, but it's nonsense? - # # import A.. !=> (import (. A .)) - # bump_split(ps, (1,K".",TRIVIA_FLAG), (1,K".",EMPTY_FLAGS)) elseif k == K"..." # Import the .. operator # import A... ==> (import (. A ..)) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1be6bc39cc410..06882a2dac375 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -614,6 +614,7 @@ tests = [ "import \$A.@x" => "(import (. (\$ A) @x))" "import A.B" => "(import (. A B))" "import A.B.C" => "(import (. A B C))" + "import A.:+" => "(import (. A +))" "import A.==" => "(import (. A ==))" "import A.⋆.f" => "(import (. A ⋆ f))" "import A..." => "(import (. A ..))" From 733741f7e24dbb8b5d4a3b73b3fd6d31f5f656dd Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 9 Jan 2023 18:17:11 +1000 Subject: [PATCH 0560/1109] Fix lowering of `@.` to `@__dot__` in `using A: @.` (JuliaLang/JuliaSyntax.jl#178) --- JuliaSyntax/src/expr.jl | 5 ++--- JuliaSyntax/test/expr.jl | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 83b7fa36bc6ea..b4b09982d5abb 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -48,6 +48,8 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, return GlobalRef(Core, :var"@doc") elseif kind(node) == K"core_@cmd" return GlobalRef(Core, :var"@cmd") + elseif kind(node) == K"MacroName" && val === Symbol("@.") + return Symbol("@__dot__") else return val end @@ -152,9 +154,6 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # Special cases for various expression heads loc = source_location(LineNumberNode, node.source, node.position) if headsym === :macrocall - if args[1] == Symbol("@.") - args[1] = Symbol("@__dot__") - end reorder_parameters!(args, 2) insert!(args, 2, loc) elseif headsym in (:dotcall, :call) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 
3a2f46f19a131..fe1adf78c61b7 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -261,5 +261,6 @@ Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) # @__dot__ @test parse(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) + @test parse(Expr, "using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) end end From 3d44de4727b3a636ae0c25c66579616643b0aaec Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 11 Jan 2023 16:27:51 +1000 Subject: [PATCH 0561/1109] Fix deindentation with triple strings like `"""\n$x a"""` (JuliaLang/JuliaSyntax.jl#179) When an interpolation occurs at the beginning of a line without any preceding whitespace this counts as zeroing out the common prefix of indentation. --- JuliaSyntax/src/parser.jl | 5 +++++ JuliaSyntax/test/parser.jl | 1 + 2 files changed, 6 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 863129ee4ebce..d7722b6ad652d 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3181,6 +3181,11 @@ function parse_string(ps::ParseState, raw::Bool) k = kind(t) if k == K"$" @assert !raw # The lexer detects raw strings separately + if prev_chunk_newline + # """\n$x\n a""" ==> (string-s x "\n" " a") + indent_ref_i = first_byte(t) + indent_ref_len = 0 + end bump(ps, TRIVIA_FLAG) k = peek(ps) if k == K"(" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 06882a2dac375..dbf0ae0f510fb 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -816,6 +816,7 @@ tests = [ ((v=v"1.8",), "[\n ;; \n ]") => "(ncat-2)" ((v=v"1.7",), "[;;]") => "(ncat-2 (error))" # parse_string + "\"\"\"\n\$x\n a\"\"\"" => "(string-s x \"\\n\" \" a\")" "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" From cd7a41b699bef695163cb64a74021567b373b5c2 Mon Sep 17 00:00:00 2001 From: 
c42f Date: Wed, 11 Jan 2023 16:52:38 +1000 Subject: [PATCH 0562/1109] Improve logging for registry test tool --- JuliaSyntax/tools/check_all_packages.jl | 105 +++++++++++++----------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 58fc5e4417180..e476393603be3 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -12,68 +12,73 @@ logger = Logging.ConsoleLogger(logio) pkgspath = joinpath(@__DIR__, "pkgs") +exception_count = 0 +mismatch_count = 0 +file_count = 0 +t0 = time() exceptions = [] + Logging.with_logger(logger) do - t = time() - i = 0 - iob = IOBuffer() - exception_count = 0 - mismatch_count = 0 + global exception_count, mismatch_count, file_count, t0 for (r, _, files) in walkdir(pkgspath) for f in files endswith(f, ".jl") || continue fpath = joinpath(r, f) - if isfile(fpath) - code = read(fpath, String) - expr_cache = fpath*".Expr" - #e2 = JuliaSyntax.fl_parseall(code) - e2 = open(deserialize, fpath*".Expr") - @assert Meta.isexpr(e2, :toplevel) - try - e1 = JuliaSyntax.parseall(Expr, code, filename=fpath) - if !exprs_roughly_equal(e2, e1) - mismatch_count += 1 - @error("Parsers succeed but disagree", - fpath, - diff=Text(sprint(show_expr_text_diff, show, e1, e2)), - ) - end - catch err - err isa InterruptException && rethrow() - ex = (err, catch_backtrace()) - push!(exceptions, ex) - ref_parse = "success" - if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) - ref_parse = "fail" - if err isa JuliaSyntax.ParseError - # Both parsers agree that there's an error, and - # JuliaSyntax didn't have an internal error. 
- continue - end - end + isfile(fpath) || continue - exception_count += 1 - parse_to_syntax = "success" - try - JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code) - catch err2 - parse_to_syntax = "fail" + code = read(fpath, String) + expr_cache = fpath*".Expr" + #e2 = JuliaSyntax.fl_parseall(code) + e2 = open(deserialize, fpath*".Expr") + @assert Meta.isexpr(e2, :toplevel) + try + e1 = JuliaSyntax.parseall(Expr, code, filename=fpath) + if !exprs_roughly_equal(e2, e1) + mismatch_count += 1 + @error("Parsers succeed but disagree", + fpath, + diff=Text(sprint(show_expr_text_diff, show, e1, e2)), + ) + end + catch err + err isa InterruptException && rethrow() + ex = (err, catch_backtrace()) + push!(exceptions, ex) + ref_parse = "success" + if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) + ref_parse = "fail" + if err isa JuliaSyntax.ParseError + # Both parsers agree that there's an error, and + # JuliaSyntax didn't have an internal error. + continue end - @error "Parse failed" fpath exception=ex parse_to_syntax end + + exception_count += 1 + parse_to_syntax = "success" + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code) + catch err2 + parse_to_syntax = "fail" + end + @error "Parse failed" fpath exception=ex parse_to_syntax end - i += 1 - if i % 100 == 0 - runtime = time() - t - avg = round(runtime/i*1000, digits = 2) - print(iob, "\e[2J\e[0;0H") - println(iob, "$i files parsed") - println(iob, "> $(exception_count) failures compared to reference parser") - println(iob, "> $(mismatch_count) Expr mismatches") - println(iob, "> $(avg)ms per file, $(round(Int, runtime))s in total") - println(stderr, String(take!(iob))) + + file_count += 1 + if file_count % 100 == 0 + t_avg = round((time() - t0)/file_count*1000, digits = 2) + print(stderr, "\r$file_count files parsed, $t_avg ms per file") end end end end close(logio) + +t_avg = round((time() - t0)/file_count*1000, digits = 2) + +println() +@info """ + Finished parsing $file_count files. 
+ $(exception_count) failures compared to reference parser + $(mismatch_count) Expr mismatches + $(t_avg)ms per file""" From 0a806dc4924cf0d988a603b404d01d3fa6361ae3 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 15 Jan 2023 14:09:30 +1000 Subject: [PATCH 0563/1109] Ignore compiler warnings when parsing General registry These aren't failures so just ignore them for the purposes of comparison. --- JuliaSyntax/tools/check_all_packages.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index e476393603be3..3ee160c089b11 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -32,7 +32,7 @@ Logging.with_logger(logger) do e2 = open(deserialize, fpath*".Expr") @assert Meta.isexpr(e2, :toplevel) try - e1 = JuliaSyntax.parseall(Expr, code, filename=fpath) + e1 = JuliaSyntax.parseall(Expr, code, filename=fpath, ignore_warnings=true) if !exprs_roughly_equal(e2, e1) mismatch_count += 1 @error("Parsers succeed but disagree", From 70619f8907106bfd5cf93cd89d007b7e5ab6ef1a Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 11 Jan 2023 19:15:23 +1000 Subject: [PATCH 0564/1109] Use specific error token kinds, not K"error" --- JuliaSyntax/src/kinds.jl | 18 ++++++++++++++++++ JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/tokenize.jl | 25 ++++++------------------- JuliaSyntax/test/tokenize.jl | 26 +++++++++++--------------- 4 files changed, 36 insertions(+), 35 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 7228b0b28ca32..c4171d6b1f5ca 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -16,10 +16,12 @@ const _kind_names = # Tokenization errors "ErrorEofMultiComment" "ErrorInvalidNumericConstant" + "ErrorAmbiguousNumericConstant" "ErrorInvalidInterpolationTerminator" "ErrorNumericOverflow" "ErrorInvalidEscapeSequence" "ErrorOverLongCharacter" + "ErrorUnknownCharacter" # Generic 
error "error" "END_ERRORS" @@ -1014,10 +1016,12 @@ const _nonunique_kind_names = Set([ K"ErrorEofMultiComment" K"ErrorInvalidNumericConstant" + K"ErrorAmbiguousNumericConstant" K"ErrorInvalidInterpolationTerminator" K"ErrorNumericOverflow" K"ErrorInvalidEscapeSequence" K"ErrorOverLongCharacter" + K"ErrorUnknownCharacter" K"ErrorInvalidOperator" K"Integer" @@ -1053,6 +1057,20 @@ function untokenize(k::Kind; unique=true) end end +# Error kind => description +_token_error_descriptions = Dict{Kind, String}( + K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#", + K"ErrorInvalidNumericConstant" => "invalid numeric constant", + K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)", + K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", + K"ErrorNumericOverflow"=>"overflow in numeric literal", + K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", + K"ErrorOverLongCharacter"=>"character literal contains multiple characters", + K"ErrorUnknownCharacter"=>"unknown unicode character", + K"ErrorInvalidOperator" => "invalid operator", + K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting", + K"error" => "unknown error token", +) #------------------------------------------------------------------------------- # Predicates diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index f697d2b3ae25c..a276460f729bd 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -913,7 +913,7 @@ function validate_tokens(stream::ParseStream) elseif is_error(k) && k != K"error" # Emit messages for non-generic token errors emit_diagnostic(stream, fbyte, lbyte, - error=Tokenize.TOKEN_ERROR_DESCRIPTION[k]) + error=_token_error_descriptions[k]) end if error_kind != K"None" toks[i] = SyntaxToken(SyntaxHead(error_kind, 
EMPTY_FLAGS), diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 4d6080a02ca20..974062afad5cc 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -12,19 +12,6 @@ include("tokenize_utils.jl") #------------------------------------------------------------------------------- # Tokens -# Error kind => description -TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}( - K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#", - K"ErrorInvalidNumericConstant" => "invalid numeric constant", - K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", - K"ErrorNumericOverflow"=>"overflow in numeric literal", - K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", - K"ErrorOverLongCharacter"=>"character literal contains multiple characters", - K"ErrorInvalidOperator" => "invalid operator", - K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting", - K"error" => "unknown error token", -) - struct Token kind::Kind # Offsets into a string or buffer @@ -283,11 +270,11 @@ function emit(l::Lexer, kind::Kind, maybe_op=true) end """ - emit_error(l::Lexer, err::Kind=K"error") + emit_error(l::Lexer, err::Kind) Returns an `K"error"` token with error `err` and starts a new `Token`. 
""" -function emit_error(l::Lexer, err::Kind = K"error") +function emit_error(l::Lexer, err::Kind) @assert is_error(err) return emit(l, err) end @@ -387,7 +374,7 @@ function _next_token(l::Lexer, c) elseif (k = get(UNICODE_OPS, c, K"error")) != K"error" return emit(l, k) else - emit_error(l) + emit_error(l, K"ErrorUnknownCharacter") end end @@ -785,7 +772,7 @@ function lex_digit(l::Lexer, kind) return emit_error(l, K"ErrorInvalidNumericConstant") elseif is_operator_start_char(ppc) && ppc !== ':' readchar(l) - return emit_error(l) + return emit_error(l, K"ErrorAmbiguousNumericConstant") elseif (!(isdigit(ppc) || iswhitespace(ppc) || is_identifier_start_char(ppc) @@ -824,7 +811,7 @@ function lex_digit(l::Lexer, kind) return emit_error(l, K"ErrorInvalidNumericConstant") end else - return emit_error(l) + return emit_error(l, K"ErrorInvalidNumericConstant") end elseif pc == '.' && (is_identifier_start_char(ppc) || ppc == EOF_CHAR) readchar(l) @@ -842,7 +829,7 @@ function lex_digit(l::Lexer, kind) return emit_error(l, K"ErrorInvalidNumericConstant") end else - return emit_error(l) + return emit_error(l, K"ErrorInvalidNumericConstant") end elseif position(l) - startpos(l) == 1 && l.chars[1] == '0' kind == K"Integer" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 297394dd13ba1..4ea295c6713e5 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -769,10 +769,21 @@ end test_error(tok("0op",1), K"ErrorInvalidNumericConstant") test_error(tok("--",1), K"ErrorInvalidOperator") + @test toks("1e+") == ["1e+"=>K"ErrorInvalidNumericConstant"] + @test toks("1.0e+") == ["1.0e+"=>K"ErrorInvalidNumericConstant"] + @test toks("0x.") == ["0x."=>K"ErrorInvalidNumericConstant"] + @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"] @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", 
"b"=>K"Identifier"] @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + + @test toks("1.+2") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", "2"=>K"Integer"] + @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"] + @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"] + @test toks("1.?") == ["1."=>K"ErrorAmbiguousNumericConstant", "?"=>K"?"] + + @test toks("\x00") == ["\x00"=>K"ErrorUnknownCharacter"] end @testset "hat suffix" begin @@ -786,21 +797,6 @@ end @test untokenize(collect(tokenize(s))[1], s) == s end -@testset "invalid float juxt" begin - s = "1.+2" - @test tok(s, 1).kind == K"error" - @test is_operator(tok(s, 2).kind) - test_roundtrip("1234.+1", K"error", "1234.") - @test tok("1.+ ").kind == K"error" - @test tok("1.⤋").kind == K"error" - @test tok("1.?").kind == K"error" -end - -@testset "invalid hexadecimal" begin - s = "0x." - tok(s, 1).kind === K"error" -end - @testset "circ arrow right op" begin s = "↻" @test collect(tokenize(s))[1].kind == K"↻" From af4f3b968facff55b03c6b0a088c533be1d1f507 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 15 Jan 2023 15:42:38 +1000 Subject: [PATCH 0565/1109] Support for running JuliaSyntax on Julia 1.0 Mostly just compat changes here, adding a bunch of Base utility functions we use which don't exist in earlier Julia versions. 
--- JuliaSyntax/.github/workflows/CI.yml | 27 +++++++++++++++++++++++++ JuliaSyntax/Project.toml | 2 +- JuliaSyntax/src/expr.jl | 4 ++-- JuliaSyntax/src/hooks.jl | 20 ++++++++++++++++-- JuliaSyntax/src/source_files.jl | 3 ++- JuliaSyntax/src/utils.jl | 18 +++++++++++++++++ JuliaSyntax/src/value_parsing.jl | 6 +++--- JuliaSyntax/test/parse_packages.jl | 8 +++++--- JuliaSyntax/test/runtests.jl | 10 +++++++-- JuliaSyntax/test/test_utils.jl | 7 ++++++- JuliaSyntax/test/value_parsing.jl | 6 +++--- JuliaSyntax/tools/check_all_packages.jl | 2 +- 12 files changed, 94 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 57bde03dc7625..235c2d3f81183 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -13,7 +13,14 @@ jobs: fail-fast: false matrix: version: + - '1.0' + - '1.1' + - '1.2' + - '1.3' + - '1.4' + - '1.5' - '1.6' + - '1.7' - '1' - 'nightly' os: @@ -22,6 +29,26 @@ jobs: - windows-latest arch: - x64 + exclude: + # Test all OS's on + # - 1.0 + # - 1.6 + # - 1 + # - nightly + # but remove some configurations from the build matrix to reduce CI time. 
+ # See https://github.com/marketplace/actions/setup-julia-environment + - {os: 'macOS-latest', version: '1.1'} + - {os: 'macOS-latest', version: '1.2'} + - {os: 'macOS-latest', version: '1.3'} + - {os: 'macOS-latest', version: '1.4'} + - {os: 'macOS-latest', version: '1.5'} + - {os: 'macOS-latest', version: '1.7'} + - {os: 'windows-latest', version: '1.1'} + - {os: 'windows-latest', version: '1.2'} + - {os: 'windows-latest', version: '1.3'} + - {os: 'windows-latest', version: '1.4'} + - {os: 'windows-latest', version: '1.5'} + - {os: 'windows-latest', version: '1.7'} steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 1d6774fe98064..2920fa397c0c1 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -4,7 +4,7 @@ authors = ["Chris Foster and contributors"] version = "0.2.0" [compat] -julia = "1.6" +julia = "1.0" [deps] diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index b4b09982d5abb..87a7f59b92264 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -45,9 +45,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) elseif kind(node) == K"core_@doc" - return GlobalRef(Core, :var"@doc") + return GlobalRef(Core, Symbol("@doc")) elseif kind(node) == K"core_@cmd" - return GlobalRef(Core, :var"@cmd") + return GlobalRef(Core, Symbol("@cmd")) elseif kind(node) == K"MacroName" && val === Symbol("@.") return Symbol("@__dot__") else diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 5f4ca70d3d836..2de562a6b7358 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -237,8 +237,21 @@ end function _fl_parse_hook(code, filename, lineno, offset, options) @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 return Core.Compiler.fl_parse(code, filename, lineno, 
offset, options) - else + elseif VERSION >= v"1.6" return Core.Compiler.fl_parse(code, filename, offset, options) + else + if options === :all + ex = Base.parse_input_line(String(code), filename=filename, depwarn=false) + if !Meta.isexpr(ex, :toplevel) + ex = Expr(:toplevel, ex) + end + return ex, sizeof(code) + elseif options === :statement || options == :atom + ex, pos = Meta.parse(code, offset+1, greedy=options==:statement, raise=false) + return ex, pos-1 + else + error("Unknown parse options $options") + end end end @@ -248,7 +261,7 @@ end # FIXME: Improve this in Base somehow? Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e -const _default_parser = Core._parse +const _default_parser = VERSION < v"1.6" ? nothing : Core._parse """ enable_in_core!([enable=true; freeze_world_age, debug_filename]) @@ -266,6 +279,9 @@ Keyword arguments: """ function enable_in_core!(enable=true; freeze_world_age = true, debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing)) + if VERSION < v"1.6" + error("Cannot use JuliaSyntax as the main Julia parser in Julia version $VERSION < 1.6") + end _parser_world_age[] = freeze_world_age ? Base.get_world_counter() : _latest_world if enable && !isnothing(debug_filename) _debug_log[] = open(debug_filename, "w") diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index bff823f9347fa..a756610f7a78c 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -65,7 +65,8 @@ function source_line_range(source::SourceFile, byte_index; end function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) - LineNumberNode(source_line(source, byte_index), source.filename) + LineNumberNode(source_line(source, byte_index), + isnothing(source.filename) ? 
nothing : Symbol(source.filename)) end function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 08f5ab33e2ae2..04f7d8ad40cdc 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -1,3 +1,21 @@ +# Compatibility hacks for older Julia versions +if VERSION < v"1.1" + isnothing(x) = x === nothing +end +if VERSION < v"1.4" + function only(x::AbstractVector) + if length(x) != 1 + error("Collection must contain exactly 1 element") + end + return x[1] + end +end +if VERSION < v"1.5" + import Base.peek +end + +#-------------------------------------------------- +# # Internal error, used as assertion failure for cases we expect can't happen. @noinline function internal_error(strs...) error("Internal error: ", strs...) diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/value_parsing.jl index f83bc8912eb78..37982ac4984ef 100644 --- a/JuliaSyntax/src/value_parsing.jl +++ b/JuliaSyntax/src/value_parsing.jl @@ -122,7 +122,7 @@ end @inline function _unsafe_parse_float(::Type{Float64}, ptr, strsize) Libc.errno(0) endptr = Ref{Ptr{UInt8}}(C_NULL) - x = @ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble + x = ccall(:jl_strtod_c, Cdouble, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr) @check endptr[] == ptr + strsize status = :ok if Libc.errno() == Libc.ERANGE @@ -155,13 +155,13 @@ end # strtof seems buggy on windows and doesn't set ERANGE correctly on # overflow. See also # https://github.com/JuliaLang/julia/issues/46544 - x = Float32(@ccall jl_strtod_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cdouble) + x = Float32(ccall(:jl_strtod_c, Cdouble, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr)) if isinf(x) status = :overflow # Underflow not detected, but that will only be a warning elsewhere. 
end else - x = @ccall jl_strtof_c(ptr::Ptr{UInt8}, endptr::Ptr{Ptr{UInt8}})::Cfloat + x = ccall(:jl_strtof_c, Cfloat, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr) end @check endptr[] == ptr + strsize if Libc.errno() == Libc.ERANGE diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 1603af1392618..5ea0a07024dc4 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -1,9 +1,11 @@ # Full-scale parsing tests of JuliaSyntax itself, Julia Base, etc. +juliasyntax_dir = joinpath(@__DIR__, "..") @testset "Parse JuliaSyntax" begin - pkgdir = joinpath(@__DIR__, "..") - test_parse_all_in_path(joinpath(pkgdir, "src")) - test_parse_all_in_path(joinpath(pkgdir, "test")) + test_parse_all_in_path(joinpath(juliasyntax_dir, "src")) +end +@testset "Parse JuliaSyntax tests" begin + test_parse_all_in_path(joinpath(juliasyntax_dir, "test")) end base_path = let diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index fad4278d425de..a6c091c74c6ee 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -20,7 +20,13 @@ include("expr.jl") @testset "Parsing literals from strings" begin include("value_parsing.jl") end -include("hooks.jl") -include("parse_packages.jl") include("source_files.jl") +if VERSION >= v"1.6" + # Tests restricted to 1.6+ due to + # * Core._parse hook doesn't exist on v1.5 and lower + # * Reference parser bugs which would need workarounds for package parse comparisons + include("hooks.jl") + include("parse_packages.jl") +end + diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 0c9df2952b4cd..512c549eaad0a 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -28,6 +28,11 @@ using .JuliaSyntax: fl_parseall, fl_parse +if VERSION < v"1.6" + # Compat stuff which might not be in Base for older versions + using JuliaSyntax: isnothing, only, peek +end + function remove_macro_linenums!(ex) if 
Meta.isexpr(ex, :macrocall) ex.args[2] = nothing @@ -69,7 +74,7 @@ function parse_diff(text, showfunc=dump) end function kw_to_eq(ex) - return Meta.isexpr(:kw, ex) ? Expr(:(=), ex.args...) : ex + return Meta.isexpr(ex, :kw) ? Expr(:(=), ex.args...) : ex end function triple_string_roughly_equal(str, fl_str) diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/value_parsing.jl index 8b552d6c348cd..64b868289e176 100644 --- a/JuliaSyntax/test/value_parsing.jl +++ b/JuliaSyntax/test/value_parsing.jl @@ -66,7 +66,7 @@ octint(s) = parse_uint_literal(s, K"OctInt") @test hexint("0x10000000000000000") === UInt128(0x10000000000000000) @test hexint("0xffffffffffffffffffffffffffffffff") === UInt128(0xffffffffffffffffffffffffffffffff) @test (n = hexint("0x100000000000000000000000000000000"); - n isa BigInt && n == 0x100000000000000000000000000000000) + n isa BigInt && n == big"0x100000000000000000000000000000000") end @testset "HexInt string length limits for different types" begin @test hexint("0x00") === UInt8(0) @@ -94,7 +94,7 @@ octint(s) = parse_uint_literal(s, K"OctInt") @test binint("0b10000000000000000000000000000000000000000000000000000000000000000") === UInt128(0x10000000000000000) @test binint("0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") === UInt128(0xffffffffffffffffffffffffffffffff) @test (n = binint("0b100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); - n isa BigInt && n == 0x100000000000000000000000000000000) + n isa BigInt && n == big"0x100000000000000000000000000000000") end @testset "BinInt string length limits for different types" begin @test binint("0b00000000") === UInt8(0) @@ -122,7 +122,7 @@ octint(s) = parse_uint_literal(s, K"OctInt") @test octint("0o2000000000000000000000") === UInt128(0x10000000000000000) @test octint("0o3777777777777777777777777777777777777777777") === 
UInt128(0xffffffffffffffffffffffffffffffff) @test (n = octint("0o4000000000000000000000000000000000000000000"); - n isa BigInt && n == 0x100000000000000000000000000000000) + n isa BigInt && n == big"0x100000000000000000000000000000000") end @testset "OctInt string length limits for different types" begin @test octint("0o000") === UInt8(0) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 3ee160c089b11..ee487f48968ea 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -33,7 +33,7 @@ Logging.with_logger(logger) do @assert Meta.isexpr(e2, :toplevel) try e1 = JuliaSyntax.parseall(Expr, code, filename=fpath, ignore_warnings=true) - if !exprs_roughly_equal(e2, e1) + if !exprs_roughly_equal(e2, e1, strict_triple_strs=false) mismatch_count += 1 @error("Parsers succeed but disagree", fpath, From 505ccfbbeae2aef003eefbd65556c1e4abe7acd7 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 16 Jan 2023 12:17:01 +1000 Subject: [PATCH 0566/1109] Test the test_utils from runtests.jl --- JuliaSyntax/test/runtests.jl | 19 ++++++++++++++++++- JuliaSyntax/test/test_utils.jl | 14 -------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index a6c091c74c6ee..418bb2400e2dd 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -7,11 +7,28 @@ using JuliaSyntax: GreenNode, SyntaxNode, flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, children, child, setchild!, SyntaxHead +include("test_utils.jl") +# Tests for the test_utils go here to allow the utils to be included on their +# own without invoking the tests. 
+@testset "Test tools" begin + @test exprs_roughly_equal(Expr(:global, :x, :y), + Expr(:global, Expr(:tuple, :x, :y))) + @test exprs_roughly_equal(Expr(:local, :x, :y), + Expr(:local, Expr(:tuple, :x, :y))) + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f)) + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f0)) + @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), + Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) +end + @testset "Tokenize" begin include("tokenize.jl") end -include("test_utils.jl") include("parse_stream.jl") include("parser.jl") include("diagnostics.jl") diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 512c549eaad0a..5e552cb9903db 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -389,17 +389,3 @@ function parse_sexpr(code) end -@testset "Test tools" begin - @test exprs_roughly_equal(Expr(:global, :x, :y), - Expr(:global, Expr(:tuple, :x, :y))) - @test exprs_roughly_equal(Expr(:local, :x, :y), - Expr(:local, Expr(:tuple, :x, :y))) - @test exprs_roughly_equal(1.5, - Expr(:call, :*, 1.5, :f)) - @test exprs_roughly_equal(1.5, - Expr(:call, :*, 1.5, :f0)) - @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), - Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), - Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), - Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) -end From 27d2ac1ec89b52c30070a59c5447702774486b48 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 16 Jan 2023 15:14:07 +1000 Subject: [PATCH 0567/1109] Minor clarifying rename --- JuliaSyntax/test/parser_api.jl | 13 ++++++------- JuliaSyntax/test/test_utils.jl | 1 + 2 files changed, 7 insertions(+), 7 
deletions(-) diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 131bb7fd933d0..b6c7752c72835 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -82,14 +82,13 @@ end @testset "error/warning handling" begin - # ignore_warnings - parse_sexpr(s;kws...) = sprint(show, MIME("text/x.sexpression"), parse(SyntaxNode, s; kws...)) - @test_throws JuliaSyntax.ParseError parse_sexpr("try finally catch ex end") - @test parse_sexpr("try finally catch ex end", ignore_warnings=true) == + parseshow(s;kws...) = sprint(show, MIME("text/x.sexpression"), parse(SyntaxNode, s; kws...)) + @test_throws JuliaSyntax.ParseError parseshow("try finally catch ex end") + @test parseshow("try finally catch ex end", ignore_warnings=true) == "(try_finally_catch (block) false false false (block) ex (block))" # ignore_errors - @test_throws JuliaSyntax.ParseError parse_sexpr("[a; b, c]") - @test_throws JuliaSyntax.ParseError parse_sexpr("[a; b, c]", ignore_warnings=true) - @test parse_sexpr("[a; b, c]", ignore_errors=true) == "(vcat a b (error-t) c)" + @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]") + @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]", ignore_warnings=true) + @test parseshow("[a; b, c]", ignore_errors=true) == "(vcat a b (error-t) c)" end end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 5e552cb9903db..b866644595a15 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -351,6 +351,7 @@ function show_green_tree(code; version::VersionNumber=v"1.6") sprint(show, MIME"text/plain"(), t, code) end + #------------------------------------------------------------------------------- # Parse s-expressions function parse_sexpr(code) From f8ded4a4b004d8885b3c8b51b0be1d955937a48a Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 20 Jan 2023 08:39:22 +1000 Subject: [PATCH 0568/1109] Minor fix to package check tool --- JuliaSyntax/tools/check_all_packages.jl 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index ee487f48968ea..3ee160c089b11 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -33,7 +33,7 @@ Logging.with_logger(logger) do @assert Meta.isexpr(e2, :toplevel) try e1 = JuliaSyntax.parseall(Expr, code, filename=fpath, ignore_warnings=true) - if !exprs_roughly_equal(e2, e1, strict_triple_strs=false) + if !exprs_roughly_equal(e2, e1) mismatch_count += 1 @error("Parsers succeed but disagree", fpath, From e9d5a5e981892672006d50e2dc2f5326d22745be Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 20 Jan 2023 08:48:07 +1000 Subject: [PATCH 0569/1109] Bump version to 0.3 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 2920fa397c0c1..2ee8dba954fce 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] -version = "0.2.0" +version = "0.3.0" [compat] julia = "1.0" From 5f0df653b3475714011aa71275a3a461486f2e78 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 24 Jan 2023 14:56:23 +1000 Subject: [PATCH 0570/1109] Allow different parsings of `(a; b,)` to compare equal (JuliaLang/JuliaSyntax.jl#184) The reference parser sees `(a; b,)` as a block, but the trailing comma implies this should be a frakentuple. Allow this unusual syntax as a minor bug in the reference parser. 
--- JuliaSyntax/test/expr.jl | 2 -- JuliaSyntax/test/runtests.jl | 19 ++++++++++++++++- JuliaSyntax/test/test_utils.jl | 37 +++++++++++++++++++++++++--------- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index fe1adf78c61b7..11c5025a96a37 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -161,8 +161,6 @@ @test parse(Expr, "'a'") == 'a' @test parse(Expr, "'α'") == 'α' @test parse(Expr, "'\\xce\\xb1'") == 'α' - # FIXME - # @test_throws ParseError parse(Expr, "'abcde'") end @testset "do block conversion" begin diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 418bb2400e2dd..020f33f293b9b 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -8,21 +8,38 @@ using JuliaSyntax: GreenNode, SyntaxNode, children, child, setchild!, SyntaxHead include("test_utils.jl") + # Tests for the test_utils go here to allow the utils to be included on their # own without invoking the tests. 
-@testset "Test tools" begin +@testset "Reference parser bugs" begin + # `global (x,y)` @test exprs_roughly_equal(Expr(:global, :x, :y), Expr(:global, Expr(:tuple, :x, :y))) @test exprs_roughly_equal(Expr(:local, :x, :y), Expr(:local, Expr(:tuple, :x, :y))) + # `0x1.8p0f` @test exprs_roughly_equal(1.5, Expr(:call, :*, 1.5, :f)) @test exprs_roughly_equal(1.5, Expr(:call, :*, 1.5, :f0)) + # `@f(a=1) do \n end` @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) + # `"""\n a\n \n b"""` + @test exprs_roughly_equal("a\n \nb", " a\n\n b") + @test !exprs_roughly_equal("a\n x\nb", " a\n x\n b") + @test exprs_roughly_equal("a\n x\nb", "a\n x\nb") + # `(a; b,)` + @test exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :b), :a)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :c), :a)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :b), :c)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b, :c), + Expr(:tuple, Expr(:parameters, :b), :a)) end @testset "Tokenize" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index b866644595a15..45058acdf1666 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -77,7 +77,7 @@ function kw_to_eq(ex) return Meta.isexpr(ex, :kw) ? Expr(:(=), ex.args...) 
: ex end -function triple_string_roughly_equal(str, fl_str) +function triple_string_roughly_equal(fl_str, str) # Allow some leeway for a bug in the reference parser with # triple quoted strings lines = split(str, '\n') @@ -85,8 +85,14 @@ function triple_string_roughly_equal(str, fl_str) if length(lines) != length(fl_lines) return false end - for (line1, line2) in zip(lines, fl_lines) - if !all(c in " \t" for c in line2) && !endswith(line1, line2) + has_whitespace_only_line = + any(!isempty(fl_line) && all(c in " \t" for c in fl_line) + for fl_line in fl_lines) + if !has_whitespace_only_line + return str == fl_str + end + for (line, fl_line) in zip(lines, fl_lines) + if !all(c in " \t" for c in fl_line) && !endswith(line, fl_line) return false end end @@ -107,22 +113,33 @@ function exprs_roughly_equal(fl_ex, ex) if fl_ex == ex return true else - return triple_string_roughly_equal(ex, fl_ex) + return triple_string_roughly_equal(fl_ex, ex) end else return fl_ex == ex end end + # Ignore differences in line number nodes within block-like constructs + fl_args = fl_ex.head in (:block, :quote, :toplevel) ? + filter(x->!(x isa LineNumberNode), fl_ex.args) : + fl_ex.args + args = ex.head in (:block, :quote, :toplevel) ? 
+ filter(x->!(x isa LineNumberNode), ex.args) : + ex.args + if (fl_ex.head == :block && ex.head == :tuple && + length(fl_args) == 2 && length(args) == 2 && + Meta.isexpr(args[1], :parameters, 1) && + exprs_roughly_equal(fl_args[2], args[1].args[1]) && + exprs_roughly_equal(fl_args[1], args[2])) + # Allow `(a; b,)`: + # * Reference parser produces a block + # * New parser produces a frankentuple + return true + end if fl_ex.head != ex.head return false end h = ex.head - fl_args = fl_ex.args - args = ex.args - if ex.head in (:block, :quote, :toplevel) - fl_args = filter(x->!(x isa LineNumberNode), fl_args) - args = filter(x->!(x isa LineNumberNode), args) - end if (h == :global || h == :local) && length(args) == 1 && Meta.isexpr(args[1], :tuple) # Allow invalid syntax like `global (x, y)` args = args[1].args From 421bb276c7334c5c200503ae99bd723d02bf0183 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 25 Jan 2023 18:04:48 +1000 Subject: [PATCH 0571/1109] Parser fuzz testing tools and fixes (JuliaLang/JuliaSyntax.jl#185) Some fuzz testing tooling to check that the parser doesn't crash on randomly generated source strings. Fix several problems found with this: * `?` shouldn't be special in `parse_unary`. This was inherited from the a syntax hack used to support the ancient and questionable `@windows?` and other platform test macros in osutils.jl. This is long since gone and we shouldn't continue supporting this. * `<:` may be unary so `<: <: x` should parse as `(<: (<: x))`, even though this is kind of nonsense semantically. * Constructing a SyntaxNode tree shouldn't fail when there's malformed literals but when we've parsed using `ignore_errors=true`. Instead we use ErrorVal() for the leaf values in that tree. * The tokenizer should not crash when overlong UTF-8 character literals are encountered. 
--- JuliaSyntax/src/parser.jl | 13 +++--- JuliaSyntax/src/syntax_tree.jl | 31 +++++++------- JuliaSyntax/src/tokenize.jl | 2 +- JuliaSyntax/src/tokenize_utils.jl | 12 +++--- JuliaSyntax/test/fuzz_test.jl | 71 +++++++++++++++++++++++++++++++ JuliaSyntax/test/parser.jl | 4 ++ JuliaSyntax/test/parser_api.jl | 6 +++ JuliaSyntax/test/tokenize.jl | 29 ++++++++----- 8 files changed, 130 insertions(+), 38 deletions(-) create mode 100644 JuliaSyntax/test/fuzz_test.jl diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d7722b6ad652d..e2b28251690be 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1025,9 +1025,10 @@ function parse_unary_subtype(ps::ParseState) else # <: x ==> (<:-pre x) # <: A where B ==> (<:-pre (where A B)) + # <: <: x ==> (<:-pre (<:-pre x)) mark = position(ps) bump(ps, TRIVIA_FLAG) - parse_where(ps, parse_juxtapose) + parse_unary_subtype(ps) # Flisp parser handled this, but I don't know how it can happen... @check peek_behind(ps).kind != K"tuple" emit(ps, mark, kind(t), PREFIX_OP_FLAG) @@ -1162,9 +1163,7 @@ function parse_unary(ps::ParseState) if ( !is_operator(op_k) || is_word_operator(op_k) || - # TODO(jb): `?` should probably not be listed here - # except for the syntax hack in osutils.jl - (op_k in KSet": ' .' 
?") || + (op_k in KSet": ' .'") || (is_syntactic_unary_op(op_k) && !is_dotted(op_t)) || is_syntactic_operator(op_k) ) @@ -1693,8 +1692,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"curly") end elseif k in KSet" \" \"\"\" ` ``` " && - !preceding_whitespace(t) && - maybe_strmac && peek_behind(ps, macro_name_position).kind == K"Identifier" + !preceding_whitespace(t) && maybe_strmac && + (# Must mirror the logic in lex_quote() for consistency + origk = peek_behind(ps, macro_name_position).orig_kind; + origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk)) # Custom string and command literals # x"str" ==> (macrocall @x_str (string-r "str")) # x`str` ==> (macrocall @x_cmd (cmdstring-r "str")) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 293403e25bd9e..733dd7506471f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -29,18 +29,19 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In val_str = view(source, val_range) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. + # + # Any errors parsing literals are represented as ErrorVal() - this can + # happen when the user sets `ignore_errors=true` during parsing. val = if k == K"Integer" parse_int_literal(val_str) elseif k == K"Float" v, code = parse_float_literal(Float64, source.code, position, position+span(raw)) - @check code == :ok || code == :underflow - v + (code == :ok || code == :underflow) ? v : ErrorVal() elseif k == K"Float32" v, code = parse_float_literal(Float32, source.code, position, position+span(raw)) - @check code == :ok || code == :underflow - v + (code == :ok || code == :underflow) ? 
v : ErrorVal() elseif k in KSet"BinInt OctInt HexInt" parse_uint_literal(val_str, k) elseif k == K"true" @@ -49,14 +50,15 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In false elseif k == K"Char" io = IOBuffer() - ds = Diagnostic[] had_error = unescape_julia_string(io, source.code, position, - position+span(raw), ds) - @check !had_error && isempty(ds) - seek(io, 0) - c = read(io, Char) - @check eof(io) - c + position+span(raw), Diagnostic[]) + if had_error + ErrorVal() + else + seek(io, 0) + c = read(io, Char) + eof(io) ? c : ErrorVal() + end elseif k == K"Identifier" if has_flags(head(raw), RAW_STRING_FLAG) io = IOBuffer() @@ -70,15 +72,14 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In Symbol(val_str) elseif k in KSet"String CmdString" io = IOBuffer() + had_error = false if has_flags(head(raw), RAW_STRING_FLAG) unescape_raw_string(io, val_str, k == K"CmdString") else - ds = Diagnostic[] had_error = unescape_julia_string(io, source.code, position, - position+span(raw), ds) - @check !had_error && isempty(ds) + position+span(raw), Diagnostic[]) end - String(take!(io)) + had_error ? ErrorVal() : String(take!(io)) elseif is_operator(k) isempty(val_range) ? 
Symbol(untokenize(k)) : # synthetic invisible tokens diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 974062afad5cc..70163010229ca 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -48,7 +48,7 @@ end @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') @inline isbinary(c::Char) = c == '0' || c == '1' @inline isoctal(c::Char) = '0' ≤ c ≤ '7' -@inline iswhitespace(c::Char) = Base.isspace(c) || c === '\ufeff' +@inline iswhitespace(c::Char) = (Base.isvalid(c) && Base.isspace(c)) || c === '\ufeff' struct StringState triplestr::Bool diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 6da41683a271a..1767cc522b5db 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -4,19 +4,19 @@ const EOF_CHAR = typemax(Char) function is_identifier_char(c::Char) c == EOF_CHAR && return false - Base.ismalformed(c) && return false + Base.isvalid(c) || return false return Base.is_id_char(c) end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false - Base.ismalformed(c) && return false + Base.isvalid(c) || return false return Base.is_id_start_char(c) end # Chars that we will never allow to be part of a valid non-operator identifier function is_never_id_char(ch::Char) - Base.ismalformed(ch) && return true + Base.isvalid(ch) || return true cat = Unicode.category_code(ch) c = UInt32(ch) return ( @@ -50,7 +50,7 @@ readchar(io::IO) = eof(io) ? 
EOF_CHAR : read(io, Char) # `a .(op) b` or `.(op)a` and where `length(string(op)) == 1` @inline function dotop1(c1::Char) c1 == EOF_CHAR && return false - Base.ismalformed(c1) && return false + Base.isvalid(c1) || return false c = UInt32(c1) c == 0x00000021 || c == 0x000000a6 || @@ -173,7 +173,7 @@ end @inline function isopsuffix(c1::Char) c1 == EOF_CHAR && return false - Base.ismalformed(c1) && return false + Base.isvalid(c1) || return false c = UInt32(c1) if (c < 0xa1 || c > 0x10ffff) return false @@ -252,7 +252,7 @@ end function is_operator_start_char(c::Char) c == EOF_CHAR && return false - Base.ismalformed(c) && return false + Base.isvalid(c) || return false is_operator_start_char(UInt32(c)) end is_operator_start_char(u::UInt32) = u == 0x00000021 || (u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 
0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b || (u == 0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u == 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || (u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 
0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 || (u == 0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u == 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || (u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 
0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 || (u == 0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u == 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || (u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 
0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 || (u == 0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u == 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || (u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 
0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 || (u == 0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u == 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || (u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 
0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 || (u == 0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl new file mode 100644 index 0000000000000..49dc11a23fa4d --- /dev/null +++ b/JuliaSyntax/test/fuzz_test.jl @@ -0,0 +1,71 @@ +using JuliaSyntax + +# Parser fuzz testing tools. + +function parser_exception(str) + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true) + false + catch + true + end +end + +""" +Reduce test case via combination of bisection and random deletion. + +This is suited to randomly generated strings. It might work with more code-like +strings too? 
+""" +function rand_reduce(str) + while true + if length(str) <= 1 + return str + end + m1 = thisind(str, length(str)÷2) + m2 = nextind(str, m1) + if parser_exception(str[1:m1]) + str = str[1:m1] + elseif parser_exception(str[m2:end]) + str = str[m2:end] + else + chunklen = 10 + reduced = false + if length(str) > chunklen + for i = 1:100 + m = thisind(str, rand(1:length(str)-chunklen)) + s = str[1:m]*str[prevind(str, m+chunklen):end] + if parser_exception(s) + str = s + reduced = true + break + end + end + end + if !reduced + return str + end + end + end +end + +# The parser should never throw an exception. To test whether this is true, +# try passing randomly generated bad input data into it. +function fuzz_test(gen_bad_input, N) + for i=1:N + str = gen_bad_input() + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true); + catch + @error "Parser threw exception" exception=current_exceptions() + return str + end + end + return nothing +end + +function fuzz_binary(N) + fuzz_test(N) do + String(rand(UInt8, 1_000_000)) + end +end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index dbf0ae0f510fb..a49e85c7e64d4 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -238,6 +238,7 @@ tests = [ "/x" => "(call-pre (error /) x)" "+₁ x" => "(call-pre (error +₁) x)" ".<: x" => "(dotcall-pre (error .<:) x)" + "?\"str\"" => """(call-pre (error ?) 
(string "str"))""" ], JuliaSyntax.parse_factor => [ "x^y" => "(call-i x ^ y)" @@ -257,6 +258,7 @@ tests = [ "<:{T}(x::T)" => "(call (curly <: T) (::-i x T))" "<:(x::T)" => "(<:-pre (::-i x T))" "<: x" => "(<:-pre x)" + "<: <: x" => "(<:-pre (<:-pre x))" "<: A where B" => "(<:-pre (where A B))" # Really for parse_where "x where \n {T}" => "(where x T)" @@ -388,6 +390,8 @@ tests = [ "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" "x\"\"" => """(macrocall @x_str (string-r ""))""" "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" + "in\"str\"" => """(macrocall @in_str (string-r "str"))""" + "outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" # Triple quoted procesing for custom strings "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-sr "x"))""" "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\n" "y"))""" diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index b6c7752c72835..e6f8d26c8a390 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -90,5 +90,11 @@ @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]") @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]", ignore_warnings=true) @test parseshow("[a; b, c]", ignore_errors=true) == "(vcat a b (error-t) c)" + # errors in literals + @test parseshow("\"\\z\"", ignore_errors=true) == "(string (ErrorInvalidEscapeSequence))" + @test parseshow("'\\z'", ignore_errors=true) == "(char (ErrorInvalidEscapeSequence))" + @test parseshow("'abc'", ignore_errors=true) == "(char (ErrorOverLongCharacter))" + @test parseshow("1e1000", ignore_errors=true) == "(ErrorNumericOverflow)" + @test parseshow("1f1000", ignore_errors=true) == "(ErrorNumericOverflow)" end end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 4ea295c6713e5..528ef7c1bbe09 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -921,6 +921,25 @@ end @test (t = last(collect(tokenize("+*"))); (t.startbyte, 
t.endbyte)) == (2,1) end +@testset "invalid UTF-8 characters" begin + bad_chars = [ + first("\xe2") # malformed + first("\xc0\x9b") # overlong + first("\xf0\x83\x99\xae") # overlong + ] + + @testset "bad char $(repr(c))" for c in bad_chars + @test Tokenize.is_identifier_char(c) == false + @test Tokenize.is_identifier_start_char(c) == false + @test Tokenize.is_never_id_char(c) == true + @test Tokenize.dotop1(c) == false + @test Tokenize.isopsuffix(c) == false + @test Tokenize.is_operator_start_char(c) == false + @test Tokenize.iswhitespace(c) == false + @test Tokenize.ishex(c) == false + end +end + @testset "dotop miscellanea" begin @test strtok("a .-> b") == ["a", " ", ".-", ">", " ", "b", ""] @test strtok(".>: b") == [".>:", " ", "b", ""] @@ -933,14 +952,4 @@ end @test strtok("a .&&₁ b") == ["a", " ", ".&&", "₁", " ", "b", ""] end -@testset "malformed strings" begin - malformed = first("\xe2") - @test Tokenize.is_identifier_char(malformed) == false - @test Tokenize.is_identifier_start_char(malformed) == false - @test Tokenize.is_never_id_char(malformed) == true - @test Tokenize.dotop1(malformed) == false - @test Tokenize.isopsuffix(malformed) == false - @test Tokenize.is_operator_start_char(malformed) == false -end - end From b444e05220737c132f0e75af30babfaba868bb85 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 1 Feb 2023 11:18:21 +1000 Subject: [PATCH 0572/1109] A few fixes for ranges of diagnostic messages (JuliaLang/JuliaSyntax.jl#187) --- JuliaSyntax/src/parse_stream.jl | 11 ++++++---- JuliaSyntax/src/parser.jl | 37 ++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index a276460f729bd..2189da2314b75 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -778,10 +778,17 @@ function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numby return t2_is_empty end +# Get position of last item emitted into 
the output stream function Base.position(stream::ParseStream) ParseStreamPosition(lastindex(stream.tokens), lastindex(stream.ranges)) end +# Get position of next item to be emitted into the output stream +# TODO: Figure out how to remove this? It's only used with emit_diagnostic +function next_position(stream::ParseStream) + ParseStreamPosition(lastindex(stream.tokens)+1, lastindex(stream.ranges)+1) +end + """ emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing) @@ -837,10 +844,6 @@ function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...) _next_byte(stream) - 1; kws...) end -function emit_diagnostic(stream::ParseStream, r::NTuple{2,ParseStreamPosition}; kws...) - emit_diagnostic(stream, first(r), last(r); kws...) -end - function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, end_mark::ParseStreamPosition; kws...) fbyte = token_first_byte(stream, mark.token_index) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e2b28251690be..11aa94538b4df 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -113,6 +113,10 @@ function Base.position(ps::ParseState, args...) position(ps.stream, args...) end +function next_position(ps::ParseState, args...) + next_position(ps.stream, args...) +end + function emit(ps::ParseState, args...; kws...) emit(ps.stream, args...; kws...) 
end @@ -1232,11 +1236,15 @@ function parse_unary(ps::ParseState) # last case wrong) op_pos = bump_dotsplit(ps, emit_dot_node=true) - # Setup possible whitespace error between operator and ( - ws_mark = position(ps) - bump_trivia(ps) - ws_mark_end = position(ps) - ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE") + space_before_paren = preceding_whitespace(t2) + if space_before_paren + # Setup possible whitespace error between operator and ( + ws_node_mark = position(ps) + ws_mark = next_position(ps) + bump_trivia(ps) + ws_error_pos = emit(ps, ws_node_mark, K"TOMBSTONE") + ws_mark_end = next_position(ps) + end mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( @@ -1251,7 +1259,7 @@ function parse_unary(ps::ParseState) # The precedence between unary + and any following infix ^ depends on # whether the parens are a function call or not if opts.is_paren_call - if preceding_whitespace(t2) + if space_before_paren # Whitespace not allowed before prefix function call bracket # + (a,b) ==> (call + (error) a b) reset_node!(ps, ws_error_pos, kind=K"error") @@ -1592,7 +1600,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x))) # @A.B.x ==> (macrocall (. (. A (quote B)) (quote @x))) # A.@B.x ==> (macrocall (. (. A (error-t) B) (quote @x))) - emit_diagnostic(ps, macro_atname_range, + emit_diagnostic(ps, macro_atname_range..., error="`@` must appear on first or last macro name component") bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") else @@ -1646,7 +1654,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end parse_macro_name(ps) macro_name_position = position(ps) - macro_atname_range = (m, macro_name_position) + macro_atname_range = (m, next_position(ps)) emit(ps, m, K"quote") emit(ps, mark, K".") elseif k == K"'" @@ -2484,10 +2492,12 @@ function parse_import_path(ps::ParseState) # import . .A ==> (import (. . . 
A)) first_dot = true while true - m = position(ps) - bump_trivia(ps) - m2 = position(ps) - k = peek(ps) + t = peek_token(ps) + k = kind(t) + if !first_dot && preceding_whitespace(t) + emit_diagnostic(ps, whitespace=true, + warning="space between dots in import path") + end if k == K"." bump(ps) elseif k == K".." @@ -2497,9 +2507,6 @@ function parse_import_path(ps::ParseState) else break end - if !first_dot && m != m2 - emit_diagnostic(ps, m, m2, warning="space between dots in import path") - end first_dot = false end if is_dotted(peek_token(ps)) From 65c2166663d69074cceef71a4d3f688f4c5abaf1 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 6 Feb 2023 14:39:14 +1000 Subject: [PATCH 0573/1109] More fuzz testing tools and fixes (JuliaLang/JuliaSyntax.jl#188) * Line-deletion based fuzzer * Token-deletion based fuzzer * Fixes for two crashing bugs found this way - `"(y::\nif x z end)"` - `"@(x y)"` --- JuliaSyntax/src/parser.jl | 3 +- JuliaSyntax/test/fuzz_test.jl | 1034 ++++++++++++++++++++++++++++++++- JuliaSyntax/test/parser.jl | 97 ++-- 3 files changed, 1074 insertions(+), 60 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 11aa94538b4df..26e6c51773364 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1778,6 +1778,7 @@ function parse_resword(ps::ParseState) # In normal_context # begin f() where T = x end ==> (block (= (where (call f) T) x)) ps = normal_context(ps) + bump_trivia(ps) mark = position(ps) word = peek(ps) if word in KSet"begin quote" @@ -2335,7 +2336,7 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not # TODO: Clean this up when K"parens" is implemented while true macro_name_position = ParseStreamPosition(macro_name_position.token_index-1, - macro_name_position.range_index) + macro_name_position.range_index-1) b = peek_behind(ps, macro_name_position) k = b.kind if !has_flags(b.flags, TRIVIA_FLAG) diff --git a/JuliaSyntax/test/fuzz_test.jl 
b/JuliaSyntax/test/fuzz_test.jl index 49dc11a23fa4d..c1653c9ebabf2 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -2,7 +2,919 @@ using JuliaSyntax # Parser fuzz testing tools. -function parser_exception(str) +const all_tokens = [ + "#x\n" + "#==#" + " " + "\t" + "\n" + "x" + "@" + "," + ";" + + "baremodule" + "begin" + "break" + "const" + "continue" + "do" + "export" + "for" + "function" + "global" + "if" + "import" + "let" + "local" + "macro" + "module" + "quote" + "return" + "struct" + "try" + "using" + "while" + "catch" + "finally" + "else" + "elseif" + "end" + "abstract" + "as" + "doc" + "mutable" + "outer" + "primitive" + "type" + "var" + + "1" + "0b1" + "0x1" + "0o1" + "1.0" + "1.0f0" + "\"s\"" + "'c'" + "`s`" + "true" + "false" + + "[" + "]" + "{" + "}" + "(" + ")" + "\"" + "\"\"\"" + "`" + "```" + + "=" + "+=" + "-=" # Also used for "−=" + "−=" + "*=" + "/=" + "//=" + "|=" + "^=" + "÷=" + "%=" + "<<=" + ">>=" + ">>>=" + "\\=" + "&=" + ":=" + "~" + "\$=" + "⊻=" + "≔" + "⩴" + "≕" + + "=>" + + "?" 
+ + "-->" + "<--" + "<-->" + "←" + "→" + "↔" + "↚" + "↛" + "↞" + "↠" + "↢" + "↣" + "↤" + "↦" + "↮" + "⇎" + "⇍" + "⇏" + "⇐" + "⇒" + "⇔" + "⇴" + "⇶" + "⇷" + "⇸" + "⇹" + "⇺" + "⇻" + "⇼" + "⇽" + "⇾" + "⇿" + "⟵" + "⟶" + "⟷" + "⟹" + "⟺" + "⟻" + "⟼" + "⟽" + "⟾" + "⟿" + "⤀" + "⤁" + "⤂" + "⤃" + "⤄" + "⤅" + "⤆" + "⤇" + "⤌" + "⤍" + "⤎" + "⤏" + "⤐" + "⤑" + "⤔" + "⤕" + "⤖" + "⤗" + "⤘" + "⤝" + "⤞" + "⤟" + "⤠" + "⥄" + "⥅" + "⥆" + "⥇" + "⥈" + "⥊" + "⥋" + "⥎" + "⥐" + "⥒" + "⥓" + "⥖" + "⥗" + "⥚" + "⥛" + "⥞" + "⥟" + "⥢" + "⥤" + "⥦" + "⥧" + "⥨" + "⥩" + "⥪" + "⥫" + "⥬" + "⥭" + "⥰" + "⧴" + "⬱" + "⬰" + "⬲" + "⬳" + "⬴" + "⬵" + "⬶" + "⬷" + "⬸" + "⬹" + "⬺" + "⬻" + "⬼" + "⬽" + "⬾" + "⬿" + "⭀" + "⭁" + "⭂" + "⭃" + "⭄" + "⭇" + "⭈" + "⭉" + "⭊" + "⭋" + "⭌" + "←" + "→" + "⇜" + "⇝" + "↜" + "↝" + "↩" + "↪" + "↫" + "↬" + "↼" + "↽" + "⇀" + "⇁" + "⇄" + "⇆" + "⇇" + "⇉" + "⇋" + "⇌" + "⇚" + "⇛" + "⇠" + "⇢" + "↷" + "↶" + "↺" + "↻" + + "||" + + "&&" + + "<:" + ">:" + ">" + "<" + ">=" + "≥" + "<=" + "≤" + "==" + "===" + "≡" + "!=" + "≠" + "!==" + "≢" + "∈" + "in" + "isa" + "∉" + "∋" + "∌" + "⊆" + "⊈" + "⊂" + "⊄" + "⊊" + "∝" + "∊" + "∍" + "∥" + "∦" + "∷" + "∺" + "∻" + "∽" + "∾" + "≁" + "≃" + "≂" + "≄" + "≅" + "≆" + "≇" + "≈" + "≉" + "≊" + "≋" + "≌" + "≍" + "≎" + "≐" + "≑" + "≒" + "≓" + "≖" + "≗" + "≘" + "≙" + "≚" + "≛" + "≜" + "≝" + "≞" + "≟" + "≣" + "≦" + "≧" + "≨" + "≩" + "≪" + "≫" + "≬" + "≭" + "≮" + "≯" + "≰" + "≱" + "≲" + "≳" + "≴" + "≵" + "≶" + "≷" + "≸" + "≹" + "≺" + "≻" + "≼" + "≽" + "≾" + "≿" + "⊀" + "⊁" + "⊃" + "⊅" + "⊇" + "⊉" + "⊋" + "⊏" + "⊐" + "⊑" + "⊒" + "⊜" + "⊩" + "⊬" + "⊮" + "⊰" + "⊱" + "⊲" + "⊳" + "⊴" + "⊵" + "⊶" + "⊷" + "⋍" + "⋐" + "⋑" + "⋕" + "⋖" + "⋗" + "⋘" + "⋙" + "⋚" + "⋛" + "⋜" + "⋝" + "⋞" + "⋟" + "⋠" + "⋡" + "⋢" + "⋣" + "⋤" + "⋥" + "⋦" + "⋧" + "⋨" + "⋩" + "⋪" + "⋫" + "⋬" + "⋭" + "⋲" + "⋳" + "⋴" + "⋵" + "⋶" + "⋷" + "⋸" + "⋹" + "⋺" + "⋻" + "⋼" + "⋽" + "⋾" + "⋿" + "⟈" + "⟉" + "⟒" + "⦷" + "⧀" + "⧁" + "⧡" + "⧣" + "⧤" + "⧥" + "⩦" + "⩧" + "⩪" + "⩫" + "⩬" + "⩭" + "⩮" + "⩯" + "⩰" + "⩱" + "⩲" + 
"⩳" + "⩵" + "⩶" + "⩷" + "⩸" + "⩹" + "⩺" + "⩻" + "⩼" + "⩽" + "⩾" + "⩿" + "⪀" + "⪁" + "⪂" + "⪃" + "⪄" + "⪅" + "⪆" + "⪇" + "⪈" + "⪉" + "⪊" + "⪋" + "⪌" + "⪍" + "⪎" + "⪏" + "⪐" + "⪑" + "⪒" + "⪓" + "⪔" + "⪕" + "⪖" + "⪗" + "⪘" + "⪙" + "⪚" + "⪛" + "⪜" + "⪝" + "⪞" + "⪟" + "⪠" + "⪡" + "⪢" + "⪣" + "⪤" + "⪥" + "⪦" + "⪧" + "⪨" + "⪩" + "⪪" + "⪫" + "⪬" + "⪭" + "⪮" + "⪯" + "⪰" + "⪱" + "⪲" + "⪳" + "⪴" + "⪵" + "⪶" + "⪷" + "⪸" + "⪹" + "⪺" + "⪻" + "⪼" + "⪽" + "⪾" + "⪿" + "⫀" + "⫁" + "⫂" + "⫃" + "⫄" + "⫅" + "⫆" + "⫇" + "⫈" + "⫉" + "⫊" + "⫋" + "⫌" + "⫍" + "⫎" + "⫏" + "⫐" + "⫑" + "⫒" + "⫓" + "⫔" + "⫕" + "⫖" + "⫗" + "⫘" + "⫙" + "⫷" + "⫸" + "⫹" + "⫺" + "⊢" + "⊣" + "⟂" + "⫪" + "⫫" + + "<|" + "|>" + + ":" + ".." + "…" + "⁝" + "⋮" + "⋱" + "⋰" + "⋯" + + "\$" + "+" + "-" # also used for "−" + "−" + "++" + "⊕" + "⊖" + "⊞" + "⊟" + "|" + "∪" + "∨" + "⊔" + "±" + "∓" + "∔" + "∸" + "≏" + "⊎" + "⊻" + "⊽" + "⋎" + "⋓" + "⧺" + "⧻" + "⨈" + "⨢" + "⨣" + "⨤" + "⨥" + "⨦" + "⨧" + "⨨" + "⨩" + "⨪" + "⨫" + "⨬" + "⨭" + "⨮" + "⨹" + "⨺" + "⩁" + "⩂" + "⩅" + "⩊" + "⩌" + "⩏" + "⩐" + "⩒" + "⩔" + "⩖" + "⩗" + "⩛" + "⩝" + "⩡" + "⩢" + "⩣" + "¦" + + "*" + "/" + "÷" + "%" + "⋅" # also used for lookalikes "·" and "·" + "·" + "·" + "∘" + "×" + "\\" + "&" + "∩" + "∧" + "⊗" + "⊘" + "⊙" + "⊚" + "⊛" + "⊠" + "⊡" + "⊓" + "∗" + "∙" + "∤" + "⅋" + "≀" + "⊼" + "⋄" + "⋆" + "⋇" + "⋉" + "⋊" + "⋋" + "⋌" + "⋏" + "⋒" + "⟑" + "⦸" + "⦼" + "⦾" + "⦿" + "⧶" + "⧷" + "⨇" + "⨰" + "⨱" + "⨲" + "⨳" + "⨴" + "⨵" + "⨶" + "⨷" + "⨸" + "⨻" + "⨼" + "⨽" + "⩀" + "⩃" + "⩄" + "⩋" + "⩍" + "⩎" + "⩑" + "⩓" + "⩕" + "⩘" + "⩚" + "⩜" + "⩞" + "⩟" + "⩠" + "⫛" + "⊍" + "▷" + "⨝" + "⟕" + "⟖" + "⟗" + "⌿" + "⨟" + + "//" + + "<<" + ">>" + ">>>" + + "^" + "↑" + "↓" + "⇵" + "⟰" + "⟱" + "⤈" + "⤉" + "⤊" + "⤋" + "⤒" + "⤓" + "⥉" + "⥌" + "⥍" + "⥏" + "⥑" + "⥔" + "⥕" + "⥘" + "⥙" + "⥜" + "⥝" + "⥠" + "⥡" + "⥣" + "⥥" + "⥮" + "⥯" + "↑" + "↓" + + "::" + + "where" + + "." + + "!" 
+ "'" + ".'" + "->" + + "¬" + "√" + "∛" + "∜" +] + +const cutdown_tokens = [ + "#x\n" + "#==#" + " " + "\t" + "\n" + "x" + "@" + "," + ";" + + "baremodule" + "begin" + "break" + "const" + "continue" + "do" + "export" + "for" + "function" + "global" + "if" + "import" + "let" + "local" + "macro" + "module" + "quote" + "return" + "struct" + "try" + "using" + "while" + "catch" + "finally" + "else" + "elseif" + "end" + "abstract" + "as" + "doc" + "mutable" + "outer" + "primitive" + "type" + "var" + + "1" + "0b1" + "0x1" + "0o1" + "1.0" + "1.0f0" + "\"s\"" + "'c'" + "`s`" + "true" + "false" + + "[" + "]" + "{" + "}" + "(" + ")" + "\"" + "\"\"\"" + "`" + "```" + + "=" + "+=" + "~" + + "=>" + + "?" + + "-->" + + "||" + + "&&" + + "<:" + ">:" + ">" + "<" + ">=" + "<=" + "==" + "===" + "!=" + + "<|" + "|>" + + ":" + ".." + "…" + + "\$" + "+" + "−" + "-" + "|" + + "*" + "/" + "⋅" # also used for lookalikes "·" and "·" + "·" + "\\" + + "//" + + "<<" + + "^" + + "::" + + "where" + + "." + + "!" + "'" + "->" + + "√" +] + +#------------------------------------------------------------------------------- + +# Rough tokenization interface. 
+# TODO: We should have something like this in parser_api.jl + +struct Token2 + head::JuliaSyntax.SyntaxHead + range::UnitRange{UInt32} +end + +function tokenize(text::String) + ps = JuliaSyntax.ParseStream(text) + JuliaSyntax.parse!(ps, rule=:toplevel) + ts = ps.tokens + output_tokens = Token2[] + for i = 2:length(ts) + if JuliaSyntax.kind(ts[i]) == JuliaSyntax.K"TOMBSTONE" + continue + end + r = ts[i-1].next_byte:thisind(text, ts[i].next_byte-1) + push!(output_tokens, Token2(JuliaSyntax.head(ts[i]), r)) + end + output_tokens +end + +function split_tokens(text::String) + [@view text[t.range] for t in tokenize(text)] +end + + +#------------------------------------------------------------------------------- + +function parser_throws_exception(str) try JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true) false @@ -14,8 +926,8 @@ end """ Reduce test case via combination of bisection and random deletion. -This is suited to randomly generated strings. It might work with more code-like -strings too? +This is suited to randomly generated strings, but it's surprisingly effective +for code-like strings as well. 
""" function rand_reduce(str) while true @@ -24,22 +936,20 @@ function rand_reduce(str) end m1 = thisind(str, length(str)÷2) m2 = nextind(str, m1) - if parser_exception(str[1:m1]) + if parser_throws_exception(str[1:m1]) str = str[1:m1] - elseif parser_exception(str[m2:end]) + elseif parser_throws_exception(str[m2:end]) str = str[m2:end] else - chunklen = 10 + chunklen = clamp(length(str)÷10, 1, 10) reduced = false - if length(str) > chunklen - for i = 1:100 - m = thisind(str, rand(1:length(str)-chunklen)) - s = str[1:m]*str[prevind(str, m+chunklen):end] - if parser_exception(s) - str = s - reduced = true - break - end + for i = 1:100 + m = thisind(str, rand(1:length(str)-chunklen)) + s = str[1:m]*str[nextind(str, m+chunklen):end] + if parser_throws_exception(s) + str = s + reduced = true + break end end if !reduced @@ -51,21 +961,95 @@ end # The parser should never throw an exception. To test whether this is true, # try passing randomly generated bad input data into it. -function fuzz_test(gen_bad_input, N) - for i=1:N - str = gen_bad_input() +function _fuzz_test(bad_input_iter) + error_strings = [] + for str in bad_input_iter try JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true); - catch - @error "Parser threw exception" exception=current_exceptions() - return str + catch exc + !(exc isa InterruptException) || rethrow() + rstr = rand_reduce(str) + @error "Parser threw exception" rstr exception=current_exceptions() + push!(error_strings, rstr) end end - return nothing + return error_strings end -function fuzz_binary(N) - fuzz_test(N) do - String(rand(UInt8, 1_000_000)) +""" +Fuzz test parser against all tuples of length `N` with elements taken from +`tokens`. 
+""" +function fuzz_tokens(tokens, N) + iter = (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...)) + _fuzz_test(iter) +end + +"""Delete `nlines` adjacent lines from code, at `niters` randomly chosen positions""" +function delete_lines(lines, nlines, niters) + selection = trues(length(lines)) + for j=1:niters + i = rand(1:length(lines)-nlines) + selection[i:i+nlines] .= false end + join(lines[selection], '\n') end + +"""Delete `ntokens` adjacent tokens from code, at `niters` randomly chosen positions""" +function delete_tokens(code, tokens, ntokens, niters) + # [ aa bbbb cc d eeeeee ] + # | | | | | | + selection = trues(length(tokens)) + for j=1:niters + i = rand(1:length(tokens)-ntokens) + selection[i:i+ntokens] .= false + end + io = IOBuffer() + i = 1 + while true + while i <= length(selection) && !selection[i] + i += 1 + end + if i > length(selection) + break + end + first_ind = first(tokens[i].range) + while selection[i] && i < length(selection) + i += 1 + end + last_ind = last(tokens[i].range) + write(io, @view code[first_ind:last_ind]) + if i == length(selection) + break + end + end + return String(take!(io)) +end + +#------------------------------------------------------------------------------- +# Fuzzer functions + +""" +Fuzz test parser against randomly generated binary strings +""" +function fuzz_binary(nbytes, N) + bad_strs = _fuzz_test(String(rand(UInt8, nbytes)) for _ in 1:N) + rand_reduce.(bad_strs) +end + +""" +Fuzz test by deleting random lines of some given source `code` +""" +function fuzz_lines(code, N; nlines=10, niters=10) + lines = split(code, '\n') + _fuzz_test(delete_lines(lines, nlines, niters) for _=1:N) +end + +""" +Fuzz test by deleting random tokens from given source `code` +""" +function fuzz_tokens(code, N; ntokens=10, niters=10) + ts = tokenize(code) + _fuzz_test(delete_tokens(code, ts, ntokens, niters) for _=1:N) +end + diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 
a49e85c7e64d4..afb77080506b3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,4 +1,7 @@ -function test_parse(production, code; v=v"1.6", expr=false) +""" +Parse string to SyntaxNode tree and show as an sexpression +""" +function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", expr=false) stream = ParseStream(code, version=v) production(ParseState(stream)) JuliaSyntax.validate_tokens(stream) @@ -16,6 +19,24 @@ function test_parse(production, code; v=v"1.6", expr=false) end end +function test_parse(production, input, output) + if !(input isa AbstractString) + opts, input = input + else + opts = NamedTuple() + end + if output isa Pair + @test parse_to_sexpr_str(production, input; opts...) == output[1] + @test parse_to_sexpr_str(production, input; opts..., expr=true) == output[2] + else + @test parse_to_sexpr_str(production, input; opts...) == output + end +end + +function test_parse(inout::Pair) + test_parse(JuliaSyntax.parse_toplevel, inout...) +end + # TODO: # * Extract the following test cases from the source itself. # * Use only the green tree to generate the S-expressions @@ -897,6 +918,30 @@ tests = [ ], ] +@testset "Inline test cases" begin + @testset "$production" for (production, test_specs) in tests + @testset "$(repr(input))" for (input, output) in test_specs + test_parse(production, input, output) + end + end +end + +parseall_test_specs = [ + # whitespace before keywords in space-insensitive mode + "(y::\nif x z end)" => "(toplevel (::-i y (if x (block z))))" + + # The following may not be ideal error recovery! 
But at least the parser + # shouldn't crash + "@(x y)" => "(toplevel (macrocall x (error-t @y)))" + "|(&\nfunction" => "(toplevel (call | (& (function (error (error)) (block (error)) (error-t))) (error-t)))" +] + +@testset "Parser does not crash on broken code" begin + @testset "$(repr(input))" for (input, output) in parseall_test_specs + test_parse(JuliaSyntax.parse_toplevel, input, output) + end +end + # Known bugs / incompatibilities broken_tests = [ JuliaSyntax.parse_atom => [ @@ -919,35 +964,19 @@ broken_tests = [ ] ] -@testset "Inline test cases" begin - @testset "$production" for (production, test_specs) in tests - @testset "$(repr(input))" for (input, output) in test_specs - if !(input isa AbstractString) - opts, input = input - else - opts = NamedTuple() - end - if output isa Pair - @test test_parse(production, input; opts...) == output[1] - @test test_parse(production, input; opts..., expr=true) == output[2] - else - @test test_parse(production, input; opts...) == output - end - end - end - @testset "Broken $production" for (production, test_specs) in broken_tests - @testset "$(repr(input))" for (input,output) in test_specs - if !(input isa AbstractString) - opts,input = input - else - opts = NamedTuple() - end - @test_broken test_parse(production, input; opts...) == output +@testset "Broken $production" for (production, test_specs) in broken_tests + @testset "$(repr(input))" for (input,output) in test_specs + if !(input isa AbstractString) + opts,input = input + else + opts = NamedTuple() end + @test_broken parse_to_sexpr_str(production, input; opts...) == output end end @testset "Trivia attachment" begin + # TODO: Need to expand this greatly to cover as many forms as possible! 
@test show_green_tree("f(a;b)") == """ 1:6 │[toplevel] 1:6 │ [call] @@ -963,15 +992,15 @@ end @testset "Unicode normalization in tree conversion" begin # ɛµ normalizes to εμ - @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" - @test test_parse(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall @\u03B5\u03BC)" - @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" - @test test_parse(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall @\u03B5\u03BC)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" # · and · normalize to ⋅ - @test test_parse(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" - @test test_parse(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" # − normalizes to - - @test test_parse(JuliaSyntax.parse_expr, "a \u2212 b") == "(call-i a - b)" - @test test_parse(JuliaSyntax.parse_eq, "a \u2212= b") == "(-= a b)" - @test test_parse(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_expr, "a \u2212 b") == "(call-i a - b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u2212= b") == "(-= a b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" end From 056e8cf3de06b747adef5465ac3d7b7b4afdfb5d Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 10 Feb 2023 11:07:33 +1000 Subject: [PATCH 0574/1109] 
Allow SyntaxNode `node[end]` to get the last child (JuliaLang/JuliaSyntax.jl#189) Also add start of more targeted tests for SyntaxNode. --- JuliaSyntax/src/syntax_tree.jl | 4 ++++ JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/syntax_tree.jl | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 JuliaSyntax/test/syntax_tree.jl diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 733dd7506471f..cf6484bbbfeb7 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -254,6 +254,10 @@ end function Base.getindex(node::Union{SyntaxNode,GreenNode}, path::Int...) child(node, path...) end +function Base.lastindex(node::Union{SyntaxNode,GreenNode}) + length(children(node)) +end + function Base.setindex!(node::SyntaxNode, x::SyntaxNode, path::Int...) setchild!(node, path, x) end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 020f33f293b9b..f1a72da287beb 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -48,6 +48,7 @@ end include("parse_stream.jl") include("parser.jl") +include("syntax_tree.jl") include("diagnostics.jl") include("parser_api.jl") include("expr.jl") diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl new file mode 100644 index 0000000000000..3dc69f91e9a0d --- /dev/null +++ b/JuliaSyntax/test/syntax_tree.jl @@ -0,0 +1,18 @@ +@testset "SyntaxNode" begin + # Child access + t = parse(SyntaxNode, "a*b + c") + + @test sourcetext(child(t, 1)) == "a*b" + @test sourcetext(child(t, 1, 1)) == "a" + @test sourcetext(child(t, 1, 2)) == "*" + @test sourcetext(child(t, 1, 3)) == "b" + @test sourcetext(child(t, 2)) == "+" + @test sourcetext(child(t, 3)) == "c" + + # Child indexing + @test t[1] === child(t, 1) + @test t[1, 1] === child(t, 1, 1) + @test t[end] === child(t, 3) + # Unfortunately, can't make t[1, end] work + # as `lastindex(t, 2)` isn't well defined +end From 
5c48e621a0927a1569b08bd7ea24a16619c6f8ad Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 14 Feb 2023 14:24:42 +1000 Subject: [PATCH 0575/1109] Fix various numeric literal token errors (JuliaLang/JuliaSyntax.jl#196) A fairly big refactor of numeric literal tokenization error cases and a couple of other tokenizer errors ported from the flisp code. * For hexfloat, emit a more specific errors when the `p` suffix is missing. * For octal, hex and binary, add errors for trailing invalid digits or identifier characters like `0b123` and `0xenomorph` * Emit an error for ambiguous numeric constants with dot suffix vs juxtuposition like `1.(` * Emit an error for underscore directly after dot as in `1._` * Emit an error for hexfloat without digits `0x.p0` * Add an invalid operator error for `<---` to follow compatibility with the reference parser. --- JuliaSyntax/src/kinds.jl | 6 + JuliaSyntax/src/parser.jl | 3 - JuliaSyntax/src/tokenize.jl | 77 ++++----- JuliaSyntax/src/utils.jl | 4 + JuliaSyntax/test/parser.jl | 12 -- JuliaSyntax/test/tokenize.jl | 310 ++++++++++++++++++----------------- 6 files changed, 210 insertions(+), 202 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index c4171d6b1f5ca..3b5fa1bd10c63 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -16,7 +16,9 @@ const _kind_names = # Tokenization errors "ErrorEofMultiComment" "ErrorInvalidNumericConstant" + "ErrorHexFloatMustContainP" "ErrorAmbiguousNumericConstant" + "ErrorAmbiguousNumericDotMultiply" "ErrorInvalidInterpolationTerminator" "ErrorNumericOverflow" "ErrorInvalidEscapeSequence" @@ -1016,7 +1018,9 @@ const _nonunique_kind_names = Set([ K"ErrorEofMultiComment" K"ErrorInvalidNumericConstant" + K"ErrorHexFloatMustContainP" K"ErrorAmbiguousNumericConstant" + K"ErrorAmbiguousNumericDotMultiply" K"ErrorInvalidInterpolationTerminator" K"ErrorNumericOverflow" K"ErrorInvalidEscapeSequence" @@ -1061,7 +1065,9 @@ end _token_error_descriptions = Dict{Kind, String}( 
K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#", K"ErrorInvalidNumericConstant" => "invalid numeric constant", + K"ErrorHexFloatMustContainP" => "hex float literal must contain `p` or `P`", K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)", + K"ErrorAmbiguousNumericDotMultiply" => "numeric constant cannot be implicitly multiplied because it ends with `.`", K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", K"ErrorNumericOverflow"=>"overflow in numeric literal", K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 26e6c51773364..8ce172513a870 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1103,9 +1103,6 @@ function is_juxtapose(ps, prev_k, t) !(is_block_form(prev_k) || is_syntactic_unary_op(prev_k) || is_initial_reserved_word(ps, prev_k) ))) && - # https://github.com/JuliaLang/julia/issues/16356 - # 0xenomorph ==> 0x0e - !(prev_k in KSet"BinInt HexInt OctInt" && (k == K"Identifier" || is_keyword(k))) && (!is_operator(k) || is_radical_op(k)) && !is_closing_token(ps, k) && !is_initial_reserved_word(ps, k) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 70163010229ca..82cab1232446b 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -614,6 +614,8 @@ function lex_less(l::Lexer) else if accept(l, '>') return emit(l, K"<-->") + elseif accept(l, '-') + return emit_error(l, K"ErrorInvalidOperator") else return emit(l, K"<--") end @@ -772,33 +774,13 @@ function lex_digit(l::Lexer, kind) return emit_error(l, K"ErrorInvalidNumericConstant") elseif is_operator_start_char(ppc) && ppc !== ':' readchar(l) - return emit_error(l, K"ErrorAmbiguousNumericConstant") - elseif (!(isdigit(ppc) || - iswhitespace(ppc) || - is_identifier_start_char(ppc) - || ppc 
== '(' - || ppc == ')' - || ppc == '[' - || ppc == ']' - || ppc == '{' - || ppc == '}' - || ppc == ',' - || ppc == ';' - || ppc == '@' - || ppc == '`' - || ppc == '"' - || ppc == ':' - || ppc == '?' - || ppc == '#' - || ppc == EOF_CHAR)) - kind = K"Integer" - - return emit(l, kind) + return emit_error(l, K"ErrorAmbiguousNumericConstant") # `1.+` end readchar(l) kind = K"Float" - accept_number(l, isdigit) + accept(l, '_') && return emit_error(l, K"ErrorInvalidNumericConstant") # `1._` + had_fraction_digs = accept_number(l, isdigit) pc, ppc = dpeekchar(l) if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') kind = pc == 'f' ? K"Float32" : K"Float" @@ -807,17 +789,20 @@ function lex_digit(l::Lexer, kind) if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) if pc === '.' && !dotop2(ppc) - accept(l, '.') - return emit_error(l, K"ErrorInvalidNumericConstant") + readchar(l) + return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e1.` end else - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e` end - elseif pc == '.' && (is_identifier_start_char(ppc) || ppc == EOF_CHAR) + elseif pc == '.' && ppc != '.' && !is_operator_start_char(ppc) readchar(l) - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit_error(l, K"ErrorInvalidNumericConstant") # `1.1.` + elseif !had_fraction_digs && (is_identifier_start_char(pc) || + pc == '(' || pc == '[' || pc == '{' || + pc == '@' || pc == '`' || pc == '"') + return emit_error(l, K"ErrorAmbiguousNumericDotMultiply") # `1.(` `1.x` end - elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') kind = pc == 'f' ? K"Float32" : K"Float" readchar(l) @@ -826,44 +811,54 @@ function lex_digit(l::Lexer, kind) pc,ppc = dpeekchar(l) if pc === '.' 
&& !dotop2(ppc) accept(l, '.') - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit_error(l, K"ErrorInvalidNumericConstant") # `1e1.` end else - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit_error(l, K"ErrorInvalidNumericConstant") # `1e+` end elseif position(l) - startpos(l) == 1 && l.chars[1] == '0' kind == K"Integer" + is_bin_oct_hex_int = false if pc == 'x' kind = K"HexInt" isfloat = false readchar(l) - !(ishex(ppc) || ppc == '.') && return emit_error(l, K"ErrorInvalidNumericConstant") - accept_number(l, ishex) + had_digits = accept_number(l, ishex) pc,ppc = dpeekchar(l) if pc == '.' && ppc != '.' readchar(l) - accept_number(l, ishex) + had_digits |= accept_number(l, ishex) isfloat = true end if accept(l, "pP") kind = K"Float" accept(l, "+-−") - if !accept_number(l, isdigit) - return emit_error(l, K"ErrorInvalidNumericConstant") + if !accept_number(l, isdigit) || !had_digits + return emit_error(l, K"ErrorInvalidNumericConstant") # `0x1p` `0x.p0` end elseif isfloat - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit_error(l, K"ErrorHexFloatMustContainP") # `0x.` `0x1.0` end + is_bin_oct_hex_int = !isfloat elseif pc == 'b' - !isbinary(ppc) && return emit_error(l, K"ErrorInvalidNumericConstant") readchar(l) - accept_number(l, isbinary) + had_digits = accept_number(l, isbinary) kind = K"BinInt" + is_bin_oct_hex_int = true elseif pc == 'o' - !isoctal(ppc) && return emit_error(l, K"ErrorInvalidNumericConstant") readchar(l) - accept_number(l, isoctal) + had_digits = accept_number(l, isoctal) kind = K"OctInt" + is_bin_oct_hex_int = true + end + if is_bin_oct_hex_int + pc = peekchar(l) + if !had_digits || isdigit(pc) || is_identifier_start_char(pc) + accept_batch(l, c->isdigit(c) || is_identifier_start_char(c)) + # `0x` `0xg` `0x_` `0x-` + # `0b123` `0o78p` `0xenomorph` `0xaα` + return emit_error(l, K"ErrorInvalidNumericConstant") + end end end return emit(l, kind) diff --git a/JuliaSyntax/src/utils.jl 
b/JuliaSyntax/src/utils.jl index 04f7d8ad40cdc..bc30e22283c8c 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -121,3 +121,7 @@ function _fl_parse_string(text::AbstractString, filename::AbstractString, ex, offset+1 end +# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases. +fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...) +fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...) + diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index afb77080506b3..a01b9cc2df2c7 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -193,7 +193,6 @@ tests = [ "sqrt(2)2" => "(call sqrt 2)" "x' y" => "(call-post x ')" "x 'y" => "x" - "0xenomorph" => "0x0e" ], JuliaSyntax.parse_unary => [ ":T" => "(quote T)" @@ -950,17 +949,6 @@ broken_tests = [ "@!x" => "(macrocall @! x)" "@..x" => "(macrocall @.. x)" "@.x" => "(macrocall @__dot__ x)" - # Invalid numeric literals, not juxtaposition - "0b12" => "(error \"0b12\")" - "0xex" => "(error \"0xex\")" - # Bad character literals - "'\\xff'" => "(error '\\xff')" - "'\\x80'" => "(error '\\x80')" - "'ab'" => "(error 'ab')" - ] - JuliaSyntax.parse_juxtapose => [ - # Want: "numeric constant \"10.\" cannot be implicitly multiplied because it ends with \".\"" - "10.x" => "(error (call * 10.0 x))" ] ] diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 528ef7c1bbe09..d1587e6d0236d 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -28,6 +28,13 @@ function toks(str) ts end +function onlytok(str) + ts = collect(tokenize(str)) + (length(ts) == 2 && ts[2].kind == K"EndMarker") || + error("Expected one token got $(length(ts)-1)") + return ts[1].kind +end + @testset "tokens" begin for s in ["a", IOBuffer("a")] l = tokenize(s) @@ -191,25 +198,8 @@ end end -function test_roundtrip(str, kind, val) - t = tok(str) - @test t.kind == kind - @test untokenize(t, str) == val -end - 
roundtrip(str) = join(untokenize.(collect(tokenize(str)), str)) -@testset "tokenizing juxtaposed numbers and dotted operators/identifiers" begin - test_roundtrip("1234 .+1", K"Integer", "1234") - test_roundtrip("1234.0+1", K"Float", "1234.0") - test_roundtrip("1234.0 .+1", K"Float", "1234.0") - test_roundtrip("1234.f(a)", K"Float", "1234.") - test_roundtrip("1234 .f(a)", K"Integer", "1234") - test_roundtrip("1234.0.f(a)", K"ErrorInvalidNumericConstant", "1234.0.") - test_roundtrip("1234.0 .f(a)", K"Float", "1234.0") -end - - @testset "lexing anon functions '->' " begin @test tok("a->b", 2).kind==K"->" end @@ -546,114 +536,172 @@ end end @testset "modifying function names (!) followed by operator" begin - @test tok("a!=b", 2).kind == K"!=" - @test tok("a!!=b", 2).kind == K"!=" - @test tok("!=b", 1).kind == K"!=" -end + @test toks("a!=b") == ["a"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"] + @test toks("a!!=b") == ["a!"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"] + @test toks("!=b") == ["!="=>K"!=", "b"=>K"Identifier"] +end + +@testset "integer literals" begin + @test onlytok("1234") == K"Integer" + @test onlytok("12_34") == K"Integer" + + @test toks("1234_") == ["1234"=>K"Integer", "_"=>K"Identifier"] + @test toks("1234x") == ["1234"=>K"Integer", "x"=>K"Identifier"] + + @test onlytok("_1234") == K"Identifier" + + @test toks("1__2") == ["1"=>K"Integer", "__2"=>K"Identifier"] +end + +@testset "hex integer literals" begin + @test onlytok("0x0167_032") == K"HexInt" + @test onlytok("0x2_0_2") == K"HexInt" + # trailing junk + # https://github.com/JuliaLang/julia/issues/16356 + @test onlytok("0xenomorph") == K"ErrorInvalidNumericConstant" + @test onlytok("0xaα") == K"ErrorInvalidNumericConstant" + @test toks("0x ") == ["0x"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"] + @test onlytok("0x") == K"ErrorInvalidNumericConstant" + @test onlytok("0xg") == K"ErrorInvalidNumericConstant" + @test onlytok("0x_") == K"ErrorInvalidNumericConstant" + @test toks("0x-") 
== ["0x"=>K"ErrorInvalidNumericConstant", "-"=>K"-"] +end + +@testset "hexfloat literals" begin + @test onlytok("0x.1p1") == K"Float" + @test onlytok("0x00p2") == K"Float" + @test onlytok("0x00P2") == K"Float" + @test onlytok("0x0.00p23") == K"Float" + @test onlytok("0x0.0ap23") == K"Float" + @test onlytok("0x0.0_0p2") == K"Float" + @test onlytok("0x0_0_0.0_0p2") == K"Float" + @test onlytok("0x0p+2") == K"Float" + @test onlytok("0x0p-2") == K"Float" + # errors + @test onlytok("0x") == K"ErrorInvalidNumericConstant" + @test onlytok("0x2__2") == K"ErrorInvalidNumericConstant" + @test onlytok("0x1p") == K"ErrorInvalidNumericConstant" + @test onlytok("0x.p0") == K"ErrorInvalidNumericConstant" + @test onlytok("0x.") == K"ErrorHexFloatMustContainP" + @test onlytok("0x1.0") == K"ErrorHexFloatMustContainP" +end + +@testset "binary literals" begin + @test onlytok("0b0101001_0100_0101") == K"BinInt" + + @test onlytok("0b") == K"ErrorInvalidNumericConstant" + @test toks("0b ") == ["0b"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"] + @test onlytok("0b101__101") == K"ErrorInvalidNumericConstant" + @test onlytok("0b123") == K"ErrorInvalidNumericConstant" +end + +@testset "octal literals" begin + @test onlytok("0o0167") == K"OctInt" + @test onlytok("0o01054001_0100_0101") == K"OctInt" + + @test onlytok("0o") == K"ErrorInvalidNumericConstant" + @test onlytok("0o78p") == K"ErrorInvalidNumericConstant" + @test toks("0o ") == ["0o"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"] +end + +@testset "float literals" begin + @test onlytok("1.0") == K"Float" + + @test onlytok("1.0e0") == K"Float" + @test onlytok("1.0e-0") == K"Float" + @test onlytok("1.0E0") == K"Float" + @test onlytok("1.0E-0") == K"Float" + @test onlytok("1.0f0") == K"Float32" + @test onlytok("1.0f-0") == K"Float32" + @test onlytok("1.e0") == K"Float" + @test onlytok("1.f0") == K"Float32" + + @test onlytok("0e0") == K"Float" + @test onlytok("0e+0") == K"Float" + @test onlytok("0E0") == K"Float" + @test 
onlytok("201E+0") == K"Float" + @test onlytok("2f+0") == K"Float32" + @test onlytok("2048f0") == K"Float32" + + # underscores + @test onlytok("1_1.11") == K"Float" + @test onlytok("11.1_1") == K"Float" + @test onlytok("1_1.1_1") == K"Float" + @test onlytok("1.2_3") == K"Float" + @test onlytok("3_2.5_2") == K"Float" + @test toks("_1.1_1") == ["_1"=>K"Identifier", ".1_1"=>K"Float"] + + # juxtapositions with identifiers + @test toks("3e2_2") == ["3e2"=>K"Float", "_2"=>K"Identifier"] + @test toks("1e") == ["1"=>K"Integer", "e"=>K"Identifier"] + + @test toks("1.:0") == ["1."=>K"Float", ":"=>K":", "0"=>K"Integer"] -@testset "lex integers" begin - @test kind(tok("1234")) == K"Integer" - @test kind(tok("12_34")) == K"Integer" - @test kind(tok("_1234")) == K"Identifier" - @test kind(tok("1234_")) == K"Integer" - @test kind(tok("1234_", 2)) == K"Identifier" - @test kind(tok("1234x")) == K"Integer" - @test kind(tok("1234x", 2)) == K"Identifier" + # Floating point with \minus rather than - + @test onlytok("1.0e−0") == K"Float" + @test onlytok("1.0f−0") == K"Float32" + @test onlytok("0x0p−2") == K"Float" + + # Errors + @test onlytok("1._") == K"ErrorInvalidNumericConstant" + @test onlytok("1.1.") == K"ErrorInvalidNumericConstant" + @test onlytok("1e+") == K"ErrorInvalidNumericConstant" + @test onlytok("1.0e+") == K"ErrorInvalidNumericConstant" + @test onlytok("1.e1.") == K"ErrorInvalidNumericConstant" + @test onlytok("1e1.") == K"ErrorInvalidNumericConstant" + @test toks("1.e") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "e"=>K"Identifier"] + @test toks("3.2e2.2") == ["3.2e2."=>K"ErrorInvalidNumericConstant", "2"=>K"Integer"] + @test toks("3e2.2") == ["3e2."=>K"ErrorInvalidNumericConstant", "2"=>K"Integer"] + @test toks("1.2.f") == ["1.2."=>K"ErrorInvalidNumericConstant", "f"=>K"Identifier"] end @testset "numbers with trailing `.` " begin - @test tok("1.0").kind == K"Float" - @test tok("1.a").kind == K"Float" - @test tok("1.(").kind == K"Float" - @test tok("1.[").kind == 
K"Float" - @test tok("1.{").kind == K"Float" - @test tok("1.)").kind == K"Float" - @test tok("1.]").kind == K"Float" - @test tok("1.{").kind == K"Float" - @test tok("1.,").kind == K"Float" - @test tok("1.;").kind == K"Float" - @test tok("1.@").kind == K"Float" - @test tok("1.#").kind == K"Float" - @test tok("1.").kind == K"Float" - @test tok("1.\"text\" ").kind == K"Float" + @test toks("1.") == ["1."=>K"Float"] + @test toks("1.)") == ["1."=>K"Float", ")"=>K")"] + @test toks("1.]") == ["1."=>K"Float", "]"=>K"]"] + @test toks("1.}") == ["1."=>K"Float", "}"=>K"}"] + @test toks("1.,") == ["1."=>K"Float", ","=>K","] + @test toks("1.;") == ["1."=>K"Float", ";"=>K";"] + @test toks("1.#") == ["1."=>K"Float", "#"=>K"Comment"] + + # ellipses @test toks("1..") == ["1"=>K"Integer", ".."=>K".."] + @test toks("1...") == ["1"=>K"Integer", "..."=>K"..."] @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] - @test kind.(collect(tokenize("1f0./1"))) == [K"Float32", K"/", K"Integer", K"EndMarker"] -end - - - -@testset "lex octal" begin - @test tok("0o0167").kind == K"OctInt" -end - -@testset "lex float/bin/hex/oct w underscores" begin - @test tok("1_1.11").kind == K"Float" - @test tok("11.1_1").kind == K"Float" - @test tok("1_1.1_1").kind == K"Float" - @test tok("_1.1_1", 1).kind == K"Identifier" - @test tok("_1.1_1", 2).kind == K"Float" - @test tok("0x0167_032").kind == K"HexInt" - @test tok("0b0101001_0100_0101").kind == K"BinInt" - @test tok("0o01054001_0100_0101").kind == K"OctInt" - @test kind.(collect(tokenize("1.2."))) == [K"ErrorInvalidNumericConstant", K"EndMarker"] - @test tok("1__2").kind == K"Integer" - @test tok("1.2_3").kind == K"Float" - @test tok("1.2_3", 2).kind == K"EndMarker" - @test kind.(collect(tokenize("3e2_2"))) == [K"Float", K"Identifier", K"EndMarker"] - @test kind.(collect(tokenize("1__2"))) == [K"Integer", K"Identifier", K"EndMarker"] - @test kind.(collect(tokenize("0x2_0_2"))) == [K"HexInt", 
K"EndMarker"] - @test kind.(collect(tokenize("0x2__2"))) == [K"HexInt", K"Identifier", K"EndMarker"] - @test kind.(collect(tokenize("3_2.5_2"))) == [K"Float", K"EndMarker"] - @test kind.(collect(tokenize("3.2e2.2"))) == [K"ErrorInvalidNumericConstant", K"Integer", K"EndMarker"] - @test kind.(collect(tokenize("3e2.2"))) == [K"ErrorInvalidNumericConstant", K"Integer", K"EndMarker"] - @test kind.(collect(tokenize("0b101__101"))) == [K"BinInt", K"Identifier", K"EndMarker"] - @test tok("0x1p").kind == K"ErrorInvalidNumericConstant" -end - -@testset "floating points" begin - @test tok("1.0e0").kind == K"Float" - @test tok("1.0e-0").kind == K"Float" - @test tok("1.0E0").kind == K"Float" - @test tok("1.0E-0").kind == K"Float" - @test tok("1.0f0").kind == K"Float32" - @test tok("1.0f-0").kind == K"Float32" - - @test tok("0e0").kind == K"Float" - @test tok("0e+0").kind == K"Float" - @test tok("0E0").kind == K"Float" - @test tok("201E+0").kind == K"Float" - @test tok("2f+0").kind == K"Float32" - @test tok("2048f0").kind == K"Float32" - @test tok("1.:0").kind == K"Float" - @test tok("0x00p2").kind == K"Float" - @test tok("0x00P2").kind == K"Float" - @test tok("0x0.00p23").kind == K"Float" - @test tok("0x0.0ap23").kind == K"Float" - @test tok("0x0.0_0p2").kind == K"Float" - @test tok("0x0_0_0.0_0p2").kind == K"Float" - @test tok("0x0p+2").kind == K"Float" - @test tok("0x0p-2").kind == K"Float" + # Dotted operators and other dotted sufficies + @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", ".+"=>K"+", "1"=>K"Integer"] + @test toks("1234.0+1") == ["1234.0"=>K"Float", "+"=>K"+", "1"=>K"Integer"] + @test toks("1234.0 .+1") == ["1234.0"=>K"Float", " "=>K"Whitespace", ".+"=>K"+", "1"=>K"Integer"] + @test toks("1234 .f(a)") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", + "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] + @test toks("1234.0 .f(a)") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", + "f"=>K"Identifier", "("=>K"(", 
"a"=>K"Identifier", ")"=>K")"] + @test toks("1f0./1") == ["1f0"=>K"Float32", "./"=>K"/", "1"=>K"Integer"] + + # Ambiguous dotted operators + @test toks("1.+") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+"] + @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"] + @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"] + @test toks("1.?") == ["1."=>K"ErrorAmbiguousNumericConstant", "?"=>K"?"] - # Floating point with \minus rather than - - @test tok("1.0e−0").kind == K"Float" - @test tok("1.0f−0").kind == K"Float32" - @test tok("0x0p−2").kind == K"Float" + # Ambiguous - literal vs multiply by juxtaposition + @test toks("1.x") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "x"=>K"Identifier"] + @test toks("1.(") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "("=>K"("] + @test toks("1.[") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "["=>K"["] + @test toks("1.{") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "{"=>K"{"] + @test toks("1.@") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "@"=>K"@"] + @test toks("1.\"") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "\""=>K"\""] end -@testset "1e1" begin - @test tok("1e", 1).kind == K"Integer" - @test tok("1e", 2).kind == K"Identifier" -end +@testset "julia 0.6 types" begin + @test onlytok("mutable") == K"mutable" + @test onlytok("primitive") == K"primitive" + @test onlytok("struct") == K"struct" + @test onlytok("where") == K"where" -@testset "jl06types" begin - @test tok("mutable").kind == K"mutable" - @test tok("primitive").kind == K"primitive" - @test tok("struct").kind == K"struct" - @test tok("where").kind == K"where" @test tok("mutable struct s{T} where T", 1).kind == K"mutable" @test tok("mutable struct s{T} where T", 3).kind == K"struct" @test tok("mutable struct s{T} where T", 10).kind == K"where" @@ -687,15 +735,6 @@ end @test length(collect(tokenize(io))) == 4 end -@testset "hex/bin/octal errors" begin - @test tok("0x").kind == 
K"ErrorInvalidNumericConstant" - @test tok("0b").kind == K"ErrorInvalidNumericConstant" - @test tok("0o").kind == K"ErrorInvalidNumericConstant" - @test tok("0x 2", 1).kind == K"ErrorInvalidNumericConstant" - @test tok("0x.1p1").kind == K"Float" -end - - @testset "dotted and suffixed operators" begin ops = collect(values(Tokenize.UNICODE_OPS_REVERSE)) @@ -755,35 +794,12 @@ end @test tok("outer", 1).kind==K"outer" end -function test_error(tok, kind) - @test is_error(tok.kind) - @test tok.kind == kind -end - -@testset "token errors" begin - test_error(tok("1.2e2.3",1), K"ErrorInvalidNumericConstant") - test_error(tok("1.2.",1), K"ErrorInvalidNumericConstant") - test_error(tok("1.2.f",1), K"ErrorInvalidNumericConstant") - test_error(tok("0xv",1), K"ErrorInvalidNumericConstant") - test_error(tok("0b3",1), K"ErrorInvalidNumericConstant") - test_error(tok("0op",1), K"ErrorInvalidNumericConstant") - test_error(tok("--",1), K"ErrorInvalidOperator") - - @test toks("1e+") == ["1e+"=>K"ErrorInvalidNumericConstant"] - @test toks("1.0e+") == ["1.0e+"=>K"ErrorInvalidNumericConstant"] - @test toks("0x.") == ["0x."=>K"ErrorInvalidNumericConstant"] - +@testset "invalid operator errors" begin + @test toks("--") == ["--"=>K"ErrorInvalidOperator"] @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"] @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] - - @test toks("1.+2") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", "2"=>K"Integer"] - @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"] - @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"] - @test toks("1.?") == ["1."=>K"ErrorAmbiguousNumericConstant", "?"=>K"?"] - - @test toks("\x00") == 
["\x00"=>K"ErrorUnknownCharacter"] end @testset "hat suffix" begin @@ -922,6 +938,8 @@ end end @testset "invalid UTF-8 characters" begin + @test onlytok("\x00") == K"ErrorUnknownCharacter" + bad_chars = [ first("\xe2") # malformed first("\xc0\x9b") # overlong From b6cbfd1f348ee9e76b65ee2bb28394de3d967cab Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 14 Feb 2023 09:08:35 -0600 Subject: [PATCH 0576/1109] Add lineno to SourceFile (JuliaLang/JuliaSyntax.jl#191) Co-authored-by: c42f --- JuliaSyntax/src/source_files.jl | 35 ++++++++++++++++++-------------- JuliaSyntax/test/source_files.jl | 17 ++++++++++++++++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index a756610f7a78c..fe78185ca8eb5 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -1,7 +1,9 @@ """ - SourceFile(code [; filename=nothing]) + SourceFile(code [; filename=nothing, first_line=1]) -A UTF-8 source code string with associated file name and indexing structures. +A UTF-8 source code string with associated file name and line number. + +`SourceFile` stores the character positions of line starts to facilitate indexing. 
""" struct SourceFile # We use `code::String` for now but it could be some other UTF-8 based @@ -11,11 +13,12 @@ struct SourceFile # https://en.wikipedia.org/wiki/Rope_(data_structure) code::String filename::Union{Nothing,String} + first_line::Int # String index of start of every line line_starts::Vector{Int} end -function SourceFile(code::AbstractString; filename=nothing) +function SourceFile(code::AbstractString; filename=nothing, first_line=1) line_starts = Int[1] for i in eachindex(code) # The line is considered to start after the `\n` @@ -25,31 +28,33 @@ function SourceFile(code::AbstractString; filename=nothing) if isempty(code) || last(code) != '\n' push!(line_starts, ncodeunits(code)+1) end - SourceFile(code, filename, line_starts) + SourceFile(code, filename, first_line, line_starts) end -function SourceFile(; filename) - SourceFile(read(filename, String); filename=filename) +function SourceFile(; filename, kwargs...) + SourceFile(read(filename, String); filename=filename, kwargs...) end # Get line number of the given byte within the code -function source_line(source::SourceFile, byte_index) - line = searchsortedlast(source.line_starts, byte_index) - return (line < lastindex(source.line_starts)) ? line : line-1 +function source_line_index(source::SourceFile, byte_index) + lineidx = searchsortedlast(source.line_starts, byte_index) + return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1 end +_source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 +source_line(source::SourceFile, byte_index) = _source_line(source, source_line_index(source, byte_index)) """ Get line number and character within the line at the given byte index. 
""" function source_location(source::SourceFile, byte_index) - line = source_line(source, byte_index) - i = source.line_starts[line] + lineidx = source_line_index(source, byte_index) + i = source.line_starts[lineidx] column = 1 while i < byte_index i = nextind(source.code, i) column += 1 end - line, column + _source_line(source, lineidx), column end """ @@ -58,9 +63,9 @@ Get byte range of the source line at byte_index, buffered by """ function source_line_range(source::SourceFile, byte_index; context_lines_before=0, context_lines_after=0) - line = source_line(source, byte_index) - fbyte = source.line_starts[max(line-context_lines_before, 1)] - lbyte = source.line_starts[min(line+1+context_lines_after, end)] - 1 + lineidx = source_line_index(source, byte_index) + fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] + lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1 fbyte,lbyte end diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 9907192612e12..88a1cad82521f 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -9,4 +9,21 @@ @test source_location(SourceFile("a\nb\n"), 3) == (2,1) @test source_location(SourceFile("a\nb\n"), 4) == (2,2) @test source_location(SourceFile("a\nb\n"), 5) == (2,3) + + @test source_location(SourceFile("a"; first_line=7), 1) == (7,1) + @test source_location(SourceFile("a"; first_line=7), 2) == (7,2) + + @test source_location(SourceFile("a\n"; first_line=7), 2) == (7,2) + @test source_location(SourceFile("a\n"; first_line=7), 3) == (7,3) + + @test source_location(SourceFile("a\nb\n"; first_line=7), 2) == (7,2) + @test source_location(SourceFile("a\nb\n"; first_line=7), 3) == (8,1) + @test source_location(SourceFile("a\nb\n"; first_line=7), 4) == (8,2) + @test source_location(SourceFile("a\nb\n"; first_line=7), 5) == (8,3) + + mktemp() do path, io + write(io, "a\n") + @test source_location(SourceFile(; filename=path), 1) == (1,1) + @test 
source_location(SourceFile(; filename=path, first_line=7), 1) == (7,1) + end end From c0d9a5df99e026ade101242caee6c468c7868633 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Feb 2023 20:16:28 +1000 Subject: [PATCH 0577/1109] Fix for operator-named macros: parse `@+x` as `@+ x` (JuliaLang/JuliaSyntax.jl#197) Allow operator-named macros to be used without spaces. --- JuliaSyntax/src/parser.jl | 23 ++++++++++++++++------- JuliaSyntax/test/parser.jl | 32 +++++++++----------------------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8ce172513a870..6318054a5496f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1466,7 +1466,13 @@ function parse_identifier_or_interpolate(ps::ParseState) end # Parses a chain of sufficies at function call precedence, leftmost binding -# tightest. +# tightest. This handles +# * Bracketed calls like a() b[] c{} +# * Field access like a.b.c +# - Various dotted syntax like f.() and f.:x +# * Adjoint suffix like a' +# * String macros like a"str" b"""str""" c`str` d```str``` +# # f(a).g(b) ==> (call (. (call f a) (quote g)) b) # # flisp: parse-call-chain, parse-call-with-initial-ex @@ -1487,15 +1493,23 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) maybe_strmac_1 = false t = peek_token(ps) k = kind(t) - if is_macrocall && (preceding_whitespace(t) || is_closing_token(ps, k)) + if !is_macrocall && ps.space_sensitive && preceding_whitespace(t) && + k in KSet"( [ { \" \"\"\" ` ```" + # [f (x)] ==> (hcat f x) + # [f x] ==> (hcat f x) + break + elseif is_macrocall && (preceding_whitespace(t) || !(k in KSet"( [ { ' .")) # Macro calls with space-separated arguments # @foo a b ==> (macrocall @foo a b) # @foo (x) ==> (macrocall @foo x) # @foo (x,y) ==> (macrocall @foo (tuple x y)) # [@foo x] ==> (vect (macrocall @foo x)) + # [@foo] ==> (vect (macrocall @foo)) # @var"#" a ==> (macrocall (var @#) a) # A.@x y ==> (macrocall (. 
A (quote @x)) y) # A.@var"#" a ==> (macrocall (. A (quote (var @#))) a) + # @+x y ==> (macrocall @+ x y) + # A.@.x ==> (macrocall (. A (quote @.)) x) fix_macro_name_kind!(ps, macro_name_position) let ps = with_space_sensitive(ps) # Space separated macro arguments @@ -1523,11 +1537,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, mark, K"macrocall") end break - elseif (ps.space_sensitive && preceding_whitespace(t) && - k in KSet"( [ { \ Char \" \"\"\" ` ```") - # [f (x)] ==> (hcat f x) - # [f x] ==> (hcat f x) - break elseif k == K"(" # f(a,b) ==> (call f a b) # f(a=1; b=2) ==> (call f (= a 1) (parameters (= b 2))) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a01b9cc2df2c7..f95f176e9ed03 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -306,6 +306,9 @@ tests = [ "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" + # non-errors in space sensitive contexts + "[f (x)]" => "(hcat f x)" + "[f x]" => "(hcat f x)" # space separated macro calls "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo x)" @@ -313,9 +316,12 @@ tests = [ "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" "[@foo x]" => "(vect (macrocall @foo x))" + "[@foo]" => "(vect (macrocall @foo))" "@var\"#\" a" => "(macrocall (var @#) a)" => Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) "A.@x y" => "(macrocall (. A (quote @x)) y)" "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" => Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) + "@+x y" => "(macrocall @+ x y)" + "A.@.x" => "(macrocall (. A (quote @.)) x)" # Macro names "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. 
x)" @@ -329,9 +335,6 @@ tests = [ "@doc x\n\ny" => "(macrocall @doc x)" "@doc x\nend" => "(macrocall @doc x)" - # non-errors in space sensitive contexts - "[f (x)]" => "(hcat f x)" - "[f x]" => "(hcat f x)" # calls with brackets "f(a,b)" => "(call f a b)" "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" => @@ -941,26 +944,9 @@ parseall_test_specs = [ end end -# Known bugs / incompatibilities -broken_tests = [ - JuliaSyntax.parse_atom => [ - """var""\"x""\"""" => "x" - # Operator-named macros without spaces - "@!x" => "(macrocall @! x)" - "@..x" => "(macrocall @.. x)" - "@.x" => "(macrocall @__dot__ x)" - ] -] - -@testset "Broken $production" for (production, test_specs) in broken_tests - @testset "$(repr(input))" for (input,output) in test_specs - if !(input isa AbstractString) - opts,input = input - else - opts = NamedTuple() - end - @test_broken parse_to_sexpr_str(production, input; opts...) == output - end +@testset "Broken tests" begin + # Technically broken. But do we even want this behavior? 
+ @test_broken parse_to_sexpr_str(JuliaSyntax.parse_eq, "var\"\"\"x\"\"\"") == "(var x)" end @testset "Trivia attachment" begin From 850d8f018d9604cb185266c94634cea62307de09 Mon Sep 17 00:00:00 2001 From: Waldir Pimenta Date: Wed, 15 Feb 2023 14:39:08 +0000 Subject: [PATCH 0578/1109] Various improvements to the README (JuliaLang/JuliaSyntax.jl#195) - Slightly reword some passages for clarity - Use punctuation to help reveal sentence structure - Use section links to provide additional context and make the document more cohesive --- JuliaSyntax/README.md | 58 ++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 50cb8e1458f5b..33a42cb40a8f3 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -63,10 +63,10 @@ line:col│ byte_range │ tree │ file_name ``` Internally this has a full representation of all syntax trivia (whitespace and -comments) as can be seen with the more raw "green tree" representation with -`GreenNode`. Here ranges on the left are byte ranges, and `✔` flags nontrivia -tokens. Note that the parentheses are trivia in the tree representation, -despite being important for parsing. +comments) as can be seen with the more raw ["green tree"](#raw-syntax-tree--green-tree) +representation with `GreenNode`. Here ranges on the left are byte ranges, and +`✔` flags nontrivia tokens. Note that the parentheses are trivia in the tree +representation, despite being important for parsing. ```julia julia> text = "(x + y)*z" @@ -211,7 +211,7 @@ For lossless parsing the output spans must cover the entire input text. Using * Parent spans are emitted after all their children. These properties make the output spans naturally isomorphic to a -["green tree"](https://ericlippert.com/2012/06/08/red-green-trees/) +["green tree"](#raw-syntax-tree--green-tree) in the terminology of C#'s Roslyn compiler. 
### Tree construction @@ -533,6 +533,8 @@ There's arguably a few downsides: # Differences from the flisp parser +_See also the [§ Comparisons to other packages](#comparisons-to-other-packages) section._ + Practically the flisp parser is not quite a classic [recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it often looks back and modifies the output tree it has already produced. We've @@ -767,6 +769,8 @@ parsing `key=val` pairs inside parentheses. ### Official Julia compiler +_See also the [§ Differences from the flisp parser](#differences-from-the-flisp-parser) section._ + The official Julia compiler frontend lives in the Julia source tree. It's mostly contained in just a few files: * The parser in [src/julia-parser.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-parser.scm) @@ -793,41 +797,42 @@ structures and FFI is complex and inefficient. ### JuliaParser.jl [JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl) -was a direct port of Julia's flisp reference parser but was abandoned around -Julia 0.5 or so. However it doesn't support lossless parsing and doing so would -amount to a full rewrite. Given the divergence with the flisp reference parser -since Julia-0.5, it seemed better just to start with the reference parser -instead. +was a direct port of Julia's flisp reference parser, but was abandoned around +Julia 0.5 or so. Furthermore, it doesn't support lossless parsing, and adding +that feature would amount to a full rewrite. Given its divergence with the flisp +reference parser since Julia-0.5, it seemed better just to start anew from the +reference parser instead. ### Tokenize.jl [Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) is a fast lexer for Julia code. The code from Tokenize has been imported and used in JuliaSyntax, with some major modifications as discussed in -the lexer implementation section. +the [lexer implementation](#lexing) section. 
### CSTParser.jl [CSTParser.jl](https://github.com/julia-vscode/CSTParser.jl) is a ([mostly?](https://github.com/domluna/JuliaFormatter.jl/issues/52#issuecomment-529945126)) -lossless parser with goals quite similar to JuliaParser and used extensively in -the VSCode / LanguageServer / JuliaFormatter ecosystem. CSTParser is very useful -but I do find the implementation hard to understand and I wanted to try a fresh -approach with a focus on: +lossless parser with goals quite similar to JuliaParser. It is used extensively +in the VSCode / LanguageServer / JuliaFormatter ecosystem. CSTParser is very +useful, but I do find the implementation hard to understand, and I wanted to try +a fresh approach with a focus on: -* "Production readyness": Good docs, tests, diagnostics and maximum similarity +* "Production readiness": Good docs, tests, diagnostics and maximum similarity with the flisp parser, with the goal of getting the new parser into `Core`. * Learning from the latest ideas about composable parsing and data structures from outside Julia. In particular the implementation of `rust-analyzer` is - very clean, well documented, and a great source of inspiration. + very clean, well documented, and was a great source of inspiration. * Composability of tree data structures — I feel like the trees should be - layered somehow with a really lightweight green tree at the most basic level, - similar to Roslyn or rust-analyzer. In comparison CSTParser uses a more heavy - weight non-layered data structure. Alternatively or additionally, have a - common tree API with many concrete task-specific implementations. + layered somehow with a really lightweight [green tree](#raw-syntax-tree--green-tree) + at the most basic level, similar to Roslyn or rust-analyzer. In comparison, + CSTParser uses a more heavyweight non-layered data structure. Alternatively or + additionally, have a common tree API with many concrete task-specific + implementations. 
A big benefit of the JuliaSyntax parser is that it separates the parser code -from the tree data structures entirely which should give a lot of flexibility +from the tree data structures entirely, which should give a lot of flexibility in experimenting with various tree representations. I also want JuliaSyntax to tackle macro expansion and other lowering steps, and @@ -840,12 +845,12 @@ Using a modern production-ready parser generator like `tree-sitter` is an interesting option and some progress has already been made in [tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia). But I feel like the grammars for parser generators are only marginally more -expressive than writing the parser by hand after accounting for the effort +expressive than writing the parser by hand, after accounting for the effort spent on the weird edge cases of a real language and writing the parser's tests and "supporting code". -On the other hand a hand-written parser is completely flexible and can be -mutually understood with the reference implementation so I chose that approach +On the other hand, a hand-written parser is completely flexible and can be +mutually understood with the reference implementation, so I chose that approach for JuliaSyntax. # Resources @@ -1020,7 +1025,8 @@ work flows: ### Raw syntax tree / Green tree -Raw syntax tree (or "Green tree" in the terminology from Roslyn) +Raw syntax tree (or ["Green tree"](https://ericlippert.com/2012/06/08/red-green-trees/) +in the terminology from Roslyn) We want GreenNode to be * *structurally minimal* — For efficiency and generality From 379c853e58913fb590ae58f2f02f34ce7e2df3a7 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 16 Feb 2023 15:04:56 +1000 Subject: [PATCH 0579/1109] Fix `try catch else` Expr conversion (JuliaLang/JuliaSyntax.jl#198) This was incorrect when not including a finally clause. 
--- JuliaSyntax/src/expr.jl | 2 +- JuliaSyntax/test/expr.jl | 34 ++++++++++++++++++++++++++++++++++ JuliaSyntax/test/test_utils.jl | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 87a7f59b92264..61ec2e75239ef 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -213,7 +213,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end # At this point args is # [try_block catch_var catch_block] - if finally_ !== false + if finally_ !== false || else_ !== false push!(args, finally_) end if else_ !== false diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 11c5025a96a37..37f22246501b0 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -261,4 +261,38 @@ @test parse(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) @test parse(Expr, "using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) end + + @testset "try" begin + @test parse(Expr, "try x catch e; y end") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y)) + @test parse(Expr, "try x finally y end") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + false, + false, + Expr(:block, LineNumberNode(1), :y)) + @test parse(Expr, "try x catch e; y finally z end") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y), + Expr(:block, LineNumberNode(1), :z)) + @test parse(Expr, "try x catch e; y else z end", version=v"1.8") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y), + false, + Expr(:block, LineNumberNode(1), :z)) + @test parse(Expr, "try x catch e; y else z finally w end", version=v"1.8") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y), + Expr(:block, LineNumberNode(1), :w), + Expr(:block, LineNumberNode(1), 
:z)) + end end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 45058acdf1666..5a9dca4832fd5 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -267,7 +267,7 @@ function reduce_test(tree::SyntaxNode) end function reduce_test(text::AbstractString) - tree, = parseall(SyntaxNode, text) + tree = parseall(SyntaxNode, text) reduce_test(tree) end From b9a00a43aaadfc4d652dfc40092add91a81daab0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 18 Feb 2023 08:10:50 -0600 Subject: [PATCH 0580/1109] `first_line` support in `parse` (JuliaLang/JuliaSyntax.jl#200) --- JuliaSyntax/src/parser_api.jl | 10 +++++----- JuliaSyntax/src/syntax_tree.jl | 4 ++-- JuliaSyntax/test/parser_api.jl | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 6d918c3c24cf0..a3f29fde1c8f2 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -9,8 +9,8 @@ struct ParseError <: Exception diagnostics::Vector{Diagnostic} end -function ParseError(stream::ParseStream; filename=nothing) - source = SourceFile(sourcetext(stream), filename=filename) +function ParseError(stream::ParseStream; kws...) + source = SourceFile(sourcetext(stream); kws...) 
ParseError(source, stream.diagnostics) end @@ -72,7 +72,7 @@ function parse!(::Type{TreeType}, io::IO; end function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, - ignore_trivia=true, filename=nothing, ignore_errors=false, + ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false, ignore_warnings=ignore_errors) where {T} stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :toplevel @@ -88,14 +88,14 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= end if (!ignore_errors && any_error(stream.diagnostics)) || (!ignore_warnings && !isempty(stream.diagnostics)) - throw(ParseError(stream, filename=filename)) + throw(ParseError(stream, filename=filename, first_line=first_line)) end # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind # mess that we've got here. # * It's kind of required for GreenNode, as GreenNode only records spans, # not absolute positions. # * Dropping it would be ok for SyntaxNode and Expr... - tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename) + tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename, first_line=first_line) tree, last_byte(stream) + 1 end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index cf6484bbbfeb7..1a1f37e91102f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -214,9 +214,9 @@ function Base.push!(node::SyntaxNode, child::SyntaxNode) push!(args, child) end -function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, kws...) +function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, first_line=1, kws...) green_tree = build_tree(GreenNode, stream; kws...) 
- source = SourceFile(sourcetext(stream), filename=filename) + source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) SyntaxNode(source, green_tree, first_byte(stream)) end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index e6f8d26c8a390..3e25f3225a041 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -27,8 +27,8 @@ @test parse(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b) # filename - @test JuliaSyntax.parse(Expr, "begin\na\nend", filename="foo.jl") == - Expr(:block, LineNumberNode(2, Symbol("foo.jl")), :a) + @test JuliaSyntax.parse(Expr, "begin\na\nend", filename="foo.jl", first_line=55) == + Expr(:block, LineNumberNode(56, Symbol("foo.jl")), :a) # ignore_trivia @test parseatom(Expr, " x ", ignore_trivia=true) == :x From 7c6a45e4e57b85327cfe3d7cf32a24dbf1197ce0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 18 Feb 2023 08:20:17 -0600 Subject: [PATCH 0581/1109] Support `begin/end` indexing of `SourceFile` (JuliaLang/JuliaSyntax.jl#201) --- JuliaSyntax/src/source_files.jl | 3 +++ JuliaSyntax/test/source_files.jl | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index fe78185ca8eb5..66e960bbeac30 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -109,6 +109,9 @@ function Base.getindex(source::SourceFile, i::Int) source.code[i] end +Base.firstindex(source::SourceFile) = firstindex(source.code) +Base.lastindex(source::SourceFile) = lastindex(source.code) + """ sourcetext(source::SourceFile) diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 88a1cad82521f..b40f281897726 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -26,4 +26,14 @@ @test source_location(SourceFile(; filename=path), 1) == (1,1) @test source_location(SourceFile(; filename=path, first_line=7), 1) == (7,1) 
end + + @test SourceFile("a\nb\n")[1:2] == "a\n" + @test SourceFile("a\nb\n")[3:end] == "b\n" + if Base.VERSION >= v"1.4" + # Protect the `[begin` from being viewed by the parser on older Julia versions + @test eval(Meta.parse("""SourceFile("a\nb\n")[begin:end]""")) == "a\nb\n" + end + + # unicode + @test SourceFile("αβ")[1:2] == "α" end From 63cc013e6106153e67f47dd5d87ded2931c9b5b7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 18 Feb 2023 09:19:52 -0600 Subject: [PATCH 0582/1109] Submit coverage reports to codecov and add badge (JuliaLang/JuliaSyntax.jl#203) --- JuliaSyntax/.github/workflows/CI.yml | 4 ++++ JuliaSyntax/.gitignore | 3 ++- JuliaSyntax/README.md | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 235c2d3f81183..8b23d64d028c7 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -67,6 +67,10 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@latest + - uses: codecov/codecov-action@v3 + with: + file: lcov.info test_sysimage: name: JuliaSyntax sysimage build - ${{ github.event_name }} runs-on: ubuntu-latest diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore index e3f57ade45012..c5f3e51da58b8 100644 --- a/JuliaSyntax/.gitignore +++ b/JuliaSyntax/.gitignore @@ -1,3 +1,4 @@ /Manifest.toml /tools/pkgs -/tools/logs.txt \ No newline at end of file +/tools/logs.txt +*.cov diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 33a42cb40a8f3..0839aaea9f2d4 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,6 +1,7 @@ # JuliaSyntax [![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions) 
+[![codecov.io](http://codecov.io/github/JuliaLang/JuliaSyntax.jl/coverage.svg?branch=main)](http://codecov.io/github/JuliaLang/JuliaSyntax.jl?branch=main) A Julia frontend, written in Julia. @@ -483,7 +484,7 @@ The same goes for command strings which are always wrapped in `K"cmdstring"` regardless of whether they have multiple pieces (due to triple-quoted dedenting) or otherwise. -### No desugaring of the closure in do blocks +### No desugaring of the closure in do blocks The reference parser represents `do` syntax with a closure for the second argument. That is, From 69e2bdf4095809e7d012992788ccb35ba5bd8187 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 19 Feb 2023 16:18:26 +1000 Subject: [PATCH 0583/1109] Enable parsing of Base tests (JuliaLang/JuliaSyntax.jl#205) The only weird case here is Base's test/syntax.jl which contains some pretty weird syntax cases. Cases where we don't really want to parse the same... --- JuliaSyntax/test/parse_packages.jl | 22 ++++++++++--- JuliaSyntax/test/test_utils.jl | 52 +++++++++++++++++++----------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 5ea0a07024dc4..61aad92247b7a 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -23,14 +23,26 @@ end test_parse_all_in_path(base_path) end -if haskey(ENV, "PARSE_BASE_TEST") -# TODO: Turn on by default - base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") @testset "Parse Base tests at $base_tests_path" begin - test_parse_all_in_path(base_tests_path) -end + for f in find_source_in_path(base_tests_path) + @testset "Parse $(relpath(f, base_tests_path))" begin + # In julia-1.6, test/copy.jl had spurious syntax which became the + # multidimensional array syntax in 1.7. 
+ endswith(f, "copy.jl") && v"1.6" <= VERSION < v"1.7" && continue + + # syntax.jl has some intentially weird syntax which we parse + # differently than the flisp parser, and some cases which we've + # decided are syntax errors. + endswith(f, "syntax.jl") && continue + @test parsers_agree_on_file(f) + # TODO: + # exprs_equal = endswith(f, "syntax.jl") ? + # exprs_roughly_equal : exprs_equal_no_linenum + # @test parsers_agree_on_file(f; exprs_equal=exprs_equal) + end + end end @testset "Parse Julia stdlib at $(Sys.STDLIB)" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 5a9dca4832fd5..ada480404ccca 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -99,6 +99,10 @@ function triple_string_roughly_equal(fl_str, str) return true end +function exprs_equal_no_linenum(fl_ex, ex) + remove_all_linenums!(deepcopy(ex)) == remove_all_linenums!(deepcopy(fl_ex)) +end + # Compare Expr from reference parser expression to JuliaSyntax parser, ignoring # differences due to bugs in the reference parser. function exprs_roughly_equal(fl_ex, ex) @@ -143,6 +147,15 @@ function exprs_roughly_equal(fl_ex, ex) if (h == :global || h == :local) && length(args) == 1 && Meta.isexpr(args[1], :tuple) # Allow invalid syntax like `global (x, y)` args = args[1].args + elseif h == :function && Meta.isexpr(fl_args[1], :block) + blockargs = filter(x->!(x isa LineNumberNode), fl_args[1].args) + ps = blockargs[2:end] + for i = 1:length(ps) + if Meta.isexpr(ps[i], :(=)) + ps[i] = Expr(:kw, ps[i].args...) 
+ end + end + fl_args[1] = Expr(:tuple, Expr(:parameters, ps...), blockargs[1]) end if length(fl_args) != length(args) return false @@ -163,7 +176,8 @@ function exprs_roughly_equal(fl_ex, ex) return true end -function parsers_agree_on_file(filename; show_diff=false) +function parsers_agree_on_file(filename; exprs_equal=exprs_equal_no_linenum, + show_diff=false) text = try read(filename, String) catch @@ -185,9 +199,7 @@ function parsers_agree_on_file(filename; show_diff=false) if show_diff && ex != fl_ex show_expr_text_diff(stdout, show, ex, fl_ex) end - return !JuliaSyntax.any_error(stream) && - JuliaSyntax.remove_linenums!(ex) == - JuliaSyntax.remove_linenums!(fl_ex) + return !JuliaSyntax.any_error(stream) && exprs_equal(fl_ex, ex) # Could alternatively use # exprs_roughly_equal(fl_ex, ex) catch exc @@ -218,27 +230,29 @@ end # Check whether a given SyntaxNode converts to the same Expr as the flisp # parser produces from the source text of the node. -function equals_flisp_parse(tree) +function equals_flisp_parse(exprs_equal, tree) node_text = sourcetext(tree) # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing # some context from the parent node. 
- ex = parseall(Expr, node_text) - fl_ex = fl_parseall(node_text) - if Meta.isexpr(fl_ex, :error) - return true # Something went wrong in reduction; ignore these cases 😬 + fl_ex = fl_parseall(node_text, filename="none") + if Meta.isexpr(fl_ex, :error) || (Meta.isexpr(fl_ex, :toplevel) && + length(fl_ex.args) >= 1 && + Meta.isexpr(fl_ex.args[end], :error)) + return true # Something went wrong in reduction; ignore these cases 😬 end - remove_all_linenums!(ex) == remove_all_linenums!(fl_ex) + ex = parseall(Expr, node_text, filename="none", ignore_errors=true) + exprs_equal(fl_ex, ex) end """ - reduce_test(text::AbstractString) - reduce_test(tree::SyntaxNode) + reduce_test(text::AbstractString; exprs_equal=exprs_equal_no_linenum) + reduce_test(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) Select minimal subtrees of `text` or `tree` which are inconsistent between flisp and JuliaSyntax parsers. """ -function reduce_test(failing_subtrees, tree) - if equals_flisp_parse(tree) +function reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) + if equals_flisp_parse(exprs_equal, tree) return false end if !haschildren(tree) @@ -251,7 +265,7 @@ function reduce_test(failing_subtrees, tree) if is_trivia(child) || !haschildren(child) continue end - had_failing_subtrees |= reduce_test(failing_subtrees, child) + had_failing_subtrees |= reduce_test(failing_subtrees, child; exprs_equal=exprs_equal) end end if !had_failing_subtrees @@ -260,15 +274,15 @@ function reduce_test(failing_subtrees, tree) return true end -function reduce_test(tree::SyntaxNode) +function reduce_test(tree::SyntaxNode; kws...) subtrees = Vector{typeof(tree)}() - reduce_test(subtrees, tree) + reduce_test(subtrees, tree; kws...) subtrees end -function reduce_test(text::AbstractString) +function reduce_test(text::AbstractString; kws...) tree = parseall(SyntaxNode, text) - reduce_test(tree) + reduce_test(tree; kws...) 
end From 77db0cbb964650b8dab19cdba68092b76064f187 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 19 Feb 2023 03:24:45 -0600 Subject: [PATCH 0584/1109] Improve test coverge of source_files, syntax_tree (JuliaLang/JuliaSyntax.jl#204) These two files are particularly affected by recent and upcoming changes (e.g., JuliaLang/JuliaSyntax.jl#193). This adds a bit more coverage as a guard against breakage. --- JuliaSyntax/src/syntax_tree.jl | 8 ++++---- JuliaSyntax/test/source_files.jl | 12 ++++++++++++ JuliaSyntax/test/syntax_tree.jl | 19 ++++++++++++++++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 1a1f37e91102f..8df657e08f9bc 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -286,10 +286,10 @@ end """ Print the code, highlighting the part covered by `node` at tree `path`. """ -function highlight(code::String, node, path::Int...; color=(40,40,70)) +function highlight(io::IO, code::String, node, path::Int...; color=(40,40,70)) node, p, span = child_position_span(node, path...) 
q = p + span - print(stdout, code[1:p-1]) - _printstyled(stdout, code[p:q-1]; bgcolor=color) - print(stdout, code[q:end]) + print(io, code[1:p-1]) + _printstyled(io, code[p:q-1]; bgcolor=color) + print(io, code[q:end]) end diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index b40f281897726..54bcccde48ae3 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -26,7 +26,9 @@ @test source_location(SourceFile(; filename=path), 1) == (1,1) @test source_location(SourceFile(; filename=path, first_line=7), 1) == (7,1) end +end +@testset "SourceFile position indexing" begin @test SourceFile("a\nb\n")[1:2] == "a\n" @test SourceFile("a\nb\n")[3:end] == "b\n" if Base.VERSION >= v"1.4" @@ -36,4 +38,14 @@ # unicode @test SourceFile("αβ")[1:2] == "α" + @test SourceFile("αβ")[3] == 'β' +end + +@testset "SourceFile printing and text extraction" begin + srcf = SourceFile("module Foo\nend") + @test sprint(show, MIME("text/plain"), srcf) == """ + ## SourceFile ## + module Foo + end""" + @test sourcetext(srcf) == "module Foo\nend" end diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 3dc69f91e9a0d..52b5732f82b26 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -1,6 +1,7 @@ @testset "SyntaxNode" begin # Child access - t = parse(SyntaxNode, "a*b + c") + tt = "a*b + c" + t = parse(SyntaxNode, tt) @test sourcetext(child(t, 1)) == "a*b" @test sourcetext(child(t, 1, 1)) == "a" @@ -9,10 +10,26 @@ @test sourcetext(child(t, 2)) == "+" @test sourcetext(child(t, 3)) == "c" + @test JuliaSyntax.first_byte(child(t, 2)) == findfirst(==('+'), tt) + # Child indexing @test t[1] === child(t, 1) @test t[1, 1] === child(t, 1, 1) @test t[end] === child(t, 3) # Unfortunately, can't make t[1, end] work # as `lastindex(t, 2)` isn't well defined + + @test sprint(show, t) == "(call-i (call-i a * b) + c)" + str = sprint(show, MIME("text/plain"), t) + # These tests are 
deliberately quite relaxed to avoid being too specific about display style + @test occursin("line:col", str) + @test occursin("call-i", str) + @test sprint(JuliaSyntax.highlight, tt, t, 1, 3) == "a*\e[48;2;40;40;70mb\e[0;0m + c" + @test sprint(JuliaSyntax.highlight, tt, t.raw, 5) == "a*b + \e[48;2;40;40;70mc\e[0;0m" + + node = parse(SyntaxNode, "f()") + push!(node, parse(SyntaxNode, "x")) + @test length(children(node)) == 2 + node[2] = parse(SyntaxNode, "y") + @test sourcetext(child(node, 2)) == "y" end From e585f636c772e0e665114cb8b592c9920a2e2eb1 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 19 Feb 2023 06:08:51 -0600 Subject: [PATCH 0585/1109] Split SyntaxNode into TreeNode & SyntaxData (JuliaLang/JuliaSyntax.jl#193) Closes JuliaLang/JuliaSyntax.jl#192 Co-authored-by: c42f --- JuliaSyntax/src/syntax_tree.jl | 72 ++++++++++++++++++++++----------- JuliaSyntax/test/syntax_tree.jl | 7 ++++ 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 8df657e08f9bc..93466876a54ee 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -1,20 +1,43 @@ #------------------------------------------------------------------------------- # AST interface, built on top of raw tree -""" -Design options: -* rust-analyzer treats their version of an untyped syntax node as a cursor into - the green tree. They deallocate aggressively. -""" -mutable struct SyntaxNode +abstract type AbstractSyntaxData end + +mutable struct TreeNode{NodeData} # ? prevent others from using this with NodeData <: AbstractSyntaxData? 
+ parent::Union{Nothing,TreeNode{NodeData}} + children::Union{Nothing,Vector{TreeNode{NodeData}}} + data::Union{Nothing,NodeData} +end + +# Implement "pass-through" semantics for field access: access fields of `data` +# as if they were part of `TreeNode` +function Base.getproperty(node::TreeNode, name::Symbol) + name === :parent && return getfield(node, :parent) + name === :children && return getfield(node, :children) + d = getfield(node, :data) + name === :data && return d + return getproperty(d, name) +end + +function Base.setproperty!(node::TreeNode, name::Symbol, x) + name === :parent && return setfield!(node, :parent, x) + name === :children && return setfield!(node, :children, x) + name === :data && return setfield!(node, :data, x) + d = getfield(node, :data) + return setfield!(d, name, x) +end + +const AbstractSyntaxNode = TreeNode{<:AbstractSyntaxData} + +struct SyntaxData <: AbstractSyntaxData source::SourceFile raw::GreenNode{SyntaxHead} position::Int - parent::Union{Nothing,SyntaxNode} - is_leaf::Bool val::Any end +const SyntaxNode = TreeNode{SyntaxData} + # Value of an error node with no children struct ErrorVal end @@ -106,7 +129,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In @debug "Leaf node of kind $k unknown to SyntaxNode" ErrorVal() end - return SyntaxNode(source, raw, position, nothing, true, val) + return SyntaxNode(nothing, nothing, SyntaxData(source, raw, position, val)) else cs = SyntaxNode[] pos = position @@ -117,7 +140,7 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end pos += rawchild.span end - node = SyntaxNode(source, raw, position, nothing, false, cs) + node = SyntaxNode(nothing, cs, SyntaxData(source, raw, position, nothing)) for c in cs c.parent = node end @@ -125,22 +148,23 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In end end -head(node::SyntaxNode) = head(node.raw) +haschildren(node::TreeNode) = node.children !== 
nothing +children(node::TreeNode) = (c = node.children; return c === nothing ? () : c) + -haschildren(node::SyntaxNode) = !node.is_leaf -children(node::SyntaxNode) = haschildren(node) ? node.val::Vector{SyntaxNode} : () +head(node::SyntaxNode) = head(node.raw) span(node::SyntaxNode) = span(node.raw) -first_byte(node::SyntaxNode) = node.position -last_byte(node::SyntaxNode) = node.position + span(node) - 1 +first_byte(node::AbstractSyntaxNode) = node.position +last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1 """ sourcetext(node) Get the full source text of a node. """ -function sourcetext(node::SyntaxNode) +function sourcetext(node::AbstractSyntaxNode) val_range = (node.position-1) .+ (1:span(node)) view(node.source, val_range) end @@ -150,7 +174,7 @@ function interpolate_literal(node::SyntaxNode, val) SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) end -function _show_syntax_node(io, current_filename, node::SyntaxNode, indent) +function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent) fname = node.source.filename line, col = source_location(node.source, node.position) posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" @@ -173,7 +197,7 @@ function _show_syntax_node(io, current_filename, node::SyntaxNode, indent) end end -function _show_syntax_node_sexpr(io, node::SyntaxNode) +function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode) if !haschildren(node) if is_error(node) print(io, "(", untokenize(head(node)), ")") @@ -193,24 +217,24 @@ function _show_syntax_node_sexpr(io, node::SyntaxNode) end end -function Base.show(io::IO, ::MIME"text/plain", node::SyntaxNode) +function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode) println(io, "line:col│ byte_range │ tree │ file_name") _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "") end -function Base.show(io::IO, ::MIME"text/x.sexpression", node::SyntaxNode) +function 
Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode) _show_syntax_node_sexpr(io, node) end -function Base.show(io::IO, node::SyntaxNode) +function Base.show(io::IO, node::AbstractSyntaxNode) _show_syntax_node_sexpr(io, node) end -function Base.push!(node::SyntaxNode, child::SyntaxNode) +function Base.push!(node::SN, child::SN) where SN<:AbstractSyntaxNode if !haschildren(node) error("Cannot add children") end - args = node.val::Vector{SyntaxNode} + args = children(node) push!(args, child) end @@ -239,7 +263,7 @@ end function setchild!(node::SyntaxNode, path, x) n1 = child(node, path[1:end-1]...) - n1.val[path[end]] = x + n1.children[path[end]] = x end # We can overload multidimensional Base.getindex / Base.setindex! for node diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 52b5732f82b26..99f612f1548af 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -27,6 +27,13 @@ @test sprint(JuliaSyntax.highlight, tt, t, 1, 3) == "a*\e[48;2;40;40;70mb\e[0;0m + c" @test sprint(JuliaSyntax.highlight, tt, t.raw, 5) == "a*b + \e[48;2;40;40;70mc\e[0;0m" + # Pass-through field access + node = child(t, 1, 1) + @test node.val === :a + # The specific error text has evolved over Julia versions. 
Check that it involves `SyntaxData` and immutability + e = try node.val = :q catch e e end + @test occursin("immutable", e.msg) && occursin("SyntaxData", e.msg) + node = parse(SyntaxNode, "f()") push!(node, parse(SyntaxNode, "x")) @test length(children(node)) == 2 From 7a99852ce7c57b9cece44e6a9e2d63b0e6b84b6a Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 20 Feb 2023 15:42:49 +1000 Subject: [PATCH 0586/1109] Move value_parsing.jl to literal_parsing.jl (JuliaLang/JuliaSyntax.jl#206) --- JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/{value_parsing.jl => literal_parsing.jl} | 0 JuliaSyntax/test/{value_parsing.jl => literal_parsing.jl} | 0 JuliaSyntax/test/runtests.jl | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename JuliaSyntax/src/{value_parsing.jl => literal_parsing.jl} (100%) rename JuliaSyntax/test/{value_parsing.jl => literal_parsing.jl} (100%) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 6b4f93d4a2fe9..4d0d4e7029f51 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -17,7 +17,7 @@ include("diagnostics.jl") include("parse_stream.jl") include("parser.jl") include("parser_api.jl") -include("value_parsing.jl") +include("literal_parsing.jl") # Tree data structures include("green_tree.jl") diff --git a/JuliaSyntax/src/value_parsing.jl b/JuliaSyntax/src/literal_parsing.jl similarity index 100% rename from JuliaSyntax/src/value_parsing.jl rename to JuliaSyntax/src/literal_parsing.jl diff --git a/JuliaSyntax/test/value_parsing.jl b/JuliaSyntax/test/literal_parsing.jl similarity index 100% rename from JuliaSyntax/test/value_parsing.jl rename to JuliaSyntax/test/literal_parsing.jl diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index f1a72da287beb..338470917c3f1 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -53,7 +53,7 @@ include("diagnostics.jl") include("parser_api.jl") include("expr.jl") @testset "Parsing literals from 
strings" begin - include("value_parsing.jl") + include("literal_parsing.jl") end include("source_files.jl") From 182515f9f3779b890f3a3ef158f77cde3bf57540 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 20 Feb 2023 20:30:14 +1000 Subject: [PATCH 0587/1109] Remove byte_range from default printing of SyntaxNode (JuliaLang/JuliaSyntax.jl#207) The byte range is probably most useful for debugging JuliaSyntax than general use. So disable this by default. --- JuliaSyntax/README.md | 16 ++++++++-------- JuliaSyntax/src/syntax_tree.jl | 15 +++++++++------ JuliaSyntax/test/syntax_tree.jl | 24 ++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 0839aaea9f2d4..c968b91613d42 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -53,14 +53,14 @@ the `call` has the infix `-i` flag): julia> using JuliaSyntax: JuliaSyntax, SyntaxNode, GreenNode julia> JuliaSyntax.parse(SyntaxNode, "(x + y)*z", filename="foo.jl") -line:col│ byte_range │ tree │ file_name - 1:1 │ 1:9 │[call-i] │foo.jl - 1:2 │ 2:6 │ [call-i] - 1:2 │ 2:2 │ x - 1:4 │ 4:4 │ + - 1:6 │ 6:6 │ y - 1:8 │ 8:8 │ * - 1:9 │ 9:9 │ z +line:col│ tree │ file_name + 1:1 │[call-i] │foo.jl + 1:2 │ [call-i] + 1:2 │ x + 1:4 │ + + 1:6 │ y + 1:8 │ * + 1:9 │ z ``` Internally this has a full representation of all syntax trivia (whitespace and diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 93466876a54ee..57f36701c9500 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -174,10 +174,13 @@ function interpolate_literal(node::SyntaxNode, val) SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) end -function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent) +function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent, show_byte_offsets) fname = node.source.filename line, col = source_location(node.source, node.position) 
- posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" + posstr = "$(lpad(line, 4)):$(rpad(col,3))│" + if show_byte_offsets + posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" + end val = node.val nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : isa(val, Symbol) ? string(val) : repr(val) @@ -192,7 +195,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, inden if haschildren(node) new_indent = indent*" " for n in children(node) - _show_syntax_node(io, current_filename, n, new_indent) + _show_syntax_node(io, current_filename, n, new_indent, show_byte_offsets) end end end @@ -217,9 +220,9 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode) end end -function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode) - println(io, "line:col│ byte_range │ tree │ file_name") - _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "") +function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_byte_offsets=false) + println(io, "line:col│$(show_byte_offsets ? 
" byte_range │" : "") tree │ file_name") + _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "", show_byte_offsets) end function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode) diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 99f612f1548af..d596f25009b5f 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -40,3 +40,27 @@ node[2] = parse(SyntaxNode, "y") @test sourcetext(child(node, 2)) == "y" end + +@testset "SyntaxNode pretty printing" begin + t = parse(SyntaxNode, "f(a*b,\n c)", filename="foo.jl") + @test sprint(show, MIME("text/plain"), t) == """ + line:col│ tree │ file_name + 1:1 │[call] │foo.jl + 1:1 │ f + 1:3 │ [call-i] + 1:3 │ a + 1:4 │ * + 1:5 │ b + 2:3 │ c + """ + @test sprint(io->show(io, MIME("text/plain"), t, show_byte_offsets=true)) == """ + line:col│ byte_range │ tree │ file_name + 1:1 │ 1:11 │[call] │foo.jl + 1:1 │ 1:1 │ f + 1:3 │ 3:5 │ [call-i] + 1:3 │ 3:3 │ a + 1:4 │ 4:4 │ * + 1:5 │ 5:5 │ b + 2:3 │ 10:10 │ c + """ +end From 0746f292897abff6638bc5996a55a5a1c15a4a5d Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 23 Feb 2023 07:08:24 -0600 Subject: [PATCH 0588/1109] AbstractSyntaxNode generalization & convenience (JuliaLang/JuliaSyntax.jl#208) These add a couple more convenience methods useful for TypedSyntax. --- JuliaSyntax/src/syntax_tree.jl | 7 +++++-- JuliaSyntax/test/syntax_tree.jl | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 57f36701c9500..d97dfd1f5560b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -152,9 +152,9 @@ haschildren(node::TreeNode) = node.children !== nothing children(node::TreeNode) = (c = node.children; return c === nothing ? 
() : c) -head(node::SyntaxNode) = head(node.raw) +head(node::AbstractSyntaxNode) = head(node.raw) -span(node::SyntaxNode) = span(node.raw) +span(node::AbstractSyntaxNode) = span(node.raw) first_byte(node::AbstractSyntaxNode) = node.position last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1 @@ -169,6 +169,9 @@ function sourcetext(node::AbstractSyntaxNode) view(node.source, val_range) end +source_line(node::AbstractSyntaxNode) = source_line(node.source, node.position) +source_location(node::AbstractSyntaxNode) = source_location(node.source, node.position) + function interpolate_literal(node::SyntaxNode, val) @assert kind(node) == K"$" SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index d596f25009b5f..478095f1e34af 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -11,6 +11,8 @@ @test sourcetext(child(t, 3)) == "c" @test JuliaSyntax.first_byte(child(t, 2)) == findfirst(==('+'), tt) + @test JuliaSyntax.source_line(child(t, 3)) == 1 + @test source_location(child(t, 3)) == (1, 7) # Child indexing @test t[1] === child(t, 1) From 8397c93961b9bbff52ef43d62b4304097bbe1c11 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 24 Feb 2023 06:37:54 +1000 Subject: [PATCH 0589/1109] Bump to 0.3.1 and remove old Printf dependency (JuliaLang/JuliaSyntax.jl#209) --- JuliaSyntax/Project.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 2ee8dba954fce..64708a325a6d0 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] -version = "0.3.0" +version = "0.3.1" [compat] julia = "1.0" @@ -9,8 +9,7 @@ julia = "1.0" [deps] [extras] -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Test = 
"8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Printf"] +test = ["Test"] From 76a84bfdef6192cf6653825a796f8c238e631936 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 27 Feb 2023 07:14:50 -0600 Subject: [PATCH 0590/1109] Support `copy` for TreeNodes (JuliaLang/JuliaSyntax.jl#210) Co-authored-by: c42f --- JuliaSyntax/src/syntax_tree.jl | 15 +++++++++++++++ JuliaSyntax/test/syntax_tree.jl | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index d97dfd1f5560b..66cf8757668f5 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -244,6 +244,21 @@ function Base.push!(node::SN, child::SN) where SN<:AbstractSyntaxNode push!(args, child) end +function Base.copy(node::TreeNode) + # copy the container but not the data (ie, deep copy the tree, shallow copy the data). copy(::Expr) is similar + # copy "un-parents" the top-level `node` that you're copying + newnode = typeof(node)(nothing, haschildren(node) ? typeof(node)[] : nothing, copy(node.data)) + for child in children(node) + newchild = copy(child) + newchild.parent = newnode + push!(newnode, newchild) + end + return newnode +end + +# shallow-copy the data +Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.position, data.val) + function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, first_line=1, kws...) green_tree = build_tree(GreenNode, stream; kws...) 
source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 478095f1e34af..30a77cdc49e02 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -36,6 +36,14 @@ e = try node.val = :q catch e e end @test occursin("immutable", e.msg) && occursin("SyntaxData", e.msg) + # copy + t = parse(SyntaxNode, "a*b + c") + ct = copy(t) + ct.data = nothing + @test ct.data === nothing && t.data !== nothing + @test child(ct, 1).parent === ct + @test child(ct, 1) !== child(t, 1) + node = parse(SyntaxNode, "f()") push!(node, parse(SyntaxNode, "x")) @test length(children(node)) == 2 From 2ba165f9abf27ff2951b55f86128c597b6425c7c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 27 Feb 2023 07:16:04 -0600 Subject: [PATCH 0591/1109] Version 0.3.2 (JuliaLang/JuliaSyntax.jl#211) --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 64708a325a6d0..16967150c4aab 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] -version = "0.3.1" +version = "0.3.2" [compat] julia = "1.0" From aff6c6a2f56dbcd6743f18f27bea10758dd0e92e Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 3 Mar 2023 19:44:05 +1000 Subject: [PATCH 0592/1109] Move some utility/hook functions to more consistent locations --- JuliaSyntax/src/hooks.jl | 98 +++++++++++++++++++++++++++++++--------- JuliaSyntax/src/utils.jl | 81 +++++++-------------------------- 2 files changed, 93 insertions(+), 86 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 2de562a6b7358..031ea801e374d 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -233,28 +233,6 @@ function _core_parser_hook(code, filename, lineno, offset, options) 
end end -# Call the flisp parser -function _fl_parse_hook(code, filename, lineno, offset, options) - @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 - return Core.Compiler.fl_parse(code, filename, lineno, offset, options) - elseif VERSION >= v"1.6" - return Core.Compiler.fl_parse(code, filename, offset, options) - else - if options === :all - ex = Base.parse_input_line(String(code), filename=filename, depwarn=false) - if !Meta.isexpr(ex, :toplevel) - ex = Expr(:toplevel, ex) - end - return ex, sizeof(code) - elseif options === :statement || options == :atom - ex, pos = Meta.parse(code, offset+1, greedy=options==:statement, raise=false) - return ex, pos-1 - else - error("Unknown parse options $options") - end - end -end - # Hack: # Meta.parse() attempts to construct a ParseError from a string if it receives # `Expr(:error)`. Add an override to the ParseError constructor to prevent this. @@ -292,3 +270,79 @@ function enable_in_core!(enable=true; freeze_world_age = true, _set_core_parse_hook(enable ? 
core_parser_hook : _default_parser) nothing end + + +#------------------------------------------------------------------------------- +# Tools to call the reference flisp parser +# +# Call the flisp parser +function _fl_parse_hook(code, filename, lineno, offset, options) + @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 + return Core.Compiler.fl_parse(code, filename, lineno, offset, options) + elseif VERSION >= v"1.6" + return Core.Compiler.fl_parse(code, filename, offset, options) + else + if options === :all + ex = Base.parse_input_line(String(code), filename=filename, depwarn=false) + if !Meta.isexpr(ex, :toplevel) + ex = Expr(:toplevel, ex) + end + return ex, sizeof(code) + elseif options === :statement || options == :atom + ex, pos = Meta.parse(code, offset+1, greedy=options==:statement, raise=false) + return ex, pos-1 + else + error("Unknown parse options $options") + end + end +end + +#------------------------------------------------ +# Copy of the Meta.parse() API, but ensuring that we call the flisp parser +# rather than using Meta.parse() which may be using the JuliaSyntax parser. + +""" +Like Meta.parse() but always call the flisp reference parser. +""" +function fl_parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) + ex, pos = fl_parse(str, 1, greedy=true, raise=raise, depwarn=depwarn) + if isa(ex,Expr) && ex.head === :error + return ex + end + if pos <= ncodeunits(str) + raise && throw(Meta.ParseError("extra token after end of expression")) + return Expr(:error, "extra token after end of expression") + end + return ex +end + +function fl_parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true, + depwarn::Bool=true) + ex, pos = _fl_parse_string(str, "none", 1, pos, greedy ? :statement : :atom) + if raise && isa(ex,Expr) && ex.head === :error + throw(Meta.ParseError(ex.args[1])) + end + return ex, pos +end + +""" +Like Meta.parseall() but always call the flisp reference parser. 
+""" +function fl_parseall(text::AbstractString; filename="none", lineno=1) + ex,_ = _fl_parse_string(text, String(filename), lineno, 1, :all) + return ex +end + +function _fl_parse_string(text::AbstractString, filename::AbstractString, + lineno::Integer, index::Integer, options) + if index < 1 || index > ncodeunits(text) + 1 + throw(BoundsError(text, index)) + end + ex, offset::Int = _fl_parse_hook(text, filename, lineno, index-1, options) + ex, offset+1 +end + +# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases. +fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...) +fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...) + diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index bc30e22283c8c..153d845a419c4 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -34,6 +34,23 @@ macro check(ex, msgs...) return :($(esc(ex)) ? nothing : internal_error($msg)) end +# Really remove line numbers, even from Expr(:toplevel) +remove_linenums!(ex) = ex +function remove_linenums!(ex::Expr) + if ex.head === :block || ex.head === :quote || ex.head === :toplevel + filter!(ex.args) do x + !(isa(x, Expr) && x.head === :line || isa(x, LineNumberNode)) + end + end + for subex in ex.args + subex isa Expr && remove_linenums!(subex) + end + return ex +end + + +#------------------------------------------------------------------------------- +# Text printing/display utils """ Like printstyled, but allows providing RGB colors for true color terminals @@ -61,67 +78,3 @@ function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing) end end -# Really remove line numbers, even from Expr(:toplevel) -remove_linenums!(ex) = ex -function remove_linenums!(ex::Expr) - if ex.head === :block || ex.head === :quote || ex.head === :toplevel - filter!(ex.args) do x - !(isa(x, Expr) && x.head === :line || isa(x, LineNumberNode)) - end - end - for subex in ex.args - subex isa Expr && 
remove_linenums!(subex) - end - return ex -end - - -#------------------------------------------------------------------------------- -# Copy of the Meta.parse() API, but ensuring that we call the flisp parser -# rather than using Meta.parse() which may be using the JuliaSyntax parser. - -""" -Like Meta.parse() but always call the flisp reference parser. -""" -function fl_parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) - ex, pos = fl_parse(str, 1, greedy=true, raise=raise, depwarn=depwarn) - if isa(ex,Expr) && ex.head === :error - return ex - end - if pos <= ncodeunits(str) - raise && throw(Meta.ParseError("extra token after end of expression")) - return Expr(:error, "extra token after end of expression") - end - return ex -end - -function fl_parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true, - depwarn::Bool=true) - ex, pos = _fl_parse_string(str, "none", 1, pos, greedy ? :statement : :atom) - if raise && isa(ex,Expr) && ex.head === :error - throw(Meta.ParseError(ex.args[1])) - end - return ex, pos -end - -""" -Like Meta.parseall() but always call the flisp reference parser. -""" -function fl_parseall(text::AbstractString; filename="none", lineno=1) - ex,_ = _fl_parse_string(text, String(filename), lineno, 1, :all) - return ex -end - -function _fl_parse_string(text::AbstractString, filename::AbstractString, - lineno::Integer, index::Integer, options) - if index < 1 || index > ncodeunits(text) + 1 - throw(BoundsError(text, index)) - end - ex, offset::Int = _fl_parse_hook(text, filename, lineno, index-1, options) - ex, offset+1 -end - -# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases. -fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...) -fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...) 
- From 8d92c40c97d013d89b6d0db04fdbbe8e387bc5d9 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 4 Mar 2023 10:46:08 +1000 Subject: [PATCH 0593/1109] Add docs for the ignore_errors flag --- JuliaSyntax/src/parser_api.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index a3f29fde1c8f2..0ea050c46f26a 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -104,7 +104,8 @@ _parse_docs = """ version=VERSION, ignore_trivia=true, filename=nothing, - ignore_warnings=false) + ignore_errors=false, + ignore_warnings=ignore_errors) # Or, with the same arguments parseall(...) @@ -131,7 +132,8 @@ tree, if applicable. This will also annotate errors and warnings with the source file name. A `ParseError` will be thrown if any errors or warnings occurred during -parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. +parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. To +also avoid exceptions due to errors, use `ignore_errors=true`. """ parse(::Type{T}, text::AbstractString; kws...) 
where {T} = _parse(:statement, true, T, text; kws...)[1] From 36d96ad6c2893342417d95014d224b322603c6ee Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 10 Mar 2023 10:47:56 +1000 Subject: [PATCH 0594/1109] Add some basic benchmarks --- JuliaSyntax/test/benchmark.jl | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 JuliaSyntax/test/benchmark.jl diff --git a/JuliaSyntax/test/benchmark.jl b/JuliaSyntax/test/benchmark.jl new file mode 100644 index 0000000000000..b7dc4e0116f6c --- /dev/null +++ b/JuliaSyntax/test/benchmark.jl @@ -0,0 +1,34 @@ +using BenchmarkTools +using JuliaSyntax + +include("test_utils.jl") + +function concat_base() + basedir = joinpath(Sys.BINDIR, "..", "share", "julia", "base") + io = IOBuffer() + for f in find_source_in_path(basedir) + write(io, read(f, String)) + println(io) + end + return String(take!(io)) +end + +all_base_code = concat_base() + +b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:toplevel) +b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code) +b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code) +b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code) + +@info "Benchmarks" ParseStream=b_ParseStream GreenNode=b_GreenNode SyntaxNode=b_SyntaxNode Expr=b_Expr + + +# Allocation profiling +# +# using Profile.Allocs +# using PProf +# Allocs.clear() +# stream = JuliaSyntax.ParseStream(text); +# JuliaSyntax.peek(stream); +# Allocs.@profile sample_rate=1 JuliaSyntax.parse(stream) +# PProf.Allocs.pprof() From e503f8e0c65baf474adb3577d6492976d3929043 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 10 Mar 2023 09:44:17 +0100 Subject: [PATCH 0595/1109] add a very basic precompile workload (JuliaLang/JuliaSyntax.jl#213) * add a very basic precompile workload Before: ``` julia> @time @eval collect(JuliaSyntax.Tokenize.Lexer("1+1 == 2")) 1.083684 seconds (1.04 M allocations: 63.173 
MiB, 0.86% gc time, 99.98% compilation time) ``` After: ``` julia> @time @eval collect(JuliaSyntax.Tokenize.Lexer("1+1 == 2")) 0.016808 seconds (29.64 k allocations: 1.939 MiB, 62.47% compilation ``` Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Co-authored-by: c42f --- JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/precompile.jl | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 JuliaSyntax/src/precompile.jl diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 4d0d4e7029f51..ff3ab13b2d922 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -26,5 +26,5 @@ include("expr.jl") # Hooks to integrate the parser with Base include("hooks.jl") - +include("precompile.jl") end diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl new file mode 100644 index 0000000000000..f6426532dad4c --- /dev/null +++ b/JuliaSyntax/src/precompile.jl @@ -0,0 +1,5 @@ +# Just parse some file as a precompile workload +let filename = joinpath(@__DIR__, "literal_parsing.jl") + text = read(filename, String) + parseall(Expr, text) +end From 5d79307b80cf3f624c26ae3d4113b3c07716ef53 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 11 Mar 2023 20:42:14 +1000 Subject: [PATCH 0596/1109] Rewrite source range `highlight()` (JuliaLang/JuliaSyntax.jl#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `highlight()` now always prints whole lines of source code, and can highlight arbitrary ranges using box drawing characters, not just with ANSI colors. Use this to fix diagnostic printing so that it's comprehensible in a non color terminal and so that pasting errors into non-color environments works. 
The box drawing characters from WGL4 seem like a good balance of: * Relatively compatible because they're very old, dating from DOS era: https://en.wikipedia.org/wiki/Box-drawing_character#DOS - note we also use these for formatting log messages from Logging.ConsoleLogger. * Easy to distinguish from the user's source code I've also found it's helpful to prepend any lines of annotation with a Julia # comment where possible - this ensures that copy+paste into websites with syntax highlighting will highlight the annotations separately from the code. A simple example: julia> (x - (c <--- d)) ERROR: ParseError: (x - (c <--- d)) # └──┘ ── invalid operator @ REPL[48]:1:9 Also some semi-related changes included * Generalized/expanded _printstyled() function * Better diagnostic range for try-without-catch --- JuliaSyntax/src/diagnostics.jl | 78 ++++++------------- JuliaSyntax/src/parse_stream.jl | 7 +- JuliaSyntax/src/parser.jl | 9 ++- JuliaSyntax/src/source_files.jl | 126 ++++++++++++++++++++++++++++++- JuliaSyntax/src/syntax_tree.jl | 24 +++--- JuliaSyntax/src/utils.jl | 62 +++++++++++++-- JuliaSyntax/test/diagnostics.jl | 12 +++ JuliaSyntax/test/parser_api.jl | 26 +++++++ JuliaSyntax/test/runtests.jl | 2 + JuliaSyntax/test/source_files.jl | 112 +++++++++++++++++++++++++++ JuliaSyntax/test/syntax_tree.jl | 4 +- JuliaSyntax/test/test_utils.jl | 3 +- JuliaSyntax/test/utils.jl | 17 +++++ 13 files changed, 400 insertions(+), 82 deletions(-) create mode 100644 JuliaSyntax/test/utils.jl diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index ca98b3684303a..404972514b9b7 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -40,6 +40,18 @@ end first_byte(d::Diagnostic) = d.first_byte last_byte(d::Diagnostic) = d.last_byte is_error(d::Diagnostic) = d.level == :error +Base.range(d::Diagnostic) = first_byte(d):last_byte(d) + +# Make relative path into a file URL +function _file_url(filename) + @static if Sys.iswindows() + # 
TODO: Test this with windows terminal + path = replace(abspath(filename), '\\'=>'/') + else + path = abspath(filename) + end + "file://$(path)" +end function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) color,prefix = diagnostic.level == :error ? (:light_red, "Error") : @@ -49,76 +61,34 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) line, col = source_location(source, first_byte(diagnostic)) linecol = "$line:$col" filename = source.filename + file_href = nothing if !isnothing(filename) locstr = "$filename:$linecol" - if get(io, :color, false) - # Also add hyperlinks in color terminals - url = "file://$(abspath(filename))#$linecol" - locstr = "\e]8;;$url\e\\$locstr\e]8;;\e\\" + if !startswith(filename, "REPL[") + file_href = _file_url(filename)*"#$linecol" end else locstr = "line $linecol" end - print(io, prefix, ": ") - printstyled(io, diagnostic.message, color=color) - printstyled(io, "\n", "@ $locstr", color=:light_black) + _printstyled(io, "# $prefix @ ", fgcolor=:light_black) + _printstyled(io, "$locstr", fgcolor=:light_black, href=file_href) print(io, "\n") - - p = first_byte(diagnostic) - q = last_byte(diagnostic) - text = sourcetext(source) - if q < p || (p == q && source[p] == '\n') - # An empty or invisible range! We expand it symmetrically to make it - # visible. - p = max(firstindex(text), prevind(text, p)) - q = min(lastindex(text), nextind(text, q)) - end - - # p and q mark the start and end of the diagnostic range. For context, - # buffer these out to the surrouding lines. - a,b = source_line_range(source, p, context_lines_before=2, context_lines_after=1) - c,d = source_line_range(source, q, context_lines_before=1, context_lines_after=2) - - hicol = (100,40,40) - - # TODO: show line numbers on left - - print(io, source[a:prevind(text, p)]) - # There's two situations, either - if b >= c - # The diagnostic range is compact and we show the whole thing - # a............... - # .....p...q...... 
- # ...............b - _printstyled(io, source[p:q]; bgcolor=hicol) - else - # Or large and we trucate the code to show only the region around the - # start and end of the error. - # a............... - # .....p.......... - # ...............b - # (snip) - # c............... - # .....q.......... - # ...............d - _printstyled(io, source[p:b]; bgcolor=hicol) - println(io, "…") - _printstyled(io, source[c:q]; bgcolor=hicol) - end - print(io, source[nextind(text,q):d]) - println(io) + highlight(io, source, range(diagnostic), + note=diagnostic.message, notecolor=color, + context_lines_before=1, context_lines_after=0) end function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, source::SourceFile) + first = true for d in diagnostics + first || println(io) + first = false show_diagnostic(io, d, source) end end function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text::AbstractString) - if !isempty(diagnostics) - show_diagnostics(io, diagnostics, SourceFile(text)) - end + show_diagnostics(io, diagnostics, SourceFile(text)) end function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 2189da2314b75..f5a8bcd7a7c4b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -295,8 +295,8 @@ function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) println(io, "ParseStream at position $(_next_byte(stream))") end -function show_diagnostics(io::IO, stream::ParseStream, code) - show_diagnostics(io, stream.diagnostics, code) +function show_diagnostics(io::IO, stream::ParseStream) + show_diagnostics(io, stream.diagnostics, sourcetext(stream)) end # We manage a pool of stream positions as parser working space @@ -841,7 +841,7 @@ end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...) 
emit_diagnostic(stream, token_first_byte(stream, mark.token_index), - _next_byte(stream) - 1; kws...) + _next_byte(stream) - 1; kws...) end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, @@ -923,6 +923,7 @@ function validate_tokens(stream::ParseStream) t.orig_kind, t.next_byte) end end + sort!(stream.diagnostics, by=first_byte) end # Tree construction from the list of text ranges held by ParseStream diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 6318054a5496f..9347ee73f42f6 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2230,6 +2230,7 @@ function parse_try(ps) out_kind = K"try" mark = position(ps) bump(ps, TRIVIA_FLAG) + diagnostic_mark = position(ps) parse_block(ps) has_catch = false has_else = false @@ -2282,12 +2283,16 @@ function parse_try(ps) emit_diagnostic(ps, m, position(ps), warning="`catch` after `finally` will execute out of order") end - if !has_catch && !has_finally + missing_recovery = !has_catch && !has_finally + if missing_recovery # try x end ==> (try (block x) false false false false (error-t)) - bump_invisible(ps, K"error", TRIVIA_FLAG, error="try without catch or finally") + bump_invisible(ps, K"error", TRIVIA_FLAG) end bump_closing_token(ps, K"end") emit(ps, mark, out_kind, flags) + if missing_recovery + emit_diagnostic(ps, diagnostic_mark, error="try without catch or finally") + end end function parse_catch(ps::ParseState) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 66e960bbeac30..d2903a5b62b0c 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -88,7 +88,7 @@ function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) end end -function Base.getindex(source::SourceFile, rng::AbstractRange) +function Base.getindex(source::SourceFile, rng::AbstractUnitRange) i = first(rng) # Convert byte range into unicode String character range. # Assumes valid unicode! 
(SubString doesn't give us a reliable way to opt @@ -99,7 +99,7 @@ function Base.getindex(source::SourceFile, rng::AbstractRange) end # TODO: Change view() here to `sourcetext` ? -function Base.view(source::SourceFile, rng::AbstractRange) +function Base.view(source::SourceFile, rng::AbstractUnitRange) i = first(rng) j = prevind(source.code, last(rng)+1) SubString(source.code, i, j) @@ -120,3 +120,125 @@ Get the full source text of a `SourceFile` as a string. function sourcetext(source::SourceFile) return source.code end + + +#------------------------------------------------------------------------------- +# Tools for highlighting source ranges +function _print_marker_line(io, prefix_str, str, underline, singleline, color, + note, notecolor) + # Whitespace equivalent in length to `prefix_str` + # Getting exactly the same width of whitespace as `str` is tricky. + # Especially for mixtures of tabs and spaces. + # tabs are zero width according to textwidth + indent = join(isspace(c) ? c : repeat(' ', textwidth(c)) for c in prefix_str) + + # Assume tabs are 4 wide rather than 0. (fixme: implement tab alignment?) + w = textwidth(str) + 4*count(c->c=='\t', str) + if !isempty(indent) + indent = "#" * (first(indent) == '\t' ? indent : indent[nextind(indent,1):end]) + end + + midchar = '─' + startstr, endstr, singlestart = underline ? ("└","┘","╙") : ("┌","┐","╓") + + markline = + if singleline + w == 0 ? string(indent, startstr) : + w == 1 ? string(indent, singlestart) : + string(indent, startstr, repeat('─', w-2), endstr) + else + if underline && isempty(indent) && w > 1 + string('#', repeat('─', w-2), endstr) + else + s,e = underline ? ("", endstr) : (startstr, "") + w == 0 ? 
string(indent, s, e) : + string(indent, s, repeat('─', w-1), e) + end + end + if note isa AbstractString + markline *= " ── " + end + _printstyled(io, markline; fgcolor=color) + if !isnothing(note) + if note isa AbstractString + _printstyled(io, note, fgcolor=notecolor) + else + note(io, indent, w) + end + end +end + +""" +Print the lines of source code surrounding the given byte `range`, which is +highlighted with background `color` and markers in the text. +""" +function highlight(io::IO, source::SourceFile, range::UnitRange; + color=(120,70,70), context_lines_before=2, + context_lines_inner=1, context_lines_after=2, + note=nothing, notecolor=nothing) + p = first(range) + q = last(range) + + x,y = source_line_range(source, p; + context_lines_before=context_lines_before, + context_lines_after=context_lines_inner) + a,b = source_line_range(source, p) + c,d = source_line_range(source, q) + z,w = source_line_range(source, q; + context_lines_before=context_lines_inner, + context_lines_after=context_lines_after) + + p_line = source_line(source, p) + q_line = source_line(source, q) + + marker_line_color = :light_black + + if p_line == q_line + # x----------------- + # a---p-------q----b + # # └───────┘ ── note + # -----------------w + + hitext = source[p:q] + print(io, source[x:p-1]) + _printstyled(io, hitext; bgcolor=color) + print(io, source[q+1:d]) + source[d] == '\n' || print(io, "\n") + _print_marker_line(io, source[a:p-1], hitext, true, true, marker_line_color, note, notecolor) + else + # x -------------- + # # ┌───── + # a---p----b + # --------------y + # --------------- + # z-------------- + # c --------q----d + # #───────────┘ ── note + # -----------------w + + prefix1 = source[a:p-1] + print(io, source[x:a-1]) + _print_marker_line(io, prefix1, source[p:b], false, false, marker_line_color, nothing, notecolor) + print(io, '\n') + print(io, prefix1) + if q_line - p_line - 1 <= 2*context_lines_inner + # The diagnostic range is compact and we show the whole thing + 
_printstyled(io, source[p:q]; bgcolor=color) + else + # Or large and we trucate the code to show only the region around the + # start and end of the error. + _printstyled(io, source[p:y]; bgcolor=color) + print(io, "⋮\n") + _printstyled(io, source[z:q]; bgcolor=color) + end + print(io, source[q+1:d]) + source[d] == '\n' || print(io, "\n") + qline = source[c:q] + _print_marker_line(io, "", qline, true, false, marker_line_color, note, notecolor) + end + if context_lines_after > 0 && d+1 < lastindex(source) + print(io, '\n') + w1 = source[w] == '\n' ? w - 1 : w + print(io, source[d+1:w1]) + end +end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 66cf8757668f5..7563d69dafcba 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -165,8 +165,11 @@ last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1 Get the full source text of a node. """ function sourcetext(node::AbstractSyntaxNode) - val_range = (node.position-1) .+ (1:span(node)) - view(node.source, val_range) + view(node.source, range(node)) +end + +function Base.range(node::AbstractSyntaxNode) + (node.position-1) .+ (1:span(node)) end source_line(node::AbstractSyntaxNode) = source_line(node.source, node.position) @@ -328,13 +331,12 @@ function child_position_span(node::SyntaxNode, path::Int...) n, n.position, span(n) end -""" -Print the code, highlighting the part covered by `node` at tree `path`. -""" -function highlight(io::IO, code::String, node, path::Int...; color=(40,40,70)) - node, p, span = child_position_span(node, path...) - q = p + span - print(io, code[1:p-1]) - _printstyled(io, code[p:q-1]; bgcolor=color) - print(io, code[q:end]) +function highlight(io::IO, node::SyntaxNode; kws...) + highlight(io, node.source, range(node); kws...) +end + +function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) + _, p, span = child_position_span(node, path...) 
+ q = p + span - 1 + highlight(io, source, p:q; kws...) end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 153d845a419c4..f30f06f99498e 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -52,28 +52,76 @@ end #------------------------------------------------------------------------------- # Text printing/display utils +const _fg_color_codes = Dict( + :black => 30, + :red => 31, + :green => 32, + :yellow => 33, + :blue => 34, + :magenta => 35, + :cyan => 36, + :white => 37, + :light_black => 90, # gray + :light_red => 91, + :light_green => 92, + :light_yellow => 93, + :light_blue => 94, + :light_magenta => 95, + :light_cyan => 96, + :light_white => 97, +) + """ - Like printstyled, but allows providing RGB colors for true color terminals + _printstyled(io::IO, text; + fgcolor=nothing, bgcolor=nothing, href=nothing) + +Like Base.printstyled, but allows providing RGB colors for true color +terminals, both foreground and background colors, and hyperlinks. Colors may be +given as one of the standard color names as in `Base.printstyled`, an integer +for 256 color terms, or an (r,g,b) triple with `0 <= r <= 255` etc for true +color terminals. 
+ +* `fgcolor` - set foreground color +* `bgcolor` - set background color +* `href` - set hyperlink reference """ -function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing) +function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing, href=nothing) + if (isnothing(fgcolor) && isnothing(bgcolor) && isnothing(href)) || !get(io, :color, false) + print(io, text) + return + end colcode = "" if !isnothing(fgcolor) - if length(fgcolor) != 3 || !all(0 .<= fgcolor .< 256) + if fgcolor isa Symbol && haskey(_fg_color_codes, fgcolor) + colcode *= "\e[$(_fg_color_codes[fgcolor])m" + elseif fgcolor isa Integer && 0 <= fgcolor <= 255 + colcode *= "\e[38;5;$(fgcolor)m" + elseif fgcolor isa Tuple && length(fgcolor) == 3 && all(0 .<= fgcolor .<= 255) + colcode *= "\e[38;2;$(fgcolor[1]);$(fgcolor[2]);$(fgcolor[3])m" + else error("Invalid ansi color $fgcolor") end - colcode *= "\e[38;2;$(fgcolor[1]);$(fgcolor[2]);$(fgcolor[3])m" end if !isnothing(bgcolor) - if length(bgcolor) != 3 || !all(0 .<= bgcolor .< 256) + if bgcolor isa Symbol && haskey(_fg_color_codes, bgcolor) + colcode *= "\e[$(10 + _fg_color_codes[bgcolor])m" + elseif bgcolor isa Integer && 0 <= bgcolor <= 255 + colcode *= "\e[48;5;$(bgcolor)m" + elseif bgcolor isa Tuple && length(bgcolor) == 3 && all(0 .<= bgcolor .<= 255) + colcode *= "\e[48;2;$(bgcolor[1]);$(bgcolor[2]);$(bgcolor[3])m" + else error("Invalid ansi color $bgcolor") end - colcode *= "\e[48;2;$(bgcolor[1]);$(bgcolor[2]);$(bgcolor[3])m" end colreset = "\e[0;0m" first = true for linepart in split(text, '\n') first || print(io, '\n') - print(io, colcode, linepart, colreset) + line = string(colcode, linepart, colreset) + if !isnothing(href) + line = "\e]8;;$href\e\\$line\e]8;;\e\\" + end + print(io, line) first = false end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 3324b2b078ce2..1df7934547361 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -56,3 +56,15 @@ end 
Diagnostic(12, 13, :error, "invalid escape sequence") ] end + +@testset "diagnostic printing" begin + stream = JuliaSyntax.ParseStream("a -- b -- c") + JuliaSyntax.parse!(stream) + @test sprint(JuliaSyntax.show_diagnostics, stream) == """ + # Error @ line 1:3 + a -- b -- c + # └┘ ── invalid operator + # Error @ line 1:8 + a -- b -- c + # └┘ ── invalid operator""" +end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 3e25f3225a041..1c4259a6cc148 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -98,3 +98,29 @@ @test parseshow("1f1000", ignore_errors=true) == "(ErrorNumericOverflow)" end end + +@testset "ParseError printing" begin + try + JuliaSyntax.parse(JuliaSyntax.SyntaxNode, "a -- b -- c", filename="somefile.jl") + @assert false "error should be thrown" + catch exc + @test exc isa JuliaSyntax.ParseError + @test sprint(showerror, exc) == """ + ParseError: + # Error @ somefile.jl:1:3 + a -- b -- c + # └┘ ── invalid operator + # Error @ somefile.jl:1:8 + a -- b -- c + # └┘ ── invalid operator""" + file_url = JuliaSyntax._file_url("somefile.jl") + @test sprint(showerror, exc, context=:color=>true) == """ + ParseError: + \e[90m# Error @ \e[0;0m\e]8;;$file_url#1:3\e\\\e[90msomefile.jl:1:3\e[0;0m\e]8;;\e\\ + a \e[48;2;120;70;70m--\e[0;0m b -- c + \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m + \e[90m# Error @ \e[0;0m\e]8;;$file_url#1:8\e\\\e[90msomefile.jl:1:8\e[0;0m\e]8;;\e\\ + a -- b \e[48;2;120;70;70m--\e[0;0m c + \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" + end +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 338470917c3f1..9bb664f1e025f 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -42,6 +42,8 @@ include("test_utils.jl") Expr(:tuple, Expr(:parameters, :b), :a)) end +include("utils.jl") + @testset "Tokenize" begin include("tokenize.jl") end diff --git a/JuliaSyntax/test/source_files.jl 
b/JuliaSyntax/test/source_files.jl index 54bcccde48ae3..72455dfcf6acf 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -49,3 +49,115 @@ end end""" @test sourcetext(srcf) == "module Foo\nend" end + + +@testset "highlight()" begin + src = JuliaSyntax.SourceFile(""" + abcd + αβγδ + +-*/""") + + @test sprint(highlight, src, 1:4) == "abcd\n└──┘\nαβγδ\n+-*/" + @test sprint(highlight, src, 2:4) == "abcd\n#└─┘\nαβγδ\n+-*/" + @test sprint(highlight, src, 3:4) == "abcd\n# └┘\nαβγδ\n+-*/" + @test sprint(highlight, src, 4:4) == "abcd\n# ╙\nαβγδ\n+-*/" + @test sprint(highlight, src, 4:3) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 5:5) == "abcd\n# └\nαβγδ\n+-*/" + + # multi-byte chars + @test sprint(highlight, src, 8:13) == """ + abcd + αβγδ + #└─┘ + +-*/""" + + # Multi-line ranges + @test sprint(highlight, src, 1:7) == """ + ┌─── + abcd + αβγδ + ┘ + +-*/""" + @test sprint(highlight, src, 2:7) == """ + #┌── + abcd + αβγδ + ┘ + +-*/""" + @test sprint(highlight, src, 2:9) == """ + #┌── + abcd + αβγδ + #┘ + +-*/""" + @test sprint(highlight, src, 4:9) == """ + # ┌ + abcd + αβγδ + #┘ + +-*/""" + @test sprint(highlight, src, 5:9) == """ + # ┌ + abcd + αβγδ + #┘ + +-*/""" + @test sprint(highlight, src, 1:18) == """ + ┌─── + abcd + αβγδ + +-*/ + #──┘""" + + # context lines + @test sprint(io->highlight(io, src, 8:13; + context_lines_before=0, + context_lines_after=0)) == """ + αβγδ + #└─┘""" + @test sprint(io->highlight(io, src, 8:13; context_lines_after=0)) == """ + abcd + αβγδ + #└─┘""" + @test sprint(io->highlight(io, src, 8:13; context_lines_before=0)) == """ + αβγδ + #└─┘ + +-*/""" + @test sprint(io->highlight(io, src, 1:18; context_lines_inner=0)) == """ + ┌─── + abcd + ⋮ + +-*/ + #──┘""" + + # annotations + @test sprint(io->highlight(io, src, 8:13; note="hello")) == """ + abcd + αβγδ + #└─┘ ── hello + +-*/""" + @test sprint(io->highlight(io, src, 1:13; note="hello")) == """ + ┌─── + abcd + αβγδ + #──┘ ── hello + +-*/""" + @test 
sprint(io->highlight(io, src, 8:13; + note=(io,indent,w)->print(io, "\n$indent$('!'^w) hello"))) == """ + abcd + αβγδ + #└─┘ + #!!! hello + +-*/""" + + # colored output + @test sprint(io->highlight(io, src, 8:13; context_lines_after=0, note="hello", notecolor=:light_red), + context=:color=>true) == + "abcd\nα\e[48;2;120;70;70mβγδ\e[0;0m\n\e[90m#└─┘ ── \e[0;0m\e[91mhello\e[0;0m" + @test sprint(io->highlight(io, src, 1:13; context_lines_after=0, note="hello", notecolor=(255,0,0)), + context=:color=>true) == + "\e[90m┌───\e[0;0m\n\e[48;2;120;70;70mabcd\e[0;0m\n\e[48;2;120;70;70mαβγδ\e[0;0m\n\e[90m#──┘ ── \e[0;0m\e[38;2;255;0;0mhello\e[0;0m" + @test sprint(io->highlight(io, src, 1:18, context_lines_inner=0), + context=:color=>true) == + "\e[90m┌───\e[0;0m\n\e[48;2;120;70;70mabcd\e[0;0m\n\e[48;2;120;70;70m\e[0;0m⋮\n\e[48;2;120;70;70m+-*/\e[0;0m\n\e[90m#──┘\e[0;0m" +end diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 30a77cdc49e02..c6dd658508c2f 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -26,8 +26,8 @@ # These tests are deliberately quite relaxed to avoid being too specific about display style @test occursin("line:col", str) @test occursin("call-i", str) - @test sprint(JuliaSyntax.highlight, tt, t, 1, 3) == "a*\e[48;2;40;40;70mb\e[0;0m + c" - @test sprint(JuliaSyntax.highlight, tt, t.raw, 5) == "a*b + \e[48;2;40;40;70mc\e[0;0m" + @test sprint(highlight, child(t, 1, 3)) == "a*b + c\n# ╙" + @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" # Pass-through field access node = child(t, 1, 1) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index ada480404ccca..2059f96af7036 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -26,7 +26,8 @@ using .JuliaSyntax: children, child, fl_parseall, - fl_parse + fl_parse, + highlight if VERSION < v"1.6" # Compat stuff which might not be in Base for older versions diff --git 
a/JuliaSyntax/test/utils.jl b/JuliaSyntax/test/utils.jl new file mode 100644 index 0000000000000..227077f6b2196 --- /dev/null +++ b/JuliaSyntax/test/utils.jl @@ -0,0 +1,17 @@ +@testset "_printstyled" begin + ps(str; kws...) = sprint(io->JuliaSyntax._printstyled(IOContext(io, :color=>true), str; kws...)) + + @test ps("XX"; fgcolor=:red) == "\e[31mXX\e[0;0m" + @test ps("XX"; fgcolor=42) == "\e[38;5;42mXX\e[0;0m" + @test ps("XX"; fgcolor=(10,100,200)) == "\e[38;2;10;100;200mXX\e[0;0m" + + ps("XX"; bgcolor=:red) == "\e[41mXX\e[0;0m" + @test ps("XX"; bgcolor=42) == "\e[48;5;42mXX\e[0;0m" + @test ps("XX"; bgcolor=(10,100,200)) == "\e[48;2;10;100;200mXX\e[0;0m" + + @test ps("XX"; href="https://www.example.com") == + "\e]8;;https://www.example.com\e\\XX\e[0;0m\e]8;;\e\\" + + @test ps("XX", fgcolor=:red, bgcolor=:green, href="https://www.example.com") == + "\e]8;;https://www.example.com\e\\\e[31m\e[42mXX\e[0;0m\e]8;;\e\\" +end From 99dee462b92bc8fc517f70e9bfd6524e731ed46d Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 12 Mar 2023 02:44:52 +1000 Subject: [PATCH 0597/1109] Improve syntax diffing in General registry checker (JuliaLang/JuliaSyntax.jl#216) --- JuliaSyntax/test/test_utils.jl | 20 +++--- JuliaSyntax/tools/check_all_packages.jl | 95 ++++++++++++------------- 2 files changed, 54 insertions(+), 61 deletions(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 2059f96af7036..c0b1e7ab1265a 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -245,14 +245,7 @@ function equals_flisp_parse(exprs_equal, tree) exprs_equal(fl_ex, ex) end -""" - reduce_test(text::AbstractString; exprs_equal=exprs_equal_no_linenum) - reduce_test(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) - -Select minimal subtrees of `text` or `tree` which are inconsistent between -flisp and JuliaSyntax parsers. 
-""" -function reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) +function _reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) if equals_flisp_parse(exprs_equal, tree) return false end @@ -266,7 +259,7 @@ function reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) if is_trivia(child) || !haschildren(child) continue end - had_failing_subtrees |= reduce_test(failing_subtrees, child; exprs_equal=exprs_equal) + had_failing_subtrees |= _reduce_test(failing_subtrees, child; exprs_equal=exprs_equal) end end if !had_failing_subtrees @@ -275,9 +268,16 @@ function reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) return true end +""" + reduce_test(text::AbstractString; exprs_equal=exprs_equal_no_linenum) + reduce_test(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) + +Select minimal subtrees of `text` or `tree` which are inconsistent between +flisp and JuliaSyntax parsers. +""" function reduce_test(tree::SyntaxNode; kws...) subtrees = Vector{typeof(tree)}() - reduce_test(subtrees, tree; kws...) + _reduce_test(subtrees, tree; kws...) subtrees end diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 3ee160c089b11..5e9f47b50bb6d 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -3,76 +3,69 @@ # # Run this after registry_download.jl (so the pkgs directory is populated). 
-using JuliaSyntax, Logging, Serialization +using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization include("../test/test_utils.jl") -logio = open(joinpath(@__DIR__, "logs.txt"), "w") -logger = Logging.ConsoleLogger(logio) - pkgspath = joinpath(@__DIR__, "pkgs") +source_paths = find_source_in_path(pkgspath) +file_count = length(source_paths) exception_count = 0 mismatch_count = 0 -file_count = 0 t0 = time() exceptions = [] -Logging.with_logger(logger) do - global exception_count, mismatch_count, file_count, t0 - for (r, _, files) in walkdir(pkgspath) - for f in files - endswith(f, ".jl") || continue - fpath = joinpath(r, f) - isfile(fpath) || continue - - code = read(fpath, String) - expr_cache = fpath*".Expr" - #e2 = JuliaSyntax.fl_parseall(code) - e2 = open(deserialize, fpath*".Expr") - @assert Meta.isexpr(e2, :toplevel) - try - e1 = JuliaSyntax.parseall(Expr, code, filename=fpath, ignore_warnings=true) - if !exprs_roughly_equal(e2, e1) - mismatch_count += 1 - @error("Parsers succeed but disagree", - fpath, - diff=Text(sprint(show_expr_text_diff, show, e1, e2)), - ) - end - catch err - err isa InterruptException && rethrow() - ex = (err, catch_backtrace()) - push!(exceptions, ex) - ref_parse = "success" - if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) - ref_parse = "fail" - if err isa JuliaSyntax.ParseError - # Both parsers agree that there's an error, and - # JuliaSyntax didn't have an internal error. 
- continue +Logging.with_logger(TerminalLogger()) do + global exception_count, mismatch_count, t0 + @withprogress for (ifile, fpath) in enumerate(source_paths) + @logprogress ifile/file_count time_ms=round((time() - t0)/ifile*1000, digits = 2) + text = read(fpath, String) + expr_cache = fpath*".Expr" + #e2 = JuliaSyntax.fl_parseall(text) + e2 = open(deserialize, fpath*".Expr") + @assert Meta.isexpr(e2, :toplevel) + try + e1 = JuliaSyntax.parseall(Expr, text, filename=fpath, ignore_warnings=true) + if !exprs_roughly_equal(e2, e1) + mismatch_count += 1 + reduced_chunks = sprint(context=:color=>true) do io + for c in reduce_test(text) + JuliaSyntax.highlight(io, c.source, range(c), context_inner_lines=5) + println(io, "\n") end end - - exception_count += 1 - parse_to_syntax = "success" - try - JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code) - catch err2 - parse_to_syntax = "fail" + @error("Parsers succeed but disagree", + fpath, + reduced_chunks=Text(reduced_chunks), + # diff=Text(sprint(show_expr_text_diff, show, e1, e2)), + ) + end + catch err + err isa InterruptException && rethrow() + ex = (err, catch_backtrace()) + push!(exceptions, ex) + ref_parse = "success" + if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) + ref_parse = "fail" + if err isa JuliaSyntax.ParseError + # Both parsers agree that there's an error, and + # JuliaSyntax didn't have an internal error. 
+ continue end - @error "Parse failed" fpath exception=ex parse_to_syntax end - file_count += 1 - if file_count % 100 == 0 - t_avg = round((time() - t0)/file_count*1000, digits = 2) - print(stderr, "\r$file_count files parsed, $t_avg ms per file") + exception_count += 1 + parse_to_syntax = "success" + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code) + catch err2 + parse_to_syntax = "fail" end + @error "Parse failed" fpath exception=ex parse_to_syntax end end end -close(logio) t_avg = round((time() - t0)/file_count*1000, digits = 2) From f4cd69d7e72043fc27a1b07f04818c775d7ae7fb Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Mar 2023 12:46:38 +1000 Subject: [PATCH 0598/1109] Parse docstring attachment as `K"doc"` kind (JuliaLang/JuliaSyntax.jl#217) With this change `"str" f` now parses as (doc (string "str") f) This better represents the surface syntax which isn't an explicit macro call but a juxtaposition at top level. --- JuliaSyntax/src/expr.jl | 5 +++-- JuliaSyntax/src/kinds.jl | 1 - JuliaSyntax/src/parser.jl | 17 +++++++---------- JuliaSyntax/src/syntax_tree.jl | 2 -- JuliaSyntax/test/expr.jl | 7 +++++++ JuliaSyntax/test/parser.jl | 12 ++++++------ 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 61ec2e75239ef..fcc544f8fe826 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -44,8 +44,6 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, val isa UInt128 ? 
Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) - elseif kind(node) == K"core_@doc" - return GlobalRef(Core, Symbol("@doc")) elseif kind(node) == K"core_@cmd" return GlobalRef(Core, Symbol("@cmd")) elseif kind(node) == K"MacroName" && val === Symbol("@.") @@ -156,6 +154,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, if headsym === :macrocall reorder_parameters!(args, 2) insert!(args, 2, loc) + elseif headsym === :doc + return Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), + loc, args...) elseif headsym in (:dotcall, :call) # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 3b5fa1bd10c63..73e63c7a21630 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -864,7 +864,6 @@ const _kind_names = "MacroName" "StringMacroName" "CmdMacroName" - "core_@doc" "core_@cmd" "core_@int128_str" "core_@uint128_str" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9347ee73f42f6..844cd398cecd9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -477,7 +477,7 @@ end # # a;b;c ==> (toplevel a b c) # a;;;b;; ==> (toplevel a b) -# "x" a ; "y" b ==> (toplevel (macrocall core_@doc "x" a) (macrocall core_@doc "y" b)) +# "x" a ; "y" b ==> (toplevel (doc (string "x") a) (doc (string "y") b)) # # flisp: parse-stmts function parse_stmts(ps::ParseState) @@ -500,12 +500,10 @@ function parse_stmts(ps::ParseState) end # Parse docstrings attached by a space or single newline -# "doc" foo ==> (macrocall core_@doc "doc" foo) # # flisp: parse-docstring function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) - atdoc_mark = bump_invisible(ps, K"TOMBSTONE") down(ps) if peek_behind(ps).kind == K"string" is_doc = true @@ -521,19 +519,18 @@ function 
parse_docstring(ps::ParseState, down=parse_eq) is_doc = false else # Allow a single newline - # "doc" \n foo ==> (macrocall core_@doc (string "doc") foo) + # "doc" \n foo ==> (doc (string "doc") foo) bump(ps, TRIVIA_FLAG) # NewlineWs end else - # "doc" foo ==> (macrocall core_@doc (string "doc") foo) - # "doc $x" foo ==> (macrocall core_@doc (string "doc " x) foo) + # "doc" foo ==> (doc (string "doc") foo) + # "doc $x" foo ==> (doc (string "doc " x) foo) # Allow docstrings with embedded trailing whitespace trivia - # """\n doc\n """ foo ==> (macrocall core_@doc (string-s "doc\n") foo) + # """\n doc\n """ foo ==> (doc (string-s "doc\n") foo) end if is_doc - reset_node!(ps, atdoc_mark, kind=K"core_@doc") down(ps) - emit(ps, mark, K"macrocall") + emit(ps, mark, K"doc") end end end @@ -2003,7 +2000,7 @@ function parse_resword(ps::ParseState) parse_unary_prefix(ps) end # module A \n a \n b \n end ==> (module true A (block a b)) - # module A \n "x"\na \n end ==> (module true A (block (core_@doc (string "x") a))) + # module A \n "x"\na \n end ==> (module true A (block (doc (string "x") a))) parse_block(ps, parse_docstring) bump_closing_token(ps, K"end") emit(ps, mark, K"module") diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 7563d69dafcba..7695129f72dbc 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -117,8 +117,6 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In Symbol("@$(normalize_identifier(val_str))_str") elseif k == K"CmdMacroName" Symbol("@$(normalize_identifier(val_str))_cmd") - elseif k == K"core_@doc" - Symbol("core_@doc") elseif k == K"core_@cmd" Symbol("core_@cmd") elseif is_syntax_kind(raw) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 37f22246501b0..84f0f6e92707d 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -295,4 +295,11 @@ Expr(:block, LineNumberNode(1), :w), Expr(:block, LineNumberNode(1), :z)) end + 
+ @testset "Core.@doc" begin + @test parse(Expr, "\"x\" f") == + Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(1), "x", :f) + @test parse(Expr, "\n\"x\" f") == + Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f) + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f95f176e9ed03..56d0bd8730e9b 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -59,7 +59,7 @@ tests = [ "a;b;c" => "(toplevel a b c)" "a;;;b;;" => "(toplevel a b)" """ "x" a ; "y" b """ => - """(toplevel (macrocall core_@doc (string "x") a) (macrocall core_@doc (string "y") b))""" + """(toplevel (doc (string "x") a) (doc (string "y") b))""" "x y" => "x (error-t y)" ], JuliaSyntax.parse_eq => [ @@ -487,7 +487,7 @@ tests = [ "module do \n end" => "(module true (error (do)) (block))" "module \$A end" => "(module true (\$ A) (block))" "module A \n a \n b \n end" => "(module true A (block a b))" - """module A \n "x"\na\n end""" => """(module true A (block (macrocall core_@doc (string "x") a)))""" + """module A \n "x"\na\n end""" => """(module true A (block (doc (string "x") a)))""" # export "export a" => "(export a)" => Expr(:export, :a) "export @a" => "(export @a)" => Expr(:export, Symbol("@a")) @@ -912,11 +912,11 @@ tests = [ """ "notdoc" ] """ => "(string \"notdoc\")" """ "notdoc" \n] """ => "(string \"notdoc\")" """ "notdoc" \n\n foo """ => "(string \"notdoc\")" - """ "doc" \n foo """ => """(macrocall core_@doc (string "doc") foo)""" - """ "doc" foo """ => """(macrocall core_@doc (string "doc") foo)""" - """ "doc \$x" foo """ => """(macrocall core_@doc (string "doc " x) foo)""" + """ "doc" \n foo """ => """(doc (string "doc") foo)""" + """ "doc" foo """ => """(doc (string "doc") foo)""" + """ "doc \$x" foo """ => """(doc (string "doc " x) foo)""" # Allow docstrings with embedded trailing whitespace trivia - "\"\"\"\n doc\n \"\"\" foo" => """(macrocall core_@doc (string-s "doc\\n") foo)""" + "\"\"\"\n 
doc\n \"\"\" foo" => """(doc (string-s "doc\\n") foo)""" ], ] From 85e780bcc39dceba9fa91a99c2a77c19bbf6f848 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Mar 2023 12:46:56 +1000 Subject: [PATCH 0599/1109] Add PARENS_FLAG to tuple, block and macrocall (JuliaLang/JuliaSyntax.jl#218) Several syntactic constructs can occur either with or without parentheses and it can be useful to distinguish between these without looking at the syntax trivia - particularly for code formatting, but also for other reasons. * Macro calls: `@x(a,b)` vs `@x a b` * Blocks: `(a; b)` vs `begin a ; b end` * Tuples: `a,b` vs `(a,b)` (for example, see JuliaLang/JuliaSyntax.jl#194) Also modify the printing of head flags so that each flag is clearly distinguished from every other flag with a `-`. --- JuliaSyntax/src/expr.jl | 3 ++ JuliaSyntax/src/parse_stream.jl | 82 +++++++++++++++--------------- JuliaSyntax/src/parser.jl | 90 +++++++++++++++++---------------- JuliaSyntax/test/parser.jl | 63 +++++++++++------------ 4 files changed, 122 insertions(+), 116 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index fcc544f8fe826..bcec2b7ebafc9 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -146,6 +146,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, _to_expr(n, eq_to_kw=eq_to_kw, map_kw_in_params=in_vcbr) end + if nodekind == K"block" && has_flags(node, PARENS_FLAG) + popfirst!(args) + end end end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index f5a8bcd7a7c4b..d81b7810a7348 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -25,12 +25,7 @@ const TRIPLE_STRING_FLAG = RawFlags(1<<5) # Set when a string or identifier needs "raw string" unescaping const RAW_STRING_FLAG = RawFlags(1<<6) -# TODO? 
-# const ERROR_FLAG = RawFlags(1<<7) - -# Token-only flag -# Record whether a token had preceding whitespace -const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7) +const PARENS_FLAG = RawFlags(1<<7) # Flags holding the dimension of an nrow or other UInt8 not held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) @@ -77,21 +72,18 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) if is_dotted(head) str = "."*str end - # Ignore some flags: - # DOTOP_FLAG is represented above with . prefix - # PRECEDING_WHITESPACE_FLAG relates to the environment of this token - suffix_flags = remove_flags(flags(head), DOTOP_FLAG, PRECEDING_WHITESPACE_FLAG) - if include_flag_suff && suffix_flags != EMPTY_FLAGS - str = str*"-" - is_trivia(head) && (str = str*"t") - is_infix_op_call(head) && (str = str*"i") - is_prefix_op_call(head) && (str = str*"pre") - is_postfix_op_call(head) && (str = str*"post") - has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") - has_flags(head, RAW_STRING_FLAG) && (str = str*"r") - is_suffixed(head) && (str = str*"S") + if include_flag_suff + # Ignore DOTOP_FLAG - it's represented above with . 
prefix + is_trivia(head) && (str = str*"-t") + is_infix_op_call(head) && (str = str*"-i") + is_prefix_op_call(head) && (str = str*"-pre") + is_postfix_op_call(head) && (str = str*"-post") + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") + has_flags(head, PARENS_FLAG) && (str = str*"-p") + is_suffixed(head) && (str = str*"-S") n = numeric_flags(head) - n != 0 && (str = str*string(n)) + n != 0 && (str = str*"-"*string(n)) end str end @@ -116,7 +108,6 @@ is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG is_dotted(x) = has_flags(x, DOTOP_FLAG) is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) is_decorated(x) = is_dotted(x) || is_suffixed(x) -preceding_whitespace(x) = has_flags(x, PRECEDING_WHITESPACE_FLAG) numeric_flags(x) = numeric_flags(flags(x)) #------------------------------------------------------------------------------- @@ -131,18 +122,17 @@ token to be used for recording the first byte of the first real token. struct SyntaxToken head::SyntaxHead orig_kind::Kind + preceding_whitespace::Bool next_byte::UInt32 end -function SyntaxToken(head::SyntaxHead, next_byte::Integer) - SyntaxToken(head, kind(head), next_byte) -end - function Base.show(io::IO, tok::SyntaxToken) print(io, rpad(untokenize(tok.head, unique=false), 15), " |", tok.next_byte) end head(tok::SyntaxToken) = tok.head +flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS) +preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace #------------------------------------------------------------------------------- @@ -240,7 +230,7 @@ mutable struct ParseStream ver = (version.major, version.minor) # Initial sentinel token containing the first byte of the first real token. 
sentinel = SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), - K"TOMBSTONE", next_byte) + K"TOMBSTONE", false, next_byte) new(text_buf, text_root, lexer, @@ -353,10 +343,10 @@ function _buffer_lookahead_tokens(lexer, lookahead) was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") had_whitespace |= was_whitespace f = EMPTY_FLAGS - had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG) raw.dotop && (f |= DOTOP_FLAG) raw.suffix && (f |= SUFFIXED_FLAG) - push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.endbyte + 2)) + push!(lookahead, SyntaxToken(SyntaxHead(k, f), k, + had_whitespace, raw.endbyte + 2)) token_count += 1 if k == K"EndMarker" break @@ -471,7 +461,7 @@ function peek_token(stream::ParseStream, n::Integer=1; if !skip_whitespace i = stream.lookahead_index end - return @inbounds head(stream.lookahead[i]) + return @inbounds stream.lookahead[i] end @@ -613,12 +603,13 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None if k == K"EndMarker" break end - f = flags | remove_flags((@__MODULE__).flags(tok), PRECEDING_WHITESPACE_FLAG) + f = flags | (@__MODULE__).flags(tok) is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") is_trivia && (f |= TRIVIA_FLAG) outk = (is_trivia || remap_kind == K"None") ? k : remap_kind h = SyntaxHead(outk, f) - push!(stream.tokens, SyntaxToken(h, kind(tok), tok.next_byte)) + push!(stream.tokens, + SyntaxToken(h, kind(tok), tok.preceding_whitespace, tok.next_byte)) end stream.lookahead_index = n + 1 # Defuse the time bomb @@ -675,7 +666,7 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; error=nothing) b = _next_byte(stream) h = SyntaxHead(kind, flags) - push!(stream.tokens, SyntaxToken(h, b)) + push!(stream.tokens, SyntaxToken(h, (@__MODULE__).kind(h), false, b)) if !isnothing(error) emit_diagnostic(stream, b, b-1, error=error) end @@ -693,7 +684,8 @@ whitespace if necessary with bump_trivia. 
function bump_glue(stream::ParseStream, kind, flags, num_tokens) i = stream.lookahead_index h = SyntaxHead(kind, flags) - push!(stream.tokens, SyntaxToken(h, stream.lookahead[i+1].next_byte)) + push!(stream.tokens, SyntaxToken(h, kind, false, + stream.lookahead[i+1].next_byte)) stream.lookahead_index += num_tokens stream.peek_count = 0 return position(stream) @@ -724,7 +716,7 @@ function bump_split(stream::ParseStream, split_spec...) for (i, (nbyte, k, f)) in enumerate(split_spec) h = SyntaxHead(k, f) b = (i == length(split_spec)) ? tok.next_byte : b + nbyte - push!(stream.tokens, SyntaxToken(h, kind(tok), b)) + push!(stream.tokens, SyntaxToken(h, kind(tok), false, b)) end stream.peek_count = 0 return position(stream) @@ -747,12 +739,14 @@ function reset_node!(stream::ParseStream, pos::ParseStreamPosition; kind=nothing, flags=nothing) if token_is_last(stream, pos) t = stream.tokens[pos.token_index] - stream.tokens[pos.token_index] = SyntaxToken(_reset_node_head(t, kind, flags), - t.orig_kind, t.next_byte) + stream.tokens[pos.token_index] = + SyntaxToken(_reset_node_head(t, kind, flags), + t.orig_kind, t.preceding_whitespace, t.next_byte) else r = stream.ranges[pos.range_index] - stream.ranges[pos.range_index] = TaggedRange(_reset_node_head(r, kind, flags), - r.first_token, r.last_token) + stream.ranges[pos.range_index] = + TaggedRange(_reset_node_head(r, kind, flags), + r.first_token, r.last_token) end end @@ -770,11 +764,13 @@ function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numby t2 = stream.tokens[i+1] t1_next_byte = t1.next_byte + numbytes - stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind, t1_next_byte) + stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind, + t1.preceding_whitespace, t1_next_byte) t2_is_empty = t1_next_byte == t2.next_byte head2 = t2_is_empty ? 
SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head - stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind, t2.next_byte) + stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind, + t2.preceding_whitespace, t2.next_byte) return t2_is_empty end @@ -920,7 +916,8 @@ function validate_tokens(stream::ParseStream) end if error_kind != K"None" toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS), - t.orig_kind, t.next_byte) + t.orig_kind, t.preceding_whitespace, + t.next_byte) end end sort!(stream.diagnostics, by=first_byte) @@ -1052,6 +1049,7 @@ function Base.empty!(stream::ParseStream) empty!(stream.tokens) # Restore sentinel token push!(stream.tokens, SyntaxToken(SyntaxHead(K"TOMBSTONE",EMPTY_FLAGS), - K"TOMBSTONE", t.next_byte)) + K"TOMBSTONE", t.preceding_whitespace, + t.next_byte)) empty!(stream.ranges) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 844cd398cecd9..51f4b617b370f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1281,11 +1281,13 @@ function parse_unary(ps::ParseState) tb1 = ps.stream.tokens[op_pos.token_index-1] ps.stream.tokens[op_pos.token_index-1] = SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), - K"TOMBSTONE", tb1.next_byte-1) + K"TOMBSTONE", tb1.preceding_whitespace, + tb1.next_byte-1) tb0 = ps.stream.tokens[op_pos.token_index] ps.stream.tokens[op_pos.token_index] = SyntaxToken(SyntaxHead(kind(tb0), flags(tb0)), - tb0.orig_kind, tb0.next_byte) + tb0.orig_kind, tb0.preceding_whitespace, + tb0.next_byte) end emit(ps, mark, K"call") end @@ -1295,8 +1297,8 @@ function parse_unary(ps::ParseState) # Unary function calls with brackets as grouping, not an arglist # .+(a) ==> (dotcall-pre (. 
+) a) if opts.is_block - # +(a;b) ==> (call-pre + (block a b)) - emit(ps, mark_before_paren, K"block") + # +(a;b) ==> (call-pre + (block-p a b)) + emit(ps, mark_before_paren, K"block", PARENS_FLAG) end # Not a prefix operator call but a block; `=` is not `kw` # +(a=1) ==> (call-pre + (= a 1)) @@ -1499,7 +1501,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # Macro calls with space-separated arguments # @foo a b ==> (macrocall @foo a b) # @foo (x) ==> (macrocall @foo x) - # @foo (x,y) ==> (macrocall @foo (tuple x y)) + # @foo (x,y) ==> (macrocall @foo (tuple-p x y)) # [@foo x] ==> (vect (macrocall @foo x)) # [@foo] ==> (vect (macrocall @foo)) # @var"#" a ==> (macrocall (var @#) a) @@ -1543,14 +1545,16 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") - emit(ps, mark, is_macrocall ? K"macrocall" : K"call") + emit(ps, mark, is_macrocall ? K"macrocall" : K"call", + is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if peek(ps) == K"do" - # f(x) do y body end ==> (do (call :f :x) (tuple :y) (block :body)) + # f(x) do y body end ==> (do (call f x) (tuple y) (block body)) parse_do(ps, mark) end if is_macrocall - # A.@x(y) ==> (macrocall (. A (quote @x)) y) - # A.@x(y).z ==> (. (macrocall (. A (quote @x)) y) (quote z)) + # @x(a, b) ==> (macrocall-p @x a b) + # A.@x(y) ==> (macrocall-p (. A (quote @x)) y) + # A.@x(y).z ==> (. (macrocall-p (. 
A (quote @x)) y) (quote z)) fix_macro_name_kind!(ps, macro_name_position) is_macrocall = false macro_atname_range = nothing @@ -1713,9 +1717,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # x"" ==> (macrocall @x_str (string-r "")) # x`` ==> (macrocall @x_cmd (cmdstring-r "")) # Triple quoted procesing for custom strings - # r"""\nx""" ==> (macrocall @r_str (string-sr "x")) - # r"""\n x\n y""" ==> (macrocall @r_str (string-sr "x\n" "y")) - # r"""\n x\\n y""" ==> (macrocall @r_str (string-sr "x\\\n" "y")) + # r"""\nx""" ==> (macrocall @r_str (string-s-r "x")) + # r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y")) + # r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y")) # # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. @@ -2136,17 +2140,17 @@ function parse_function_signature(ps::ParseState, is_function::Bool) is_anon_func = opts.is_anon_func parsed_call = opts.parsed_call if is_anon_func - # function (x) body end ==> (function (tuple x) (block body)) - # function (x::f()) end ==> (function (tuple (::-i x (call f))) (block)) - # function (x,y) end ==> (function (tuple x y) (block)) - # function (x=1) end ==> (function (tuple (= x 1)) (block)) - # function (;x=1) end ==> (function (tuple (parameters (= x 1))) (block)) - emit(ps, mark, K"tuple") + # function (x) body end ==> (function (tuple-p x) (block body)) + # function (x::f()) end ==> (function (tuple-p (::-i x (call f))) (block)) + # function (x,y) end ==> (function (tuple-p x y) (block)) + # function (x=1) end ==> (function (tuple-p (= x 1)) (block)) + # function (;x=1) end ==> (function (tuple-p (parameters (= x 1))) (block)) + emit(ps, mark, K"tuple", PARENS_FLAG) elseif is_empty_tuple # Weird case which is consistent with parse_paren but will be # rejected in lowering - # function ()(x) end ==> (function (call (tuple) x) (block)) - emit(ps, mark, K"tuple") + # function ()(x) end ==> (function (call 
(tuple-p) x) (block)) + emit(ps, mark, K"tuple", PARENS_FLAG) else # function (A).f() end ==> (function (call (. A (quote f))) (block)) # function (:)() end ==> (function (call :) (block)) @@ -2606,7 +2610,7 @@ end # i ∈ rhs ==> (= i rhs) # # i = 1:10 ==> (= i (call : 1 10)) -# (i,j) in iter ==> (= (tuple i j) iter) +# (i,j) in iter ==> (= (tuple-p i j) iter) # # flisp: parse-iteration-spec function parse_iteration_spec(ps::ParseState) @@ -2621,7 +2625,7 @@ function parse_iteration_spec(ps::ParseState) # outer <| x = rhs ==> (= (call-i outer <| x) rhs) else # outer i = rhs ==> (= (outer i) rhs) - # outer (x,y) = rhs ==> (= (outer (tuple x y)) rhs) + # outer (x,y) = rhs ==> (= (outer (tuple-p x y)) rhs) reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG) parse_pipe_lt(ps) emit(ps, mark, K"outer") @@ -3020,9 +3024,9 @@ function parse_paren(ps::ParseState, check_identifiers=true) after_paren_mark = position(ps) k = peek(ps) if k == K")" - # () ==> (tuple) + # () ==> (tuple-p) bump(ps, TRIVIA_FLAG) - emit(ps, mark, K"tuple") + emit(ps, mark, K"tuple", PARENS_FLAG) elseif is_syntactic_operator(k) # allow :(=) etc in unchecked contexts, eg quotes # :(=) ==> (quote =) @@ -3050,28 +3054,28 @@ function parse_paren(ps::ParseState, check_identifiers=true) end if opts.is_tuple # Tuple syntax with commas - # (x,) ==> (tuple x) - # (x,y) ==> (tuple x y) - # (x=1, y=2) ==> (tuple (= x 1) (= y 2)) + # (x,) ==> (tuple-p x) + # (x,y) ==> (tuple-p x y) + # (x=1, y=2) ==> (tuple-p (= x 1) (= y 2)) # # Named tuple with initial semicolon - # (;) ==> (tuple (parameters)) - # (; a=1) ==> (tuple (parameters (= a 1))) + # (;) ==> (tuple-p (parameters)) + # (; a=1) ==> (tuple-p (parameters (= a 1))) # # Extra credit: nested parameters and frankentuples - # (x...;) ==> (tuple (... x) (parameters)) - # (x...; y) ==> (tuple (... 
x) (parameters y)) - # (; a=1; b=2) ==> (tuple (parameters (= a 1)) (parameters (= b 2))) - # (a; b; c,d) ==> (tuple a (parameters b) (parameters c d)) - # (a=1, b=2; c=3) ==> (tuple (= a 1) (= b 2) (parameters (= c 3))) - emit(ps, mark, K"tuple") + # (x...;) ==> (tuple-p (... x) (parameters)) + # (x...; y) ==> (tuple-p (... x) (parameters y)) + # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) + # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) + # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) + emit(ps, mark, K"tuple", PARENS_FLAG) elseif opts.is_block # Blocks - # (;;) ==> (block) - # (a=1;) ==> (block (= a 1)) - # (a;b;;c) ==> (block a b c) - # (a=1; b=2) ==> (block (= a 1) (= b 2)) - emit(ps, mark, K"block") + # (;;) ==> (block-p) + # (a=1;) ==> (block-p (= a 1)) + # (a;b;;c) ==> (block-p a b c) + # (a=1; b=2) ==> (block-p (= a 1) (= b 2)) + emit(ps, mark, K"block", PARENS_FLAG) else # Parentheses used for grouping # (a * b) ==> (call-i * a b) @@ -3095,7 +3099,7 @@ end # syntax so the parse tree is pretty strange in these cases! Some macros # probably use it though. 
Example: # -# (a,b=1; c,d=2; e,f=3) ==> (tuple a (= b 1) (parameters c (= d 2)) (parameters e (= f 3))) +# (a,b=1; c,d=2; e,f=3) ==> (tuple-p a (= b 1) (parameters c (= d 2)) (parameters e (= f 3))) # # flisp: parts of parse-paren- and parse-arglist function parse_brackets(after_parse::Function, @@ -3246,7 +3250,7 @@ function parse_string(ps::ParseState, raw::Bool) # Triple-quoted dedenting: # Various newlines (\n \r \r\n) and whitespace (' ' \t) # """\n x\n y""" ==> (string-s "x\n" "y") - # ```\n x\n y``` ==> (macrocall :(Core.var"@cmd") (cmdstring-sr "x\n" "y")) + # ```\n x\n y``` ==> (macrocall :(Core.var"@cmd") (cmdstring-s-r "x\n" "y")) # """\r x\r y""" ==> (string-s "x\n" "y") # """\r\n x\r\n y""" ==> (string-s "x\n" "y") # Spaces or tabs or mixtures acceptable @@ -3544,7 +3548,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif leading_kind in KSet"` ```" # `` ==> (macrocall core_@cmd (cmdstring-r "")) # `cmd` ==> (macrocall core_@cmd (cmdstring-r "cmd")) - # ```cmd``` ==> (macrocall core_@cmd (cmdstring-sr "cmd")) + # ```cmd``` ==> (macrocall core_@cmd (cmdstring-s-r "cmd")) bump_invisible(ps, K"core_@cmd") parse_string(ps, true) emit(ps, mark, K"macrocall") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 56d0bd8730e9b..4d29ac76f0081 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -239,7 +239,7 @@ tests = [ "<:(a,)" => "(<: a)" # Unary function calls with brackets as grouping, not an arglist ".+(a)" => "(dotcall-pre + a)" - "+(a;b)" => "(call-pre + (block a b))" + "+(a;b)" => "(call-pre + (block-p a b))" "+(a=1)" => "(call-pre + (= a 1))" => Expr(:call, :+, Expr(:(=), :a, 1)) # Unary operators have lower precedence than ^ "+(a)^2" => "(call-pre + (call-i a ^ 2))" @@ -312,7 +312,7 @@ tests = [ # space separated macro calls "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo x)" - "@foo (x,y)" => "(macrocall @foo (tuple x y))" + "@foo (x,y)" => "(macrocall @foo (tuple-p 
x y))" "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" "[@foo x]" => "(vect (macrocall @foo x))" @@ -343,8 +343,9 @@ tests = [ Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) "f (a)" => "(call f (error-t) a)" - "A.@x(y)" => "(macrocall (. A (quote @x)) y)" - "A.@x(y).z" => "(. (macrocall (. A (quote @x)) y) (quote z))" + "@x(a, b)" => "(macrocall-p @x a b)" + "A.@x(y)" => "(macrocall-p (. A (quote @x)) y)" + "A.@x(y).z" => "(. (macrocall-p (. A (quote @x)) y) (quote z))" # do "f() do\nend" => "(do (call f) (tuple) (block))" "f() do ; body end" => "(do (call f) (tuple) (block body))" @@ -416,9 +417,9 @@ tests = [ "in\"str\"" => """(macrocall @in_str (string-r "str"))""" "outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" # Triple quoted procesing for custom strings - "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-sr "x"))""" - "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\n" "y"))""" - "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-sr "x\\\n" "y"))""" + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))""" + "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))""" # Macro sufficies can include keywords and numbers "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" @@ -540,11 +541,11 @@ tests = [ "macro (type)(ex) end" => "(macro (call type ex) (block))" "macro \$f() end" => "(macro (call (\$ f)) (block))" "macro (\$f)() end" => "(macro (call (\$ f)) (block))" - "function (x) body end"=> "(function (tuple x) (block body))" - "function (x,y) end" => "(function (tuple x y) (block))" - "function (x=1) end" => "(function (tuple (= x 1)) (block))" - "function (;x=1) end" => "(function (tuple 
(parameters (= x 1))) (block))" - "function ()(x) end" => "(function (call (tuple) x) (block))" + "function (x) body end"=> "(function (tuple-p x) (block body))" + "function (x,y) end" => "(function (tuple-p x y) (block))" + "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" + "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" + "function ()(x) end" => "(function (call (tuple-p) x) (block))" "function (A).f() end" => "(function (call (. A (quote f))) (block))" "function (:)() end" => "(function (call :) (block))" "function (x::T)() end"=> "(function (call (::-i x T)) (block))" @@ -652,34 +653,34 @@ tests = [ "i in rhs" => "(= i rhs)" "i ∈ rhs" => "(= i rhs)" "i = 1:10" => "(= i (call-i 1 : 10))" - "(i,j) in iter" => "(= (tuple i j) iter)" + "(i,j) in iter" => "(= (tuple-p i j) iter)" "outer = rhs" => "(= outer rhs)" "outer <| x = rhs" => "(= (call-i outer <| x) rhs)" "outer i = rhs" => "(= (outer i) rhs)" - "outer (x,y) = rhs" => "(= (outer (tuple x y)) rhs)" + "outer (x,y) = rhs" => "(= (outer (tuple-p x y)) rhs)" ], JuliaSyntax.parse_paren => [ # Tuple syntax with commas - "()" => "(tuple)" - "(x,)" => "(tuple x)" - "(x,y)" => "(tuple x y)" - "(x=1, y=2)" => "(tuple (= x 1) (= y 2))" + "()" => "(tuple-p)" + "(x,)" => "(tuple-p x)" + "(x,y)" => "(tuple-p x y)" + "(x=1, y=2)" => "(tuple-p (= x 1) (= y 2))" # Named tuples with initial semicolon - "(;)" => "(tuple (parameters))" => Expr(:tuple, Expr(:parameters)) - "(; a=1)" => "(tuple (parameters (= a 1)))" => Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) + "(;)" => "(tuple-p (parameters))" => Expr(:tuple, Expr(:parameters)) + "(; a=1)" => "(tuple-p (parameters (= a 1)))" => Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) # Extra credit: nested parameters and frankentuples - "(x...; y)" => "(tuple (... x) (parameters y))" - "(x...;)" => "(tuple (... x) (parameters))" - "(; a=1; b=2)" => "(tuple (parameters (= a 1)) (parameters (= b 2)))" => + "(x...; y)" => "(tuple-p (... 
x) (parameters y))" + "(x...;)" => "(tuple-p (... x) (parameters))" + "(; a=1; b=2)" => "(tuple-p (parameters (= a 1)) (parameters (= b 2)))" => Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - "(a; b; c,d)" => "(tuple a (parameters b) (parameters c d))" => + "(a; b; c,d)" => "(tuple-p a (parameters b) (parameters c d))" => Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a) - "(a=1, b=2; c=3)" => "(tuple (= a 1) (= b 2) (parameters (= c 3)))" + "(a=1, b=2; c=3)" => "(tuple-p (= a 1) (= b 2) (parameters (= c 3)))" # Block syntax - "(;;)" => "(block)" - "(a=1;)" => "(block (= a 1))" - "(a;b;;c)" => "(block a b c)" - "(a=1; b=2)" => "(block (= a 1) (= b 2))" + "(;;)" => "(block-p)" + "(a=1;)" => "(block-p (= a 1))" + "(a;b;;c)" => "(block-p a b c)" + "(a=1; b=2)" => "(block-p (= a 1) (= b 2))" # Parentheses used for grouping "(a * b)" => "(call-i a * b)" "(a=1)" => "(= a 1)" @@ -786,7 +787,7 @@ tests = [ # cmd strings "``" => "(macrocall core_@cmd (cmdstring-r \"\"))" "`cmd`" => "(macrocall core_@cmd (cmdstring-r \"cmd\"))" - "```cmd```" => "(macrocall core_@cmd (cmdstring-sr \"cmd\"))" + "```cmd```" => "(macrocall core_@cmd (cmdstring-s-r \"cmd\"))" # literals "42" => "42" "1.0e-1000" => "0.0" @@ -853,7 +854,7 @@ tests = [ # Triple-quoted dedenting: "\"\"\"\nx\"\"\"" => raw"""(string-s "x")""" "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" - "```\n x\n y```" => raw"""(macrocall core_@cmd (cmdstring-sr "x\n" "y"))""" + "```\n x\n y```" => raw"""(macrocall core_@cmd (cmdstring-s-r "x\n" "y"))""" # Various newlines (\n \r \r\n) and whitespace (' ' \t) "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")""" "\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")""" From e81d0db893d43685d4d65dbfc48baffaab8cd5f9 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Mar 2023 14:10:06 +1000 Subject: [PATCH 0600/1109] Minor cleanup of parser code for let block bindings --- JuliaSyntax/src/parser.jl | 22 
++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 51f4b617b370f..ee6e09cffd702 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1823,23 +1823,21 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"for") elseif word == K"let" bump(ps, TRIVIA_FLAG) - if peek(ps) ∉ KSet"NewlineWs ;" - # let x=1\n end ==> (let (block (= x 1)) (block)) - # let x=1 ; end ==> (let (block (= x 1)) (block)) - m = position(ps) - n_subexprs = parse_comma_separated(ps, parse_eq_star) - kb = peek_behind(ps).kind + m = position(ps) + if peek(ps) in KSet"NewlineWs ;" + # let end ==> (let (block) (block)) + # let ; end ==> (let (block) (block)) + # let ; body end ==> (let (block) (block body)) + else + # let x=1\n end ==> (let (block (= x 1)) (block)) + # let x=1 ; end ==> (let (block (= x 1)) (block)) # let x::1 ; end ==> (let (block (::-i x 1)) (block)) # let x ; end ==> (let (block x) (block)) # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) # let x+=1 ; end ==> (let (block (+= x 1)) (block)) - emit(ps, m, K"block") - else - # let end ==> (let (block) (block)) - # let ; end ==> (let (block) (block)) - # let ; body end ==> (let (block) (block body)) - bump_invisible(ps, K"block") + parse_comma_separated(ps, parse_eq_star) end + emit(ps, m, K"block") k = peek(ps) if k in KSet"NewlineWs ;" bump(ps, TRIVIA_FLAG) From 2ee6210fec8b4e1e07ae67e54e7c33d0dc9239db Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Mar 2023 15:01:54 +1000 Subject: [PATCH 0601/1109] Remove premature lowering of `return` without arguments `return` can have zero arguments - indicate this with empty children, rather than with an invisible `K"nothing"` token. 
--- JuliaSyntax/src/expr.jl | 2 ++ JuliaSyntax/src/kinds.jl | 1 - JuliaSyntax/src/parser.jl | 5 ++--- JuliaSyntax/src/syntax_tree.jl | 2 -- JuliaSyntax/test/expr.jl | 5 +++++ JuliaSyntax/test/parser.jl | 4 ++-- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index bcec2b7ebafc9..cfdec790d7b9c 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -303,6 +303,8 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = Expr(headsym, args[1].args...) headsym = :const end + elseif headsym == :return && isempty(args) + push!(args, nothing) end return Expr(headsym, args...) end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 73e63c7a21630..d2b62b358980e 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -83,7 +83,6 @@ const _kind_names = "CmdString" "true" "false" - "nothing" # A literal `nothing` "END_LITERAL" "BEGIN_DELIMITERS" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ee6e09cffd702..4a5ee2ea01cb9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1971,9 +1971,8 @@ function parse_resword(ps::ParseState) bump(ps, TRIVIA_FLAG) k = peek(ps) if k == K"NewlineWs" || is_closing_token(ps, k) - # return\nx ==> (return nothing) - # return) ==> (return nothing) - bump_invisible(ps, K"nothing") + # return\nx ==> (return) + # return) ==> (return) else # return x ==> (return x) # return x,y ==> (return (tuple x y)) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 7695129f72dbc..a9a5fdc883240 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -107,8 +107,6 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In isempty(val_range) ? 
Symbol(untokenize(k)) : # synthetic invisible tokens Symbol(normalize_identifier(val_str)) - elseif k == K"nothing" - nothing elseif k == K"error" ErrorVal() elseif k == K"MacroName" diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 84f0f6e92707d..e86c356492188 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -302,4 +302,9 @@ @test parse(Expr, "\n\"x\" f") == Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f) end + + @testset "return" begin + @test parse(Expr, "return x") == Expr(:return, :x) + @test parse(Expr, "return") == Expr(:return, nothing) + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4d29ac76f0081..6221be1e0064e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -475,8 +475,8 @@ tests = [ "struct A end" => "(struct false A (block))" => Expr(:struct, false, :A, Expr(:block)) "struct try end" => "(struct false (error (try)) (block))" # return - "return\nx" => "(return nothing)" - "return)" => "(return nothing)" + "return\nx" => "(return)" + "return)" => "(return)" "return x" => "(return x)" "return x,y" => "(return (tuple x y))" # break/continue From 7241a92dee47ba992b352eba3b1f45ca4bd5b5ed Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Mar 2023 15:11:31 +1000 Subject: [PATCH 0602/1109] Use distinct kind for juxtaposition This allows us to easily distinguish juxtaposition syntax from explicit multiplication. 
--- JuliaSyntax/src/expr.jl | 3 +++ JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/parser.jl | 23 ++++++++++------------- JuliaSyntax/test/expr.jl | 5 +++++ JuliaSyntax/test/parser.jl | 16 ++++++++-------- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index cfdec790d7b9c..0ebf6693e2148 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -305,6 +305,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end elseif headsym == :return && isempty(args) push!(args, nothing) + elseif headsym == :juxtapose + headsym = :call + pushfirst!(args, :*) end return Expr(headsym, args...) end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index d2b62b358980e..f0848936df91b 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -879,6 +879,7 @@ const _kind_names = "comparison" "curly" "inert" # QuoteNode; not quasiquote + "juxtapose" # Numeric juxtaposition like 2x "string" # A string interior node (possibly containing interpolations) "cmdstring" # A cmd string node (containing delimiters plus string) "char" # A char string node (containing delims + char data) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4a5ee2ea01cb9..0a309986f0810 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1105,13 +1105,13 @@ function is_juxtapose(ps, prev_k, t) !is_initial_reserved_word(ps, k) end -# Juxtoposition. Ugh! +# Juxtoposition. Ugh! 
But so useful for units and Field identities like `im` # -# 2x ==> (call-i 2 * x) -# 2(x) ==> (call-i 2 * x) -# (2)(3)x ==> (call-i 2 * 3 x) -# (x-1)y ==> (call-i (call-i x - 1) * y) -# x'y ==> (call-i (call-post x ') * y) +# 2x ==> (juxtapose 2 x) +# 2(x) ==> (juxtapose 2 x) +# (2)(3)x ==> (juxtapose 2 3 x) +# (x-1)y ==> (juxtapose (call-i x - 1) y) +# x'y ==> (juxtapose (call-post x ') y) # # flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) @@ -1124,15 +1124,12 @@ function parse_juxtapose(ps::ParseState) if !is_juxtapose(ps, prev_kind, t) break end - if n_terms == 1 - bump_invisible(ps, K"*") - end if prev_kind == K"string" || is_string_delim(t) # issue #20575 # - # "a""b" ==> (call-i (string "a") * (error-t) (string "b")) - # "a"x ==> (call-i (string "a") * (error-t) x) - # "$y"x ==> (call-i (string (string y)) * (error-t) x) + # "a""b" ==> (juxtapose (string "a") (error-t) (string "b")) + # "a"x ==> (juxtapose (string "a") (error-t) x) + # "$y"x ==> (juxtapose (string (string y)) (error-t) x) bump_invisible(ps, K"error", TRIVIA_FLAG, error="cannot juxtapose string literal") end @@ -1144,7 +1141,7 @@ function parse_juxtapose(ps::ParseState) n_terms += 1 end if n_terms > 1 - emit(ps, mark, K"call", INFIX_FLAG) + emit(ps, mark, K"juxtapose") end end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index e86c356492188..72d130c4fc935 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -296,6 +296,11 @@ Expr(:block, LineNumberNode(1), :z)) end + @testset "juxtapose" begin + @test parse(Expr, "2x") == Expr(:call, :*, 2, :x) + @test parse(Expr, "(2)(3)x") == Expr(:call, :*, 2, 3, :x) + end + @testset "Core.@doc" begin @test parse(Expr, "\"x\" f") == Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(1), "x", :f) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 6221be1e0064e..2149d1f92c4cd 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -179,15 +179,15 
@@ tests = [ "x >> y >> z" => "(call-i (call-i x >> y) >> z)" ], JuliaSyntax.parse_juxtapose => [ - "2x" => "(call-i 2 * x)" - "2x" => "(call-i 2 * x)" - "2(x)" => "(call-i 2 * x)" - "(2)(3)x" => "(call-i 2 * 3 x)" - "(x-1)y" => "(call-i (call-i x - 1) * y)" - "x'y" => "(call-i (call-post x ') * y)" + "2x" => "(juxtapose 2 x)" + "2x" => "(juxtapose 2 x)" + "2(x)" => "(juxtapose 2 x)" + "(2)(3)x" => "(juxtapose 2 3 x)" + "(x-1)y" => "(juxtapose (call-i x - 1) y)" + "x'y" => "(juxtapose (call-post x ') y)" # errors - "\"a\"\"b\"" => "(call-i (string \"a\") * (error-t) (string \"b\"))" - "\"a\"x" => "(call-i (string \"a\") * (error-t) x)" + "\"a\"\"b\"" => "(juxtapose (string \"a\") (error-t) (string \"b\"))" + "\"a\"x" => "(juxtapose (string \"a\") (error-t) x)" # Not juxtaposition - parse_juxtapose will consume only the first token. "x.3" => "x" "sqrt(2)2" => "(call sqrt 2)" From 2651cee0e742eaf7e9f48c71adcf6f217a44cb5a Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 15 Mar 2023 15:56:16 +1000 Subject: [PATCH 0603/1109] Use flags for struct/module variants and allow empty return nodes Various AST cleanups: * `struct` and `mutable struct` are distinguished with flags not children * `module` and `baremodule` are distinguished with flags not children * `return` now parses with empty children rather than with an invisible K"nothing" as the only child. 
--- JuliaSyntax/src/expr.jl | 7 ++++-- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 43 ++++++++++++++++++++++++--------- JuliaSyntax/src/parser.jl | 41 +++++++++++++++---------------- JuliaSyntax/test/expr.jl | 14 +++++++++++ JuliaSyntax/test/hooks.jl | 1 - JuliaSyntax/test/parser.jl | 26 ++++++++++---------- 7 files changed, 84 insertions(+), 50 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 0ebf6693e2148..b18b9c0ee0ca6 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -278,6 +278,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, pushfirst!(args[2].args, loc) end elseif headsym === :module + pushfirst!(args, !has_flags(node, BARE_MODULE_FLAG)) pushfirst!(args[3].args, loc) elseif headsym == :inert || (headsym == :quote && length(args) == 1 && !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || @@ -303,11 +304,13 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = Expr(headsym, args[1].args...) headsym = :const end - elseif headsym == :return && isempty(args) + elseif headsym === :return && isempty(args) push!(args, nothing) - elseif headsym == :juxtapose + elseif headsym === :juxtapose headsym = :call pushfirst!(args, :*) + elseif headsym === :struct + pushfirst!(args, has_flags(node, MUTABLE_FLAG)) end return Expr(headsym, args...) end diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 031ea801e374d..01df50238d9c7 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -62,7 +62,7 @@ function _incomplete_tag(n::SyntaxNode) elseif kp in KSet"for while function if" return i == 1 ? :other : :block elseif kp in KSet"module struct" - return i == 2 ? :other : :block + return i == 1 ? :other : :block elseif kp == K"do" return i < 3 ? 
:other : :block else diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index d81b7810a7348..8db4489f28063 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -5,27 +5,38 @@ # TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias? const RawFlags = UInt16 const EMPTY_FLAGS = RawFlags(0) -# Applied to tokens which are syntax trivia after parsing + +# Set for tokens or ranges which are syntax trivia after parsing const TRIVIA_FLAG = RawFlags(1<<0) -# Record whether operators are dotted +# Token flags - may be set for operator kinded tokens +# Operator is dotted const DOTOP_FLAG = RawFlags(1<<1) -# Record whether operator has a suffix -const SUFFIXED_FLAG = RawFlags(1<<2) +# Operator has a suffix +const SUFFIXED_FLAG = RawFlags(1<<2) +# Set for K"call", K"dotcall" or any syntactic operator heads # Distinguish various syntaxes which are mapped to K"call" const PREFIX_CALL_FLAG = RawFlags(0<<3) const INFIX_FLAG = RawFlags(1<<3) const PREFIX_OP_FLAG = RawFlags(2<<3) const POSTFIX_OP_FLAG = RawFlags(3<<3) -# The next two bits could overlap with the previous two if necessary -# Set when kind == K"String" was triple-delimited as with """ or ``` +# The following flags are quite head-specific and may overlap + +# Set when K"string" or K"cmdstring" was triple-delimited as with """ or ``` const TRIPLE_STRING_FLAG = RawFlags(1<<5) -# Set when a string or identifier needs "raw string" unescaping +# Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping const RAW_STRING_FLAG = RawFlags(1<<6) -const PARENS_FLAG = RawFlags(1<<7) +# Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses +const PARENS_FLAG = RawFlags(1<<5) + +# Set for K"struct" when mutable +const MUTABLE_FLAG = RawFlags(1<<5) + +# Set for K"module" when it's not bare (`module`, not `baremodule`) +const BARE_MODULE_FLAG = RawFlags(1<<5) # Flags holding the dimension of an nrow or other UInt8 not 
held in the source const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) @@ -78,10 +89,18 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_infix_op_call(head) && (str = str*"-i") is_prefix_op_call(head) && (str = str*"-pre") is_postfix_op_call(head) && (str = str*"-post") - has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") - has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") - has_flags(head, PARENS_FLAG) && (str = str*"-p") - is_suffixed(head) && (str = str*"-S") + + if kind(head) in KSet"string cmdstring Identifier" + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") + elseif kind(head) in KSet"tuple block macrocall" + has_flags(head, PARENS_FLAG) && (str = str*"-p") + elseif kind(head) == K"struct" + has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") + elseif kind(head) == K"module" + has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") + end + is_suffixed(head) && (str = str*"-suf") n = numeric_flags(head) n != 0 && (str = str*"-"*string(n)) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0a309986f0810..46a51bf0f4c47 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -763,7 +763,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) mark = position(ps) if subtype_comparison && is_reserved_word(peek(ps)) # Recovery - # struct try end ==> (struct false (error (try)) (block)) + # struct try end ==> (struct (error (try)) (block)) name = untokenize(peek(ps)) bump(ps) emit(ps, mark, K"error", error="Invalid type name `$name`") @@ -1764,8 +1764,8 @@ function parse_struct_field(ps::ParseState) parse_eq(ps) if const_field # Const fields https://github.com/JuliaLang/julia/pull/43305 - #v1.8: struct A const a end ==> (struct false A (block (const x))) - #v1.7: struct A const a end ==> (struct false A (block (error (const x)))) + #v1.8: struct A const a end ==> (struct A (block (const x))) + #v1.7: 
struct A const a end ==> (struct A (block (error (const x)))) emit(ps, mark, K"const") min_supported_version(v"1.8", ps, mark, "`const` struct field") end @@ -1929,24 +1929,23 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"abstract") elseif word in KSet"struct mutable" - # struct A <: B \n a::X \n end ==> (struct false (<: A B) (block (::-i a X))) - # struct A \n a \n b \n end ==> (struct false A (block a b)) - #v1.7: struct A const a end ==> (struct false A (block (error (const a)))) - #v1.8: struct A const a end ==> (struct false A (block (const a))) - if word == K"mutable" - # mutable struct A end ==> (struct true A (block)) + # struct A <: B \n a::X \n end ==> (struct (<: A B) (block (::-i a X))) + # struct A \n a \n b \n end ==> (struct A (block a b)) + #v1.7: struct A const a end ==> (struct A (block (error (const a)))) + #v1.8: struct A const a end ==> (struct A (block (const a))) + is_mut = word == K"mutable" + if is_mut + # mutable struct A end ==> (struct-mut A (block)) bump(ps, TRIVIA_FLAG) - bump_invisible(ps, K"true") else - # struct A end ==> (struct false A (block)) - bump_invisible(ps, K"false") + # struct A end ==> (struct A (block)) end @check peek(ps) == K"struct" bump(ps, TRIVIA_FLAG) parse_subtype_spec(ps) parse_block(ps, parse_struct_field) bump_closing_token(ps, K"end") - emit(ps, mark, K"struct") + emit(ps, mark, K"struct", is_mut ? MUTABLE_FLAG : EMPTY_FLAGS) elseif word == K"primitive" # primitive type A 32 end ==> (primitive A 32) # primitive type A 32 ; end ==> (primitive A 32) @@ -1986,22 +1985,22 @@ function parse_resword(ps::ParseState) error="unexpected token after $(untokenize(word))") end elseif word in KSet"module baremodule" - # module A end ==> (module true A (block)) - # baremodule A end ==> (module false A (block)) + # module A end ==> (module A (block)) + # baremodule A end ==> (module-bare A (block)) bump(ps, TRIVIA_FLAG) - bump_invisible(ps, (word == K"module") ? 
K"true" : K"false") if is_reserved_word(peek(ps)) - # module do \n end ==> (module true (error do) (block)) + # module do \n end ==> (module (error do) (block)) bump(ps, error="Invalid module name") else - # module $A end ==> (module true ($ A) (block)) + # module $A end ==> (module ($ A) (block)) parse_unary_prefix(ps) end - # module A \n a \n b \n end ==> (module true A (block a b)) - # module A \n "x"\na \n end ==> (module true A (block (doc (string "x") a))) + # module A \n a \n b \n end ==> (module A (block a b)) + # module A \n "x"\na \n end ==> (module A (block (doc (string "x") a))) parse_block(ps, parse_docstring) bump_closing_token(ps, K"end") - emit(ps, mark, K"module") + emit(ps, mark, K"module", + word == K"baremodule" ? BARE_MODULE_FLAG : EMPTY_FLAGS) elseif word == K"export" # export a ==> (export a) # export @a ==> (export @a) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 72d130c4fc935..5c2f03d1ab822 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -312,4 +312,18 @@ @test parse(Expr, "return x") == Expr(:return, :x) @test parse(Expr, "return") == Expr(:return, nothing) end + + @testset "struct" begin + @test parse(Expr, "struct A end") == + Expr(:struct, false, :A, Expr(:block, LineNumberNode(1))) + @test parse(Expr, "mutable struct A end") == + Expr(:struct, true, :A, Expr(:block, LineNumberNode(1))) + end + + @testset "module" begin + @test parse(Expr, "module A end") == + Expr(:module, true, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) + @test parse(Expr, "baremodule A end") == + Expr(:module, false, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) + end end diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 953df0aeea326..7c8f0af0f177a 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -38,7 +38,6 @@ @testset "Expr(:incomplete)" begin JuliaSyntax.enable_in_core!() - @test Meta.isexpr(Meta.parse("[x"), :incomplete) for (str, tag) in [ 
diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 2149d1f92c4cd..cc3c02c759aa7 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -467,13 +467,13 @@ tests = [ "primitive type A \$N end" => "(primitive A (\$ N))" "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct - "struct A <: B \n a::X \n end" => "(struct false (<: A B) (block (::-i a X)))" => Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, Expr(:(::), :a, :X))) - "struct A \n a \n b \n end" => "(struct false A (block a b))" => Expr(:struct, false, :A, Expr(:block, :a, :b)) - "mutable struct A end" => "(struct true A (block))" - ((v=v"1.8",), "struct A const a end") => "(struct false A (block (const a)))" => Expr(:struct, false, :A, Expr(:block, Expr(:const, :a))) - ((v=v"1.7",), "struct A const a end") => "(struct false A (block (error (const a))))" - "struct A end" => "(struct false A (block))" => Expr(:struct, false, :A, Expr(:block)) - "struct try end" => "(struct false (error (try)) (block))" + "struct A <: B \n a::X \n end" => "(struct (<: A B) (block (::-i a X)))" => Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, Expr(:(::), :a, :X))) + "struct A \n a \n b \n end" => "(struct A (block a b))" => Expr(:struct, false, :A, Expr(:block, :a, :b)) + "mutable struct A end" => "(struct-mut A (block))" + ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" => Expr(:struct, false, :A, Expr(:block, Expr(:const, :a))) + ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))" + "struct A end" => "(struct A (block))" => Expr(:struct, false, :A, Expr(:block)) + "struct try end" => "(struct (error (try)) (block))" # return "return\nx" => "(return)" "return)" => "(return)" @@ -483,12 +483,12 @@ tests = [ "break" => "(break)" "continue" => "(continue)" # module/baremodule - "module A end" => "(module true A (block))" - "baremodule A end" => "(module false A (block))" - "module do \n end" => "(module 
true (error (do)) (block))" - "module \$A end" => "(module true (\$ A) (block))" - "module A \n a \n b \n end" => "(module true A (block a b))" - """module A \n "x"\na\n end""" => """(module true A (block (doc (string "x") a)))""" + "module A end" => "(module A (block))" + "baremodule A end" => "(module-bare A (block))" + "module do \n end" => "(module (error (do)) (block))" + "module \$A end" => "(module (\$ A) (block))" + "module A \n a \n b \n end" => "(module A (block a b))" + """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))""" # export "export a" => "(export a)" => Expr(:export, :a) "export @a" => "(export @a)" => Expr(:export, Symbol("@a")) From 9ac6d40e74e19af33ab8c4d5b8dcdf3ab30ca8ac Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 17 Mar 2023 20:59:46 +1000 Subject: [PATCH 0604/1109] Basic after-parse tokenization interface (JuliaLang/JuliaSyntax.jl#221) Implement a `tokenize()` function which retreives the tokens *after* parsing. Going through the parser isn't hugely more expensive than plain tokenization, and allows us to be more precise and complete. For example it automatically: * Determines when contextual keywords are keywords, vs identifiers. For example, the `outer` in `outer = 1` is an identifier, but a keyword in `for outer i = 1:10` * Validates numeric literals (eg, detecting overflow cases like `10e1000` and flagging as errors) * Splits or combines ambiguous tokens. For example, making the `...` in `import ...A` three separate `.` tokens. 
--- JuliaSyntax/src/JuliaSyntax.jl | 1 - JuliaSyntax/src/parser_api.jl | 50 ++++++++++++++++++++++++++++++++++ JuliaSyntax/src/tokenize.jl | 39 +++++++++++++------------- JuliaSyntax/test/fuzz_test.jl | 31 +-------------------- JuliaSyntax/test/parser_api.jl | 41 ++++++++++++++++++++++++++++ JuliaSyntax/test/test_utils.jl | 4 ++- JuliaSyntax/test/tokenize.jl | 4 +-- 7 files changed, 116 insertions(+), 54 deletions(-) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index ff3ab13b2d922..76b686759488b 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -7,7 +7,6 @@ include("kinds.jl") # Lexing uses a significantly modified version of Tokenize.jl include("tokenize.jl") -using .Tokenize: Token # Source and diagnostics include("source_files.jl") diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 0ea050c46f26a..a22a6a2578610 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -148,3 +148,53 @@ parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _pars parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...) parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...) +#------------------------------------------------------------------------------- +# Tokens interface +""" +Token type resulting from calling `tokenize(text)` + +Use +* `kind(tok)` to get the token kind +* `untokenize(tok, text)` to retreive the text +* Predicates like `is_error(tok)` to query token categories and flags +""" +struct Token + head::SyntaxHead + range::UnitRange{UInt32} +end + +Token() = Token(SyntaxHead(K"None", EMPTY_FLAGS), 0:0) + +head(t::Token) = t.head + +""" + tokenize(text) + +Returns the tokenized UTF-8 encoded `text` as a vector of `Token`s. The +text for the token can be retreived by using `untokenize()`. 
The full text can be +reconstructed with, for example, `join(untokenize.(tokenize(text), text))`. + +This interface works on UTF-8 encoded string or buffer data only. +""" +function tokenize(text) + ps = ParseStream(text) + parse!(ps, rule=:toplevel) + ts = ps.tokens + output_tokens = Token[] + for i = 2:length(ts) + if kind(ts[i]) == K"TOMBSTONE" + continue + end + r = ts[i-1].next_byte:ts[i].next_byte-1 + push!(output_tokens, Token(head(ts[i]), r)) + end + output_tokens +end + +function untokenize(token::Token, text::AbstractString) + text[first(token.range):thisind(text, last(token.range))] +end + +function untokenize(token::Token, text::Vector{UInt8}) + text[token.range] +end diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 82cab1232446b..26eb31fb0b382 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -12,7 +12,7 @@ include("tokenize_utils.jl") #------------------------------------------------------------------------------- # Tokens -struct Token +struct RawToken kind::Kind # Offsets into a string or buffer startbyte::Int # The byte where the token start in the buffer @@ -20,24 +20,24 @@ struct Token dotop::Bool suffix::Bool end -function Token(kind::Kind, startbyte::Int, endbyte::Int) - Token(kind, startbyte, endbyte, false, false) +function RawToken(kind::Kind, startbyte::Int, endbyte::Int) + RawToken(kind, startbyte, endbyte, false, false) end -Token() = Token(K"error", 0, 0, false, false) +RawToken() = RawToken(K"error", 0, 0, false, false) -const EMPTY_TOKEN = Token() +const EMPTY_TOKEN = RawToken() -kind(t::Token) = t.kind +kind(t::RawToken) = t.kind -startbyte(t::Token) = t.startbyte -endbyte(t::Token) = t.endbyte +startbyte(t::RawToken) = t.startbyte +endbyte(t::RawToken) = t.endbyte -function untokenize(t::Token, str::String) +function untokenize(t::RawToken, str::String) String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) end -function Base.show(io::IO, t::Token) +function Base.show(io::IO, 
t::RawToken) print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) print(io, rpad(kind(t), 15, " ")) end @@ -108,18 +108,17 @@ end Lexer(str::AbstractString) = Lexer(IOBuffer(str)) """ - tokenize(x, T = Token) + tokenize(x) Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. -`join(untokenize.(tokenize(x)))`. Setting `T` chooses the type of token -produced by the lexer (`Token` or `Token`). +`join(untokenize.(tokenize(x)))`. """ tokenize(x) = Lexer(x) # Iterator interface Base.IteratorSize(::Type{<:Lexer}) = Base.SizeUnknown() Base.IteratorEltype(::Type{<:Lexer}) = Base.HasEltype() -Base.eltype(::Type{<:Lexer}) = Token +Base.eltype(::Type{<:Lexer}) = RawToken function Base.iterate(l::Lexer) @@ -142,7 +141,7 @@ end """ startpos(l::Lexer) -Return the latest `Token`'s starting position. +Return the latest `RawToken`'s starting position. """ startpos(l::Lexer) = l.token_startpos @@ -193,7 +192,7 @@ Base.seek(l::Lexer, pos) = seek(l.io, pos) """ start_token!(l::Lexer) -Updates the lexer's state such that the next `Token` will start at the current +Updates the lexer's state such that the next `RawToken` will start at the current position. """ function start_token!(l::Lexer) @@ -251,7 +250,7 @@ end """ emit(l::Lexer, kind::Kind) -Returns a `Token` of kind `kind` with contents `str` and starts a new `Token`. +Returns a `RawToken` of kind `kind` with contents `str` and starts a new `RawToken`. """ function emit(l::Lexer, kind::Kind, maybe_op=true) suffix = false @@ -262,7 +261,7 @@ function emit(l::Lexer, kind::Kind, maybe_op=true) end end - tok = Token(kind, startpos(l), position(l) - 1, l.dotop, suffix) + tok = RawToken(kind, startpos(l), position(l) - 1, l.dotop, suffix) l.dotop = false l.last_token = kind @@ -272,7 +271,7 @@ end """ emit_error(l::Lexer, err::Kind) -Returns an `K"error"` token with error `err` and starts a new `Token`. +Returns an `K"error"` token with error `err` and starts a new `RawToken`. 
""" function emit_error(l::Lexer, err::Kind) @assert is_error(err) @@ -283,7 +282,7 @@ end """ next_token(l::Lexer) -Returns the next `Token`. +Returns the next `RawToken`. """ function next_token(l::Lexer, start = true) start && start_token!(l) diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index c1653c9ebabf2..4df943b819e61 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -1,4 +1,5 @@ using JuliaSyntax +using JuliaSyntax: tokenize # Parser fuzz testing tools. @@ -882,36 +883,6 @@ const cutdown_tokens = [ "√" ] -#------------------------------------------------------------------------------- - -# Rough tokenization interface. -# TODO: We should have something like this in parser_api.jl - -struct Token2 - head::JuliaSyntax.SyntaxHead - range::UnitRange{UInt32} -end - -function tokenize(text::String) - ps = JuliaSyntax.ParseStream(text) - JuliaSyntax.parse!(ps, rule=:toplevel) - ts = ps.tokens - output_tokens = Token2[] - for i = 2:length(ts) - if JuliaSyntax.kind(ts[i]) == JuliaSyntax.K"TOMBSTONE" - continue - end - r = ts[i-1].next_byte:thisind(text, ts[i].next_byte-1) - push!(output_tokens, Token2(JuliaSyntax.head(ts[i]), r)) - end - output_tokens -end - -function split_tokens(text::String) - [@view text[t.range] for t in tokenize(text)] -end - - #------------------------------------------------------------------------------- function parser_throws_exception(str) diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 1c4259a6cc148..bc6a1aa88b26c 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -124,3 +124,44 @@ end \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" end end + +tokensplit(str) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str)] + +@testset "tokenize() API" begin + # tokenize() is eager + @test tokenize("aba") isa Vector{JuliaSyntax.Token} + + # . 
is a separate token from + in `.+` + @test tokensplit("a .+ β") == [ + K"Identifier" => "a", + K"Whitespace" => " ", + K"." => ".", + K"+" => "+", + K"Whitespace" => " ", + K"Identifier" => "β", + ] + + # Contextual keywords become identifiers where necessary + @test tokensplit("outer = 1") == [ + K"Identifier" => "outer", + K"Whitespace" => " ", + K"=" => "=", + K"Whitespace" => " ", + K"Integer" => "1", + ] + + # A predicate based on flags() + @test JuliaSyntax.is_suffixed(tokenize("+₁")[1]) + + # Buffer interface + @test tokenize(Vector{UInt8}("a + b")) == tokenize("a + b") + + buf = Vector{UInt8}("a-β") + @test untokenize.(tokenize(buf), Ref(buf,)) == [ + Vector{UInt8}("a"), + Vector{UInt8}("-"), + Vector{UInt8}("β") + ] + + @test kind(JuliaSyntax.Token()) == K"None" +end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index c0b1e7ab1265a..36fc22cde1afc 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -27,7 +27,9 @@ using .JuliaSyntax: child, fl_parseall, fl_parse, - highlight + highlight, + tokenize, + untokenize if VERSION < v"1.6" # Compat stuff which might not be in Base for older versions diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index d1587e6d0236d..4c0b778ef0ed6 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -15,7 +15,7 @@ using JuliaSyntax.Tokenize: Tokenize, tokenize, untokenize, - Token + RawToken tok(str, i = 1) = collect(tokenize(str))[i] @@ -321,7 +321,7 @@ end @test String(take!(io)) == "1-5 String " end -~(tok::Token, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2] +~(tok::RawToken, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2] @testset "raw strings" begin str = raw""" str"x $ \ y" """ From 3eb788570c0dea1e5be463213e0f7fc5ebe72368 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 18 Mar 2023 05:34:07 +1000 Subject: [PATCH 0605/1109] Make CI run on release-* branches --- 
JuliaSyntax/.github/workflows/CI.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 8b23d64d028c7..1f540e944e771 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -3,6 +3,7 @@ on: push: branches: - main + - release-* tags: '*' pull_request: jobs: From 71d16e24b637114e4686c00e9fbfae5bf8849367 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 18 Mar 2023 05:31:59 +1000 Subject: [PATCH 0606/1109] bump version to 0.3.3 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 16967150c4aab..7438212f690fe 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Chris Foster and contributors"] -version = "0.3.2" +version = "0.3.3" [compat] julia = "1.0" From 82ff79282a97be64ee40314ef3de60f947b1cff9 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 21 Mar 2023 06:49:49 +1000 Subject: [PATCH 0607/1109] Enclose grouping parentheses with `parens` node (JuliaLang/JuliaSyntax.jl#222) Introduce a new kind `K"parens"` to represent grouping parentheses with a tree node of their own. This makes it simple for tooling to process and preserve parenthesized expressions without resorting to searching through the attached syntax trivia. An alternative considered here was to use `K"block"` with a single child which would avoid introducing an extra kind of node. But in that case we couldn't distinguish between a trivial block like `(a;)` vs bare parentheses `(a)`. It also makes implementing `peek_behind` more complicated. 
--- JuliaSyntax/src/expr.jl | 3 + JuliaSyntax/src/parse_stream.jl | 37 +++++----- JuliaSyntax/src/parser.jl | 105 ++++++++++++++-------------- JuliaSyntax/test/parser.jl | 117 +++++++++++++++++--------------- JuliaSyntax/test/parser_api.jl | 6 +- 5 files changed, 138 insertions(+), 130 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index b18b9c0ee0ca6..2f6ee29e24e99 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -197,6 +197,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end elseif headsym === :where reorder_parameters!(args, 2) + elseif headsym == :parens + # parens are used for grouping and don't appear in the Expr AST + return only(args) elseif headsym in (:try, :try_finally_catch) # Try children in source order: # try_block catch_var catch_block else_block finally_block diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8db4489f28063..0ff0215948492 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -582,26 +582,25 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition) end function peek_behind(stream::ParseStream; skip_trivia::Bool=true) - pos = position(stream) - if !skip_trivia || !token_is_last(stream, pos) - return peek_behind(stream, pos) - else - token_index = lastindex(stream.tokens) - range_index = lastindex(stream.ranges) - last_token_in_nonterminal = isempty(stream.ranges) ? 
0 : - stream.ranges[range_index].last_token - while token_index > last_token_in_nonterminal - t = stream.tokens[token_index] - if !is_trivia(t) && kind(t) != K"TOMBSTONE" - break - end - token_index -= 1 - end - if token_index > 0 - return peek_behind(stream, ParseStreamPosition(token_index, range_index)) - else - internal_error("Can't peek behind at start of stream") + token_index = lastindex(stream.tokens) + range_index = lastindex(stream.ranges) + while range_index >= firstindex(stream.ranges) && + kind(stream.ranges[range_index]) == K"parens" + range_index -= 1 + end + last_token_in_nonterminal = range_index == 0 ? 0 : + stream.ranges[range_index].last_token + while token_index > last_token_in_nonterminal + t = stream.tokens[token_index] + if kind(t) != K"TOMBSTONE" && (!skip_trivia || !is_trivia(t)) + break end + token_index -= 1 + end + if token_index > 0 + return peek_behind(stream, ParseStreamPosition(token_index, range_index)) + else + internal_error("Can't peek behind at start of stream") end end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 46a51bf0f4c47..899360135e13e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -315,8 +315,8 @@ function was_eventually_call(ps::ParseState) b = peek_behind(stream, p) if b.kind == K"call" return true - elseif b.kind == K"where" || (b.kind == K"::" && - has_flags(b.flags, INFIX_FLAG)) + elseif b.kind == K"where" || b.kind == K"parens" || + (b.kind == K"::" && has_flags(b.flags, INFIX_FLAG)) p = first_child_position(ps, p) else return false @@ -885,7 +885,7 @@ function parse_range(ps::ParseState) if had_newline # Error message for people coming from python # 1:\n2 ==> (call-i 1 : (error)) - # (1:\n2) ==> (call-i 1 : 2) + # (1:\n2) ==> (parens (call-i 1 : 2)) emit_diagnostic(ps, whitespace=true, error="line break after `:` in range expression") bump_invisible(ps, K"error") @@ -1021,7 +1021,7 @@ function parse_unary_subtype(ps::ParseState) elseif k2 in KSet"{ (" # parse 
<:{T}(x::T) or <:(x::T) like other unary operators # <:{T}(x::T) ==> (call (curly <: T) (:: x T)) - # <:(x::T) ==> (<:-pre (:: x T)) + # <:(x::T) ==> (<:-pre (parens (:: x T))) parse_where(ps, parse_juxtapose) else # <: x ==> (<:-pre x) @@ -1108,9 +1108,9 @@ end # Juxtoposition. Ugh! But so useful for units and Field identities like `im` # # 2x ==> (juxtapose 2 x) -# 2(x) ==> (juxtapose 2 x) -# (2)(3)x ==> (juxtapose 2 3 x) -# (x-1)y ==> (juxtapose (call-i x - 1) y) +# 2(x) ==> (juxtapose 2 (parens x)) +# (2)(3)x ==> (juxtapose (parens 2) (parens 3) x) +# (x-1)y ==> (juxtapose (parens (call-i x - 1)) y) # x'y ==> (juxtapose (call-post x ') y) # # flisp: parse-juxtapose @@ -1239,9 +1239,9 @@ function parse_unary(ps::ParseState) mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( - initial_semi = peek(ps) == K";" + _is_paren_call = peek(ps, skip_newlines=true) in KSet"; )" opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_paren_call = had_commas || had_splat || initial_semi + is_paren_call = had_commas || had_splat || _is_paren_call return (needs_parameters=is_paren_call, is_paren_call=is_paren_call, is_block=!is_paren_call && num_semis > 0) @@ -1263,6 +1263,7 @@ function parse_unary(ps::ParseState) # +(a...) ==> (call + (... a)) # +(a;b,c) ==> (call + a (parameters b c)) # +(;a) ==> (call + (parameters a)) + # +() ==> (call +) # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) @@ -1292,21 +1293,23 @@ function parse_unary(ps::ParseState) parse_factor_with_initial_ex(ps, mark) else # Unary function calls with brackets as grouping, not an arglist - # .+(a) ==> (dotcall-pre (. +) a) + # .+(a) ==> (dotcall-pre (. 
+) (parens a)) if opts.is_block # +(a;b) ==> (call-pre + (block-p a b)) emit(ps, mark_before_paren, K"block", PARENS_FLAG) + else + emit(ps, mark_before_paren, K"parens") end # Not a prefix operator call but a block; `=` is not `kw` - # +(a=1) ==> (call-pre + (= a 1)) + # +(a=1) ==> (call-pre + (parens (= a 1))) # Unary operators have lower precedence than ^ - # +(a)^2 ==> (call-pre + (call-i a ^ 2)) - # .+(a)^2 ==> (dotcall-pre + (call-i a ^ 2)) - # +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2)) + # +(a)^2 ==> (call-pre + (call-i (parens a) ^ 2)) + # .+(a)^2 ==> (dotcall-pre + (call-i (parens a) ^ 2)) + # +(a)(x,y)^2 ==> (call-pre + (call-i (call (parens a) x y) ^ 2)) parse_call_chain(ps, mark_before_paren) parse_factor_with_initial_ex(ps, mark_before_paren) if is_type_operator(op_t) - # <:(a) ==> (<:-pre a) + # <:(a) ==> (<:-pre (parens a)) emit(ps, mark, op_k, PREFIX_OP_FLAG) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) else @@ -1451,7 +1454,7 @@ function parse_identifier_or_interpolate(ps::ParseState) mark = position(ps) parse_unary_prefix(ps) b = peek_behind(ps) - # export (x::T) ==> (export (error (::-i x T))) + # export (x::T) ==> (export (error (parens (::-i x T)))) # export outer ==> (export outer) # export ($f) ==> (export ($ f)) ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || @@ -1491,13 +1494,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) k = kind(t) if !is_macrocall && ps.space_sensitive && preceding_whitespace(t) && k in KSet"( [ { \" \"\"\" ` ```" - # [f (x)] ==> (hcat f x) + # [f (x)] ==> (hcat f (parens x)) # [f x] ==> (hcat f x) break elseif is_macrocall && (preceding_whitespace(t) || !(k in KSet"( [ { ' .")) # Macro calls with space-separated arguments # @foo a b ==> (macrocall @foo a b) - # @foo (x) ==> (macrocall @foo x) + # @foo (x) ==> (macrocall @foo (parens x)) # @foo (x,y) ==> (macrocall @foo (tuple-p x y)) # [@foo x] ==> (vect (macrocall @foo x)) # [@foo] ==> (vect (macrocall @foo)) 
@@ -1537,8 +1540,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f(a,b) ==> (call f a b) # f(a=1; b=2) ==> (call f (= a 1) (parameters (= b 2))) # f(a; b; c) ==> (call f a (parameters b) (parameters c)) - # (a=1)() ==> (call (= a 1)) - # f (a) ==> (call f (error-t) a b) + # (a=1)() ==> (call (parens (= a 1))) + # f (a) ==> (call f (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") @@ -1580,7 +1583,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) else # a[i] ==> (ref a i) # a[i,j] ==> (ref a i j) - # (a=1)[] ==> (ref (= a 1)) + # (a=1)[] ==> (ref (parens (= a 1))) + # a[end] ==> (ref a end) # T[x y] ==> (typed_hcat T x y) # T[x ; y] ==> (typed_vcat T x y) # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) @@ -1905,7 +1909,7 @@ function parse_resword(ps::ParseState) else # Function/macro definition with no methods # function f end ==> (function f) - # (function f \n end) ==> (function f) + # (function f \n end) ==> (parens (function f)) # function f \n\n end ==> (function f) # function $f end ==> (function ($ f)) # macro f end ==> (macro f) @@ -2007,7 +2011,7 @@ function parse_resword(ps::ParseState) # export a, \n @b ==> (export a @b) # export +, == ==> (export + ==) # export \n a ==> (export a) - # export \$a, \$(a*b) ==> (export (\$ a) (\$ (call-i a * b))) + # export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b)))) bump(ps, TRIVIA_FLAG) parse_comma_separated(ps, parse_atsym) emit(ps, mark, K"export") @@ -2105,10 +2109,10 @@ function parse_function_signature(ps::ParseState, is_function::Bool) emit(ps, mark, K"error", error="Invalid macro name") else # macro f() end ==> (macro (call f) (block)) - # macro (:)(ex) end ==> (macro (call : ex) (block)) - # macro (type)(ex) end ==> (macro (call type ex) (block)) + # macro (:)(ex) end ==> (macro (call (parens :) ex) (block)) + # macro (type)(ex) end ==> (macro (call (parens type) ex) (block)) # macro $f() end ==> (macro 
(call ($ f)) (block)) - # macro ($f)() end ==> (macro (call ($ f)) (block)) + # macro ($f)() end ==> (macro (call (parens ($ f))) (block)) end else if peek(ps) == K"(" @@ -2145,10 +2149,11 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function ()(x) end ==> (function (call (tuple-p) x) (block)) emit(ps, mark, K"tuple", PARENS_FLAG) else - # function (A).f() end ==> (function (call (. A (quote f))) (block)) - # function (:)() end ==> (function (call :) (block)) - # function (x::T)() end ==> (function (call (::-i x T)) (block)) - # function (::T)() end ==> (function (call (::-pre T)) (block)) + # function (A).f() end ==> (function (call (. (parens A) (quote f))) (block)) + # function (:)() end ==> (function (call (parens :)) (block)) + # function (x::T)() end ==> (function (call (parens (::-i x T))) (block)) + # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) + emit(ps, mark, K"parens", PARENS_FLAG) end else parse_unary_prefix(ps) @@ -2163,8 +2168,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function type() end ==> (function (call type) (block)) # function \n f() end ==> (function (call f) (block)) # function $f() end ==> (function (call ($ f)) (block)) - # function (:)() end ==> (function (call :) (block)) - # function (::Type{T})(x) end ==> (function (call (::-pre (curly Type T)) x) (block)) + # function (::Type{T})(x) end ==> (function (call (parens (::-pre (curly Type T))) x) (block)) end end end @@ -2205,8 +2209,8 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (f() where T) end ==> (function (where (call f) T) (block)) # function (f()) where T end ==> (function (where (call f) T) (block)) # function (f() where T) where U end ==> (function (where (where (call f) T) U) (block)) - # function (f()::S) end ==> (function (::-i (call f) S) (block)) - # function ((f()::S) where T) end ==> (function (where (::-i (call f) S) T) (block)) + # function 
(f()::S) end ==> (function (parens (::-i (call f) S)) (block)) + # function ((f()::S) where T) end ==> (function (where (parens (::-i (call f) S)) T) (block)) # # TODO: Warn for use of parens? The precedence of `::` and # `where` don't work inside parens so this is a bit of a syntax @@ -2401,7 +2405,7 @@ function parse_atsym(ps::ParseState) else # export a ==> (export a) # export \n a ==> (export a) - # export $a, $(a*b) ==> (export ($ a) ($ (call * a b))) + # export $a, $(a*b) ==> (export ($ a) (parens ($ (call * a b)))) parse_identifier_or_interpolate(ps) end end @@ -2706,7 +2710,7 @@ end function parse_generator(ps::ParseState, mark, flatten=false) t = peek_token(ps) if !preceding_whitespace(t) - # [(x)for x in xs] ==> (comprehension (generator x (error) (= x xs))) + # [(x)for x in xs] ==> (comprehension (generator (parens x) (error) (= x xs))) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Expected space before `for` in generator") end @@ -2715,21 +2719,21 @@ function parse_generator(ps::ParseState, mark, flatten=false) filter_mark = position(ps) parse_comma_separated(ps, parse_iteration_spec) if peek(ps) == K"if" - # (a for x in xs if cond) ==> (generator a (filter (= x xs) cond)) + # (a for x in xs if cond) ==> (parens (generator a (filter (= x xs) cond))) bump(ps, TRIVIA_FLAG) parse_cond(ps) emit(ps, filter_mark, K"filter") end t = peek_token(ps) if kind(t) == K"for" - # (xy for x in xs for y in ys) ==> (flatten xy (= x xs) (= y ys)) - # (xy for x in xs for y in ys for z in zs) ==> (flatten xy (= x xs) (= y ys) (= z zs)) + # (xy for x in xs for y in ys) ==> (parens (flatten xy (= x xs) (= y ys))) + # (xy for x in xs for y in ys for z in zs) ==> (parens (flatten xy (= x xs) (= y ys) (= z zs))) parse_generator(ps, mark, true) if !flatten emit(ps, mark, K"flatten") end elseif !flatten - # (x for a in as) ==> (generator x (= a as)) + # (x for a in as) ==> (parens (generator x (= a as))) emit(ps, mark, K"generator") end end @@ -3071,10 +3075,11 @@ function 
parse_paren(ps::ParseState, check_identifiers=true) emit(ps, mark, K"block", PARENS_FLAG) else # Parentheses used for grouping - # (a * b) ==> (call-i * a b) - # (a=1) ==> (= a 1) - # (x) ==> x - # (a...) ==> (... a) + # (a * b) ==> (parens (call-i * a b)) + # (a=1) ==> (parens (= a 1)) + # (x) ==> (parens x) + # (a...) ==> (parens (... a)) + emit(ps, mark, K"parens") end end end @@ -3144,8 +3149,8 @@ function parse_brackets(after_parse::Function, continue elseif k == K"for" # Generator syntax - # (x for a in as) ==> (generator x (= a as)) - # (x \n\n for a in as) ==> (generator x (= a as)) + # (x for a in as) ==> (parens (generator x (= a as))) + # (x \n\n for a in as) ==> (parens (generator x (= a as))) parse_generator(ps, mark) else # Error - recovery done when consuming closing_kind @@ -3203,8 +3208,8 @@ function parse_string(ps::ParseState, raw::Bool) bump(ps, TRIVIA_FLAG) k = peek(ps) if k == K"(" - # "a $(x + y) b" ==> (string "a " (call-i x + y) " b") - # "hi$("ho")" ==> (string "hi" (string "ho")) + # "a $(x + y) b" ==> (string "a " (parens (call-i x + y)) " b") + # "hi$("ho")" ==> (string "hi" (parens (string "ho"))) parse_atom(ps) elseif k == K"var" # var identifiers disabled in strings @@ -3346,7 +3351,7 @@ function parse_string(ps::ParseState, raw::Bool) end # String interpolations # "$x$y$z" ==> (string x y z) - # "$(x)" ==> (string x) + # "$(x)" ==> (string (parens x)) # "$x" ==> (string x) # """$x""" ==> (string-s x) # @@ -3440,7 +3445,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # Being inside quote makes keywords into identifiers at the # first level of nesting # :end ==> (quote end) - # :(end) ==> (quote (error (end))) + # :(end) ==> (quote (parens (error-t))) # Being inside quote makes end non-special again (issue #27690) # a[:(end)] ==> (ref a (quote (error-t end))) parse_atom(ParseState(ps, end_symbol=false), false) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index cc3c02c759aa7..6adddbfa10873 
100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -181,9 +181,9 @@ tests = [ JuliaSyntax.parse_juxtapose => [ "2x" => "(juxtapose 2 x)" "2x" => "(juxtapose 2 x)" - "2(x)" => "(juxtapose 2 x)" - "(2)(3)x" => "(juxtapose 2 3 x)" - "(x-1)y" => "(juxtapose (call-i x - 1) y)" + "2(x)" => "(juxtapose 2 (parens x))" + "(2)(3)x" => "(juxtapose (parens 2) (parens 3) x)" + "(x-1)y" => "(juxtapose (parens (call-i x - 1)) y)" "x'y" => "(juxtapose (call-post x ') y)" # errors "\"a\"\"b\"" => "(juxtapose (string \"a\") (error-t) (string \"b\"))" @@ -226,11 +226,14 @@ tests = [ # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" ".+(a,)" => "(call .+ a)" - "(.+)(a)" => "(call (. +) a)" + "(.+)(a)" => "(call (parens (. +)) a)" "+(a=1,)" => "(call + (= a 1))" => Expr(:call, :+, Expr(:kw, :a, 1)) "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" + "+()" => "(call +)" + "+(\n;a)" => "(call + (parameters a))" + "+(\n)" => "(call +)" # Whitespace not allowed before prefix function call bracket "+ (a,b)" => "(call + (error) a b)" # Prefix calls have higher precedence than ^ @@ -238,14 +241,14 @@ tests = [ "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" "<:(a,)" => "(<: a)" # Unary function calls with brackets as grouping, not an arglist - ".+(a)" => "(dotcall-pre + a)" + ".+(a)" => "(dotcall-pre + (parens a))" "+(a;b)" => "(call-pre + (block-p a b))" - "+(a=1)" => "(call-pre + (= a 1))" => Expr(:call, :+, Expr(:(=), :a, 1)) + "+(a=1)" => "(call-pre + (parens (= a 1)))" => Expr(:call, :+, Expr(:(=), :a, 1)) # Unary operators have lower precedence than ^ - "+(a)^2" => "(call-pre + (call-i a ^ 2))" - ".+(a)^2" => "(dotcall-pre + (call-i a ^ 2))" - "+(a)(x,y)^2" => "(call-pre + (call-i (call a x y) ^ 2))" - "<:(a)" => "(<:-pre a)" + "+(a)^2" => "(call-pre + (call-i (parens a) ^ 2))" + ".+(a)^2" => "(dotcall-pre + (call-i (parens a) ^ 2))" + 
"+(a)(x,y)^2" => "(call-pre + (call-i (call (parens a) x y) ^ 2))" + "<:(a)" => "(<:-pre (parens a))" # Normal unary calls "+x" => "(call-pre + x)" "√x" => "(call-pre √ x)" @@ -276,7 +279,7 @@ tests = [ "<: \n" => "<:" "<: =" => "<:" "<:{T}(x::T)" => "(call (curly <: T) (::-i x T))" - "<:(x::T)" => "(<:-pre (::-i x T))" + "<:(x::T)" => "(<:-pre (parens (::-i x T)))" "<: x" => "(<:-pre x)" "<: <: x" => "(<:-pre (<:-pre x))" "<: A where B" => "(<:-pre (where A B))" @@ -307,11 +310,11 @@ tests = [ "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" # non-errors in space sensitive contexts - "[f (x)]" => "(hcat f x)" + "[f (x)]" => "(hcat f (parens x))" "[f x]" => "(hcat f x)" # space separated macro calls "@foo a b" => "(macrocall @foo a b)" - "@foo (x)" => "(macrocall @foo x)" + "@foo (x)" => "(macrocall @foo (parens x))" "@foo (x,y)" => "(macrocall @foo (tuple-p x y))" "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" @@ -341,7 +344,7 @@ tests = [ Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) "f(a; b; c)" => "(call f a (parameters b) (parameters c))" => Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) - "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) + "(a=1)()" => "(call (parens (= a 1)))" => Expr(:call, Expr(:(=), :a, 1)) "f (a)" => "(call f (error-t) a)" "@x(a, b)" => "(macrocall-p @x a b)" "A.@x(y)" => "(macrocall-p (. 
A (quote @x)) y)" @@ -367,7 +370,10 @@ tests = [ "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" - "(a=1)[]" => "(ref (= a 1))" => Expr(:ref, Expr(:(=), :a, 1)) + "(a=1)[]" => "(ref (parens (= a 1)))" => Expr(:ref, Expr(:(=), :a, 1)) + "a[end]" => "(ref a end)" + "a[begin]" => "(ref a begin)" + "a[:(end)]" => "(typed_hcat a (quote (parens (error-t))) (error-t))" "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" => "(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" @@ -382,13 +388,13 @@ tests = [ "f.(a,b)" => "(dotcall f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" => Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - "(a=1).()" => "(dotcall (= a 1))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) + "(a=1).()" => "(dotcall (parens (= a 1)))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) "f. (x)" => "(dotcall f (error-t) x)" # Other dotted syntax "A.:+" => "(. A (quote +))" "A.: +" => "(. A (quote (error-t) +))" "f.\$x" => "(. f (inert (\$ x)))" - "f.\$(x+y)" => "(. f (inert (\$ (call-i x + y))))" + "f.\$(x+y)" => "(. f (inert (\$ (parens (call-i x + y)))))" "A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))" "@A.\$x a" => "(macrocall (. A (inert (error x))) a)" "A.@x" => "(macrocall (. 
A (quote @x)))" @@ -496,10 +502,10 @@ tests = [ "export a, \n @b" => "(export a @b)" => Expr(:export, :a, Symbol("@b")) "export +, ==" => "(export + ==)" => Expr(:export, :+, :(==)) "export \n a" => "(export a)" => Expr(:export, :a) - "export \$a, \$(a*b)" => "(export (\$ a) (\$ (call-i a * b)))" => Expr(:export, Expr(:$, :a), Expr(:$, Expr(:call, :*, :a, :b))) - "export (x::T)" => "(export (error (::-i x T)))" + "export \$a, \$(a*b)" => "(export (\$ a) (\$ (parens (call-i a * b))))" => Expr(:export, Expr(:$, :a), Expr(:$, Expr(:call, :*, :a, :b))) + "export (x::T)" => "(export (error (parens (::-i x T))))" "export outer" => "(export outer)" => Expr(:export, :outer) - "export (\$f)" => "(export (\$ f))" => Expr(:export, Expr(:$, :f)) + "export (\$f)" => "(export (parens (\$ f)))" => Expr(:export, Expr(:$, :f)) ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif b (block yy) (block zz)))" @@ -537,28 +543,27 @@ tests = [ # Macros and functions "macro while(ex) end" => "(macro (call (error while) ex) (block))" "macro f() end" => "(macro (call f) (block))" - "macro (:)(ex) end" => "(macro (call : ex) (block))" - "macro (type)(ex) end" => "(macro (call type ex) (block))" + "macro (:)(ex) end" => "(macro (call (parens :) ex) (block))" + "macro (type)(ex) end" => "(macro (call (parens type) ex) (block))" "macro \$f() end" => "(macro (call (\$ f)) (block))" - "macro (\$f)() end" => "(macro (call (\$ f)) (block))" + "macro (\$f)() end" => "(macro (call (parens (\$ f))) (block))" "function (x) body end"=> "(function (tuple-p x) (block body))" "function (x,y) end" => "(function (tuple-p x y) (block))" "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" "function ()(x) end" => "(function (call (tuple-p) x) (block))" - "function (A).f() end" => "(function (call (. 
A (quote f))) (block))" - "function (:)() end" => "(function (call :) (block))" - "function (x::T)() end"=> "(function (call (::-i x T)) (block))" - "function (::g(x))() end" => "(function (call (::-pre (call g x))) (block))" - "function (f::T{g(i)})() end" => "(function (call (::-i f (curly T (call g i)))) (block))" - "function (::T)() end" => "(function (call (::-pre T)) (block))" + "function (A).f() end" => "(function (call (. (parens A) (quote f))) (block))" + "function (:)() end" => "(function (call (parens :)) (block))" + "function (x::T)() end"=> "(function (call (parens (::-i x T))) (block))" + "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" + "function (f::T{g(i)})() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))" + "function (::T)() end" => "(function (call (parens (::-pre T))) (block))" "function begin() end" => "(function (call (error begin)) (block))" "function f() end" => "(function (call f) (block))" "function type() end" => "(function (call type) (block))" "function \n f() end" => "(function (call f) (block))" "function \$f() end" => "(function (call (\$ f)) (block))" - "function (:)() end" => "(function (call :) (block))" - "function (::Type{T})(x) end" => "(function (call (::-pre (curly Type T)) x) (block))" + "function (::Type{T})(x) end" => "(function (call (parens (::-pre (curly Type T))) x) (block))" # Function/macro definition with no methods "function f end" => "(function f)" "function f \n\n end" => "(function f)" @@ -576,11 +581,11 @@ tests = [ "function f()::S where T end" => "(function (where (::-i (call f) S) T) (block))" # Ugly cases for compat where extra parentheses existed and we've # already parsed at least the call part of the signature - "function (f() where T) end" => "(function (where (call f) T) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) - "function (f()) where T end" => "(function (where (call f) T) (block))" - "function (f() 
where T) where U end" => "(function (where (where (call f) T) U) (block))" - "function (f()::S) end"=> "(function (::-i (call f) S) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) - "function ((f()::S) where T) end" => "(function (where (::-i (call f) S) T) (block))" + "function (f() where T) end" => "(function (parens (where (call f) T)) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) + "function (f()) where T end" => "(function (where (parens (call f)) T) (block))" + "function (f() where T) where U end" => "(function (where (parens (where (call f) T)) U) (block))" + "function (f()::S) end"=> "(function (parens (::-i (call f) S)) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) + "function ((f()::S) where T) end" => "(function (parens (where (parens (::-i (call f) S)) T)) (block))" # body "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" @@ -682,16 +687,16 @@ tests = [ "(a;b;;c)" => "(block-p a b c)" "(a=1; b=2)" => "(block-p (= a 1) (= b 2))" # Parentheses used for grouping - "(a * b)" => "(call-i a * b)" - "(a=1)" => "(= a 1)" - "(x)" => "x" - "(a...)" => "(... a)" + "(a * b)" => "(parens (call-i a * b))" + "(a=1)" => "(parens (= a 1))" + "(x)" => "(parens x)" + "(a...)" => "(parens (... 
a))" # Generators - "(x for a in as)" => "(generator x (= a as))" - "(x \n\n for a in as)" => "(generator x (= a as))" + "(x for a in as)" => "(parens (generator x (= a as)))" + "(x \n\n for a in as)" => "(parens (generator x (= a as)))" # Range parsing in parens - "(1:\n2)" => "(call-i 1 : 2)" - "(1:2)" => "(call-i 1 : 2)" + "(1:\n2)" => "(parens (call-i 1 : 2))" + "(1:2)" => "(parens (call-i 1 : 2))" ], JuliaSyntax.parse_atom => [ # char literal @@ -741,7 +746,7 @@ tests = [ ":.=" => "(quote .=)" # Special symbols quoted ":end" => "(quote end)" - ":(end)" => "(quote (error-t))" + ":(end)" => "(quote (parens (error-t)))" ":<:" => "(quote <:)" # unexpect = "=" => "(error =)" @@ -759,11 +764,11 @@ tests = [ # parse_generator "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" - "[(x)for x in xs]" => "(comprehension (generator x (error-t) (= x xs)))" - "(a for x in xs if cond)" => "(generator a (filter (= x xs) cond))" - "(xy for x in xs for y in ys)" => "(flatten xy (= x xs) (= y ys))" - "(xy for x in xs for y in ys for z in zs)" => "(flatten xy (= x xs) (= y ys) (= z zs))" - "(x for a in as)" => "(generator x (= a as))" + "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))" + "(a for x in xs if cond)" => "(parens (generator a (filter (= x xs) cond)))" + "(xy for x in xs for y in ys)" => "(parens (flatten xy (= x xs) (= y ys)))" + "(xy for x in xs for y in ys for z in zs)" => "(parens (flatten xy (= x xs) (= y ys) (= z zs)))" + "(x for a in as)" => "(parens (generator x (= a as)))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" @@ -776,7 +781,7 @@ tests = [ # parse_paren ":(=)" => "(quote =)" ":(::)" => "(quote ::)" - "(function f \n end)" => "(function f)" + "(function f \n end)" => "(parens (function f))" # braces "{x y}" => 
"(bracescat (row x y))" ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" @@ -845,8 +850,8 @@ tests = [ ((v=v"1.7",), "[;;]") => "(ncat-2 (error))" # parse_string "\"\"\"\n\$x\n a\"\"\"" => "(string-s x \"\\n\" \" a\")" - "\"a \$(x + y) b\"" => "(string \"a \" (call-i x + y) \" b\")" - "\"hi\$(\"ho\")\"" => "(string \"hi\" (string \"ho\"))" + "\"a \$(x + y) b\"" => "(string \"a \" (parens (call-i x + y)) \" b\")" + "\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" @@ -888,7 +893,7 @@ tests = [ "\"str" => "(string \"str\" (error-t))" # String interpolations "\"\$x\$y\$z\"" => "(string x y z)" - "\"\$(x)\"" => "(string x)" + "\"\$(x)\"" => "(string (parens x))" "\"\$x\"" => "(string x)" # Strings with embedded whitespace trivia "\"a\\\nb\"" => raw"""(string "a" "b")""" @@ -931,11 +936,11 @@ end parseall_test_specs = [ # whitespace before keywords in space-insensitive mode - "(y::\nif x z end)" => "(toplevel (::-i y (if x (block z))))" + "(y::\nif x z end)" => "(toplevel (parens (::-i y (if x (block z)))))" # The following may not be ideal error recovery! But at least the parser # shouldn't crash - "@(x y)" => "(toplevel (macrocall x (error-t @y)))" + "@(x y)" => "(toplevel (macrocall (error x (error-t y))))" "|(&\nfunction" => "(toplevel (call | (& (function (error (error)) (block (error)) (error-t))) (error-t)))" ] diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index bc6a1aa88b26c..719af8bb7f738 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -3,11 +3,7 @@ @test parse(Expr, " x ") == :x @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) @test parseatom(Expr, " x ") == :x - # TODO: Fix this situation with trivia here; the brackets are trivia, but - # must be parsed to discover the atom inside. 
But in GreenTree we only - # place trivia as siblings of the leaf node with identifier `x`, not as - # children. - @test_broken parseatom(Expr, "(x)") == :x + @test parseatom(Expr, "(x)") == :x # SubString @test parse(Expr, SubString("x+y")) == :(x+y) From 3f830180eada73ec610f35a52afa32f1ef4c51c8 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 21 Mar 2023 14:51:45 +1000 Subject: [PATCH 0608/1109] Use === for symbol comparisons --- JuliaSyntax/src/diagnostics.jl | 8 ++++---- JuliaSyntax/src/expr.jl | 29 ++++++++++++++--------------- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 8 ++++---- JuliaSyntax/src/syntax_tree.jl | 4 ++-- 5 files changed, 25 insertions(+), 26 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 404972514b9b7..5c9004a7e4995 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -39,7 +39,7 @@ end first_byte(d::Diagnostic) = d.first_byte last_byte(d::Diagnostic) = d.last_byte -is_error(d::Diagnostic) = d.level == :error +is_error(d::Diagnostic) = d.level === :error Base.range(d::Diagnostic) = first_byte(d):last_byte(d) # Make relative path into a file URL @@ -54,9 +54,9 @@ function _file_url(filename) end function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) - color,prefix = diagnostic.level == :error ? (:light_red, "Error") : - diagnostic.level == :warning ? (:light_yellow, "Warning") : - diagnostic.level == :note ? (:light_blue, "Note") : + color,prefix = diagnostic.level === :error ? (:light_red, "Error") : + diagnostic.level === :warning ? (:light_yellow, "Warning") : + diagnostic.level === :note ? 
(:light_blue, "Note") : (:normal, "Info") line, col = source_location(source, first_byte(diagnostic)) linecol = "$line:$col" diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 2f6ee29e24e99..3620346f8615b 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -69,7 +69,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, headsym = !isnothing(headstr) ? Symbol(headstr) : error("Can't untokenize head of kind $(nodekind)") end - if headsym == :string || headsym == :cmdstring + if headsym === :string || headsym === :cmdstring # Julia string literals may be interspersed with trivia in two situations: # 1. Triple quoted string indentation is trivia # 2. An \ before newline removes the newline and any following indentation @@ -93,7 +93,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end else e = _to_expr(node_args[i]) - if e isa String && headsym == :string + if e isa String && headsym === :string # Wrap interpolated literal strings in (string) so we can # distinguish them from the surrounding text (issue #38501) # Ie, "$("str")" vs "str" @@ -117,22 +117,22 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end # Convert children - insert_linenums = (headsym == :block || headsym == :toplevel) && need_linenodes + insert_linenums = (headsym === :block || headsym === :toplevel) && need_linenodes args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 
2 : 1)) - if headsym == :for && length(node_args) == 2 + if headsym === :for && length(node_args) == 2 # No line numbers in for loop iteration spec args[1] = _to_expr(node_args[1], iteration_spec=true, need_linenodes=false) args[2] = _to_expr(node_args[2]) - elseif headsym == :let && length(node_args) == 2 + elseif headsym === :let && length(node_args) == 2 # No line numbers in let statement binding list args[1] = _to_expr(node_args[1], need_linenodes=false) args[2] = _to_expr(node_args[2]) else eq_to_kw_in_call = - ((headsym == :call || headsym == :dotcall) && is_prefix_call(node)) || - headsym == :ref - eq_to_kw_all = headsym == :parameters && !map_kw_in_params - in_vcbr = headsym == :vect || headsym == :curly || headsym == :braces || headsym == :ref + ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || + headsym === :ref + eq_to_kw_all = headsym === :parameters && !map_kw_in_params + in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref if insert_linenums && isempty(node_args) push!(args, source_location(LineNumberNode, node.source, node.position)) else @@ -143,8 +143,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all args[insert_linenums ? 
2*i : i] = - _to_expr(n, eq_to_kw=eq_to_kw, - map_kw_in_params=in_vcbr) + _to_expr(n, eq_to_kw=eq_to_kw, map_kw_in_params=in_vcbr) end if nodekind == K"block" && has_flags(node, PARENS_FLAG) popfirst!(args) @@ -197,7 +196,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end elseif headsym === :where reorder_parameters!(args, 2) - elseif headsym == :parens + elseif headsym === :parens # parens are used for grouping and don't appear in the Expr AST return only(args) elseif headsym in (:try, :try_finally_catch) @@ -245,7 +244,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, pushfirst!(args, numeric_flags(flags(node))) elseif headsym === :typed_ncat insert!(args, 2, numeric_flags(flags(node))) - # elseif headsym == :string && length(args) == 1 && version <= (1,5) + # elseif headsym === :string && length(args) == 1 && version <= (1,5) # Strip string from interpolations in 1.5 and lower to preserve # "hi$("ho")" ==> (string "hi" "ho") elseif headsym === :(=) && !is_decorated(node) @@ -253,7 +252,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # Add block for short form function locations args[2] = Expr(:block, loc, args[2]) end - elseif headsym == :elseif + elseif headsym === :elseif # Block for conditional's source location args[1] = Expr(:block, loc, args[1]) elseif headsym === :(->) @@ -283,7 +282,7 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym === :module pushfirst!(args, !has_flags(node, BARE_MODULE_FLAG)) pushfirst!(args[3].args, loc) - elseif headsym == :inert || (headsym == :quote && length(args) == 1 && + elseif headsym === :inert || (headsym === :quote && length(args) == 1 && !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || a1 isa Bool # <- compat hack, Julia 1.4+ )) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 01df50238d9c7..47d7dc47c1eeb 100644 --- a/JuliaSyntax/src/hooks.jl 
+++ b/JuliaSyntax/src/hooks.jl @@ -288,7 +288,7 @@ function _fl_parse_hook(code, filename, lineno, offset, options) ex = Expr(:toplevel, ex) end return ex, sizeof(code) - elseif options === :statement || options == :atom + elseif options === :statement || options === :atom ex, pos = Meta.parse(code, offset+1, greedy=options==:statement, raise=false) return ex, pos-1 else diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 0ff0215948492..d58d8b9d4193b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -890,14 +890,14 @@ function validate_tokens(stream::ParseStream) # jl_strtod_c can return "underflow" even for valid cases such # as `5e-324` where the source is an exact representation of # `x`. So only warn when underflowing to zero. - underflow0 = code == :underflow && x == 0 + underflow0 = code === :underflow && x == 0 else x, code = parse_float_literal(Float32, text, fbyte, nbyte) - underflow0 = code == :underflow && x == 0 + underflow0 = code === :underflow && x == 0 end - if code == :ok + if code === :ok # pass - elseif code == :overflow + elseif code === :overflow emit_diagnostic(stream, fbyte, lbyte, error="overflow in floating point literal") error_kind = K"ErrorNumericOverflow" diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index a9a5fdc883240..4d8a21721e70d 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -60,11 +60,11 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In elseif k == K"Float" v, code = parse_float_literal(Float64, source.code, position, position+span(raw)) - (code == :ok || code == :underflow) ? v : ErrorVal() + (code === :ok || code === :underflow) ? v : ErrorVal() elseif k == K"Float32" v, code = parse_float_literal(Float32, source.code, position, position+span(raw)) - (code == :ok || code == :underflow) ? v : ErrorVal() + (code === :ok || code === :underflow) ? 
v : ErrorVal() elseif k in KSet"BinInt OctInt HexInt" parse_uint_literal(val_str, k) elseif k == K"true" From 664db4cab1939a188cad57caf348b7e9c714acc7 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 21 Mar 2023 15:53:21 +1000 Subject: [PATCH 0609/1109] Better conversion to `Expr` with `parse(..., ignore_errors=true)` (JuliaLang/JuliaSyntax.jl#224) This isn't a complete solution, but it improves the situation somewhat. (It's unclear whether we should spend a lot of effort here as converting to Expr is not a particularly expressive path for error propagation. We probably need to plumb diagnostic support into the Julia runtime in a more general way.) --- JuliaSyntax/src/expr.jl | 13 ++++++++++--- JuliaSyntax/test/expr.jl | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 2f6ee29e24e99..d19a056d65a62 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -32,6 +32,7 @@ end function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, eq_to_kw=false, map_kw_in_params=false) + nodekind = kind(node) if !haschildren(node) val = node.val if val isa Union{Int128,UInt128,BigInt} @@ -44,15 +45,21 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, val isa UInt128 ? Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) - elseif kind(node) == K"core_@cmd" + elseif nodekind == K"core_@cmd" return GlobalRef(Core, Symbol("@cmd")) - elseif kind(node) == K"MacroName" && val === Symbol("@.") + elseif nodekind == K"MacroName" && val === Symbol("@.") return Symbol("@__dot__") + elseif is_error(nodekind) + # TODO: Get non-token error messages in here as well, somehow? + # There's an awkward mismatch between the out-of-tree + # `Vector{Diagnostic}` vs Expr(:error) being part of the tree. 
+ return Expr(:error, + "$(_token_error_descriptions[nodekind]): `$(sourcetext(node))`" + ) else return val end end - nodekind = kind(node) node_args = children(node) if nodekind == K"var" @check length(node_args) == 1 diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 5c2f03d1ab822..e54f66680118e 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -326,4 +326,12 @@ @test parse(Expr, "baremodule A end") == Expr(:module, false, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) end + + @testset "errors" begin + @test parse(Expr, "--", ignore_errors=true) == + Expr(:error, "invalid operator: `--`") + @test parseall(Expr, "a b", ignore_errors=true) == + Expr(:toplevel, LineNumberNode(1), :a, + LineNumberNode(1), Expr(:error, :b)) + end end From 1eb400fd3547b3a7ed402f6d4272add70404ff36 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 22 Mar 2023 06:55:20 +1000 Subject: [PATCH 0610/1109] Clean up emit_diagnostic stream position handling `emit_diagnostic(ps, mark, ...)` now behaves like `emit(ps, mark, ...)` in terms of the source range covered by `mark`. This is much less confusing than the previous behavior. --- JuliaSyntax/src/diagnostics.jl | 4 +-- JuliaSyntax/src/kinds.jl | 6 +++- JuliaSyntax/src/literal_parsing.jl | 6 ++-- JuliaSyntax/src/parse_stream.jl | 50 +++++++++++++++------------- JuliaSyntax/src/parser.jl | 52 +++++++++++++----------------- JuliaSyntax/test/diagnostics.jl | 36 +++++++++++++++++++-- JuliaSyntax/test/parser.jl | 1 + 7 files changed, 94 insertions(+), 61 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 5c9004a7e4995..717f38921d446 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -92,8 +92,8 @@ function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text: end function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, - fbyte::Integer, lbyte::Integer; kws...) 
- push!(diagnostics, Diagnostic(fbyte, lbyte; kws...)) + byterange::AbstractUnitRange; kws...) + push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...)) end function any_error(diagnostics::AbstractVector{Diagnostic}) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index f0848936df91b..a9a34924b7a89 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1133,6 +1133,10 @@ function is_radical_op(x) kind(x) in (K"√", K"∛", K"∜") end +""" +Return true if `x` has whitespace or comment kind +""" function is_whitespace(x) - kind(x) in (K"Whitespace", K"NewlineWs") + k = kind(x) + return k == K"Whitespace" || k == K"NewlineWs" || k == K"Comment" end diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index 37982ac4984ef..2a58d425bba53 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -261,7 +261,7 @@ function unescape_julia_string(io::IO, str::AbstractString, u = m == 4 ? 'u' : 'U' msg = (m == 2) ? "invalid hex escape sequence" : "invalid unicode escape sequence" - emit_diagnostic(diagnostics, escstart, i, error=msg) + emit_diagnostic(diagnostics, escstart:i, error=msg) had_error = true else if m == 2 # \x escape sequence @@ -279,7 +279,7 @@ function unescape_julia_string(io::IO, str::AbstractString, i += 1 end if n > 255 - emit_diagnostic(diagnostics, escstart, i, + emit_diagnostic(diagnostics, escstart:i, error="invalid octal escape sequence") had_error = true else @@ -303,7 +303,7 @@ function unescape_julia_string(io::IO, str::AbstractString, c == '`' ? 
'`' : nothing if isnothing(u) - emit_diagnostic(diagnostics, escstart, i, + emit_diagnostic(diagnostics, escstart:i, error="invalid escape sequence") had_error = true else diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index d58d8b9d4193b..3f789fc50aca4 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -359,7 +359,7 @@ function _buffer_lookahead_tokens(lexer, lookahead) while true raw = Tokenize.next_token(lexer) k = kind(raw) - was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs") + was_whitespace = is_whitespace(k) had_whitespace |= was_whitespace f = EMPTY_FLAGS raw.dotop && (f |= DOTOP_FLAG) @@ -622,7 +622,7 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None break end f = flags | (@__MODULE__).flags(tok) - is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs") + is_trivia = is_whitespace(k) is_trivia && (f |= TRIVIA_FLAG) outk = (is_trivia || remap_kind == K"None") ? k : remap_kind h = SyntaxHead(outk, f) @@ -686,7 +686,7 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; h = SyntaxHead(kind, flags) push!(stream.tokens, SyntaxToken(h, (@__MODULE__).kind(h), false, b)) if !isnothing(error) - emit_diagnostic(stream, b, b-1, error=error) + emit_diagnostic(stream, b:b-1, error=error) end stream.peek_count = 0 return position(stream) @@ -797,12 +797,6 @@ function Base.position(stream::ParseStream) ParseStreamPosition(lastindex(stream.tokens), lastindex(stream.ranges)) end -# Get position of next item to be emitted into the output stream -# TODO: Figure out how to remove this? It's only used with emit_diagnostic -function next_position(stream::ParseStream) - ParseStreamPosition(lastindex(stream.tokens)+1, lastindex(stream.ranges)+1) -end - """ emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing) @@ -819,14 +813,14 @@ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, # nested. 
fbyte = token_first_byte(stream, first_token) lbyte = token_last_byte(stream, lastindex(stream.tokens)) - emit_diagnostic(stream, fbyte, lbyte, error=error) + emit_diagnostic(stream, fbyte:lbyte, error=error) end push!(stream.ranges, range) return position(stream) end -function emit_diagnostic(stream::ParseStream, fbyte::Integer, lbyte::Integer; kws...) - emit_diagnostic(stream.diagnostics, fbyte, lbyte; kws...) +function emit_diagnostic(stream::ParseStream, byterange::AbstractUnitRange; kws...) + emit_diagnostic(stream.diagnostics, byterange; kws...) return nothing end @@ -849,20 +843,30 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) end fbyte = lookahead_token_first_byte(stream, begin_tok_i) lbyte = lookahead_token_last_byte(stream, end_tok_i) - emit_diagnostic(stream, fbyte, lbyte; kws...) + emit_diagnostic(stream, fbyte:lbyte; kws...) return nothing end -function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...) - emit_diagnostic(stream, token_first_byte(stream, mark.token_index), - _next_byte(stream) - 1; kws...) +function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; trim_whitespace=true, kws...) + i = mark.token_index + j = lastindex(stream.tokens) + if trim_whitespace + while i < j && is_whitespace(stream.tokens[j]) + j -= 1 + end + while i+1 < j && is_whitespace(stream.tokens[i+1]) + i += 1 + end + end + byterange = stream.tokens[i].next_byte:stream.tokens[j].next_byte-1 + emit_diagnostic(stream, byterange; kws...) end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, end_mark::ParseStreamPosition; kws...) - fbyte = token_first_byte(stream, mark.token_index) - lbyte = token_first_byte(stream, end_mark.token_index) - 1 - emit_diagnostic(stream, fbyte, lbyte; kws...) + fbyte = stream.tokens[mark.token_index].next_byte + lbyte = stream.tokens[end_mark.token_index].next_byte-1 + emit_diagnostic(stream, fbyte:lbyte; kws...) 
end #------------------------------------------------------------------------------- @@ -898,11 +902,11 @@ function validate_tokens(stream::ParseStream) if code === :ok # pass elseif code === :overflow - emit_diagnostic(stream, fbyte, lbyte, + emit_diagnostic(stream, fbyte:lbyte, error="overflow in floating point literal") error_kind = K"ErrorNumericOverflow" elseif underflow0 - emit_diagnostic(stream, fbyte, lbyte, + emit_diagnostic(stream, fbyte:lbyte, warning="underflow to zero in floating point literal") end elseif k == K"Char" @@ -917,7 +921,7 @@ function validate_tokens(stream::ParseStream) read(charbuf, Char) if !eof(charbuf) error_kind = K"ErrorOverLongCharacter" - emit_diagnostic(stream, fbyte, lbyte, + emit_diagnostic(stream, fbyte:lbyte, error="character literal contains multiple characters") end end @@ -929,7 +933,7 @@ function validate_tokens(stream::ParseStream) end elseif is_error(k) && k != K"error" # Emit messages for non-generic token errors - emit_diagnostic(stream, fbyte, lbyte, + emit_diagnostic(stream, fbyte:lbyte, error=_token_error_descriptions[k]) end if error_kind != K"None" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 899360135e13e..dea41feb7c42c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -113,10 +113,6 @@ function Base.position(ps::ParseState, args...) position(ps.stream, args...) end -function next_position(ps::ParseState, args...) - next_position(ps.stream, args...) -end - function emit(ps::ParseState, args...; kws...) emit(ps.stream, args...; kws...) 
end @@ -1230,11 +1226,10 @@ function parse_unary(ps::ParseState) space_before_paren = preceding_whitespace(t2) if space_before_paren # Setup possible whitespace error between operator and ( - ws_node_mark = position(ps) - ws_mark = next_position(ps) + ws_mark = position(ps) bump_trivia(ps) - ws_error_pos = emit(ps, ws_node_mark, K"TOMBSTONE") - ws_mark_end = next_position(ps) + ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE") + ws_mark_end = position(ps) end mark_before_paren = position(ps) @@ -1617,7 +1612,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) k = peek(ps) if k == K"(" if is_macrocall - bump_invisible(ps, K"error") + # @M.(x) ==> (macrocall (dotcall @M (error-t) x)) + bump_invisible(ps, K"error", TRIVIA_FLAG) emit_diagnostic(ps, mark, error="dot call syntax not supported for macros") end @@ -1662,7 +1658,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end parse_macro_name(ps) macro_name_position = position(ps) - macro_atname_range = (m, next_position(ps)) + macro_atname_range = (m, position(ps)) emit(ps, m, K"quote") emit(ps, mark, K".") elseif k == K"'" @@ -1857,7 +1853,6 @@ function parse_resword(ps::ParseState) elseif word in KSet"global local" # global x ==> (global x) # local x ==> (local x) - # global x,y ==> (global x y) bump(ps, TRIVIA_FLAG) const_mark = nothing if peek(ps) == K"const" @@ -2080,16 +2075,17 @@ function parse_global_local_const_vars(ps) mark = position(ps) n_commas = parse_comma(ps, false) t = peek_token(ps) - assign_prec = is_prec_assignment(t) - if n_commas >= 1 && assign_prec - # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) - emit(ps, mark, K"tuple") - end - if assign_prec + if is_prec_assignment(t) + if n_commas >= 1 + # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) + emit(ps, mark, K"tuple") + end # const x = 1 ==> (const (= x 1)) # global x ~ 1 ==> (global (call-i x ~ 1)) # global x += 1 ==> (global (+= x 1)) parse_assignment_with_initial_ex(ps, mark, 
parse_comma) + else + # global x,y ==> (global x y) end return kind(t) == K"=" && !is_dotted(t) end @@ -2106,7 +2102,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) kb = peek_behind(ps).orig_kind if is_initial_reserved_word(ps, kb) # macro while(ex) end ==> (macro (call (error while) ex) (block)) - emit(ps, mark, K"error", error="Invalid macro name") + emit(ps, mark, K"error", error="invalid macro name") else # macro f() end ==> (macro (call f) (block)) # macro (:)(ex) end ==> (macro (call (parens :) ex) (block)) @@ -2228,7 +2224,6 @@ function parse_try(ps) out_kind = K"try" mark = position(ps) bump(ps, TRIVIA_FLAG) - diagnostic_mark = position(ps) parse_block(ps) has_catch = false has_else = false @@ -2278,8 +2273,8 @@ function parse_try(ps) out_kind = K"try_finally_catch" m = position(ps) parse_catch(ps) - emit_diagnostic(ps, m, position(ps), - warning="`catch` after `finally` will execute out of order") + emit_diagnostic(ps, m, + warning="`catch` after `finally` will execute out of order") end missing_recovery = !has_catch && !has_finally if missing_recovery @@ -2289,7 +2284,7 @@ function parse_try(ps) bump_closing_token(ps, K"end") emit(ps, mark, out_kind, flags) if missing_recovery - emit_diagnostic(ps, diagnostic_mark, error="try without catch or finally") + emit_diagnostic(ps, mark, error="try without catch or finally") end end @@ -2359,11 +2354,9 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not if isnothing(name_kind) name_kind = (k == K"Identifier") ? 
K"MacroName" : K"error" if name_kind == K"error" - # Hack to handle bad but unusual syntax like `@A.$x a` - ri = macro_name_position.range_index - startpos = ParseStreamPosition(ps.stream.ranges[ri].first_token, ri) - # This isn't quite accurate - emit_diagnostic(ps, startpos, macro_name_position, error="Invalid macro name") + # TODO: This isn't quite accurate + emit_diagnostic(ps, macro_name_position, macro_name_position, + error="invalid macro name") end end reset_node!(ps, macro_name_position, kind=name_kind) @@ -2383,10 +2376,11 @@ function parse_macro_name(ps::ParseState) k = peek(ps) parse_atom(ps, false) if k == K"(" - emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary") + emit_diagnostic(ps, mark, + warning="parenthesizing macro names is unnecessary") elseif !(peek_behind(ps).kind in KSet"Identifier var") # @[x] y z ==> (macrocall (error (vect x)) y z) - emit(ps, mark, K"error", error="Invalid macro name") + emit(ps, mark, K"error", error="invalid macro name") end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 1df7934547361..a70d7ec4db37b 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -1,14 +1,44 @@ -function diagnostic(str; allow_multiple=false) +function diagnostic(str; only_first=false, allow_multiple=false) stream = ParseStream(str) parse!(stream) if allow_multiple stream.diagnostics else - @test length(stream.diagnostics) == 1 - only(stream.diagnostics) + if !only_first + @test length(stream.diagnostics) == 1 + end + return stream.diagnostics[1] end end +@testset "parser errors" begin + @test diagnostic("+ #==# (a,b)") == + Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list") + @test diagnostic("A.@B.x", only_first=true) == + Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component") + @test diagnostic("@M.(x)") == + Diagnostic(1, 3, :error, "dot call syntax not supported for 
macros") + + @test diagnostic("try x end") == + Diagnostic(1, 9, :error, "try without catch or finally") + # TODO: better range + @test diagnostic("@A.\$x a") == + Diagnostic(6, 5, :error, "invalid macro name") +end + +@testset "parser warnings" begin + @test diagnostic("@(A)", only_first=true) == + Diagnostic(2, 4, :warning, "parenthesizing macro names is unnecessary") + @test diagnostic("try finally catch a ; b end") == + Diagnostic(13, 23, :warning, "`catch` after `finally` will execute out of order") + @test diagnostic("import . .A") == + Diagnostic(9, 10, :warning, "space between dots in import path") + @test diagnostic("import A .==") == + Diagnostic(9, 9, :warning, "space between dots in import path") + @test diagnostic("import A.:+") == + Diagnostic(10, 10, :warning, "quoting with `:` in import is unnecessary") +end + @testset "diagnostics for literal parsing" begin # Float overflow/underflow @test diagnostic("x = 10.0e1000;") == diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 6adddbfa10873..801f49c623195 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -385,6 +385,7 @@ tests = [ "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" "A.@B.x" => "(macrocall (. (. 
A (quote B)) (error-t) (quote @x)))" + "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" "f.(a,b)" => "(dotcall f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" => Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) From dc1b544ad1dd25e3ad1bbf810f9d40fa172c56b9 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 22 Mar 2023 08:53:11 +1000 Subject: [PATCH 0611/1109] Fix parsing of parenthesized macro names --- JuliaSyntax/src/parser.jl | 22 +++++++--------------- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index dea41feb7c42c..8417afdd3121a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2335,18 +2335,10 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not if k == K"var" macro_name_position = first_child_position(ps, macro_name_position) k = peek_behind(ps, macro_name_position).kind - elseif k == K")" - # @(A) x => (macrocall @A x) - # TODO: Clean this up when K"parens" is implemented - while true - macro_name_position = ParseStreamPosition(macro_name_position.token_index-1, - macro_name_position.range_index-1) - b = peek_behind(ps, macro_name_position) - k = b.kind - if !has_flags(b.flags, TRIVIA_FLAG) - break - end - end + elseif k == K"parens" + # @(A) x ==> (macrocall (parens @A) x) + macro_name_position = first_child_position(ps, macro_name_position) + k = peek_behind(ps, macro_name_position).kind elseif k == K"error" # Error already reported in parse_macro_name return @@ -2373,12 +2365,12 @@ function parse_macro_name(ps::ParseState) # @var"#" x ==> (macrocall (var #) @$ x) bump_disallowed_space(ps) mark = position(ps) - k = peek(ps) parse_atom(ps, false) - if k == K"(" + kb = peek_behind(ps, position(ps)).kind + if kb == K"parens" emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary") - elseif !(peek_behind(ps).kind in 
KSet"Identifier var") + elseif !(kb in KSet"Identifier var") # @[x] y z ==> (macrocall (error (vect x)) y z) emit(ps, mark, K"error", error="invalid macro name") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 801f49c623195..304046170cc2f 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -321,6 +321,7 @@ tests = [ "[@foo x]" => "(vect (macrocall @foo x))" "[@foo]" => "(vect (macrocall @foo))" "@var\"#\" a" => "(macrocall (var @#) a)" => Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + "@(A) x" => "(macrocall (parens @A) x)" "A.@x y" => "(macrocall (. A (quote @x)) y)" "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" => Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) "@+x y" => "(macrocall @+ x y)" From 2072d435807bb0fb4969581cec688e64e4dca723 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 22 Mar 2023 18:56:19 +1000 Subject: [PATCH 0612/1109] Allow parens in import paths like `import A.(:+)` Some packages use this syntax, even though it's apparently pointless! 
--- JuliaSyntax/src/expr.jl | 15 +++++++++ JuliaSyntax/src/parser.jl | 60 +++++++++++++++++---------------- JuliaSyntax/test/diagnostics.jl | 8 ++++- JuliaSyntax/test/expr.jl | 6 ++++ JuliaSyntax/test/parser.jl | 5 +-- 5 files changed, 62 insertions(+), 32 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 4eb375c916968..474cfe8baa277 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -320,6 +320,21 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, pushfirst!(args, :*) elseif headsym === :struct pushfirst!(args, has_flags(node, MUTABLE_FLAG)) + elseif headsym === :import || headsym == :using + # Permit nonsense additional quoting such as + # import A.(:b).:c + if !isempty(args) && Meta.isexpr(args[1], Symbol(":")) + imports = args[1].args + else + imports = args + end + for imp in imports + for i = 1:length(imp.args) + if imp.args[i] isa QuoteNode + imp.args[i] = imp.args[i].value + end + end + end end return Expr(headsym, args...) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8417afdd3121a..4b1536b8219c9 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1444,21 +1444,6 @@ function parse_unary_prefix(ps::ParseState) end end -# Parse a symbol or interpolation syntax -function parse_identifier_or_interpolate(ps::ParseState) - mark = position(ps) - parse_unary_prefix(ps) - b = peek_behind(ps) - # export (x::T) ==> (export (error (parens (::-i x T)))) - # export outer ==> (export outer) - # export ($f) ==> (export ($ f)) - ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || - (!b.is_leaf && b.kind in KSet"$ var") - if !ok - emit(ps, mark, K"error", error="Expected identifier") - end -end - # Parses a chain of sufficies at function call precedence, leftmost binding # tightest. 
This handles # * Bracketed calls like a() b[] c{} @@ -2008,7 +1993,7 @@ function parse_resword(ps::ParseState) # export \n a ==> (export a) # export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b)))) bump(ps, TRIVIA_FLAG) - parse_comma_separated(ps, parse_atsym) + parse_comma_separated(ps, x->parse_atsym(x, false)) emit(ps, mark, K"export") elseif word in KSet"import using" parse_imports(ps) @@ -2098,7 +2083,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) mark = position(ps) if !is_function # Parse macro name - parse_identifier_or_interpolate(ps) + parse_unary_prefix(ps) kb = peek_behind(ps).orig_kind if is_initial_reserved_word(ps, kb) # macro while(ex) end ==> (macro (call (error while) ex) (block)) @@ -2111,7 +2096,9 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # macro ($f)() end ==> (macro (call (parens ($ f))) (block)) end else - if peek(ps) == K"(" + if peek(ps) != K"(" + parse_unary_prefix(ps) + else # When an initial parenthesis is present, we might either have # * the function name in parens, followed by (args...) 
# * an anonymous function argument list in parens @@ -2151,14 +2138,12 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) emit(ps, mark, K"parens", PARENS_FLAG) end - else - parse_unary_prefix(ps) end if !is_anon_func kb = peek_behind(ps).orig_kind if is_reserved_word(kb) # function begin() end ==> (function (call (error begin)) (block)) - emit(ps, mark, K"error", error="Invalid function name") + emit(ps, mark, K"error", error="invalid function name") else # function f() end ==> (function (call f) (block)) # function type() end ==> (function (call type) (block)) @@ -2379,7 +2364,7 @@ end # Parse an identifier, interpolation or @-prefixed symbol # # flisp: parse-atsym -function parse_atsym(ps::ParseState) +function parse_atsym(ps::ParseState, allow_quotes=true) bump_trivia(ps) if peek(ps) == K"@" # export @a ==> (export @a) @@ -2392,7 +2377,30 @@ function parse_atsym(ps::ParseState) # export a ==> (export a) # export \n a ==> (export a) # export $a, $(a*b) ==> (export ($ a) (parens ($ (call * a b)))) - parse_identifier_or_interpolate(ps) + # export (x::T) ==> (export (error (parens (::-i x T)))) + # export outer ==> (export outer) + # export ($f) ==> (export ($ f)) + mark = position(ps) + if allow_quotes && peek(ps) == K":" + # import A.:+ ==> (import (. A (quote +))) + emit_diagnostic(ps, warning="quoting with `:` is not required here") + end + parse_unary_prefix(ps) + pos = position(ps) + while peek_behind(ps, pos).kind == K"parens" + # import A.(:+) ==> (import (. 
A (parens (quote +)))) + pos = first_child_position(ps, pos) + emit_diagnostic(ps, mark, warning="parentheses are not required here") + end + if allow_quotes && peek_behind(ps, pos).kind == K"quote" + pos = first_child_position(ps, pos) + end + b = peek_behind(ps, pos) + ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || + (!b.is_leaf && b.kind in KSet"$ var") + if !ok + emit(ps, mark, K"error", error="expected identifier") + end end end @@ -2524,12 +2532,6 @@ function parse_import_path(ps::ParseState) # import A.B.C ==> (import (. A B C)) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - if peek(ps) == K":" - # import A.:+ ==> (import (. A +)) - bump_disallowed_space(ps) - emit_diagnostic(ps, warning="quoting with `:` in import is unnecessary") - bump(ps, TRIVIA_FLAG) - end parse_atsym(ps) elseif is_dotted(t) # Resolve tokenization ambiguity: In imports, dots are part of the diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index a70d7ec4db37b..6671b2da89e77 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -36,7 +36,13 @@ end @test diagnostic("import A .==") == Diagnostic(9, 9, :warning, "space between dots in import path") @test diagnostic("import A.:+") == - Diagnostic(10, 10, :warning, "quoting with `:` in import is unnecessary") + Diagnostic(10, 10, :warning, "quoting with `:` is not required here") + @test diagnostic("import A.(:+)") == + Diagnostic(10, 13, :warning, "parentheses are not required here") + @test diagnostic("export (x)") == + Diagnostic(8, 10, :warning, "parentheses are not required here") + @test diagnostic("export :x") == + Diagnostic(8, 9, :error, "expected identifier") end @testset "diagnostics for literal parsing" begin diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index e54f66680118e..c0004c051bc8b 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -327,6 +327,7 @@ Expr(:module, false, :A, Expr(:block, 
LineNumberNode(1), LineNumberNode(1))) end + @testset "errors" begin @test parse(Expr, "--", ignore_errors=true) == Expr(:error, "invalid operator: `--`") @@ -334,4 +335,9 @@ Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(1), Expr(:error, :b)) end + + @testset "import" begin + @test parse(Expr, "import A.(:b).:c: x.:z", ignore_warnings=true) == + Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z))) + end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 304046170cc2f..636e4cf0eb6ab 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -649,7 +649,8 @@ tests = [ "import \$A.@x" => "(import (. (\$ A) @x))" "import A.B" => "(import (. A B))" "import A.B.C" => "(import (. A B C))" - "import A.:+" => "(import (. A +))" + "import A.:+" => "(import (. A (quote +)))" + "import A.(:+)"=> "(import (. A (parens (quote +))))" "import A.==" => "(import (. A ==))" "import A.⋆.f" => "(import (. A ⋆ f))" "import A..." => "(import (. A ..))" @@ -942,7 +943,7 @@ parseall_test_specs = [ # The following may not be ideal error recovery! 
But at least the parser # shouldn't crash - "@(x y)" => "(toplevel (macrocall (error x (error-t y))))" + "@(x y)" => "(toplevel (macrocall (parens @x (error-t y))))" "|(&\nfunction" => "(toplevel (call | (& (function (error (error)) (block (error)) (error-t))) (error-t)))" ] From b5c208a8c09a1bbc56acce7d113a4e9970f2144d Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 22 Mar 2023 20:22:16 +1000 Subject: [PATCH 0613/1109] Fix highlight() use in registry testing tool --- JuliaSyntax/tools/check_all_packages.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 5e9f47b50bb6d..8bd4170b25f77 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -31,7 +31,7 @@ Logging.with_logger(TerminalLogger()) do mismatch_count += 1 reduced_chunks = sprint(context=:color=>true) do io for c in reduce_test(text) - JuliaSyntax.highlight(io, c.source, range(c), context_inner_lines=5) + JuliaSyntax.highlight(io, c.source, range(c), context_lines_inner=5) println(io, "\n") end end From 0cfe6586655eb4667248b7571045c5930fa1f662 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 23 Mar 2023 19:59:46 +1000 Subject: [PATCH 0614/1109] Various fixes for `K"parens"` nodes (JuliaLang/JuliaSyntax.jl#227) * Fix `:kw` conversion in dubious constructs like `f(((a=1)))` * Fix Expr conversion for parens which contain an error * Allow more parens in import paths like `import A.:(==)` --- JuliaSyntax/src/expr.jl | 20 +++++++++++++++----- JuliaSyntax/src/parser.jl | 13 +++++++++++-- JuliaSyntax/test/expr.jl | 20 ++++++++++++++++++++ JuliaSyntax/test/parser.jl | 1 + 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 474cfe8baa277..d4cb9f8262b58 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -138,7 +138,8 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, 
need_linenodes=true, eq_to_kw_in_call = ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || headsym === :ref - eq_to_kw_all = headsym === :parameters && !map_kw_in_params + eq_to_kw_all = (headsym === :parameters && !map_kw_in_params) || + (headsym === :parens && eq_to_kw) in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref if insert_linenums && isempty(node_args) push!(args, source_location(LineNumberNode, node.source, node.position)) @@ -205,7 +206,15 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, reorder_parameters!(args, 2) elseif headsym === :parens # parens are used for grouping and don't appear in the Expr AST - return only(args) + if length(args) == 1 + return args[1] + else + # This case should only occur when there's an error inside the + # parens, and we've passed ignore_errors=true to the parser. + # Wrap in a block to preserve both the value and the error. + @check all(Meta.isexpr(args[j], :error) for j in 2:length(args)) + return Expr(:block, args...) + end elseif headsym in (:try, :try_finally_catch) # Try children in source order: # try_block catch_var catch_block else_block finally_block @@ -329,9 +338,10 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, imports = args end for imp in imports - for i = 1:length(imp.args) - if imp.args[i] isa QuoteNode - imp.args[i] = imp.args[i].value + imp_path = Meta.isexpr(imp, :as) ? 
imp.args[1].args : imp.args + for i = 1:length(imp_path) + if imp_path[i] isa QuoteNode + imp_path[i] = imp_path[i].value end end end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4b1536b8219c9..7aba62e3bf7c7 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2387,13 +2387,22 @@ function parse_atsym(ps::ParseState, allow_quotes=true) end parse_unary_prefix(ps) pos = position(ps) - while peek_behind(ps, pos).kind == K"parens" + warn_parens = false + if peek_behind(ps, pos).kind == K"parens" # import A.(:+) ==> (import (. A (parens (quote +)))) pos = first_child_position(ps, pos) - emit_diagnostic(ps, mark, warning="parentheses are not required here") + warn_parens = true end if allow_quotes && peek_behind(ps, pos).kind == K"quote" pos = first_child_position(ps, pos) + if peek_behind(ps, pos).kind == K"parens" + # import A.:(+) ==> (import (. A (quote (parens +)))) + pos = first_child_position(ps, pos) + warn_parens = true + end + end + if warn_parens + emit_diagnostic(ps, mark, warning="parentheses are not required here") end b = peek_behind(ps, pos) ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index c0004c051bc8b..8a39649e90a0e 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -230,6 +230,15 @@ # dotted = is not :kw @test parse(Expr, "f(a .= 1)") == Expr(:call, :f, Expr(:.=, :a, 1)) + + # = inside parens in calls and tuples + # (TODO: we should warn for these cases.) 
+ @test parse(Expr, "f(((a = 1)))") == + Expr(:call, :f, Expr(:kw, :a, 1)) + @test parse(Expr, "(((a = 1)),)") == + Expr(:tuple, Expr(:(=), :a, 1)) + @test parse(Expr, "(;((a = 1)),)") == + Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) end @testset "dotcall" begin @@ -334,10 +343,21 @@ @test parseall(Expr, "a b", ignore_errors=true) == Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(1), Expr(:error, :b)) + @test parse(Expr, "(x", ignore_errors=true) == + Expr(:block, :x, Expr(:error)) end @testset "import" begin @test parse(Expr, "import A.(:b).:c: x.:z", ignore_warnings=true) == Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z))) + # Stupid parens and quotes in import paths + @test parse(Expr, "import A.:+", ignore_warnings=true) == + Expr(:import, Expr(:., :A, :+)) + @test parse(Expr, "import A.(:+)", ignore_warnings=true) == + Expr(:import, Expr(:., :A, :+)) + @test parse(Expr, "import A.:(+)", ignore_warnings=true) == + Expr(:import, Expr(:., :A, :+)) + @test parse(Expr, "import A.:(+) as y", ignore_warnings=true, version=v"1.6") == + Expr(:import, Expr(:as, Expr(:., :A, :+), :y)) end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 636e4cf0eb6ab..fd8f46aa25c1c 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -651,6 +651,7 @@ tests = [ "import A.B.C" => "(import (. A B C))" "import A.:+" => "(import (. A (quote +)))" "import A.(:+)"=> "(import (. A (parens (quote +))))" + "import A.:(+)" => "(import (. A (quote (parens +))))" "import A.==" => "(import (. A ==))" "import A.⋆.f" => "(import (. A ⋆ f))" "import A..." => "(import (. 
A ..))" From 364b3b4692f1ec195ce9f010cf12ee78da8b4fe7 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 24 Mar 2023 07:13:02 +1000 Subject: [PATCH 0615/1109] Automatically reduce test failures General registry (JuliaLang/JuliaSyntax.jl#229) --- JuliaSyntax/test/fuzz_test.jl | 8 ++--- JuliaSyntax/tools/check_all_packages.jl | 39 +++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index 4df943b819e61..2bf2b5bf5d092 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -900,16 +900,16 @@ Reduce test case via combination of bisection and random deletion. This is suited to randomly generated strings, but it's surprisingly effective for code-like strings as well. """ -function rand_reduce(str) +function rand_reduce(str, parse_failure=parser_throws_exception) while true if length(str) <= 1 return str end m1 = thisind(str, length(str)÷2) m2 = nextind(str, m1) - if parser_throws_exception(str[1:m1]) + if parse_failure(str[1:m1]) str = str[1:m1] - elseif parser_throws_exception(str[m2:end]) + elseif parse_failure(str[m2:end]) str = str[m2:end] else chunklen = clamp(length(str)÷10, 1, 10) @@ -917,7 +917,7 @@ function rand_reduce(str) for i = 1:100 m = thisind(str, rand(1:length(str)-chunklen)) s = str[1:m]*str[nextind(str, m+chunklen):end] - if parser_throws_exception(s) + if parse_failure(s) str = s reduced = true break diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 8bd4170b25f77..81dae3ff575bc 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -6,6 +6,7 @@ using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization include("../test/test_utils.jl") +include("../test/fuzz_test.jl") pkgspath = joinpath(@__DIR__, "pkgs") source_paths = find_source_in_path(pkgspath) @@ -16,6 +17,24 @@ mismatch_count = 0 t0 = time() exceptions = [] 
+function parsers_disagree(text::AbstractString) + fl_ex = fl_parseall(text, filename="none") + if Meta.isexpr(fl_ex, (:error,:incomplete)) || + (Meta.isexpr(fl_ex, :toplevel) && length(fl_ex.args) >= 1 && + Meta.isexpr(fl_ex.args[end], (:error,:incomplete))) + return false + end + try + ex = parseall(Expr, text, filename="none", ignore_errors=true) + return !exprs_roughly_equal(fl_ex, ex) + catch + @error "Reduction failed" text + return false + end +end + +all_reduced_failures = String[] + Logging.with_logger(TerminalLogger()) do global exception_count, mismatch_count, t0 @withprogress for (ifile, fpath) in enumerate(source_paths) @@ -29,16 +48,19 @@ Logging.with_logger(TerminalLogger()) do e1 = JuliaSyntax.parseall(Expr, text, filename=fpath, ignore_warnings=true) if !exprs_roughly_equal(e2, e1) mismatch_count += 1 - reduced_chunks = sprint(context=:color=>true) do io + failing_source = sprint(context=:color=>true) do io for c in reduce_test(text) JuliaSyntax.highlight(io, c.source, range(c), context_lines_inner=5) println(io, "\n") end end + reduced_failures = rand_reduce.(sourcetext.(reduce_test(text)), + parsers_disagree) + append!(all_reduced_failures, reduced_failures) @error("Parsers succeed but disagree", fpath, - reduced_chunks=Text(reduced_chunks), - # diff=Text(sprint(show_expr_text_diff, show, e1, e2)), + failing_source=Text(failing_source), + reduced_failures, ) end catch err @@ -75,3 +97,14 @@ println() $(exception_count) failures compared to reference parser $(mismatch_count) Expr mismatches $(t_avg)ms per file""" + +open(joinpath(@__DIR__, "reduced_failures.jl"), write=true) do io + for str in all_reduced_failures + println(io, repr(str)) + end + for str in all_reduced_failures + println(io, "#------------------------------") + println(io, str) + println(io) + end +end From 56e57dbb534bd5bd7e821ea94258884b54cdc147 Mon Sep 17 00:00:00 2001 From: ndinsmore <45537276+ndinsmore@users.noreply.github.com> Date: Fri, 24 Mar 2023 21:56:22 -0400 Subject: 
[PATCH 0616/1109] Add isascii to normalize_identifier for 25% reduction in parse time (JuliaLang/JuliaSyntax.jl#228) --- JuliaSyntax/src/literal_parsing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index 2a58d425bba53..b48e96d39dcf6 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -353,5 +353,5 @@ end function normalize_identifier(str) flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE - utf8proc_map(str, flags) + return isascii(str) ? str : utf8proc_map(str, flags) end From 15c35c6e8b6d067986605d8cada1f0db454a34ae Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 27 Mar 2023 12:30:45 +1000 Subject: [PATCH 0617/1109] Fix highlighting of empty source ranges (JuliaLang/JuliaSyntax.jl#231) Highlighting of empty ranges needs special handling at the ends of lines as the source line of the end of the range can be less than the source line at the start of the range. 
--- JuliaSyntax/src/source_files.jl | 9 +++++---- JuliaSyntax/test/source_files.jl | 13 ++++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index d2903a5b62b0c..40214f3767a33 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -183,17 +183,18 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; context_lines_before=context_lines_before, context_lines_after=context_lines_inner) a,b = source_line_range(source, p) - c,d = source_line_range(source, q) - z,w = source_line_range(source, q; + q1 = max(q, p) # Ignore q for empty ranges + c,d = source_line_range(source, q1) + z,w = source_line_range(source, q1; context_lines_before=context_lines_inner, context_lines_after=context_lines_after) p_line = source_line(source, p) - q_line = source_line(source, q) + q_line = source_line(source, q) marker_line_color = :light_black - if p_line == q_line + if p_line >= q_line # x----------------- # a---p-------q----b # # └───────┘ ── note diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 72455dfcf6acf..1dec5ffd403e7 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -57,11 +57,22 @@ end αβγδ +-*/""") + # Empty ranges + @test sprint(highlight, src, 1:0) == "abcd\n└\nαβγδ\n+-*/" + @test sprint(highlight, src, 2:1) == "abcd\n#└\nαβγδ\n+-*/" + @test sprint(highlight, src, 3:2) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 4:3) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 5:4) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 6:5) == "abcd\nαβγδ\n└\n+-*/" + @test sprint(highlight, src, 19:18) == "abcd\nαβγδ\n+-*/\n# └" + @test sprint(io->highlight(io, src, 1:0, context_lines_after=0, note="hi")) == + "abcd\n└ ── hi" + + # Single line ranges @test sprint(highlight, src, 1:4) == "abcd\n└──┘\nαβγδ\n+-*/" @test sprint(highlight, src, 2:4) == 
"abcd\n#└─┘\nαβγδ\n+-*/" @test sprint(highlight, src, 3:4) == "abcd\n# └┘\nαβγδ\n+-*/" @test sprint(highlight, src, 4:4) == "abcd\n# ╙\nαβγδ\n+-*/" - @test sprint(highlight, src, 4:3) == "abcd\n# └\nαβγδ\n+-*/" @test sprint(highlight, src, 5:5) == "abcd\n# └\nαβγδ\n+-*/" # multi-byte chars From 7e3da1eedc113e988c4a66d81cbdec1c9e05c8bf Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 27 Mar 2023 22:29:16 +0200 Subject: [PATCH 0618/1109] fix parsing `'\\` to not error in the validate token step (JuliaLang/JuliaSyntax.jl#233) Co-authored-by: c42f --- JuliaSyntax/src/literal_parsing.jl | 3 +++ JuliaSyntax/test/diagnostics.jl | 4 ++++ JuliaSyntax/test/parser_api.jl | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index b48e96d39dcf6..d1c5c40b559ba 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -243,6 +243,9 @@ function unescape_julia_string(io::IO, str::AbstractString, escstart = i i += 1 if i >= endind + emit_diagnostic(diagnostics, escstart:endind-1, + error="invalid escape sequence") + had_error = true break end c = str[i] diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 6671b2da89e77..1c6de0a6af499 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -75,6 +75,10 @@ end Diagnostic(6, 9, :error, "invalid octal escape sequence") @test diagnostic("x = '\\k'") == Diagnostic(6, 7, :error, "invalid escape sequence") + @test diagnostic("'\\", allow_multiple=true) == [ + Diagnostic(2, 2, :error, "invalid escape sequence"), + Diagnostic(3, 2, :error, "unterminated character literal") + ] # String @test diagnostic("x = \"abc\\xq\"") == diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 719af8bb7f738..18b9a3a8a0d1a 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -160,4 +160,10 @@ tokensplit(str) = 
[kind(tok) => untokenize(tok, str) for tok in tokenize(str)] ] @test kind(JuliaSyntax.Token()) == K"None" + + @test tokensplit("'\\") == [ + K"'" => "'", + K"ErrorInvalidEscapeSequence" => "\\", + K"error" => "" + ] end From f2b7101bebc811d70153b1f22ffe35395b12b217 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 31 Mar 2023 14:41:11 +1000 Subject: [PATCH 0619/1109] Add parens node in special cases `:(=)` and `:(::)` (JuliaLang/JuliaSyntax.jl#237) --- JuliaSyntax/src/parser.jl | 46 +++++++++++++++++--------------------- JuliaSyntax/test/parser.jl | 5 +++-- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7aba62e3bf7c7..a70cc32cf013c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3023,18 +3023,17 @@ function parse_paren(ps::ParseState, check_identifiers=true) emit(ps, mark, K"tuple", PARENS_FLAG) elseif is_syntactic_operator(k) # allow :(=) etc in unchecked contexts, eg quotes - # :(=) ==> (quote =) - if check_identifiers && !is_valid_identifier(k) - bump(ps, error="invalid identifier") - else - bump(ps) - end + # :(=) ==> (quote (parens =)) + parse_atom(ps, check_identifiers) bump_closing_token(ps, K")") - elseif !check_identifiers && k == K"::" && peek(ps, 2, skip_newlines=true) == K")" + emit(ps, mark, K"parens") + elseif !check_identifiers && k == K"::" && + peek(ps, 2, skip_newlines=true) == K")" # allow :(::) as a special case - # :(::) ==> (quote ::) + # :(::) ==> (quote (parens ::)) bump(ps) bump(ps, TRIVIA_FLAG, skip_newlines=true) + emit(ps, mark, K"parens") else # Deal with all other cases of tuple or block syntax via the generic # parse_brackets @@ -3393,7 +3392,11 @@ function parse_atom(ps::ParseState, check_identifiers=true) mark = position(ps) leading_kind = peek(ps) # todo: Reorder to put most likely tokens first? - if leading_kind == K"'" + if is_error(leading_kind) + # Errors for bad tokens are emitted in validate_tokens() rather than + # here. 
+ bump(ps) + elseif leading_kind == K"'" # char literal bump(ps, TRIVIA_FLAG) k = peek(ps) @@ -3436,19 +3439,16 @@ function parse_atom(ps::ParseState, check_identifiers=true) # :\nfoo ==> (quote (error-t) foo) bump_trivia(ps, TRIVIA_FLAG, skip_newlines=true, error="whitespace not allowed after `:` used for quoting") - # Heuristic recovery - bump(ps) - else - # Being inside quote makes keywords into identifiers at the - # first level of nesting - # :end ==> (quote end) - # :(end) ==> (quote (parens (error-t))) - # Being inside quote makes end non-special again (issue #27690) - # a[:(end)] ==> (ref a (quote (error-t end))) - parse_atom(ParseState(ps, end_symbol=false), false) end + # Being inside quote makes keywords into identifiers at the + # first level of nesting + # :end ==> (quote end) + # :(end) ==> (quote (parens (error-t))) + # Being inside quote makes end non-special again (issue #27690) + # a[:(end)] ==> (ref a (quote (error-t end))) + parse_atom(ParseState(ps, end_symbol=false), false) emit(ps, mark, K"quote") - elseif leading_kind == K"=" && is_plain_equals(peek_token(ps)) + elseif check_identifiers && leading_kind == K"=" && is_plain_equals(peek_token(ps)) # = ==> (error =) bump(ps, error="unexpected `=`") elseif leading_kind == K"Identifier" @@ -3456,7 +3456,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # x₁ ==> x₁ bump(ps) elseif is_operator(leading_kind) - if check_identifiers && is_syntactic_operator(leading_kind) + if check_identifiers && !is_valid_identifier(leading_kind) # += ==> (error +=) # .+= ==> (error .+=) bump(ps, error="invalid identifier") @@ -3558,10 +3558,6 @@ function parse_atom(ps::ParseState, check_identifiers=true) "premature end of input" : "unexpected closing token" bump_invisible(ps, K"error", error=msg) - elseif is_error(leading_kind) - # Errors for bad tokens are emitted in validate_tokens() rather than - # here. 
- bump(ps) else bump(ps, error="invalid syntax atom") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index fd8f46aa25c1c..4364613cade72 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -783,8 +783,9 @@ tests = [ "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" # parse_paren - ":(=)" => "(quote =)" - ":(::)" => "(quote ::)" + ":(=)" => "(quote (parens =))" + ":(::)" => "(quote (parens ::))" + ":(::\n)" => "(quote (parens ::))" "(function f \n end)" => "(parens (function f))" # braces "{x y}" => "(bracescat (row x y))" From cf09d28eb4c4e7e1faead209d92b4c15eab3f2d6 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 31 Mar 2023 14:51:48 +1000 Subject: [PATCH 0620/1109] Simplify child layout for `try` (JuliaLang/JuliaSyntax.jl#234) The child layout of try-catch-else-finally is awkward because several of the subclauses are optional. Particularly this has led to problems for `else` in `Expr` which needed to be tacked onto the end for compatibility. This change clarifies the situation a bit and makes it more future proof by wrapping the subclauses in their own expression heads. --- JuliaSyntax/src/expr.jl | 49 ++++++++++++++++++-------------- JuliaSyntax/src/kinds.jl | 2 -- JuliaSyntax/src/parser.jl | 52 ++++++++++++++-------------------- JuliaSyntax/test/expr.jl | 13 +++++++++ JuliaSyntax/test/parser.jl | 29 +++++++++---------- JuliaSyntax/test/parser_api.jl | 2 +- 6 files changed, 77 insertions(+), 70 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index d4cb9f8262b58..3f502723392ab 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -53,9 +53,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # TODO: Get non-token error messages in here as well, somehow? # There's an awkward mismatch between the out-of-tree # `Vector{Diagnostic}` vs Expr(:error) being part of the tree. 
- return Expr(:error, - "$(_token_error_descriptions[nodekind]): `$(sourcetext(node))`" - ) + return nodekind == K"error" ? + Expr(:error) : + Expr(:error, "$(_token_error_descriptions[nodekind]): `$(sourcetext(node))`") else return val end @@ -215,33 +215,38 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, @check all(Meta.isexpr(args[j], :error) for j in 2:length(args)) return Expr(:block, args...) end - elseif headsym in (:try, :try_finally_catch) + elseif headsym === :try # Try children in source order: # try_block catch_var catch_block else_block finally_block # Expr ordering: # try_block catch_var catch_block [finally_block] [else_block] - catch_ = nothing - if headsym === :try_finally_catch - catch_ = pop!(args) - catch_var = pop!(args) - end - finally_ = pop!(args) - else_ = pop!(args) - if headsym === :try_finally_catch - pop!(args) - pop!(args) - push!(args, catch_var) - push!(args, catch_) + try_ = args[1] + catch_var = false + catch_ = false + else_ = false + finally_ = false + for i in 2:length(args) + a = args[i] + if Meta.isexpr(a, :catch) + catch_var = a.args[1] + catch_ = a.args[2] + elseif Meta.isexpr(a, :else) + else_ = only(a.args) + elseif Meta.isexpr(a, :finally) + finally_ = only(a.args) + elseif Meta.isexpr(a, :error) + finally_ = Expr(:block, a) # Unclear where to put this but here will do? 
+ else + @check false "Illegal $a subclause in `try`" + end end - # At this point args is - # [try_block catch_var catch_block] + args = Any[try_, catch_var, catch_] if finally_ !== false || else_ !== false push!(args, finally_) + if else_ !== false + push!(args, else_) + end end - if else_ !== false - push!(args, else_) - end - headsym = :try elseif headsym === :filter pushfirst!(args, last(args)) pop!(args) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index a9a34924b7a89..3374dde915f1c 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -907,8 +907,6 @@ const _kind_names = "flatten" "comprehension" "typed_comprehension" - # Special kind for compatibility with the ever-ugly try-finally-catch ordering - "try_finally_catch" "END_SYNTAX_KINDS" ] diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a70cc32cf013c..7ca8b12b43e05 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2201,61 +2201,51 @@ end # Parse a try block # -# try \n x \n catch e \n y \n finally \n z end ==> (try (block x) e (block y) false (block z)) -#v1.8: try \n x \n catch e \n y \n else z finally \n w end ==> (try (block x) e (block y) (block z) (block w)) +# try \n x \n catch e \n y \n finally \n z end ==> (try (block x) (catch e (block y)) (finally (block z))) +#v1.8: try \n x \n catch e \n y \n else z finally \n w end ==> (try (block x) (catch e (block y)) (else (block z)) (finally (block w))) # # flisp: embedded in parse_resword function parse_try(ps) - out_kind = K"try" mark = position(ps) bump(ps, TRIVIA_FLAG) parse_block(ps) has_catch = false - has_else = false has_finally = false bump_trivia(ps) - flags = EMPTY_FLAGS - bump_trivia(ps) if peek(ps) == K"catch" has_catch = true parse_catch(ps) - else - bump_invisible(ps, K"false") - bump_invisible(ps, K"false") end bump_trivia(ps) if peek(ps) == K"else" # catch-else syntax: https://github.com/JuliaLang/julia/pull/42211 # - #v1.8: try catch ; else end ==> (try 
(block) false (block) (block) false) - has_else = true + #v1.8: try catch ; else end ==> (try (block) (catch false (block)) (else (block))) else_mark = position(ps) bump(ps, TRIVIA_FLAG) parse_block(ps) if !has_catch - #v1.8: try else x finally y end ==> (try (block) false false (error (block x)) (block y)) + #v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y))) emit(ps, else_mark, K"error", error="Expected `catch` before `else`") end - #v1.7: try catch ; else end ==> (try (block) false (block) (error (block)) false) + #v1.7: try catch ; else end ==> (try (block) (catch false (block)) (else (error (block)))) min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`") - else - bump_invisible(ps, K"false") + emit(ps, else_mark, K"else") end bump_trivia(ps) if peek(ps) == K"finally" - # try x finally y end ==> (try (block x) false false false (block y)) + finally_mark = position(ps) + # try x finally y end ==> (try (block x) (finally (block y))) has_finally = true bump(ps, TRIVIA_FLAG) parse_block(ps) - else - bump_invisible(ps, K"false") + emit(ps, finally_mark, K"finally") end # Wart: the flisp parser allows finally before catch, the *opposite* order # in which these blocks execute. 
bump_trivia(ps) if !has_catch && peek(ps) == K"catch" - # try x finally y catch e z end ==> (try_finally_catch (block x) false false false (block y) e (block z)) - out_kind = K"try_finally_catch" + # try x finally y catch e z end ==> (try (block x) (finally (block y)) (catch e (block z))) m = position(ps) parse_catch(ps) emit_diagnostic(ps, m, @@ -2263,35 +2253,37 @@ function parse_try(ps) end missing_recovery = !has_catch && !has_finally if missing_recovery - # try x end ==> (try (block x) false false false false (error-t)) + # try x end ==> (try (block x) (error-t)) bump_invisible(ps, K"error", TRIVIA_FLAG) end bump_closing_token(ps, K"end") - emit(ps, mark, out_kind, flags) + emit(ps, mark, K"try") if missing_recovery emit_diagnostic(ps, mark, error="try without catch or finally") end end function parse_catch(ps::ParseState) + mark = position(ps) bump(ps, TRIVIA_FLAG) k = peek(ps) if k in KSet"NewlineWs ;" || is_closing_token(ps, k) - # try x catch end ==> (try (block x) false (block) false false) - # try x catch ; y end ==> (try (block x) false (block y) false false) - # try x catch \n y end ==> (try (block x) false (block y) false false) + # try x catch end ==> (try (block x) (catch false (block))) + # try x catch ; y end ==> (try (block x) (catch false (block y))) + # try x catch \n y end ==> (try (block x) (catch false (block y))) bump_invisible(ps, K"false") else - # try x catch e y end ==> (try (block x) e (block y) false false) - # try x catch $e y end ==> (try (block x) ($ e) (block y) false false) - mark = position(ps) + # try x catch e y end ==> (try (block x) (catch e (block y))) + # try x catch $e y end ==> (try (block x) (catch ($ e) (block y))) + m = position(ps) parse_eq_star(ps) if !(peek_behind(ps).kind in KSet"Identifier $") - # try x catch e+3 y end ==> (try (block x) (error (call-i e + 3)) (block y) false false) - emit(ps, mark, K"error", error="a variable name is expected after `catch`") + # try x catch e+3 y end ==> (try (block x) (catch 
(error (call-i e + 3)) (block y))) + emit(ps, m, K"error", error="a variable name is expected after `catch`") end end parse_block(ps) + emit(ps, mark, K"catch") end # flisp: parse-do diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 8a39649e90a0e..39d7edd44e531 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -303,6 +303,19 @@ Expr(:block, LineNumberNode(1), :y), Expr(:block, LineNumberNode(1), :w), Expr(:block, LineNumberNode(1), :z)) + # finally before catch + @test parse(Expr, "try x finally y catch e z end", ignore_warnings=true) == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :z), + Expr(:block, LineNumberNode(1), :y)) + # empty recovery + @test parse(Expr, "try x end", ignore_errors=true) == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + false, false, + Expr(:block, Expr(:error))) end @testset "juxtapose" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4364613cade72..8da911b6779a2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -596,24 +596,23 @@ tests = [ ], JuliaSyntax.parse_try => [ "try \n x \n catch e \n y \n finally \n z end" => - "(try (block x) e (block y) false (block z))" + "(try (block x) (catch e (block y)) (finally (block z)))" ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") => - "(try (block x) e (block y) (block z) (block w))" - "try x catch end" => "(try (block x) false (block) false false)" - "try x catch ; y end" => "(try (block x) false (block y) false false)" - "try x catch \n y end" => "(try (block x) false (block y) false false)" - "try x catch e y end" => "(try (block x) e (block y) false false)" - "try x catch \$e y end" => "(try (block x) (\$ e) (block y) false false)" - "try x catch e+3 y end" => "(try (block x) (error (call-i e + 3)) (block y) false false)" - "try x finally y end" => "(try (block x) false false false (block y))" + "(try (block x) (catch e 
(block y)) (else (block z)) (finally (block w)))" + "try x catch end" => "(try (block x) (catch false (block)))" + "try x catch ; y end" => "(try (block x) (catch false (block y)))" + "try x catch \n y end" => "(try (block x) (catch false (block y)))" + "try x catch e y end" => "(try (block x) (catch e (block y)))" + "try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))" + "try x catch e+3 y end" => "(try (block x) (catch (error (call-i e + 3)) (block y)))" + "try x finally y end" => "(try (block x) (finally (block y)))" # v1.8 only - ((v=v"1.8",), "try catch ; else end") => "(try (block) false (block) (block) false)" - ((v=v"1.8",), "try else x finally y end") => "(try (block) false false (error (block x)) (block y))" - ((v=v"1.7",), "try catch ; else end") => "(try (block) false (block) (error (block)) false)" + ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch false (block)) (else (block)))" + ((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))" + ((v=v"1.7",), "try catch ; else end") => "(try (block) (catch false (block)) (else (error (block))))" # finally before catch :-( - "try x finally y catch e z end" => "(try_finally_catch (block x) false false false (block y) e (block z))" => - Expr(:try, Expr(:block, :x), :e, Expr(:block, :z), Expr(:block, :y)) - "try x end" => "(try (block x) false false false false (error-t))" + "try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))" + "try x end" => "(try (block x) (error-t))" ], JuliaSyntax.parse_imports => [ "import A as B: x" => "(import (: (error (as (. A) B)) (. x)))" diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 18b9a3a8a0d1a..f237f7aca776a 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -81,7 +81,7 @@ parseshow(s;kws...) 
= sprint(show, MIME("text/x.sexpression"), parse(SyntaxNode, s; kws...)) @test_throws JuliaSyntax.ParseError parseshow("try finally catch ex end") @test parseshow("try finally catch ex end", ignore_warnings=true) == - "(try_finally_catch (block) false false false (block) ex (block))" + "(try (block) (finally (block)) (catch ex (block)))" # ignore_errors @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]") @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]", ignore_warnings=true) From cf73f59147a2222f4c7f765e1806b5209ee8dd5f Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 31 Mar 2023 17:55:51 +1000 Subject: [PATCH 0621/1109] Add note on getting involved to the README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit People keep asking me about this 😅 --- JuliaSyntax/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index c968b91613d42..28d902ea556a3 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1205,3 +1205,22 @@ indentation from the syntax tree? Source formatting involves a big pile of heuristics to get something which "looks nice"... and ML systems have become very good at heuristics. Also, we've got huge piles of training data — just choose some high quality, tastefully hand-formatted libraries. + +# Getting involved + +For people who want to help improve Julia's error messages by contributing to +JuliaSyntax, I'd suggest looking through the issue list at +https://github.com/JuliaLang/JuliaSyntax.jl/issues and choosing a small issue +or two to work on to familiarize yourself with the code. Anything marked with +the labels `intro issue` or `bug` might be a good place to start. + +Also watching the 2022 JuliaCon talk and reading this document is probably good +for an overview. 
+ +As of March 2023, we've got really good positional tracking within the source, +but JuliaSyntax really needs a better system for parser recovery before the +errors are really nice. This requires some research. For example, you could +read up on how rust-analyzer does recovery, or rslint - both these are +event-based recursive decent parsers with similar structure to JuliaSyntax +(though in Rust). I also want to investigate whether we can do data-driven +parser recovery using an ML technique. But again, this is a research project. From c28abcd56c3c0564114f21a6d9b3840a9269c6a9 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 7 Apr 2023 13:51:55 +1000 Subject: [PATCH 0622/1109] API updates: `parse()` -> `parsestmt()` + exports + `parse!(rule=:all)` `JuliaSyntax.parse()` clashes with `Base.parse()` and it's really not clear whether `parse()` should parse a single statement or a whole file top-level. Having a less generic name `parsestmt()` helps with this. Using the name `parsestmt()` is explicit about the fact that this parses a single statement (not an "expression" which is ambiguous given every Julia construct is an expression). It's also consistent with the naming rules of `parseall()` and `parseatom()`, which already appear in Base as `Meta.parseall` and `Meta.parseatom`. Change to using `rule=:all` in the `parse!()` API rather than `rule=:toplevel` because this is most consistent with the choices made in the `Core._parser` interface and the naming of `parseall()`. Also add a conservative list of exports that I expect "people are likely to use", and which seem to be required as the main part of the API. There's more to the API than this, but shoving that all into the user's namespace doesn't seem ideal. Especially the parts which are less certain. 
--- JuliaSyntax/README.md | 51 ++++++----- JuliaSyntax/src/JuliaSyntax.jl | 19 ++++ JuliaSyntax/src/hooks.jl | 11 +-- JuliaSyntax/src/parser_api.jl | 60 +++++++----- JuliaSyntax/src/source_files.jl | 8 +- JuliaSyntax/test/benchmark.jl | 4 +- JuliaSyntax/test/expr.jl | 158 ++++++++++++++++---------------- JuliaSyntax/test/parser_api.jl | 36 +++++--- JuliaSyntax/test/syntax_tree.jl | 12 +-- JuliaSyntax/test/test_utils.jl | 4 +- 10 files changed, 202 insertions(+), 161 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 28d902ea556a3..b84f91ce7a9ed 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -50,15 +50,16 @@ First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means the `call` has the infix `-i` flag): ```julia -julia> using JuliaSyntax: JuliaSyntax, SyntaxNode, GreenNode +julia> using JuliaSyntax -julia> JuliaSyntax.parse(SyntaxNode, "(x + y)*z", filename="foo.jl") +julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl") line:col│ tree │ file_name 1:1 │[call-i] │foo.jl - 1:2 │ [call-i] - 1:2 │ x - 1:4 │ + - 1:6 │ y + 1:1 │ [parens] + 1:2 │ [call-i] + 1:2 │ x + 1:4 │ + + 1:6 │ y 1:8 │ * 1:9 │ z ``` @@ -71,16 +72,17 @@ representation, despite being important for parsing. 
```julia julia> text = "(x + y)*z" - greentree = JuliaSyntax.parse(GreenNode, text) + greentree = parsestmt(JuliaSyntax.GreenNode, text) 1:9 │[call] - 1:1 │ ( - 2:6 │ [call] - 2:2 │ Identifier ✔ - 3:3 │ Whitespace - 4:4 │ + ✔ - 5:5 │ Whitespace - 6:6 │ Identifier ✔ - 7:7 │ ) + 1:7 │ [parens] + 1:1 │ ( + 2:6 │ [call] + 2:2 │ Identifier ✔ + 3:3 │ Whitespace + 4:4 │ + ✔ + 5:5 │ Whitespace + 6:6 │ Identifier ✔ + 7:7 │ ) 8:8 │ * ✔ 9:9 │ Identifier ✔ ``` @@ -91,14 +93,15 @@ supplying the source text string: ```julia julia> show(stdout, MIME"text/plain"(), greentree, text) 1:9 │[call] - 1:1 │ ( "(" - 2:6 │ [call] - 2:2 │ Identifier ✔ "x" - 3:3 │ Whitespace " " - 4:4 │ + ✔ "+" - 5:5 │ Whitespace " " - 6:6 │ Identifier ✔ "y" - 7:7 │ ) ")" + 1:7 │ [parens] + 1:1 │ ( "(" + 2:6 │ [call] + 2:2 │ Identifier ✔ "x" + 3:3 │ Whitespace " " + 4:4 │ + ✔ "+" + 5:5 │ Whitespace " " + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" 8:8 │ * ✔ "*" 9:9 │ Identifier ✔ "z" ``` @@ -106,7 +109,7 @@ julia> show(stdout, MIME"text/plain"(), greentree, text) Julia `Expr` can also be produced: ```julia -julia> JuliaSyntax.parse(Expr, "(x + y)*z") +julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z") :((x + y) * z) ``` diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 76b686759488b..3f1ad27a0434a 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,5 +1,23 @@ module JuliaSyntax +# Conservative list of exports - only export the most common/useful things +# here. + +# Parsing. See also +# parse!(), ParseStream +export parsestmt, parseall, parseatom +# Tokenization +export tokenize, Token, untokenize +# Source file handling. See also +# highlight() sourcetext() source_line() source_location() +export SourceFile +# Expression heads/kinds. See also +# flags() and related predicates. +export @K_str, kind, head +# Syntax tree types. 
See also +# GreenNode +export SyntaxNode + # Helper utilities include("utils.jl") @@ -26,4 +44,5 @@ include("expr.jl") # Hooks to integrate the parser with Base include("hooks.jl") include("precompile.jl") + end diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 47d7dc47c1eeb..6805bb11b75a4 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -122,7 +122,7 @@ end # Debug log file for dumping parsed code const _debug_log = Ref{Union{Nothing,IO}}(nothing) -function _core_parser_hook(code, filename, lineno, offset, options) +function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol) try # TODO: Check that we do all this input wrangling without copying the # code buffer @@ -144,8 +144,7 @@ function _core_parser_hook(code, filename, lineno, offset, options) seek(io, offset) stream = ParseStream(io) - rule = options === :all ? :toplevel : options - if rule === :statement || rule === :atom + if options === :statement || options === :atom # To copy the flisp parser driver: # * Parsing atoms consumes leading trivia # * Parsing statements consumes leading+trailing trivia @@ -157,8 +156,8 @@ function _core_parser_hook(code, filename, lineno, offset, options) return Core.svec(nothing, last_byte(stream)) end end - parse!(stream; rule=rule) - if rule === :statement + parse!(stream; rule=options) + if options === :statement bump_trivia(stream) end @@ -342,7 +341,7 @@ function _fl_parse_string(text::AbstractString, filename::AbstractString, ex, offset+1 end -# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases. +# Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases. fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...) fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...) 
diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index a22a6a2578610..bc45d22ecab9f 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -28,21 +28,25 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) """ - parse!(stream::ParseStream; rule=:toplevel) + parse!(stream::ParseStream; rule=:all) Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data structures may be extracted from `stream` with the [`build_tree`](@ref) function. `rule` may be any of -* `:toplevel` (default) — parse a whole "file" of top level statements. In this +* `:all` (default) — parse a whole "file" of top level statements. In this mode, the parser expects to fully consume the input. * `:statement` — parse a single statement, or statements separated by semicolons. * `:atom` — parse a single syntax "atom": a literal, identifier, or parenthesized expression. """ -function parse!(stream::ParseStream; rule::Symbol=:toplevel) +function parse!(stream::ParseStream; rule::Symbol=:all) + if rule == :toplevel + Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!) + rule = :all + end ps = ParseState(stream) - if rule === :toplevel + if rule === :all parse_toplevel(ps) elseif rule === :statement parse_stmts(ps) @@ -56,14 +60,14 @@ function parse!(stream::ParseStream; rule::Symbol=:toplevel) end """ - parse!(TreeType, io::IO; rule=:toplevel, version=VERSION) + parse!(TreeType, io::IO; rule=:all, version=VERSION) Parse Julia source code from a seekable `IO` object. The output is a tuple `(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned directly after the last byte which was consumed during parsing. """ function parse!(::Type{TreeType}, io::IO; - rule::Symbol=:toplevel, version=VERSION, kws...) where {TreeType} + rule::Symbol=:all, version=VERSION, kws...) 
where {TreeType} stream = ParseStream(io; version=version) parse!(stream; rule=rule) tree = build_tree(TreeType, stream; kws...) @@ -75,7 +79,7 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false, ignore_warnings=ignore_errors) where {T} stream = ParseStream(text, index; version=version) - if ignore_trivia && rule != :toplevel + if ignore_trivia && rule != :all bump_trivia(stream, skip_newlines=true) empty!(stream) end @@ -100,19 +104,22 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= end _parse_docs = """ - parse(TreeType, text, [index]; - version=VERSION, - ignore_trivia=true, - filename=nothing, - ignore_errors=false, - ignore_warnings=ignore_errors) - - # Or, with the same arguments + # Parse a single expression/statement + parsestmt(TreeType, text, [index]; + version=VERSION, + ignore_trivia=true, + filename=nothing, + ignore_errors=false, + ignore_warnings=ignore_errors) + + # Parse all statements at top level (file scope) parseall(...) + + # Parse a single syntax atom parseatom(...) Parse Julia source code string `text` into a data structure of type `TreeType`. -`parse` parses a single Julia statement, `parseall` parses top level statements +`parsestmt` parses a single Julia statement, `parseall` parses top level statements at file scope and `parseatom` parses a single Julia identifier or other "syntax atom". @@ -136,16 +143,17 @@ parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. To also avoid exceptions due to errors, use `ignore_errors=true`. """ -parse(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1] -parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:toplevel, true, T, text; kws...)[1] -parseatom(::Type{T}, text::AbstractString; kws...) 
where {T} = _parse(:atom, true, T, text; kws...)[1] +"$_parse_docs" +parsestmt(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1] -@eval @doc $_parse_docs parse -@eval @doc $_parse_docs parseall -@eval @doc $_parse_docs parseatom +"$_parse_docs" +parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:all, true, T, text; kws...)[1] -parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...) -parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...) +"$_parse_docs" +parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1] + +parsestmt(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...) +parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:all, false, T, text, index; kws...) parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...) #------------------------------------------------------------------------------- @@ -178,7 +186,7 @@ This interface works on UTF-8 encoded string or buffer data only. """ function tokenize(text) ps = ParseStream(text) - parse!(ps, rule=:toplevel) + parse!(ps, rule=:all) ts = ps.tokens output_tokens = Token[] for i = 2:length(ts) @@ -198,3 +206,5 @@ end function untokenize(token::Token, text::Vector{UInt8}) text[token.range] end + +@deprecate parse parsestmt diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 40214f3767a33..203523e326ce5 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -36,18 +36,18 @@ function SourceFile(; filename, kwargs...) 
end # Get line number of the given byte within the code -function source_line_index(source::SourceFile, byte_index) +function _source_line_index(source::SourceFile, byte_index) lineidx = searchsortedlast(source.line_starts, byte_index) return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1 end _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 -source_line(source::SourceFile, byte_index) = _source_line(source, source_line_index(source, byte_index)) +source_line(source::SourceFile, byte_index) = _source_line(source, _source_line_index(source, byte_index)) """ Get line number and character within the line at the given byte index. """ function source_location(source::SourceFile, byte_index) - lineidx = source_line_index(source, byte_index) + lineidx = _source_line_index(source, byte_index) i = source.line_starts[lineidx] column = 1 while i < byte_index @@ -63,7 +63,7 @@ Get byte range of the source line at byte_index, buffered by """ function source_line_range(source::SourceFile, byte_index; context_lines_before=0, context_lines_after=0) - lineidx = source_line_index(source, byte_index) + lineidx = _source_line_index(source, byte_index) fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1 fbyte,lbyte diff --git a/JuliaSyntax/test/benchmark.jl b/JuliaSyntax/test/benchmark.jl index b7dc4e0116f6c..0cae3e0714687 100644 --- a/JuliaSyntax/test/benchmark.jl +++ b/JuliaSyntax/test/benchmark.jl @@ -15,7 +15,7 @@ end all_base_code = concat_base() -b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:toplevel) +b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:all) b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code) b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code) b_Expr = @benchmark 
JuliaSyntax.parseall(Expr, all_base_code) @@ -30,5 +30,5 @@ b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code) # Allocs.clear() # stream = JuliaSyntax.ParseStream(text); # JuliaSyntax.peek(stream); -# Allocs.@profile sample_rate=1 JuliaSyntax.parse(stream) +# Allocs.@profile sample_rate=1 JuliaSyntax.parsestmt(stream) # PProf.Allocs.pprof() diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 39d7edd44e531..31d466cccb660 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -10,7 +10,7 @@ @testset "Line numbers" begin @testset "Blocks" begin - @test parse(Expr, "begin a\nb\n\nc\nend") == + @test parsestmt(Expr, "begin a\nb\n\nc\nend") == Expr(:block, LineNumberNode(1), :a, @@ -19,7 +19,7 @@ LineNumberNode(4), :c, ) - @test parse(Expr, "begin end") == + @test parsestmt(Expr, "begin end") == Expr(:block, LineNumberNode(1) ) @@ -32,7 +32,7 @@ :b, ) - @test parse(Expr, "module A\n\nbody\nend") == + @test parsestmt(Expr, "module A\n\nbody\nend") == Expr(:module, true, :A, @@ -45,7 +45,7 @@ end @testset "Function definition lines" begin - @test parse(Expr, "function f()\na\n\nb\nend") == + @test parsestmt(Expr, "function f()\na\n\nb\nend") == Expr(:function, Expr(:call, :f), Expr(:block, @@ -56,7 +56,7 @@ :b, ) ) - @test parse(Expr, "f() = 1") == + @test parsestmt(Expr, "f() = 1") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -66,14 +66,14 @@ ) # function/macro without methods - @test parse(Expr, "function f end") == + @test parsestmt(Expr, "function f end") == Expr(:function, :f) - @test parse(Expr, "macro f end") == + @test parsestmt(Expr, "macro f end") == Expr(:macro, :f) end @testset "elseif" begin - @test parse(Expr, "if a\nb\nelseif c\n d\nend") == + @test parsestmt(Expr, "if a\nb\nelseif c\n d\nend") == Expr(:if, :a, Expr(:block, @@ -91,7 +91,7 @@ end @testset "No line numbers in for/let bindings" begin - @test parse(Expr, "for i=is, j=js\nbody\nend") == + @test parsestmt(Expr, "for i=is, j=js\nbody\nend") == 
Expr(:for, Expr(:block, Expr(:(=), :i, :is), @@ -102,7 +102,7 @@ :body ) ) - @test parse(Expr, "let i=is, j=js\nbody\nend") == + @test parsestmt(Expr, "let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, Expr(:(=), :i, :is), @@ -118,7 +118,7 @@ @testset "Short form function line numbers" begin # A block is added to hold the line number node - @test parse(Expr, "f() = xs") == + @test parsestmt(Expr, "f() = xs") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -126,7 +126,7 @@ :xs)) # flisp parser quirk: In a for loop the block is not added, despite # this defining a short-form function. - @test parse(Expr, "for f() = xs\nend") == + @test parsestmt(Expr, "for f() = xs\nend") == Expr(:for, Expr(:(=), Expr(:call, :f), :xs), Expr(:block, @@ -135,7 +135,7 @@ end @testset "Long form anonymous functions" begin - @test parse(Expr, "function (xs...)\nbody end") == + @test parsestmt(Expr, "function (xs...)\nbody end") == Expr(:function, Expr(:..., :xs), Expr(:block, @@ -146,25 +146,25 @@ @testset "String conversions" begin # String unwrapping / wrapping - @test parse(Expr, "\"str\"") == "str" - @test parse(Expr, "\"\$(\"str\")\"") == + @test parsestmt(Expr, "\"str\"") == "str" + @test parsestmt(Expr, "\"\$(\"str\")\"") == Expr(:string, Expr(:string, "str")) # Concatenation of string chunks in triple quoted cases - @test parse(Expr, "```\n a\n b```") == + @test parsestmt(Expr, "```\n a\n b```") == Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "a\nb") - @test parse(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"") == + @test parsestmt(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"") == Expr(:string, "a\n", :x, "\nb\nc") end @testset "Char conversions" begin - @test parse(Expr, "'a'") == 'a' - @test parse(Expr, "'α'") == 'α' - @test parse(Expr, "'\\xce\\xb1'") == 'α' + @test parsestmt(Expr, "'a'") == 'a' + @test parsestmt(Expr, "'α'") == 'α' + @test parsestmt(Expr, "'\\xce\\xb1'") == 'α' end @testset "do block conversion" begin - @test parse(Expr, "f(x) do y\n body end") == 
+ @test parsestmt(Expr, "f(x) do y\n body end") == Expr(:do, Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, @@ -174,29 +174,29 @@ @testset "= to Expr(:kw) conversion" begin # Call - @test parse(Expr, "f(a=1)") == + @test parsestmt(Expr, "f(a=1)") == Expr(:call, :f, Expr(:kw, :a, 1)) - @test parse(Expr, "f(; b=2)") == + @test parsestmt(Expr, "f(; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) - @test parse(Expr, "f(a=1; b=2)") == + @test parsestmt(Expr, "f(a=1; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) # Infix call = is not :kw - @test parse(Expr, "(x=1) != 2") == + @test parsestmt(Expr, "(x=1) != 2") == Expr(:call, :!=, Expr(:(=), :x, 1), 2) # Dotcall - @test parse(Expr, "f.(a=1; b=2)") == + @test parsestmt(Expr, "f.(a=1; b=2)") == Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) # Named tuples - @test parse(Expr, "(a=1,)") == + @test parsestmt(Expr, "(a=1,)") == Expr(:tuple, Expr(:(=), :a, 1)) - @test parse(Expr, "(a=1,; b=2)") == + @test parsestmt(Expr, "(a=1,; b=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) - @test parse(Expr, "(a=1,; b=2; c=3)") == + @test parsestmt(Expr, "(a=1,; b=2; c=3)") == Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :c, 3)), @@ -204,99 +204,99 @@ Expr(:(=), :a, 1)) # ref - @test parse(Expr, "x[i=j]") == + @test parsestmt(Expr, "x[i=j]") == Expr(:ref, :x, Expr(:kw, :i, :j)) - @test parse(Expr, "(i=j)[x]") == + @test parsestmt(Expr, "(i=j)[x]") == Expr(:ref, Expr(:(=), :i, :j), :x) - @test parse(Expr, "x[a, b; i=j]") == + @test parsestmt(Expr, "x[a, b; i=j]") == Expr(:ref, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) # curly - @test parse(Expr, "(i=j){x}") == + @test parsestmt(Expr, "(i=j){x}") == Expr(:curly, Expr(:(=), :i, :j), :x) - @test parse(Expr, "x{a, b; i=j}") == + @test parsestmt(Expr, "x{a, b; i=j}") == Expr(:curly, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) # vect - 
@test parse(Expr, "[a=1,; b=2]") == + @test parsestmt(Expr, "[a=1,; b=2]") == Expr(:vect, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) # braces - @test parse(Expr, "{a=1,; b=2}") == + @test parsestmt(Expr, "{a=1,; b=2}") == Expr(:braces, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) # dotted = is not :kw - @test parse(Expr, "f(a .= 1)") == + @test parsestmt(Expr, "f(a .= 1)") == Expr(:call, :f, Expr(:.=, :a, 1)) # = inside parens in calls and tuples # (TODO: we should warn for these cases.) - @test parse(Expr, "f(((a = 1)))") == + @test parsestmt(Expr, "f(((a = 1)))") == Expr(:call, :f, Expr(:kw, :a, 1)) - @test parse(Expr, "(((a = 1)),)") == + @test parsestmt(Expr, "(((a = 1)),)") == Expr(:tuple, Expr(:(=), :a, 1)) - @test parse(Expr, "(;((a = 1)),)") == + @test parsestmt(Expr, "(;((a = 1)),)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) end @testset "dotcall" begin - @test parse(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) - @test parse(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) - @test parse(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) - @test parse(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) - @test parse(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), + @test parsestmt(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) + @test parsestmt(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) + @test parsestmt(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) + @test parsestmt(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) + @test parsestmt(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), :b, Symbol(".<"), :c) - @test parse(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) - @test parse(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) - @test parse(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) + @test parsestmt(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) + @test parsestmt(Expr, ".+(x)") == 
Expr(:call, Symbol(".+"), :x) + @test parsestmt(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) end @testset "where" begin - @test parse(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) + @test parsestmt(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) end @testset "macrocall" begin # line numbers - @test parse(Expr, "@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1)) - @test parse(Expr, "\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2)) + @test parsestmt(Expr, "@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1)) + @test parsestmt(Expr, "\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2)) # parameters - @test parse(Expr, "@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + @test parsestmt(Expr, "@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), Expr(:parameters, :a), :x) - @test parse(Expr, "@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + @test parsestmt(Expr, "@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) # @__dot__ - @test parse(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) - @test parse(Expr, "using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) + @test parsestmt(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) + @test parsestmt(Expr, "using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) end @testset "try" begin - @test parse(Expr, "try x catch e; y end") == + @test parsestmt(Expr, "try x catch e; y end") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :y)) - @test parse(Expr, "try x finally y end") == + @test parsestmt(Expr, "try x finally y end") == Expr(:try, Expr(:block, LineNumberNode(1), :x), false, false, Expr(:block, LineNumberNode(1), :y)) - @test parse(Expr, "try x catch e; 
y finally z end") == + @test parsestmt(Expr, "try x catch e; y finally z end") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :y), Expr(:block, LineNumberNode(1), :z)) - @test parse(Expr, "try x catch e; y else z end", version=v"1.8") == + @test parsestmt(Expr, "try x catch e; y else z end", version=v"1.8") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :y), false, Expr(:block, LineNumberNode(1), :z)) - @test parse(Expr, "try x catch e; y else z finally w end", version=v"1.8") == + @test parsestmt(Expr, "try x catch e; y else z finally w end", version=v"1.8") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, @@ -304,14 +304,14 @@ Expr(:block, LineNumberNode(1), :w), Expr(:block, LineNumberNode(1), :z)) # finally before catch - @test parse(Expr, "try x finally y catch e z end", ignore_warnings=true) == + @test parsestmt(Expr, "try x finally y catch e z end", ignore_warnings=true) == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :z), Expr(:block, LineNumberNode(1), :y)) # empty recovery - @test parse(Expr, "try x end", ignore_errors=true) == + @test parsestmt(Expr, "try x end", ignore_errors=true) == Expr(:try, Expr(:block, LineNumberNode(1), :x), false, false, @@ -319,58 +319,58 @@ end @testset "juxtapose" begin - @test parse(Expr, "2x") == Expr(:call, :*, 2, :x) - @test parse(Expr, "(2)(3)x") == Expr(:call, :*, 2, 3, :x) + @test parsestmt(Expr, "2x") == Expr(:call, :*, 2, :x) + @test parsestmt(Expr, "(2)(3)x") == Expr(:call, :*, 2, 3, :x) end @testset "Core.@doc" begin - @test parse(Expr, "\"x\" f") == + @test parsestmt(Expr, "\"x\" f") == Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(1), "x", :f) - @test parse(Expr, "\n\"x\" f") == + @test parsestmt(Expr, "\n\"x\" f") == Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f) end @testset "return" begin - @test parse(Expr, "return x") == Expr(:return, 
:x) - @test parse(Expr, "return") == Expr(:return, nothing) + @test parsestmt(Expr, "return x") == Expr(:return, :x) + @test parsestmt(Expr, "return") == Expr(:return, nothing) end @testset "struct" begin - @test parse(Expr, "struct A end") == + @test parsestmt(Expr, "struct A end") == Expr(:struct, false, :A, Expr(:block, LineNumberNode(1))) - @test parse(Expr, "mutable struct A end") == + @test parsestmt(Expr, "mutable struct A end") == Expr(:struct, true, :A, Expr(:block, LineNumberNode(1))) end @testset "module" begin - @test parse(Expr, "module A end") == + @test parsestmt(Expr, "module A end") == Expr(:module, true, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) - @test parse(Expr, "baremodule A end") == + @test parsestmt(Expr, "baremodule A end") == Expr(:module, false, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) end @testset "errors" begin - @test parse(Expr, "--", ignore_errors=true) == + @test parsestmt(Expr, "--", ignore_errors=true) == Expr(:error, "invalid operator: `--`") @test parseall(Expr, "a b", ignore_errors=true) == Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(1), Expr(:error, :b)) - @test parse(Expr, "(x", ignore_errors=true) == + @test parsestmt(Expr, "(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) end @testset "import" begin - @test parse(Expr, "import A.(:b).:c: x.:z", ignore_warnings=true) == + @test parsestmt(Expr, "import A.(:b).:c: x.:z", ignore_warnings=true) == Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z))) # Stupid parens and quotes in import paths - @test parse(Expr, "import A.:+", ignore_warnings=true) == + @test parsestmt(Expr, "import A.:+", ignore_warnings=true) == Expr(:import, Expr(:., :A, :+)) - @test parse(Expr, "import A.(:+)", ignore_warnings=true) == + @test parsestmt(Expr, "import A.(:+)", ignore_warnings=true) == Expr(:import, Expr(:., :A, :+)) - @test parse(Expr, "import A.:(+)", ignore_warnings=true) == + @test parsestmt(Expr, "import A.:(+)", 
ignore_warnings=true) == Expr(:import, Expr(:., :A, :+)) - @test parse(Expr, "import A.:(+) as y", ignore_warnings=true, version=v"1.6") == + @test parsestmt(Expr, "import A.:(+) as y", ignore_warnings=true, version=v"1.6") == Expr(:import, Expr(:as, Expr(:., :A, :+), :y)) end end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index f237f7aca776a..bac33c10e216d 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -1,34 +1,44 @@ @testset "parser API" begin @testset "parse with String input" begin - @test parse(Expr, " x ") == :x + @test parsestmt(Expr, " x ") == :x @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) @test parseatom(Expr, " x ") == :x @test parseatom(Expr, "(x)") == :x # SubString - @test parse(Expr, SubString("x+y")) == :(x+y) - @test parse(Expr, SubString("α+x")) == :(α+x) + @test parsestmt(Expr, SubString("x+y")) == :(x+y) + @test parsestmt(Expr, SubString("α+x")) == :(α+x) @test parseatom(Expr, SubString("x+y",3,3)) == :y # Exceptions due to extra trailing syntax @test_throws JuliaSyntax.ParseError parseatom(Expr, "x+y") - @test_throws JuliaSyntax.ParseError parse(Expr, "x+y\nz") + @test_throws JuliaSyntax.ParseError parsestmt(Expr, "x+y\nz") # ignore_warnings flag - @test_throws JuliaSyntax.ParseError parse(Expr, "import . .A") - @test parse(Expr, "import . .A", ignore_warnings=true) == :(import ..A) + @test_throws JuliaSyntax.ParseError parsestmt(Expr, "import . .A") + @test parsestmt(Expr, "import . 
.A", ignore_warnings=true) == :(import ..A) # version selection - @test_throws JuliaSyntax.ParseError parse(Expr, "[a ;; b]", version=v"1.6") - @test parse(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b) + @test_throws JuliaSyntax.ParseError parsestmt(Expr, "[a ;; b]", version=v"1.6") + @test parsestmt(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b) # filename - @test JuliaSyntax.parse(Expr, "begin\na\nend", filename="foo.jl", first_line=55) == + @test parsestmt(Expr, "begin\na\nend", filename="foo.jl", first_line=55) == Expr(:block, LineNumberNode(56, Symbol("foo.jl")), :a) # ignore_trivia @test parseatom(Expr, " x ", ignore_trivia=true) == :x @test_throws JuliaSyntax.ParseError parseatom(Expr, " x ", ignore_trivia=false) + + # Top level parsing + @test parseall(Expr, "a\nb") == + Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b) + @test parseall(Expr, "a\nb #==#") == + Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b) + @test parseall(Expr, "#==#\na\nb") == + Expr(:toplevel, LineNumberNode(2), :a, LineNumberNode(3), :b) + @test parseall(Expr, "a\nb\n#==#") == + Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b) end @testset "IO input" begin @@ -67,18 +77,18 @@ @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) @test pos == 6 end - @test parse(Expr, "x+y\nz", 1) == (:(x+y), 4) + @test parsestmt(Expr, "x+y\nz", 1) == (:(x+y), 4) @test parseatom(Expr, "x+y\nz", 1) == (:x, 2) @test parseatom(Expr, "x+y\nz", 5) == (:z, 6) # SubString - @test parse(Expr, SubString("α+x\ny"), 1) == (:(α+x), 5) + @test parsestmt(Expr, SubString("α+x\ny"), 1) == (:(α+x), 5) @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2) @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4) end @testset "error/warning handling" begin - parseshow(s;kws...) = sprint(show, MIME("text/x.sexpression"), parse(SyntaxNode, s; kws...)) + parseshow(s;kws...) 
= sprint(show, MIME("text/x.sexpression"), parsestmt(SyntaxNode, s; kws...)) @test_throws JuliaSyntax.ParseError parseshow("try finally catch ex end") @test parseshow("try finally catch ex end", ignore_warnings=true) == "(try (block) (finally (block)) (catch ex (block)))" @@ -97,7 +107,7 @@ end @testset "ParseError printing" begin try - JuliaSyntax.parse(JuliaSyntax.SyntaxNode, "a -- b -- c", filename="somefile.jl") + JuliaSyntax.parsestmt(JuliaSyntax.SyntaxNode, "a -- b -- c", filename="somefile.jl") @assert false "error should be thrown" catch exc @test exc isa JuliaSyntax.ParseError diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index c6dd658508c2f..c0cbf54763b43 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -1,7 +1,7 @@ @testset "SyntaxNode" begin # Child access tt = "a*b + c" - t = parse(SyntaxNode, tt) + t = parsestmt(SyntaxNode, tt) @test sourcetext(child(t, 1)) == "a*b" @test sourcetext(child(t, 1, 1)) == "a" @@ -37,22 +37,22 @@ @test occursin("immutable", e.msg) && occursin("SyntaxData", e.msg) # copy - t = parse(SyntaxNode, "a*b + c") + t = parsestmt(SyntaxNode, "a*b + c") ct = copy(t) ct.data = nothing @test ct.data === nothing && t.data !== nothing @test child(ct, 1).parent === ct @test child(ct, 1) !== child(t, 1) - node = parse(SyntaxNode, "f()") - push!(node, parse(SyntaxNode, "x")) + node = parsestmt(SyntaxNode, "f()") + push!(node, parsestmt(SyntaxNode, "x")) @test length(children(node)) == 2 - node[2] = parse(SyntaxNode, "y") + node[2] = parsestmt(SyntaxNode, "y") @test sourcetext(child(node, 2)) == "y" end @testset "SyntaxNode pretty printing" begin - t = parse(SyntaxNode, "f(a*b,\n c)", filename="foo.jl") + t = parsestmt(SyntaxNode, "f(a*b,\n c)", filename="foo.jl") @test sprint(show, MIME("text/plain"), t) == """ line:col│ tree │ file_name 1:1 │[call] │foo.jl diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 36fc22cde1afc..2d76cf0bdad99 
100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -9,7 +9,7 @@ using .JuliaSyntax: SourceFile, source_location, parse!, - parse, + parsestmt, parseall, parseatom, build_tree, @@ -71,7 +71,7 @@ end # Parse text with JuliaSyntax vs reference parser and show a textural diff of # the resulting expressions function parse_diff(text, showfunc=dump) - ex = parse(Expr, text, filename="none") + ex = parsestmt(Expr, text, filename="none") fl_ex = fl_parse(text) show_expr_text_diff(stdout, showfunc, ex, fl_ex) end From f01b0e15403e0d22d885560a144bc28fbc6a8b11 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 19 Apr 2023 17:01:25 +1000 Subject: [PATCH 0623/1109] Use importpath kind rather than K"." (JuliaLang/JuliaSyntax.jl#244) In `Expr` trees, the internal structure of children of `Expr(:.)` is different within an import statement vs outside. For example, `x.y` parses as * `(. x (quote y))` in normal code * `(. x y)` inside `import` and `using`. This causes awkwardness when writing expression manipulation code because import paths * Need to be distinguished from normal `.` but * Can be nested several layers deep in `:` and `as` nodes within an `import` or `using` parent node. To avoid this situation, here I've created a new K"importpath" kind specifically for import paths. 
--- JuliaSyntax/src/expr.jl | 21 ++++-------- JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/parser.jl | 70 +++++++++++++++++++------------------- JuliaSyntax/test/parser.jl | 66 +++++++++++++++++------------------ 4 files changed, 76 insertions(+), 82 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 3f502723392ab..e424aa038afa4 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -334,20 +334,13 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, pushfirst!(args, :*) elseif headsym === :struct pushfirst!(args, has_flags(node, MUTABLE_FLAG)) - elseif headsym === :import || headsym == :using - # Permit nonsense additional quoting such as - # import A.(:b).:c - if !isempty(args) && Meta.isexpr(args[1], Symbol(":")) - imports = args[1].args - else - imports = args - end - for imp in imports - imp_path = Meta.isexpr(imp, :as) ? imp.args[1].args : imp.args - for i = 1:length(imp_path) - if imp_path[i] isa QuoteNode - imp_path[i] = imp_path[i].value - end + elseif headsym === :importpath + headsym = :. + for i = 1:length(args) + if args[i] isa QuoteNode + # Permit nonsense additional quoting such as + # import A.(:b).:c + args[i] = args[i].value end end end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 3374dde915f1c..d9967e90c65c2 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -890,6 +890,7 @@ const _kind_names = "ref" "vect" "parens" + "importpath" # Concatenation syntax "braces" "bracescat" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7ca8b12b43e05..b6ae7601fe64e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2374,21 +2374,21 @@ function parse_atsym(ps::ParseState, allow_quotes=true) # export ($f) ==> (export ($ f)) mark = position(ps) if allow_quotes && peek(ps) == K":" - # import A.:+ ==> (import (. 
A (quote +))) + # import A.:+ ==> (import (importpath A (quote +))) emit_diagnostic(ps, warning="quoting with `:` is not required here") end parse_unary_prefix(ps) pos = position(ps) warn_parens = false if peek_behind(ps, pos).kind == K"parens" - # import A.(:+) ==> (import (. A (parens (quote +)))) + # import A.(:+) ==> (import (importpath A (parens (quote +)))) pos = first_child_position(ps, pos) warn_parens = true end if allow_quotes && peek_behind(ps, pos).kind == K"quote" pos = first_child_position(ps, pos) if peek_behind(ps, pos).kind == K"parens" - # import A.:(+) ==> (import (. A (quote (parens +)))) + # import A.:(+) ==> (import (importpath A (quote (parens +)))) pos = first_child_position(ps, pos) warn_parens = true end @@ -2423,7 +2423,7 @@ function parse_imports(ps::ParseState) bump(ps, TRIVIA_FLAG) has_import_prefix = true if initial_as - # import A as B: x ==> (import (: (error (as (. A) B)) (. x))) + # import A as B: x ==> (import (: (error (as (importpath A) B)) (importpath x))) emit(ps, emark, K"error", error="`as` before `:` in import/using") end elseif k == K"," @@ -2431,14 +2431,14 @@ function parse_imports(ps::ParseState) has_comma = true end if has_import_prefix || has_comma - # import A, y ==> (import (. A) (. y)) - # import A: x, y ==> (import (: (. A) (. x) (. y))) - # import A: +, == ==> (import (: (. A) (. +) (. ==))) + # import A, y ==> (import (importpath A) (importpath y)) + # import A: x, y ==> (import (: (importpath A) (importpath x) (importpath y))) + # import A: +, == ==> (import (: (importpath A) (importpath +) (importpath ==))) has_import_prefix_ = has_import_prefix parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_)) if peek(ps) == K":" # Error recovery - # import A: x, B: y ==> (import (: (. A) (. x) (. B) (error-t (. 
y)))) + # import A: x, B: y ==> (import (: (importpath A) (importpath x) (importpath B) (error-t (importpath y)))) emark = position(ps) bump(ps, TRIVIA_FLAG) parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_)) @@ -2447,11 +2447,11 @@ function parse_imports(ps::ParseState) end end if has_import_prefix - # import A: x ==> (import (: (. A) (. x))) + # import A: x ==> (import (: (importpath A) (importpath x))) emit(ps, mark, K":") end - # using A ==> (using (. A)) - # import A ==> (import (. A)) + # using A ==> (using (importpath A)) + # import A ==> (import (importpath A)) emit(ps, mark, word) end @@ -2461,21 +2461,21 @@ end function parse_import(ps::ParseState, word, has_import_prefix) mark = position(ps) parse_import_path(ps) - # import A: x, y ==> (import (: (. A) (. x) (. y))) + # import A: x, y ==> (import (: (importpath A) (importpath x) (importpath y))) if peek(ps) == K"as" - # import A as B ==> (import (as (. A) B)) - # import A: x as y ==> (import (: (. A) (as (. x) y))) - # using A: x as y ==> (using (: (. A) (as (. x) y))) + # import A as B ==> (import (as (importpath A) B)) + # import A: x as y ==> (import (: (importpath A) (as (importpath x) y))) + # using A: x as y ==> (using (: (importpath A) (as (importpath x) y))) bump(ps, TRIVIA_FLAG) parse_atsym(ps) emit(ps, mark, K"as") if word == K"using" && !has_import_prefix - # using A as B ==> (using (error (as (. A) B))) - # using A, B as C ==> (using (. A) (error (as (. B) C))) + # using A as B ==> (using (error (as (importpath A) B))) + # using A, B as C ==> (using (importpath A) (error (as (importpath B) C))) emit(ps, mark, K"error", error="`using` with `as` renaming requires a `:` and context module") end - #v1.5: import A as B ==> (import (error (as (. A) B))) + #v1.5: import A as B ==> (import (error (as (importpath A) B))) min_supported_version(v"1.6", ps, mark, "`import ... 
as`") return true else @@ -2489,12 +2489,12 @@ function parse_import_path(ps::ParseState) bump_trivia(ps) # The tokenizer produces conjoined dotted tokens .. and ... # When parsing import we must split these into single dots - # import .A ==> (import (. . A)) - # import ..A ==> (import (. . . A)) - # import ...A ==> (import (. . . . A)) - # import ....A ==> (import (. . . . . A)) + # import .A ==> (import (importpath . A)) + # import ..A ==> (import (importpath . . A)) + # import ...A ==> (import (importpath . . . A)) + # import ....A ==> (import (importpath . . . . A)) # Dots with spaces are allowed (a misfeature?) - # import . .A ==> (import (. . . A)) + # import . .A ==> (import (importpath . . A)) first_dot = true while true t = peek_token(ps) @@ -2516,32 +2516,32 @@ function parse_import_path(ps::ParseState) end if is_dotted(peek_token(ps)) # Modules with operator symbol names - # import .⋆ ==> (import (. . ⋆)) + # import .⋆ ==> (import (importpath . ⋆)) bump_trivia(ps) bump_split(ps, (1,K".",EMPTY_FLAGS), (1,peek(ps),EMPTY_FLAGS)) else - # import @x ==> (import (. @x)) - # import $A ==> (import (. ($ A))) + # import @x ==> (import (importpath @x)) + # import $A ==> (import (importpath ($ A))) parse_atsym(ps) end while true t = peek_token(ps) k = kind(t) if k == K"." - # import A.B ==> (import (. A B)) - # import $A.@x ==> (import (. ($ A) @x)) - # import A.B.C ==> (import (. A B C)) + # import A.B ==> (import (importpath A B)) + # import $A.@x ==> (import (importpath ($ A) @x)) + # import A.B.C ==> (import (importpath A B C)) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_atsym(ps) elseif is_dotted(t) # Resolve tokenization ambiguity: In imports, dots are part of the # path, not operators - # import A.== ==> (import (. A ==)) - # import A.⋆.f ==> (import (. 
A ⋆ f)) + # import A.== ==> (import (importpath A ==)) + # import A.⋆.f ==> (import (importpath A ⋆ f)) if preceding_whitespace(t) # Whitespace in import path allowed but discouraged - # import A .== ==> (import (. A ==)) + # import A .== ==> (import (importpath A ==)) emit_diagnostic(ps, whitespace=true, warning="space between dots in import path") end @@ -2549,17 +2549,17 @@ function parse_import_path(ps::ParseState) bump_split(ps, (1,K".",TRIVIA_FLAG), (1,k,EMPTY_FLAGS)) elseif k == K"..." # Import the .. operator - # import A... ==> (import (. A ..)) + # import A... ==> (import (importpath A ..)) bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) elseif k in KSet"NewlineWs ; , : EndMarker" - # import A; B ==> (import (. A)) + # import A; B ==> (import (importpath A)) break else # Could we emit a more comprehensible error here? break end end - emit(ps, mark, K".") + emit(ps, mark, K"importpath") end # parse comma-separated assignments, like "i=1:n,j=1:m,..." diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8da911b6779a2..92b580098b4a7 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -615,46 +615,46 @@ tests = [ "try x end" => "(try (block x) (error-t))" ], JuliaSyntax.parse_imports => [ - "import A as B: x" => "(import (: (error (as (. A) B)) (. x)))" - "import A, y" => "(import (. A) (. y))" - "import A: +, ==" => "(import (: (. A) (. +) (. ==)))" - "import A: x, y" => "(import (: (. A) (. x) (. y)))" - "import A: x, B: y" => "(import (: (. A) (. x) (. B) (error-t (. y))))" - "import A: x" => "(import (: (. A) (. x)))" - "using A" => "(using (. A))" - "import A" => "(import (. 
A))" + "import A as B: x" => "(import (: (error (as (importpath A) B)) (importpath x)))" + "import A, y" => "(import (importpath A) (importpath y))" + "import A: +, ==" => "(import (: (importpath A) (importpath +) (importpath ==)))" + "import A: x, y" => "(import (: (importpath A) (importpath x) (importpath y)))" + "import A: x, B: y" => "(import (: (importpath A) (importpath x) (importpath B) (error-t (importpath y))))" + "import A: x" => "(import (: (importpath A) (importpath x)))" + "using A" => "(using (importpath A))" + "import A" => "(import (importpath A))" # parse_import - "import A: x, y" => "(import (: (. A) (. x) (. y)))" - "import A as B" => "(import (as (. A) B))" - "import A: x as y" => "(import (: (. A) (as (. x) y)))" - "using A: x as y" => "(using (: (. A) (as (. x) y)))" - ((v=v"1.5",), "import A as B") => "(import (error (as (. A) B)))" - "using A as B" => "(using (error (as (. A) B)))" - "using A, B as C" => "(using (. A) (error (as (. B) C)))" + "import A: x, y" => "(import (: (importpath A) (importpath x) (importpath y)))" + "import A as B" => "(import (as (importpath A) B))" + "import A: x as y" => "(import (: (importpath A) (as (importpath x) y)))" + "using A: x as y" => "(using (: (importpath A) (as (importpath x) y)))" + ((v=v"1.5",), "import A as B") => "(import (error (as (importpath A) B)))" + "using A as B" => "(using (error (as (importpath A) B)))" + "using A, B as C" => "(using (importpath A) (error (as (importpath B) C)))" # parse_import_path # When parsing import we must split initial dots into nontrivial # leading dots for relative paths - "import .A" => "(import (. . A))" - "import ..A" => "(import (. . . A))" - "import ...A" => "(import (. . . . A))" - "import ....A" => "(import (. . . . . A))" + "import .A" => "(import (importpath . A))" + "import ..A" => "(import (importpath . . A))" + "import ...A" => "(import (importpath . . . A))" + "import ....A" => "(import (importpath . . . . 
A))" # Dots with spaces are allowed (a misfeature?) - "import . .A" => "(import (. . . A))" + "import . .A" => "(import (importpath . . A))" # Modules with operator symbol names - "import .⋆" => "(import (. . ⋆))" + "import .⋆" => "(import (importpath . ⋆))" # Expressions allowed in import paths - "import @x" => "(import (. @x))" - "import \$A" => "(import (. (\$ A)))" - "import \$A.@x" => "(import (. (\$ A) @x))" - "import A.B" => "(import (. A B))" - "import A.B.C" => "(import (. A B C))" - "import A.:+" => "(import (. A (quote +)))" - "import A.(:+)"=> "(import (. A (parens (quote +))))" - "import A.:(+)" => "(import (. A (quote (parens +))))" - "import A.==" => "(import (. A ==))" - "import A.⋆.f" => "(import (. A ⋆ f))" - "import A..." => "(import (. A ..))" - "import A; B" => "(import (. A))" + "import @x" => "(import (importpath @x))" + "import \$A" => "(import (importpath (\$ A)))" + "import \$A.@x" => "(import (importpath (\$ A) @x))" + "import A.B" => "(import (importpath A B))" + "import A.B.C" => "(import (importpath A B C))" + "import A.:+" => "(import (importpath A (quote +)))" + "import A.(:+)" => "(import (importpath A (parens (quote +))))" + "import A.:(+)" => "(import (importpath A (quote (parens +))))" + "import A.==" => "(import (importpath A ==))" + "import A.⋆.f" => "(import (importpath A ⋆ f))" + "import A..." => "(import (importpath A ..))" + "import A; B" => "(import (importpath A))" ], JuliaSyntax.parse_iteration_spec => [ "i = rhs" => "(= i rhs)" From 58cd088f6f21a227206101e88f911c1a8077ec21 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 21 Apr 2023 06:49:21 +1000 Subject: [PATCH 0624/1109] Distinguish `:x` from `quote x end` with a flag (JuliaLang/JuliaSyntax.jl#245) This is also distinguishable based on the presence of a child `block` as the only argument, but having the flag is convenient and also allows to distinguish cases like `x.:y` from `x.y`. 
--- JuliaSyntax/src/parse_stream.jl | 4 ++++ JuliaSyntax/src/parser.jl | 42 ++++++++++++++++----------------- JuliaSyntax/test/parser.jl | 40 +++++++++++++++---------------- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 3f789fc50aca4..2dd76c973b70a 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -31,6 +31,8 @@ const RAW_STRING_FLAG = RawFlags(1<<6) # Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses const PARENS_FLAG = RawFlags(1<<5) +# Set for K"quote" for the short form `:x` as oppsed to long form `quote x end` +const COLON_QUOTE = RawFlags(1<<5) # Set for K"struct" when mutable const MUTABLE_FLAG = RawFlags(1<<5) @@ -95,6 +97,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") elseif kind(head) in KSet"tuple block macrocall" has_flags(head, PARENS_FLAG) && (str = str*"-p") + elseif kind(head) == K"quote" + has_flags(head, COLON_QUOTE) && (str = str*"-:") elseif kind(head) == K"struct" has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") elseif kind(head) == K"module" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b6ae7601fe64e..2a84a4643b54f 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -844,8 +844,8 @@ function parse_range(ps::ParseState) preceding_whitespace(peek_token(ps)) && !preceding_whitespace(peek_token(ps, 2)) # Tricky cases in space sensitive mode - # [1 :a] ==> (hcat 1 (quote a)) - # [1 2:3 :a] ==> (hcat 1 (call-i 2 : 3) (quote a)) + # [1 :a] ==> (hcat 1 (quote-: a)) + # [1 2:3 :a] ==> (hcat 1 (call-i 2 : 3) (quote-: a)) break end t2 = peek_token(ps,2) @@ -1159,7 +1159,7 @@ function parse_unary(ps::ParseState) is_syntactic_operator(op_k) ) # `op_t` is not an initial operator - # :T ==> (quote T) + # :T ==> (quote-: T) # in::T ==> (:: in T) # isa::T ==> (:: isa T) 
parse_factor(ps) @@ -1609,13 +1609,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_call_arglist(ps, K")") emit(ps, mark, K"dotcall") elseif k == K":" - # A.:+ ==> (. A (quote +)) - # A.: + ==> (. A (error-t) (quote +)) + # A.:+ ==> (. A (quote-: +)) + # A.: + ==> (. A (error-t) (quote-: +)) m = position(ps) bump(ps, TRIVIA_FLAG) bump_disallowed_space(ps) parse_atom(ps, false) - emit(ps, m, K"quote") + emit(ps, m, K"quote", COLON_QUOTE) emit(ps, mark, K".") elseif k == K"$" # f.$x ==> (. f (inert ($ x))) @@ -2374,21 +2374,21 @@ function parse_atsym(ps::ParseState, allow_quotes=true) # export ($f) ==> (export ($ f)) mark = position(ps) if allow_quotes && peek(ps) == K":" - # import A.:+ ==> (import (importpath A (quote +))) + # import A.:+ ==> (import (importpath A (quote-: +))) emit_diagnostic(ps, warning="quoting with `:` is not required here") end parse_unary_prefix(ps) pos = position(ps) warn_parens = false if peek_behind(ps, pos).kind == K"parens" - # import A.(:+) ==> (import (importpath A (parens (quote +)))) + # import A.(:+) ==> (import (importpath A (parens (quote-: +)))) pos = first_child_position(ps, pos) warn_parens = true end if allow_quotes && peek_behind(ps, pos).kind == K"quote" pos = first_child_position(ps, pos) if peek_behind(ps, pos).kind == K"parens" - # import A.:(+) ==> (import (importpath A (quote (parens +)))) + # import A.:(+) ==> (import (importpath A (quote-: (parens +)))) pos = first_child_position(ps, pos) warn_parens = true end @@ -3015,14 +3015,14 @@ function parse_paren(ps::ParseState, check_identifiers=true) emit(ps, mark, K"tuple", PARENS_FLAG) elseif is_syntactic_operator(k) # allow :(=) etc in unchecked contexts, eg quotes - # :(=) ==> (quote (parens =)) + # :(=) ==> (quote-: (parens =)) parse_atom(ps, check_identifiers) bump_closing_token(ps, K")") emit(ps, mark, K"parens") elseif !check_identifiers && k == K"::" && peek(ps, 2, skip_newlines=true) == K")" # allow :(::) as a special case - # :(::) 
==> (quote (parens ::)) + # :(::) ==> (quote-: (parens ::)) bump(ps) bump(ps, TRIVIA_FLAG, skip_newlines=true) emit(ps, mark, K"parens") @@ -3415,7 +3415,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) emit(ps, mark, K"char") elseif leading_kind == K":" # symbol/expression quote - # :foo ==> (quote foo) + # :foo ==> (quote-: foo) t = peek_token(ps, 2) k = kind(t) if is_closing_token(ps, k) && (!is_keyword(k) || preceding_whitespace(t)) @@ -3427,19 +3427,19 @@ function parse_atom(ps::ParseState, check_identifiers=true) end bump(ps, TRIVIA_FLAG) # K":" if preceding_whitespace(t) - # : foo ==> (quote (error-t) foo) - # :\nfoo ==> (quote (error-t) foo) + # : foo ==> (quote-: (error-t) foo) + # :\nfoo ==> (quote-: (error-t) foo) bump_trivia(ps, TRIVIA_FLAG, skip_newlines=true, error="whitespace not allowed after `:` used for quoting") end # Being inside quote makes keywords into identifiers at the # first level of nesting - # :end ==> (quote end) - # :(end) ==> (quote (parens (error-t))) + # :end ==> (quote-: end) + # :(end) ==> (quote-: (parens (error-t))) # Being inside quote makes end non-special again (issue #27690) - # a[:(end)] ==> (ref a (quote (error-t end))) + # a[:(end)] ==> (ref a (quote-: (error-t end))) parse_atom(ParseState(ps, end_symbol=false), false) - emit(ps, mark, K"quote") + emit(ps, mark, K"quote", COLON_QUOTE) elseif check_identifiers && leading_kind == K"=" && is_plain_equals(peek_token(ps)) # = ==> (error =) bump(ps, error="unexpected `=`") @@ -3505,12 +3505,12 @@ function parse_atom(ps::ParseState, check_identifiers=true) end emit(ps, mark, K"var") elseif check_identifiers && is_closing_token(ps, leading_kind) - # :(end) ==> (quote (error end)) + # :(end) ==> (quote-: (error end)) bump(ps, error="invalid identifier") else # Remap keywords to identifiers. 
- # :end ==> (quote end) - # :<: ==> (quote <:) + # :end ==> (quote-: end) + # :<: ==> (quote-: <:) bump(ps, remap_kind=K"Identifier") end elseif leading_kind == K"(" # parens or tuple diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 92b580098b4a7..248528c662e83 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -142,8 +142,8 @@ tests = [ "a..b" => "(call-i a .. b)" "a … b" => "(call-i a … b)" "a .… b" => "(dotcall-i a … b)" - "[1 :a]" => "(hcat 1 (quote a))" - "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote a))" + "[1 :a]" => "(hcat 1 (quote-: a))" + "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote-: a))" "x..." => "(... x)" "x:y..." => "(... (call-i x : y))" "x..y..." => "(... (call-i x .. y))" @@ -195,7 +195,7 @@ tests = [ "x 'y" => "x" ], JuliaSyntax.parse_unary => [ - ":T" => "(quote T)" + ":T" => "(quote-: T)" "in::T" => "(::-i in T)" "isa::T" => "(::-i isa T)" "-2^x" => "(call-pre - (call-i 2 ^ x))" @@ -374,7 +374,7 @@ tests = [ "(a=1)[]" => "(ref (parens (= a 1)))" => Expr(:ref, Expr(:(=), :a, 1)) "a[end]" => "(ref a end)" "a[begin]" => "(ref a begin)" - "a[:(end)]" => "(typed_hcat a (quote (parens (error-t))) (error-t))" + "a[:(end)]" => "(typed_hcat a (quote-: (parens (error-t))) (error-t))" "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" => "(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" @@ -393,8 +393,8 @@ tests = [ "(a=1).()" => "(dotcall (parens (= a 1)))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) "f. (x)" => "(dotcall f (error-t) x)" # Other dotted syntax - "A.:+" => "(. A (quote +))" - "A.: +" => "(. A (quote (error-t) +))" + "A.:+" => "(. A (quote-: +))" + "A.: +" => "(. A (quote-: (error-t) +))" "f.\$x" => "(. f (inert (\$ x)))" "f.\$(x+y)" => "(. f (inert (\$ (parens (call-i x + y)))))" "A.\$B.@x" => "(macrocall (. (. 
A (inert (\$ B))) (quote @x)))" @@ -648,9 +648,9 @@ tests = [ "import \$A.@x" => "(import (importpath (\$ A) @x))" "import A.B" => "(import (importpath A B))" "import A.B.C" => "(import (importpath A B C))" - "import A.:+" => "(import (importpath A (quote +)))" - "import A.(:+)" => "(import (importpath A (parens (quote +))))" - "import A.:(+)" => "(import (importpath A (quote (parens +))))" + "import A.:+" => "(import (importpath A (quote-: +)))" + "import A.(:+)" => "(import (importpath A (parens (quote-: +))))" + "import A.:(+)" => "(import (importpath A (quote-: (parens +))))" "import A.==" => "(import (importpath A ==))" "import A.⋆.f" => "(import (importpath A ⋆ f))" "import A..." => "(import (importpath A ..))" @@ -712,13 +712,13 @@ tests = [ "''" => "(char (error))" "'" => "(char (error))" # symbol/expression quote - ":foo" => "(quote foo)" + ":foo" => "(quote-: foo)" # Literal colons ":)" => ":" ": end" => ":" # Whitespace after quoting colon - ": foo" => "(quote (error-t) foo)" - ":\nfoo" => "(quote (error-t) foo)" + ": foo" => "(quote-: (error-t) foo)" + ":\nfoo" => "(quote-: (error-t) foo)" # plain equals "=" => "(error =)" # Identifiers @@ -745,12 +745,12 @@ tests = [ "+" => "+" "~" => "~" # Quoted syntactic operators allowed - ":+=" => "(quote +=)" - ":.=" => "(quote .=)" + ":+=" => "(quote-: +=)" + ":.=" => "(quote-: .=)" # Special symbols quoted - ":end" => "(quote end)" - ":(end)" => "(quote (parens (error-t)))" - ":<:" => "(quote <:)" + ":end" => "(quote-: end)" + ":(end)" => "(quote-: (parens (error-t)))" + ":<:" => "(quote-: <:)" # unexpect = "=" => "(error =)" # parse_cat @@ -782,9 +782,9 @@ tests = [ "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" # parse_paren - ":(=)" => "(quote (parens =))" - ":(::)" => "(quote (parens ::))" - ":(::\n)" => "(quote (parens ::))" + ":(=)" => "(quote-: (parens =))" + ":(::)" => "(quote-: (parens ::))" + ":(::\n)" => "(quote-: (parens ::))" "(function f \n end)" 
=> "(parens (function f))" # braces "{x y}" => "(bracescat (row x y))" From 89641233d48cc371719d6a64c8be31520bec799a Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 21 Apr 2023 16:54:47 +1000 Subject: [PATCH 0625/1109] Always parse standalone dotted operators as `(. op)` (JuliaLang/JuliaSyntax.jl#240) Change the parsing of standalone dotted operators such as `.+` to always parse as `(. +)`, regardless of where they appear syntactically. This removes the last of the cases where operators were coalesced with their leading `.` into a single symbol. For example, `.+(x,y)` is now parsed as `(call (. +) x y)`, rather than `(call .+ x y)`. The reference parser concatenates the `.` and the operator in some cases, so for compatibility we convert back to that less consistent form during `Expr` conversion. --- JuliaSyntax/src/expr.jl | 25 +++++++++---- JuliaSyntax/src/parser.jl | 74 ++++++++++++++++---------------------- JuliaSyntax/test/expr.jl | 29 ++++++++++++--- JuliaSyntax/test/parser.jl | 15 ++++---- 4 files changed, 83 insertions(+), 60 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index e424aa038afa4..c624387ccb0bc 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -31,7 +31,7 @@ function reorder_parameters!(args, params_pos) end function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, - eq_to_kw=false, map_kw_in_params=false) + eq_to_kw=false, map_kw_in_params=false, coalesce_dot=false) nodekind = kind(node) if !haschildren(node) val = node.val @@ -150,8 +150,13 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[2*i-1] = source_location(LineNumberNode, n.source, n.position) end eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all + coalesce_dot_with_ops = i==1 && + (nodekind in KSet"call dotcall curly" || + nodekind == K"quote" && flags(node) == COLON_QUOTE) args[insert_linenums ? 
2*i : i] = - _to_expr(n, eq_to_kw=eq_to_kw, map_kw_in_params=in_vcbr) + _to_expr(n, eq_to_kw=eq_to_kw, + map_kw_in_params=in_vcbr, + coalesce_dot=coalesce_dot_with_ops) end if nodekind == K"block" && has_flags(node, PARENS_FLAG) popfirst!(args) @@ -190,6 +195,10 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = Symbol(".", args[1]) end end + elseif headsym === :. && length(args) == 1 && + is_operator(kind(node[1])) && + (coalesce_dot || is_syntactic_operator(kind(node[1]))) + return Symbol(".", args[1]) elseif headsym in (:ref, :curly) # Move parameters blocks to args[2] reorder_parameters!(args, 2) @@ -303,11 +312,15 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym === :module pushfirst!(args, !has_flags(node, BARE_MODULE_FLAG)) pushfirst!(args[3].args, loc) - elseif headsym === :inert || (headsym === :quote && length(args) == 1 && - !(a1 = only(args); a1 isa Expr || a1 isa QuoteNode || - a1 isa Bool # <- compat hack, Julia 1.4+ - )) + elseif headsym === :inert return QuoteNode(only(args)) + elseif (headsym === :quote && length(args) == 1) + a1 = only(args) + if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool) + # Flisp parser does an optimization here: simple values are stored + # as inert QuoteNode rather than in `Expr(:quote)` quasiquote + return QuoteNode(a1) + end elseif headsym === :do @check length(args) == 3 return Expr(:do, args[1], Expr(:->, args[2], args[3])) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2a84a4643b54f..8f4ff73cf5030 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1151,7 +1151,7 @@ function parse_unary(ps::ParseState) bump_trivia(ps) op_t = peek_token(ps) op_k = kind(op_t) - if ( + if ( !is_operator(op_k) || is_word_operator(op_k) || (op_k in KSet": ' .'") || @@ -1194,19 +1194,13 @@ function parse_unary(ps::ParseState) end end if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" - if 
is_dotted(op_t) - # Standalone dotted operators are parsed as (|.| op) - # .+ ==> (. +) - # .+\n ==> (. +) - # .+ = ==> (. +) - # .+) ==> (. +) - # .& ==> (. &) - bump_dotsplit(ps, emit_dot_node=true) - else - # Standalone non-dotted operators - # +) ==> + - bump(ps) - end + # Standalone operators parsed as `op` or `(. op)` + # +) ==> + + # +\n ==> + + # + = ==> + + # .+ ==> (. +) + # .& ==> (. &) + parse_atom(ps) elseif k2 == K"{" || (!is_unary_op(op_t) && k2 == K"(") # Call with type parameters or non-unary prefix call # +{T}(x::T) ==> (call (curly + T) (:: x T)) @@ -1267,28 +1261,13 @@ function parse_unary(ps::ParseState) emit(ps, mark, op_k) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) else - if is_dotted(op_t) - # Ugly hack to undo the split in bump_dotsplit - # .+(a,) ==> (call .+ a) - reset_node!(ps, op_pos, kind=K"TOMBSTONE") - tb1 = ps.stream.tokens[op_pos.token_index-1] - ps.stream.tokens[op_pos.token_index-1] = - SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), - K"TOMBSTONE", tb1.preceding_whitespace, - tb1.next_byte-1) - tb0 = ps.stream.tokens[op_pos.token_index] - ps.stream.tokens[op_pos.token_index] = - SyntaxToken(SyntaxHead(kind(tb0), flags(tb0)), - tb0.orig_kind, tb0.preceding_whitespace, - tb0.next_byte) - end emit(ps, mark, K"call") end parse_call_chain(ps, mark) parse_factor_with_initial_ex(ps, mark) else # Unary function calls with brackets as grouping, not an arglist - # .+(a) ==> (dotcall-pre (. +) (parens a)) + # .+(a) ==> (dotcall-pre + (parens a)) if opts.is_block # +(a;b) ==> (call-pre + (block-p a b)) emit(ps, mark_before_paren, K"block", PARENS_FLAG) @@ -1627,6 +1606,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_atom(ps) emit(ps, m, K"$") macro_name_position = position(ps) + # We need `inert` rather than `quote` here for subtle reasons: + # We need the expression expander to "see through" the quote + # around the `$x` in `:(f.$x)`, so that the `$x` is expanded + # even though it's double quoted. 
emit(ps, m, K"inert") emit(ps, mark, K".") elseif k == K"@" @@ -2307,6 +2290,10 @@ function parse_do(ps::ParseState, mark) emit(ps, mark, K"do") end +function _is_valid_macro_name(peektok) + return !is_error(peektok.kind) && (peektok.is_leaf || peektok.kind == K"var") +end + function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=nothing) k = peek_behind(ps, macro_name_position).kind if k == K"var" @@ -2321,7 +2308,8 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not return end if isnothing(name_kind) - name_kind = (k == K"Identifier") ? K"MacroName" : K"error" + name_kind = _is_valid_macro_name(peek_behind(ps, macro_name_position)) ? + K"MacroName" : K"error" if name_kind == K"error" # TODO: This isn't quite accurate emit_diagnostic(ps, macro_name_position, macro_name_position, @@ -2343,11 +2331,11 @@ function parse_macro_name(ps::ParseState) bump_disallowed_space(ps) mark = position(ps) parse_atom(ps, false) - kb = peek_behind(ps, position(ps)).kind - if kb == K"parens" + b = peek_behind(ps, position(ps)) + if b.kind == K"parens" emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary") - elseif !(kb in KSet"Identifier var") + elseif !_is_valid_macro_name(b) # @[x] y z ==> (macrocall (error (vect x)) y z) emit(ps, mark, K"error", error="invalid macro name") end @@ -3448,20 +3436,18 @@ function parse_atom(ps::ParseState, check_identifiers=true) # x₁ ==> x₁ bump(ps) elseif is_operator(leading_kind) + # + ==> + + # .+ ==> (. +) + # .= ==> (. =) + bump_dotsplit(ps, emit_dot_node=true) if check_identifiers && !is_valid_identifier(leading_kind) # += ==> (error +=) - # .+= ==> (error .+=) - bump(ps, error="invalid identifier") + # ? ==> (error ?) + # .+= ==> (error (. 
+=)) + emit(ps, mark, K"error", error="invalid identifier") else - # + ==> + - # ~ ==> ~ # Quoted syntactic operators allowed - # :+= ==> (quote +=) - # :.= ==> (quote .=) - # Remap the kind here to K"Identifier", as operators parsed in this - # branch should be in "identifier-like" positions (I guess this is - # correct? is it convenient?) - bump(ps, remap_kind=K"Identifier") + # :+= ==> (quote-: +=) end elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 31d466cccb660..b7583fb0571a1 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -241,16 +241,36 @@ Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) end - @testset "dotcall" begin + @testset "dotcall / dotted operators" begin @test parsestmt(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) @test parsestmt(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) @test parsestmt(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) @test parsestmt(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) @test parsestmt(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), :b, Symbol(".<"), :c) - @test parsestmt(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) - @test parsestmt(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) - @test parsestmt(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) + @test parsestmt(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) + @test parsestmt(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) + @test parsestmt(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) + @test parsestmt(Expr, "(.+)(x)") == Expr(:call, Expr(:., :+), :x) + @test parsestmt(Expr, "(.+).(x)") == Expr(:., Expr(:., :+), Expr(:tuple, :x)) + + @test parsestmt(Expr, ".+") == Expr(:., :+) + @test parsestmt(Expr, ":.+") == QuoteNode(Symbol(".+")) + @test parsestmt(Expr, ":(.+)") == Expr(:quote, (Expr(:., :+))) + @test parsestmt(Expr, "quote .+ end") == Expr(:quote, + Expr(:block, 
+ LineNumberNode(1), + Expr(:., :+))) + @test parsestmt(Expr, ".+{x}") == Expr(:curly, Symbol(".+"), :x) + + # Quoted syntactic ops act different when in parens + @test parsestmt(Expr, ":.=") == QuoteNode(Symbol(".=")) + @test parsestmt(Expr, ":(.=)") == QuoteNode(Symbol(".=")) + + # A few other cases of bare dotted ops + @test parsestmt(Expr, "f(.+)") == Expr(:call, :f, Expr(:., :+)) + @test parsestmt(Expr, "(a, .+)") == Expr(:tuple, :a, Expr(:., :+)) + @test parsestmt(Expr, "A.:.+") == Expr(:., :A, QuoteNode(Symbol(".+"))) end @testset "where" begin @@ -361,6 +381,7 @@ end @testset "import" begin + @test parsestmt(Expr, "import A") == Expr(:import, Expr(:., :A)) @test parsestmt(Expr, "import A.(:b).:c: x.:z", ignore_warnings=true) == Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z))) # Stupid parens and quotes in import paths diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 248528c662e83..efc34473a30ce 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -222,10 +222,10 @@ tests = [ # Call with type parameters or non-unary prefix call "+{T}(x::T)" => "(call (curly + T) (::-i x T))" "*(x)" => "(call * x)" - ".*(x)" => "(call .* x)" + ".*(x)" => "(call (. *) x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" - ".+(a,)" => "(call .+ a)" + ".+(a,)" => "(call (. +) a)" "(.+)(a)" => "(call (parens (. +)) a)" "+(a=1,)" => "(call + (= a 1))" => Expr(:call, :+, Expr(:kw, :a, 1)) "+(a...)" => "(call + (... a))" @@ -304,7 +304,7 @@ tests = [ # parse_call "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" - ".&(x,y)" => "(call .& x y)" + ".&(x,y)" => "(call (. &) x y)" # parse_call_chain "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" @@ -394,6 +394,7 @@ tests = [ "f. (x)" => "(dotcall f (error-t) x)" # Other dotted syntax "A.:+" => "(. A (quote-: +))" + "A.:.+" => "(. A (quote-: (. +)))" "A.: +" => "(. 
A (quote-: (error-t) +))" "f.\$x" => "(. f (inert (\$ x)))" "f.\$(x+y)" => "(. f (inert (\$ (parens (call-i x + y)))))" @@ -738,15 +739,17 @@ tests = [ """var"x"end""" => "(var x (error-t))" """var"x"1""" => "(var x (error-t))" """var"x"y""" => "(var x (error-t))" - # Syntactic operators + # Standalone syntactic operators are errors "+=" => "(error +=)" - ".+=" => "(error .+=)" + "?" => "(error ?)" + ".+=" => "(error (. +=))" # Normal operators "+" => "+" "~" => "~" # Quoted syntactic operators allowed ":+=" => "(quote-: +=)" - ":.=" => "(quote-: .=)" + ":.=" => "(quote-: (. =))" + ":.&&" => "(quote-: (. &&))" # Special symbols quoted ":end" => "(quote-: end)" ":(end)" => "(quote-: (parens (error-t)))" From 239cffe96f1bd07c1718c9b4939dfaab1109d80f Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 23 Apr 2023 15:43:33 +1000 Subject: [PATCH 0626/1109] Big list of AST difference between Expr & GreenNode (JuliaLang/JuliaSyntax.jl#246) Copy this list from issue JuliaLang/JuliaSyntax.jl#88 to the docs. --- JuliaSyntax/README.md | 88 +++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index b84f91ce7a9ed..e858cb595af7c 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -366,10 +366,75 @@ Expr(:ncat) ## Tree differences between GreenNode and Expr -Wherever possible, the tree structure of `GreenNode`/`SyntaxNode` is 1:1 with -`Expr`. There are, however, some exceptions. First, `GreenNode` inherently -stores source position, so there's no need for the `LineNumberNode`s used by -`Expr`. There's also a small number of other differences +The tree structure of `GreenNode`/`SyntaxNode` is similar to Julia's `Expr` +data structure but there are various differences: + +### Source ordered children + +The children of our trees are strictly in source order. This has many +consequences in places where `Expr` reorders child expressions. 
+ +* Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`. +* Flattened generators are represented in source order + +### No `LineNumberNode`s + +Our syntax nodes inherently stores source position, so there's no need for the +`LineNumberNode`s used by `Expr`. + +### More consistent / less redundant `block`s + +Sometimes `Expr` needs redundant block constructs to store `LineNumberNode`s, +but we don't need these. Also in cases which do use blocks we try to use them +consistently. + +* No block is used on the right hand side of short form function syntax +* No block is used for the conditional in `elseif` +* No block is used for the body of anonymous functions after the `->` +* `let` argument lists always use a block regardless of number or form of bindings + +### Faithful representation of the source text / avoid premature lowering + +Some cases of "premature lowering" have been removed, preferring to represent +the source text more closely. + +* `K"macrocall"` - allow users to easily distinguish macrocalls with parentheses from those without them (#218) +* Grouping parentheses are represented with a node of kind `K"parens"` (#222) +* Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) +* `global const` and `const global` are not normalized by the parser. 
This is done in `Expr` conversion (#130) +* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) +* `@.` is not lowered to `@__dot__` inside the parser (#146) +* Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) + +### Containers for string-like constructs + +String-like constructs always come within a container node, not as a single +token. These are useful for tooling which works with the tokens of the source +text. Also separating the delimiters from the text they delimit removes a whole +class of tokenization errors and lets the parser deal with them. + +* string always use `K"string"` to wrap strings, even when they only contain a single string chunk (#94) +* char literals are wrapped in the `K"char"` kind, containing the character literal string along with their delimiters (#121) +* backticks use the `K"cmdstring"` kind +* `var""` syntax uses `K"var"` as the head (#127) +* The parser splits triple quoted strings into string chunks interspersed with whitespace trivia + +### Improvements for AST inconsistencies + +* Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90) +* Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. 
*) x y)` (#240) +* The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103) +* Unadorned postfix adjoint is parsed as `call` rather than as a syntactic operator for consistency with suffixed versions like `x'ᵀ` (#124) + +### Improvements to awkward AST forms + +* Frakentuples with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133) +* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) +* The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) +* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) + + +## More detail on tree differences ### Flattened generators @@ -460,21 +525,6 @@ julia> text = "x = \"\"\"\n \$a\n b\"\"\"" 21:23 │ """ "\"\"\"" ``` -### Less redundant `block`s - -Sometimes `Expr` needs to contain redundant block constructs in order to have a -place to store `LineNumberNode`s, but we don't need these and avoid adding them -in several cases: -* The right hand side of short form function syntax -* The conditional in `elseif` -* The body of anonymous functions after the `->` - -### Distinct conditional ternary expression - -The syntax `a ? b : c` is the same as `if a b else c` in `Expr` so macros can't -distinguish these cases. Instead, we use a distinct expression head `K"?"` and -lower to `Expr(:if)` during `Expr` conversion. 
- ### String nodes always wrapped in `K"string"` or `K"cmdstring"` All strings are surrounded by a node of kind `K"string"`, even non-interpolated From b46c85f6bbcccb9b7bac0996067f58c410f19019 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 25 Apr 2023 07:26:25 +1000 Subject: [PATCH 0627/1109] Fix for detecting call heads in function signatures (JuliaLang/JuliaSyntax.jl#247) Discovered parsing packages in General --- JuliaSyntax/src/parse_stream.jl | 25 ++++++++++++++++--------- JuliaSyntax/src/parser.jl | 6 +++++- JuliaSyntax/test/parser.jl | 1 + 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 2dd76c973b70a..447e18ec52706 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -527,20 +527,23 @@ Retroactively inspecting or modifying the parser's output can be confusing, so using this function should be avoided where possible. """ function peek_behind(stream::ParseStream, pos::ParseStreamPosition) - if token_is_last(stream, pos) && !isempty(stream.tokens) + if token_is_last(stream, pos) && pos.token_index > 0 t = stream.tokens[pos.token_index] return (kind=kind(t), flags=flags(t), orig_kind=t.orig_kind, is_leaf=true) - elseif !isempty(stream.ranges) + elseif !isempty(stream.ranges) && pos.range_index > 0 r = stream.ranges[pos.range_index] return (kind=kind(r), flags=flags(r), orig_kind=K"None", is_leaf=false) else - internal_error("Can't peek behind at start of stream") + return (kind=K"None", + flags=EMPTY_FLAGS, + orig_kind=K"None", + is_leaf=true) end end @@ -585,7 +588,11 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition) end end -function peek_behind(stream::ParseStream; skip_trivia::Bool=true) +# Get last position in stream "of interest", skipping +# * parens nodes +# * deleted tokens (TOMBSTONE) +# * whitespace (if skip_trivia=true) +function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true) token_index 
= lastindex(stream.tokens) range_index = lastindex(stream.ranges) while range_index >= firstindex(stream.ranges) && @@ -601,11 +608,11 @@ function peek_behind(stream::ParseStream; skip_trivia::Bool=true) end token_index -= 1 end - if token_index > 0 - return peek_behind(stream, ParseStreamPosition(token_index, range_index)) - else - internal_error("Can't peek behind at start of stream") - end + return ParseStreamPosition(token_index, range_index) +end + +function peek_behind(stream::ParseStream; skip_trivia::Bool=true) + peek_behind(stream, peek_behind_pos(stream; skip_trivia=skip_trivia)) end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8f4ff73cf5030..8a880d203a1a8 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -80,6 +80,10 @@ function peek_behind(ps::ParseState, args...; kws...) peek_behind(ps.stream, args...; kws...) end +function peek_behind_pos(ps::ParseState, args...; kws...) + peek_behind_pos(ps.stream, args...; kws...) +end + function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines bump(ps.stream, flags; skip_newlines=skip_nl, kws...) 
@@ -306,7 +310,7 @@ end # The expression is a call after stripping `where` and `::` function was_eventually_call(ps::ParseState) stream = ps.stream - p = position(ps) + p = peek_behind_pos(ps) while true b = peek_behind(stream, p) if b.kind == K"call" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index efc34473a30ce..d31733fa69b58 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -589,6 +589,7 @@ tests = [ "function (f() where T) where U end" => "(function (where (parens (where (call f) T)) U) (block))" "function (f()::S) end"=> "(function (parens (::-i (call f) S)) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) "function ((f()::S) where T) end" => "(function (parens (where (parens (::-i (call f) S)) T)) (block))" + "function (x*y ) end" => "(function (parens (call-i x * y)) (block))" # body "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" From 6e3e9bc5703a2cd4a2a3b556855a30ad70a751b7 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 25 Apr 2023 22:25:39 +1000 Subject: [PATCH 0628/1109] Big cleanup of test case reduction utilities (JuliaLang/JuliaSyntax.jl#250) * Text-based test reduction moved into test_utils * Remove some obsolete utils * Rename reduction utils to something more sensible * List actual reduced failures in package parsing tests --- JuliaSyntax/test/fuzz_test.jl | 49 +----- JuliaSyntax/test/parse_packages.jl | 28 ++-- JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/test_utils.jl | 214 ++++++++++-------------- JuliaSyntax/tools/check_all_packages.jl | 22 +-- 5 files changed, 110 insertions(+), 204 deletions(-) diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index 2bf2b5bf5d092..f792b2c68c230 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -885,51 +885,6 @@ const cutdown_tokens = [ 
#------------------------------------------------------------------------------- -function parser_throws_exception(str) - try - JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true) - false - catch - true - end -end - -""" -Reduce test case via combination of bisection and random deletion. - -This is suited to randomly generated strings, but it's surprisingly effective -for code-like strings as well. -""" -function rand_reduce(str, parse_failure=parser_throws_exception) - while true - if length(str) <= 1 - return str - end - m1 = thisind(str, length(str)÷2) - m2 = nextind(str, m1) - if parse_failure(str[1:m1]) - str = str[1:m1] - elseif parse_failure(str[m2:end]) - str = str[m2:end] - else - chunklen = clamp(length(str)÷10, 1, 10) - reduced = false - for i = 1:100 - m = thisind(str, rand(1:length(str)-chunklen)) - s = str[1:m]*str[nextind(str, m+chunklen):end] - if parse_failure(s) - str = s - reduced = true - break - end - end - if !reduced - return str - end - end - end -end - # The parser should never throw an exception. To test whether this is true, # try passing randomly generated bad input data into it. 
function _fuzz_test(bad_input_iter) @@ -939,7 +894,7 @@ function _fuzz_test(bad_input_iter) JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true); catch exc !(exc isa InterruptException) || rethrow() - rstr = rand_reduce(str) + rstr = reduce_text(str, parser_throws_exception) @error "Parser threw exception" rstr exception=current_exceptions() push!(error_strings, rstr) end @@ -1005,7 +960,7 @@ Fuzz test parser against randomly generated binary strings """ function fuzz_binary(nbytes, N) bad_strs = _fuzz_test(String(rand(UInt8, nbytes)) for _ in 1:N) - rand_reduce.(bad_strs) + reduce_text.(bad_strs, parser_throws_exception) end """ diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 61aad92247b7a..d3bf9143fe367 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -25,23 +25,21 @@ end base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") @testset "Parse Base tests at $base_tests_path" begin - for f in find_source_in_path(base_tests_path) - @testset "Parse $(relpath(f, base_tests_path))" begin - # In julia-1.6, test/copy.jl had spurious syntax which became the - # multidimensional array syntax in 1.7. - endswith(f, "copy.jl") && v"1.6" <= VERSION < v"1.7" && continue - - # syntax.jl has some intentially weird syntax which we parse - # differently than the flisp parser, and some cases which we've - # decided are syntax errors. - endswith(f, "syntax.jl") && continue + test_parse_all_in_path(base_tests_path) do f + # In julia-1.6, test/copy.jl had spurious syntax which became the + # multidimensional array syntax in 1.7. + if endswith(f, "copy.jl") && v"1.6" <= VERSION < v"1.7" + return false + end - @test parsers_agree_on_file(f) - # TODO: - # exprs_equal = endswith(f, "syntax.jl") ? 
- # exprs_roughly_equal : exprs_equal_no_linenum - # @test parsers_agree_on_file(f; exprs_equal=exprs_equal) + # syntax.jl has some intentially weird syntax which we parse + # differently than the flisp parser, and some cases which we've + # decided are syntax errors. + if endswith(f, "syntax.jl") + return false end + + return true end end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 9bb664f1e025f..b43cbf590f513 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -8,6 +8,7 @@ using JuliaSyntax: GreenNode, SyntaxNode, children, child, setchild!, SyntaxHead include("test_utils.jl") +include("fuzz_test.jl") # Tests for the test_utils go here to allow the utils to be included on their # own without invoking the tests. diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 2d76cf0bdad99..94333f07c99ba 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -51,31 +51,6 @@ function remove_all_linenums!(ex) remove_macro_linenums!(ex) end -function show_expr_text_diff(io::IO, showfunc, e1, e2; context=2) - if Sys.isunix() - mktemp() do path1, io1 - mktemp() do path2, io2 - showfunc(io1, e1); close(io1) - showfunc(io2, e2); close(io2) - run(pipeline(ignorestatus(`diff -U$context --color=always $path1 $path2`), io)) - end - end - else - showfunc(io, ex) - println(io, "------------------------------------") - showfunc(io, e2) - end - nothing -end - -# Parse text with JuliaSyntax vs reference parser and show a textural diff of -# the resulting expressions -function parse_diff(text, showfunc=dump) - ex = parsestmt(Expr, text, filename="none") - fl_ex = fl_parse(text) - show_expr_text_diff(stdout, showfunc, ex, fl_ex) -end - function kw_to_eq(ex) return Meta.isexpr(ex, :kw) ? Expr(:(=), ex.args...) 
: ex end @@ -179,8 +154,7 @@ function exprs_roughly_equal(fl_ex, ex) return true end -function parsers_agree_on_file(filename; exprs_equal=exprs_equal_no_linenum, - show_diff=false) +function parsers_agree_on_file(filename; kws...) text = try read(filename, String) catch @@ -188,6 +162,10 @@ function parsers_agree_on_file(filename; exprs_equal=exprs_equal_no_linenum, # ignore this case. return true end + parsers_agree_on_file(text, filename; kws...) +end + +function parsers_agree_on_file(text, filename; exprs_equal=exprs_equal_no_linenum) fl_ex = fl_parseall(text, filename=filename) if Meta.isexpr(fl_ex, :toplevel) && !isempty(fl_ex.args) && Meta.isexpr(fl_ex.args[end], (:error, :incomplete)) @@ -199,12 +177,7 @@ function parsers_agree_on_file(filename; exprs_equal=exprs_equal_no_linenum, stream = ParseStream(text) parse!(stream) ex = build_tree(Expr, stream, filename=filename) - if show_diff && ex != fl_ex - show_expr_text_diff(stdout, show, ex, fl_ex) - end return !JuliaSyntax.any_error(stream) && exprs_equal(fl_ex, ex) - # Could alternatively use - # exprs_roughly_equal(fl_ex, ex) catch exc @error "Parsing failed" filename exception=current_exceptions() return false @@ -220,10 +193,29 @@ function find_source_in_path(basedir) src_list end -function test_parse_all_in_path(basedir) - for f in find_source_in_path(basedir) - @testset "Parse $(relpath(f, basedir))" begin - @test parsers_agree_on_file(f) +test_parse_all_in_path(basedir) = test_parse_all_in_path(path->true, basedir) + +function test_parse_all_in_path(path_allowed::Function, basedir) + for filepath in find_source_in_path(basedir) + if !path_allowed(filepath) + continue + end + @testset "Parse $(relpath(filepath, basedir))" begin + text = try + read(filepath, String) + catch + # Something went wrong reading the file. This isn't a parser failure so + # ignore this case. 
+ continue + end + parsers_agree = parsers_agree_on_file(text, filepath, + exprs_equal=exprs_equal_no_linenum) + @test parsers_agree + if !parsers_agree + reduced_failures = reduce_text.(sourcetext.(reduce_tree(text)), + parsers_fuzzy_disagree) + @test reduced_failures == [] + end end end end @@ -247,7 +239,7 @@ function equals_flisp_parse(exprs_equal, tree) exprs_equal(fl_ex, ex) end -function _reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) +function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) if equals_flisp_parse(exprs_equal, tree) return false end @@ -261,7 +253,7 @@ function _reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum if is_trivia(child) || !haschildren(child) continue end - had_failing_subtrees |= _reduce_test(failing_subtrees, child; exprs_equal=exprs_equal) + had_failing_subtrees |= _reduce_tree(failing_subtrees, child; exprs_equal=exprs_equal) end end if !had_failing_subtrees @@ -271,113 +263,90 @@ function _reduce_test(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum end """ - reduce_test(text::AbstractString; exprs_equal=exprs_equal_no_linenum) - reduce_test(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) + reduce_tree(text::AbstractString; exprs_equal=exprs_equal_no_linenum) + reduce_tree(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) Select minimal subtrees of `text` or `tree` which are inconsistent between flisp and JuliaSyntax parsers. """ -function reduce_test(tree::SyntaxNode; kws...) +function reduce_tree(tree::SyntaxNode; kws...) subtrees = Vector{typeof(tree)}() - _reduce_test(subtrees, tree; kws...) + _reduce_tree(subtrees, tree; kws...) subtrees end -function reduce_test(text::AbstractString; kws...) +function reduce_tree(text::AbstractString; kws...) tree = parseall(SyntaxNode, text) - reduce_test(tree; kws...) + reduce_tree(tree; kws...) 
end -""" - format_reduced_tests(out::IO, file_content) - -Reduced the syntax (a string or SyntaxNode) from `file_content` into the -minimal failing subtrees of syntax and write the results to `out`. -""" -function format_reduced_tests(out::IO, file_content; filename=nothing) - println(out, "#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - if !isnothing(filename) - println(out, "# $filename") - end - text = nothing +#------------------------------------------------------------------------------- +# Text-based test case reduction +function parser_throws_exception(text) try - rtrees = reduce_test(file_content) - for rt in rtrees - print(out, "\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n") - t = sourcetext(rt) - print(out, t) - if !endswith(t, '\n') - println(out) - end - end - catch exc - exc isa InterruptException && rethrow() - @error "Error reducing file" exception=current_exceptions() - print(out, file_content isa AbstractString ? - file_content : sourcetext(file_content)) + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, text, ignore_errors=true) + false + catch + true end end -function reduce_all_failures_in_path(basedir, outdir) - rm(outdir, force=true, recursive=true) - mkpath(outdir) - for filename in find_source_in_path(basedir) - if !parsers_agree_on_file(filename) - @info "Found failure" filename - bn,_ = splitext(basename(filename)) - outname = joinpath(outdir, "$bn.jl") - i = 1 - while isfile(outname) - outname = joinpath(outdir, "$bn-$i.jl") - i += 1 - end - open(outname, "w") do io - format_reduced_tests(io, read(filename, String), filename=filename) - end - end +function parsers_fuzzy_disagree(text::AbstractString) + fl_ex = fl_parseall(text, filename="none") + if Meta.isexpr(fl_ex, (:error,:incomplete)) || + (Meta.isexpr(fl_ex, :toplevel) && length(fl_ex.args) >= 1 && + Meta.isexpr(fl_ex.args[end], (:error,:incomplete))) + return false + end + try + ex = parseall(Expr, text, filename="none", 
ignore_errors=true) + return !exprs_roughly_equal(fl_ex, ex) + catch + @error "Reduction failed" text + return false end end -#------------------------------------------------------------------------------- -""" - itest_parse(production, code; version::VersionNumber=v"1.6") -Parse `code`, entering the recursive descent parser at the given function -`production`. This function shows the various tree representations on stdout -for debugging. """ -function itest_parse(production, code; version::VersionNumber=v"1.6") - stream = ParseStream(code; version=version) - production(JuliaSyntax.ParseState(stream)) - JuliaSyntax.validate_tokens(stream) - t = JuliaSyntax.build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"toplevel") - - println(stdout, "# Code:\n$code\n") - - println(stdout, "# Green tree:") - show(stdout, MIME"text/plain"(), t, code) - JuliaSyntax.show_diagnostics(stdout, stream, code) +Reduce text of a test case via combination of bisection and random deletion. - s = SyntaxNode(SourceFile(code, filename="none"), t) - println(stdout, "\n# SyntaxNode:") - show(stdout, MIME"text/x.sexpression"(), s) - - ex = Expr(s) - println(stdout, "\n\n# Julia Expr:") - show(stdout, MIME"text/plain"(), ex) - - f_ex = JuliaSyntax.remove_linenums!(fl_parse(code, raise=false)) - if JuliaSyntax.remove_linenums!(ex) != f_ex - printstyled(stdout, "\n\n# flisp Julia Expr:\n", color=:red) - show(stdout, MIME"text/plain"(), f_ex) - - printstyled(stdout, "\n\n# Diff of AST dump:\n", color=:red) - show_expr_text_diff(stdout, show, ex, f_ex, context=10) - # return (ex, f_ex) - # return (code, stream, t, s, ex) +This is suited to randomly generated strings, but it's surprisingly effective +for code-like strings as well. 
+""" +function reduce_text(str, parse_differs) + while true + if length(str) <= 1 + return str + end + m1 = thisind(str, length(str)÷2) + m2 = nextind(str, m1) + if parse_differs(str[1:m1]) + str = str[1:m1] + elseif parse_differs(str[m2:end]) + str = str[m2:end] + else + chunklen = clamp(length(str)÷10, 1, 10) + reduced = false + for i = 1:100 + m = thisind(str, rand(1:length(str)-chunklen)) + m3 = nextind(str, m+chunklen) + if m3 == nextind(str, m) + continue + end + s = str[1:m]*str[m3:end] + if parse_differs(s) + str = s + reduced = true + break + end + end + if !reduced + return str + end + end end - nothing end function show_green_tree(code; version::VersionNumber=v"1.6") @@ -385,7 +354,6 @@ function show_green_tree(code; version::VersionNumber=v"1.6") sprint(show, MIME"text/plain"(), t, code) end - #------------------------------------------------------------------------------- # Parse s-expressions function parse_sexpr(code) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 81dae3ff575bc..ccadb6660d07d 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -17,22 +17,6 @@ mismatch_count = 0 t0 = time() exceptions = [] -function parsers_disagree(text::AbstractString) - fl_ex = fl_parseall(text, filename="none") - if Meta.isexpr(fl_ex, (:error,:incomplete)) || - (Meta.isexpr(fl_ex, :toplevel) && length(fl_ex.args) >= 1 && - Meta.isexpr(fl_ex.args[end], (:error,:incomplete))) - return false - end - try - ex = parseall(Expr, text, filename="none", ignore_errors=true) - return !exprs_roughly_equal(fl_ex, ex) - catch - @error "Reduction failed" text - return false - end -end - all_reduced_failures = String[] Logging.with_logger(TerminalLogger()) do @@ -49,13 +33,13 @@ Logging.with_logger(TerminalLogger()) do if !exprs_roughly_equal(e2, e1) mismatch_count += 1 failing_source = sprint(context=:color=>true) do io - for c in reduce_test(text) + for c in reduce_tree(text) 
JuliaSyntax.highlight(io, c.source, range(c), context_lines_inner=5) println(io, "\n") end end - reduced_failures = rand_reduce.(sourcetext.(reduce_test(text)), - parsers_disagree) + reduced_failures = reduce_text.(sourcetext.(reduce_tree(text)), + parsers_fuzzy_disagree) append!(all_reduced_failures, reduced_failures) @error("Parsers succeed but disagree", fpath, From ab826a918917c235c3d153c8740e523d2b4f74d8 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 25 Apr 2023 22:25:47 +1000 Subject: [PATCH 0629/1109] Fix parsing of chained unary and unary type operators (JuliaLang/JuliaSyntax.jl#249) * Parsing `* <: A` doesn't crash the parser * `+ <: A` now parses correctly as `(call-pre + (<:-pre A))`. --- JuliaSyntax/src/parser.jl | 15 ++++++++++----- JuliaSyntax/test/parser.jl | 8 +++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8a880d203a1a8..e29e80e1aafba 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1300,7 +1300,6 @@ function parse_unary(ps::ParseState) end end else - @assert !is_type_operator(op_t) # `<:x` handled in parse_unary_subtype if is_unary_op(op_t) # Normal unary calls # +x ==> (call-pre + x) @@ -1310,15 +1309,21 @@ function parse_unary(ps::ParseState) # -0x1 ==> (call-pre - 0x01) # - 2 ==> (call-pre - 2) # .-2 ==> (dotcall-pre - 2) - bump_dotsplit(ps, EMPTY_FLAGS) + op_pos = bump_dotsplit(ps, EMPTY_FLAGS) else # /x ==> (call-pre (error /) x) # +₁ x ==> (call-pre (error +₁) x) - # .<: x ==> (dotcall-pre (error .<:) x) - bump(ps, error="not a unary operator") + # .<: x ==> (dotcall-pre (error (. <:)) x) + bump_dotsplit(ps, EMPTY_FLAGS, emit_dot_node=true) + op_pos = emit(ps, mark, K"error", error="not a unary operator") end parse_unary(ps) - emit(ps, mark, is_dotted(op_t) ? 
K"dotcall" : K"call", PREFIX_OP_FLAG) + if is_type_operator(op_t) + reset_node!(ps, op_pos, flags=TRIVIA_FLAG) + emit(ps, mark, op_k, PREFIX_OP_FLAG) + else + emit(ps, mark, is_dotted(op_t) ? K"dotcall" : K"call", PREFIX_OP_FLAG) + end end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d31733fa69b58..4ff9629730760 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -260,7 +260,7 @@ tests = [ # Not a unary operator "/x" => "(call-pre (error /) x)" "+₁ x" => "(call-pre (error +₁) x)" - ".<: x" => "(dotcall-pre (error .<:) x)" + ".<: x" => "(dotcall-pre (error (. <:)) x)" "?\"str\"" => """(call-pre (error ?) (string "str"))""" ], JuliaSyntax.parse_factor => [ @@ -283,6 +283,9 @@ tests = [ "<: x" => "(<:-pre x)" "<: <: x" => "(<:-pre (<:-pre x))" "<: A where B" => "(<:-pre (where A B))" + # FIXME: The following bizarre precedence seems broken, but is + # compatible with the reference parser (see #248) + "+ <: A where B" => "(where (call-pre + (<:-pre A)) B)" # Really for parse_where "x where \n {T}" => "(where x T)" "x where {T,S}" => "(where x T S)" @@ -291,6 +294,9 @@ tests = [ "x where T" => "(where x T)" "x where \n T" => "(where x T)" "x where T<:S" => "(where x (<: T S))" + # nested unary and unary-syntactic ops + "<: + <: + A" => "(<:-pre (call-pre + (<:-pre (call-pre + A))))" + "* <: A" => "(call-pre (error *) (<:-pre A))" ], JuliaSyntax.parse_unary_prefix => [ "&)" => "&" From 7d5c51141654698c62e67527c3209a2ca36f5ea0 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 25 Apr 2023 22:31:14 +1000 Subject: [PATCH 0630/1109] Minor doc addition for `K"juxtapose"` and `return` --- JuliaSyntax/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index e858cb595af7c..e91fc4bfa7ca3 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -405,6 +405,8 @@ the source text more closely. 
* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) * `@.` is not lowered to `@__dot__` inside the parser (#146) * Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) +* Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) +* `return` without a value has zero children, rather than lowering to `return nothing` (#220) ### Containers for string-like constructs From 3e2323576af54eaddff3011c4d86eac3dbc6e133 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 26 Apr 2023 16:48:24 +1000 Subject: [PATCH 0631/1109] Avoid crashing on ambiguous tokenization of char vs adjoint (JuliaLang/JuliaSyntax.jl#251) The tokenizer can't really solve this case, it needs help from the parser. ie, tighter coupling of tokenizer and parser. However the workarounds here avoid crashing the parser and even gives the right result in most cases. --- JuliaSyntax/src/parser.jl | 32 +++++++++++++++++++++++--------- JuliaSyntax/test/parser.jl | 20 ++++++++++++++------ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e29e80e1aafba..0b04160b52275 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3389,8 +3389,25 @@ function parse_atom(ps::ParseState, check_identifiers=true) # char literal bump(ps, TRIVIA_FLAG) k = peek(ps) - if k == K"Char" - bump(ps) + if k == K"'" + # '' ==> (char (error)) + bump_invisible(ps, K"error", error="empty character literal") + elseif k == K"EndMarker" + # ' ==> (char (error)) + bump_invisible(ps, K"error", error="unterminated character literal") + else + if k == K"Char" + bump(ps) + else + # FIXME: This case is actually a tokenization error. + # Make a best-effort attempt to workaround this for now by + # remapping the kind. 
This needs to be fixed by rewinding the + # tokenizer's buffer and re-tokenizing the next token as a + # char. (A lot of work for a very obscure edge case) + # + # x in'c' ==> (call-i x in (char 'c')) + bump(ps, remap_kind=K"Char") + end if peek(ps) == K"'" # 'a' ==> (char 'a') # 'α' ==> (char 'α') @@ -3401,15 +3418,12 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump_invisible(ps, K"error", TRIVIA_FLAG, error="unterminated character literal") end - elseif k == K"'" - # '' ==> (char (error)) - bump_invisible(ps, K"error", error="empty character literal") - else - # ' ==> (char (error)) - @check k == K"EndMarker" - bump_invisible(ps, K"error", error="unterminated character literal") end emit(ps, mark, K"char") + elseif leading_kind == K"Char" + # FIXME: This is a tokenization error and should be preceeded with + # K"'". However this workaround is better than emitting a bare Char. + bump(ps, remap_kind=K"Identifier") elseif leading_kind == K":" # symbol/expression quote # :foo ==> (quote-: foo) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4ff9629730760..8a53a86ee6686 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -948,19 +948,27 @@ tests = [ end end -parseall_test_specs = [ +parsestmt_test_specs = [ # whitespace before keywords in space-insensitive mode - "(y::\nif x z end)" => "(toplevel (parens (::-i y (if x (block z)))))" + "(y::\nif x z end)" => "(parens (::-i y (if x (block z))))" + # parsing of tricky primes + "x in'c'" => "(call-i x in (char 'c'))" + "1where'c'" => "(where 1 (char 'c'))" + ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" # The following may not be ideal error recovery! 
But at least the parser # shouldn't crash - "@(x y)" => "(toplevel (macrocall (parens @x (error-t y))))" - "|(&\nfunction" => "(toplevel (call | (& (function (error (error)) (block (error)) (error-t))) (error-t)))" + "@(x y)" => "(macrocall (parens @x (error-t y)))" + "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" + + # The following are currently broken but at least the parser shouldn't + # crash. + "x in' '" => "(call-i x in (char (error))) (error-t ')" ] @testset "Parser does not crash on broken code" begin - @testset "$(repr(input))" for (input, output) in parseall_test_specs - test_parse(JuliaSyntax.parse_toplevel, input, output) + @testset "$(repr(input))" for (input, output) in parsestmt_test_specs + test_parse(JuliaSyntax.parse_stmts, input, output) end end From ce0ed21ae674db6de80e14e762d4d363849e6016 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 26 Apr 2023 18:07:07 +1000 Subject: [PATCH 0632/1109] Move all Expr conversion tests to test/expr.jl (JuliaLang/JuliaSyntax.jl#253) --- JuliaSyntax/test/expr.jl | 108 ++++++++++++++++++++++++++++++++- JuliaSyntax/test/parser.jl | 119 +++++++++++++++++-------------------- 2 files changed, 160 insertions(+), 67 deletions(-) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index b7583fb0571a1..47043253af2b3 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -70,6 +70,14 @@ Expr(:function, :f) @test parsestmt(Expr, "macro f end") == Expr(:macro, :f) + + # weird cases with extra parens + @test parsestmt(Expr, "function (f() where T) end") == + Expr(:function, Expr(:where, Expr(:call, :f), :T), + Expr(:block, LineNumberNode(1), LineNumberNode(1))) + @test parsestmt(Expr, "function (f()::S) end") == + Expr(:function, Expr(:(::), Expr(:call, :f), :S), + Expr(:block, LineNumberNode(1), LineNumberNode(1))) end @testset "elseif" begin @@ -180,10 +188,22 @@ Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) @test parsestmt(Expr, "f(a=1; 
b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) - - # Infix call = is not :kw + @test parsestmt(Expr, "f(a; b; c)") == + Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) + @test parsestmt(Expr, "+(a=1,)") == + Expr(:call, :+, Expr(:kw, :a, 1)) + @test parsestmt(Expr, "(a=1)()") == + Expr(:call, Expr(:(=), :a, 1)) + + # Operator calls: = is not :kw @test parsestmt(Expr, "(x=1) != 2") == Expr(:call, :!=, Expr(:(=), :x, 1), 2) + @test parsestmt(Expr, "+(a=1)") == + Expr(:call, :+, Expr(:(=), :a, 1)) + @test parsestmt(Expr, "(a=1)'") == + Expr(Symbol("'"), Expr(:(=), :a, 1)) + @test parsestmt(Expr, "(a=1)'ᵀ") == + Expr(:call, Symbol("'ᵀ"), Expr(:(=), :a, 1)) # Dotcall @test parsestmt(Expr, "f.(a=1; b=2)") == @@ -232,7 +252,6 @@ Expr(:call, :f, Expr(:.=, :a, 1)) # = inside parens in calls and tuples - # (TODO: we should warn for these cases.) @test parsestmt(Expr, "f(((a = 1)))") == Expr(:call, :f, Expr(:kw, :a, 1)) @test parsestmt(Expr, "(((a = 1)),)") == @@ -244,6 +263,9 @@ @testset "dotcall / dotted operators" begin @test parsestmt(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) @test parsestmt(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) + @test parsestmt(Expr, "f.(a=1; b=2)") == + Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + @test parsestmt(Expr, "(a=1).()") == Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) @test parsestmt(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) @test parsestmt(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) @test parsestmt(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), @@ -273,6 +295,27 @@ @test parsestmt(Expr, "A.:.+") == Expr(:., :A, QuoteNode(Symbol(".+"))) end + @testset "let" begin + @test parsestmt(Expr, "let x=1\n end") == + Expr(:let, Expr(:(=), :x, 1), Expr(:block, LineNumberNode(2))) + @test parsestmt(Expr, "let x=1 ; end") == + Expr(:let, Expr(:(=), :x, 1), Expr(:block, 
LineNumberNode(1))) + @test parsestmt(Expr, "let x ; end") == + Expr(:let, :x, Expr(:block, LineNumberNode(1))) + @test parsestmt(Expr, "let x::1 ; end") == + Expr(:let, Expr(:(::), :x, 1), Expr(:block, LineNumberNode(1))) + @test parsestmt(Expr, "let x=1,y=2 end") == + Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block, LineNumberNode(1))) + @test parsestmt(Expr, "let x+=1 ; end") == + Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block, LineNumberNode(1))) + @test parsestmt(Expr, "let ; end") == + Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1))) + @test parsestmt(Expr, "let ; body end") == + Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1), :body)) + @test parsestmt(Expr, "let\na\nb\nend") == + Expr(:let, Expr(:block), Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) + end + @testset "where" begin @test parsestmt(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) end @@ -289,6 +332,24 @@ # @__dot__ @test parsestmt(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) @test parsestmt(Expr, "using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) + + # var"" + @test parsestmt(Expr, "@var\"#\" a") == Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + @test parsestmt(Expr, "A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) + + # Square brackets + @test parsestmt(Expr, "@S[a,b]") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b)) + @test parsestmt(Expr, "@S[a b]") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b)) + @test parsestmt(Expr, "@S[a; b]") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b)) + @test parsestmt(Expr, "@S[a ;; b]", version=v"1.7") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) + end + + @testset "vect" begin + @test parsestmt(Expr, "[x,y ; z]") == 
Expr(:vect, Expr(:parameters, :z), :x, :y) end @testset "try" begin @@ -360,6 +421,47 @@ Expr(:struct, false, :A, Expr(:block, LineNumberNode(1))) @test parsestmt(Expr, "mutable struct A end") == Expr(:struct, true, :A, Expr(:block, LineNumberNode(1))) + + @test parsestmt(Expr, "struct A <: B \n a::X \n end") == + Expr(:struct, false, Expr(:<:, :A, :B), + Expr(:block, LineNumberNode(2), Expr(:(::), :a, :X))) + @test parsestmt(Expr, "struct A \n a \n b \n end") == + Expr(:struct, false, :A, + Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) + @test parsestmt(Expr, "struct A const a end", version=v"1.8") == + Expr(:struct, false, :A, Expr(:block, LineNumberNode(1), Expr(:const, :a))) + end + + @testset "export" begin + @test parsestmt(Expr, "export a") == Expr(:export, :a) + @test parsestmt(Expr, "export @a") == Expr(:export, Symbol("@a")) + @test parsestmt(Expr, "export @var\"'\"") == Expr(:export, Symbol("@'")) + @test parsestmt(Expr, "export a, \n @b") == Expr(:export, :a, Symbol("@b")) + @test parsestmt(Expr, "export +, ==") == Expr(:export, :+, :(==)) + @test parsestmt(Expr, "export \n a") == Expr(:export, :a) + end + + @testset "global/const/local" begin + @test parsestmt(Expr, "global x") == Expr(:global, :x) + @test parsestmt(Expr, "local x") == Expr(:local, :x) + @test parsestmt(Expr, "global x,y") == Expr(:global, :x, :y) + @test parsestmt(Expr, "global const x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + @test parsestmt(Expr, "local const x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + @test parsestmt(Expr, "const global x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + @test parsestmt(Expr, "const local x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + @test parsestmt(Expr, "const x,y = 1,2") == Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) + @test parsestmt(Expr, "const x = 1") == Expr(:const, Expr(:(=), :x, 1)) + @test parsestmt(Expr, "global x ~ 1") == Expr(:global, Expr(:call, 
:~, :x, 1)) + @test parsestmt(Expr, "global x += 1") == Expr(:global, Expr(:+=, :x, 1)) + end + + @testset "tuples" begin + @test parsestmt(Expr, "(;)") == Expr(:tuple, Expr(:parameters)) + @test parsestmt(Expr, "(; a=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) + @test parsestmt(Expr, "(; a=1; b=2)") == + Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + @test parsestmt(Expr, "(a; b; c,d)") == + Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a) end @testset "module" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8a53a86ee6686..e4d6604cccef2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -227,7 +227,7 @@ tests = [ "+(a,b)" => "(call + a b)" ".+(a,)" => "(call (. +) a)" "(.+)(a)" => "(call (parens (. +)) a)" - "+(a=1,)" => "(call + (= a 1))" => Expr(:call, :+, Expr(:kw, :a, 1)) + "+(a=1,)" => "(call + (= a 1))" "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" @@ -243,7 +243,7 @@ tests = [ # Unary function calls with brackets as grouping, not an arglist ".+(a)" => "(dotcall-pre + (parens a))" "+(a;b)" => "(call-pre + (block-p a b))" - "+(a=1)" => "(call-pre + (parens (= a 1)))" => Expr(:call, :+, Expr(:(=), :a, 1)) + "+(a=1)" => "(call-pre + (parens (= a 1)))" # Unary operators have lower precedence than ^ "+(a)^2" => "(call-pre + (call-i (parens a) ^ 2))" ".+(a)^2" => "(dotcall-pre + (call-i (parens a) ^ 2))" @@ -326,10 +326,10 @@ tests = [ "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" "[@foo x]" => "(vect (macrocall @foo x))" "[@foo]" => "(vect (macrocall @foo))" - "@var\"#\" a" => "(macrocall (var @#) a)" => Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + "@var\"#\" a" => "(macrocall (var @#) a)" "@(A) x" => "(macrocall (parens @A) x)" "A.@x y" => "(macrocall (. A (quote @x)) y)" - "A.@var\"#\" a"=> "(macrocall (. 
A (quote (var @#))) a)" => Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) + "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" "@+x y" => "(macrocall @+ x y)" "A.@.x" => "(macrocall (. A (quote @.)) x)" # Macro names @@ -347,11 +347,9 @@ tests = [ # calls with brackets "f(a,b)" => "(call f a b)" - "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" => - Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) - "f(a; b; c)" => "(call f a (parameters b) (parameters c))" => - Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) - "(a=1)()" => "(call (parens (= a 1)))" => Expr(:call, Expr(:(=), :a, 1)) + "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" + "f(a; b; c)" => "(call f a (parameters b) (parameters c))" + "(a=1)()" => "(call (parens (= a 1)))" "f (a)" => "(call f (error-t) a)" "@x(a, b)" => "(macrocall-p @x a b)" "A.@x(y)" => "(macrocall-p (. A (quote @x)) y)" @@ -363,21 +361,17 @@ tests = [ "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" # square brackets - "@S[a,b]" => "(macrocall @S (vect a b))" => - Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b)) - "@S[a b]" => "(macrocall @S (hcat a b))" => - Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b)) - "@S[a; b]" => "(macrocall @S (vcat a b))" => - Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b)) + "@S[a,b]" => "(macrocall @S (vect a b))" + "@S[a b]" => "(macrocall @S (hcat a b))" + "@S[a; b]" => "(macrocall @S (vcat a b))" "A.@S[a]" => "(macrocall (. A (quote @S)) (vect a))" "@S[a].b" => "(. 
(macrocall @S (vect a)) (quote b))" - ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" => - Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) + ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" ((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))" "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" - "(a=1)[]" => "(ref (parens (= a 1)))" => Expr(:ref, Expr(:(=), :a, 1)) + "(a=1)[]" => "(ref (parens (= a 1)))" "a[end]" => "(ref a end)" "a[begin]" => "(ref a begin)" "a[:(end)]" => "(typed_hcat a (quote-: (parens (error-t))) (error-t))" @@ -394,9 +388,8 @@ tests = [ "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" "f.(a,b)" => "(dotcall f a b)" - "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" => - Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - "(a=1).()" => "(dotcall (parens (= a 1)))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) + "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" + "(a=1).()" => "(dotcall (parens (= a 1)))" "f. (x)" => "(dotcall f (error-t) x)" # Other dotted syntax "A.:+" => "(. 
A (quote-: +))" @@ -460,15 +453,15 @@ tests = [ "for x in xs end" => "(for (= x xs) (block))" "for x in xs, y in ys \n a \n end" => "(for (block (= x xs) (= y ys)) (block a))" # let - "let x=1\n end" => "(let (block (= x 1)) (block))" => Expr(:let, Expr(:(=), :x, 1), Expr(:block)) - "let x=1 ; end" => "(let (block (= x 1)) (block))" => Expr(:let, Expr(:(=), :x, 1), Expr(:block)) - "let x ; end" => "(let (block x) (block))" => Expr(:let, :x, Expr(:block)) - "let x::1 ; end" => "(let (block (::-i x 1)) (block))" => Expr(:let, Expr(:(::), :x, 1), Expr(:block)) - "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" => Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block)) - "let x+=1 ; end" => "(let (block (+= x 1)) (block))" => Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block)) - "let ; end" => "(let (block) (block))" => Expr(:let, Expr(:block), Expr(:block)) - "let ; body end" => "(let (block) (block body))" => Expr(:let, Expr(:block), Expr(:block, :body)) - "let\na\nb\nend" => "(let (block) (block a b))" => Expr(:let, Expr(:block), Expr(:block, :a, :b)) + "let x=1\n end" => "(let (block (= x 1)) (block))" + "let x=1 ; end" => "(let (block (= x 1)) (block))" + "let x ; end" => "(let (block x) (block))" + "let x::1 ; end" => "(let (block (::-i x 1)) (block))" + "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" + "let x+=1 ; end" => "(let (block (+= x 1)) (block))" + "let ; end" => "(let (block) (block))" + "let ; body end" => "(let (block) (block body))" + "let\na\nb\nend" => "(let (block) (block a b))" # abstract type "abstract type A end" => "(abstract A)" "abstract type A ; end" => "(abstract A)" @@ -482,12 +475,12 @@ tests = [ "primitive type A \$N end" => "(primitive A (\$ N))" "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" # struct - "struct A <: B \n a::X \n end" => "(struct (<: A B) (block (::-i a X)))" => Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, Expr(:(::), :a, :X))) - "struct A \n a 
\n b \n end" => "(struct A (block a b))" => Expr(:struct, false, :A, Expr(:block, :a, :b)) + "struct A <: B \n a::X \n end" => "(struct (<: A B) (block (::-i a X)))" + "struct A \n a \n b \n end" => "(struct A (block a b))" "mutable struct A end" => "(struct-mut A (block))" - ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" => Expr(:struct, false, :A, Expr(:block, Expr(:const, :a))) + ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))" - "struct A end" => "(struct A (block))" => Expr(:struct, false, :A, Expr(:block)) + "struct A end" => "(struct A (block))" "struct try end" => "(struct (error (try)) (block))" # return "return\nx" => "(return)" @@ -505,16 +498,16 @@ tests = [ "module A \n a \n b \n end" => "(module A (block a b))" """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))""" # export - "export a" => "(export a)" => Expr(:export, :a) - "export @a" => "(export @a)" => Expr(:export, Symbol("@a")) - "export @var\"'\"" => "(export (var @'))" => Expr(:export, Symbol("@'")) - "export a, \n @b" => "(export a @b)" => Expr(:export, :a, Symbol("@b")) - "export +, ==" => "(export + ==)" => Expr(:export, :+, :(==)) - "export \n a" => "(export a)" => Expr(:export, :a) - "export \$a, \$(a*b)" => "(export (\$ a) (\$ (parens (call-i a * b))))" => Expr(:export, Expr(:$, :a), Expr(:$, Expr(:call, :*, :a, :b))) + "export a" => "(export a)" + "export @a" => "(export @a)" + "export @var\"'\"" => "(export (var @'))" + "export a, \n @b" => "(export a @b)" + "export +, ==" => "(export + ==)" + "export \n a" => "(export a)" + "export \$a, \$(a*b)" => "(export (\$ a) (\$ (parens (call-i a * b))))" "export (x::T)" => "(export (error (parens (::-i x T))))" - "export outer" => "(export outer)" => Expr(:export, :outer) - "export (\$f)" => "(export (parens (\$ f)))" => Expr(:export, Expr(:$, :f)) + "export outer" => "(export outer)" + "export 
(\$f)" => "(export (parens (\$ f)))" ], JuliaSyntax.parse_if_elseif => [ "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif b (block yy) (block zz)))" @@ -532,18 +525,18 @@ tests = [ "if true; x ? true : elseif true end" => "(if true (block (if x true (error-t))) (elseif true (block)))" ], JuliaSyntax.parse_resword => [ - "global x" => "(global x)" => Expr(:global, :x) - "local x" => "(local x)" => Expr(:local, :x) - "global x,y" => "(global x y)" => Expr(:global, :x, :y) - "global const x = 1" => "(global (const (= x 1)))" => Expr(:const, Expr(:global, Expr(:(=), :x, 1))) - "local const x = 1" => "(local (const (= x 1)))" => Expr(:const, Expr(:local, Expr(:(=), :x, 1))) - "const global x = 1" => "(const (global (= x 1)))" => Expr(:const, Expr(:global, Expr(:(=), :x, 1))) - "const local x = 1" => "(const (local (= x 1)))" => Expr(:const, Expr(:local, Expr(:(=), :x, 1))) - "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" => Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) - "const x = 1" => "(const (= x 1))" => Expr(:const, Expr(:(=), :x, 1)) + "global x" => "(global x)" + "local x" => "(local x)" + "global x,y" => "(global x y)" + "global const x = 1" => "(global (const (= x 1)))" + "local const x = 1" => "(local (const (= x 1)))" + "const global x = 1" => "(const (global (= x 1)))" + "const local x = 1" => "(const (local (= x 1)))" + "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" + "const x = 1" => "(const (= x 1))" "const x .= 1" => "(error (const (.= x 1)))" - "global x ~ 1" => "(global (call-i x ~ 1))" => Expr(:global, Expr(:call, :~, :x, 1)) - "global x += 1" => "(global (+= x 1))" => Expr(:global, Expr(:+=, :x, 1)) + "global x ~ 1" => "(global (call-i x ~ 1))" + "global x += 1" => "(global (+= x 1))" "const x" => "(error (const x))" "global const x" => "(global (error (const x)))" "const global x" => "(error (const (global x)))" @@ -590,10 +583,10 @@ tests = [ "function f()::S where T end" => "(function 
(where (::-i (call f) S) T) (block))" # Ugly cases for compat where extra parentheses existed and we've # already parsed at least the call part of the signature - "function (f() where T) end" => "(function (parens (where (call f) T)) (block))" => Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block)) + "function (f() where T) end" => "(function (parens (where (call f) T)) (block))" "function (f()) where T end" => "(function (where (parens (call f)) T) (block))" "function (f() where T) where U end" => "(function (where (parens (where (call f) T)) U) (block))" - "function (f()::S) end"=> "(function (parens (::-i (call f) S)) (block))" => Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block)) + "function (f()::S) end"=> "(function (parens (::-i (call f) S)) (block))" "function ((f()::S) where T) end" => "(function (parens (where (parens (::-i (call f) S)) T)) (block))" "function (x*y ) end" => "(function (parens (call-i x * y)) (block))" # body @@ -682,15 +675,13 @@ tests = [ "(x,y)" => "(tuple-p x y)" "(x=1, y=2)" => "(tuple-p (= x 1) (= y 2))" # Named tuples with initial semicolon - "(;)" => "(tuple-p (parameters))" => Expr(:tuple, Expr(:parameters)) - "(; a=1)" => "(tuple-p (parameters (= a 1)))" => Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) + "(;)" => "(tuple-p (parameters))" + "(; a=1)" => "(tuple-p (parameters (= a 1)))" # Extra credit: nested parameters and frankentuples "(x...; y)" => "(tuple-p (... x) (parameters y))" "(x...;)" => "(tuple-p (... 
x) (parameters))" - "(; a=1; b=2)" => "(tuple-p (parameters (= a 1)) (parameters (= b 2)))" => - Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - "(a; b; c,d)" => "(tuple-p a (parameters b) (parameters c d))" => - Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a) + "(; a=1; b=2)" => "(tuple-p (parameters (= a 1)) (parameters (= b 2)))" + "(a; b; c,d)" => "(tuple-p a (parameters b) (parameters c d))" "(a=1, b=2; c=3)" => "(tuple-p (= a 1) (= b 2) (parameters (= c 3)))" # Block syntax "(;;)" => "(block-p)" @@ -788,7 +779,7 @@ tests = [ "[x,\n y]" => "(vect x y)" "[x\n, y]" => "(vect x y)" "[x\n,, y]" => "(vect x (error-t ✘ y))" - "[x,y ; z]" => "(vect x y (parameters z))" => Expr(:vect, Expr(:parameters, :z), :x, :y) + "[x,y ; z]" => "(vect x y (parameters z))" "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" # parse_paren @@ -802,7 +793,7 @@ tests = [ # Macro names can be keywords "@end x" => "(macrocall @end x)" # __dot__ macro - "@. x" => "(macrocall @. x)" => Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1), :x) + "@. x" => "(macrocall @. 
x)" # cmd strings "``" => "(macrocall core_@cmd (cmdstring-r \"\"))" "`cmd`" => "(macrocall core_@cmd (cmdstring-r \"cmd\"))" From 21410dccd1790743676c5a939b3cd58004252d01 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 26 Apr 2023 11:36:02 +0200 Subject: [PATCH 0633/1109] make the global `_token_error_descriptions` variable const (JuliaLang/JuliaSyntax.jl#255) --- JuliaSyntax/src/kinds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index d9967e90c65c2..60e2979fcddd7 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1060,7 +1060,7 @@ function untokenize(k::Kind; unique=true) end # Error kind => description -_token_error_descriptions = Dict{Kind, String}( +const _token_error_descriptions = Dict{Kind, String}( K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#", K"ErrorInvalidNumericConstant" => "invalid numeric constant", K"ErrorHexFloatMustContainP" => "hex float literal must contain `p` or `P`", From 5d6f08e618e5cbd16c8045f884ea254b540fec91 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 26 Apr 2023 11:36:14 +0200 Subject: [PATCH 0634/1109] avoid a Core.Box in `_to_expr` (JuliaLang/JuliaSyntax.jl#256) --- JuliaSyntax/src/expr.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index c624387ccb0bc..c5a70155a99d0 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -221,7 +221,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # This case should only occur when there's an error inside the # parens, and we've passed ignore_errors=true to the parser. # Wrap in a block to preserve both the value and the error. - @check all(Meta.isexpr(args[j], :error) for j in 2:length(args)) + let args = args + @check all(Meta.isexpr(args[j], :error) for j in 2:length(args)) + end return Expr(:block, args...) 
end elseif headsym === :try From 3d99dd8be83f178ebb3fb8b0469810f0358ecfdc Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 26 Apr 2023 12:31:35 +0200 Subject: [PATCH 0635/1109] specialize on the number of varargs for `bump_split` (JuliaLang/JuliaSyntax.jl#254) --- JuliaSyntax/src/parse_stream.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 447e18ec52706..4b701bbd22514 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -738,7 +738,7 @@ example TODO: Are these the only cases? Can we replace this general utility with a simpler one which only splits preceding dots? """ -function bump_split(stream::ParseStream, split_spec...) +function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} tok = stream.lookahead[stream.lookahead_index] stream.lookahead_index += 1 b = _next_byte(stream) From 4e0da72b96bfaf0f55d7c1c1e1e06afc11ee28f8 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 26 Apr 2023 12:31:45 +0200 Subject: [PATCH 0636/1109] avoid a runtime dispatch when converting to expr (JuliaLang/JuliaSyntax.jl#257) --- JuliaSyntax/src/expr.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index c5a70155a99d0..8978eb3e301fe 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -11,7 +11,7 @@ function is_stringchunk(node) return k == K"String" || k == K"CmdString" end -function reorder_parameters!(args, params_pos) +function reorder_parameters!(args::Vector{Any}, params_pos) p = 0 for i = length(args):-1:1 if !Meta.isexpr(args[i], :parameters) @@ -24,7 +24,7 @@ function reorder_parameters!(args, params_pos) end # nest frankentuples parameters sections for i = length(args)-1:-1:p - pushfirst!(args[i].args, pop!(args)) + pushfirst!((args[i]::Expr).args, pop!(args)) end # Move parameters to args[params_pos] insert!(args, 
params_pos, pop!(args)) From 554575f5c3f614356e5e419f755b06e959e15151 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 28 Apr 2023 16:31:34 +1000 Subject: [PATCH 0637/1109] Support for lineno in core parser hooks (JuliaLang/JuliaSyntax.jl#259) Closes JuliaLang/JuliaSyntax.jl#241 --- JuliaSyntax/src/hooks.jl | 9 +++++---- JuliaSyntax/src/source_files.jl | 1 - JuliaSyntax/test/hooks.jl | 15 ++++++++++++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 6805bb11b75a4..65372bc1c9e0a 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -162,7 +162,8 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt end if any_error(stream) - tree = build_tree(SyntaxNode, stream, wrap_toplevel_as_kind=K"None") + tree = build_tree(SyntaxNode, stream, + wrap_toplevel_as_kind=K"None", first_line=lineno) _,err = _first_error(tree) # In the flisp parser errors are normally `Expr(:error, msg)` where # `msg` is a String. By using a ParseError for msg we can do fancy @@ -179,7 +180,7 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt "incomplete: premature end of input" error_ex = Expr(:incomplete, msg) else - error_ex = Expr(:error, ParseError(stream, filename=filename)) + error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno)) end ex = options === :all ? Expr(:toplevel, error_ex) : error_ex else @@ -190,8 +191,8 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt # # show_diagnostics(stdout, stream.diagnostics, code) # - # FIXME: Add support to lineno to this tree build (via SourceFile?) - ex = build_tree(Expr, stream; filename=filename, wrap_toplevel_as_kind=K"None") + ex = build_tree(Expr, stream; filename=filename, + wrap_toplevel_as_kind=K"None", first_line=lineno) if Meta.isexpr(ex, :None) # The None wrapping is only to give somewhere for trivia to be # attached; unwrap! 
diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 203523e326ce5..e0df0c19bcc44 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -22,7 +22,6 @@ function SourceFile(code::AbstractString; filename=nothing, first_line=1) line_starts = Int[1] for i in eachindex(code) # The line is considered to start after the `\n` - # FIXME: \r and \n\r code[i] == '\n' && push!(line_starts, i+1) end if isempty(code) || last(code) != '\n' diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 7c8f0af0f177a..a8460c4dc4ed9 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -10,10 +10,23 @@ @test JuliaSyntax._core_parser_hook(" x \n", "somefile", 1, 0, :atom) == Core.svec(:x,2) end - @testset "filename is used" begin + @testset "filename and lineno" begin ex = JuliaSyntax._core_parser_hook("@a", "somefile", 1, 0, :statement)[1] @test Meta.isexpr(ex, :macrocall) @test ex.args[2] == LineNumberNode(1, "somefile") + + ex = JuliaSyntax._core_parser_hook("@a", "otherfile", 2, 0, :statement)[1] + @test ex.args[2] == LineNumberNode(2, "otherfile") + + # Errors also propagate file & lineno + err = JuliaSyntax._core_parser_hook("[x)", "f1", 1, 0, :statement)[1].args[1] + @test err isa JuliaSyntax.ParseError + @test err.source.filename == "f1" + @test err.source.first_line == 1 + err = JuliaSyntax._core_parser_hook("[x)", "f2", 2, 0, :statement)[1].args[1] + @test err isa JuliaSyntax.ParseError + @test err.source.filename == "f2" + @test err.source.first_line == 2 end @testset "enable_in_core!" 
begin From 403e84f4fbd1bad48492761e97ecd098de5e46f6 Mon Sep 17 00:00:00 2001 From: c42f Date: Sat, 29 Apr 2023 15:05:26 +1000 Subject: [PATCH 0638/1109] Fixes for `Expr(:incomplete)` detection (JuliaLang/JuliaSyntax.jl#260) Fix JuliaLang/JuliaSyntax.jl#110 --- JuliaSyntax/src/hooks.jl | 32 +++++++++++++++++--------------- JuliaSyntax/test/hooks.jl | 7 ++++++- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 65372bc1c9e0a..93050a5e2e96b 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -29,15 +29,20 @@ end # next if the incomplete stream was to continue. (Though this is just rough. In # practice several categories are combined for the purposes of the REPL - # perhaps we can/should do something more precise in the future.) -function _incomplete_tag(n::SyntaxNode) +function _incomplete_tag(n::SyntaxNode, codelen) i,c = _first_error(n) - if isnothing(c) + if isnothing(c) || last_byte(c) < codelen || codelen == 0 return :none + elseif first_byte(c) < codelen + if kind(c) == K"ErrorEofMultiComment" && last_byte(c) == codelen + # This is the one weird case where the token itself is an + # incomplete error + return :comment + else + return :none + end end - # TODO: Check error hits last character - if kind(c) == K"ErrorEofMultiComment" - return :comment - elseif kind(c) == K"error" && begin + if kind(c) == K"error" && begin cs = children(c) length(cs) > 0 end @@ -71,10 +76,8 @@ function _incomplete_tag(n::SyntaxNode) end #------------------------------------------------------------------------------- -@static if isdefined(Core, :_setparser!) +if isdefined(Core, :_setparser!) const _set_core_parse_hook = Core._setparser! -elseif isdefined(Core, :set_parser) - const _set_core_parse_hook = Core.set_parser else function _set_core_parse_hook(parser) # HACK! 
Fool the runtime into allowing us to set Core._parse, even during @@ -164,12 +167,8 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt if any_error(stream) tree = build_tree(SyntaxNode, stream, wrap_toplevel_as_kind=K"None", first_line=lineno) - _,err = _first_error(tree) - # In the flisp parser errors are normally `Expr(:error, msg)` where - # `msg` is a String. By using a ParseError for msg we can do fancy - # error reporting instead. - if last_byte(err) == lastindex(code) - tag = _incomplete_tag(tree) + tag = _incomplete_tag(tree, lastindex(code)) + if tag !== :none # Here we replicate the particular messages msg = tag === :string ? "incomplete: invalid string syntax" : @@ -180,6 +179,9 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt "incomplete: premature end of input" error_ex = Expr(:incomplete, msg) else + # In the flisp parser errors are normally `Expr(:error, msg)` where + # `msg` is a String. By using a JuliaSyntax.ParseError for msg + # we can do fancy error reporting instead. error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno)) end ex = options === :all ? 
Expr(:toplevel, error_ex) : error_ex diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index a8460c4dc4ed9..e3beab08a7b05 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -54,7 +54,6 @@ @test Meta.isexpr(Meta.parse("[x"), :incomplete) for (str, tag) in [ - "" => :none "\"" => :string "\"\$foo" => :string "#=" => :comment @@ -101,6 +100,12 @@ "1, " => :other "1,\n" => :other "1, \n" => :other + + # Syntax which may be an error but is not incomplete + "" => :none + ")" => :none + "1))" => :none + "a b" => :none ] @testset "$(repr(str))" begin @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag From 479b4638868f7b895948e1e4fc52fe35cd9ca1a9 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 30 Apr 2023 06:50:02 +1000 Subject: [PATCH 0639/1109] Improve error message for closing tokens in parse_atom (JuliaLang/JuliaSyntax.jl#261) Fixes JuliaLang/JuliaSyntax.jl#114 --- JuliaSyntax/src/parser.jl | 2 +- JuliaSyntax/test/diagnostics.jl | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0b04160b52275..8abb272c31709 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3557,7 +3557,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) # ) ==> error msg = leading_kind == K"EndMarker" ? 
"premature end of input" : - "unexpected closing token" + "unexpected `$(untokenize(leading_kind))`" bump_invisible(ps, K"error", error=msg) else bump(ps, error="invalid syntax atom") diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 1c6de0a6af499..5b102060d17ae 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -24,6 +24,9 @@ end # TODO: better range @test diagnostic("@A.\$x a") == Diagnostic(6, 5, :error, "invalid macro name") + + @test diagnostic("a, , b") == + Diagnostic(4, 3, :error, "unexpected `,`") end @testset "parser warnings" begin From c4cddbee16d8d663e283fce37958ecdbf03750d6 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 2 May 2023 13:08:11 +1000 Subject: [PATCH 0640/1109] Minor fixes to test tools (JuliaLang/JuliaSyntax.jl#262) * Avoid crash in heuristic Expr comparisons * reduce_tree(text) returns text not a tree --- JuliaSyntax/test/test_utils.jl | 27 ++++++++++++++++--------- JuliaSyntax/tools/check_all_packages.jl | 2 +- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 94333f07c99ba..fa01c95ffa105 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -127,13 +127,14 @@ function exprs_roughly_equal(fl_ex, ex) args = args[1].args elseif h == :function && Meta.isexpr(fl_args[1], :block) blockargs = filter(x->!(x isa LineNumberNode), fl_args[1].args) - ps = blockargs[2:end] - for i = 1:length(ps) - if Meta.isexpr(ps[i], :(=)) - ps[i] = Expr(:kw, ps[i].args...) + posargs = blockargs[1:max(0, length(blockargs))] + kwargs = blockargs[2:end] + for i = 1:length(kwargs) + if Meta.isexpr(kwargs[i], :(=)) + kwargs[i] = Expr(:kw, kwargs[i].args...) end end - fl_args[1] = Expr(:tuple, Expr(:parameters, ps...), blockargs[1]) + fl_args[1] = Expr(:tuple, Expr(:parameters, kwargs...), posargs...) 
end if length(fl_args) != length(args) return false @@ -212,7 +213,7 @@ function test_parse_all_in_path(path_allowed::Function, basedir) exprs_equal=exprs_equal_no_linenum) @test parsers_agree if !parsers_agree - reduced_failures = reduce_text.(sourcetext.(reduce_tree(text)), + reduced_failures = reduce_text.(reduce_tree(text), parsers_fuzzy_disagree) @test reduced_failures == [] end @@ -263,11 +264,10 @@ function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum end """ - reduce_tree(text::AbstractString; exprs_equal=exprs_equal_no_linenum) reduce_tree(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) -Select minimal subtrees of `text` or `tree` which are inconsistent between -flisp and JuliaSyntax parsers. +Select minimal subtrees of `tree` which are inconsistent between flisp and +JuliaSyntax parsers. """ function reduce_tree(tree::SyntaxNode; kws...) subtrees = Vector{typeof(tree)}() @@ -275,9 +275,16 @@ function reduce_tree(tree::SyntaxNode; kws...) subtrees end +""" + reduce_tree(text::AbstractString; exprs_equal=exprs_equal_no_linenum) + +Find the minimal subtrees of the parsed form of `text` which are inconsistent +between flisp and JuliaSyntax parsers and return the source text of those +subtrees. +""" function reduce_tree(text::AbstractString; kws...) tree = parseall(SyntaxNode, text) - reduce_tree(tree; kws...) 
+ sourcetext.(reduce_tree(tree; kws...)) end diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index ccadb6660d07d..bc3ff071fa3c1 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -38,7 +38,7 @@ Logging.with_logger(TerminalLogger()) do println(io, "\n") end end - reduced_failures = reduce_text.(sourcetext.(reduce_tree(text)), + reduced_failures = reduce_text.(reduce_tree(text), parsers_fuzzy_disagree) append!(all_reduced_failures, reduced_failures) @error("Parsers succeed but disagree", From 70a2dca94e8ee1b55c8830ca724267822a669f09 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 3 May 2023 05:32:53 +1000 Subject: [PATCH 0641/1109] Add braces node around `T` in `x where {T}` (JuliaLang/JuliaSyntax.jl#264) This allows `x where T` to be distinguished from `x where {T}` easily which should be quite helpful for source formatting tooling. It's also more consistent with the other allowed (but weird) forms such as `x where {T S}`. --- JuliaSyntax/README.md | 1 + JuliaSyntax/src/expr.jl | 6 +++++- JuliaSyntax/src/parser.jl | 16 +++++++--------- JuliaSyntax/test/expr.jl | 3 +++ JuliaSyntax/test/parser.jl | 6 +++--- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index e91fc4bfa7ca3..3ee5033ac30d1 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -400,6 +400,7 @@ the source text more closely. * `K"macrocall"` - allow users to easily distinguish macrocalls with parentheses from those without them (#218) * Grouping parentheses are represented with a node of kind `K"parens"` (#222) +* The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`. * Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) * `global const` and `const global` are not normalized by the parser. 
This is done in `Expr` conversion (#130) * The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 8978eb3e301fe..6813e56fe2589 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -212,7 +212,11 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end end elseif headsym === :where - reorder_parameters!(args, 2) + if length(args) == 2 && Meta.isexpr(args[2], :braces) + a2 = args[2].args + reorder_parameters!(a2, 2) + args = Any[args[1], a2...] + end elseif headsym === :parens # parens are used for grouping and don't appear in the Expr AST if length(args) == 1 diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8abb272c31709..f38b34fe42fff 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1047,17 +1047,15 @@ function parse_where_chain(ps0::ParseState, mark) bump_trivia(ps, skip_newlines=true) k = peek(ps) if k == K"{" + # x where \n {T} ==> (where x (braces T)) + # x where {T,S} ==> (where x (braces T S)) + # Also various nonsensical forms permitted + # x where {T S} ==> (where x (bracescat (row T S))) + # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) m = position(ps) bump(ps, TRIVIA_FLAG) - # x where \n {T} ==> (where x T) - # x where {T,S} ==> (where x T S) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) - if ckind != K"vect" - # Various nonsensical forms permitted here - # x where {T S} ==> (where x (bracescat (row T S))) - # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) - emit_braces(ps, m, ckind, cflags) - end + emit_braces(ps, m, ckind, cflags) emit(ps, mark, K"where") else # x where T ==> (where x T) @@ -2170,7 +2168,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) end if peek(ps) == K"where" # Function signature where syntax - # 
function f() where {T} end ==> (function (where (call f) T) (block)) + # function f() where {T} end ==> (function (where (call f) (braces T)) (block)) # function f() where T end ==> (function (where (call f) T) (block)) parse_where_chain(ps, mark) end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 47043253af2b3..fe2f35a08b0ae 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -317,6 +317,9 @@ end @testset "where" begin + @test parsestmt(Expr, "A where T") == Expr(:where, :A, :T) + @test parsestmt(Expr, "A where {T}") == Expr(:where, :A, :T) + @test parsestmt(Expr, "A where {S, T}") == Expr(:where, :A, :S, :T) @test parsestmt(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e4d6604cccef2..e71b82deb6a23 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -287,8 +287,8 @@ tests = [ # compatible with the reference parser (see #248) "+ <: A where B" => "(where (call-pre + (<:-pre A)) B)" # Really for parse_where - "x where \n {T}" => "(where x T)" - "x where {T,S}" => "(where x T S)" + "x where \n {T}" => "(where x (braces T))" + "x where {T,S}" => "(where x (braces T S))" "x where {T S}" => "(where x (bracescat (row T S)))" "x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))" "x where T" => "(where x T)" @@ -578,7 +578,7 @@ tests = [ "function f body end" => "(function (error f) (block body))" "function f()::T end" => "(function (::-i (call f) T) (block))" "function f()::g(T) end" => "(function (::-i (call f) (call g T)) (block))" - "function f() where {T} end" => "(function (where (call f) T) (block))" + "function f() where {T} end" => "(function (where (call f) (braces T)) (block))" "function f() where T end" => "(function (where (call f) T) (block))" "function f()::S where T end" => "(function (where (::-i (call f) S) T) (block))" # Ugly cases for compat where extra 
parentheses existed and we've From 021b46e89e74215a23f9cdf3e4de94fd03e97dde Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 3 May 2023 05:34:32 +1000 Subject: [PATCH 0642/1109] Fix combined cartesian / flattened generators (JuliaLang/JuliaSyntax.jl#263) The combined cartesian/flattened generator syntax is a very unusual use case found in only one or two packages in the general registry. It looks like this ((a,b,c,d) for a=as, b=bs for c=cs, d=ds) Practically all such generators end up as a flat list (though ... should they?) but the ordering of tuples (a,b,c,d) in the list depends on which parts are cartesian (separated by commas) and which parts are flattened (separated by for's). Here we fix the `SyntaxNode` representation of these, preferring a single `generator` head rather than combined `flatten` and `generator`, and instead grouping the sets of cartesian iterations into `cartesian_iterator` groups. This keeps the tree structure flat and closer to the source text while also faithfully representing these unusual forms. For example, the above parses as (generator (tuple a b c d) (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))) In addition, we also make use of the `cartesian_iterator` head in `for` loops, replacing the `block` which was used previously (but is not semantically or syntactically a block!). Thus `for i=is, j=js body end` now parses as (for (cartesian_iterator (= i is) (= j js)) body) This also improves Expr conversion, removing some special cases which were necessary when processing `block's` in that situation. 
--- JuliaSyntax/README.md | 27 ++++++++--- JuliaSyntax/src/expr.jl | 97 +++++++++++++++++++++----------------- JuliaSyntax/src/kinds.jl | 2 +- JuliaSyntax/src/parser.jl | 86 ++++++++++++++------------------- JuliaSyntax/test/expr.jl | 82 +++++++++++++++++++++++++++----- JuliaSyntax/test/parser.jl | 15 +++--- 6 files changed, 191 insertions(+), 118 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 3ee5033ac30d1..34ece75dab2c2 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -375,7 +375,7 @@ The children of our trees are strictly in source order. This has many consequences in places where `Expr` reorders child expressions. * Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`. -* Flattened generators are represented in source order +* Generators are represented in source order as a single node rather than multiple nested flatten and generator expressions. ### No `LineNumberNode`s @@ -435,15 +435,16 @@ class of tokenization errors and lets the parser deal with them. * Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) +* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. 
Unlike other uses of `block` (see also generators). ## More detail on tree differences -### Flattened generators +### Generators Flattened generators are uniquely problematic because the Julia AST doesn't respect a key rule we normally expect: that the children of an AST node are a -*contiguous* range in the source text. This is because the `for`s in +*contiguous* range in the source text. For example, the `for`s in `[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to mean @@ -470,16 +471,30 @@ however, note that if this tree were flattened, the order would be source order. However, our green tree is strictly source-ordered, so we must deviate from the -Julia AST. The natural representation seems to be to remove the generators and -use a flattened structure: +Julia AST. We deal with this by grouping cartesian products of iterators +(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and +use the presence of multiple iterator blocks rather than the `flatten` head to +distinguish flattened iterators. The nested flattens and generators of `Expr` +forms are reconstructed later. In this form the tree structure resembles the +source much more closely. 
For example, `(xy for x in xs for y in ys)` is parsed as ``` -(flatten +(generator xy (= x xs) (= y ys)) ``` +And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as + +``` +(generator + xy + (cartesian_iterator + (= x xs) + (= y ys))) +``` + ### Whitespace trivia inside strings For triple quoted strings, the indentation isn't part of the string data so diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 6813e56fe2589..540f23fbf12f3 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -126,41 +126,34 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # Convert children insert_linenums = (headsym === :block || headsym === :toplevel) && need_linenodes args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1)) - if headsym === :for && length(node_args) == 2 - # No line numbers in for loop iteration spec - args[1] = _to_expr(node_args[1], iteration_spec=true, need_linenodes=false) - args[2] = _to_expr(node_args[2]) - elseif headsym === :let && length(node_args) == 2 - # No line numbers in let statement binding list - args[1] = _to_expr(node_args[1], need_linenodes=false) - args[2] = _to_expr(node_args[2]) + eq_to_kw_in_call = + ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || + headsym === :ref + eq_to_kw_all = (headsym === :parameters && !map_kw_in_params) || + (headsym === :parens && eq_to_kw) + in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref + if insert_linenums && isempty(node_args) + push!(args, source_location(LineNumberNode, node.source, node.position)) else - eq_to_kw_in_call = - ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || - headsym === :ref - eq_to_kw_all = (headsym === :parameters && !map_kw_in_params) || - (headsym === :parens && eq_to_kw) - in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref - if insert_linenums && 
isempty(node_args) - push!(args, source_location(LineNumberNode, node.source, node.position)) - else - for i in 1:length(node_args) - n = node_args[i] - if insert_linenums - args[2*i-1] = source_location(LineNumberNode, n.source, n.position) - end - eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all - coalesce_dot_with_ops = i==1 && - (nodekind in KSet"call dotcall curly" || - nodekind == K"quote" && flags(node) == COLON_QUOTE) - args[insert_linenums ? 2*i : i] = - _to_expr(n, eq_to_kw=eq_to_kw, - map_kw_in_params=in_vcbr, - coalesce_dot=coalesce_dot_with_ops) - end - if nodekind == K"block" && has_flags(node, PARENS_FLAG) - popfirst!(args) + for i in 1:length(node_args) + n = node_args[i] + if insert_linenums + args[2*i-1] = source_location(LineNumberNode, n.source, n.position) end + eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all + coalesce_dot_with_ops = i==1 && + (nodekind in KSet"call dotcall curly" || + nodekind == K"quote" && flags(node) == COLON_QUOTE) + args[insert_linenums ? 2*i : i] = + _to_expr(n, eq_to_kw=eq_to_kw, + map_kw_in_params=in_vcbr, + coalesce_dot=coalesce_dot_with_ops, + iteration_spec=(headsym == :for && i == 1), + need_linenodes=!(headsym == :let && i == 1) + ) + end + if nodekind == K"block" && has_flags(node, PARENS_FLAG) + popfirst!(args) end end @@ -202,6 +195,10 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym in (:ref, :curly) # Move parameters blocks to args[2] reorder_parameters!(args, 2) + elseif headsym === :for + if Meta.isexpr(args[1], :cartesian_iterator) + args[1] = Expr(:block, args[1].args...) 
+ end elseif headsym in (:tuple, :vect, :braces) # Move parameters blocks to args[1] reorder_parameters!(args, 1) @@ -262,18 +259,32 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, push!(args, else_) end end - elseif headsym === :filter - pushfirst!(args, last(args)) - pop!(args) - elseif headsym === :flatten - # The order of nodes inside the generators in Julia's flatten AST - # is noncontiguous in the source text, so need to reconstruct - # Julia's AST here from our alternative `flatten` expression. - gen = Expr(:generator, args[1], args[end]) - for i in length(args)-1:-1:2 - gen = Expr(:flatten, Expr(:generator, gen, args[i])) + elseif headsym === :generator + # Reconstruct the nested Expr form for generator from our flatter + # source-ordered `generator` format. + gen = args[1] + for j = length(args):-1:2 + if Meta.isexpr(args[j], :cartesian_iterator) + gen = Expr(:generator, gen, args[j].args...) + else + gen = Expr(:generator, gen, args[j]) + end + if j < length(args) + # Additional `for`s flatten the inner generator + gen = Expr(:flatten, gen) + end end return gen + elseif headsym == :filter + @assert length(args) == 2 + iterspec = args[1] + outargs = Any[args[2]] + if Meta.isexpr(iterspec, :cartesian_iterator) + append!(outargs, iterspec.args) + else + push!(outargs, iterspec) + end + args = outargs elseif headsym in (:nrow, :ncat) # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 60e2979fcddd7..f32ccf56f2440 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -905,7 +905,7 @@ const _kind_names = # Comprehensions "generator" "filter" - "flatten" + "cartesian_iterator" "comprehension" "typed_comprehension" "END_SYNTAX_KINDS" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f38b34fe42fff..292b23271e5c4 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl 
@@ -1783,13 +1783,9 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"while") elseif word == K"for" # for x in xs end ==> (for (= x xs) (block)) - # for x in xs, y in ys \n a \n end ==> (for (block (= x xs) (= y ys)) (block a)) + # for x in xs, y in ys \n a \n end ==> (for (cartesian_iterator (= x xs) (= y ys)) (block a)) bump(ps, TRIVIA_FLAG) - m = position(ps) - n_subexprs = parse_comma_separated(ps, parse_iteration_spec) - if n_subexprs > 1 - emit(ps, m, K"block") - end + parse_iteration_specs(ps) parse_block(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"for") @@ -2625,6 +2621,16 @@ function parse_iteration_spec(ps::ParseState) emit(ps, mark, K"=") end +# Parse an iteration spec, or a comma separate list of such for for loops and +# generators +function parse_iteration_specs(ps::ParseState) + mark = position(ps) + n_iters = parse_comma_separated(ps, parse_iteration_spec) + if n_iters > 1 + emit(ps, mark, K"cartesian_iterator") + end +end + # flisp: parse-space-separated-exprs function parse_space_separated_exprs(ps::ParseState) ps = with_space_sensitive(ps) @@ -2669,61 +2675,41 @@ function parse_vect(ps::ParseState, closer) return (K"vect", EMPTY_FLAGS) end -# Flattened generators are hard because the Julia AST doesn't respect a key -# rule we normally expect: that the children of an AST node are a contiguous -# range in the source text. This is because the `for`s in -# `[xy for x in xs for y in ys]` are parsed in the normal order of a for as +# Parse generators # -# (flatten -# (generator -# (generator -# xy -# y in ys) -# x in xs)) +# We represent generators quite differently from `Expr`: +# * Cartesian products of iterators are grouped within cartesian_iterator +# nodes, as in the short form of `for` loops. 
+# * The `generator` kind is used for both cartesian and flattened generators # -# We deal with this by only emitting the flatten: -# -# (flatten xy (= x xs) (= y ys)) -# -# then reconstructing the nested flattens and generators when converting to Expr. -# -# [x for a = as for b = bs if cond1 for c = cs if cond2] ==> (comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2))) -# [x for a = as if begin cond2 end] => (comprehension (generator x (filter (= a as) (block cond2)))) +# (x for a in as for b in bs) ==> (parens (generator x (= a as) (= b bs))) +# (x for a in as, b in bs) ==> (parens (generator x (cartesian_iterator (= a as) (= b bs)))) +# (x for a in as, b in bs if z) ==> (parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z))) # # flisp: parse-generator -function parse_generator(ps::ParseState, mark, flatten=false) - t = peek_token(ps) - if !preceding_whitespace(t) - # [(x)for x in xs] ==> (comprehension (generator (parens x) (error) (= x xs))) - bump_invisible(ps, K"error", TRIVIA_FLAG, - error="Expected space before `for` in generator") - end - @check kind(t) == K"for" - bump(ps, TRIVIA_FLAG) - filter_mark = position(ps) - parse_comma_separated(ps, parse_iteration_spec) - if peek(ps) == K"if" - # (a for x in xs if cond) ==> (parens (generator a (filter (= x xs) cond))) +function parse_generator(ps::ParseState, mark) + while (t = peek_token(ps); kind(t) == K"for") + if !preceding_whitespace(t) + # ((x)for x in xs) ==> (parens (generator (parens x) (error) (= x xs))) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="Expected space before `for` in generator") + end bump(ps, TRIVIA_FLAG) - parse_cond(ps) - emit(ps, filter_mark, K"filter") - end - t = peek_token(ps) - if kind(t) == K"for" - # (xy for x in xs for y in ys) ==> (parens (flatten xy (= x xs) (= y ys))) - # (xy for x in xs for y in ys for z in zs) ==> (parens (flatten xy (= x xs) (= y ys) (= z zs))) - parse_generator(ps, mark, true) - if !flatten - 
emit(ps, mark, K"flatten") + iter_mark = position(ps) + parse_iteration_specs(ps) + if peek(ps) == K"if" + # (x for a in as if z) ==> (parens (generator x (filter (= a as) z))) + bump(ps, TRIVIA_FLAG) + parse_cond(ps) + emit(ps, iter_mark, K"filter") end - elseif !flatten - # (x for a in as) ==> (parens (generator x (= a as))) - emit(ps, mark, K"generator") end + emit(ps, mark, K"generator") end # flisp: parse-comprehension function parse_comprehension(ps::ParseState, mark, closer) + # [x for a in as] ==> (comprehension (generator x a in as)) ps = ParseState(ps, whitespace_newline=true, space_sensitive=false, end_symbol=false) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index fe2f35a08b0ae..711c47535e101 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -98,18 +98,7 @@ ) end - @testset "No line numbers in for/let bindings" begin - @test parsestmt(Expr, "for i=is, j=js\nbody\nend") == - Expr(:for, - Expr(:block, - Expr(:(=), :i, :is), - Expr(:(=), :j, :js), - ), - Expr(:block, - LineNumberNode(2), - :body - ) - ) + @testset "No line numbers in let bindings" begin @test parsestmt(Expr, "let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, @@ -142,6 +131,28 @@ )) end + @testset "for" begin + @test parsestmt(Expr, "for i=is body end") == + Expr(:for, + Expr(:(=), :i, :is), + Expr(:block, + LineNumberNode(1), + :body + ) + ) + @test parsestmt(Expr, "for i=is, j=js\nbody\nend") == + Expr(:for, + Expr(:block, + Expr(:(=), :i, :is), + Expr(:(=), :j, :js), + ), + Expr(:block, + LineNumberNode(2), + :body + ) + ) + end + @testset "Long form anonymous functions" begin @test parsestmt(Expr, "function (xs...)\nbody end") == Expr(:function, @@ -355,6 +366,53 @@ @test parsestmt(Expr, "[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y) end + @testset "generators" begin + @test parsestmt(Expr, "(x for a in as for b in bs)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:(=), :b, :bs)), + Expr(:(=), :a, :as))) + 
@test parsestmt(Expr, "(x for a in as, b in bs)") == + Expr(:generator, :x, Expr(:(=), :a, :as), Expr(:(=), :b, :bs)) + @test parsestmt(Expr, "(x for a in as, b in bs if z)") == + Expr(:generator, :x, + Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) + @test parsestmt(Expr, "(x for a in as, b in bs for c in cs, d in ds)") == + Expr(:flatten, + Expr(:generator, + Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)), + Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) + @test parsestmt(Expr, "(x for a in as for b in bs if z)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :b, :bs))), + Expr(:(=), :a, :as))) + @test parsestmt(Expr, "(x for a in as if z for b in bs)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:(=), :b, :bs)), + Expr(:filter, :z, Expr(:(=), :a, :as)))) + @test parsestmt(Expr, "[x for a = as for b = bs if cond1 for c = cs if cond2]" ) == + Expr(:comprehension, + Expr(:flatten, + Expr(:generator, + Expr(:flatten, + Expr(:generator, + Expr(:generator, + :x, + Expr(:filter, + :cond2, + Expr(:(=), :c, :cs))), + Expr(:filter, + :cond1, + Expr(:(=), :b, :bs)))), + Expr(:(=), :a, :as)))) + @test parsestmt(Expr, "[x for a = as if begin cond2 end]" ) == + Expr(:comprehension, Expr(:generator, :x, + Expr(:filter, + Expr(:block, LineNumberNode(1), :cond2), + Expr(:(=), :a, :as)))) + @test parsestmt(Expr, "(x for a in as if z)") == + Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as))) + end + @testset "try" begin @test parsestmt(Expr, "try x catch e; y end") == Expr(:try, diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e71b82deb6a23..a26994cd22301 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -451,7 +451,7 @@ tests = [ "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" # for "for x in xs end" => "(for (= x xs) (block))" - "for x in xs, y in ys \n a \n end" => "(for (block (= x xs) (= y ys)) (block 
a))" + "for x in xs, y in ys \n a \n end" => "(for (cartesian_iterator (= x xs) (= y ys)) (block a))" # let "let x=1\n end" => "(let (block (= x 1)) (block))" "let x=1 ; end" => "(let (block (= x 1)) (block))" @@ -766,13 +766,16 @@ tests = [ "[x for a in as]" => "(comprehension (generator x (= a as)))" "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" # parse_generator - "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" + "(x for a in as for b in bs)" => "(parens (generator x (= a as) (= b bs)))" + "(x for a in as, b in bs)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs))))" + "(x for a in as, b in bs if z)" => "(parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))" + "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))))" + "(x for a in as for b in bs if z)" => "(parens (generator x (= a as) (filter (= b bs) z)))" + "(x for a in as if z for b in bs)" => "(parens (generator x (filter (= a as) z) (= b bs)))" + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))" - "(a for x in xs if cond)" => "(parens (generator a (filter (= x xs) cond)))" - "(xy for x in xs for y in ys)" => "(parens (flatten xy (= x xs) (= y ys)))" - "(xy for x in xs for y in ys for z in zs)" => "(parens (flatten xy (= x xs) (= y ys) (= z zs)))" - "(x for a in as)" => "(parens (generator x (= a as)))" + "(x for a in as if z)" => "(parens (generator x (filter (= a as) z)))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" From 412d9f35a1df9ec8f852098c73fe44530a46f23a 
Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 3 May 2023 06:47:59 +1000 Subject: [PATCH 0643/1109] fix minor bug in check_all_packages --- JuliaSyntax/tools/check_all_packages.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index bc3ff071fa3c1..0fad0c30d2612 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -33,7 +33,7 @@ Logging.with_logger(TerminalLogger()) do if !exprs_roughly_equal(e2, e1) mismatch_count += 1 failing_source = sprint(context=:color=>true) do io - for c in reduce_tree(text) + for c in reduce_tree(parseall(SyntaxNode, text)) JuliaSyntax.highlight(io, c.source, range(c), context_lines_inner=5) println(io, "\n") end From c2357b24abdabe3a402e290277132e6a72382842 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 7 May 2023 14:40:13 +1000 Subject: [PATCH 0644/1109] Fix spurious warning for `import A.:` (JuliaLang/JuliaSyntax.jl#267) --- JuliaSyntax/src/parser.jl | 2 +- JuliaSyntax/test/diagnostics.jl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 292b23271e5c4..acf2fe263cf76 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2364,7 +2364,7 @@ function parse_atsym(ps::ParseState, allow_quotes=true) # export outer ==> (export outer) # export ($f) ==> (export ($ f)) mark = position(ps) - if allow_quotes && peek(ps) == K":" + if allow_quotes && peek(ps) == K":" && !is_closing_token(ps, peek(ps,2)) # import A.:+ ==> (import (importpath A (quote-: +))) emit_diagnostic(ps, warning="quoting with `:` is not required here") end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 5b102060d17ae..1d1efb1fdce18 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -40,6 +40,8 @@ end Diagnostic(9, 9, :warning, "space between dots in import path") 
@test diagnostic("import A.:+") == Diagnostic(10, 10, :warning, "quoting with `:` is not required here") + # No warning for import `:` symbol + @test diagnostic("import A.:, :", allow_multiple=true) == [] @test diagnostic("import A.(:+)") == Diagnostic(10, 13, :warning, "parentheses are not required here") @test diagnostic("export (x)") == From 6f794c630f610837588f525131afa62286cfda54 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 7 May 2023 15:40:12 +1000 Subject: [PATCH 0645/1109] Construct Expr directly from ParseStream (JuliaLang/JuliaSyntax.jl#268) Generalize `build_tree` so that we can more easily construct tree types other than `GreenNode`. Use this to construct `Expr` directly from `ParseStream` rather than constructing both GreenNode and SyntaxNode along the way. Fix a bunch of type instabilities in the `Expr` conversion code along the way. With these changes, parsing all of Base to `Expr` is sped up by about 35% overall and allocations reduced by around 50%. (Parsing to `Expr` is now comparable with parsing to `SyntaxNode`.) 
--- JuliaSyntax/src/expr.jl | 529 +++++++++++++++++------------ JuliaSyntax/src/green_tree.jl | 27 +- JuliaSyntax/src/hooks.jl | 4 +- JuliaSyntax/src/literal_parsing.jl | 82 ++++- JuliaSyntax/src/parse_stream.jl | 48 +-- JuliaSyntax/src/precompile.jl | 8 +- JuliaSyntax/src/syntax_tree.jl | 78 +---- JuliaSyntax/test/benchmark.jl | 6 +- JuliaSyntax/test/expr.jl | 386 ++++++++++++--------- JuliaSyntax/test/parser.jl | 5 - 10 files changed, 665 insertions(+), 508 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 540f23fbf12f3..4d0093afd7206 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -1,20 +1,43 @@ #------------------------------------------------------------------------------- # Conversion to Base.Expr -function is_eventually_call(ex) - return Meta.isexpr(ex, :call) || (Meta.isexpr(ex, (:where, :(::))) && - is_eventually_call(ex.args[1])) +""" + @isexpr(ex, head) + @isexpr(ex, head, nargs) + +Type inference friendly replacement for `Meta.isexpr`. + +When using the pattern +``` +if @isexpr(ex, headsym) + body +end +``` +Julia's type inference knows `ex isa Expr` inside `body`. But `Meta.isexpr` +hides this information from the compiler, for whatever reason. 
+""" +macro isexpr(ex, head) + ex isa Symbol || error("First argument to `@isexpr` must be a variable name") + :($(esc(ex)) isa Expr && $(esc(ex)).head == $(esc(head))) end -function is_stringchunk(node) - k = kind(node) - return k == K"String" || k == K"CmdString" +macro isexpr(ex, head, nargs) + ex isa Symbol || error("First argument to `@isexpr` must be a variable name") + :($(esc(ex)) isa Expr && + $(esc(ex)).head == $(esc(head)) && + length($(esc(ex)).args) == $(esc(nargs))) end -function reorder_parameters!(args::Vector{Any}, params_pos) +function is_eventually_call(ex) + return ex isa Expr && (ex.head === :call || + (ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1])) +end + +function _reorder_parameters!(args::Vector{Any}, params_pos) p = 0 for i = length(args):-1:1 - if !Meta.isexpr(args[i], :parameters) + ai = args[i] + if !@isexpr(ai, :parameters) break end p = i @@ -30,204 +53,230 @@ function reorder_parameters!(args::Vector{Any}, params_pos) insert!(args, params_pos, pop!(args)) end -function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, - eq_to_kw=false, map_kw_in_params=false, coalesce_dot=false) - nodekind = kind(node) - if !haschildren(node) - val = node.val +function _strip_parens(ex) + while true + if @isexpr(ex, :parens) + if length(ex.args) == 1 + ex = ex.args[1] + else + # Only for error cases + return Expr(:block, ex.args...) + end + else + return ex + end + end +end + +function _leaf_to_Expr(source, head, srcrange, node) + k = kind(head) + if k == K"core_@cmd" + return GlobalRef(Core, Symbol("@cmd")) + elseif k == K"MacroName" && view(source, srcrange) == "." + return Symbol("@__dot__") + elseif is_error(k) + return k == K"error" ? + Expr(:error) : + Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") + else + val = isnothing(node) ? 
parse_julia_literal(source, head, srcrange) : node.val if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr # representation of these. - str = replace(sourcetext(node), '_'=>"") - headsym = :macrocall + str = replace(source[srcrange], '_'=>"") macname = val isa Int128 ? Symbol("@int128_str") : val isa UInt128 ? Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) - elseif nodekind == K"core_@cmd" - return GlobalRef(Core, Symbol("@cmd")) - elseif nodekind == K"MacroName" && val === Symbol("@.") - return Symbol("@__dot__") - elseif is_error(nodekind) - # TODO: Get non-token error messages in here as well, somehow? - # There's an awkward mismatch between the out-of-tree - # `Vector{Diagnostic}` vs Expr(:error) being part of the tree. - return nodekind == K"error" ? - Expr(:error) : - Expr(:error, "$(_token_error_descriptions[nodekind]): `$(sourcetext(node))`") else return val end end - node_args = children(node) - if nodekind == K"var" - @check length(node_args) == 1 - return _to_expr(node_args[1]) - elseif nodekind == K"char" - @check length(node_args) == 1 - return _to_expr(node_args[1]) - elseif nodekind == K"?" - headsym = :if - elseif nodekind == K"=" && !is_decorated(node) && eq_to_kw - headsym = :kw - else - headstr = untokenize(head(node), include_flag_suff=false) - headsym = !isnothing(headstr) ? Symbol(headstr) : - error("Can't untokenize head of kind $(nodekind)") - end - if headsym === :string || headsym === :cmdstring - # Julia string literals may be interspersed with trivia in two situations: - # 1. Triple quoted string indentation is trivia - # 2. An \ before newline removes the newline and any following indentation - # - # Such trivia is eagerly removed by the reference parser, so here we - # concatenate adjacent string chunks together for compatibility. 
- args = Vector{Any}() - i = 1 - while i <= length(node_args) - if is_stringchunk(node_args[i]) - if i < length(node_args) && is_stringchunk(node_args[i+1]) - buf = IOBuffer() - while i <= length(node_args) && is_stringchunk(node_args[i]) - write(buf, node_args[i].val) - i += 1 - end - push!(args, String(take!(buf))) - else - push!(args, node_args[i].val) +end + +# Julia string literals in a `K"string"` node may be split into several chunks +# interspersed with trivia in two situations: +# 1. Triple quoted string indentation is trivia +# 2. An \ before newline removes the newline and any following indentation +# +# This function concatenating adjacent string chunks together as done in the +# reference parser. +function _string_to_Expr(k, args) + args2 = Any[] + i = 1 + while i <= length(args) + if args[i] isa String + if i < length(args) && args[i+1] isa String + buf = IOBuffer() + while i <= length(args) && args[i] isa String + write(buf, args[i]::String) i += 1 end + push!(args2, String(take!(buf))) else - e = _to_expr(node_args[i]) - if e isa String && headsym === :string + push!(args2, args[i]) + i += 1 + end + else + ex = args[i] + if @isexpr(ex, :parens, 1) + ex = _strip_parens(ex) + if ex isa String # Wrap interpolated literal strings in (string) so we can # distinguish them from the surrounding text (issue #38501) # Ie, "$("str")" vs "str" # https://github.com/JuliaLang/julia/pull/38692 - e = Expr(:string, e) + ex = Expr(:string, ex) end - push!(args, e) - i += 1 end - end - if length(args) == 1 && args[1] isa String - # If there's a single string remaining after joining, we unwrap - # to give a string literal. - # """\n a\n b""" ==> "a\nb" - # headsym === :cmdstring follows this branch - return only(args) - else - @check headsym === :string - return Expr(headsym, args...) 
+ push!(args2, ex) + i += 1 end end - - # Convert children - insert_linenums = (headsym === :block || headsym === :toplevel) && need_linenodes - args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1)) - eq_to_kw_in_call = - ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || - headsym === :ref - eq_to_kw_all = (headsym === :parameters && !map_kw_in_params) || - (headsym === :parens && eq_to_kw) - in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref - if insert_linenums && isempty(node_args) - push!(args, source_location(LineNumberNode, node.source, node.position)) + if length(args2) == 1 && args2[1] isa String + # If there's a single string remaining after joining, we unwrap + # to give a string literal. + # """\n a\n b""" ==> "a\nb" + # k == K"cmdstring" follows this branch + return only(args2) else - for i in 1:length(node_args) - n = node_args[i] - if insert_linenums - args[2*i-1] = source_location(LineNumberNode, n.source, n.position) + @check k == K"string" + return Expr(:string, args2...) + end +end + +# Shared fixups for Expr children in cases where the type of the parent node +# affects the child layout. +function _fixup_Expr_children!(head, loc, args) + k = kind(head) + eq_to_kw_in_call = ((k == K"call" || k == K"dotcall") && + is_prefix_call(head)) || k == K"ref" + eq_to_kw_in_params = k != K"vect" && k != K"curly" && + k != K"braces" && k != K"ref" + coalesce_dot = k in KSet"call dotcall curly" || + (k == K"quote" && flags(head) == COLON_QUOTE) + for i in 1:length(args) + arg = args[i] + was_parens = @isexpr(arg, :parens) + arg = _strip_parens(arg) + if @isexpr(arg, :(=)) && eq_to_kw_in_call && i > 1 + arg = Expr(:kw, arg.args...) + elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple + h, a = arg.args[1]::Tuple{SyntaxHead,Any} + arg = ((!was_parens && coalesce_dot && i == 1) || + (k == K"comparison" && iseven(i)) || + is_syntactic_operator(h)) ? 
+ Symbol(".", a) : Expr(:., a) + elseif @isexpr(arg, :parameters) && eq_to_kw_in_params + pargs = arg.args + for j = 1:length(pargs) + pj = pargs[j] + if @isexpr(pj, :(=)) + pargs[j] = Expr(:kw, pj.args...) + end end - eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all - coalesce_dot_with_ops = i==1 && - (nodekind in KSet"call dotcall curly" || - nodekind == K"quote" && flags(node) == COLON_QUOTE) - args[insert_linenums ? 2*i : i] = - _to_expr(n, eq_to_kw=eq_to_kw, - map_kw_in_params=in_vcbr, - coalesce_dot=coalesce_dot_with_ops, - iteration_spec=(headsym == :for && i == 1), - need_linenodes=!(headsym == :let && i == 1) - ) + elseif k == K"let" && i == 1 && @isexpr(arg, :block) + filter!(a -> !(a isa LineNumberNode), arg.args) end - if nodekind == K"block" && has_flags(node, PARENS_FLAG) - popfirst!(args) + if !(k == K"for" && i == 1) && @isexpr(arg, :(=)) + aa2 = arg.args[2] + if is_eventually_call(arg.args[1]) && !@isexpr(aa2, :block) + # Add block for short form function locations + arg.args[2] = Expr(:block, loc, aa2) + end end + args[i] = arg end + return args +end + +# Convert internal node of the JuliaSyntax parse tree to an Expr +function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) + k = kind(head) + if k == K"var" || k == K"char" + @check length(args) == 1 + return args[1] + elseif k == K"string" || k == K"cmdstring" + return _string_to_Expr(k, args) + end + + loc = source_location(LineNumberNode, source, first(srcrange)) - # Special cases for various expression heads - loc = source_location(LineNumberNode, node.source, node.position) - if headsym === :macrocall - reorder_parameters!(args, 2) + _fixup_Expr_children!(head, loc, args) + + headstr = untokenize(head, include_flag_suff=false) + headsym = !isnothing(headstr) ? + Symbol(headstr) : + error("Can't untokenize head of kind $(k)") + + if k == K"?" 
+ headsym = :if + elseif k == K"macrocall" + _reorder_parameters!(args, 2) insert!(args, 2, loc) - elseif headsym === :doc - return Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), - loc, args...) - elseif headsym in (:dotcall, :call) + elseif k == K"block" || k == K"toplevel" + if isempty(args) + push!(args, loc) + else + resize!(args, 2*length(args)) + for i = length(childranges):-1:1 + args[2*i] = args[i] + args[2*i-1] = source_location(LineNumberNode, source, first(childranges[i])) + end + end + elseif k == K"doc" + headsym = :macrocall + args = [GlobalRef(Core, Symbol("@doc")), loc, args...] + elseif k == K"dotcall" || k == K"call" # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children # here as necessary to get the canonical order. - if is_infix_op_call(node) || is_postfix_op_call(node) + if is_infix_op_call(head) || is_postfix_op_call(head) args[2], args[1] = args[1], args[2] end # Lower (call x ') to special ' head - if is_postfix_op_call(node) && args[1] == Symbol("'") + if is_postfix_op_call(head) && args[1] == Symbol("'") popfirst!(args) headsym = Symbol("'") end # Move parameters blocks to args[2] - reorder_parameters!(args, 2) + _reorder_parameters!(args, 2) if headsym === :dotcall - if is_prefix_call(node) - return Expr(:., args[1], Expr(:tuple, args[2:end]...)) + if is_prefix_call(head) + headsym = :. + args = Any[args[1], Expr(:tuple, args[2:end]...)] else # operator calls headsym = :call args[1] = Symbol(".", args[1]) end end - elseif headsym === :. && length(args) == 1 && - is_operator(kind(node[1])) && - (coalesce_dot || is_syntactic_operator(kind(node[1]))) - return Symbol(".", args[1]) - elseif headsym in (:ref, :curly) + elseif k == K"." && length(args) == 1 && is_operator(childheads[1]) + # Hack: Here we preserve the head of the operator to determine whether + # we need to coalesce it with the dot into a single symbol later on. 
+ args[1] = (childheads[1], args[1]) + elseif k == K"ref" || k == K"curly" # Move parameters blocks to args[2] - reorder_parameters!(args, 2) - elseif headsym === :for - if Meta.isexpr(args[1], :cartesian_iterator) - args[1] = Expr(:block, args[1].args...) + _reorder_parameters!(args, 2) + elseif k == K"for" + a1 = args[1] + if @isexpr(a1, :cartesian_iterator) + args[1] = Expr(:block, a1.args...) end - elseif headsym in (:tuple, :vect, :braces) + elseif k in KSet"tuple vect braces" # Move parameters blocks to args[1] - reorder_parameters!(args, 1) - elseif headsym === :comparison - for i in 1:length(args) - if Meta.isexpr(args[i], :., 1) - args[i] = Symbol(".",args[i].args[1]) - end - end - elseif headsym === :where - if length(args) == 2 && Meta.isexpr(args[2], :braces) - a2 = args[2].args - reorder_parameters!(a2, 2) - args = Any[args[1], a2...] - end - elseif headsym === :parens - # parens are used for grouping and don't appear in the Expr AST - if length(args) == 1 - return args[1] - else - # This case should only occur when there's an error inside the - # parens, and we've passed ignore_errors=true to the parser. - # Wrap in a block to preserve both the value and the error. - let args = args - @check all(Meta.isexpr(args[j], :error) for j in 2:length(args)) + _reorder_parameters!(args, 1) + elseif k == K"where" + if length(args) == 2 + a2 = args[2] + if @isexpr(a2, :braces) + a2a = a2.args + _reorder_parameters!(a2a, 2) + args = Any[args[1], a2a...] end - return Expr(:block, args...) 
end - elseif headsym === :try + elseif k == K"try" # Try children in source order: # try_block catch_var catch_block else_block finally_block # Expr ordering: @@ -239,17 +288,17 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, finally_ = false for i in 2:length(args) a = args[i] - if Meta.isexpr(a, :catch) + if @isexpr(a, :catch) catch_var = a.args[1] catch_ = a.args[2] - elseif Meta.isexpr(a, :else) + elseif @isexpr(a, :else) else_ = only(a.args) - elseif Meta.isexpr(a, :finally) + elseif @isexpr(a, :finally) finally_ = only(a.args) - elseif Meta.isexpr(a, :error) + elseif @isexpr(a, :error) finally_ = Expr(:block, a) # Unclear where to put this but here will do? else - @check false "Illegal $a subclause in `try`" + @assert false "Illegal $a subclause in `try`" end end args = Any[try_, catch_var, catch_] @@ -259,15 +308,16 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, push!(args, else_) end end - elseif headsym === :generator + elseif k == K"generator" # Reconstruct the nested Expr form for generator from our flatter # source-ordered `generator` format. gen = args[1] for j = length(args):-1:2 - if Meta.isexpr(args[j], :cartesian_iterator) - gen = Expr(:generator, gen, args[j].args...) + aj = args[j] + if @isexpr(aj, :cartesian_iterator) + gen = Expr(:generator, gen, aj.args...) 
else - gen = Expr(:generator, gen, args[j]) + gen = Expr(:generator, gen, aj) end if j < length(args) # Additional `for`s flatten the inner generator @@ -275,110 +325,159 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, end end return gen - elseif headsym == :filter + elseif k == K"filter" @assert length(args) == 2 iterspec = args[1] outargs = Any[args[2]] - if Meta.isexpr(iterspec, :cartesian_iterator) + if @isexpr(iterspec, :cartesian_iterator) append!(outargs, iterspec.args) else push!(outargs, iterspec) end args = outargs - elseif headsym in (:nrow, :ncat) + elseif k == K"nrow" || k == K"ncat" # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags - pushfirst!(args, numeric_flags(flags(node))) - elseif headsym === :typed_ncat - insert!(args, 2, numeric_flags(flags(node))) - # elseif headsym === :string && length(args) == 1 && version <= (1,5) - # Strip string from interpolations in 1.5 and lower to preserve - # "hi$("ho")" ==> (string "hi" "ho") - elseif headsym === :(=) && !is_decorated(node) - if is_eventually_call(args[1]) && !iteration_spec && !Meta.isexpr(args[2], :block) - # Add block for short form function locations - args[2] = Expr(:block, loc, args[2]) - end - elseif headsym === :elseif + pushfirst!(args, numeric_flags(flags(head))) + elseif k == K"typed_ncat" + insert!(args, 2, numeric_flags(flags(head))) + elseif k == K"elseif" # Block for conditional's source location args[1] = Expr(:block, loc, args[1]) - elseif headsym === :(->) - if Meta.isexpr(args[2], :block) - pushfirst!(args[2].args, loc) + elseif k == K"->" + a2 = args[2] + if @isexpr(a2, :block) + pushfirst!(a2.args, loc) else # Add block for source locations args[2] = Expr(:block, loc, args[2]) end - elseif headsym === :function + elseif k == K"function" if length(args) > 1 - if Meta.isexpr(args[1], :tuple) + a1 = args[1] + if @isexpr(a1, :tuple) # Convert to weird Expr forms for long-form anonymous functions. 
# # (function (tuple (... xs)) body) ==> (function (... xs) body) - if length(args[1].args) == 1 && Meta.isexpr(args[1].args[1], :...) + if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...)) # function (xs...) \n body end - args[1] = args[1].args[1] + args[1] = a11 end end - pushfirst!(args[2].args, loc) + pushfirst!((args[2]::Expr).args, loc) end - elseif headsym === :macro + elseif k == K"macro" if length(args) > 1 - pushfirst!(args[2].args, loc) + pushfirst!((args[2]::Expr).args, loc) end - elseif headsym === :module - pushfirst!(args, !has_flags(node, BARE_MODULE_FLAG)) - pushfirst!(args[3].args, loc) - elseif headsym === :inert + elseif k == K"module" + pushfirst!(args, !has_flags(head, BARE_MODULE_FLAG)) + pushfirst!((args[3]::Expr).args, loc) + elseif k == K"inert" return QuoteNode(only(args)) - elseif (headsym === :quote && length(args) == 1) + elseif k == K"quote" && length(args) == 1 a1 = only(args) if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool) # Flisp parser does an optimization here: simple values are stored # as inert QuoteNode rather than in `Expr(:quote)` quasiquote return QuoteNode(a1) end - elseif headsym === :do + elseif k == K"do" @check length(args) == 3 return Expr(:do, args[1], Expr(:->, args[2], args[3])) - elseif headsym === :let - @check Meta.isexpr(args[1], :block) - a1 = args[1].args - # Ugly logic to strip the Expr(:block) in certian cases for compatibility - if length(a1) == 1 - a = a1[1] - if a isa Symbol || Meta.isexpr(a, (:(=), :(::))) - args[1] = a + elseif k == K"let" + a1 = args[1] + if @isexpr(a1, :block) + a1a = (args[1]::Expr).args + # Ugly logic to strip the Expr(:block) in certian cases for compatibility + if length(a1a) == 1 + a = a1a[1] + if a isa Symbol || @isexpr(a, :(=)) || @isexpr(a, :(::)) + args[1] = a + end end end - elseif headsym === :local || headsym === :global - if length(args) == 1 && Meta.isexpr(args[1], :const) + elseif k == K"local" || k === K"global" + if length(args) == 1 && (a1 = 
args[1]; @isexpr(a1, :const)) # Normalize `local const` to `const local` - args[1] = Expr(headsym, args[1].args...) + args[1] = Expr(headsym, (a1::Expr).args...) headsym = :const end - elseif headsym === :return && isempty(args) + elseif k == K"return" && isempty(args) push!(args, nothing) - elseif headsym === :juxtapose + elseif k == K"juxtapose" headsym = :call pushfirst!(args, :*) - elseif headsym === :struct - pushfirst!(args, has_flags(node, MUTABLE_FLAG)) - elseif headsym === :importpath + elseif k == K"struct" + pushfirst!(args, has_flags(head, MUTABLE_FLAG)) + elseif k == K"importpath" headsym = :. for i = 1:length(args) - if args[i] isa QuoteNode + ai = args[i] + if ai isa QuoteNode # Permit nonsense additional quoting such as # import A.(:b).:c - args[i] = args[i].value + args[i] = ai.value end end end + return Expr(headsym, args...) end -Base.Expr(node::SyntaxNode) = _to_expr(node) -function build_tree(::Type{Expr}, stream::ParseStream; kws...) - Expr(build_tree(SyntaxNode, stream; kws...)) +# Stack entry for build_tree Expr conversion. +# We'd use `Tuple{UnitRange{Int},SyntaxHead,Any}` instead, but that's an +# abstract type due to the `Any` and tuple covariance which destroys +# performance. +struct _BuildExprStackEntry + srcrange::UnitRange{Int} + head::SyntaxHead + ex::Any +end + +function build_tree(::Type{Expr}, stream::ParseStream; + filename=nothing, first_line=1, kws...) + source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) + args = Any[] + childranges = UnitRange{Int}[] + childheads = SyntaxHead[] + entry = build_tree(_BuildExprStackEntry, stream; kws...) 
do head, srcrange, nodechildren + if is_trivia(head) && !is_error(head) + return nothing + end + k = kind(head) + if isnothing(nodechildren) + ex = _leaf_to_Expr(source, head, srcrange, nothing) + else + resize!(childranges, length(nodechildren)) + resize!(childheads, length(nodechildren)) + resize!(args, length(nodechildren)) + for (i,c) in enumerate(nodechildren) + childranges[i] = c.srcrange + childheads[i] = c.head + args[i] = c.ex + end + ex = _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) + end + return _BuildExprStackEntry(srcrange, head, ex) + end + loc = source_location(LineNumberNode, source, first(entry.srcrange)) + only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex])) +end + +function _to_expr(node::SyntaxNode) + if !haschildren(node) + return _leaf_to_Expr(node.source, head(node), range(node), node) + end + cs = children(node) + args = Any[_to_expr(c) for c in cs] + _internal_node_to_Expr(node.source, range(node), head(node), range.(cs), head.(cs), args) +end + +function Base.Expr(node::SyntaxNode) + ex = _to_expr(node) + loc = source_location(LineNumberNode, node.source, first(range(node))) + only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex])) end + diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 587097b859ae1..11b01ae42df79 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -43,25 +43,10 @@ struct GreenNode{Head} args::Union{Tuple{},Vector{GreenNode{Head}}} end -function GreenNode{Head}(head::Head, span::Integer) where {Head} - GreenNode{Head}(head, span, ()) +function GreenNode(head::Head, span::Integer, args) where {Head} + GreenNode{Head}(head, span, args) end -function GreenNode(head::Head, span::Integer) where {Head} - GreenNode{Head}(head, span, ()) -end - -function GreenNode(head::Head, args) where {Head} - children = collect(GreenNode{Head}, args) - span = isempty(children) ? 
0 : sum(x.span for x in children) - GreenNode{Head}(head, span, children) -end - -function GreenNode(head::Head, args::GreenNode{Head}...) where {Head} - GreenNode{Head}(head, GreenNode{Head}[args...]) -end - - # Accessors / predicates haschildren(node::GreenNode) = !(node.args isa Tuple{}) children(node::GreenNode) = node.args @@ -115,3 +100,11 @@ function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractStr _show_green_node(io, node, "", 1, str, show_trivia) end +function build_tree(::Type{GreenNode}, stream::ParseStream; kws...) + build_tree(GreenNode{SyntaxHead}, stream; kws...) do h, srcrange, cs + span = length(srcrange) + isnothing(cs) ? GreenNode(h, span, ()) : + GreenNode(h, span, collect(GreenNode{SyntaxHead}, cs)) + end +end + diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 93050a5e2e96b..757ee907651aa 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -195,7 +195,7 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt # ex = build_tree(Expr, stream; filename=filename, wrap_toplevel_as_kind=K"None", first_line=lineno) - if Meta.isexpr(ex, :None) + if @isexpr(ex, :None) # The None wrapping is only to give somewhere for trivia to be # attached; unwrap! 
ex = only(ex.args) @@ -286,7 +286,7 @@ function _fl_parse_hook(code, filename, lineno, offset, options) else if options === :all ex = Base.parse_input_line(String(code), filename=filename, depwarn=false) - if !Meta.isexpr(ex, :toplevel) + if !@isexpr(ex, :toplevel) ex = Expr(:toplevel, ex) end return ex, sizeof(code) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index d1c5c40b559ba..2527f86d4a087 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -12,7 +12,7 @@ function parse_int_literal(str::AbstractString) end if isnothing(x) x = Base.tryparse(Int128, str) - if isnothing(x) + if x === nothing x = Base.parse(BigInt, str) end end @@ -358,3 +358,83 @@ function normalize_identifier(str) flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE return isascii(str) ? str : utf8proc_map(str, flags) end + + +#------------------------------------------------------------------------------- +function parse_julia_literal(source, head::SyntaxHead, srcrange) + # Leaf node + k = kind(head) + val_str = view(source, srcrange) + # Any errors parsing literals are represented as ErrorVal() - this can + # happen when the user sets `ignore_errors=true` during parsing. + val = if k == K"Integer" + parse_int_literal(val_str) + elseif k == K"Float" + v, code = parse_float_literal(Float64, source.code, first(srcrange), + last(srcrange)+1) + (code === :ok || code === :underflow) ? v : ErrorVal() + elseif k == K"Float32" + v, code = parse_float_literal(Float32, source.code, first(srcrange), + last(srcrange)+1) + (code === :ok || code === :underflow) ? 
v : ErrorVal() + elseif k in KSet"BinInt OctInt HexInt" + parse_uint_literal(val_str, k) + elseif k == K"true" + true + elseif k == K"false" + false + elseif k == K"Char" + io = IOBuffer() + had_error = unescape_julia_string(io, source.code, first(srcrange), + last(srcrange)+1, Diagnostic[]) + if had_error + ErrorVal() + else + seek(io, 0) + c = read(io, Char) + eof(io) ? c : ErrorVal() + end + elseif k == K"Identifier" + if has_flags(head, RAW_STRING_FLAG) + io = IOBuffer() + unescape_raw_string(io, val_str, false) + Symbol(normalize_identifier(String(take!(io)))) + else + Symbol(normalize_identifier(val_str)) + end + elseif is_keyword(k) + # This should only happen for tokens nested inside errors + Symbol(val_str) + elseif k in KSet"String CmdString" + io = IOBuffer() + had_error = false + if has_flags(head, RAW_STRING_FLAG) + unescape_raw_string(io, val_str, k == K"CmdString") + else + had_error = unescape_julia_string(io, source.code, first(srcrange), + last(srcrange)+1, Diagnostic[]) + end + had_error ? ErrorVal() : String(take!(io)) + elseif is_operator(k) + isempty(srcrange) ? 
+ Symbol(untokenize(k)) : # synthetic invisible tokens + Symbol(normalize_identifier(val_str)) + elseif k == K"error" + ErrorVal() + elseif k == K"MacroName" + Symbol("@$(normalize_identifier(val_str))") + elseif k == K"StringMacroName" + Symbol("@$(normalize_identifier(val_str))_str") + elseif k == K"CmdMacroName" + Symbol("@$(normalize_identifier(val_str))_cmd") + elseif k == K"core_@cmd" + Symbol("core_@cmd") + elseif is_syntax_kind(head) + nothing + else + # FIXME: this allows us to recover from trivia is_error nodes + # that we insert below + ErrorVal() + end +end + diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 4b701bbd22514..7f1f2cf804d83 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -341,10 +341,6 @@ function token_last_byte(stream::ParseStream, i) stream.tokens[i].next_byte - 1 end -function token_span(stream::ParseStream, i) - stream.tokens[i].next_byte - stream.tokens[i-1].next_byte -end - function lookahead_token_first_byte(stream, i) i == 1 ? _next_byte(stream) : stream.lookahead[i-1].next_byte end @@ -961,24 +957,25 @@ end # API for extracting results from ParseStream """ - build_tree(::Type{NodeType}, stream::ParseStream; + build_tree(make_node::Function, ::Type{StackEntry}, stream::ParseStream; wrap_toplevel_as_kind=nothing, kws...) -Construct a tree with `NodeType` nodes from a ParseStream using depth-first -traversal. `NodeType` must have the constructors +Construct a tree from a ParseStream using depth-first traversal. `make_node` +must have the signature + + make_node(head::SyntaxHead, span::Integer, children) - NodeType(head::SyntaxHead, span::Integer) - NodeType(head::SyntaxHead, span::Integer, children::Vector{NodeType}) +where `children` is either `nothing` for leaf nodes or an iterable of the +children of type `StackEntry` for internal nodes. `StackEntry` may be a node +type, but also may include other information required during building the tree. 
A single node which covers the input is expected, but if the ParseStream has multiple nodes at the top level, `wrap_toplevel_as_kind` may be used to wrap them in a single node. -The tree here is constructed depth-first, but it would also be possible to use -a bottom-up tree builder interface similar to rust-analyzer. (In that case we'd -traverse the list of ranges backward rather than forward.) +The tree here is constructed depth-first in postorder. """ -function build_tree(::Type{NodeType}, stream::ParseStream; +function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; wrap_toplevel_as_kind=nothing, kws...) where NodeType stack = Vector{NamedTuple{(:first_token,:node),Tuple{Int,NodeType}}}() @@ -996,8 +993,15 @@ function build_tree(::Type{NodeType}, stream::ParseStream; i += 1 continue # Ignore removed tokens end - node = NodeType(head(t), token_span(stream, i)) - push!(stack, (first_token=i, node=node)) + srcrange = (stream.tokens[i-1].next_byte: + stream.tokens[i].next_byte - 1) + h = head(t) + children = (is_syntax_kind(h) || is_keyword(h)) ? 
+ (stack[n].node for n=1:0) : nothing + node = make_node(h, srcrange, children) + if !isnothing(node) + push!(stack, (first_token=i, node=node)) + end i += 1 end if j > lastindex(ranges) @@ -1018,10 +1022,14 @@ function build_tree(::Type{NodeType}, stream::ParseStream; while k > 1 && r.first_token <= stack[k-1].first_token k -= 1 end + srcrange = (stream.tokens[r.first_token-1].next_byte: + stream.tokens[r.last_token].next_byte - 1) children = (stack[n].node for n = k:length(stack)) - node = NodeType(head(r), children) + node = make_node(head(r), srcrange, children) resize!(stack, k-1) - push!(stack, (first_token=r.first_token, node=node)) + if !isnothing(node) + push!(stack, (first_token=r.first_token, node=node)) + end j += 1 end end @@ -1029,14 +1037,16 @@ function build_tree(::Type{NodeType}, stream::ParseStream; return only(stack).node elseif !isnothing(wrap_toplevel_as_kind) # Mostly for debugging + srcrange = (stream.tokens[1].next_byte: + stream.tokens[end].next_byte - 1) children = (x.node for x in stack) - return NodeType(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), children) + return make_node(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), + srcrange, children) else error("Found multiple nodes at top level") end end - """ sourcetext(stream::ParseStream; steal_textbuf=true) diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index f6426532dad4c..bd202bb85dac4 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -1,5 +1,5 @@ # Just parse some file as a precompile workload -let filename = joinpath(@__DIR__, "literal_parsing.jl") - text = read(filename, String) - parseall(Expr, text) -end +# let filename = joinpath(@__DIR__, "literal_parsing.jl") +# text = read(filename, String) +# parseall(Expr, text) +# end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 4d8a21721e70d..7f87a81a32dcc 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ 
-46,85 +46,9 @@ Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) - # Leaf node - k = kind(raw) - val_range = position:position + span(raw) - 1 - val_str = view(source, val_range) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. - # - # Any errors parsing literals are represented as ErrorVal() - this can - # happen when the user sets `ignore_errors=true` during parsing. - val = if k == K"Integer" - parse_int_literal(val_str) - elseif k == K"Float" - v, code = parse_float_literal(Float64, source.code, position, - position+span(raw)) - (code === :ok || code === :underflow) ? v : ErrorVal() - elseif k == K"Float32" - v, code = parse_float_literal(Float32, source.code, position, - position+span(raw)) - (code === :ok || code === :underflow) ? v : ErrorVal() - elseif k in KSet"BinInt OctInt HexInt" - parse_uint_literal(val_str, k) - elseif k == K"true" - true - elseif k == K"false" - false - elseif k == K"Char" - io = IOBuffer() - had_error = unescape_julia_string(io, source.code, position, - position+span(raw), Diagnostic[]) - if had_error - ErrorVal() - else - seek(io, 0) - c = read(io, Char) - eof(io) ? 
c : ErrorVal() - end - elseif k == K"Identifier" - if has_flags(head(raw), RAW_STRING_FLAG) - io = IOBuffer() - unescape_raw_string(io, val_str, false) - Symbol(normalize_identifier(String(take!(io)))) - else - Symbol(normalize_identifier(val_str)) - end - elseif is_keyword(k) - # This should only happen for tokens nested inside errors - Symbol(val_str) - elseif k in KSet"String CmdString" - io = IOBuffer() - had_error = false - if has_flags(head(raw), RAW_STRING_FLAG) - unescape_raw_string(io, val_str, k == K"CmdString") - else - had_error = unescape_julia_string(io, source.code, position, - position+span(raw), Diagnostic[]) - end - had_error ? ErrorVal() : String(take!(io)) - elseif is_operator(k) - isempty(val_range) ? - Symbol(untokenize(k)) : # synthetic invisible tokens - Symbol(normalize_identifier(val_str)) - elseif k == K"error" - ErrorVal() - elseif k == K"MacroName" - Symbol("@$(normalize_identifier(val_str))") - elseif k == K"StringMacroName" - Symbol("@$(normalize_identifier(val_str))_str") - elseif k == K"CmdMacroName" - Symbol("@$(normalize_identifier(val_str))_cmd") - elseif k == K"core_@cmd" - Symbol("core_@cmd") - elseif is_syntax_kind(raw) - nothing - else - # FIXME: this allows us to recover from trivia is_error nodes - # that we insert below - @debug "Leaf node of kind $k unknown to SyntaxNode" - ErrorVal() - end + val = parse_julia_literal(source, head(raw), position:position + span(raw) - 1) return SyntaxNode(nothing, nothing, SyntaxData(source, raw, position, val)) else cs = SyntaxNode[] diff --git a/JuliaSyntax/test/benchmark.jl b/JuliaSyntax/test/benchmark.jl index 0cae3e0714687..1e5183061b8fe 100644 --- a/JuliaSyntax/test/benchmark.jl +++ b/JuliaSyntax/test/benchmark.jl @@ -16,9 +16,9 @@ end all_base_code = concat_base() b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:all) -b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code) -b_SyntaxNode = @benchmark 
JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code) -b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code) +b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code, ignore_warnings=true) +b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code, ignore_warnings=true) +b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code, ignore_warnings=true) @info "Benchmarks" ParseStream=b_ParseStream GreenNode=b_GreenNode SyntaxNode=b_SyntaxNode Expr=b_Expr diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 711c47535e101..4cdc23c66fd4c 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -1,16 +1,30 @@ -@testset "Expr conversion" begin +@testset "Expr parsing with $method" for method in ["build_tree", "SyntaxNode conversion"] + parseatom, parsestmt, parseall = + if method == "build_tree" + ((s; kws...) -> JuliaSyntax.parseatom(Expr, s; kws...), + (s; kws...) -> JuliaSyntax.parsestmt(Expr, s; kws...), + (s; kws...) -> JuliaSyntax.parseall(Expr, s; kws...)) + else + ((s; kws...) -> Expr(JuliaSyntax.parseatom(SyntaxNode, s; kws...)), + (s; kws...) -> Expr(JuliaSyntax.parsestmt(SyntaxNode, s; kws...)), + (s; kws...) 
-> Expr(JuliaSyntax.parseall(SyntaxNode, s; kws...))) + end + @testset "Quote nodes" begin - @test parseatom(Expr, ":(a)") == QuoteNode(:a) - @test parseatom(Expr, ":(:a)") == Expr(:quote, QuoteNode(:a)) - @test parseatom(Expr, ":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) + @test parseatom(":(a)") == QuoteNode(:a) + @test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a)) + @test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 - @test parseatom(Expr, ":true") == Expr(:quote, true) + @test parseatom(":true") == Expr(:quote, true) + + # Handling of K"inert" + @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b))) end @testset "Line numbers" begin @testset "Blocks" begin - @test parsestmt(Expr, "begin a\nb\n\nc\nend") == + @test parsestmt("begin a\nb\n\nc\nend") == Expr(:block, LineNumberNode(1), :a, @@ -19,12 +33,12 @@ LineNumberNode(4), :c, ) - @test parsestmt(Expr, "begin end") == + @test parsestmt("begin end") == Expr(:block, LineNumberNode(1) ) - @test parseall(Expr, "a\n\nb") == + @test parseall("a\n\nb") == Expr(:toplevel, LineNumberNode(1), :a, @@ -32,7 +46,7 @@ :b, ) - @test parsestmt(Expr, "module A\n\nbody\nend") == + @test parsestmt("module A\n\nbody\nend") == Expr(:module, true, :A, @@ -45,7 +59,7 @@ end @testset "Function definition lines" begin - @test parsestmt(Expr, "function f()\na\n\nb\nend") == + @test parsestmt("function f()\na\n\nb\nend") == Expr(:function, Expr(:call, :f), Expr(:block, @@ -56,7 +70,7 @@ :b, ) ) - @test parsestmt(Expr, "f() = 1") == + @test parsestmt("f() = 1") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -64,24 +78,45 @@ 1 ) ) + @test parsestmt("macro f()\na\nend") == + Expr(:macro, + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + LineNumberNode(2), + :a, + ) + ) # function/macro without methods - @test parsestmt(Expr, "function f end") == + @test parsestmt("function f end") == Expr(:function, :f) - @test 
parsestmt(Expr, "macro f end") == + @test parsestmt("macro f end") == Expr(:macro, :f) # weird cases with extra parens - @test parsestmt(Expr, "function (f() where T) end") == + @test parsestmt("function (f() where T) end") == Expr(:function, Expr(:where, Expr(:call, :f), :T), Expr(:block, LineNumberNode(1), LineNumberNode(1))) - @test parsestmt(Expr, "function (f()::S) end") == + @test parsestmt("function (f()::S) end") == Expr(:function, Expr(:(::), Expr(:call, :f), :S), Expr(:block, LineNumberNode(1), LineNumberNode(1))) end + @testset "->" begin + @test parsestmt("a -> b") == + Expr(:->, :a, Expr(:block, LineNumberNode(1), :b)) + # @test parsestmt("a -> (\nb;c)") == + # Expr(:->, :a, Expr(:block, LineNumberNode(1), :b)) + @test parsestmt("a -> begin\nb\nc\nend") == + Expr(:->, :a, Expr(:block, + LineNumberNode(1), + LineNumberNode(2), :b, + LineNumberNode(3), :c)) + end + @testset "elseif" begin - @test parsestmt(Expr, "if a\nb\nelseif c\n d\nend") == + @test parsestmt("if a\nb\nelseif c\n d\nend") == Expr(:if, :a, Expr(:block, @@ -99,7 +134,7 @@ end @testset "No line numbers in let bindings" begin - @test parsestmt(Expr, "let i=is, j=js\nbody\nend") == + @test parsestmt("let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, Expr(:(=), :i, :is), @@ -115,7 +150,7 @@ @testset "Short form function line numbers" begin # A block is added to hold the line number node - @test parsestmt(Expr, "f() = xs") == + @test parsestmt("f() = xs") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -123,7 +158,7 @@ :xs)) # flisp parser quirk: In a for loop the block is not added, despite # this defining a short-form function. 
- @test parsestmt(Expr, "for f() = xs\nend") == + @test parsestmt("for f() = xs\nend") == Expr(:for, Expr(:(=), Expr(:call, :f), :xs), Expr(:block, @@ -132,7 +167,7 @@ end @testset "for" begin - @test parsestmt(Expr, "for i=is body end") == + @test parsestmt("for i=is body end") == Expr(:for, Expr(:(=), :i, :is), Expr(:block, @@ -140,7 +175,7 @@ :body ) ) - @test parsestmt(Expr, "for i=is, j=js\nbody\nend") == + @test parsestmt("for i=is, j=js\nbody\nend") == Expr(:for, Expr(:block, Expr(:(=), :i, :is), @@ -154,7 +189,7 @@ end @testset "Long form anonymous functions" begin - @test parsestmt(Expr, "function (xs...)\nbody end") == + @test parsestmt("function (xs...)\nbody end") == Expr(:function, Expr(:..., :xs), Expr(:block, @@ -165,25 +200,25 @@ @testset "String conversions" begin # String unwrapping / wrapping - @test parsestmt(Expr, "\"str\"") == "str" - @test parsestmt(Expr, "\"\$(\"str\")\"") == + @test parsestmt("\"str\"") == "str" + @test parsestmt("\"\$(\"str\")\"") == Expr(:string, Expr(:string, "str")) # Concatenation of string chunks in triple quoted cases - @test parsestmt(Expr, "```\n a\n b```") == + @test parsestmt("```\n a\n b```") == Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "a\nb") - @test parsestmt(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"") == + @test parsestmt("\"\"\"\n a\n \$x\n b\n c\"\"\"") == Expr(:string, "a\n", :x, "\nb\nc") end @testset "Char conversions" begin - @test parsestmt(Expr, "'a'") == 'a' - @test parsestmt(Expr, "'α'") == 'α' - @test parsestmt(Expr, "'\\xce\\xb1'") == 'α' + @test parsestmt("'a'") == 'a' + @test parsestmt("'α'") == 'α' + @test parsestmt("'\\xce\\xb1'") == 'α' end @testset "do block conversion" begin - @test parsestmt(Expr, "f(x) do y\n body end") == + @test parsestmt("f(x) do y\n body end") == Expr(:do, Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, @@ -193,41 +228,41 @@ @testset "= to Expr(:kw) conversion" begin # Call - @test parsestmt(Expr, "f(a=1)") == + @test 
parsestmt("f(a=1)") == Expr(:call, :f, Expr(:kw, :a, 1)) - @test parsestmt(Expr, "f(; b=2)") == + @test parsestmt("f(; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) - @test parsestmt(Expr, "f(a=1; b=2)") == + @test parsestmt("f(a=1; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) - @test parsestmt(Expr, "f(a; b; c)") == + @test parsestmt("f(a; b; c)") == Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) - @test parsestmt(Expr, "+(a=1,)") == + @test parsestmt("+(a=1,)") == Expr(:call, :+, Expr(:kw, :a, 1)) - @test parsestmt(Expr, "(a=1)()") == + @test parsestmt("(a=1)()") == Expr(:call, Expr(:(=), :a, 1)) # Operator calls: = is not :kw - @test parsestmt(Expr, "(x=1) != 2") == + @test parsestmt("(x=1) != 2") == Expr(:call, :!=, Expr(:(=), :x, 1), 2) - @test parsestmt(Expr, "+(a=1)") == + @test parsestmt("+(a=1)") == Expr(:call, :+, Expr(:(=), :a, 1)) - @test parsestmt(Expr, "(a=1)'") == + @test parsestmt("(a=1)'") == Expr(Symbol("'"), Expr(:(=), :a, 1)) - @test parsestmt(Expr, "(a=1)'ᵀ") == + @test parsestmt("(a=1)'ᵀ") == Expr(:call, Symbol("'ᵀ"), Expr(:(=), :a, 1)) # Dotcall - @test parsestmt(Expr, "f.(a=1; b=2)") == + @test parsestmt("f.(a=1; b=2)") == Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) # Named tuples - @test parsestmt(Expr, "(a=1,)") == + @test parsestmt("(a=1,)") == Expr(:tuple, Expr(:(=), :a, 1)) - @test parsestmt(Expr, "(a=1,; b=2)") == + @test parsestmt("(a=1,; b=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) - @test parsestmt(Expr, "(a=1,; b=2; c=3)") == + @test parsestmt("(a=1,; b=2; c=3)") == Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :c, 3)), @@ -235,161 +270,183 @@ Expr(:(=), :a, 1)) # ref - @test parsestmt(Expr, "x[i=j]") == + @test parsestmt("x[i=j]") == Expr(:ref, :x, Expr(:kw, :i, :j)) - @test parsestmt(Expr, "(i=j)[x]") == + @test parsestmt("(i=j)[x]") == Expr(:ref, Expr(:(=), :i, :j), :x) - 
@test parsestmt(Expr, "x[a, b; i=j]") == + @test parsestmt("x[a, b; i=j]") == Expr(:ref, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) # curly - @test parsestmt(Expr, "(i=j){x}") == + @test parsestmt("(i=j){x}") == Expr(:curly, Expr(:(=), :i, :j), :x) - @test parsestmt(Expr, "x{a, b; i=j}") == + @test parsestmt("x{a, b; i=j}") == Expr(:curly, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) # vect - @test parsestmt(Expr, "[a=1,; b=2]") == + @test parsestmt("[a=1,; b=2]") == Expr(:vect, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) # braces - @test parsestmt(Expr, "{a=1,; b=2}") == + @test parsestmt("{a=1,; b=2}") == Expr(:braces, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) # dotted = is not :kw - @test parsestmt(Expr, "f(a .= 1)") == + @test parsestmt("f(a .= 1)") == Expr(:call, :f, Expr(:.=, :a, 1)) # = inside parens in calls and tuples - @test parsestmt(Expr, "f(((a = 1)))") == + @test parsestmt("f(((a = 1)))") == Expr(:call, :f, Expr(:kw, :a, 1)) - @test parsestmt(Expr, "(((a = 1)),)") == + @test parsestmt("(((a = 1)),)") == Expr(:tuple, Expr(:(=), :a, 1)) - @test parsestmt(Expr, "(;((a = 1)),)") == + @test parsestmt("(;((a = 1)),)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) end @testset "dotcall / dotted operators" begin - @test parsestmt(Expr, "f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) - @test parsestmt(Expr, "f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) - @test parsestmt(Expr, "f.(a=1; b=2)") == + @test parsestmt("f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) + @test parsestmt("f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) + @test parsestmt("f.(a=1; b=2)") == Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - @test parsestmt(Expr, "(a=1).()") == Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) - @test parsestmt(Expr, "x .+ y") == Expr(:call, Symbol(".+"), :x, :y) - @test parsestmt(Expr, "(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) - @test 
parsestmt(Expr, "a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), - :b, Symbol(".<"), :c) - @test parsestmt(Expr, ".*(x)") == Expr(:call, Symbol(".*"), :x) - @test parsestmt(Expr, ".+(x)") == Expr(:call, Symbol(".+"), :x) - @test parsestmt(Expr, ".+x") == Expr(:call, Symbol(".+"), :x) - @test parsestmt(Expr, "(.+)(x)") == Expr(:call, Expr(:., :+), :x) - @test parsestmt(Expr, "(.+).(x)") == Expr(:., Expr(:., :+), Expr(:tuple, :x)) - - @test parsestmt(Expr, ".+") == Expr(:., :+) - @test parsestmt(Expr, ":.+") == QuoteNode(Symbol(".+")) - @test parsestmt(Expr, ":(.+)") == Expr(:quote, (Expr(:., :+))) - @test parsestmt(Expr, "quote .+ end") == Expr(:quote, + @test parsestmt("(a=1).()") == Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) + @test parsestmt("x .+ y") == Expr(:call, Symbol(".+"), :x, :y) + @test parsestmt("(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) + @test parsestmt("a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), + :b, Symbol(".<"), :c) + @test parsestmt("a .< (.<) .< c") == Expr(:comparison, :a, Symbol(".<"), + Expr(:., :<), Symbol(".<"), :c) + @test parsestmt(".*(x)") == Expr(:call, Symbol(".*"), :x) + @test parsestmt(".+(x)") == Expr(:call, Symbol(".+"), :x) + @test parsestmt(".+x") == Expr(:call, Symbol(".+"), :x) + @test parsestmt("(.+)(x)") == Expr(:call, Expr(:., :+), :x) + @test parsestmt("(.+).(x)") == Expr(:., Expr(:., :+), Expr(:tuple, :x)) + + @test parsestmt(".+") == Expr(:., :+) + @test parsestmt(":.+") == QuoteNode(Symbol(".+")) + @test parsestmt(":(.+)") == Expr(:quote, (Expr(:., :+))) + @test parsestmt("quote .+ end") == Expr(:quote, Expr(:block, LineNumberNode(1), Expr(:., :+))) - @test parsestmt(Expr, ".+{x}") == Expr(:curly, Symbol(".+"), :x) + @test parsestmt(".+{x}") == Expr(:curly, Symbol(".+"), :x) # Quoted syntactic ops act different when in parens - @test parsestmt(Expr, ":.=") == QuoteNode(Symbol(".=")) - @test parsestmt(Expr, ":(.=)") == QuoteNode(Symbol(".=")) + @test parsestmt(":.=") == 
QuoteNode(Symbol(".=")) + @test parsestmt(":(.=)") == QuoteNode(Symbol(".=")) # A few other cases of bare dotted ops - @test parsestmt(Expr, "f(.+)") == Expr(:call, :f, Expr(:., :+)) - @test parsestmt(Expr, "(a, .+)") == Expr(:tuple, :a, Expr(:., :+)) - @test parsestmt(Expr, "A.:.+") == Expr(:., :A, QuoteNode(Symbol(".+"))) + @test parsestmt("f(.+)") == Expr(:call, :f, Expr(:., :+)) + @test parsestmt("(a, .+)") == Expr(:tuple, :a, Expr(:., :+)) + @test parsestmt("A.:.+") == Expr(:., :A, QuoteNode(Symbol(".+"))) end @testset "let" begin - @test parsestmt(Expr, "let x=1\n end") == + @test parsestmt("let x=1\n end") == Expr(:let, Expr(:(=), :x, 1), Expr(:block, LineNumberNode(2))) - @test parsestmt(Expr, "let x=1 ; end") == + @test parsestmt("let x=1 ; end") == Expr(:let, Expr(:(=), :x, 1), Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "let x ; end") == + @test parsestmt("let x ; end") == Expr(:let, :x, Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "let x::1 ; end") == + @test parsestmt("let x::1 ; end") == Expr(:let, Expr(:(::), :x, 1), Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "let x=1,y=2 end") == + @test parsestmt("let x=1,y=2 end") == Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "let x+=1 ; end") == + @test parsestmt("let x+=1 ; end") == Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "let ; end") == + @test parsestmt("let ; end") == Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "let ; body end") == + @test parsestmt("let ; body end") == Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1), :body)) - @test parsestmt(Expr, "let\na\nb\nend") == + @test parsestmt("let\na\nb\nend") == Expr(:let, Expr(:block), Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) end @testset "where" begin - @test parsestmt(Expr, "A where T") == Expr(:where, :A, :T) - @test 
parsestmt(Expr, "A where {T}") == Expr(:where, :A, :T) - @test parsestmt(Expr, "A where {S, T}") == Expr(:where, :A, :S, :T) - @test parsestmt(Expr, "A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) + @test parsestmt("A where T") == Expr(:where, :A, :T) + @test parsestmt("A where {T}") == Expr(:where, :A, :T) + @test parsestmt("A where {S, T}") == Expr(:where, :A, :S, :T) + @test parsestmt("A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) end @testset "macrocall" begin # line numbers - @test parsestmt(Expr, "@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1)) - @test parsestmt(Expr, "\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2)) + @test parsestmt("@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1)) + @test parsestmt("\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2)) # parameters - @test parsestmt(Expr, "@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + @test parsestmt("@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), Expr(:parameters, :a), :x) - @test parsestmt(Expr, "@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + @test parsestmt("@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) # @__dot__ - @test parsestmt(Expr, "@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) - @test parsestmt(Expr, "using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) + @test parsestmt("@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) + @test parsestmt("using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) # var"" - @test parsestmt(Expr, "@var\"#\" a") == Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) - @test parsestmt(Expr, "A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) + @test parsestmt("@var\"#\" a") == 
Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + @test parsestmt("A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) # Square brackets - @test parsestmt(Expr, "@S[a,b]") == + @test parsestmt("@S[a,b]") == Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b)) - @test parsestmt(Expr, "@S[a b]") == + @test parsestmt("@S[a b]") == Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b)) - @test parsestmt(Expr, "@S[a; b]") == + @test parsestmt("@S[a; b]") == Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b)) - @test parsestmt(Expr, "@S[a ;; b]", version=v"1.7") == + @test parsestmt("@S[a ;; b]", version=v"1.7") == Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) end @testset "vect" begin - @test parsestmt(Expr, "[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y) + @test parsestmt("[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y) + end + + @testset "concatenation" begin + @test parsestmt("[a ;;; b ;;;; c]", version=v"1.7") == + Expr(:ncat, 4, Expr(:nrow, 3, :a, :b), :c) + @test parsestmt("[a b ; c d]") == + Expr(:vcat, Expr(:row, :a, :b), Expr(:row, :c, :d)) + @test parsestmt("[a\nb]") == Expr(:vcat, :a, :b) + @test parsestmt("[a b]") == Expr(:hcat, :a, :b) + @test parsestmt("[a b ; c d]") == + Expr(:vcat, Expr(:row, :a, :b), Expr(:row, :c, :d)) + + @test parsestmt("T[a ;;; b ;;;; c]", version=v"1.7") == + Expr(:typed_ncat, :T, 4, Expr(:nrow, 3, :a, :b), :c) + @test parsestmt("T[a b ; c d]") == + Expr(:typed_vcat, :T, Expr(:row, :a, :b), Expr(:row, :c, :d)) + @test parsestmt("T[a\nb]") == Expr(:typed_vcat, :T, :a, :b) + @test parsestmt("T[a b]") == Expr(:typed_hcat, :T, :a, :b) + @test parsestmt("T[a b ; c d]") == + Expr(:typed_vcat, :T, Expr(:row, :a, :b), Expr(:row, :c, :d)) end @testset "generators" begin - @test parsestmt(Expr, "(x for a in as for b in bs)") == + @test parsestmt("(x for a in as for b in bs)") == 
Expr(:flatten, Expr(:generator, Expr(:generator, :x, Expr(:(=), :b, :bs)), Expr(:(=), :a, :as))) - @test parsestmt(Expr, "(x for a in as, b in bs)") == + @test parsestmt("(x for a in as, b in bs)") == Expr(:generator, :x, Expr(:(=), :a, :as), Expr(:(=), :b, :bs)) - @test parsestmt(Expr, "(x for a in as, b in bs if z)") == + @test parsestmt("(x for a in as, b in bs if z)") == Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) - @test parsestmt(Expr, "(x for a in as, b in bs for c in cs, d in ds)") == + @test parsestmt("(x for a in as, b in bs for c in cs, d in ds)") == Expr(:flatten, Expr(:generator, Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)), Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) - @test parsestmt(Expr, "(x for a in as for b in bs if z)") == + @test parsestmt("(x for a in as for b in bs if z)") == Expr(:flatten, Expr(:generator, Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :b, :bs))), Expr(:(=), :a, :as))) - @test parsestmt(Expr, "(x for a in as if z for b in bs)") == + @test parsestmt("(x for a in as if z for b in bs)") == Expr(:flatten, Expr(:generator, Expr(:generator, :x, Expr(:(=), :b, :bs)), Expr(:filter, :z, Expr(:(=), :a, :as)))) - @test parsestmt(Expr, "[x for a = as for b = bs if cond1 for c = cs if cond2]" ) == + @test parsestmt("[x for a = as for b = bs if cond1 for c = cs if cond2]" ) == Expr(:comprehension, Expr(:flatten, Expr(:generator, @@ -404,41 +461,41 @@ :cond1, Expr(:(=), :b, :bs)))), Expr(:(=), :a, :as)))) - @test parsestmt(Expr, "[x for a = as if begin cond2 end]" ) == + @test parsestmt("[x for a = as if begin cond2 end]" ) == Expr(:comprehension, Expr(:generator, :x, Expr(:filter, Expr(:block, LineNumberNode(1), :cond2), Expr(:(=), :a, :as)))) - @test parsestmt(Expr, "(x for a in as if z)") == + @test parsestmt("(x for a in as if z)") == Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as))) end @testset "try" begin - @test parsestmt(Expr, "try x catch e; y end") == + @test 
parsestmt("try x catch e; y end") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :y)) - @test parsestmt(Expr, "try x finally y end") == + @test parsestmt("try x finally y end") == Expr(:try, Expr(:block, LineNumberNode(1), :x), false, false, Expr(:block, LineNumberNode(1), :y)) - @test parsestmt(Expr, "try x catch e; y finally z end") == + @test parsestmt("try x catch e; y finally z end") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :y), Expr(:block, LineNumberNode(1), :z)) - @test parsestmt(Expr, "try x catch e; y else z end", version=v"1.8") == + @test parsestmt("try x catch e; y else z end", version=v"1.8") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :y), false, Expr(:block, LineNumberNode(1), :z)) - @test parsestmt(Expr, "try x catch e; y else z finally w end", version=v"1.8") == + @test parsestmt("try x catch e; y else z finally w end", version=v"1.8") == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, @@ -446,14 +503,14 @@ Expr(:block, LineNumberNode(1), :w), Expr(:block, LineNumberNode(1), :z)) # finally before catch - @test parsestmt(Expr, "try x finally y catch e z end", ignore_warnings=true) == + @test parsestmt("try x finally y catch e z end", ignore_warnings=true) == Expr(:try, Expr(:block, LineNumberNode(1), :x), :e, Expr(:block, LineNumberNode(1), :z), Expr(:block, LineNumberNode(1), :y)) # empty recovery - @test parsestmt(Expr, "try x end", ignore_errors=true) == + @test parsestmt("try x end", ignore_errors=true) == Expr(:try, Expr(:block, LineNumberNode(1), :x), false, false, @@ -461,100 +518,99 @@ end @testset "juxtapose" begin - @test parsestmt(Expr, "2x") == Expr(:call, :*, 2, :x) - @test parsestmt(Expr, "(2)(3)x") == Expr(:call, :*, 2, 3, :x) + @test parsestmt("2x") == Expr(:call, :*, 2, :x) + @test parsestmt("(2)(3)x") == Expr(:call, :*, 2, 3, :x) end @testset "Core.@doc" begin - @test parsestmt(Expr, "\"x\" f") == + 
@test parsestmt("\"x\" f") == Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(1), "x", :f) - @test parsestmt(Expr, "\n\"x\" f") == + @test parsestmt("\n\"x\" f") == Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f) end @testset "return" begin - @test parsestmt(Expr, "return x") == Expr(:return, :x) - @test parsestmt(Expr, "return") == Expr(:return, nothing) + @test parsestmt("return x") == Expr(:return, :x) + @test parsestmt("return") == Expr(:return, nothing) end @testset "struct" begin - @test parsestmt(Expr, "struct A end") == + @test parsestmt("struct A end") == Expr(:struct, false, :A, Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "mutable struct A end") == + @test parsestmt("mutable struct A end") == Expr(:struct, true, :A, Expr(:block, LineNumberNode(1))) - @test parsestmt(Expr, "struct A <: B \n a::X \n end") == + @test parsestmt("struct A <: B \n a::X \n end") == Expr(:struct, false, Expr(:<:, :A, :B), Expr(:block, LineNumberNode(2), Expr(:(::), :a, :X))) - @test parsestmt(Expr, "struct A \n a \n b \n end") == + @test parsestmt("struct A \n a \n b \n end") == Expr(:struct, false, :A, Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) - @test parsestmt(Expr, "struct A const a end", version=v"1.8") == + @test parsestmt("struct A const a end", version=v"1.8") == Expr(:struct, false, :A, Expr(:block, LineNumberNode(1), Expr(:const, :a))) end @testset "export" begin - @test parsestmt(Expr, "export a") == Expr(:export, :a) - @test parsestmt(Expr, "export @a") == Expr(:export, Symbol("@a")) - @test parsestmt(Expr, "export @var\"'\"") == Expr(:export, Symbol("@'")) - @test parsestmt(Expr, "export a, \n @b") == Expr(:export, :a, Symbol("@b")) - @test parsestmt(Expr, "export +, ==") == Expr(:export, :+, :(==)) - @test parsestmt(Expr, "export \n a") == Expr(:export, :a) + @test parsestmt("export a") == Expr(:export, :a) + @test parsestmt("export @a") == Expr(:export, Symbol("@a")) + @test 
parsestmt("export @var\"'\"") == Expr(:export, Symbol("@'")) + @test parsestmt("export a, \n @b") == Expr(:export, :a, Symbol("@b")) + @test parsestmt("export +, ==") == Expr(:export, :+, :(==)) + @test parsestmt("export \n a") == Expr(:export, :a) end @testset "global/const/local" begin - @test parsestmt(Expr, "global x") == Expr(:global, :x) - @test parsestmt(Expr, "local x") == Expr(:local, :x) - @test parsestmt(Expr, "global x,y") == Expr(:global, :x, :y) - @test parsestmt(Expr, "global const x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) - @test parsestmt(Expr, "local const x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) - @test parsestmt(Expr, "const global x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) - @test parsestmt(Expr, "const local x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) - @test parsestmt(Expr, "const x,y = 1,2") == Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) - @test parsestmt(Expr, "const x = 1") == Expr(:const, Expr(:(=), :x, 1)) - @test parsestmt(Expr, "global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1)) - @test parsestmt(Expr, "global x += 1") == Expr(:global, Expr(:+=, :x, 1)) + @test parsestmt("global x") == Expr(:global, :x) + @test parsestmt("local x") == Expr(:local, :x) + @test parsestmt("global x,y") == Expr(:global, :x, :y) + @test parsestmt("global const x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + @test parsestmt("local const x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + @test parsestmt("const global x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + @test parsestmt("const local x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + @test parsestmt("const x,y = 1,2") == Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) + @test parsestmt("const x = 1") == Expr(:const, Expr(:(=), :x, 1)) + @test parsestmt("global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1)) + @test parsestmt("global x += 1") == 
Expr(:global, Expr(:+=, :x, 1)) end @testset "tuples" begin - @test parsestmt(Expr, "(;)") == Expr(:tuple, Expr(:parameters)) - @test parsestmt(Expr, "(; a=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) - @test parsestmt(Expr, "(; a=1; b=2)") == + @test parsestmt("(;)") == Expr(:tuple, Expr(:parameters)) + @test parsestmt("(; a=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) + @test parsestmt("(; a=1; b=2)") == Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) - @test parsestmt(Expr, "(a; b; c,d)") == + @test parsestmt("(a; b; c,d)") == Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a) end @testset "module" begin - @test parsestmt(Expr, "module A end") == + @test parsestmt("module A end") == Expr(:module, true, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) - @test parsestmt(Expr, "baremodule A end") == + @test parsestmt("baremodule A end") == Expr(:module, false, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) end - @testset "errors" begin - @test parsestmt(Expr, "--", ignore_errors=true) == + @test parsestmt("--", ignore_errors=true) == Expr(:error, "invalid operator: `--`") - @test parseall(Expr, "a b", ignore_errors=true) == + @test parseall("a b", ignore_errors=true) == Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(1), Expr(:error, :b)) - @test parsestmt(Expr, "(x", ignore_errors=true) == + @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) end @testset "import" begin - @test parsestmt(Expr, "import A") == Expr(:import, Expr(:., :A)) - @test parsestmt(Expr, "import A.(:b).:c: x.:z", ignore_warnings=true) == + @test parsestmt("import A") == Expr(:import, Expr(:., :A)) + @test parsestmt("import A.(:b).:c: x.:z", ignore_warnings=true) == Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z))) # Stupid parens and quotes in import paths - @test parsestmt(Expr, "import A.:+", ignore_warnings=true) == + @test parsestmt("import 
A.:+", ignore_warnings=true) == Expr(:import, Expr(:., :A, :+)) - @test parsestmt(Expr, "import A.(:+)", ignore_warnings=true) == + @test parsestmt("import A.(:+)", ignore_warnings=true) == Expr(:import, Expr(:., :A, :+)) - @test parsestmt(Expr, "import A.:(+)", ignore_warnings=true) == + @test parsestmt("import A.:(+)", ignore_warnings=true) == Expr(:import, Expr(:., :A, :+)) - @test parsestmt(Expr, "import A.:(+) as y", ignore_warnings=true, version=v"1.6") == + @test parsestmt("import A.:(+) as y", ignore_warnings=true, version=v"1.6") == Expr(:import, Expr(:as, Expr(:., :A, :+), :y)) end end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a26994cd22301..078c2cb31f537 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -966,11 +966,6 @@ parsestmt_test_specs = [ end end -@testset "Broken tests" begin - # Technically broken. But do we even want this behavior? - @test_broken parse_to_sexpr_str(JuliaSyntax.parse_eq, "var\"\"\"x\"\"\"") == "(var x)" -end - @testset "Trivia attachment" begin # TODO: Need to expand this greatly to cover as many forms as possible! @test show_green_tree("f(a;b)") == """ From b23c18217583abb2f6a3d7a59919b29dadff6aeb Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 8 May 2023 06:13:18 +1000 Subject: [PATCH 0646/1109] Remove `K"parens"` from `SyntaxNode` (JuliaLang/JuliaSyntax.jl#269) Another approach to fix JuliaLang/JuliaSyntax.jl#239. Elide all `K"parens"` nodes from the `SyntaxNode` tree, replacing them with their single child. With JuliaLang/JuliaSyntax.jl#268 merged, this approach works more neatly and doesn't have the downsides of JuliaLang/JuliaSyntax.jl#265. 
--- JuliaSyntax/src/parser_api.jl | 4 ++-- JuliaSyntax/src/syntax_tree.jl | 18 ++++++++++++++---- JuliaSyntax/test/expr.jl | 16 +++++++++++++--- JuliaSyntax/test/parser.jl | 2 +- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index bc45d22ecab9f..c48fef6025899 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -77,7 +77,7 @@ end function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false, - ignore_warnings=ignore_errors) where {T} + ignore_warnings=ignore_errors, kws...) where {T} stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :all bump_trivia(stream, skip_newlines=true) @@ -99,7 +99,7 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= # * It's kind of required for GreenNode, as GreenNode only records spans, # not absolute positions. # * Dropping it would be ok for SyntaxNode and Expr... - tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename, first_line=first_line) + tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename, first_line=first_line, kws...) 
tree, last_byte(stream) + 1 end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 7f87a81a32dcc..76c4e3d6e968e 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -44,7 +44,13 @@ end Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) -function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::Integer=1) +function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; + keep_parens=false, position::Integer=1) + _to_SyntaxNode(source, raw, convert(Int, position), keep_parens) +end + +function _to_SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, + position::Int, keep_parens::Bool) if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. @@ -56,10 +62,13 @@ function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, position::In for (i,rawchild) in enumerate(children(raw)) # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. if !is_trivia(rawchild) || is_error(rawchild) - push!(cs, SyntaxNode(source, rawchild, pos)) + push!(cs, _to_SyntaxNode(source, rawchild, pos, keep_parens)) end pos += rawchild.span end + if !keep_parens && kind(raw) == K"parens" && length(cs) == 1 + return cs[1] + end node = SyntaxNode(nothing, cs, SyntaxData(source, raw, position, nothing)) for c in cs c.parent = node @@ -182,10 +191,11 @@ end # shallow-copy the data Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.position, data.val) -function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, first_line=1, kws...) +function build_tree(::Type{SyntaxNode}, stream::ParseStream; + filename=nothing, first_line=1, keep_parens=false, kws...) green_tree = build_tree(GreenNode, stream; kws...) 
source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) - SyntaxNode(source, green_tree, first_byte(stream)) + SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens) end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 4cdc23c66fd4c..31c47ce960384 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -5,9 +5,9 @@ (s; kws...) -> JuliaSyntax.parsestmt(Expr, s; kws...), (s; kws...) -> JuliaSyntax.parseall(Expr, s; kws...)) else - ((s; kws...) -> Expr(JuliaSyntax.parseatom(SyntaxNode, s; kws...)), - (s; kws...) -> Expr(JuliaSyntax.parsestmt(SyntaxNode, s; kws...)), - (s; kws...) -> Expr(JuliaSyntax.parseall(SyntaxNode, s; kws...))) + ((s; kws...) -> Expr(JuliaSyntax.parseatom(SyntaxNode, s; keep_parens=true, kws...)), + (s; kws...) -> Expr(JuliaSyntax.parsestmt(SyntaxNode, s; keep_parens=true, kws...)), + (s; kws...) -> Expr(JuliaSyntax.parseall(SyntaxNode, s; keep_parens=true, kws...))) end @testset "Quote nodes" begin @@ -534,6 +534,16 @@ @test parsestmt("return") == Expr(:return, nothing) end + @testset "Large integer macros" begin + @test parsestmt("0x00000000000000001") == + Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")), + nothing, "0x00000000000000001") + + @test parsestmt("(0x00000000000000001)") == + Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")), + nothing, "0x00000000000000001") + end + @testset "struct" begin @test parsestmt("struct A end") == Expr(:struct, false, :A, Expr(:block, LineNumberNode(1))) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 078c2cb31f537..e549b6684df36 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -7,7 +7,7 @@ function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", expr=fal JuliaSyntax.validate_tokens(stream) t = build_tree(GreenNode, stream, 
wrap_toplevel_as_kind=K"None") source = SourceFile(code) - s = SyntaxNode(source, t) + s = SyntaxNode(source, t, keep_parens=true) if expr JuliaSyntax.remove_linenums!(Expr(s)) else From c6b0e99b4edb5f7409e9ec43e7870a46837d01f9 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 9 May 2023 07:26:52 +1000 Subject: [PATCH 0647/1109] Bump version to 0.4 --- JuliaSyntax/Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 7438212f690fe..6493f1bea67f1 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" -authors = ["Chris Foster and contributors"] -version = "0.3.3" +authors = ["c42f and contributors"] +version = "0.4" [compat] julia = "1.0" From 950e458d93dc7d65c377a219fd251620c6cc3b23 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 10 May 2023 07:32:07 +1000 Subject: [PATCH 0648/1109] Fix token error ranges + highlighting for multibyte chars (JuliaLang/JuliaSyntax.jl#271) --- JuliaSyntax/src/parse_stream.jl | 10 +++++----- JuliaSyntax/src/source_files.jl | 12 ++++++++---- JuliaSyntax/test/diagnostics.jl | 4 ++++ JuliaSyntax/test/source_files.jl | 6 +++++- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 7f1f2cf804d83..74789c3005b08 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -888,7 +888,7 @@ function validate_tokens(stream::ParseStream) k = kind(t) fbyte = toks[i-1].next_byte nbyte = t.next_byte - lbyte = prevind(text, t.next_byte) + tokrange = fbyte:nbyte-1 error_kind = K"None" if k in KSet"Integer BinInt OctInt HexInt" # The following shouldn't be able to error... 
@@ -909,11 +909,11 @@ function validate_tokens(stream::ParseStream) if code === :ok # pass elseif code === :overflow - emit_diagnostic(stream, fbyte:lbyte, + emit_diagnostic(stream, tokrange, error="overflow in floating point literal") error_kind = K"ErrorNumericOverflow" elseif underflow0 - emit_diagnostic(stream, fbyte:lbyte, + emit_diagnostic(stream, tokrange, warning="underflow to zero in floating point literal") end elseif k == K"Char" @@ -928,7 +928,7 @@ function validate_tokens(stream::ParseStream) read(charbuf, Char) if !eof(charbuf) error_kind = K"ErrorOverLongCharacter" - emit_diagnostic(stream, fbyte:lbyte, + emit_diagnostic(stream, tokrange, error="character literal contains multiple characters") end end @@ -940,7 +940,7 @@ function validate_tokens(stream::ParseStream) end elseif is_error(k) && k != K"error" # Emit messages for non-generic token errors - emit_diagnostic(stream, fbyte:lbyte, + emit_diagnostic(stream, tokrange, error=_token_error_descriptions[k]) end if error_kind != K"None" diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index e0df0c19bcc44..c8e2a3983884b 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -108,6 +108,10 @@ function Base.getindex(source::SourceFile, i::Int) source.code[i] end +function Base.thisind(source::SourceFile, i::Int) + thisind(source.code, i) +end + Base.firstindex(source::SourceFile) = firstindex(source.code) Base.lastindex(source::SourceFile) = lastindex(source.code) @@ -203,7 +207,7 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; print(io, source[x:p-1]) _printstyled(io, hitext; bgcolor=color) print(io, source[q+1:d]) - source[d] == '\n' || print(io, "\n") + source[thisind(source, d)] == '\n' || print(io, "\n") _print_marker_line(io, source[a:p-1], hitext, true, true, marker_line_color, note, notecolor) else # x -------------- @@ -232,13 +236,13 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; 
_printstyled(io, source[z:q]; bgcolor=color) end print(io, source[q+1:d]) - source[d] == '\n' || print(io, "\n") + source[thisind(source, d)] == '\n' || print(io, "\n") qline = source[c:q] _print_marker_line(io, "", qline, true, false, marker_line_color, note, notecolor) end - if context_lines_after > 0 && d+1 < lastindex(source) + if context_lines_after > 0 && d+1 <= lastindex(source) print(io, '\n') - w1 = source[w] == '\n' ? w - 1 : w + w1 = source[thisind(source, w)] == '\n' ? w - 1 : w print(io, source[d+1:w1]) end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 1d1efb1fdce18..01ee6999c1e14 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -11,6 +11,10 @@ function diagnostic(str; only_first=false, allow_multiple=false) end end +@testset "token errors" begin + @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character") +end + @testset "parser errors" begin @test diagnostic("+ #==# (a,b)") == Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list") diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 1dec5ffd403e7..5c113a7353196 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -52,7 +52,7 @@ end @testset "highlight()" begin - src = JuliaSyntax.SourceFile(""" + src = SourceFile(""" abcd αβγδ +-*/""") @@ -81,6 +81,10 @@ end αβγδ #└─┘ +-*/""" + # multi-byte char at eof + @test sprint(highlight, SourceFile("a α"), 3:4) == "a α\n# ╙" + @test sprint(highlight, SourceFile("a\nα"), 1:4) == "┌\na\nα\n┘" + @test sprint(highlight, SourceFile("a\nb\nα"), 3:3) == "a\nb\n╙\nα" # Multi-line ranges @test sprint(highlight, src, 1:7) == """ From 3be7feb82305bc608470616f61d014dff94fec09 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 10 May 2023 09:14:27 +1000 Subject: [PATCH 0649/1109] Clean up BEGIN/END markers in kinds --- JuliaSyntax/src/kinds.jl | 76 
+++++++++++++++++++------------ JuliaSyntax/src/tokenize_utils.jl | 2 +- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index f32ccf56f2440..c1cece098bf8c 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -934,7 +934,25 @@ primitive type Kind 16 end # the K_str macro to self-name these kinds with their literal representation, # rather than needing to invent a new name for each. -let kind_int_type = :UInt16, +let kind_int_type = :UInt16 + # Preprocess _kind_names to conflate category markers with the first/last + # in the category. + kindstr_to_int = Dict{String,UInt16}() + i = 1 + while i <= length(_kind_names) + kn = _kind_names[i] + kind_int = i-1 + if startswith(kn, "BEGIN_") + deleteat!(_kind_names, i) + elseif startswith(kn, "END_") + kind_int = i-2 + deleteat!(_kind_names, i) + else + i += 1 + end + push!(kindstr_to_int, kn=>kind_int) + end + max_kind_int = length(_kind_names)-1 @eval begin @@ -945,9 +963,9 @@ let kind_int_type = :UInt16, return Base.bitcast(Kind, convert($kind_int_type, x)) end - Base.convert(::Type{String}, k::Kind) = _kind_names[1 + Base.bitcast($kind_int_type, k)] + Base.convert(::Type{String}, k::Kind) = _kind_names[1 + reinterpret($kind_int_type, k)] - let kindstr_to_int = Dict(s=>i-1 for (i,s) in enumerate(_kind_names)) + let kindstr_to_int=$kindstr_to_int function Base.convert(::Type{Kind}, s::AbstractString) i = get(kindstr_to_int, s) do error("unknown Kind name $(repr(s))") @@ -1078,12 +1096,12 @@ const _token_error_descriptions = Dict{Kind, String}( #------------------------------------------------------------------------------- # Predicates -is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" < k < K"END_CONTEXTUAL_KEYWORDS" -is_error(k::Kind) = K"BEGIN_ERRORS" < k < K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**" -is_keyword(k::Kind) = K"BEGIN_KEYWORDS" < k < K"END_KEYWORDS" -is_block_continuation_keyword(k::Kind) 
= K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" < k < K"END_BLOCK_CONTINUATION_KEYWORDS" -is_literal(k::Kind) = K"BEGIN_LITERAL" < k < K"END_LITERAL" -is_operator(k::Kind) = K"BEGIN_OPS" < k < K"END_OPS" +is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS" +is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**" +is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS" +is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" <= k <= K"END_BLOCK_CONTINUATION_KEYWORDS" +is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL" +is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS" is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") is_contextual_keyword(k) = is_contextual_keyword(kind(k)) @@ -1097,28 +1115,28 @@ is_word_operator(k) = is_word_operator(kind(k)) # Predicates for operator precedence # FIXME: Review how precedence depends on dottedness, eg # https://github.com/JuliaLang/julia/pull/36725 -is_prec_assignment(x) = K"BEGIN_ASSIGNMENTS" < kind(x) < K"END_ASSIGNMENTS" -is_prec_pair(x) = K"BEGIN_PAIRARROW" < kind(x) < K"END_PAIRARROW" -is_prec_conditional(x) = K"BEGIN_CONDITIONAL" < kind(x) < K"END_CONDITIONAL" -is_prec_arrow(x) = K"BEGIN_ARROW" < kind(x) < K"END_ARROW" -is_prec_lazy_or(x) = K"BEGIN_LAZYOR" < kind(x) < K"END_LAZYOR" -is_prec_lazy_and(x) = K"BEGIN_LAZYAND" < kind(x) < K"END_LAZYAND" -is_prec_comparison(x) = K"BEGIN_COMPARISON" < kind(x) < K"END_COMPARISON" -is_prec_pipe(x) = K"BEGIN_PIPE" < kind(x) < K"END_PIPE" -is_prec_colon(x) = K"BEGIN_COLON" < kind(x) < K"END_COLON" -is_prec_plus(x) = K"BEGIN_PLUS" < kind(x) < K"END_PLUS" -is_prec_bitshift(x) = K"BEGIN_BITSHIFTS" < kind(x) < K"END_BITSHIFTS" -is_prec_times(x) = K"BEGIN_TIMES" < kind(x) < K"END_TIMES" -is_prec_rational(x) = K"BEGIN_RATIONAL" < kind(x) < K"END_RATIONAL" -is_prec_power(x) = K"BEGIN_POWER" < kind(x) < K"END_POWER" -is_prec_decl(x) = 
K"BEGIN_DECL" < kind(x) < K"END_DECL" -is_prec_where(x) = K"BEGIN_WHERE" < kind(x) < K"END_WHERE" -is_prec_dot(x) = K"BEGIN_DOT" < kind(x) < K"END_DOT" -is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" < kind(x) < K"END_UNICODE_OPS" +is_prec_assignment(x) = K"BEGIN_ASSIGNMENTS" <= kind(x) <= K"END_ASSIGNMENTS" +is_prec_pair(x) = K"BEGIN_PAIRARROW" <= kind(x) <= K"END_PAIRARROW" +is_prec_conditional(x) = K"BEGIN_CONDITIONAL" <= kind(x) <= K"END_CONDITIONAL" +is_prec_arrow(x) = K"BEGIN_ARROW" <= kind(x) <= K"END_ARROW" +is_prec_lazy_or(x) = K"BEGIN_LAZYOR" <= kind(x) <= K"END_LAZYOR" +is_prec_lazy_and(x) = K"BEGIN_LAZYAND" <= kind(x) <= K"END_LAZYAND" +is_prec_comparison(x) = K"BEGIN_COMPARISON" <= kind(x) <= K"END_COMPARISON" +is_prec_pipe(x) = K"BEGIN_PIPE" <= kind(x) <= K"END_PIPE" +is_prec_colon(x) = K"BEGIN_COLON" <= kind(x) <= K"END_COLON" +is_prec_plus(x) = K"BEGIN_PLUS" <= kind(x) <= K"END_PLUS" +is_prec_bitshift(x) = K"BEGIN_BITSHIFTS" <= kind(x) <= K"END_BITSHIFTS" +is_prec_times(x) = K"BEGIN_TIMES" <= kind(x) <= K"END_TIMES" +is_prec_rational(x) = K"BEGIN_RATIONAL" <= kind(x) <= K"END_RATIONAL" +is_prec_power(x) = K"BEGIN_POWER" <= kind(x) <= K"END_POWER" +is_prec_decl(x) = K"BEGIN_DECL" <= kind(x) <= K"END_DECL" +is_prec_where(x) = K"BEGIN_WHERE" <= kind(x) <= K"END_WHERE" +is_prec_dot(x) = K"BEGIN_DOT" <= kind(x) <= K"END_DOT" +is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS" is_prec_pipe_lt(x) = kind(x) == K"<|" is_prec_pipe_gt(x) = kind(x) == K"|>" -is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS" < kind(x) < K"END_SYNTAX_KINDS" -is_macro_name(x) = K"BEGIN_MACRO_NAMES" < kind(x) < K"END_MACRO_NAMES" +is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS" +is_macro_name(x) = K"BEGIN_MACRO_NAMES" <= kind(x) <= K"END_MACRO_NAMES" function is_number(x) kind(x) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float", K"Float32") diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 
1767cc522b5db..94aa773eda310 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -224,7 +224,7 @@ end end function optakessuffix(k) - (K"BEGIN_OPS" < k < K"END_OPS") && + (K"BEGIN_OPS" <= k <= K"END_OPS") && !( k == K"..." || K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || From 8221db3348bed6018b81abae9404254c9429fe0a Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 05:45:07 +1000 Subject: [PATCH 0650/1109] Tests for numeric constants and dots --- JuliaSyntax/test/tokenize.jl | 60 ++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 4c0b778ef0ed6..4330e012fd912 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -419,7 +419,7 @@ end @testset "interpolation" begin @testset "basic" begin str = "\"\$x \$y\"" - ts = collect(tokenize(str)) + ts = collect(tokenize(str)) @test ts[1] ~ (K"\"" , "\"", str) @test ts[2] ~ (K"$" , "\$", str) @test ts[3] ~ (K"Identifier" , "x" , str) @@ -461,7 +461,7 @@ end @testset "duplicate \$" begin str = "\"\$\$\"" - ts = collect(tokenize(str)) + ts = collect(tokenize(str)) @test ts[1] ~ (K"\"" , "\"", str) @test ts[2] ~ (K"$" , "\$", str) @test ts[3] ~ (K"$" , "\$", str) @@ -472,7 +472,7 @@ end @testset "Unmatched parens" begin # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73 str = "\"\$(fdsf\"" - ts = collect(tokenize(str)) + ts = collect(tokenize(str)) @test ts[1] ~ (K"\"" , "\"" , str) @test ts[2] ~ (K"$" , "\$" , str) @test ts[3] ~ (K"(" , "(" , str) @@ -484,7 +484,7 @@ end @testset "Unicode" begin # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178 str = """ "\$uₕx \$(uₕx - ux)" """ - ts = collect(tokenize(str)) + ts = collect(tokenize(str)) @test ts[ 1] ~ (K"Whitespace" , " " , str) @test ts[ 2] ~ (K"\"" , "\"" , str) @test ts[ 3] ~ (K"$" , "\$" , str) @@ -505,7 +505,7 @@ end @testset "var\"...\" disabled in 
interpolations" begin str = """ "\$var"x" " """ - ts = collect(tokenize(str)) + ts = collect(tokenize(str)) @test ts[ 1] ~ (K"Whitespace" , " " , str) @test ts[ 2] ~ (K"\"" , "\"" , str) @test ts[ 3] ~ (K"$" , "\$" , str) @@ -519,14 +519,30 @@ end @test ts[11] ~ (K"EndMarker" , "" , str) end - @testset "invalid chars after identifier" begin - str = """ "\$x෴" """ - ts = collect(tokenize(str)) - @test ts[4] ~ (K"Identifier" , "x" , str) - @test ts[5] ~ (K"ErrorInvalidInterpolationTerminator" , "" , str) - @test ts[6] ~ (K"String" , "෴" , str) - @test is_error(ts[5].kind) - @test ts[5].kind == K"ErrorInvalidInterpolationTerminator" + @testset "chars after interpolation identifier" begin + # Operators allowed + @test toks("\"\$x?\"") == [ + "\""=>K"\"" + "\$"=>K"$" + "x"=>K"Identifier" + "?"=>K"String" + "\""=>K"\"" + ] + @test toks("\"\$x⫪\"") == [ + "\""=>K"\"" + "\$"=>K"$" + "x"=>K"Identifier" + "⫪"=>K"String" + "\""=>K"\"" + ] + # Some chars disallowed (eg, U+0DF4) + @test toks("\"\$x෴\"") == [ + "\""=>K"\"" + "\$"=>K"$" + "x"=>K"Identifier" + "෴"=>K"ErrorInvalidInterpolationTerminator" + "\""=>K"\"" + ] end end @@ -635,8 +651,6 @@ end @test toks("3e2_2") == ["3e2"=>K"Float", "_2"=>K"Identifier"] @test toks("1e") == ["1"=>K"Integer", "e"=>K"Identifier"] - @test toks("1.:0") == ["1."=>K"Float", ":"=>K":", "0"=>K"Integer"] - # Floating point with \minus rather than - @test onlytok("1.0e−0") == K"Float" @test onlytok("1.0f−0") == K"Float32" @@ -681,11 +695,23 @@ end "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] @test toks("1f0./1") == ["1f0"=>K"Float32", "./"=>K"/", "1"=>K"Integer"] + # Dotted operators after numeric constants are ok + @test toks("1e1.⫪") == ["1e1"=>K"Float", ".⫪"=>K"⫪"] + @test toks("1.1.⫪") == ["1.1"=>K"Float", ".⫪"=>K"⫪"] + @test toks("1e1.−") == ["1e1"=>K"Float", ".−"=>K"-"] + @test toks("1.1.−") == ["1.1"=>K"Float", ".−"=>K"-"] + # Non-dottable operators are not ok + @test toks("1e1.\$") == 
["1e1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"] + @test toks("1.1.\$") == ["1.1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"] + # Ambiguous dotted operators @test toks("1.+") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+"] @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"] @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"] - @test toks("1.?") == ["1."=>K"ErrorAmbiguousNumericConstant", "?"=>K"?"] + @test toks("1.⫪") == ["1."=>K"ErrorAmbiguousNumericConstant", "⫪"=>K"⫪"] + # non-dottable ops are the exception + @test toks("1.:") == ["1."=>K"Float", ":"=>K":"] + @test toks("1.\$") == ["1."=>K"Float", "\$"=>K"$"] # Ambiguous - literal vs multiply by juxtaposition @test toks("1.x") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "x"=>K"Identifier"] @@ -846,6 +872,8 @@ end raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" raw"::" raw"." + "⫪ ⫫" + "\u00b7 \u0387" ] if VERSION >= v"1.6.0" push!(ops, raw"<-- <-->") From 7e0d85df583c3ca47cffe38af5ceae98a2d78ae6 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 05:46:34 +1000 Subject: [PATCH 0651/1109] Cleanup and fix operator predicates Remove big lists of operator kinds and inscrutable hardcoded unicode ranges in predicates. Instead, generate all this code directly from the table of operator kinds. This makes new operators much easier to add in a single place in the code, thereby fixing a few bugs/inconsistencies which have crept in over time as new operators were added. Also fix several bugs in the tokenizer where `is_operator_start_char()` was used, but the check should have been restricted to dottable operators. 
--- JuliaSyntax/src/literal_parsing.jl | 10 +- JuliaSyntax/src/tokenize.jl | 23 +- JuliaSyntax/src/tokenize_utils.jl | 923 +++-------------------------- JuliaSyntax/test/tokenize.jl | 7 +- 4 files changed, 92 insertions(+), 871 deletions(-) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index 2527f86d4a087..c762ff056584e 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -329,11 +329,11 @@ end # static wrapper around user callback function function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32 - (codepoint == 0x025B ? 0x03B5 : - codepoint == 0x00B5 ? 0x03BC : - codepoint == 0x00B7 ? 0x22C5 : - codepoint == 0x0387 ? 0x22C5 : - codepoint == 0x2212 ? 0x002D : + (codepoint == 0x025B ? 0x03B5 : # 'ɛ' => 'ε' + codepoint == 0x00B5 ? 0x03BC : # 'µ' => 'μ' + codepoint == 0x00B7 ? 0x22C5 : # '·' => '⋅' + codepoint == 0x0387 ? 0x22C5 : # '·' => '⋅' + codepoint == 0x2212 ? 0x002D : # '−' => '-' codepoint) end diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 26eb31fb0b382..30e73cc8a43e4 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -2,7 +2,7 @@ module Tokenize export tokenize, untokenize, Tokens -using ..JuliaSyntax: Kind, @K_str +using ..JuliaSyntax: JuliaSyntax, Kind, @K_str import ..JuliaSyntax: kind, is_literal, is_error, is_contextual_keyword, is_word_operator @@ -370,7 +370,7 @@ function _next_token(l::Lexer, c) return lex_identifier(l, c) elseif isdigit(c) return lex_digit(l, K"Integer") - elseif (k = get(UNICODE_OPS, c, K"error")) != K"error" + elseif (k = get(_unicode_ops, c, K"error")) != K"error" return emit(l, k) else emit_error(l, K"ErrorUnknownCharacter") @@ -416,6 +416,7 @@ function lex_string_chunk(l) !(pc == EOF_CHAR || is_operator_start_char(pc) || is_never_id_char(pc)) # Only allow certain characters after interpolated vars # https://github.com/JuliaLang/julia/pull/25234 + readchar(l) return emit_error(l, 
K"ErrorInvalidInterpolationTerminator") end if pc == EOF_CHAR @@ -771,7 +772,7 @@ function lex_digit(l::Lexer, kind) # If we enter the function with kind == K"Float" then a '.' has been parsed. readchar(l) return emit_error(l, K"ErrorInvalidNumericConstant") - elseif is_operator_start_char(ppc) && ppc !== ':' + elseif is_dottable_operator_start_char(ppc) readchar(l) return emit_error(l, K"ErrorAmbiguousNumericConstant") # `1.+` end @@ -787,14 +788,14 @@ function lex_digit(l::Lexer, kind) accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) - if pc === '.' && !dotop2(ppc) + if pc === '.' && !is_dottable_operator_start_char(ppc) readchar(l) return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e1.` end else return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e` end - elseif pc == '.' && ppc != '.' && !is_operator_start_char(ppc) + elseif pc == '.' && ppc != '.' && !is_dottable_operator_start_char(ppc) readchar(l) return emit_error(l, K"ErrorInvalidNumericConstant") # `1.1.` elseif !had_fraction_digs && (is_identifier_start_char(pc) || @@ -808,7 +809,7 @@ function lex_digit(l::Lexer, kind) accept(l, "+-−") if accept_batch(l, isdigit) pc,ppc = dpeekchar(l) - if pc === '.' && !dotop2(ppc) + if pc === '.' 
&& !is_dottable_operator_start_char(ppc) accept(l, '.') return emit_error(l, K"ErrorInvalidNumericConstant") # `1e1.` end @@ -948,7 +949,7 @@ function lex_dot(l::Lexer) if accept(l, '.') return emit(l, K"...") else - if dotop2(peekchar(l)) + if is_dottable_operator_start_char(peekchar(l)) readchar(l) return emit_error(l, K"ErrorInvalidOperator") else @@ -959,10 +960,7 @@ function lex_dot(l::Lexer) return lex_digit(l, K"Float") else pc, dpc = dpeekchar(l) - if dotop1(pc) - l.dotop = true - return _next_token(l, readchar(l)) - elseif pc =='+' + if pc == '+' l.dotop = true readchar(l) return lex_plus(l) @@ -1040,6 +1038,9 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) return lex_equal(l) + elseif is_dottable_operator_start_char(pc) + l.dotop = true + return _next_token(l, readchar(l)) end return emit(l, K".") end diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl index 94aa773eda310..261b442a8f0f9 100644 --- a/JuliaSyntax/src/tokenize_utils.jl +++ b/JuliaSyntax/src/tokenize_utils.jl @@ -44,183 +44,81 @@ end readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) -# Checks whether a Char is an operator, which can not be juxtaposed with another -# Char to be an operator (i.e <=), and can be prefixed by a dot (.) 
-# magic number list created by filtering ops by those that successfully parse -# `a .(op) b` or `.(op)a` and where `length(string(op)) == 1` -@inline function dotop1(c1::Char) - c1 == EOF_CHAR && return false - Base.isvalid(c1) || return false - c = UInt32(c1) - c == 0x00000021 || - c == 0x000000a6 || - c == 0x0000002e || - c == 0x0000007e || - c == 0x000000ac || - c == 0x000000b1 || - c == 0x000000b7 || - c == 0x000000d7 || - c == 0x00000387 || - c == 0x00002026 || - c == 0x0000205d || - c == 0x0000214b || - 0x00002190 <= c <= 0x00002194 || - 0x0000219a <= c <= 0x0000219e || - c == 0x000021a0 || - 0x000021a2 <= c <= 0x000021a4 || - 0x000021aa <= c <= 0x000021ac || - c == 0x000021a6 || - c == 0x000021a9 || - c == 0x000021ae || - c == 0x000021c0 || - c == 0x000021c1 || - c == 0x000021c4 || - c == 0x000021c6 || - c == 0x000021c7 || - c == 0x000021c9 || - 0x000021cb <= c <= 0x000021cf || - c == 0x000021d2 || - c == 0x000021d4 || - c == 0x000021b6 || - c == 0x000021b7 || - 0x000021ba <= c <= 0x000021bd || - c == 0x000021d0 || - 0x000021da <= c <= 0x000021dd || - c == 0x000021e0 || - c == 0x000021e2 || - 0x000021f4 <= c <= 0x000021ff || - 0x00002208 <= c <= 0x0000220d || - 0x00002213 <= c <= 0x00002214 || - 0x00002217 <= c <= 0x00002219 || - 0x0000221a <= c <= 0x0000221d || - 0x00002224 <= c <= 0x0000222a || - 0x00002237 <= c <= 0x00002238 || - 0x0000223a <= c <= 0x0000223b || - 0x0000223d <= c <= 0x0000223e || - 0x00002240 <= c <= 0x0000228b || - 0x0000228d <= c <= 0x0000229c || - 0x0000229e <= c <= 0x000022a3 || - c == 0x000022a9 || - c == 0x000022ac || - c == 0x000022ae || - 0x000022b0 <= c <= 0x000022b7 || - 0x000022bc <= c <= 0x000022bd || - 0x000022c4 <= c <= 0x000022c7 || - 0x000022c9 <= c <= 0x000022d3 || - 0x000022d5 <= c <= 0x000022ff || - c == 0x0000233f || - c == 0x000025b7 || - c == 0x000027c2 || - 0x000027c8 <= c <= 0x000027c9 || - 0x000027d1 <= c <= 0x000027d2 || - 0x000027d5 <= c <= 0x000027d7 || - 0x000027f0 <= c <= 0x000027f1 || - 0x000027f5 <= c <= 
0x000027f7 || - 0x000027f9 <= c <= 0x000027ff || - 0x00002900 <= c <= 0x00002918 || - 0x0000291d <= c <= 0x00002920 || - 0x00002944 <= c <= 0x00002970 || - 0x000029b7 <= c <= 0x000029b8 || - c == 0x000029bc || - 0x000029be <= c <= 0x000029c1 || - c == 0x000029e1 || - 0x000029e3 <= c <= 0x000029e5 || - c == 0x000029f4 || - 0x000029f6 <= c <= 0x000029f7 || - 0x000029fa <= c <= 0x000029fb || - 0x00002a07 <= c <= 0x00002a08 || - c == 0x00002a1d || - c == 0x00002a1f || - 0x00002a22 <= c <= 0x00002a2e || - 0x00002a30 <= c <= 0x00002a3d || - 0x00002a40 <= c <= 0x00002a45 || - 0x00002a4a <= c <= 0x00002a58 || - 0x00002a5a <= c <= 0x00002a63 || - 0x00002a66 <= c <= 0x00002a67 || - 0x00002a6a <= c <= 0x00002ad9 || - c == 0x00002adb || - c == 0x00002aea || - c == 0x00002aeb || - 0x00002af7 <= c <= 0x00002afa || - 0x00002b30 <= c <= 0x00002b44 || - 0x00002b47 <= c <= 0x00002b4c || - 0x0000ffe9 <= c <= 0x0000ffec +# Some unicode operators are normalized by the tokenizer into their equivalent +# kinds. See also normalize_identifier() +const _ops_with_unicode_aliases = [ + # \minus '−' is normalized into K"-", + '−' => K"-" + # Lookalikes which are normalized into K"⋅", + # https://github.com/JuliaLang/julia/pull/25157, + '\u00b7' => K"⋅" # '·' Middle Dot,, + '\u0387' => K"⋅" # '·' Greek Ano Teleia,, +] + +function _nondot_symbolic_operator_kinds() + op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS") + setdiff(reinterpret.(Kind, op_range), [ + K"ErrorInvalidOperator" + K"Error**" + K"..." + K"." 
+ K"where" + K"isa" + K"in" + K".'" + ]) end -function dotop2(pc) - dotop1(pc) || - pc =='+' || - pc =='-' || - pc =='−' || - pc =='*' || - pc =='/' || - pc =='\\' || - pc =='^' || - pc =='<' || - pc =='>' || - pc =='&' || - pc =='%' || - pc == '=' || - pc == '|' || - pc == '⊻' || - pc == '÷' +function _char_in_set_expr(varname, firstchars) + codes = sort!(UInt32.(unique(firstchars))) + terms = [] + i = 1 + while i <= length(codes) + j = i + while j < length(codes) && codes[j+1] == codes[j]+1 + j += 1 + end + if i == j + push!(terms, :($varname == $(codes[i]))) + else + push!(terms, :($(codes[i]) <= $varname <= $(codes[j]))) + end + i = j+1 + end + foldr((t1,t2)->:($t1 || $t2), terms) end -# suffix operators -# https://github.com/JuliaLang/julia/blob/d7d2b0c692eb6ad409d7193ba8d9d42972cbf182/src/flisp/julia_extensions.c#L156-L174 -# -# ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ′″‴‵‶‷⁗ +@eval function is_operator_start_char(c) + if c == EOF_CHAR || !Base.isvalid(c) + return false + end + u = UInt32(c) + return $(_char_in_set_expr(:u, + append!(first.(string.(_nondot_symbolic_operator_kinds())), + first.(_ops_with_unicode_aliases)))) +end -@inline function isopsuffix(c1::Char) - c1 == EOF_CHAR && return false - Base.isvalid(c1) || return false - c = UInt32(c1) - if (c < 0xa1 || c > 0x10ffff) +# Checks whether a Char is an operator which can be prefixed with a dot `.` +function is_dottable_operator_start_char(c) + return c != '?' 
&& c != '$' && c != ':' && c != '\'' && is_operator_start_char(c) +end + +@eval function isopsuffix(c::Char) + c == EOF_CHAR && return false + Base.isvalid(c) || return false + u = UInt32(c) + if (u < 0xa1 || u > 0x10ffff) return false end - cat = Base.Unicode.category_code(c) + cat = Base.Unicode.category_code(u) if (cat == Base.Unicode.UTF8PROC_CATEGORY_MN || cat == Base.Unicode.UTF8PROC_CATEGORY_MC || cat == Base.Unicode.UTF8PROC_CATEGORY_ME) return true end - return 0x000000b2 <= c <= 0x000000b3 || - c == 0x000000b9 || - c == 0x000002b0 || - 0x000002b2 <= c <= 0x000002b3 || - 0x000002b7 <= c <= 0x000002b8 || - 0x000002e1 <= c <= 0x000002e3 || - c == 0x00000302 || - c == 0x00001d2c || - c == 0x00001d2e || - 0x00001d30 <= c <= 0x00001d31 || - 0x00001d33 <= c <= 0x00001d3a || - c == 0x00001d3c || - 0x00001d3e <= c <= 0x00001d43 || - 0x00001d47 <= c <= 0x00001d49 || - c == 0x00001d4d || - 0x00001d4f <= c <= 0x00001d50 || - c == 0x00001d52 || - 0x00001d56 <= c <= 0x00001d58 || - c == 0x00001d5b || - 0x00001d5d <= c <= 0x00001d6a || - c == 0x00001d9c || - c == 0x00001da0 || - 0x00001da5 <= c <= 0x00001da6 || - c == 0x00001dab || - c == 0x00001db0 || - c == 0x00001db8 || - c == 0x00001dbb || - c == 0x00001dbf || - c == 0x00002009 || - 0x00002032 <= c <= 0x00002037 || - c == 0x00002057 || - 0x00002070 <= c <= 0x00002071 || - 0x00002074 <= c <= 0x0000208e || - 0x00002090 <= c <= 0x00002093 || - 0x00002095 <= c <= 0x0000209c || - 0x00002c7c <= c <= 0x00002c7d || - 0x0000a71b <= c <= 0x0000a71d + # Additional allowed cases + return $(_char_in_set_expr(:u, + collect("²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽꜛꜜꜝ"))) end function optakessuffix(k) @@ -250,691 +148,14 @@ function optakessuffix(k) ) end -function is_operator_start_char(c::Char) - c == EOF_CHAR && return false - Base.isvalid(c) || return false - is_operator_start_char(UInt32(c)) -end -is_operator_start_char(u::UInt32) = u == 0x00000021 || 
(u == 0x00000024 || (u == 0x00000025 || (u == 0x00000026 || (u == 0x00000027 || (u == 0x0000002a || (u == 0x0000002b || (u == 0x0000002d || (u == 0x0000002e || (u == 0x0000002f || (u == 0x0000003a || (u == 0x0000003c || (u == 0x0000003d || (u == 0x0000003e || (u == 0x0000003f || (u == 0x0000005c || (u == 0x0000005e || (u == 0x00000069 || (u == 0x00000077 || (u == 0x0000007c || (u == 0x0000007e || (u == 0x000000ac || (u == 0x000000b1 || (u == 0x000000d7 || (u == 0x000000f7 || (u == 0x00002026 || (u == 0x0000205d || (u == 0x0000214b || (u == 0x00002190 || (u == 0x00002191 || (u == 0x00002192 || (u == 0x00002193 || (u == 0x00002194 || (u == 0x0000219a || (u == 0x0000219b || (u == 0x000021a0 || (u == 0x000021a3 || (u == 0x000021a6 || (u == 0x000021ae || (u == 0x000021ce || (u == 0x000021cf || (u == 0x000021d2 || (u == 0x000021d4 || (u == 0x000021f4 || (u == 0x000021f5 || (u == 0x000021f6 || (u == 0x000021f7 || (u == 0x000021f8 || (u == 0x000021f9 || (u == 0x000021fa || (u == 0x000021fb || (u == 0x000021fc || (u == 0x000021fd || (u == 0x000021fe || (u == 0x000021ff || (u == 0x00002208 || (u == 0x00002209 || (u == 0x0000220a || (u == 0x0000220b || (u == 0x0000220c || (u == 0x0000220d || (u == 0x00002213 || (u == 0x00002214 || (u == 0x00002217 || (u == 0x00002218 || (u == 0x00002219 || (u == 0x0000221a || (u == 0x0000221b || (u == 0x0000221c || (u == 0x0000221d || (u == 0x00002224 || (u == 0x00002225 || (u == 0x00002226 || (u == 0x00002227 || (u == 0x00002228 || (u == 0x00002229 || (u == 0x0000222a || (u == 0x00002237 || (u == 0x00002238 || (u == 0x0000223a || (u == 0x0000223b || (u == 0x0000223d || (u == 0x0000223e || (u == 0x00002240 || (u == 0x00002241 || (u == 0x00002242 || (u == 0x00002243 || (u == 0x00002244 || (u == 0x00002245 || (u == 0x00002246 || (u == 0x00002247 || (u == 0x00002248 || (u == 0x00002249 || (u == 0x0000224a || (u == 0x0000224b || (u == 0x0000224c || (u == 0x0000224d || (u == 0x0000224e || (u == 0x0000224f || (u == 0x00002250 || (u == 0x00002251 || 
(u == 0x00002252 || (u == 0x00002253 || (u == 0x00002254 || (u == 0x00002255 || (u == 0x00002256 || (u == 0x00002257 || (u == 0x00002258 || (u == 0x00002259 || (u == 0x0000225a || (u == 0x0000225b || (u == 0x0000225c || (u == 0x0000225d || (u == 0x0000225e || (u == 0x0000225f || (u == 0x00002260 || (u == 0x00002261 || (u == 0x00002262 || (u == 0x00002263 || (u == 0x00002264 || (u == 0x00002265 || (u == 0x00002266 || (u == 0x00002267 || (u == 0x00002268 || (u == 0x00002269 || (u == 0x0000226a || (u == 0x0000226b || (u == 0x0000226c || (u == 0x0000226d || (u == 0x0000226e || (u == 0x0000226f || (u == 0x00002270 || (u == 0x00002271 || (u == 0x00002272 || (u == 0x00002273 || (u == 0x00002274 || (u == 0x00002275 || (u == 0x00002276 || (u == 0x00002277 || (u == 0x00002278 || (u == 0x00002279 || (u == 0x0000227a || (u == 0x0000227b || (u == 0x0000227c || (u == 0x0000227d || (u == 0x0000227e || (u == 0x0000227f || (u == 0x00002280 || (u == 0x00002281 || (u == 0x00002282 || (u == 0x00002283 || (u == 0x00002284 || (u == 0x00002285 || (u == 0x00002286 || (u == 0x00002287 || (u == 0x00002288 || (u == 0x00002289 || (u == 0x0000228a || (u == 0x0000228b || (u == 0x0000228d || (u == 0x0000228e || (u == 0x0000228f || (u == 0x00002290 || (u == 0x00002291 || (u == 0x00002292 || (u == 0x00002293 || (u == 0x00002294 || (u == 0x00002295 || (u == 0x00002296 || (u == 0x00002297 || (u == 0x00002298 || (u == 0x00002299 || (u == 0x0000229a || (u == 0x0000229b || (u == 0x0000229c || (u == 0x0000229e || (u == 0x0000229f || (u == 0x000022a0 || (u == 0x000022a1 || (u == 0x000022a2 || (u == 0x000022a3 || (u == 0x000022a9 || (u == 0x000022ac || (u == 0x000022ae || (u == 0x000022b0 || (u == 0x000022b1 || (u == 0x000022b2 || (u == 0x000022b3 || (u == 0x000022b4 || (u == 0x000022b5 || (u == 0x000022b6 || (u == 0x000022b7 || (u == 0x000022bb || (u == 0x000022bc || (u == 0x000022bd || (u == 0x000022c4 || (u == 0x000022c5 || (u == 0x000022c6 || (u == 0x000022c7 || (u == 0x000022c9 || (u == 0x000022ca || 
(u == 0x000022cb || (u == 0x000022cc || (u == 0x000022cd || (u == 0x000022ce || (u == 0x000022cf || (u == 0x000022d0 || (u == 0x000022d1 || (u == 0x000022d2 || (u == 0x000022d3 || (u == 0x000022d5 || (u == 0x000022d6 || (u == 0x000022d7 || (u == 0x000022d8 || (u == 0x000022d9 || (u == 0x000022da || (u == 0x000022db || (u == 0x000022dc || (u == 0x000022dd || (u == 0x000022de || (u == 0x000022df || (u == 0x000022e0 || (u == 0x000022e1 || (u == 0x000022e2 || (u == 0x000022e3 || (u == 0x000022e4 || (u == 0x000022e5 || (u == 0x000022e6 || (u == 0x000022e7 || (u == 0x000022e8 || (u == 0x000022e9 || (u == 0x000022ea || (u == 0x000022eb || (u == 0x000022ec || (u == 0x000022ed || (u == 0x000022ee || (u == 0x000022ef || (u == 0x000022f0 || (u == 0x000022f1 || (u == 0x000022f2 || (u == 0x000022f3 || (u == 0x000022f4 || (u == 0x000022f5 || (u == 0x000022f6 || (u == 0x000022f7 || (u == 0x000022f8 || (u == 0x000022f9 || (u == 0x000022fa || (u == 0x000022fb || (u == 0x000022fc || (u == 0x000022fd || (u == 0x000022fe || (u == 0x000022ff || (u == 0x000025b7 || (u == 0x000027c2 || (u == 0x000027c8 || (u == 0x000027c9 || (u == 0x000027d1 || (u == 0x000027d2 || (u == 0x000027d5 || (u == 0x000027d6 || (u == 0x000027d7 || (u == 0x000027f0 || (u == 0x000027f1 || (u == 0x000027f5 || (u == 0x000027f6 || (u == 0x000027f7 || (u == 0x000027f9 || (u == 0x000027fa || (u == 0x000027fb || (u == 0x000027fc || (u == 0x000027fd || (u == 0x000027fe || (u == 0x000027ff || (u == 0x00002900 || (u == 0x00002901 || (u == 0x00002902 || (u == 0x00002903 || (u == 0x00002904 || (u == 0x00002905 || (u == 0x00002906 || (u == 0x00002907 || (u == 0x00002908 || (u == 0x00002909 || (u == 0x0000290a || (u == 0x0000290b || (u == 0x0000290c || (u == 0x0000290d || (u == 0x0000290e || (u == 0x0000290f || (u == 0x00002910 || (u == 0x00002911 || (u == 0x00002912 || (u == 0x00002913 || (u == 0x00002914 || (u == 0x00002915 || (u == 0x00002916 || (u == 0x00002917 || (u == 0x00002918 || (u == 0x0000291d || (u == 0x0000291e || 
(u == 0x0000291f || (u == 0x00002920 || (u == 0x00002944 || (u == 0x00002945 || (u == 0x00002946 || (u == 0x00002947 || (u == 0x00002948 || (u == 0x00002949 || (u == 0x0000294a || (u == 0x0000294b || (u == 0x0000294c || (u == 0x0000294d || (u == 0x0000294e || (u == 0x0000294f || (u == 0x00002950 || (u == 0x00002951 || (u == 0x00002952 || (u == 0x00002953 || (u == 0x00002954 || (u == 0x00002955 || (u == 0x00002956 || (u == 0x00002957 || (u == 0x00002958 || (u == 0x00002959 || (u == 0x0000295a || (u == 0x0000295b || (u == 0x0000295c || (u == 0x0000295d || (u == 0x0000295e || (u == 0x0000295f || (u == 0x00002960 || (u == 0x00002961 || (u == 0x00002962 || (u == 0x00002963 || (u == 0x00002964 || (u == 0x00002965 || (u == 0x00002966 || (u == 0x00002967 || (u == 0x00002968 || (u == 0x00002969 || (u == 0x0000296a || (u == 0x0000296b || (u == 0x0000296c || (u == 0x0000296d || (u == 0x0000296e || (u == 0x0000296f || (u == 0x00002970 || (u == 0x000029b7 || (u == 0x000029b8 || (u == 0x000029bc || (u == 0x000029be || (u == 0x000029bf || (u == 0x000029c0 || (u == 0x000029c1 || (u == 0x000029e1 || (u == 0x000029e3 || (u == 0x000029e4 || (u == 0x000029e5 || (u == 0x000029f4 || (u == 0x000029f6 || (u == 0x000029f7 || (u == 0x000029fa || (u == 0x000029fb || (u == 0x00002a07 || (u == 0x00002a08 || (u == 0x00002a1d || (u == 0x00002a22 || (u == 0x00002a23 || (u == 0x00002a24 || (u == 0x00002a25 || (u == 0x00002a26 || (u == 0x00002a27 || (u == 0x00002a28 || (u == 0x00002a29 || (u == 0x00002a2a || (u == 0x00002a2b || (u == 0x00002a2c || (u == 0x00002a2d || (u == 0x00002a2e || (u == 0x00002a30 || (u == 0x00002a31 || (u == 0x00002a32 || (u == 0x00002a33 || (u == 0x00002a34 || (u == 0x00002a35 || (u == 0x00002a36 || (u == 0x00002a37 || (u == 0x00002a38 || (u == 0x00002a39 || (u == 0x00002a3a || (u == 0x00002a3b || (u == 0x00002a3c || (u == 0x00002a3d || (u == 0x00002a40 || (u == 0x00002a41 || (u == 0x00002a42 || (u == 0x00002a43 || (u == 0x00002a44 || (u == 0x00002a45 || (u == 0x00002a4a || 
(u == 0x00002a4b || (u == 0x00002a4c || (u == 0x00002a4d || (u == 0x00002a4e || (u == 0x00002a4f || (u == 0x00002a50 || (u == 0x00002a51 || (u == 0x00002a52 || (u == 0x00002a53 || (u == 0x00002a54 || (u == 0x00002a55 || (u == 0x00002a56 || (u == 0x00002a57 || (u == 0x00002a58 || (u == 0x00002a5a || (u == 0x00002a5b || (u == 0x00002a5c || (u == 0x00002a5d || (u == 0x00002a5e || (u == 0x00002a5f || (u == 0x00002a60 || (u == 0x00002a61 || (u == 0x00002a62 || (u == 0x00002a63 || (u == 0x00002a66 || (u == 0x00002a67 || (u == 0x00002a6a || (u == 0x00002a6b || (u == 0x00002a6c || (u == 0x00002a6d || (u == 0x00002a6e || (u == 0x00002a6f || (u == 0x00002a70 || (u == 0x00002a71 || (u == 0x00002a72 || (u == 0x00002a73 || (u == 0x00002a74 || (u == 0x00002a75 || (u == 0x00002a76 || (u == 0x00002a77 || (u == 0x00002a78 || (u == 0x00002a79 || (u == 0x00002a7a || (u == 0x00002a7b || (u == 0x00002a7c || (u == 0x00002a7d || (u == 0x00002a7e || (u == 0x00002a7f || (u == 0x00002a80 || (u == 0x00002a81 || (u == 0x00002a82 || (u == 0x00002a83 || (u == 0x00002a84 || (u == 0x00002a85 || (u == 0x00002a86 || (u == 0x00002a87 || (u == 0x00002a88 || (u == 0x00002a89 || (u == 0x00002a8a || (u == 0x00002a8b || (u == 0x00002a8c || (u == 0x00002a8d || (u == 0x00002a8e || (u == 0x00002a8f || (u == 0x00002a90 || (u == 0x00002a91 || (u == 0x00002a92 || (u == 0x00002a93 || (u == 0x00002a94 || (u == 0x00002a95 || (u == 0x00002a96 || (u == 0x00002a97 || (u == 0x00002a98 || (u == 0x00002a99 || (u == 0x00002a9a || (u == 0x00002a9b || (u == 0x00002a9c || (u == 0x00002a9d || (u == 0x00002a9e || (u == 0x00002a9f || (u == 0x00002aa0 || (u == 0x00002aa1 || (u == 0x00002aa2 || (u == 0x00002aa3 || (u == 0x00002aa4 || (u == 0x00002aa5 || (u == 0x00002aa6 || (u == 0x00002aa7 || (u == 0x00002aa8 || (u == 0x00002aa9 || (u == 0x00002aaa || (u == 0x00002aab || (u == 0x00002aac || (u == 0x00002aad || (u == 0x00002aae || (u == 0x00002aaf || (u == 0x00002ab0 || (u == 0x00002ab1 || (u == 0x00002ab2 || (u == 0x00002ab3 || 
(u == 0x00002ab4 || (u == 0x00002ab5 || (u == 0x00002ab6 || (u == 0x00002ab7 || (u == 0x00002ab8 || (u == 0x00002ab9 || (u == 0x00002aba || (u == 0x00002abb || (u == 0x00002abc || (u == 0x00002abd || (u == 0x00002abe || (u == 0x00002abf || (u == 0x00002ac0 || (u == 0x00002ac1 || (u == 0x00002ac2 || (u == 0x00002ac3 || (u == 0x00002ac4 || (u == 0x00002ac5 || (u == 0x00002ac6 || (u == 0x00002ac7 || (u == 0x00002ac8 || (u == 0x00002ac9 || (u == 0x00002aca || (u == 0x00002acb || (u == 0x00002acc || (u == 0x00002acd || (u == 0x00002ace || (u == 0x00002acf || (u == 0x00002ad0 || (u == 0x00002ad1 || (u == 0x00002ad2 || (u == 0x00002ad3 || (u == 0x00002ad4 || (u == 0x00002ad5 || (u == 0x00002ad6 || (u == 0x00002ad7 || (u == 0x00002ad8 || (u == 0x00002ad9 || (u == 0x00002adb || (u == 0x00002af7 || (u == 0x00002af8 || (u == 0x00002af9 || (u == 0x00002afa || (u == 0x00002b30 || (u == 0x00002b31 || (u == 0x00002b32 || (u == 0x00002b33 || (u == 0x00002b34 || (u == 0x00002b35 || (u == 0x00002b36 || (u == 0x00002b37 || (u == 0x00002b38 || (u == 0x00002b39 || (u == 0x00002b3a || (u == 0x00002b3b || (u == 0x00002b3c || (u == 0x00002b3d || (u == 0x00002b3e || (u == 0x00002b3f || (u == 0x00002b40 || (u == 0x00002b41 || (u == 0x00002b42 || (u == 0x00002b43 || (u == 0x00002b44 || (u == 0x00002b47 || (u == 0x00002b48 || (u == 0x00002b49 || (u == 0x00002b4a || (u == 0x00002b4b || (u == 0x00002b4c || (u == 0x0000ffe9 || (u == 0x0000ffea || (u == 0x0000ffeb || u == 
0x0000ffec))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) - -const UNICODE_OPS = Dict{Char, Kind}( - # '−' is normalized into K"-", - '−' => K"-", - '÷' => K"÷", - '¬' => K"¬", - '√' => K"√", - '∛' => K"∛", - '∜' => K"∜", - '←' => K"←", - '→' => K"→", - '↔' => K"↔", - '↚' => K"↚", - '↛' => K"↛", - '↞' => K"↞", - '↠' => K"↠", - '↢' => K"↢", - '↣' => K"↣", - '↤' => K"↤", - '↦' => K"↦", - '↮' => K"↮", - '⇎' => K"⇎", - '⇍' => K"⇍", - '⇏' => K"⇏", - '⇐' => K"⇐", - '⇒' => K"⇒", - '⇔' => K"⇔", - '⇴' => K"⇴", - '⇶' => K"⇶", - '⇷' => K"⇷", - '⇸' => K"⇸", - '⇹' => K"⇹", - '⇺' => K"⇺", - '⇻' => K"⇻", - '⇼' => K"⇼", - '⇽' => K"⇽", - '⇾' => K"⇾", - '⇿' => K"⇿", - '⟵' => K"⟵", - '⟶' => K"⟶", - '⟷' => K"⟷", - '⟹' => K"⟹", - '⟺' => K"⟺", - '⟻' => K"⟻", - '⟼' => K"⟼", - '⟽' => K"⟽", - '⟾' => K"⟾", - '⟿' => K"⟿", - '⤀' => K"⤀", - '⤁' => K"⤁", - '⤂' => K"⤂", - '⤃' => K"⤃", - '⤄' => K"⤄", - '⤅' => K"⤅", - '⤆' => K"⤆", - '⤇' => K"⤇", - '⤌' => K"⤌", - '⤍' => K"⤍", - '⤎' => K"⤎", - '⤏' => K"⤏", - '⤐' => K"⤐", - '⤑' => K"⤑", - '⤔' => K"⤔", - '⤕' => K"⤕", - '⤖' => K"⤖", - '⤗' => K"⤗", - '⤘' => K"⤘", - '⤝' => K"⤝", - '⤞' => K"⤞", - '⤟' => K"⤟", - '⤠' => K"⤠", - '⥄' => K"⥄", - '⥅' => K"⥅", - '⥆' => K"⥆", - '⥇' => K"⥇", - '⥈' => K"⥈", - '⥊' => K"⥊", - '⥋' => K"⥋", - '⥎' => K"⥎", - '⥐' => K"⥐", - '⥒' => K"⥒", - '⥓' => K"⥓", - '⥖' => K"⥖", - '⥗' => K"⥗", - '⥚' => K"⥚", - '⥛' => K"⥛", - '⥞' => K"⥞", - '⥟' => K"⥟", - '⥢' => K"⥢", - '⥤' => K"⥤", - '⥦' => K"⥦", - '⥧' => K"⥧", - '⥨' 
=> K"⥨", - '⥩' => K"⥩", - '⥪' => K"⥪", - '⥫' => K"⥫", - '⥬' => K"⥬", - '⥭' => K"⥭", - '⥰' => K"⥰", - '⧴' => K"⧴", - '⬱' => K"⬱", - '⬰' => K"⬰", - '⬲' => K"⬲", - '⬳' => K"⬳", - '⬴' => K"⬴", - '⬵' => K"⬵", - '⬶' => K"⬶", - '⬷' => K"⬷", - '⬸' => K"⬸", - '⬹' => K"⬹", - '⬺' => K"⬺", - '⬻' => K"⬻", - '⬼' => K"⬼", - '⬽' => K"⬽", - '⬾' => K"⬾", - '⬿' => K"⬿", - '⭀' => K"⭀", - '⭁' => K"⭁", - '⭂' => K"⭂", - '⭃' => K"⭃", - '⭄' => K"⭄", - '⭇' => K"⭇", - '⭈' => K"⭈", - '⭉' => K"⭉", - '⭊' => K"⭊", - '⭋' => K"⭋", - '⭌' => K"⭌", - '←' => K"←", - '→' => K"→", - '≥' => K"≥", - '≤' => K"≤", - '≡' => K"≡", - '≠' => K"≠", - '≢' => K"≢", - '∈' => K"∈", - '∉' => K"∉", - '∋' => K"∋", - '∌' => K"∌", - '⊆' => K"⊆", - '⊈' => K"⊈", - '⊂' => K"⊂", - '⊄' => K"⊄", - '⊊' => K"⊊", - '∝' => K"∝", - '∊' => K"∊", - '∍' => K"∍", - '∥' => K"∥", - '∦' => K"∦", - '∷' => K"∷", - '∺' => K"∺", - '∻' => K"∻", - '∽' => K"∽", - '∾' => K"∾", - '≁' => K"≁", - '≃' => K"≃", - '≄' => K"≄", - '≅' => K"≅", - '≆' => K"≆", - '≇' => K"≇", - '≈' => K"≈", - '≉' => K"≉", - '≊' => K"≊", - '≋' => K"≋", - '≌' => K"≌", - '≍' => K"≍", - '≎' => K"≎", - '≐' => K"≐", - '≑' => K"≑", - '≒' => K"≒", - '≓' => K"≓", - '≔' => K"≔", - '≕' => K"≕", - '≖' => K"≖", - '≗' => K"≗", - '≘' => K"≘", - '≙' => K"≙", - '≚' => K"≚", - '≛' => K"≛", - '≜' => K"≜", - '≝' => K"≝", - '≞' => K"≞", - '≟' => K"≟", - '≣' => K"≣", - '≦' => K"≦", - '≧' => K"≧", - '≨' => K"≨", - '≩' => K"≩", - '≪' => K"≪", - '≫' => K"≫", - '≬' => K"≬", - '≭' => K"≭", - '≮' => K"≮", - '≯' => K"≯", - '≰' => K"≰", - '≱' => K"≱", - '≲' => K"≲", - '≳' => K"≳", - '≴' => K"≴", - '≵' => K"≵", - '≶' => K"≶", - '≷' => K"≷", - '≸' => K"≸", - '≹' => K"≹", - '≺' => K"≺", - '≻' => K"≻", - '≼' => K"≼", - '≽' => K"≽", - '≾' => K"≾", - '≿' => K"≿", - '⊀' => K"⊀", - '⊁' => K"⊁", - '⊃' => K"⊃", - '⊅' => K"⊅", - '⊇' => K"⊇", - '⊉' => K"⊉", - '⊋' => K"⊋", - '⊏' => K"⊏", - '⊐' => K"⊐", - '⊑' => K"⊑", - '⊒' => K"⊒", - '⊜' => K"⊜", - '⊩' => K"⊩", - '⊬' => K"⊬", - '⊮' => K"⊮", - '⊰' => K"⊰", - '⊱' => 
K"⊱", - '⊲' => K"⊲", - '⊳' => K"⊳", - '⊴' => K"⊴", - '⊵' => K"⊵", - '⊶' => K"⊶", - '⊷' => K"⊷", - '⋍' => K"⋍", - '⋐' => K"⋐", - '⋑' => K"⋑", - '⋕' => K"⋕", - '⋖' => K"⋖", - '⋗' => K"⋗", - '⋘' => K"⋘", - '⋙' => K"⋙", - '⋚' => K"⋚", - '⋛' => K"⋛", - '⋜' => K"⋜", - '⋝' => K"⋝", - '⋞' => K"⋞", - '⋟' => K"⋟", - '⋠' => K"⋠", - '⋡' => K"⋡", - '⋢' => K"⋢", - '⋣' => K"⋣", - '⋤' => K"⋤", - '⋥' => K"⋥", - '⋦' => K"⋦", - '⋧' => K"⋧", - '⋨' => K"⋨", - '⋩' => K"⋩", - '⋪' => K"⋪", - '⋫' => K"⋫", - '⋬' => K"⋬", - '⋭' => K"⋭", - '⋲' => K"⋲", - '⋳' => K"⋳", - '⋴' => K"⋴", - '⋵' => K"⋵", - '⋶' => K"⋶", - '⋷' => K"⋷", - '⋸' => K"⋸", - '⋹' => K"⋹", - '⋺' => K"⋺", - '⋻' => K"⋻", - '⋼' => K"⋼", - '⋽' => K"⋽", - '⋾' => K"⋾", - '⋿' => K"⋿", - '⟈' => K"⟈", - '⟉' => K"⟉", - '⟒' => K"⟒", - '⦷' => K"⦷", - '⧀' => K"⧀", - '⧁' => K"⧁", - '⧡' => K"⧡", - '⧣' => K"⧣", - '⧤' => K"⧤", - '⧥' => K"⧥", - '⩦' => K"⩦", - '⩧' => K"⩧", - '⩪' => K"⩪", - '⩫' => K"⩫", - '⩬' => K"⩬", - '⩭' => K"⩭", - '⩮' => K"⩮", - '⩯' => K"⩯", - '⩰' => K"⩰", - '⩱' => K"⩱", - '⩲' => K"⩲", - '⩳' => K"⩳", - '⩴' => K"⩴", - '⩵' => K"⩵", - '⩶' => K"⩶", - '⩷' => K"⩷", - '⩸' => K"⩸", - '⩹' => K"⩹", - '⩺' => K"⩺", - '⩻' => K"⩻", - '⩼' => K"⩼", - '⩽' => K"⩽", - '⩾' => K"⩾", - '⩿' => K"⩿", - '⪀' => K"⪀", - '⪁' => K"⪁", - '⪂' => K"⪂", - '⪃' => K"⪃", - '⪄' => K"⪄", - '⪅' => K"⪅", - '⪆' => K"⪆", - '⪇' => K"⪇", - '⪈' => K"⪈", - '⪉' => K"⪉", - '⪊' => K"⪊", - '⪋' => K"⪋", - '⪌' => K"⪌", - '⪍' => K"⪍", - '⪎' => K"⪎", - '⪏' => K"⪏", - '⪐' => K"⪐", - '⪑' => K"⪑", - '⪒' => K"⪒", - '⪓' => K"⪓", - '⪔' => K"⪔", - '⪕' => K"⪕", - '⪖' => K"⪖", - '⪗' => K"⪗", - '⪘' => K"⪘", - '⪙' => K"⪙", - '⪚' => K"⪚", - '⪛' => K"⪛", - '⪜' => K"⪜", - '⪝' => K"⪝", - '⪞' => K"⪞", - '⪟' => K"⪟", - '⪠' => K"⪠", - '⪡' => K"⪡", - '⪢' => K"⪢", - '⪣' => K"⪣", - '⪤' => K"⪤", - '⪥' => K"⪥", - '⪦' => K"⪦", - '⪧' => K"⪧", - '⪨' => K"⪨", - '⪩' => K"⪩", - '⪪' => K"⪪", - '⪫' => K"⪫", - '⪬' => K"⪬", - '⪭' => K"⪭", - '⪮' => K"⪮", - '⪯' => K"⪯", - '⪰' => K"⪰", - '⪱' => K"⪱", - '⪲' => 
K"⪲", - '⪳' => K"⪳", - '⪴' => K"⪴", - '⪵' => K"⪵", - '⪶' => K"⪶", - '⪷' => K"⪷", - '⪸' => K"⪸", - '⪹' => K"⪹", - '⪺' => K"⪺", - '⪻' => K"⪻", - '⪼' => K"⪼", - '⪽' => K"⪽", - '⪾' => K"⪾", - '⪿' => K"⪿", - '⫀' => K"⫀", - '⫁' => K"⫁", - '⫂' => K"⫂", - '⫃' => K"⫃", - '⫄' => K"⫄", - '⫅' => K"⫅", - '⫆' => K"⫆", - '⫇' => K"⫇", - '⫈' => K"⫈", - '⫉' => K"⫉", - '⫊' => K"⫊", - '⫋' => K"⫋", - '⫌' => K"⫌", - '⫍' => K"⫍", - '⫎' => K"⫎", - '⫏' => K"⫏", - '⫐' => K"⫐", - '⫑' => K"⫑", - '⫒' => K"⫒", - '⫓' => K"⫓", - '⫔' => K"⫔", - '⫕' => K"⫕", - '⫖' => K"⫖", - '⫗' => K"⫗", - '⫘' => K"⫘", - '⫙' => K"⫙", - '⫷' => K"⫷", - '⫸' => K"⫸", - '⫹' => K"⫹", - '⫺' => K"⫺", - '⊢' => K"⊢", - '⊣' => K"⊣", - '⫪' => K"⫪", - '⫫' => K"⫫", - '⟂' => K"⟂", - '⊕' => K"⊕", - '⊖' => K"⊖", - '⊞' => K"⊞", - '⊟' => K"⊟", - '|' => K"|", - '∪' => K"∪", - '∨' => K"∨", - '⊔' => K"⊔", - '±' => K"±", - '∓' => K"∓", - '∔' => K"∔", - '∸' => K"∸", - '≂' => K"≂", - '≏' => K"≏", - '⊎' => K"⊎", - '⊻' => K"⊻", - '⊽' => K"⊽", - '⋎' => K"⋎", - '⋓' => K"⋓", - '⧺' => K"⧺", - '⧻' => K"⧻", - '⨈' => K"⨈", - '⨢' => K"⨢", - '⨣' => K"⨣", - '⨤' => K"⨤", - '⨥' => K"⨥", - '⨦' => K"⨦", - '⨧' => K"⨧", - '⨨' => K"⨨", - '⨩' => K"⨩", - '⨪' => K"⨪", - '⨫' => K"⨫", - '⨬' => K"⨬", - '⨭' => K"⨭", - '⨮' => K"⨮", - '⨹' => K"⨹", - '⨺' => K"⨺", - '⩁' => K"⩁", - '⩂' => K"⩂", - '⩅' => K"⩅", - '⩊' => K"⩊", - '⩌' => K"⩌", - '⩏' => K"⩏", - '⩐' => K"⩐", - '⩒' => K"⩒", - '⩔' => K"⩔", - '⩖' => K"⩖", - '⩗' => K"⩗", - '⩛' => K"⩛", - '⩝' => K"⩝", - '⩡' => K"⩡", - '⩢' => K"⩢", - '⩣' => K"⩣", - '∘' => K"∘", - '×' => K"×", - '∩' => K"∩", - '∧' => K"∧", - '⊗' => K"⊗", - '⊘' => K"⊘", - '⊙' => K"⊙", - '⊚' => K"⊚", - '⊛' => K"⊛", - '⊠' => K"⊠", - '⊡' => K"⊡", - '⊓' => K"⊓", - '∗' => K"∗", - '∙' => K"∙", - '∤' => K"∤", - '⅋' => K"⅋", - '≀' => K"≀", - '⊼' => K"⊼", - '⋄' => K"⋄", - '⋆' => K"⋆", - '⋇' => K"⋇", - '⋉' => K"⋉", - '⋊' => K"⋊", - '⋋' => K"⋋", - '⋌' => K"⋌", - '⋏' => K"⋏", - '⋒' => K"⋒", - '⟑' => K"⟑", - '⦸' => K"⦸", - '⦼' => K"⦼", - '⦾' => K"⦾", - '⦿' => 
K"⦿", - '⧶' => K"⧶", - '⧷' => K"⧷", - '⨇' => K"⨇", - '⨰' => K"⨰", - '⨱' => K"⨱", - '⨲' => K"⨲", - '⨳' => K"⨳", - '⨴' => K"⨴", - '⨵' => K"⨵", - '⨶' => K"⨶", - '⨷' => K"⨷", - '⨸' => K"⨸", - '⨻' => K"⨻", - '⨼' => K"⨼", - '⨽' => K"⨽", - '⩀' => K"⩀", - '⩃' => K"⩃", - '⩄' => K"⩄", - '⩋' => K"⩋", - '⩍' => K"⩍", - '⩎' => K"⩎", - '⩑' => K"⩑", - '⩓' => K"⩓", - '⩕' => K"⩕", - '⩘' => K"⩘", - '⩚' => K"⩚", - '⩜' => K"⩜", - '⩞' => K"⩞", - '⩟' => K"⩟", - '⩠' => K"⩠", - '⫛' => K"⫛", - '⊍' => K"⊍", - '▷' => K"▷", - '⨝' => K"⨝", - '⟕' => K"⟕", - '⟖' => K"⟖", - '⟗' => K"⟗", - '^' => K"^", - '↑' => K"↑", - '↓' => K"↓", - '⇵' => K"⇵", - '⟰' => K"⟰", - '⟱' => K"⟱", - '⤈' => K"⤈", - '⤉' => K"⤉", - '⤊' => K"⤊", - '⤋' => K"⤋", - '⤒' => K"⤒", - '⤓' => K"⤓", - '⥉' => K"⥉", - '⥌' => K"⥌", - '⥍' => K"⥍", - '⥏' => K"⥏", - '⥑' => K"⥑", - '⥔' => K"⥔", - '⥕' => K"⥕", - '⥘' => K"⥘", - '⥙' => K"⥙", - '⥜' => K"⥜", - '⥝' => K"⥝", - '⥠' => K"⥠", - '⥡' => K"⥡", - '⥣' => K"⥣", - '⥥' => K"⥥", - '⥮' => K"⥮", - '⥯' => K"⥯", - '↑' => K"↑", - '↓' => K"↓", - # Lookalikes which are normalized into K"⋅", - # https://github.com/JuliaLang/julia/pull/25157, - '\u00b7' => K"⋅", # '·' Middle Dot,, - '\u0387' => K"⋅", # '·' Greek Ano Teleia,, - '⋅' => K"⋅", - '…' => K"…", - '⁝' => K"⁝", - '⋮' => K"⋮", - '⋱' => K"⋱", - '⋰' => K"⋰", - '⋯' => K"⋯", - '↻' => K"↻", - '⇜' => K"⇜", - '⇝' => K"⇝", - '↜' => K"↜", - '↝' => K"↝", - '↩' => K"↩", - '↪' => K"↪", - '↫' => K"↫", - '↬' => K"↬", - '↼' => K"↼", - '↽' => K"↽", - '⇀' => K"⇀", - '⇁' => K"⇁", - '⇄' => K"⇄", - '⇆' => K"⇆", - '⇇' => K"⇇", - '⇉' => K"⇉", - '⇋' => K"⇋", - '⇌' => K"⇌", - '⇚' => K"⇚", - '⇛' => K"⇛", - '⇠' => K"⇠", - '⇢' => K"⇢", - '↷' => K"↷", - '↶' => K"↶", - '↺' => K"↺", - '¦' => K"¦", - '⌿' => K"⌿", - '⨟' => K"⨟", -) +const _unicode_ops = let + ks = _nondot_symbolic_operator_kinds() + ss = string.(ks) - -# For use in tests only ? 
-const UNICODE_OPS_REVERSE = Dict{Kind,Symbol}() -for (k, v) in UNICODE_OPS - k in ('\u00b7', '\u0387') && continue - UNICODE_OPS_REVERSE[v] = Symbol(k) -end - -for (k, v) in [ - K"=" => :(=) - K"+=" => :(+=) - K"-=" => :(-=) - K"*=" => :(*=) - K"/=" => :(/=) - K"//=" => :(//=) - K"|=" => :(|=) - K"^=" => :(^=) - K"÷=" => :(÷=) - K"%=" => :(%=) - K"<<=" => :(<<=) - K">>=" => :(>>=) - K"<<" => :(<<) - K">>" => :(>>) - K">>>" => :(>>>) - K">>>=" => :(>>>=) - K"\=" => :(\=) - K"&=" => :(&=) - K":=" => :(:=) - K"=>" => :(=>) - K"~" => :(~) - K"$=" => :($=) - K"⊻=" => :(⊻=) - K"-->" => :(-->) - K"||" => :(||) - K"&&" => :(&&) - K"<:" => :(<:) - K">:" => :(>:) - K">" => :(>) - K"<" => :(<) - K">=" => :(>=) - K"≥" => :(≥) - K"<=" => :(<=) - K"≤" => :(≤) - K"==" => :(==) - K"===" => :(===) - K"≡" => :(≡) - K"!=" => :(!=) - K"≠" => :(≠) - K"!==" => :(!==) - K"≢" => :(≢) - K"in" => :(in) - K"isa" => :(isa) - K"<|" => :(<|) - K"|>" => :(|>) - K":" => :(:) - K".." => :(..) - K"$" => :($) - K"+" => :(+) - K"-" => :(-) - K"++" => :(++) - K"|" => :(|) - K"*" => :(*) - K"/" => :(/) - K"%" => :(%) - K"\\" => :(\) - K"&" => :(&) - K"//" => :(//) - K"^" => :(^) - K"::" => :(::) - K"?" => :? - K"." => :(.) - K"!" => :(!) - K"'" => Symbol(''') - K"..." => :(...) 
- K".'" => Symbol(".'") - K"->" => :(->) - K"where" => :where - ] - UNICODE_OPS_REVERSE[k] = v + ops = Dict{Char, Kind}([first(s)=>k for (k,s) in zip(ks,ss) + if length(s) == 1 && !isascii(s[1])]) + for ck in _ops_with_unicode_aliases + push!(ops, ck) + end + ops end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 4330e012fd912..15b8d202f7215 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -762,10 +762,9 @@ end end @testset "dotted and suffixed operators" begin -ops = collect(values(Tokenize.UNICODE_OPS_REVERSE)) -for op in ops - op in (:isa, :in, :where, Symbol('\''), :?, :(:)) && continue +for opkind in Tokenize._nondot_symbolic_operator_kinds() + op = string(opkind) strs = [ 1 => [ # unary "$(op)b", @@ -978,7 +977,7 @@ end @test Tokenize.is_identifier_char(c) == false @test Tokenize.is_identifier_start_char(c) == false @test Tokenize.is_never_id_char(c) == true - @test Tokenize.dotop1(c) == false + @test Tokenize.is_dottable_operator_start_char(c) == false @test Tokenize.isopsuffix(c) == false @test Tokenize.is_operator_start_char(c) == false @test Tokenize.iswhitespace(c) == false From 0893e7d0e47391520d9f6a1f500b180e95838f98 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 06:36:39 +1000 Subject: [PATCH 0652/1109] Relocate code from tokenize_utils.jl to tokenize.jl Now that this code is much cleaner and shorter it makes sense to have the whole lexer in one file. 
--- JuliaSyntax/src/tokenize.jl | 164 +++++++++++++++++++++++++++++- JuliaSyntax/src/tokenize_utils.jl | 161 ----------------------------- 2 files changed, 163 insertions(+), 162 deletions(-) delete mode 100644 JuliaSyntax/src/tokenize_utils.jl diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 30e73cc8a43e4..02053cd28ce4e 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -7,7 +7,169 @@ using ..JuliaSyntax: JuliaSyntax, Kind, @K_str import ..JuliaSyntax: kind, is_literal, is_error, is_contextual_keyword, is_word_operator -include("tokenize_utils.jl") +#------------------------------------------------------------------------------- +# Character-based predicates for tokenization +import Base.Unicode + +const EOF_CHAR = typemax(Char) + +function is_identifier_char(c::Char) + c == EOF_CHAR && return false + Base.isvalid(c) || return false + return Base.is_id_char(c) +end + +function is_identifier_start_char(c::Char) + c == EOF_CHAR && return false + Base.isvalid(c) || return false + return Base.is_id_start_char(c) +end + +# Chars that we will never allow to be part of a valid non-operator identifier +function is_never_id_char(ch::Char) + Base.isvalid(ch) || return true + cat = Unicode.category_code(ch) + c = UInt32(ch) + return ( + # spaces and control characters: + (cat >= Unicode.UTF8PROC_CATEGORY_ZS && cat <= Unicode.UTF8PROC_CATEGORY_CS) || + + # ASCII and Latin1 non-connector punctuation + (c < 0xff && + cat >= Unicode.UTF8PROC_CATEGORY_PD && cat <= Unicode.UTF8PROC_CATEGORY_PO) || + + c == UInt32('`') || + + # mathematical brackets + (c >= 0x27e6 && c <= 0x27ef) || + # angle, corner, and lenticular brackets + (c >= 0x3008 && c <= 0x3011) || + # tortoise shell, square, and more lenticular brackets + (c >= 0x3014 && c <= 0x301b) || + # fullwidth parens + (c == 0xff08 || c == 0xff09) || + # fullwidth square brackets + (c == 0xff3b || c == 0xff3d) + ) +end + +readchar(io::IO) = eof(io) ? 
EOF_CHAR : read(io, Char) + +# Some unicode operators are normalized by the tokenizer into their equivalent +# kinds. See also normalize_identifier() +const _ops_with_unicode_aliases = [ + # \minus '−' is normalized into K"-", + '−' => K"-" + # Lookalikes which are normalized into K"⋅", + # https://github.com/JuliaLang/julia/pull/25157, + '\u00b7' => K"⋅" # '·' Middle Dot,, + '\u0387' => K"⋅" # '·' Greek Ano Teleia,, +] + +function _nondot_symbolic_operator_kinds() + op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS") + setdiff(reinterpret.(Kind, op_range), [ + K"ErrorInvalidOperator" + K"Error**" + K"..." + K"." + K"where" + K"isa" + K"in" + K".'" + ]) +end + +function _char_in_set_expr(varname, firstchars) + codes = sort!(UInt32.(unique(firstchars))) + terms = [] + i = 1 + while i <= length(codes) + j = i + while j < length(codes) && codes[j+1] == codes[j]+1 + j += 1 + end + if i == j + push!(terms, :($varname == $(codes[i]))) + else + push!(terms, :($(codes[i]) <= $varname <= $(codes[j]))) + end + i = j+1 + end + foldr((t1,t2)->:($t1 || $t2), terms) +end + +@eval function is_operator_start_char(c) + if c == EOF_CHAR || !Base.isvalid(c) + return false + end + u = UInt32(c) + return $(_char_in_set_expr(:u, + append!(first.(string.(_nondot_symbolic_operator_kinds())), + first.(_ops_with_unicode_aliases)))) +end + +# Checks whether a Char is an operator which can be prefixed with a dot `.` +function is_dottable_operator_start_char(c) + return c != '?' 
&& c != '$' && c != ':' && c != '\'' && is_operator_start_char(c) +end + +@eval function isopsuffix(c::Char) + c == EOF_CHAR && return false + Base.isvalid(c) || return false + u = UInt32(c) + if (u < 0xa1 || u > 0x10ffff) + return false + end + cat = Base.Unicode.category_code(u) + if (cat == Base.Unicode.UTF8PROC_CATEGORY_MN || + cat == Base.Unicode.UTF8PROC_CATEGORY_MC || + cat == Base.Unicode.UTF8PROC_CATEGORY_ME) + return true + end + # Additional allowed cases + return $(_char_in_set_expr(:u, + collect("²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽꜛꜜꜝ"))) +end + +function optakessuffix(k) + (K"BEGIN_OPS" <= k <= K"END_OPS") && + !( + k == K"..." || + K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || + k == K"?" || + k == K"<:" || + k == K">:" || + k == K"&&" || + k == K"||" || + k == K"in" || + k == K"isa" || + k == K"≔" || + k == K"⩴" || + k == K":" || + k == K".." || + k == K"$" || + k == K"::" || + k == K"where" || + k == K"." || + k == K"!" 
|| + k == K".'" || + k == K"->" || + K"¬" <= k <= K"∜" + ) +end + +const _unicode_ops = let + ks = _nondot_symbolic_operator_kinds() + ss = string.(ks) + + ops = Dict{Char, Kind}([first(s)=>k for (k,s) in zip(ks,ss) + if length(s) == 1 && !isascii(s[1])]) + for ck in _ops_with_unicode_aliases + push!(ops, ck) + end + ops +end #------------------------------------------------------------------------------- # Tokens diff --git a/JuliaSyntax/src/tokenize_utils.jl b/JuliaSyntax/src/tokenize_utils.jl deleted file mode 100644 index 261b442a8f0f9..0000000000000 --- a/JuliaSyntax/src/tokenize_utils.jl +++ /dev/null @@ -1,161 +0,0 @@ -import Base.Unicode - -const EOF_CHAR = typemax(Char) - -function is_identifier_char(c::Char) - c == EOF_CHAR && return false - Base.isvalid(c) || return false - return Base.is_id_char(c) -end - -function is_identifier_start_char(c::Char) - c == EOF_CHAR && return false - Base.isvalid(c) || return false - return Base.is_id_start_char(c) -end - -# Chars that we will never allow to be part of a valid non-operator identifier -function is_never_id_char(ch::Char) - Base.isvalid(ch) || return true - cat = Unicode.category_code(ch) - c = UInt32(ch) - return ( - # spaces and control characters: - (cat >= Unicode.UTF8PROC_CATEGORY_ZS && cat <= Unicode.UTF8PROC_CATEGORY_CS) || - - # ASCII and Latin1 non-connector punctuation - (c < 0xff && - cat >= Unicode.UTF8PROC_CATEGORY_PD && cat <= Unicode.UTF8PROC_CATEGORY_PO) || - - c == UInt32('`') || - - # mathematical brackets - (c >= 0x27e6 && c <= 0x27ef) || - # angle, corner, and lenticular brackets - (c >= 0x3008 && c <= 0x3011) || - # tortoise shell, square, and more lenticular brackets - (c >= 0x3014 && c <= 0x301b) || - # fullwidth parens - (c == 0xff08 || c == 0xff09) || - # fullwidth square brackets - (c == 0xff3b || c == 0xff3d) - ) -end - -readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) - -# Some unicode operators are normalized by the tokenizer into their equivalent -# kinds. 
See also normalize_identifier() -const _ops_with_unicode_aliases = [ - # \minus '−' is normalized into K"-", - '−' => K"-" - # Lookalikes which are normalized into K"⋅", - # https://github.com/JuliaLang/julia/pull/25157, - '\u00b7' => K"⋅" # '·' Middle Dot,, - '\u0387' => K"⋅" # '·' Greek Ano Teleia,, -] - -function _nondot_symbolic_operator_kinds() - op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS") - setdiff(reinterpret.(Kind, op_range), [ - K"ErrorInvalidOperator" - K"Error**" - K"..." - K"." - K"where" - K"isa" - K"in" - K".'" - ]) -end - -function _char_in_set_expr(varname, firstchars) - codes = sort!(UInt32.(unique(firstchars))) - terms = [] - i = 1 - while i <= length(codes) - j = i - while j < length(codes) && codes[j+1] == codes[j]+1 - j += 1 - end - if i == j - push!(terms, :($varname == $(codes[i]))) - else - push!(terms, :($(codes[i]) <= $varname <= $(codes[j]))) - end - i = j+1 - end - foldr((t1,t2)->:($t1 || $t2), terms) -end - -@eval function is_operator_start_char(c) - if c == EOF_CHAR || !Base.isvalid(c) - return false - end - u = UInt32(c) - return $(_char_in_set_expr(:u, - append!(first.(string.(_nondot_symbolic_operator_kinds())), - first.(_ops_with_unicode_aliases)))) -end - -# Checks whether a Char is an operator which can be prefixed with a dot `.` -function is_dottable_operator_start_char(c) - return c != '?' 
&& c != '$' && c != ':' && c != '\'' && is_operator_start_char(c) -end - -@eval function isopsuffix(c::Char) - c == EOF_CHAR && return false - Base.isvalid(c) || return false - u = UInt32(c) - if (u < 0xa1 || u > 0x10ffff) - return false - end - cat = Base.Unicode.category_code(u) - if (cat == Base.Unicode.UTF8PROC_CATEGORY_MN || - cat == Base.Unicode.UTF8PROC_CATEGORY_MC || - cat == Base.Unicode.UTF8PROC_CATEGORY_ME) - return true - end - # Additional allowed cases - return $(_char_in_set_expr(:u, - collect("²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽꜛꜜꜝ"))) -end - -function optakessuffix(k) - (K"BEGIN_OPS" <= k <= K"END_OPS") && - !( - k == K"..." || - K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || - k == K"?" || - k == K"<:" || - k == K">:" || - k == K"&&" || - k == K"||" || - k == K"in" || - k == K"isa" || - k == K"≔" || - k == K"⩴" || - k == K":" || - k == K".." || - k == K"$" || - k == K"::" || - k == K"where" || - k == K"." || - k == K"!" || - k == K".'" || - k == K"->" || - K"¬" <= k <= K"∜" - ) -end - -const _unicode_ops = let - ks = _nondot_symbolic_operator_kinds() - ss = string.(ks) - - ops = Dict{Char, Kind}([first(s)=>k for (k,s) in zip(ks,ss) - if length(s) == 1 && !isascii(s[1])]) - for ck in _ops_with_unicode_aliases - push!(ops, ck) - end - ops -end From 297fb389b2b3484f245c670fbcd3efb12d8443d9 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 08:41:27 +1000 Subject: [PATCH 0653/1109] fixup! Tests for numeric constants and dots --- JuliaSyntax/test/tokenize.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 15b8d202f7215..ecfaad90f4f5f 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -871,12 +871,17 @@ end raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" raw"::" raw"." 
- "⫪ ⫫" - "\u00b7 \u0387" ] if VERSION >= v"1.6.0" push!(ops, raw"<-- <-->") end + if VERSION >= v"1.7.0" + append!(ops, [ + "−" + "\u00b7 \u0387" + "⫪ ⫫" + ]) + end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == is_operator(first(collect(tokenize(s))).kind), allops) end From 19043c47ccac72d7582bba5a6d2c8a5ab00e68dd Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 09:00:38 +1000 Subject: [PATCH 0654/1109] Additional operators from upstream (JuliaLang/JuliaSyntax.jl#273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See upstream PRs * https://github.com/JuliaLang/julia/pull/47647 - `⟇` `\veedot` operator * https://github.com/JuliaLang/julia/pull/49623 - `⥺` `\leftarrowsubset` - `⥷` `\leftarrowless` --- JuliaSyntax/src/kinds.jl | 3 +++ JuliaSyntax/test/tokenize.jl | 3 +++ 2 files changed, 6 insertions(+) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index c1cece098bf8c..7402c29245936 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -255,7 +255,9 @@ const _kind_names = "⭁" "⭂" "⭃" + "⥷" "⭄" + "⥺" "⭇" "⭈" "⭉" @@ -637,6 +639,7 @@ const _kind_names = "⊽" "⋎" "⋓" + "⟇" "⧺" "⧻" "⨈" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index ecfaad90f4f5f..baaa08d0835c7 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -882,6 +882,9 @@ end "⫪ ⫫" ]) end + if VERSION >= v"1.10-DEV" + push!(ops, "⥷ ⥺ ⟇") + end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == is_operator(first(collect(tokenize(s))).kind), allops) end From fed05e1f1ae44f060ffc063d576a7b25c8fe5ef2 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 16:56:46 +1000 Subject: [PATCH 0655/1109] Move remaining Expr tests to test/expr.jl (JuliaLang/JuliaSyntax.jl#274) --- JuliaSyntax/test/expr.jl | 9 +++++++++ JuliaSyntax/test/parser.jl | 23 +++++++++-------------- 2 files changed, 18 insertions(+), 14 
deletions(-) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 31c47ce960384..9c9088b34163d 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -399,6 +399,15 @@ Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) end + @testset "var" begin + @test parsestmt("var\"x\"") == :x + @test parsestmt("var\"\"") == Symbol("") + @test parsestmt("var\"\\\"\"") == Symbol("\"") + @test parsestmt("var\"\\\\\\\"\"") == Symbol("\\\"") + @test parsestmt("var\"\\\\x\"") == Symbol("\\\\x") + @test parsestmt("var\"x\"+y") == Expr(:call, :+, :x, :y) + end + @testset "vect" begin @test parsestmt("[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e549b6684df36..69ef9554c51d3 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -25,12 +25,7 @@ function test_parse(production, input, output) else opts = NamedTuple() end - if output isa Pair - @test parse_to_sexpr_str(production, input; opts...) == output[1] - @test parse_to_sexpr_str(production, input; opts..., expr=true) == output[2] - else - @test parse_to_sexpr_str(production, input; opts...) == output - end + @test parse_to_sexpr_str(production, input; opts...) 
== output end function test_parse(inout::Pair) @@ -724,16 +719,16 @@ tests = [ "xx" => "xx" "x₁" => "x₁" # var syntax - """var"x" """ => "(var x)" => :x + """var"x" """ => "(var x)" # var syntax raw string unescaping - "var\"\"" => "(var )" => Symbol("") - "var\"\\\"\"" => "(var \")" => Symbol("\"") - "var\"\\\\\\\"\"" => "(var \\\")" => Symbol("\\\"") - "var\"\\\\x\"" => "(var \\\\x)" => Symbol("\\\\x") + "var\"\"" => "(var )" + "var\"\\\"\"" => "(var \")" + "var\"\\\\\\\"\"" => "(var \\\")" + "var\"\\\\x\"" => "(var \\\\x)" # trailing syntax after var - """var"x"+""" => "(var x)" => :x - """var"x")""" => "(var x)" => :x - """var"x"(""" => "(var x)" => :x + """var"x"+""" => "(var x)" + """var"x")""" => "(var x)" + """var"x"(""" => "(var x)" """var"x"end""" => "(var x (error-t))" """var"x"1""" => "(var x (error-t))" """var"x"y""" => "(var x (error-t))" From f0e62bf5ad6d3677dd939bb37581e1299fc307cf Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 19:49:39 +1000 Subject: [PATCH 0656/1109] == operator for green trees (JuliaLang/JuliaSyntax.jl#275) Also add some more direct GreeNode tests --- JuliaSyntax/src/green_tree.jl | 4 +++ JuliaSyntax/test/green_node.jl | 54 ++++++++++++++++++++++++++++++++++ JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/test_utils.jl | 3 ++ 4 files changed, 62 insertions(+) create mode 100644 JuliaSyntax/test/green_node.jl diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 11b01ae42df79..37c814c2d6035 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -55,6 +55,10 @@ head(node::GreenNode) = node.head Base.summary(node::GreenNode) = summary(node.head) +function Base.:(==)(n1::GreenNode, n2::GreenNode) + n1.head == n2.head && n1.span == n2.span && n1.args == n2.args +end + # Pretty printing function _show_green_node(io, node, indent, pos, str, show_trivia) if !show_trivia && is_trivia(node) diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl new 
file mode 100644 index 0000000000000..8dc79455bbf25 --- /dev/null +++ b/JuliaSyntax/test/green_node.jl @@ -0,0 +1,54 @@ +@testset "GreenNode" begin + t = parsestmt(GreenNode, "aa + b") + + @test span(t) == 6 + @test haschildren(t) + @test head(t) == SyntaxHead(K"call", 0x0008) + @test span.(children(t)) == [2,1,1,1,1] + @test head.(children(t)) == [ + SyntaxHead(K"Identifier", 0x0000) + SyntaxHead(K"Whitespace", 0x0001) + SyntaxHead(K"+", 0x0000) + SyntaxHead(K"Whitespace", 0x0001) + SyntaxHead(K"Identifier", 0x0000) + ] + + t2 = parsestmt(GreenNode, "aa + b") + @test t == t2 + @test t !== t2 + + text = "f(@x(y), z)" + @test sprint(show, MIME("text/plain"), parsestmt(GreenNode, text)) == + """ + 1:11 │[call] + 1:1 │ Identifier ✔ + 2:2 │ ( + 3:7 │ [macrocall] + 3:3 │ @ + 4:4 │ MacroName ✔ + 5:5 │ ( + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ , + 9:9 │ Whitespace + 10:10 │ Identifier ✔ + 11:11 │ ) + """ + + @test sprint(show, MIME("text/plain"), parsestmt(GreenNode, text), text) == + """ + 1:11 │[call] + 1:1 │ Identifier ✔ "f" + 2:2 │ ( "(" + 3:7 │ [macrocall] + 3:3 │ @ "@" + 4:4 │ MacroName ✔ "x" + 5:5 │ ( "(" + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ , "," + 9:9 │ Whitespace " " + 10:10 │ Identifier ✔ "z" + 11:11 │ ) ")" + """ +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index b43cbf590f513..d574fac3a218a 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -51,6 +51,7 @@ end include("parse_stream.jl") include("parser.jl") +include("green_node.jl") include("syntax_tree.jl") include("diagnostics.jl") include("parser_api.jl") diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index fa01c95ffa105..3ee9f2438a75c 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -20,6 +20,9 @@ using .JuliaSyntax: # Node inspection kind, flags, + head, + span, + SyntaxHead, is_trivia, sourcetext, haschildren, From 10c429044a2dcf95d30ba8c307d9404d5d4c7bf3 Mon Sep 17 
00:00:00 2001 From: c42f Date: Sun, 14 May 2023 05:33:53 +1000 Subject: [PATCH 0657/1109] =?UTF-8?q?Normalize=20=E2=84=8F=20(\hslash)=20t?= =?UTF-8?q?o=20=C4=A7=20(\hbar)=20(JuliaLang/JuliaSyntax.jl#277)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See upstream PR https://github.com/JuliaLang/julia/pull/49559 --- JuliaSyntax/src/literal_parsing.jl | 3 ++- JuliaSyntax/test/literal_parsing.jl | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index c762ff056584e..67a7af6a230eb 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -333,7 +333,8 @@ function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32 codepoint == 0x00B5 ? 0x03BC : # 'µ' => 'μ' codepoint == 0x00B7 ? 0x22C5 : # '·' => '⋅' codepoint == 0x0387 ? 0x22C5 : # '·' => '⋅' - codepoint == 0x2212 ? 0x002D : # '−' => '-' + codepoint == 0x2212 ? 0x002D : # '−' (\minus) => '-' + codepoint == 0x210F ? 
0x0127 : # 'ℏ' (\hslash) => 'ħ' \hbar codepoint) end diff --git a/JuliaSyntax/test/literal_parsing.jl b/JuliaSyntax/test/literal_parsing.jl index 64b868289e176..7c4d1012090a9 100644 --- a/JuliaSyntax/test/literal_parsing.jl +++ b/JuliaSyntax/test/literal_parsing.jl @@ -248,4 +248,8 @@ end # https://github.com/JuliaLang/julia/pull/42561 @test JuliaSyntax.normalize_identifier("julia\u025B\u00B5\u00B7\u0387\u2212") == "julia\u03B5\u03BC\u22C5\u22C5\u002D" + + # https://github.com/JuliaLang/julia/issues/48870 + # ℏ -> ħ + @test JuliaSyntax.normalize_identifier("\u210f") == "\u0127" end From 5ff3800dd6eb75c1ddbfd745791c2653ba09b217 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 14 May 2023 06:45:53 +1000 Subject: [PATCH 0658/1109] Small fix for `Expr(:incomplete)` detection (JuliaLang/JuliaSyntax.jl#278) This off-by-one error prevented incomplete detection from working for trailing single-character error tokens --- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/test/hooks.jl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 757ee907651aa..1fc31f52d652b 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -33,7 +33,7 @@ function _incomplete_tag(n::SyntaxNode, codelen) i,c = _first_error(n) if isnothing(c) || last_byte(c) < codelen || codelen == 0 return :none - elseif first_byte(c) < codelen + elseif first_byte(c) <= codelen if kind(c) == K"ErrorEofMultiComment" && last_byte(c) == codelen # This is the one weird case where the token itself is an # incomplete error diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index e3beab08a7b05..385898cb38973 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -106,6 +106,8 @@ ")" => :none "1))" => :none "a b" => :none + "()x" => :none + "." 
=> :none ] @testset "$(repr(str))" begin @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag From 8a2d2d79bfbaac0f1b79f768785235359395aed5 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 14 May 2023 08:03:21 +1000 Subject: [PATCH 0659/1109] Fixes for line number node insertion in short form blocks (JuliaLang/JuliaSyntax.jl#279) Copy some peculiar behaviors of the reference parser for compatibility: * Top level blocks like `a;b;c` don't have any line numbers * Blocks like `(a;b;c)` omit the first line number node As part of this, add a new flag `TOPLEVEL_SEMICOLONS_FLAG` to distinguish semicolon-delimited toplevel statements from newline-delimited. --- JuliaSyntax/src/expr.jl | 5 ++++- JuliaSyntax/src/parse_stream.jl | 4 ++++ JuliaSyntax/src/parser.jl | 2 +- JuliaSyntax/test/expr.jl | 12 ++++++++++++ JuliaSyntax/test/parser.jl | 8 ++++---- 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 4d0093afd7206..56ac0f89b633d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -215,7 +215,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, elseif k == K"macrocall" _reorder_parameters!(args, 2) insert!(args, 2, loc) - elseif k == K"block" || k == K"toplevel" + elseif k == K"block" || (k == K"toplevel" && !has_flags(head, TOPLEVEL_SEMICOLONS_FLAG)) if isempty(args) push!(args, loc) else @@ -225,6 +225,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[2*i-1] = source_location(LineNumberNode, source, first(childranges[i])) end end + if k == K"block" && has_flags(head, PARENS_FLAG) + popfirst!(args) + end elseif k == K"doc" headsym = :macrocall args = [GlobalRef(Core, Symbol("@doc")), loc, args...] 
diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 74789c3005b08..3862991ff17bb 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -33,6 +33,8 @@ const RAW_STRING_FLAG = RawFlags(1<<6) const PARENS_FLAG = RawFlags(1<<5) # Set for K"quote" for the short form `:x` as oppsed to long form `quote x end` const COLON_QUOTE = RawFlags(1<<5) +# Set for K"toplevel" which is delimited by parentheses +const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) # Set for K"struct" when mutable const MUTABLE_FLAG = RawFlags(1<<5) @@ -99,6 +101,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) has_flags(head, PARENS_FLAG) && (str = str*"-p") elseif kind(head) == K"quote" has_flags(head, COLON_QUOTE) && (str = str*"-:") + elseif kind(head) == K"toplevel" + has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") elseif kind(head) == K"struct" has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") elseif kind(head) == K"module" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index acf2fe263cf76..efdf917dc161e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -495,7 +495,7 @@ function parse_stmts(ps::ParseState) error="extra tokens after end of expression") end if do_emit - emit(ps, mark, K"toplevel") + emit(ps, mark, K"toplevel", TOPLEVEL_SEMICOLONS_FLAG) end end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 9c9088b34163d..3d7856432256c 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -33,10 +33,20 @@ LineNumberNode(4), :c, ) + @test parsestmt("(a;b;c)") == + Expr(:block, + :a, + LineNumberNode(1), + :b, + LineNumberNode(1), + :c, + ) @test parsestmt("begin end") == Expr(:block, LineNumberNode(1) ) + @test parsestmt("(;;)") == + Expr(:block) @test parseall("a\n\nb") == Expr(:toplevel, @@ -45,6 +55,8 @@ LineNumberNode(3), :b, ) + @test parsestmt("a;b") == + Expr(:toplevel, :a, :b) @test parsestmt("module 
A\n\nbody\nend") == Expr(:module, diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 69ef9554c51d3..6d4b52fd97878 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -39,7 +39,7 @@ end tests = [ JuliaSyntax.parse_toplevel => [ "a \n b" => "(toplevel a b)" - "a;b \n c;d" => "(toplevel (toplevel a b) (toplevel c d))" + "a;b \n c;d" => "(toplevel (toplevel-; a b) (toplevel-; c d))" "a \n \n" => "(toplevel a)" "" => "(toplevel)" ], @@ -51,10 +51,10 @@ tests = [ "a\nb" => "(block a b)" ], JuliaSyntax.parse_stmts => [ - "a;b;c" => "(toplevel a b c)" - "a;;;b;;" => "(toplevel a b)" + "a;b;c" => "(toplevel-; a b c)" + "a;;;b;;" => "(toplevel-; a b)" """ "x" a ; "y" b """ => - """(toplevel (doc (string "x") a) (doc (string "y") b))""" + """(toplevel-; (doc (string "x") a) (doc (string "y") b))""" "x y" => "x (error-t y)" ], JuliaSyntax.parse_eq => [ From 25b350b3360bff88da84c4e3523eb86faae3b0e6 Mon Sep 17 00:00:00 2001 From: c42f Date: Sun, 14 May 2023 09:23:56 +1000 Subject: [PATCH 0660/1109] Reference parser compat: Make toplevel error Exprs more compatible (JuliaLang/JuliaSyntax.jl#280) When encountering a toplevel error, the reference parser * truncates the top level expression arg list before that error * includes the last line number * appends the error message Do something similar here so that the associated `LoadError` shows a more reasonable line number. 
--- JuliaSyntax/src/hooks.jl | 35 +++++++++++++++++++++++++++++++++-- JuliaSyntax/test/hooks.jl | 13 +++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 1fc31f52d652b..2ca7219abbdeb 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -122,6 +122,18 @@ function core_parser_hook(code, filename, offset, options) core_parser_hook(code, filename, 1, offset, options) end +function _has_nested_error(ex) + if ex isa Expr + if ex.head == :error + return true + else + return any(_has_nested_error(e) for e in ex.args) + end + else + return false + end +end + # Debug log file for dumping parsed code const _debug_log = Ref{Union{Nothing,IO}}(nothing) @@ -166,7 +178,8 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt if any_error(stream) tree = build_tree(SyntaxNode, stream, - wrap_toplevel_as_kind=K"None", first_line=lineno) + wrap_toplevel_as_kind=K"None", first_line=lineno, + filename=filename) tag = _incomplete_tag(tree, lastindex(code)) if tag !== :none # Here we replicate the particular messages @@ -184,7 +197,25 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt # we can do fancy error reporting instead. error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno)) end - ex = options === :all ? 
Expr(:toplevel, error_ex) : error_ex + ex = if options === :all + # When encountering a toplevel error, the reference parser + # * truncates the top level expression arg list before that error + # * includes the last line number + # * appends the error message + topex = Expr(tree) + @assert topex.head == :toplevel + i = findfirst(_has_nested_error, topex.args) + if i > 1 && topex.args[i-1] isa LineNumberNode + i -= 1 + end + resize!(topex.args, i-1) + _,errort = _first_error(tree) + push!(topex.args, LineNumberNode(source_line(errort), filename)) + push!(topex.args, error_ex) + topex + else + error_ex + end else # TODO: Figure out a way to show warnings. Meta.parse() has no API # to communicate this, and we also can't show them to stdout as diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 385898cb38973..fa2bbc0510fd8 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -29,6 +29,19 @@ @test err.source.first_line == 2 end + @testset "toplevel errors" begin + ex = JuliaSyntax._core_parser_hook("a\nb\n[x,\ny)", "somefile", 1, 0, :all)[1] + @test ex.head == :toplevel + @test ex.args[1:5] == [ + LineNumberNode(1, "somefile"), + :a, + LineNumberNode(2, "somefile"), + :b, + LineNumberNode(4, "somefile"), + ] + @test Meta.isexpr(ex.args[6], :error) + end + @testset "enable_in_core!" begin JuliaSyntax.enable_in_core!() From 3ddf2bc26423684617f5f27ec0d2b45d6c4ac817 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 15 May 2023 08:17:51 +1000 Subject: [PATCH 0661/1109] Fix for block-vs-parameters in prefix call syntax (JuliaLang/JuliaSyntax.jl#281) Expressions like `+(;;)` are ambiguous between parsing as prefix calls with an empty block, vs normal parenthesized calls with multiple parameters: (call-pre + (block)) (call + (parameters) (parameters)) Fix a few of these cases to use the same somewhat arbitrary disambiguation as the reference parser. 
--- JuliaSyntax/src/parser.jl | 7 +++++-- JuliaSyntax/test/parser.jl | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index efdf917dc161e..53d3975ca7eb0 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1230,9 +1230,12 @@ function parse_unary(ps::ParseState) mark_before_paren = position(ps) bump(ps, TRIVIA_FLAG) # ( - _is_paren_call = peek(ps, skip_newlines=true) in KSet"; )" + initial_semi = peek(ps, skip_newlines=true) == K";" opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_paren_call = had_commas || had_splat || _is_paren_call + is_paren_call = had_commas || had_splat || + (initial_semi && num_subexprs > 0) || + (initial_semi && num_semis == 1) || + (num_semis == 0 && num_subexprs == 0) return (needs_parameters=is_paren_call, is_paren_call=is_paren_call, is_block=!is_paren_call && num_semis > 0) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 6d4b52fd97878..be4effec64531 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -226,8 +226,11 @@ tests = [ "+(a...)" => "(call + (... 
a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" + "+(;;a)" => "(call + (parameters) (parameters a))" "+()" => "(call +)" "+(\n;a)" => "(call + (parameters a))" + "+(;)" => "(call + (parameters))" + "+(\n;\n)" => "(call + (parameters))" "+(\n)" => "(call +)" # Whitespace not allowed before prefix function call bracket "+ (a,b)" => "(call + (error) a b)" @@ -238,6 +241,11 @@ tests = [ # Unary function calls with brackets as grouping, not an arglist ".+(a)" => "(dotcall-pre + (parens a))" "+(a;b)" => "(call-pre + (block-p a b))" + "+(;;)" => "(call-pre + (block-p))" + "+(;;)" => "(call-pre + (block-p))" + "+(a;)" => "(call-pre + (block-p a))" + "+(a;;)" => "(call-pre + (block-p a))" + "+(\n;\n;\n)" => "(call-pre + (block-p))" "+(a=1)" => "(call-pre + (parens (= a 1)))" # Unary operators have lower precedence than ^ "+(a)^2" => "(call-pre + (call-i (parens a) ^ 2))" From 260e90fe2fc8ef9d38a74f525b53f9b98a79467d Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 15 May 2023 09:30:25 +1000 Subject: [PATCH 0662/1109] Allow `var` identifier as exception name in catch (JuliaLang/JuliaSyntax.jl#282) --- JuliaSyntax/src/parser.jl | 4 +--- JuliaSyntax/test/parser.jl | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 53d3975ca7eb0..21d1c6a3ceeb8 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1030,8 +1030,6 @@ function parse_unary_subtype(ps::ParseState) mark = position(ps) bump(ps, TRIVIA_FLAG) parse_unary_subtype(ps) - # Flisp parser handled this, but I don't know how it can happen... 
- @check peek_behind(ps).kind != K"tuple" emit(ps, mark, kind(t), PREFIX_OP_FLAG) end else @@ -2266,7 +2264,7 @@ function parse_catch(ps::ParseState) # try x catch $e y end ==> (try (block x) (catch ($ e) (block y))) m = position(ps) parse_eq_star(ps) - if !(peek_behind(ps).kind in KSet"Identifier $") + if !(peek_behind(ps).kind in KSet"Identifier var $") # try x catch e+3 y end ==> (try (block x) (catch (error (call-i e + 3)) (block y))) emit(ps, m, K"error", error="a variable name is expected after `catch`") end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index be4effec64531..5042a3c4eaf88 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -608,6 +608,7 @@ tests = [ "try x catch \n y end" => "(try (block x) (catch false (block y)))" "try x catch e y end" => "(try (block x) (catch e (block y)))" "try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))" + "try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))" "try x catch e+3 y end" => "(try (block x) (catch (error (call-i e + 3)) (block y)))" "try x finally y end" => "(try (block x) (finally (block y)))" # v1.8 only From af6e2f596483735555a88695b884684798e923c8 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 16 May 2023 07:21:19 +1000 Subject: [PATCH 0663/1109] Fixes for short form function locations (JuliaLang/JuliaSyntax.jl#284) * Bump trivia after `=` so we see the correct line for functions in multiple-assignments like `let f(x) =\ng(x)=1\nend` * Fixes for insertion of these line numbers on the right hand side * Remove special case avoiding inserting short form function location when in `for` loop iteration spec; allow this in Expr comparison instead. 
--- JuliaSyntax/src/expr.jl | 13 ++++----- JuliaSyntax/src/parser.jl | 2 ++ JuliaSyntax/test/expr.jl | 24 +++++++++++----- JuliaSyntax/test/parse_packages.jl | 11 +++++-- JuliaSyntax/test/runtests.jl | 34 +--------------------- JuliaSyntax/test/test_utils.jl | 22 ++++++++++---- JuliaSyntax/test/test_utils_tests.jl | 43 ++++++++++++++++++++++++++++ 7 files changed, 94 insertions(+), 55 deletions(-) create mode 100644 JuliaSyntax/test/test_utils_tests.jl diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 56ac0f89b633d..ce33d3d6a2271 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -179,13 +179,6 @@ function _fixup_Expr_children!(head, loc, args) elseif k == K"let" && i == 1 && @isexpr(arg, :block) filter!(a -> !(a isa LineNumberNode), arg.args) end - if !(k == K"for" && i == 1) && @isexpr(arg, :(=)) - aa2 = arg.args[2] - if is_eventually_call(arg.args[1]) && !@isexpr(aa2, :block) - # Add block for short form function locations - arg.args[2] = Expr(:block, loc, aa2) - end - end args[i] = arg end return args @@ -212,6 +205,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if k == K"?" headsym = :if + elseif k == K"=" && !is_decorated(head) + a2 = args[2] + if is_eventually_call(args[1]) && !@isexpr(a2, :block) + # Add block for short form function locations + args[2] = Expr(:block, loc, a2) + end elseif k == K"macrocall" _reorder_parameters!(args, 2) insert!(args, 2, loc) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 21d1c6a3ceeb8..0ff3fc42dcc90 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -590,12 +590,14 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # [a ~ b c] ==> (hcat (call-i a ~ b) c) # [a~b] ==> (vect (call-i a ~ b)) bump_dotsplit(ps) + bump_trivia(ps) parse_assignment(ps, down) emit(ps, mark, is_dotted(t) ? 
K"dotcall" : K"call", INFIX_FLAG) else # a += b ==> (+= a b) # a .= b ==> (.= a b) bump(ps, TRIVIA_FLAG) + bump_trivia(ps) parse_assignment(ps, down) emit(ps, mark, k, flags(t)) end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 3d7856432256c..b390132b579a8 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -168,14 +168,24 @@ Expr(:block, LineNumberNode(1), :xs)) - # flisp parser quirk: In a for loop the block is not added, despite - # this defining a short-form function. - @test parsestmt("for f() = xs\nend") == - Expr(:for, - Expr(:(=), Expr(:call, :f), :xs), + + @test parsestmt("let f(x) =\ng(x)=1\nend") == + Expr(:let, + Expr(:(=), + Expr(:call, :f, :x), + Expr(:block, + LineNumberNode(1), + Expr(:(=), + Expr(:call, :g, :x), + Expr(:block, + LineNumberNode(2), + 1)))), Expr(:block, - LineNumberNode(1) - )) + LineNumberNode(3))) + + # `.=` doesn't introduce short form functions + @test parsestmt("f() .= xs") == + Expr(:(.=), Expr(:call, :f), :xs) end @testset "for" begin diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index d3bf9143fe367..c02eb3a0d6ded 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -29,17 +29,22 @@ base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") # In julia-1.6, test/copy.jl had spurious syntax which became the # multidimensional array syntax in 1.7. if endswith(f, "copy.jl") && v"1.6" <= VERSION < v"1.7" - return false + return nothing end # syntax.jl has some intentially weird syntax which we parse # differently than the flisp parser, and some cases which we've # decided are syntax errors. 
if endswith(f, "syntax.jl") - return false + return nothing end - return true + if endswith(f, "core.jl") + # Loose comparison due to `for f() = 1:3` syntax + return exprs_roughly_equal + end + + return exprs_equal_no_linenum end end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index d574fac3a218a..bf2f93fb9288f 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -8,41 +8,9 @@ using JuliaSyntax: GreenNode, SyntaxNode, children, child, setchild!, SyntaxHead include("test_utils.jl") +include("test_utils_tests.jl") include("fuzz_test.jl") -# Tests for the test_utils go here to allow the utils to be included on their -# own without invoking the tests. -@testset "Reference parser bugs" begin - # `global (x,y)` - @test exprs_roughly_equal(Expr(:global, :x, :y), - Expr(:global, Expr(:tuple, :x, :y))) - @test exprs_roughly_equal(Expr(:local, :x, :y), - Expr(:local, Expr(:tuple, :x, :y))) - # `0x1.8p0f` - @test exprs_roughly_equal(1.5, - Expr(:call, :*, 1.5, :f)) - @test exprs_roughly_equal(1.5, - Expr(:call, :*, 1.5, :f0)) - # `@f(a=1) do \n end` - @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), - Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), - Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), - Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) - # `"""\n a\n \n b"""` - @test exprs_roughly_equal("a\n \nb", " a\n\n b") - @test !exprs_roughly_equal("a\n x\nb", " a\n x\n b") - @test exprs_roughly_equal("a\n x\nb", "a\n x\nb") - # `(a; b,)` - @test exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), - Expr(:tuple, Expr(:parameters, :b), :a)) - @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), - Expr(:tuple, Expr(:parameters, :c), :a)) - @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), - Expr(:tuple, Expr(:parameters, :b), :c)) - @test !exprs_roughly_equal(Expr(:block, 
:a, LineNumberNode(1), :b, :c), - Expr(:tuple, Expr(:parameters, :b), :a)) -end - include("utils.jl") @testset "Tokenize" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 3ee9f2438a75c..52b97b05998f8 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -138,6 +138,17 @@ function exprs_roughly_equal(fl_ex, ex) end end fl_args[1] = Expr(:tuple, Expr(:parameters, kwargs...), posargs...) + elseif h == :for + iterspec = args[1] + if JuliaSyntax.is_eventually_call(iterspec.args[1]) && + Meta.isexpr(iterspec.args[2], :block) + blk = iterspec.args[2] + if length(blk.args) == 2 && blk.args[1] isa LineNumberNode + # Ignore short form function location differences in + # `for f() = 1:3 end` + iterspec.args[2] = blk.args[2] + end + end end if length(fl_args) != length(args) return false @@ -197,11 +208,13 @@ function find_source_in_path(basedir) src_list end -test_parse_all_in_path(basedir) = test_parse_all_in_path(path->true, basedir) +test_parse_all_in_path(basedir) = + test_parse_all_in_path(path->exprs_equal_no_linenum, basedir) -function test_parse_all_in_path(path_allowed::Function, basedir) +function test_parse_all_in_path(compare_for_path::Function, basedir) for filepath in find_source_in_path(basedir) - if !path_allowed(filepath) + cmp = compare_for_path(filepath) + if isnothing(cmp) continue end @testset "Parse $(relpath(filepath, basedir))" begin @@ -212,8 +225,7 @@ function test_parse_all_in_path(path_allowed::Function, basedir) # ignore this case. 
continue end - parsers_agree = parsers_agree_on_file(text, filepath, - exprs_equal=exprs_equal_no_linenum) + parsers_agree = parsers_agree_on_file(text, filepath, exprs_equal=cmp) @test parsers_agree if !parsers_agree reduced_failures = reduce_text.(reduce_tree(text), diff --git a/JuliaSyntax/test/test_utils_tests.jl b/JuliaSyntax/test/test_utils_tests.jl new file mode 100644 index 0000000000000..c0a0f3cd8ab34 --- /dev/null +++ b/JuliaSyntax/test/test_utils_tests.jl @@ -0,0 +1,43 @@ +# Tests for the test_utils go here to allow the utils to be included on their +# own without invoking the tests. +@testset "Reference parser bugs" begin + # `global (x,y)` + @test exprs_roughly_equal(Expr(:global, :x, :y), + Expr(:global, Expr(:tuple, :x, :y))) + @test exprs_roughly_equal(Expr(:local, :x, :y), + Expr(:local, Expr(:tuple, :x, :y))) + # `0x1.8p0f` + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f)) + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f0)) + # `@f(a=1) do \n end` + @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), + Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) + # `"""\n a\n \n b"""` + @test exprs_roughly_equal("a\n \nb", " a\n\n b") + @test !exprs_roughly_equal("a\n x\nb", " a\n x\n b") + @test exprs_roughly_equal("a\n x\nb", "a\n x\nb") + # `(a; b,)` + @test exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :b), :a)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :c), :a)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :b), :c)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b, :c), + Expr(:tuple, Expr(:parameters, :b), :a)) + + # Line numbers for short form 
function defs in `for` :-( + @test exprs_roughly_equal(Expr(:for, Expr(:(=), + Expr(:call, :f), + 1), + Expr(:block, LineNumberNode(1))), + Expr(:for, Expr(:(=), + Expr(:call, :f), + Expr(:block, LineNumberNode(1), 1)), + Expr(:block, LineNumberNode(1)))) +end + From d9ad0447eb5ca986052f5286fa518ff6bad11e59 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 17 May 2023 07:46:02 +1000 Subject: [PATCH 0664/1109] Disallow juxtaposition of strings with blocks (JuliaLang/JuliaSyntax.jl#285) Disallow syntax like `"a"begin end`. Add a few more tests for juxtapositon. --- JuliaSyntax/src/parse_stream.jl | 15 +++--- JuliaSyntax/src/parser.jl | 84 ++++++++++++++++----------------- JuliaSyntax/test/parser.jl | 7 ++- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 3862991ff17bb..a2f60b99e3323 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -592,12 +592,15 @@ end # * parens nodes # * deleted tokens (TOMBSTONE) # * whitespace (if skip_trivia=true) -function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true) +function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true, + skip_parens::Bool=true) token_index = lastindex(stream.tokens) range_index = lastindex(stream.ranges) - while range_index >= firstindex(stream.ranges) && - kind(stream.ranges[range_index]) == K"parens" - range_index -= 1 + if skip_parens + while range_index >= firstindex(stream.ranges) && + kind(stream.ranges[range_index]) == K"parens" + range_index -= 1 + end end last_token_in_nonterminal = range_index == 0 ? 
0 : stream.ranges[range_index].last_token @@ -611,8 +614,8 @@ function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true) return ParseStreamPosition(token_index, range_index) end -function peek_behind(stream::ParseStream; skip_trivia::Bool=true) - peek_behind(stream, peek_behind_pos(stream; skip_trivia=skip_trivia)) +function peek_behind(stream::ParseStream; kws...) + peek_behind(stream, peek_behind_pos(stream; kws...)) end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0ff3fc42dcc90..d388a49fdcc0b 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1078,38 +1078,7 @@ function parse_where(ps::ParseState, down) end end -# given the previous expression kind and the next token, is there a -# juxtaposition operator between them? -# -# flisp: juxtapose? -function is_juxtapose(ps, prev_k, t) - k = kind(t) - - # Not juxtaposition - parse_juxtapose will consume only the first token. - # x.3 ==> x - # sqrt(2)2 ==> (call sqrt 2) - # x' y ==> (call-post x ') - # x 'y ==> x - - return !preceding_whitespace(t) && - (is_number(prev_k) || - (!is_number(k) && # disallow "x.3" and "sqrt(2)2" - k != K"@" && # disallow "x@time" - !(is_block_form(prev_k) || - is_syntactic_unary_op(prev_k) || - is_initial_reserved_word(ps, prev_k) ))) && - (!is_operator(k) || is_radical_op(k)) && - !is_closing_token(ps, k) && - !is_initial_reserved_word(ps, k) -end - -# Juxtoposition. Ugh! But so useful for units and Field identities like `im` -# -# 2x ==> (juxtapose 2 x) -# 2(x) ==> (juxtapose 2 (parens x)) -# (2)(3)x ==> (juxtapose (parens 2) (parens 3) x) -# (x-1)y ==> (juxtapose (parens (call-i x - 1)) y) -# x'y ==> (juxtapose (call-post x ') y) +# Juxtaposition. 
Kinda ugh but soo useful for units and Field identities like `im` # # flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) @@ -1117,19 +1086,46 @@ function parse_juxtapose(ps::ParseState) parse_unary(ps) n_terms = 1 while true - prev_kind = peek_behind(ps).kind t = peek_token(ps) - if !is_juxtapose(ps, prev_kind, t) - break + k = kind(t) + prev_k = peek_behind(ps).kind + is_juxtapose = false + if !preceding_whitespace(t) && + (is_number(prev_k) || + (!is_number(k) && # disallow "x.3" and "f(2)2" + k != K"@" && # disallow "x@y" + !(is_block_form(prev_k) || + is_syntactic_unary_op(prev_k) || + is_initial_reserved_word(ps, prev_k) ))) && + (!is_operator(k) || is_radical_op(k)) && + !is_closing_token(ps, k) + if prev_k == K"string" || is_string_delim(t) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="cannot juxtapose string literal") + # JuliaLang/julia#20575 + # Error, but assume juxtapose for recovery + # "a""b" ==> (juxtapose (string "a") (error-t) (string "b")) + # "a"x ==> (juxtapose (string "a") (error-t) x) + # "$y"x ==> (juxtapose (string y) (error-t) x) + # "a"begin end ==> (juxtapose (string \"a\") (error-t) (block)) + is_juxtapose = true + elseif !is_initial_reserved_word(ps, k) + # 2x ==> (juxtapose 2 x) + # 2(x) ==> (juxtapose 2 (parens x)) + # (2)(3)x ==> (juxtapose (parens 2) (parens 3) x) + # (x-1)y ==> (juxtapose (parens (call-i x - 1)) y) + # x'y ==> (juxtapose (call-post x ') y) + # 1√x ==> (juxtapose 1 (call-pre √ x)) + is_juxtapose = true + end end - if prev_kind == K"string" || is_string_delim(t) - # issue #20575 - # - # "a""b" ==> (juxtapose (string "a") (error-t) (string "b")) - # "a"x ==> (juxtapose (string "a") (error-t) x) - # "$y"x ==> (juxtapose (string (string y)) (error-t) x) - bump_invisible(ps, K"error", TRIVIA_FLAG, - error="cannot juxtapose string literal") + if !is_juxtapose + # x.3 ==> x + # f(2)2 ==> (call f 2) + # x' y ==> (call-post x ') + # x 'y ==> x + # x@y ==> x + break end if is_radical_op(t) parse_unary(ps) 
@@ -2337,7 +2333,7 @@ function parse_macro_name(ps::ParseState) bump_disallowed_space(ps) mark = position(ps) parse_atom(ps, false) - b = peek_behind(ps, position(ps)) + b = peek_behind(ps, skip_parens=false) if b.kind == K"parens" emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 5042a3c4eaf88..736201cfe3a09 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -180,14 +180,19 @@ tests = [ "(2)(3)x" => "(juxtapose (parens 2) (parens 3) x)" "(x-1)y" => "(juxtapose (parens (call-i x - 1)) y)" "x'y" => "(juxtapose (call-post x ') y)" + "1√x" => "(juxtapose 1 (call-pre √ x))" # errors "\"a\"\"b\"" => "(juxtapose (string \"a\") (error-t) (string \"b\"))" "\"a\"x" => "(juxtapose (string \"a\") (error-t) x)" + "\"\$y\"x" => "(juxtapose (string y) (error-t) x)" + "\"a\"begin end" => "(juxtapose (string \"a\") (error-t) (block))" # Not juxtaposition - parse_juxtapose will consume only the first token. 
"x.3" => "x" - "sqrt(2)2" => "(call sqrt 2)" + "f(2)2" => "(call f 2)" "x' y" => "(call-post x ')" "x 'y" => "x" + "x@y" => "x" + "(begin end)x" => "(parens (block))" ], JuliaSyntax.parse_unary => [ ":T" => "(quote-: T)" From 731e5b7c02f2f46dc29a59617fde64e922406df6 Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 17 May 2023 10:06:19 +1000 Subject: [PATCH 0665/1109] More precise errors for bad unicode characters (JuliaLang/JuliaSyntax.jl#286) Ported from equivalent errors in the reference parser --- JuliaSyntax/src/kinds.jl | 6 ++++++ JuliaSyntax/src/parse_stream.jl | 8 ++++++-- JuliaSyntax/src/tokenize.jl | 21 ++++++++++++++++++++- JuliaSyntax/test/diagnostics.jl | 8 +++++++- 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 7402c29245936..a99f3b1d0a5e1 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -23,6 +23,8 @@ const _kind_names = "ErrorNumericOverflow" "ErrorInvalidEscapeSequence" "ErrorOverLongCharacter" + "ErrorInvalidUTF8" + "ErrorInvisibleChar" "ErrorUnknownCharacter" # Generic error "error" @@ -1044,6 +1046,8 @@ const _nonunique_kind_names = Set([ K"ErrorNumericOverflow" K"ErrorInvalidEscapeSequence" K"ErrorOverLongCharacter" + K"ErrorInvalidUTF8" + K"ErrorInvisibleChar" K"ErrorUnknownCharacter" K"ErrorInvalidOperator" @@ -1091,6 +1095,8 @@ const _token_error_descriptions = Dict{Kind, String}( K"ErrorNumericOverflow"=>"overflow in numeric literal", K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", K"ErrorOverLongCharacter"=>"character literal contains multiple characters", + K"ErrorInvalidUTF8"=>"invalid UTF-8 character", + K"ErrorInvisibleChar"=>"invisible character", K"ErrorUnknownCharacter"=>"unknown unicode character", K"ErrorInvalidOperator" => "invalid operator", K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting", diff --git a/JuliaSyntax/src/parse_stream.jl 
b/JuliaSyntax/src/parse_stream.jl index a2f60b99e3323..e8ebbfd50d330 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -947,8 +947,12 @@ function validate_tokens(stream::ParseStream) end elseif is_error(k) && k != K"error" # Emit messages for non-generic token errors - emit_diagnostic(stream, tokrange, - error=_token_error_descriptions[k]) + msg = if k in KSet"ErrorInvalidUTF8 ErrorInvisibleChar ErrorUnknownCharacter" + "$(_token_error_descriptions[k]) $(repr(text[fbyte]))" + else + _token_error_descriptions[k] + end + emit_diagnostic(stream, tokrange, error=msg) end if error_kind != K"None" toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS), diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 02053cd28ce4e..6f37e85ed60fa 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -25,6 +25,22 @@ function is_identifier_start_char(c::Char) return Base.is_id_start_char(c) end +function is_invisible_char(c::Char) + # These are the chars considered invisible by the reference parser. + # TODO: There's others we could add? See for example + # https://invisible-characters.com/ + return c == '\u00ad' || # soft hyphen + c == '\u200b' || # zero width space + c == '\u200c' || # zero width non-joiner + c == '\u200d' || # zero width joiner + c == '\u200e' || # left-to-right mark + c == '\u200f' || # right-to-left mark + c == '\u2060' || # word joiner + c == '\u2061' # function application + # https://github.com/JuliaLang/julia/issues/49850 + # c == '\u115f' || # Hangul Choseong filler +end + # Chars that we will never allow to be part of a valid non-operator identifier function is_never_id_char(ch::Char) Base.isvalid(ch) || return true @@ -535,7 +551,10 @@ function _next_token(l::Lexer, c) elseif (k = get(_unicode_ops, c, K"error")) != K"error" return emit(l, k) else - emit_error(l, K"ErrorUnknownCharacter") + emit_error(l, + !isvalid(c) ? K"ErrorInvalidUTF8" : + is_invisible_char(c) ? 
K"ErrorInvisibleChar" : + K"ErrorUnknownCharacter") end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 01ee6999c1e14..87e2abea79528 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -12,7 +12,13 @@ function diagnostic(str; only_first=false, allow_multiple=false) end @testset "token errors" begin - @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character") + @test diagnostic("a\xf5b") == Diagnostic(2, 2, :error, "invalid UTF-8 character '\\xf5'") + for c in ['\u00ad', '\u200b', '\u200c', '\u200d', + '\u200e', '\u200f', '\u2060', '\u2061'] + @test diagnostic("a$(c)b") == + Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))") + end + @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'") end @testset "parser errors" begin From ccdc8299f809bb143290457d248df91a12d81321 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 18 May 2023 16:23:28 +1000 Subject: [PATCH 0666/1109] Re-add precompile (JuliaLang/JuliaSyntax.jl#287) Oops! 
Accidentally commented out previously :-( --- JuliaSyntax/src/precompile.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index bd202bb85dac4..f6426532dad4c 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -1,5 +1,5 @@ # Just parse some file as a precompile workload -# let filename = joinpath(@__DIR__, "literal_parsing.jl") -# text = read(filename, String) -# parseall(Expr, text) -# end +let filename = joinpath(@__DIR__, "literal_parsing.jl") + text = read(filename, String) + parseall(Expr, text) +end From b575263372574ad05a3b31ec17f845eb645ca1c0 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 18 May 2023 20:30:50 +1000 Subject: [PATCH 0667/1109] Disallow unbalanced bidirectional unicode (JuliaLang/JuliaSyntax.jl#288) Disallow unbalanced Unicode bidirectional formatting directives within strings and comments, to mitigate the "trojan source" vulnerability https://www.trojansource.codes See also https://github.com/JuliaLang/julia/pull/42918 --- JuliaSyntax/src/kinds.jl | 3 + JuliaSyntax/src/parse_stream.jl | 2 + JuliaSyntax/src/parser.jl | 5 ++ JuliaSyntax/src/tokenize.jl | 114 ++++++++++++++++---------- JuliaSyntax/test/diagnostics.jl | 7 ++ JuliaSyntax/test/parser.jl | 21 +++++ JuliaSyntax/test/test_utils.jl | 8 ++ JuliaSyntax/test/tokenize.jl | 136 ++++++++++++++++++++++++++++++-- 8 files changed, 247 insertions(+), 49 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index a99f3b1d0a5e1..70c359820007b 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -26,6 +26,7 @@ const _kind_names = "ErrorInvalidUTF8" "ErrorInvisibleChar" "ErrorUnknownCharacter" + "ErrorBidiFormatting" # Generic error "error" "END_ERRORS" @@ -1049,6 +1050,7 @@ const _nonunique_kind_names = Set([ K"ErrorInvalidUTF8" K"ErrorInvisibleChar" K"ErrorUnknownCharacter" + K"ErrorBidiFormatting" K"ErrorInvalidOperator" K"Integer" @@ 
-1098,6 +1100,7 @@ const _token_error_descriptions = Dict{Kind, String}( K"ErrorInvalidUTF8"=>"invalid UTF-8 character", K"ErrorInvisibleChar"=>"invisible character", K"ErrorUnknownCharacter"=>"unknown unicode character", + K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting", K"ErrorInvalidOperator" => "invalid operator", K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting", K"error" => "unknown error token", diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index e8ebbfd50d330..94feba0340efe 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -949,6 +949,8 @@ function validate_tokens(stream::ParseStream) # Emit messages for non-generic token errors msg = if k in KSet"ErrorInvalidUTF8 ErrorInvisibleChar ErrorUnknownCharacter" "$(_token_error_descriptions[k]) $(repr(text[fbyte]))" + elseif k == K"ErrorBidiFormatting" + "$(_token_error_descriptions[k]) $(repr(text[fbyte:prevind(text, nbyte)]))" else _token_error_descriptions[k] end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d388a49fdcc0b..e2691e73fbe60 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3282,6 +3282,9 @@ function parse_string(ps::ParseState, raw::Bool) first_chunk = false n_valid_chunks += 1 end + elseif k == K"ErrorInvalidInterpolationTerminator" || k == K"ErrorBidiFormatting" + # Treat these errors as string chunks + bump(ps) else break end @@ -3381,6 +3384,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) else if k == K"Char" bump(ps) + elseif is_error(k) + bump(ps) else # FIXME: This case is actually a tokenization error. 
# Make a best-effort attempt to workaround this for now by diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 6f37e85ed60fa..86e21d319220c 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -2,7 +2,7 @@ module Tokenize export tokenize, untokenize, Tokens -using ..JuliaSyntax: JuliaSyntax, Kind, @K_str +using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str import ..JuliaSyntax: kind, is_literal, is_error, is_contextual_keyword, is_word_operator @@ -382,9 +382,6 @@ end Returns the next character and increments the current position. """ -function readchar end - - function readchar(l::Lexer) c = readchar(l.io) l.chars = (l.chars[2], l.chars[3], l.chars[4], c) @@ -446,17 +443,6 @@ function emit(l::Lexer, kind::Kind, maybe_op=true) return tok end -""" - emit_error(l::Lexer, err::Kind) - -Returns an `K"error"` token with error `err` and starts a new `RawToken`. -""" -function emit_error(l::Lexer, err::Kind) - @assert is_error(err) - return emit(l, err) -end - - """ next_token(l::Lexer) @@ -551,13 +537,33 @@ function _next_token(l::Lexer, c) elseif (k = get(_unicode_ops, c, K"error")) != K"error" return emit(l, k) else - emit_error(l, + emit(l, !isvalid(c) ? K"ErrorInvalidUTF8" : is_invisible_char(c) ? K"ErrorInvisibleChar" : K"ErrorUnknownCharacter") end end +# UAX #9: Unicode Bidirectional Algorithm +# https://unicode.org/reports/tr9/ +# Very partial implementation - just enough to check correct nesting in strings +# and multiline comments. 
+function update_bidi_state((embedding_nesting, isolate_nesting), c) + if c == '\n' + embedding_nesting = 0 + isolate_nesting = 0 + elseif c == '\U202A' || c == '\U202B' || c == '\U202D' || c == '\U202E' # LRE RLE LRO RLO + embedding_nesting += 1 + elseif c == '\U202C' # PDF + embedding_nesting -= 1 + elseif c == '\U2066' || c == '\U2067' || c == '\U2068' # LRI RLI FSI + isolate_nesting += 1 + elseif c == '\U2069' # PDI + isolate_nesting -= 1 + end + return (embedding_nesting, isolate_nesting) +end + # We're inside a string; possibly reading the string characters, or maybe in # Julia code within an interpolation. function lex_string_chunk(l) @@ -565,6 +571,9 @@ function lex_string_chunk(l) if state.paren_depth > 0 # Read normal Julia code inside an interpolation but track nesting of # parentheses. + # TODO: This stateful tracking should probably, somehow, be done by the + # parser instead? Especially for recovery of unbalanced parens inside + # interpolations? c = readchar(l) if c == '(' l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, @@ -598,7 +607,7 @@ function lex_string_chunk(l) # Only allow certain characters after interpolated vars # https://github.com/JuliaLang/julia/pull/25234 readchar(l) - return emit_error(l, K"ErrorInvalidInterpolationTerminator") + return emit(l, K"ErrorInvalidInterpolationTerminator") end if pc == EOF_CHAR return emit(l, K"EndMarker") @@ -637,6 +646,8 @@ function lex_string_chunk(l) end end # Read a chunk of string characters + init_bidi_state = (0,0) + bidi_state = init_bidi_state if state.raw # Raw strings treat all characters as literals with the exception that # the closing quotes can be escaped with an odd number of \ characters. 
@@ -647,7 +658,10 @@ function lex_string_chunk(l) elseif state.triplestr && (pc == '\n' || pc == '\r') # triple quoted newline splitting readchar(l) - if pc == '\r' && peekchar(l) == '\n' + if pc == '\n' + bidi_state = init_bidi_state + elseif pc == '\r' && peekchar(l) == '\n' + bidi_state = init_bidi_state readchar(l) end break @@ -663,6 +677,7 @@ function lex_string_chunk(l) readchar(l) end end + bidi_state = update_bidi_state(bidi_state, c) end else while true @@ -672,16 +687,22 @@ function lex_string_chunk(l) elseif state.triplestr && (pc == '\n' || pc == '\r') # triple quoted newline splitting readchar(l) - if pc == '\r' && peekchar(l) == '\n' + if pc == '\n' + bidi_state = init_bidi_state + elseif pc == '\r' && peekchar(l) == '\n' readchar(l) + bidi_state = init_bidi_state end break elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) break elseif pc == '\\' # Escaped newline - pc2 = dpeekchar(l)[2] + _, pc2, pc3 = peekchar3(l) if pc2 == '\r' || pc2 == '\n' + if pc2 == '\n' || pc3 == '\n' + bidi_state = init_bidi_state + end break end end @@ -689,12 +710,16 @@ function lex_string_chunk(l) if c == '\\' c = readchar(l) c == EOF_CHAR && break - continue end + bidi_state = update_bidi_state(bidi_state, c) end end - return emit(l, state.delim == '"' ? K"String" : - state.delim == '`' ? K"CmdString" : K"Char") + outk = state.delim == '\'' ? K"Char" : + bidi_state != init_bidi_state ? K"ErrorBidiFormatting" : + state.delim == '"' ? K"String" : + state.delim == '`' ? 
K"CmdString" : + (@assert(state.delim in KSet"' \" `"); K"error") + return emit(l, outk) end # Lex whitespace, a whitespace char `c` has been consumed @@ -725,13 +750,16 @@ function lex_comment(l::Lexer) end else c = readchar(l) # consume the '=' + init_bidi_state = (0,0) + bidi_state = init_bidi_state skip = true # true => c was part of the prev comment marker pair nesting = 1 while true if c == EOF_CHAR - return emit_error(l, K"ErrorEofMultiComment") + return emit(l, K"ErrorEofMultiComment") end nc = readchar(l) + bidi_state = update_bidi_state(bidi_state, nc) if skip skip = false else @@ -742,7 +770,9 @@ function lex_comment(l::Lexer) nesting -= 1 skip = true if nesting == 0 - return emit(l, K"Comment") + outk = bidi_state == init_bidi_state ? + K"Comment" : K"ErrorBidiFormatting" + return emit(l, outk) end end end @@ -791,12 +821,12 @@ function lex_less(l::Lexer) elseif dpeekchar(l) == ('-', '-') readchar(l); readchar(l) if accept(l, '-') - return emit_error(l, K"ErrorInvalidOperator") + return emit(l, K"ErrorInvalidOperator") else if accept(l, '>') return emit(l, K"<-->") elseif accept(l, '-') - return emit_error(l, K"ErrorInvalidOperator") + return emit(l, K"ErrorInvalidOperator") else return emit(l, K"<--") end @@ -879,7 +909,7 @@ function lex_minus(l::Lexer) if accept(l, '>') return emit(l, K"-->") else - return emit_error(l, K"ErrorInvalidOperator") # "--" is an invalid operator + return emit(l, K"ErrorInvalidOperator") # "--" is an invalid operator end elseif !l.dotop && accept(l, '>') return emit(l, K"->") @@ -891,7 +921,7 @@ end function lex_star(l::Lexer) if accept(l, '*') - return emit_error(l, K"Error**") # "**" is an invalid operator use ^ + return emit(l, K"Error**") # "**" is an invalid operator use ^ elseif accept(l, '=') return emit(l, K"*=") end @@ -952,15 +982,15 @@ function lex_digit(l::Lexer, kind) elseif kind === K"Float" # If we enter the function with kind == K"Float" then a '.' has been parsed. 
readchar(l) - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit(l, K"ErrorInvalidNumericConstant") elseif is_dottable_operator_start_char(ppc) readchar(l) - return emit_error(l, K"ErrorAmbiguousNumericConstant") # `1.+` + return emit(l, K"ErrorAmbiguousNumericConstant") # `1.+` end readchar(l) kind = K"Float" - accept(l, '_') && return emit_error(l, K"ErrorInvalidNumericConstant") # `1._` + accept(l, '_') && return emit(l, K"ErrorInvalidNumericConstant") # `1._` had_fraction_digs = accept_number(l, isdigit) pc, ppc = dpeekchar(l) if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') @@ -971,18 +1001,18 @@ function lex_digit(l::Lexer, kind) pc,ppc = dpeekchar(l) if pc === '.' && !is_dottable_operator_start_char(ppc) readchar(l) - return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e1.` + return emit(l, K"ErrorInvalidNumericConstant") # `1.e1.` end else - return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e` + return emit(l, K"ErrorInvalidNumericConstant") # `1.e` end elseif pc == '.' && ppc != '.' && !is_dottable_operator_start_char(ppc) readchar(l) - return emit_error(l, K"ErrorInvalidNumericConstant") # `1.1.` + return emit(l, K"ErrorInvalidNumericConstant") # `1.1.` elseif !had_fraction_digs && (is_identifier_start_char(pc) || pc == '(' || pc == '[' || pc == '{' || pc == '@' || pc == '`' || pc == '"') - return emit_error(l, K"ErrorAmbiguousNumericDotMultiply") # `1.(` `1.x` + return emit(l, K"ErrorAmbiguousNumericDotMultiply") # `1.(` `1.x` end elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') kind = pc == 'f' ? K"Float32" : K"Float" @@ -992,10 +1022,10 @@ function lex_digit(l::Lexer, kind) pc,ppc = dpeekchar(l) if pc === '.' 
&& !is_dottable_operator_start_char(ppc) accept(l, '.') - return emit_error(l, K"ErrorInvalidNumericConstant") # `1e1.` + return emit(l, K"ErrorInvalidNumericConstant") # `1e1.` end else - return emit_error(l, K"ErrorInvalidNumericConstant") # `1e+` + return emit(l, K"ErrorInvalidNumericConstant") # `1e+` end elseif position(l) - startpos(l) == 1 && l.chars[1] == '0' kind == K"Integer" @@ -1015,10 +1045,10 @@ function lex_digit(l::Lexer, kind) kind = K"Float" accept(l, "+-−") if !accept_number(l, isdigit) || !had_digits - return emit_error(l, K"ErrorInvalidNumericConstant") # `0x1p` `0x.p0` + return emit(l, K"ErrorInvalidNumericConstant") # `0x1p` `0x.p0` end elseif isfloat - return emit_error(l, K"ErrorHexFloatMustContainP") # `0x.` `0x1.0` + return emit(l, K"ErrorHexFloatMustContainP") # `0x.` `0x1.0` end is_bin_oct_hex_int = !isfloat elseif pc == 'b' @@ -1038,7 +1068,7 @@ function lex_digit(l::Lexer, kind) accept_batch(l, c->isdigit(c) || is_identifier_start_char(c)) # `0x` `0xg` `0x_` `0x-` # `0b123` `0o78p` `0xenomorph` `0xaα` - return emit_error(l, K"ErrorInvalidNumericConstant") + return emit(l, K"ErrorInvalidNumericConstant") end end end @@ -1132,7 +1162,7 @@ function lex_dot(l::Lexer) else if is_dottable_operator_start_char(peekchar(l)) readchar(l) - return emit_error(l, K"ErrorInvalidOperator") + return emit(l, K"ErrorInvalidOperator") else return emit(l, K"..") end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 87e2abea79528..c2fd5c1f2b17f 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -19,6 +19,13 @@ end Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))") end @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'") + + @test diagnostic("\"X \u202a X\"") == Diagnostic(2, 8, :error, "unbalanced bidirectional unicode formatting \"X \\u202a X\"") + @test diagnostic("#= \u202a =#") == Diagnostic(1, 9, :error, "unbalanced 
bidirectional unicode formatting \"#= \\u202a =#\"") + @test diagnostic("\"X \u202a \$xx\u202c\"", allow_multiple=true) == [ + Diagnostic(2, 7, :error, "unbalanced bidirectional unicode formatting \"X \\u202a \"") + Diagnostic(11, 13, :error, "unbalanced bidirectional unicode formatting \"\\u202c\"") + ] end @testset "parser errors" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 736201cfe3a09..88dff3ada8cb8 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1004,3 +1004,24 @@ end @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u2212= b") == "(-= a b)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" end + +@testset "Unbalanced bidirectional unicode" begin + # https://trojansource.codes + @test_throws JuliaSyntax.ParseError parsestmt(GreenNode, """ + function checkUserAccess(u::User) + if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066" + return true + end + return false + end + """) + + @test_throws JuliaSyntax.ParseError parsestmt(GreenNode, """ + function checkUserAccess(u::User) + #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =# + return true + #= end admin only \u202e \u2066end\u2069 \u2066=# + return false + end + """) +end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 52b97b05998f8..de80ed1a96200 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -39,6 +39,14 @@ if VERSION < v"1.6" using JuliaSyntax: isnothing, only, peek end +function toks(str) + ts = [JuliaSyntax.Tokenize.untokenize(t, str)=>kind(t) + for t in JuliaSyntax.Tokenize.tokenize(str)] + @test ts[end] == (""=>K"EndMarker") + pop!(ts) + ts +end + function remove_macro_linenums!(ex) if Meta.isexpr(ex, :macrocall) ex.args[2] = nothing diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index baaa08d0835c7..4e5926e604a02 100644 --- a/JuliaSyntax/test/tokenize.jl +++ 
b/JuliaSyntax/test/tokenize.jl @@ -17,17 +17,12 @@ using JuliaSyntax.Tokenize: untokenize, RawToken +using ..Main: toks + tok(str, i = 1) = collect(tokenize(str))[i] strtok(str) = untokenize.(collect(tokenize(str)), str) -function toks(str) - ts = [untokenize(t, str)=>kind(t) for t in tokenize(str)] - @test ts[end] == (""=>K"EndMarker") - pop!(ts) - ts -end - function onlytok(str) ts = collect(tokenize(str)) (length(ts) == 2 && ts[2].kind == K"EndMarker") || @@ -993,6 +988,133 @@ end end end +@testset "unbalanced bidirectional unicode" begin + open_embedding = ['\U202A', '\U202B', '\U202D', '\U202E'] + close_embedding = '\U202C' + open_isolate = ['\U2066', '\U2067', '\U2068'] + close_isolate = '\U2069' + close_all = '\n' + + all_bidi_codes = [open_embedding; close_embedding; open_isolate; close_isolate] + + bidi_pairs = [Iterators.product(open_embedding, [close_embedding, close_all])..., + Iterators.product(open_isolate, [close_isolate, close_all])...] + + @testset "delimiter $kd" for (kd, chunk_kind) in [ + (K"\"", K"String"), + (K"\"\"\"", K"String"), + (K"`", K"CmdString"), + (K"```", K"CmdString") + ] + d = string(kd) + @testset "Single unbalanced codes" begin + for c in all_bidi_codes + @test toks("$d$c$d") == + [d=>kd, "$c"=>K"ErrorBidiFormatting", d=>kd] + @test toks("pfx$d$c$d") == + ["pfx"=>K"Identifier", d=>kd, "$c"=>K"ErrorBidiFormatting", d=>kd] + end + end + @testset "Balanced pairs" begin + for (openc, closec) in bidi_pairs + str = "$(openc)##$(closec)" + @test toks("$d$str$d") == + [d=>kd, str=>chunk_kind, d=>kd] + @test toks("pfx$d$str$d") == + ["pfx"=>K"Identifier", d=>kd, str=>chunk_kind, d=>kd] + end + end + end + + @testset "multi line comments" begin + @testset "Single unbalanced codes" begin + for c in all_bidi_codes + comment = "#=$c=#" + @test toks(comment) == [comment=>K"ErrorBidiFormatting"] + end + end + @testset "Balanced pairs" begin + for (openc, closec) in bidi_pairs + str = "#=$(openc)zz$(closec)=#" + @test toks(str) == 
[str=>K"Comment"] + end + end + end + + @testset "extended balanced/unbalanced bidi state" begin + @testset "delimiter $kd" for (kd, chunk_kind) in [ + (K"\"", K"String"), + (K"\"\"\"", K"String"), + (K"`", K"CmdString"), + (K"```", K"CmdString") + ] + d = string(kd) + for balanced in [# Balanced pairs + "\u202a\u202bzz\u202c\u202c" + "\u2066\u2067zz\u2069\u2069" + # Newline is complete bidi state reset + "\u202a\u2067zz\n" + "\u202a\u202azz\n" + # \r\n and \n terminate a line + "\u202azz\r\n" + ] + @test toks("$d$balanced$d") == [ + d=>kd + balanced=>chunk_kind + d=>kd + ] + end + for unbalanced in ["\u202azz\u202c\u202c" + "\u202a\u202bzz\u202c" + # \r does not terminate a bidi line + "\u202azz\r" + ] + @test toks("$d$unbalanced$d") == [ + d=>kd + unbalanced=>K"ErrorBidiFormatting" + d=>kd + ] + end + end + end + + # Interpolations reset bidi state + @test toks("\"\u202a\$zz\n\"") == [ + "\""=>K"\"" + "\u202a"=>K"ErrorBidiFormatting" + "\$"=>K"$" + "zz"=>K"Identifier" + "\n"=>K"String" + "\""=>K"\"" + ] + @testset "newline escaping" begin + @test toks("\"a\u202a\\\n\"") == [ + "\""=>K"\"" + "a\u202a"=>K"String" + "\\\n"=>K"Whitespace" + "\""=>K"\"" + ] + @test toks("\"a\u202a\\\r\n\"") == [ + "\""=>K"\"" + "a\u202a"=>K"String" + "\\\r\n"=>K"Whitespace" + "\""=>K"\"" + ] + @test toks("\"a\u202a\\\r\"") == [ + "\""=>K"\"" + "a\u202a"=>K"ErrorBidiFormatting" + "\\\r"=>K"Whitespace" + "\""=>K"\"" + ] + end + + @testset "delimiter '" begin + for c in all_bidi_codes + @test toks("'$c'") == ["'"=>K"'", "$c"=>K"Char", "'"=>K"'"] + end + end +end + @testset "dotop miscellanea" begin @test strtok("a .-> b") == ["a", " ", ".-", ">", " ", "b", ""] @test strtok(".>: b") == [".>:", " ", "b", ""] From 293f1efdcac6d6f4719314f14ef43f80565a5d58 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 18 May 2023 20:31:28 +1000 Subject: [PATCH 0668/1109] bump version to 0.4.1 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 6493f1bea67f1..462192cc5455b 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["c42f and contributors"] -version = "0.4" +version = "0.4.1" [compat] julia = "1.0" From 933d05a2de2d1f39d06aa0147607bffaa3d6aac0 Mon Sep 17 00:00:00 2001 From: c42f Date: Tue, 23 May 2023 06:19:47 +1000 Subject: [PATCH 0669/1109] Make invalid UTF-8 an error in comments and strings (JuliaLang/JuliaSyntax.jl#289) This is the behavior of the reference parser. --- JuliaSyntax/src/kinds.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 8 +++++--- JuliaSyntax/src/parser.jl | 15 +++++++++------ JuliaSyntax/src/tokenize.jl | 29 +++++++++++++++++++---------- JuliaSyntax/test/diagnostics.jl | 3 ++- JuliaSyntax/test/parser.jl | 3 +++ JuliaSyntax/test/tokenize.jl | 25 +++++++++++++++++++++++++ 7 files changed, 64 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 70c359820007b..6dcc35576da8d 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1097,7 +1097,7 @@ const _token_error_descriptions = Dict{Kind, String}( K"ErrorNumericOverflow"=>"overflow in numeric literal", K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", K"ErrorOverLongCharacter"=>"character literal contains multiple characters", - K"ErrorInvalidUTF8"=>"invalid UTF-8 character", + K"ErrorInvalidUTF8"=>"invalid UTF-8 sequence", K"ErrorInvisibleChar"=>"invisible character", K"ErrorUnknownCharacter"=>"unknown unicode character", K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting", diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 94feba0340efe..c41cc3bfdde48 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -947,10 +947,12 @@ function validate_tokens(stream::ParseStream) end elseif is_error(k) && k != 
K"error" # Emit messages for non-generic token errors - msg = if k in KSet"ErrorInvalidUTF8 ErrorInvisibleChar ErrorUnknownCharacter" + # + textrange = fbyte:prevind(text, nbyte) + msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter" "$(_token_error_descriptions[k]) $(repr(text[fbyte]))" - elseif k == K"ErrorBidiFormatting" - "$(_token_error_descriptions[k]) $(repr(text[fbyte:prevind(text, nbyte)]))" + elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting" + "$(_token_error_descriptions[k]) $(repr(text[textrange]))" else _token_error_descriptions[k] end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e2691e73fbe60..72fbd1d3f4f60 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3159,7 +3159,7 @@ function parse_string(ps::ParseState, raw::Bool) chunk_flags = raw ? RAW_STRING_FLAG : EMPTY_FLAGS bump(ps, TRIVIA_FLAG) first_chunk = true - n_valid_chunks = 0 + n_nontrivia_chunks = 0 removed_initial_newline = false had_interpolation = false prev_chunk_newline = false @@ -3193,7 +3193,7 @@ function parse_string(ps::ParseState, raw::Bool) error="identifier or parenthesized expression expected after \$ in string") end first_chunk = false - n_valid_chunks += 1 + n_nontrivia_chunks += 1 had_interpolation = true prev_chunk_newline = false elseif k == string_chunk_kind @@ -3280,11 +3280,14 @@ function parse_string(ps::ParseState, raw::Bool) end bump(ps, chunk_flags) first_chunk = false - n_valid_chunks += 1 + n_nontrivia_chunks += 1 end - elseif k == K"ErrorInvalidInterpolationTerminator" || k == K"ErrorBidiFormatting" + elseif k == K"ErrorInvalidInterpolationTerminator" || + k == K"ErrorBidiFormatting" || + k == K"ErrorInvalidUTF8" # Treat these errors as string chunks bump(ps) + n_nontrivia_chunks += 1 else break end @@ -3302,13 +3305,13 @@ function parse_string(ps::ParseState, raw::Bool) if rhs_empty # Empty chunks after dedent are removed # """\n \n """ ==> (string-s "\n") - n_valid_chunks -= 1 + n_nontrivia_chunks 
-= 1 end end end release_positions(ps.stream, indent_chunks) if had_end_delim - if n_valid_chunks == 0 + if n_nontrivia_chunks == 0 # Empty strings, or empty after triple quoted processing # "" ==> (string "") # """\n """ ==> (string-s "") diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 86e21d319220c..cb31f746706ea 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -15,13 +15,13 @@ const EOF_CHAR = typemax(Char) function is_identifier_char(c::Char) c == EOF_CHAR && return false - Base.isvalid(c) || return false + isvalid(c) || return false return Base.is_id_char(c) end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false - Base.isvalid(c) || return false + isvalid(c) || return false return Base.is_id_start_char(c) end @@ -43,7 +43,7 @@ end # Chars that we will never allow to be part of a valid non-operator identifier function is_never_id_char(ch::Char) - Base.isvalid(ch) || return true + isvalid(ch) || return true cat = Unicode.category_code(ch) c = UInt32(ch) return ( @@ -116,7 +116,7 @@ function _char_in_set_expr(varname, firstchars) end @eval function is_operator_start_char(c) - if c == EOF_CHAR || !Base.isvalid(c) + if c == EOF_CHAR || !isvalid(c) return false end u = UInt32(c) @@ -132,7 +132,7 @@ end @eval function isopsuffix(c::Char) c == EOF_CHAR && return false - Base.isvalid(c) || return false + isvalid(c) || return false u = UInt32(c) if (u < 0xa1 || u > 0x10ffff) return false @@ -226,7 +226,7 @@ end @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') @inline isbinary(c::Char) = c == '0' || c == '1' @inline isoctal(c::Char) = '0' ≤ c ≤ '7' -@inline iswhitespace(c::Char) = (Base.isvalid(c) && Base.isspace(c)) || c === '\ufeff' +@inline iswhitespace(c::Char) = (isvalid(c) && Base.isspace(c)) || c === '\ufeff' struct StringState triplestr::Bool @@ -648,6 +648,7 @@ function lex_string_chunk(l) # Read a chunk of string characters init_bidi_state = (0,0) 
bidi_state = init_bidi_state + valid = true if state.raw # Raw strings treat all characters as literals with the exception that # the closing quotes can be escaped with an odd number of \ characters. @@ -678,6 +679,7 @@ function lex_string_chunk(l) end end bidi_state = update_bidi_state(bidi_state, c) + valid &= isvalid(c) end else while true @@ -712,9 +714,11 @@ function lex_string_chunk(l) c == EOF_CHAR && break end bidi_state = update_bidi_state(bidi_state, c) + valid &= isvalid(c) end end - outk = state.delim == '\'' ? K"Char" : + outk = !valid ? K"ErrorInvalidUTF8" : + state.delim == '\'' ? K"Char" : bidi_state != init_bidi_state ? K"ErrorBidiFormatting" : state.delim == '"' ? K"String" : state.delim == '`' ? K"CmdString" : @@ -741,11 +745,13 @@ end function lex_comment(l::Lexer) if peekchar(l) != '=' + valid = true while true pc = peekchar(l) if pc == '\n' || pc == EOF_CHAR - return emit(l, K"Comment") + return emit(l, valid ? K"Comment" : K"ErrorInvalidUTF8") end + valid &= isvalid(pc) readchar(l) end else @@ -754,12 +760,14 @@ function lex_comment(l::Lexer) bidi_state = init_bidi_state skip = true # true => c was part of the prev comment marker pair nesting = 1 + valid = true while true if c == EOF_CHAR return emit(l, K"ErrorEofMultiComment") end nc = readchar(l) bidi_state = update_bidi_state(bidi_state, nc) + valid &= isvalid(nc) if skip skip = false else @@ -770,8 +778,9 @@ function lex_comment(l::Lexer) nesting -= 1 skip = true if nesting == 0 - outk = bidi_state == init_bidi_state ? - K"Comment" : K"ErrorBidiFormatting" + outk = !valid ? K"ErrorInvalidUTF8" : + bidi_state != init_bidi_state ? 
K"ErrorBidiFormatting" : + K"Comment" return emit(l, outk) end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index c2fd5c1f2b17f..f97ddb7ce796d 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -12,7 +12,8 @@ function diagnostic(str; only_first=false, allow_multiple=false) end @testset "token errors" begin - @test diagnostic("a\xf5b") == Diagnostic(2, 2, :error, "invalid UTF-8 character '\\xf5'") + @test diagnostic("a\xf5b") == Diagnostic(2, 2, :error, "invalid UTF-8 sequence \"\\xf5\"") + @test diagnostic("# a\xf5b") == Diagnostic(1, 5, :error, "invalid UTF-8 sequence \"# a\\xf5b\"") for c in ['\u00ad', '\u200b', '\u200c', '\u200d', '\u200e', '\u200f', '\u2060', '\u2061'] @test diagnostic("a$(c)b") == diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 88dff3ada8cb8..b6a8d836e4cea 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -928,6 +928,9 @@ tests = [ "\"\\xqqq\"" => "(string (ErrorInvalidEscapeSequence))" "'\\xq'" => "(char (ErrorInvalidEscapeSequence))" "'ab'" => "(char (ErrorOverLongCharacter))" + "\"\xf5\"" => "(string (ErrorInvalidUTF8))" + "'\xf5'" => "(char (ErrorInvalidUTF8))" + "`\xf5`" => "(macrocall core_@cmd (cmdstring-r (ErrorInvalidUTF8)))" "10.0e1000'" => "(ErrorNumericOverflow)" "10.0f100'" => "(ErrorNumericOverflow)" ], diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 4e5926e604a02..61bbbb95be178 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -222,6 +222,31 @@ end end +@testset "invalid UTF-8" begin + @test toks("#=\xf5b\n=#") == [ + "#=\xf5b\n=#" => K"ErrorInvalidUTF8", + ] + @test toks("#\xf5b\n") == [ + "#\xf5b" => K"ErrorInvalidUTF8", + "\n" => K"NewlineWs" + ] + @test toks("\"\xf5\"") == [ + "\"" => K"\"" + "\xf5" => K"ErrorInvalidUTF8" + "\"" => K"\"" + ] + @test toks("'\xf5'") == [ + "'" => K"'" + "\xf5" => K"ErrorInvalidUTF8" + "'" => K"'" + ] + @test 
toks("`\xf5`") == [ + "`" => K"`" + "\xf5" => K"ErrorInvalidUTF8" + "`" => K"`" + ] +end + @testset "primes" begin str = """ ImageMagick.save(fn, reinterpret(ARGB32, [0xf0884422]'')) From 5c41bb2188d41e52bf503e69671969e88c2a835d Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 24 May 2023 05:17:50 +1000 Subject: [PATCH 0670/1109] Clean up fixed world age mechanism to use closures (JuliaLang/JuliaSyntax.jl#290) Using a closure here to hold the fixed world age seems nicer than a global within the JuliaSyntax module. --- JuliaSyntax/src/hooks.jl | 54 ++++++++++++++++++++++----------------- JuliaSyntax/test/hooks.jl | 24 ++++++++--------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 2ca7219abbdeb..ba82e4637608e 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -76,10 +76,10 @@ function _incomplete_tag(n::SyntaxNode, codelen) end #------------------------------------------------------------------------------- -if isdefined(Core, :_setparser!) - const _set_core_parse_hook = Core._setparser! -else - function _set_core_parse_hook(parser) +function _set_core_parse_hook(parser) + @static if isdefined(Core, :_setparser!) + Core._setparser!(parser) + else # HACK! Fool the runtime into allowing us to set Core._parse, even during # incremental compilation. (Ideally we'd just arrange for Core._parse to be # set to the JuliaSyntax parser. But how do we signal that to the dumping @@ -100,28 +100,23 @@ else end end -# Use caller's world age. -const _latest_world = typemax(UInt) -const _parser_world_age = Ref{UInt}(_latest_world) -function core_parser_hook(code, filename, lineno, offset, options) - # NB: We need an inference barrier of one type or another here to prevent - # invalidations. The invokes provide this currently. 
- if _parser_world_age[] != _latest_world - Base.invoke_in_world(_parser_world_age[], _core_parser_hook, - code, filename, lineno, offset, options) +# Wrap the function `f` so that it's always invoked in the given `world_age` +# +# NB: We need an inference barrier of one type or another here to prevent +# invalidations. The invokes provide this currently. +function fix_world_age(f, world_age::UInt) + if world_age == typemax(UInt) + function invoke_latestworld(args...; kws...) + Base.invokelatest(f, args...; kws...) + end else - Base.invokelatest(_core_parser_hook, code, filename, lineno, offset, options) + function invoke_fixedworld(args...; kws...) + Base.invoke_in_world(world_age, f, args...; kws...) + end end end -# Core._parse gained a `lineno` argument in -# https://github.com/JuliaLang/julia/pull/43876 -# Prior to this, the following signature was needed: -function core_parser_hook(code, filename, offset, options) - core_parser_hook(code, filename, 1, offset, options) -end - function _has_nested_error(ex) if ex isa Expr if ex.head == :error @@ -137,7 +132,7 @@ end # Debug log file for dumping parsed code const _debug_log = Ref{Union{Nothing,IO}}(nothing) -function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol) +function core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol) try # TODO: Check that we do all this input wrangling without copying the # code buffer @@ -266,6 +261,13 @@ function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, opt end end +# Core._parse gained a `lineno` argument in +# https://github.com/JuliaLang/julia/pull/43876 +# Prior to this, the following signature was needed: +function core_parser_hook(code, filename, offset, options) + core_parser_hook(code, filename, 1, offset, options) +end + # Hack: # Meta.parse() attempts to construct a ParseError from a string if it receives # `Expr(:error)`. 
Add an override to the ParseError constructor to prevent this. @@ -293,14 +295,18 @@ function enable_in_core!(enable=true; freeze_world_age = true, if VERSION < v"1.6" error("Cannot use JuliaSyntax as the main Julia parser in Julia version $VERSION < 1.6") end - _parser_world_age[] = freeze_world_age ? Base.get_world_counter() : _latest_world if enable && !isnothing(debug_filename) _debug_log[] = open(debug_filename, "w") elseif !enable && !isnothing(_debug_log[]) close(_debug_log[]) _debug_log[] = nothing end - _set_core_parse_hook(enable ? core_parser_hook : _default_parser) + if enable + world_age = freeze_world_age ? Base.get_world_counter() : typemax(UInt) + _set_core_parse_hook(fix_world_age(core_parser_hook, world_age)) + else + _set_core_parse_hook(_default_parser) + end nothing end diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index fa2bbc0510fd8..35a8cedf79a3e 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -1,36 +1,36 @@ @testset "Hooks for Core integration" begin @testset "whitespace parsing" begin - @test JuliaSyntax._core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) - @test JuliaSyntax._core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) + @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) + @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) - @test JuliaSyntax._core_parser_hook(" ", "somefile", 1, 2, :statement) == Core.svec(nothing,2) - @test JuliaSyntax._core_parser_hook(" #==# ", "somefile", 1, 6, :statement) == Core.svec(nothing,6) + @test JuliaSyntax.core_parser_hook(" ", "somefile", 1, 2, :statement) == Core.svec(nothing,2) + @test JuliaSyntax.core_parser_hook(" #==# ", "somefile", 1, 6, :statement) == Core.svec(nothing,6) - @test JuliaSyntax._core_parser_hook(" x \n", "somefile", 1, 0, :statement) == Core.svec(:x,4) - @test JuliaSyntax._core_parser_hook(" x \n", 
"somefile", 1, 0, :atom) == Core.svec(:x,2) + @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :statement) == Core.svec(:x,4) + @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :atom) == Core.svec(:x,2) end @testset "filename and lineno" begin - ex = JuliaSyntax._core_parser_hook("@a", "somefile", 1, 0, :statement)[1] + ex = JuliaSyntax.core_parser_hook("@a", "somefile", 1, 0, :statement)[1] @test Meta.isexpr(ex, :macrocall) @test ex.args[2] == LineNumberNode(1, "somefile") - ex = JuliaSyntax._core_parser_hook("@a", "otherfile", 2, 0, :statement)[1] + ex = JuliaSyntax.core_parser_hook("@a", "otherfile", 2, 0, :statement)[1] @test ex.args[2] == LineNumberNode(2, "otherfile") # Errors also propagate file & lineno - err = JuliaSyntax._core_parser_hook("[x)", "f1", 1, 0, :statement)[1].args[1] + err = JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement)[1].args[1] @test err isa JuliaSyntax.ParseError @test err.source.filename == "f1" @test err.source.first_line == 1 - err = JuliaSyntax._core_parser_hook("[x)", "f2", 2, 0, :statement)[1].args[1] + err = JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement)[1].args[1] @test err isa JuliaSyntax.ParseError @test err.source.filename == "f2" @test err.source.first_line == 2 end @testset "toplevel errors" begin - ex = JuliaSyntax._core_parser_hook("a\nb\n[x,\ny)", "somefile", 1, 0, :all)[1] + ex = JuliaSyntax.core_parser_hook("a\nb\n[x,\ny)", "somefile", 1, 0, :all)[1] @test ex.head == :toplevel @test ex.args[1:5] == [ LineNumberNode(1, "somefile"), @@ -129,6 +129,6 @@ JuliaSyntax.enable_in_core!(false) # Should not throw - @test JuliaSyntax._core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr + @test JuliaSyntax.core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr end end From e1943362ecf2ba2a16d9eca0cd2f5706bc1edeef Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 24 May 2023 05:18:01 +1000 Subject: [PATCH 0671/1109] Additional syntax tests from Base 
test/syntax.jl (JuliaLang/JuliaSyntax.jl#291) --- JuliaSyntax/test/benchmark.jl | 3 +- JuliaSyntax/test/diagnostics.jl | 72 +++++++++++++++++++++++++++++++++ JuliaSyntax/test/hooks.jl | 3 ++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/test/benchmark.jl b/JuliaSyntax/test/benchmark.jl index 1e5183061b8fe..0a4e260e485e9 100644 --- a/JuliaSyntax/test/benchmark.jl +++ b/JuliaSyntax/test/benchmark.jl @@ -19,8 +19,9 @@ b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_c b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code, ignore_warnings=true) b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code, ignore_warnings=true) b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code, ignore_warnings=true) +b_flisp = @benchmark JuliaSyntax.fl_parseall(all_base_code) -@info "Benchmarks" ParseStream=b_ParseStream GreenNode=b_GreenNode SyntaxNode=b_SyntaxNode Expr=b_Expr +@info "Benchmarks" ParseStream=b_ParseStream GreenNode=b_GreenNode SyntaxNode=b_SyntaxNode Expr=b_Expr flisp=b_flisp # Allocation profiling diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index f97ddb7ce796d..87180d0dddca2 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -27,11 +27,19 @@ end Diagnostic(2, 7, :error, "unbalanced bidirectional unicode formatting \"X \\u202a \"") Diagnostic(11, 13, :error, "unbalanced bidirectional unicode formatting \"\\u202c\"") ] + + @test diagnostic("0x") == Diagnostic(1, 2, :error, "invalid numeric constant") + @test diagnostic("0x0.1") == Diagnostic(1, 5, :error, "hex float literal must contain `p` or `P`") end @testset "parser errors" begin @test diagnostic("+ #==# (a,b)") == Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list") + @test diagnostic("1 -+ (a=1, b=2)") == + Diagnostic(5, 5, :error, "whitespace not allowed between prefix function 
call and argument list") + @test diagnostic("\n+ (x, y)") == + Diagnostic(3, 3, :error, "whitespace not allowed between prefix function call and argument list") + @test diagnostic("A.@B.x", only_first=true) == Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component") @test diagnostic("@M.(x)") == @@ -45,6 +53,49 @@ end @test diagnostic("a, , b") == Diagnostic(4, 3, :error, "unexpected `,`") + + @test diagnostic("if\nfalse\nend") == + Diagnostic(3, 3, :error, "missing condition in `if`") + @test diagnostic("if false\nelseif\nend") == + Diagnostic(16, 16, :error, "missing condition in `elseif`") + + @test diagnostic("f(x::V) where {V) = x", allow_multiple=true) == [ + Diagnostic(17, 16, :error, "Expected `}`") + Diagnostic(17, 21, :error, "extra tokens after end of expression") + ] + @test diagnostic("[1)", allow_multiple=true) == [ + Diagnostic(3, 2, :error, "Expected `]`") + Diagnostic(3, 3, :error, "extra tokens after end of expression") + ] + + @test diagnostic("sin. 
(1)") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("x [i]") == + Diagnostic(2, 2, :error, "whitespace is not allowed here") + @test diagnostic("\nf() [i]") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\nf() (i)") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\nf() .i") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\nf() {i}") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\n@ m") == + Diagnostic(3, 3, :error, "whitespace is not allowed here") + @test diagnostic("\nusing a .b") == + Diagnostic(9, 9, :error, "whitespace is not allowed here") + + @test diagnostic("const x") == + Diagnostic(1, 7, :error, "expected assignment after `const`") + @test diagnostic("global const x") == + Diagnostic(1, 14, :error, "expected assignment after `const`") + + @test diagnostic("(for i=1; println())") == + Diagnostic(20, 19, :error, "Expected `end`") + @test diagnostic("(try i=1; println())", allow_multiple=true) == [ + Diagnostic(2, 19, :error, "try without catch or finally") + Diagnostic(20, 19, :error, "Expected `end`") + ] end @testset "parser warnings" begin @@ -102,6 +153,25 @@ end Diagnostic(2, 2, :error, "invalid escape sequence"), Diagnostic(3, 2, :error, "unterminated character literal") ] + # Various cases from Base + @test diagnostic("'\\xff\\xff\\xff\\xff'") == + Diagnostic(2, 17, :error, "character literal contains multiple characters") + @test diagnostic("'\\100\\42'") == + Diagnostic(2, 8, :error, "character literal contains multiple characters") + @test diagnostic("'\\xff\\xff\\xff\\xff\\xff'") == + Diagnostic(2, 21, :error, "character literal contains multiple characters") + @test diagnostic("'abcd'") == + Diagnostic(2, 5, :error, "character literal contains multiple characters") + @test diagnostic("'\\uff\\xff'") == + Diagnostic(2, 9, :error, "character literal contains 
multiple characters") + @test diagnostic("'\\xffa'") == + Diagnostic(2, 6, :error, "character literal contains multiple characters") + @test diagnostic("'\\uffffa'") == + Diagnostic(2, 8, :error, "character literal contains multiple characters") + @test diagnostic("'\\U00002014a'") == + Diagnostic(2, 12, :error, "character literal contains multiple characters") + @test diagnostic("'\\1000'") == + Diagnostic(2, 6, :error, "character literal contains multiple characters") # String @test diagnostic("x = \"abc\\xq\"") == @@ -118,6 +188,8 @@ end Diagnostic(9, 10, :error, "invalid escape sequence"), Diagnostic(12, 13, :error, "invalid escape sequence") ] + @test diagnostic("\"\$x෴ \"") == + Diagnostic(4, 6, :error, "interpolated variable ends with invalid character; use `\$(...)` instead") end @testset "diagnostic printing" begin diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 35a8cedf79a3e..16c5860d6faf5 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -114,6 +114,9 @@ "1,\n" => :other "1, \n" => :other + # Reference parser fails to detect incomplete exprs in this case + "(x for y" => :other + # Syntax which may be an error but is not incomplete "" => :none ")" => :none From 519d8c24bb4507f3e28ff872b7866a3cba31332f Mon Sep 17 00:00:00 2001 From: c42f Date: Wed, 24 May 2023 05:43:21 +1000 Subject: [PATCH 0672/1109] Bump version to 0.4.2 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 462192cc5455b..4893d66d6a26f 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["c42f and contributors"] -version = "0.4.1" +version = "0.4.2" [compat] julia = "1.0" From bb3cf27da283561f99ea59ddc03b87ae94d2638f Mon Sep 17 00:00:00 2001 From: Filip Tronarp Date: Tue, 23 May 2023 23:40:01 +0200 Subject: [PATCH 0673/1109] fix typo 
in readme README.md ? --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 34ece75dab2c2..dd7b915991bc7 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -33,7 +33,7 @@ all of Base and the standard libraries correctly and most of the General registry. There's still a few known incompatibilities in the Base tests. The tree data structures are usable but their APIs will evolve as we try out -various use cases. Converting to `Expr` is always be possible and will be +various use cases. Converting to `Expr` is always possible and will be stable if that helps for your use case. A talk from JuliaCon 2022 covered some aspects of this package. From 31e9cd2289d61b057dacf11da848889f164b91f7 Mon Sep 17 00:00:00 2001 From: c42f Date: Thu, 25 May 2023 08:30:46 +1000 Subject: [PATCH 0674/1109] Fix `showerror` to include backtraces (JuliaLang/JuliaSyntax.jl#293) Why this method was there, I don't know! --- JuliaSyntax/src/parser_api.jl | 5 ----- JuliaSyntax/test/parser_api.jl | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index c48fef6025899..865d354e96666 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -14,11 +14,6 @@ function ParseError(stream::ParseStream; kws...) 
ParseError(source, stream.diagnostics) end -function Base.showerror(io::IO, err::ParseError, bt; backtrace=false) - println(io, "ParseError:") - show_diagnostics(io, err.diagnostics, err.source) -end - function Base.showerror(io::IO, err::ParseError) println(io, "ParseError:") show_diagnostics(io, err.diagnostics, err.source) diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index bac33c10e216d..1a78000e2f66f 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -119,6 +119,7 @@ end # Error @ somefile.jl:1:8 a -- b -- c # └┘ ── invalid operator""" + @test occursin("Stacktrace:\n", sprint(showerror, exc, catch_backtrace())) file_url = JuliaSyntax._file_url("somefile.jl") @test sprint(showerror, exc, context=:color=>true) == """ ParseError: From a04463989d59e196061d98524abb413e7738ca57 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 09:18:56 +1000 Subject: [PATCH 0675/1109] Throw Meta.ParseError from Meta.parse() in Julia 1.10 (JuliaLang/JuliaSyntax.jl#294) New hooks added in https://github.com/JuliaLang/julia/pull/46372 allow us to * Add `JuliaSyntax.ParseError` as the new detail field of `Meta.ParseError`, preserving the detailed error information and ability to overload `showerror`, without disrupting `Base` too much. * Return `Expr(:incomplete, Meta.ParseError(...))` for incomplete expressions while having `incomplete_tag` still work. 
--- JuliaSyntax/src/hooks.jl | 35 ++++++++++++++++++++++++----------- JuliaSyntax/src/parser_api.jl | 5 +++-- JuliaSyntax/src/precompile.jl | 5 +++++ JuliaSyntax/test/hooks.jl | 35 +++++++++++++++++++++++++++++++---- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index ba82e4637608e..307e7aa518c37 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -1,6 +1,9 @@ # This file provides an adaptor to match the API expected by the Julia runtime # code in the binding Core._parse +const _has_v1_6_hooks = VERSION >= v"1.6" +const _has_v1_10_hooks = isdefined(Core, :_setparser!) + # Find the first error in a SyntaxNode tree, returning the index of the error # within its parent and the node itself. function _first_error(t::SyntaxNode) @@ -77,7 +80,7 @@ end #------------------------------------------------------------------------------- function _set_core_parse_hook(parser) - @static if isdefined(Core, :_setparser!) + @static if _has_v1_10_hooks Core._setparser!(parser) else # HACK! Fool the runtime into allowing us to set Core._parse, even during @@ -176,8 +179,15 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti wrap_toplevel_as_kind=K"None", first_line=lineno, filename=filename) tag = _incomplete_tag(tree, lastindex(code)) - if tag !== :none - # Here we replicate the particular messages + if _has_v1_10_hooks + exc = ParseError(stream, filename=filename, first_line=lineno, + incomplete_tag=tag) + msg = sprint(showerror, exc) + error_ex = Expr(tag === :none ? :error : :incomplete, + Meta.ParseError(msg, exc)) + elseif tag !== :none + # Hack: For older Julia versions, replicate the messages which + # Base.incomplete_tag() will match msg = tag === :string ? "incomplete: invalid string syntax" : tag === :comment ? "incomplete: unterminated multi-line comment #= ... 
=#" : @@ -268,13 +278,16 @@ function core_parser_hook(code, filename, offset, options) core_parser_hook(code, filename, 1, offset, options) end -# Hack: -# Meta.parse() attempts to construct a ParseError from a string if it receives -# `Expr(:error)`. Add an override to the ParseError constructor to prevent this. -# FIXME: Improve this in Base somehow? -Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e +if _has_v1_10_hooks + Base.incomplete_tag(e::JuliaSyntax.ParseError) = e.incomplete_tag +else + # Hack: Meta.parse() attempts to construct a ParseError from a string if it + # receives `Expr(:error)`. Add an override to the ParseError constructor to + # prevent this. + Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e +end -const _default_parser = VERSION < v"1.6" ? nothing : Core._parse +const _default_parser = _has_v1_6_hooks ? Core._parse : nothing """ enable_in_core!([enable=true; freeze_world_age, debug_filename]) @@ -292,7 +305,7 @@ Keyword arguments: """ function enable_in_core!(enable=true; freeze_world_age = true, debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing)) - if VERSION < v"1.6" + if !_has_v1_6_hooks error("Cannot use JuliaSyntax as the main Julia parser in Julia version $VERSION < 1.6") end if enable && !isnothing(debug_filename) @@ -318,7 +331,7 @@ end function _fl_parse_hook(code, filename, lineno, offset, options) @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876 return Core.Compiler.fl_parse(code, filename, lineno, offset, options) - elseif VERSION >= v"1.6" + elseif _has_v1_6_hooks return Core.Compiler.fl_parse(code, filename, offset, options) else if options === :all diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 865d354e96666..68d6baf69733c 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -7,11 +7,12 @@ struct ParseError <: Exception source::SourceFile diagnostics::Vector{Diagnostic} + incomplete_tag::Symbol # Used only for 
Base Expr(:incomplete) support end -function ParseError(stream::ParseStream; kws...) +function ParseError(stream::ParseStream; incomplete_tag=:none, kws...) source = SourceFile(sourcetext(stream); kws...) - ParseError(source, stream.diagnostics) + ParseError(source, stream.diagnostics, incomplete_tag) end function Base.showerror(io::IO, err::ParseError) diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index f6426532dad4c..6f8d010c88904 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -2,4 +2,9 @@ let filename = joinpath(@__DIR__, "literal_parsing.jl") text = read(filename, String) parseall(Expr, text) + if _has_v1_6_hooks + enable_in_core!() + Meta.parse("1 + 2") + enable_in_core!(false) + end end diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 16c5860d6faf5..c999793c053a6 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -20,10 +20,22 @@ # Errors also propagate file & lineno err = JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement)[1].args[1] + if JuliaSyntax._has_v1_10_hooks + @test err isa Meta.ParseError + err = err.detail + else + @test err isa JuliaSyntax.ParseError + end @test err isa JuliaSyntax.ParseError @test err.source.filename == "f1" @test err.source.first_line == 1 err = JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement)[1].args[1] + if JuliaSyntax._has_v1_10_hooks + @test err isa Meta.ParseError + err = err.detail + else + @test err isa JuliaSyntax.ParseError + end @test err isa JuliaSyntax.ParseError @test err.source.filename == "f2" @test err.source.first_line == 2 @@ -55,16 +67,31 @@ @test Meta.parse(" x#==#", 1) == (:x, 7) @test Meta.parse(" #==# ", 1) == (nothing, 7) - # Check that Meta.parse throws the JuliaSyntax.ParseError rather than - # Meta.ParseError when Core integration is enabled. 
- @test_throws JuliaSyntax.ParseError Meta.parse("[x)") + # Check the exception type that Meta.parse throws + if JuliaSyntax._has_v1_10_hooks + @test_throws Meta.ParseError Meta.parse("[x)") + @test_throws Meta.ParseError eval(Meta.parse("[x)", raise=false)) + @test_throws Meta.ParseError eval(Meta.parse("(x")) # Expr(:incomplete) + else + @test_throws JuliaSyntax.ParseError Meta.parse("[x)") + end JuliaSyntax.enable_in_core!(false) end @testset "Expr(:incomplete)" begin JuliaSyntax.enable_in_core!() - @test Meta.isexpr(Meta.parse("[x"), :incomplete) + err = Meta.parse("\"") + @test Meta.isexpr(err, :incomplete) + if JuliaSyntax._has_v1_10_hooks + @test err.args[1] isa Meta.ParseError + exc = err.args[1] + @test exc.msg == "ParseError:\n# Error @ none:1:2\n\"\n#└ ── unterminated string literal" + @test exc.detail isa JuliaSyntax.ParseError + @test exc.detail.incomplete_tag === :string + else + @test err.args[1] isa String + end for (str, tag) in [ "\"" => :string From 19a5c49f1132e5831f697ba482ce3108f6442938 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 09:19:58 +1000 Subject: [PATCH 0676/1109] Bump to 0.4.3 --- JuliaSyntax/Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 4893d66d6a26f..e199f0a859bfd 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" -authors = ["c42f and contributors"] -version = "0.4.2" +authors = ["Claire Foster and contributors"] +version = "0.4.3" [compat] julia = "1.0" From 2f98fe4a4d035c085510ae3918488edb3d9a8c86 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 09:21:27 +1000 Subject: [PATCH 0677/1109] Remove talk thumbnail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit iick 😬 --- JuliaSyntax/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/JuliaSyntax/README.md b/JuliaSyntax/README.md index dd7b915991bc7..70002a7be4b09 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -36,9 +36,7 @@ The tree data structures are usable but their APIs will evolve as we try out various use cases. Converting to `Expr` is always possible and will be stable if that helps for your use case. -A talk from JuliaCon 2022 covered some aspects of this package. - -[![Youtube video thumbnail](https://img.youtube.com/vi/CIiGng9Brrk/mqdefault.jpg)](https://youtu.be/CIiGng9Brrk) +A [talk from JuliaCon 2022](https://youtu.be/CIiGng9Brrk) covered some aspects of this package. # Examples From 06308a9343b6f5ac7674ae11b79b488eddd6383a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 17:49:30 +1000 Subject: [PATCH 0678/1109] Minor tweaks to remove method ambiguities and argument ambiguities (JuliaLang/JuliaSyntax.jl#295) These were harmless enough, but should satisfy the tests in Base. Also remove a display_error() overload because (a) it's not designed to be overridden, and (b) this override doesn't really have the intended effect anyway in the latest version where the REPL uses exception stacks more extensively. 
--- JuliaSyntax/src/parser_api.jl | 3 --- JuliaSyntax/src/syntax_tree.jl | 8 ++++++++ JuliaSyntax/src/tokenize.jl | 2 +- JuliaSyntax/test/utils.jl | 7 +++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 68d6baf69733c..ffc6538fd24f2 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -20,9 +20,6 @@ function Base.showerror(io::IO, err::ParseError) show_diagnostics(io, err.diagnostics, err.source) end -Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) - - """ parse!(stream::ParseStream; rule=:all) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 76c4e3d6e968e..e8ce3a503f1bd 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -7,6 +7,14 @@ mutable struct TreeNode{NodeData} # ? prevent others from using this with Node parent::Union{Nothing,TreeNode{NodeData}} children::Union{Nothing,Vector{TreeNode{NodeData}}} data::Union{Nothing,NodeData} + + # Use this constructor rather than the automatically generated one to pass + # Test.detect_unbound_args() test in Base. 
+ function TreeNode{NodeData}(parent::Union{Nothing,TreeNode{NodeData}}, + children::Union{Nothing,Vector{TreeNode{NodeData}}}, + data::Union{Nothing,NodeData}) where {NodeData} + new{NodeData}(parent, children, data) + end end # Implement "pass-through" semantics for field access: access fields of `data` diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index cb31f746706ea..7f54a980bf342 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -1,6 +1,6 @@ module Tokenize -export tokenize, untokenize, Tokens +export tokenize, untokenize using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str diff --git a/JuliaSyntax/test/utils.jl b/JuliaSyntax/test/utils.jl index 227077f6b2196..371da98c9e174 100644 --- a/JuliaSyntax/test/utils.jl +++ b/JuliaSyntax/test/utils.jl @@ -15,3 +15,10 @@ @test ps("XX", fgcolor=:red, bgcolor=:green, href="https://www.example.com") == "\e]8;;https://www.example.com\e\\\e[31m\e[42mXX\e[0;0m\e]8;;\e\\" end + +@testset "ambiguities" begin + if VERSION >= v"1.8" + @test detect_ambiguities(JuliaSyntax) == [] + @test detect_unbound_args(JuliaSyntax) == [] + end +end From 20520baced6f5689edb8305b88b1d40ec174ed4a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 26 May 2023 18:18:42 +1000 Subject: [PATCH 0679/1109] Fix for short form function line numbers (JuliaLang/JuliaSyntax.jl#296) Ensure we add short form function line numbers to the Expr when there's a block on the right hand side. 
--- JuliaSyntax/src/expr.jl | 10 +++++++--- JuliaSyntax/test/expr.jl | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index ce33d3d6a2271..e39400d1fdd96 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -207,9 +207,13 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, headsym = :if elseif k == K"=" && !is_decorated(head) a2 = args[2] - if is_eventually_call(args[1]) && !@isexpr(a2, :block) - # Add block for short form function locations - args[2] = Expr(:block, loc, a2) + if is_eventually_call(args[1]) + if @isexpr(a2, :block) + pushfirst!(a2.args, loc) + else + # Add block for short form function locations + args[2] = Expr(:block, loc, a2) + end end elseif k == K"macrocall" _reorder_parameters!(args, 2) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index b390132b579a8..98ab04f7484ab 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -169,6 +169,25 @@ LineNumberNode(1), :xs)) + @test parsestmt("f() =\n(a;b)") == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + :a, + LineNumberNode(2), + :b)) + + @test parsestmt("f() =\nbegin\na\nb\nend") == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + LineNumberNode(3), + :a, + LineNumberNode(4), + :b)) + @test parsestmt("let f(x) =\ng(x)=1\nend") == Expr(:let, Expr(:(=), From 8a087d201ffd1945aa5a3ad5b20743ae2bc43157 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 29 May 2023 19:57:24 +1000 Subject: [PATCH 0680/1109] Move docs from readme to docs and add a doc build (JuliaLang/JuliaSyntax.jl#297) + extract docstrings from the public API and get them onto an API reference page. 
--- JuliaSyntax/.github/workflows/CI.yml | 30 +- JuliaSyntax/README.md | 1274 +------------------------- JuliaSyntax/docs/Manifest.toml | 95 +- JuliaSyntax/docs/make.jl | 10 +- JuliaSyntax/docs/src/api.md | 57 ++ JuliaSyntax/docs/src/design.md | 847 ++++++++++++++++- JuliaSyntax/docs/src/howto.md | 38 + JuliaSyntax/docs/src/index.md | 78 ++ JuliaSyntax/docs/src/reference.md | 307 ++++++- JuliaSyntax/src/green_tree.jl | 21 +- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/syntax_tree.jl | 7 + 12 files changed, 1425 insertions(+), 1341 deletions(-) create mode 100644 JuliaSyntax/docs/src/api.md create mode 100644 JuliaSyntax/docs/src/howto.md diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 1f540e944e771..faa5a2eafa55d 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -95,18 +95,18 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - run: julia sysimage/compile.jl -# docs: -# name: Documentation -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v2 -# - uses: julia-actions/setup-julia@latest -# with: -# version: '1.6' -# - run: julia --project=docs -e ' -# using Pkg; -# Pkg.develop(PackageSpec(; path=pwd())); -# Pkg.instantiate();' -# - run: julia --project=docs docs/make.jl -# env: -# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: '1.9' + - run: julia --project=docs -e ' + using Pkg; + Pkg.develop(PackageSpec(; path=pwd())); + Pkg.instantiate();' + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 70002a7be4b09..ad49ef6d14c12 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -3,1277 +3,21 @@ [![Build 
Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions) [![codecov.io](http://codecov.io/github/JuliaLang/JuliaSyntax.jl/coverage.svg?branch=main)](http://codecov.io/github/JuliaLang/JuliaSyntax.jl?branch=main) -A Julia frontend, written in Julia. +A Julia compiler frontend, written in Julia. -## Goals - -* Lossless parsing of Julia code with precise source mapping -* Production quality error recovery, reporting and unit testing -* Parser structure as similar as possible to Julia's flisp-based parser -* Speedy enough for interactive editing -* "Compilation as an API" to support all sorts of tooling -* Grow to encompass the rest of the compiler frontend: macro expansion, - desugaring and other lowering steps. -* Once mature, replace Julia's flisp-based reference frontend in `Core` - -### Design Opinions - -* Parser implementation should be independent from tree data structures. So - we have the `ParseStream` interface. -* Tree data structures should be *layered* to balance losslessness with - abstraction and generality. So we have `SyntaxNode` (an AST) layered on top - of `GreenNode` (a lossless parse tree). We might need other tree types later. -* Fancy parser generators still seem marginal for production compilers. We use - a boring but flexible recursive descent parser. +Read the [documentation](https://JuliaLang.github.io/JuliaSyntax.jl/dev) for +more information. ### Status JuliaSyntax.jl is highly compatible with the Julia reference parser: It parses -all of Base and the standard libraries correctly and most of the General -registry. There's still a few known incompatibilities in the Base tests. +all of Base, the standard libraries and General registry. Some minor difference +remain where we've decided to fix bugs or strange behaviors in the reference +parser. The tree data structures are usable but their APIs will evolve as we try out -various use cases. 
Converting to `Expr` is always possible and will be -stable if that helps for your use case. - -A [talk from JuliaCon 2022](https://youtu.be/CIiGng9Brrk) covered some aspects of this package. - -# Examples - -Here's what parsing of a small piece of code currently looks like in various -forms. We'll use the `JuliaSyntax.parse` function to demonstrate, there's also -`JuliaSyntax.parse!` offering more fine-grained control. - -First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means -the `call` has the infix `-i` flag): - -```julia -julia> using JuliaSyntax - -julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl") -line:col│ tree │ file_name - 1:1 │[call-i] │foo.jl - 1:1 │ [parens] - 1:2 │ [call-i] - 1:2 │ x - 1:4 │ + - 1:6 │ y - 1:8 │ * - 1:9 │ z -``` - -Internally this has a full representation of all syntax trivia (whitespace and -comments) as can be seen with the more raw ["green tree"](#raw-syntax-tree--green-tree) -representation with `GreenNode`. Here ranges on the left are byte ranges, and -`✔` flags nontrivia tokens. Note that the parentheses are trivia in the tree -representation, despite being important for parsing. 
- -```julia -julia> text = "(x + y)*z" - greentree = parsestmt(JuliaSyntax.GreenNode, text) - 1:9 │[call] - 1:7 │ [parens] - 1:1 │ ( - 2:6 │ [call] - 2:2 │ Identifier ✔ - 3:3 │ Whitespace - 4:4 │ + ✔ - 5:5 │ Whitespace - 6:6 │ Identifier ✔ - 7:7 │ ) - 8:8 │ * ✔ - 9:9 │ Identifier ✔ -``` - -`GreenNode` stores only byte ranges, but the token strings can be shown by -supplying the source text string: - -```julia -julia> show(stdout, MIME"text/plain"(), greentree, text) - 1:9 │[call] - 1:7 │ [parens] - 1:1 │ ( "(" - 2:6 │ [call] - 2:2 │ Identifier ✔ "x" - 3:3 │ Whitespace " " - 4:4 │ + ✔ "+" - 5:5 │ Whitespace " " - 6:6 │ Identifier ✔ "y" - 7:7 │ ) ")" - 8:8 │ * ✔ "*" - 9:9 │ Identifier ✔ "z" -``` - -Julia `Expr` can also be produced: - -```julia -julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z") -:((x + y) * z) -``` - -# Using JuliaSyntax as the default parser - -To use JuliaSyntax as the default Julia parser to `include()` files, -parse code with `Meta.parse()`, etc, call - -``` -julia> JuliaSyntax.enable_in_core!() -``` - -This causes some startup latency, so to reduce that you can create a custom -system image by running the code in `./sysimage/compile.jl` as a Julia script -(or directly using the shell, on unix). Then use `julia -J $resulting_sysimage`. - -Using a custom sysimage has the advantage that package precompilation will also -go through the JuliaSyntax parser. - -### VSCode - -To use JuliaSyntax as the default parser for Julia within VSCode, add the -following to your `startup.jl` file: - -```julia -atreplinit() do repl - @eval begin - import JuliaSyntax - JuliaSyntax.enable_in_core!(true) - end -end -``` - -To reduce startup latency you can combine with a custom system as described in -the [Julia VScode docs](https://www.julia-vscode.org/docs/dev/userguide/compilesysimage/#Creating-a-sysimage-for-the-active-environment), -combined with the precompile execution file in [sysimage/precompile_exec.jl](sysimage/precompile_exec.jl). 
-For additional detail see the discussion in [issue #128](https://github.com/JuliaLang/JuliaSyntax.jl/issues/128). - -# Parser implementation - -Our goal is to losslessly represent the source text with a tree; this may be -called a "lossless syntax tree". (This is sometimes called a "concrete syntax -tree", but that term has also been used for the parse tree of the full formal -grammar for a language including any grammar hacks required to solve -ambiguities, etc. So we avoid this term.) - -`JuliaSyntax` uses a mostly recursive descent parser which closely -follows the high level structure of the flisp reference parser. This makes the -code familiar and reduces porting bugs. It also gives a lot of flexibility for -designing the diagnostics, tree data structures, compatibility with different -Julia versions, etc. I didn't choose a parser generator as they still seem -marginal for production compilers — for the parsing itself they don't seem -*greatly* more expressive and they can be less flexible for the important -"auxiliary" code which needs to be written in either case. - -### Lexing - -We use a version of [Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) -which has been modified to better match the needs of parsing: -* Newline-containing whitespace is emitted as a separate kind -* Tokens inside string interpolations are emitted separately from the string -* Strings delimiters are separate tokens and the actual string always has the - `String` kind -* Additional contextual keywords (`as`, `var`, `doc`) have been added and - moved to a subcategory of keywords. -* Nonterminal kinds were added (though these should probably be factored out again) -* Various bugs fixed and additions for newer Julia versions - -This copy of Tokenize lives in the `JuliaSyntax` source tree due to the volume -of changes required but once the churn settles down it would be good to figure -out how to un-fork the lexer in some way or other. 
- -### Parsing with ParseStream - -The main parser innovation is the `ParseStream` interface which provides a -stream-like I/O interface for writing the parser. The parser does not -depend on or produce any concrete tree data structure as part of the parsing -phase but the output spans can be post-processed into various tree data -structures as required. This is like the design of rust-analyzer though with a -simpler implementation. - -Parsing proceeds by recursive descent; - -* The parser consumes a flat list of lexed tokens as *input* using `peek()` to - examine tokens and `bump()` to consume them. -* The parser produces a flat list of text spans as *output* using `bump()` to - transfer tokens to the output and `position()`/`emit()` for nonterminal ranges. -* Diagnostics are emitted as separate text spans -* Whitespace and comments are automatically `bump()`ed and don't need to be - handled explicitly. The exception is syntactically relevant newlines in space - sensitive mode. -* Parser modes are passed down the call tree using `ParseState`. - -The output spans track the byte range, a syntax "kind" stored as an integer -tag, and some flags. The kind tag makes the spans a [sum -type](https://blog.waleedkhan.name/union-vs-sum-types/) but where the type is -tracked explicitly outside of Julia's type system. - -For lossless parsing the output spans must cover the entire input text. Using -`bump()`, `position()` and `emit()` in a natural way also ensures that: -* Spans are cleanly nested with children contained entirely within their parents -* Siblings spans are emitted in source order -* Parent spans are emitted after all their children. - -These properties make the output spans naturally isomorphic to a -["green tree"](#raw-syntax-tree--green-tree) -in the terminology of C#'s Roslyn compiler. 
- -### Tree construction - -The `build_tree` function performs a depth-first traversal of the `ParseStream` -output spans allowing it to be assembled into a concrete tree data structure, -for example using the `GreenNode` data type. We further build on top of this to -define `build_tree` for the AST type `SyntaxNode` and for normal Julia `Expr`. - -### Error recovery - -The goal of the parser is to produce well-formed hierarchical structure from -the source text. For interactive tools we need this to work even when the -source text contains errors; it's the job of the parser to include the recovery -heuristics to make this work. - -Concretely, the parser in `JuliaSyntax` should always produce a green tree -which is *well formed* in the sense that `GreenNode`s of a given `Kind` have -well-defined layout of children. This means the `GreenNode` to `SyntaxNode` -transformation is deterministic and tools can assume they're working with a -"mostly valid" AST. - -What does "mostly valid" mean? We allow the tree to contain the following types -of error nodes: - -* Missing tokens or nodes may be **added** as placeholders when they're needed - to complete a piece of syntax. For example, we could parse `a + (b *` as - `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder error node. -* A sequence of unexpected tokens may be **removed** by collecting - them as children of an error node and treating them as syntax trivia during - AST construction. For example, `a + b end * c` could be parsed as the green - tree `(call-i a + b (error-t end * c))`, and turned into the AST `(call + a b)`. - -We want to encode both these cases in a way which is simplest for downstream -tools to use. This is an open question, but for now we use `K"error"` as the -kind, with the `TRIVIA_FLAG` set for unexpected syntax. - -# Syntax trees - -Julia's `Expr` abstract syntax tree can't store precise source locations or -deal with syntax trivia like whitespace or comments. 
So we need some new tree -types in `JuliaSyntax`. - -JuliaSyntax currently deals in three types of trees: -* `GreenNode` is a minimal *lossless syntax tree* where - - Nodes store a kind and length in bytes, but no text - - Syntax trivia are included in the list of children - - Children are strictly in source order -* `SyntaxNode` is an *abstract syntax tree* which has - - An absolute position and pointer to the source text - - Children strictly in source order - - Leaf nodes store values, not text - - Trivia are ignored, but there is a 1:1 mapping of non-trivia nodes to the - associated `GreenTree` nodes. -* `Expr` is used as a conversion target for compatibility - -## Julia AST structures - -In this section we describe some features of Julia's AST structures. - -### Concatenation syntax - -Concatenation syntax comes in two syntax forms: -* The traditional `hcat`/`vcat`/`row` which deal with concatenation or matrix - construction along dimensions one and two. -* The new `ncat`/`nrow` syntax which deals with concatenation or array - construction along arbitrary dimensions. - -We write `ncat-3` for concatenation along the third dimension. (The `3` is -stored in the head flags for `SyntaxNode` trees, and in the first `arg` for -`Expr` trees.) 
Semantically the new syntax can work like the old: -* `ncat-1` is the same as `vcat` -* `ncat-2` is the same as `hcat` -* `row` is the same as `nrow-2` - -#### Vertical concatenation (dimension 1) - -Vertical concatenation along dimension 1 can be done with semicolons or newlines - -```julia -julia> print_tree(:([a - b])) -Expr(:vcat) -├─ :a -└─ :b - -julia> print_tree(:([a ; b])) -Expr(:vcat) -├─ :a -└─ :b -``` - -#### Horizontal concatenation (dimension 2) - -For horizontal concatenation along dimension 2, use spaces or double semicolons - -```julia -julia> print_tree(:([a b])) -Expr(:hcat) -├─ :a -└─ :b - -julia> print_tree(:([a ;; b])) -Expr(:ncat) -├─ 2 -├─ :a -└─ :b -``` - -#### Mixed concatenation - -Concatenation along dimensions 1 and 2 can be done with spaces and single -semicolons or newlines, producing a mixture of `vcat` and `row` expressions: - -```julia -julia> print_tree(:([a b - c d])) -# OR -julia> print_tree(:([a b ; c d])) -Expr(:vcat) -├─ Expr(:row) -│ ├─ :a -│ └─ :b -└─ Expr(:row) - ├─ :c - └─ :d -``` - -General n-dimensional concatenation results in nested `ncat` and `nrow`, for -example - -```julia -julia> print_tree(:([a ; b ;; c ; d ;;; x])) -Expr(:ncat) -├─ 3 -├─ Expr(:nrow) -│ ├─ 2 -│ ├─ Expr(:nrow) -│ │ ├─ 1 -│ │ ├─ :a -│ │ └─ :b -│ └─ Expr(:nrow) -│ ├─ 1 -│ ├─ :c -│ └─ :d -└─ :x -``` - -## Tree differences between GreenNode and Expr - -The tree structure of `GreenNode`/`SyntaxNode` is similar to Julia's `Expr` -data structure but there are various differences: - -### Source ordered children - -The children of our trees are strictly in source order. This has many -consequences in places where `Expr` reorders child expressions. - -* Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`. 
-* Generators are represented in source order as a single node rather than multiple nested flatten and generator expressions. - -### No `LineNumberNode`s - -Our syntax nodes inherently stores source position, so there's no need for the -`LineNumberNode`s used by `Expr`. - -### More consistent / less redundant `block`s - -Sometimes `Expr` needs redundant block constructs to store `LineNumberNode`s, -but we don't need these. Also in cases which do use blocks we try to use them -consistently. - -* No block is used on the right hand side of short form function syntax -* No block is used for the conditional in `elseif` -* No block is used for the body of anonymous functions after the `->` -* `let` argument lists always use a block regardless of number or form of bindings - -### Faithful representation of the source text / avoid premature lowering - -Some cases of "premature lowering" have been removed, preferring to represent -the source text more closely. - -* `K"macrocall"` - allow users to easily distinguish macrocalls with parentheses from those without them (#218) -* Grouping parentheses are represented with a node of kind `K"parens"` (#222) -* The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`. -* Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) -* `global const` and `const global` are not normalized by the parser. 
This is done in `Expr` conversion (#130) -* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) -* `@.` is not lowered to `@__dot__` inside the parser (#146) -* Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) -* Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) -* `return` without a value has zero children, rather than lowering to `return nothing` (#220) - -### Containers for string-like constructs - -String-like constructs always come within a container node, not as a single -token. These are useful for tooling which works with the tokens of the source -text. Also separating the delimiters from the text they delimit removes a whole -class of tokenization errors and lets the parser deal with them. - -* string always use `K"string"` to wrap strings, even when they only contain a single string chunk (#94) -* char literals are wrapped in the `K"char"` kind, containing the character literal string along with their delimiters (#121) -* backticks use the `K"cmdstring"` kind -* `var""` syntax uses `K"var"` as the head (#127) -* The parser splits triple quoted strings into string chunks interspersed with whitespace trivia - -### Improvements for AST inconsistencies - -* Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90) -* Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. 
*) x y)` (#240) -* The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103) -* Unadorned postfix adjoint is parsed as `call` rather than as a syntactic operator for consistency with suffixed versions like `x'ᵀ` (#124) - -### Improvements to awkward AST forms - -* Frakentuples with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133) -* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) -* The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) -* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) -* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). - - -## More detail on tree differences - -### Generators - -Flattened generators are uniquely problematic because the Julia AST doesn't -respect a key rule we normally expect: that the children of an AST node are a -*contiguous* range in the source text. For example, the `for`s in -`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to -mean - -``` -for x in xs -for y in ys - push!(xy, collection) -``` - -so the `xy` prefix is in the *body* of the innermost for loop. 
Following this, -the standard Julia AST is like so: - -``` -(flatten - (generator - (generator - xy - (= y ys)) - (= x xs))) -``` - -however, note that if this tree were flattened, the order would be -`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the -source order. - -However, our green tree is strictly source-ordered, so we must deviate from the -Julia AST. We deal with this by grouping cartesian products of iterators -(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and -use the presence of multiple iterator blocks rather than the `flatten` head to -distinguish flattened iterators. The nested flattens and generators of `Expr` -forms are reconstructed later. In this form the tree structure resembles the -source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as - -``` -(generator - xy - (= x xs) - (= y ys)) -``` - -And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as - -``` -(generator - xy - (cartesian_iterator - (= x xs) - (= y ys))) -``` - -### Whitespace trivia inside strings - -For triple quoted strings, the indentation isn't part of the string data so -should also be excluded from the string content within the green tree. That is, -it should be treated as separate whitespace trivia tokens. With this separation -things like formatting should be much easier. The same reasoning goes for -escaping newlines and following whitespace with backslashes in normal strings. - -Detecting string trivia during parsing means that string content is split over -several tokens. Here we wrap these in the K"string" kind (as is already used -for interpolations). The individual chunks can then be reassembled during Expr -construction. (A possible alternative might be to reuse the K"String" and -K"CmdString" kinds for groups of string chunks (without interpolation).) - -Take as an example the following Julia fragment. 
- -```julia -x = """ - $a - b""" -``` - -Here this is parsed as `(= x (string-s a "\n" "b"))` (the `-s` flag in -`string-s` means "triple quoted string") - -Looking at the green tree, we see the indentation before the `$a` and `b` are -marked as trivia: - -``` -julia> text = "x = \"\"\"\n \$a\n b\"\"\"" - show(stdout, MIME"text/plain"(), parseall(GreenNode, text, rule=:statement), text) - 1:23 │[=] - 1:1 │ Identifier ✔ "x" - 2:2 │ Whitespace " " - 3:3 │ = "=" - 4:4 │ Whitespace " " - 5:23 │ [string] - 5:7 │ """ "\"\"\"" - 8:8 │ String "\n" - 9:12 │ Whitespace " " - 13:13 │ $ "\$" - 14:14 │ Identifier ✔ "a" - 15:15 │ String ✔ "\n" - 16:19 │ Whitespace " " - 20:20 │ String ✔ "b" - 21:23 │ """ "\"\"\"" -``` - -### String nodes always wrapped in `K"string"` or `K"cmdstring"` - -All strings are surrounded by a node of kind `K"string"`, even non-interpolated -literals, so `"x"` parses as `(string "x")`. This makes string handling simpler -and more systematic because interpolations and triple strings with embedded -trivia don't need to be treated differently. It also gives a container in which -to attach the delimiting quotes. - -The same goes for command strings which are always wrapped in `K"cmdstring"` -regardless of whether they have multiple pieces (due to triple-quoted -dedenting) or otherwise. - -### No desugaring of the closure in do blocks - -The reference parser represents `do` syntax with a closure for the second -argument. That is, - -```julia -f(x) do y - body -end -``` - -becomes `(do (call f x) (-> (tuple y) (block body)))` in the reference parser. - -However, the nested closure with `->` head is implied here rather than present -in the surface syntax, which suggests this is a premature desugaring step. -Instead we emit the flatter structure `(do (call f x) (tuple y) (block body))`. - -## More about syntax kinds - -We generally track the type of syntax nodes with a syntax "kind", stored -explicitly in each node an integer tag. 
This effectively makes the node type a -[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system -sense, but with the type tracked explicitly outside of Julia's type system. - -Managing the type explicitly brings a few benefits: -* Code and data structures for manipulating syntax nodes is always concretely - typed from the point of view of the compiler. -* We control the data layout and can pack the kind into very few bits along - with other flags bits, as desired. -* Predicates such as `is_operator` can be extremely efficient, given that we - know the meaning of the kind's bits. -* The kind can be applied to several different tree data structures, or - manipulated by itself. -* Pattern matching code is efficient when the full set of kinds is closed and - known during compilation. - -There's arguably a few downsides: -* Normal Julia dispatch can't express dispatch over syntax kind. Luckily, - a pattern matching macro can provide a very elegant way of expressing such - algorithms over a non-extensible set of kinds, so this is not a big problem. -* Different node kinds could come with different data fields, but a syntax - tree must have generic fields to cater for all kinds. (Consider as an analogy - the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic - `head` and `args` fields.) This could be a disadvantage for code which - processes one specific kind but for generic code processing many kinds - having a generic but *concrete* data layout should be faster. - - -# Differences from the flisp parser - -_See also the [§ Comparisons to other packages](#comparisons-to-other-packages) section._ - -Practically the flisp parser is not quite a classic [recursive descent -parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it -often looks back and modifies the output tree it has already produced. 
We've -tried to eliminate this pattern in favor of lookahead where possible because - -* It works poorly when the parser is emitting a stream of node spans with - strict source ordering constraints. -* It's confusing to reason about this kind of code - -However, on occasion it seems to solve genuine ambiguities where Julia code -can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=` -ambiguity within parentheses. In these cases we put up with using the -functions `look_behind` and `reset_node!()`. - -## Code structure - -Large structural changes were generally avoided while porting. In particular, -nearly all function names for parsing productions are the same with `-` -replaced by `_` and predicates prefixed by `is_`. - -Some notable differences: - -* `parse-arglist` and a parts of `parse-paren-` have been combined into a - general function `parse_brackets`. This function deals with all the odd - corner cases of how the AST is emitted when mixing `,` and `;` within - parentheses. In particular regard to: - - Determining whether `;` are block syntax separators or keyword parameters - - Determining whether to emit `parameter` sections based on context - - Emitting key-value pairs either as `kw` or `=` depending on context -* The way that `parse-resword` is entered has been rearranged to avoid parsing - reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we - detect reserved words and enter `parse_resword` earlier. - -## Flisp parser bugs - -Here's some behaviors which seem to be bugs. (Some of these we replicate in the -name of compatibility, perhaps with a warning.) - -* Macro module paths allow calls which gives weird stateful semantics! - ``` - b() = rand() > 0.5 ? Base : Core - b().@info "hi" - ``` -* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd - broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should - probably be rejected. 
-* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where - keyword parameters are separated by commas. A tuple is produced instead. -* `const` and `global` allow chained assignment, but the right hand side is not - constant. `a` const here but not `b`. - ``` - const a = b = 1 - ``` -* Parsing the `ncat` array concatenation syntax within braces gives - strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as - `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy - to how `{a b}` produces `(bracescat (row a b))`. -* `export a, \n $b` is rejected, but `export a, \n b` parses fine. -* In try-catch-finally, the `finally` clause is allowed before the `catch`, but - always executes afterward. (Presumably was this a mistake? It seems pretty awful!) -* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is - correctly parsed as `Expr(:vect)` (maybe fixed in 1.7?) -* `f(x for x in in xs)` is accepted, and parsed very strangely. -* Octal escape sequences saturate rather than being reported as errors. Eg, - `"\777"` results in `"\xff"`. This is inconsistent with - `Base.parse(::Type{Int}, ...)` -* Leading dots in import paths with operator-named modules are parsed into - dotted operators rather than a relative path. Ie, we have `import .⋆` parsing - to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency - with the parsing of `import .A`. -* Looking back on the output disregards grouping parentheses which can lead to - odd results in some cases. For example, `f(((((x=1)))))` parses as a keyword - call to function `f` with the keyword `x=1`, but arguably it should be an - assignment. -* Hexfloat literals can have a trailing `f` for example, `0x1p1f` - but this doesn't do anything. In the `flisp` C code such cases are treated as - Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925 - but this has never been officially supported in Julia. 
It seems this bug - arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent - digits, all of which are detected as invalid except for a trailing `f` when - processed by `isnumtok_base`. -* `begin` and `end` are not parsed as keywords when indexing. Typed comprehensions - initially look the same, but can be distinguished from indexing once we handle - a `for` token; it is safe to treat `begin` and `end` as keywords afterwards. The - reference parser *only* handles this well when there's a newline before `for`: - ```julia - Any[foo(i) - for i in x if begin - true - end - ] - ``` - works, while - ```julia - Any[foo(i) for i in x if begin - true - end - ] - ``` - does not. JuliaSyntax handles both cases. - -## Parsing / AST oddities and warts - -### Questionable allowed forms - -There's various allowed syntaxes which are fairly easily detected in the -parser, but which will be rejected later during lowering. To allow building -DSLs this is fine and good but some such allowed syntaxes don't seem very -useful, even for DSLs: - -* `macro (x) end` is allowed but there are no anonymous macros. -* `abstract type A < B end` and other subtype comparisons are allowed, but - only `A <: B` makes sense. -* `x where {S T}` produces `(where x (bracescat (row S T)))`. This seems pretty weird! -* `[x for outer x in xs]` parses, but `outer` makes no real sense in this - context (and using this form is a lowering error) - -### `kw` and `=` inconsistencies - -There's many apparent inconsistencies between how `kw` and `=` are used when -parsing `key=val` pairs inside parentheses. - -* Inconsistent parsing of tuple keyword args inside vs outside of dot calls - ```julia - (a=1,) # (tuple (= a 1)) - f.(a=1) # (tuple (kw a 1)) - ``` -* Mixtures of `,` and `;` in calls give nested parameter AST which parses - strangely, and is kind-of-horrible to use. 
- ```julia - # (tuple (parameters (parameters e f) c d) a b) - (a,b; c,d; e,f) - ``` -* Long-form anonymous functions have argument lists which are parsed - as tuples (or blocks!) rather than argument lists and this mess appears to be - papered over as part of lowering. For example, in `function (a;b) end` the - `(a;b)` is parsed as a block! This leads to more inconsistency in the use of - `kw` for keywords. - - -### Other oddities - -* Operators with suffices don't seem to always be parsed consistently as the - same operator without a suffix. Unclear whether this is by design or mistake. - For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` - -* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. - I suppose this is somewhat useful for AST consumers, but reversing the source - order is pretty weird and inconvenient when moving to a lossless parser. - -* `let` bindings might be stored in a block, or they might not be, depending on - special cases: - ``` - # Special cases not in a block - let x=1 ; end ==> (let (= x 1) (block)) - let x::1 ; end ==> (let (:: x 1) (block)) - let x ; end ==> (let x (block)) - - # In a block - let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) - let x+=1 ; end ==> (let (block (+= x 1)) (block)) - ``` - -* The `elseif` condition is always in a block but not the `if` condition. - Presumably because of the need to add a line number node in the flisp parser - `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` - -* Spaces are allowed between import dots — `import . .A` is allowed, and - parsed the same as `import ..A` - -* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` - can't be a normal identifier. - -* The raw string escaping rules are *super* confusing for backslashes near - the end of the string: `raw"\\\\ "` contains four backslashes, whereas - `raw"\\\\"` contains only two. 
However this was an intentional feature to - allow all strings to be represented and it's unclear whether the situation - can be improved. - -* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and - `@S {a b}` parse. Conversely, `@S[a b]` parses. - -* Macro names and invocations are post-processed from the output of - `parse-atom` / `parse-call`, which leads to some surprising and questionable - constructs which "work": - - Absurdities like `@(((((a))))) x ==> (macrocall @a x)` - - Infix macros!? `@(x + y) ==> (macrocall @+ x y)` (ok, kinda cute and has - some weird logic to it... but what?) - - Similarly additional parentheses are allowed `@(f(x)) ==> (macrocall @f x)` - -* Allowing `@` first in macro module paths (eg `@A.B.x` instead of `A.B.@x`) - seems like unnecessary variation in syntax. It makes parsing valid macro - module paths more complex and leads to oddities like `@$.x y ==> (macrocall - ($ (quote x)) y` where the `$` is first parsed as a macro name, but turns out - to be the module name after the `.` is parsed. But `$` can never be a valid - module name in normal Julia code so this makes no sense. - -* Triple quoted `var"""##"""` identifiers are allowed. But it's not clear these - are required or desired given that they come with the complex triple-quoted - string deindentation rules. - -* Deindentation of triple quoted strings with mismatched whitespace is weird - when there's nothing but whitespace. For example, we have - `"\"\"\"\n \n \n \"\"\"" ==> "\n \n"` so the middle line of whitespace - here isn't dedented but the other two longer lines are?? Here it seems more - consistent that either (a) the middle line should be deindented completely, - or (b) all lines should be dedented only one character, as that's the - matching prefix. - -* Parsing of anonymous function arguments is somewhat inconsistent. - `function (xs...) \n body end` parses the argument list as `(... 
xs)`, whereas - `function (x) \n body end` parses the argument list as `(tuple x)`. - -* The difference between multidimensional vs flattened iterators is subtle, and - perhaps too syntactically permissive. For example, - - `[(x,y) for x * in 1:10, y in 1:10]` is a multidimensional iterator - - `[(x,y) for x * in 1:10 for y in 1:10]` is a flattened iterator - - `[(x,y) for x in 1:10, y in 1:10 if y < x]` is a flattened iterator - - It's this last case which seems problematic (why not *require* the second - form as a more explicit way to indicate flattening?). It's not even pretty - printed correctly: - ``` - julia> :([(x,y) for x in 1:10, y in 1:10 if y < x]) - :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))]) - ``` - -* The character `'` may be written without escaping as `'''` rather than - requiring the form `'\''`. - -# Comparisons to other packages - -### Official Julia compiler - -_See also the [§ Differences from the flisp parser](#differences-from-the-flisp-parser) section._ - -The official Julia compiler frontend lives in the Julia source tree. It's -mostly contained in just a few files: -* The parser in [src/julia-parser.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-parser.scm) -* Macro expansion in [src/ast.c](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/ast.c) and [src/macroexpand.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/macroexpand.scm) -* Syntax lowering in [src/julia-syntax.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-syntax.scm) -* The flisp runtime and C extensions for Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp) -* Supporting utility functions in a few other `.scm` and `.c` files. - -There's two issues with the official reference frontend which suggest a rewrite. 
- -First, there's no support for precise source locations and the existing data -structures (bare flisp lists) can't easily be extended to add these. Fixing -this would require changes to nearly all of the code. - -Second, it's written in flisp: an aestheically pleasing, minimal but obscure -implementation of Scheme. Learning Scheme is actually a good way to appreciate -some of Julia's design inspiration, but it's quite a barrier for developers of -Julia language tooling. (Flisp has no user-level documentation but non-schemers -can refer to the [Racket documentation](https://docs.racket-lang.org) which is -quite compatible for basic things.) In addition to the social factors, having -the embedded flisp interpreter and runtime with its own separate data -structures and FFI is complex and inefficient. - -### JuliaParser.jl - -[JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl) -was a direct port of Julia's flisp reference parser, but was abandoned around -Julia 0.5 or so. Furthermore, it doesn't support lossless parsing, and adding -that feature would amount to a full rewrite. Given its divergence with the flisp -reference parser since Julia-0.5, it seemed better just to start anew from the -reference parser instead. - -### Tokenize.jl - -[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) -is a fast lexer for Julia code. The code from Tokenize has been -imported and used in JuliaSyntax, with some major modifications as discussed in -the [lexer implementation](#lexing) section. - -### CSTParser.jl - -[CSTParser.jl](https://github.com/julia-vscode/CSTParser.jl) -is a ([mostly?](https://github.com/domluna/JuliaFormatter.jl/issues/52#issuecomment-529945126)) -lossless parser with goals quite similar to JuliaParser. It is used extensively -in the VSCode / LanguageServer / JuliaFormatter ecosystem. 
CSTParser is very -useful, but I do find the implementation hard to understand, and I wanted to try -a fresh approach with a focus on: - -* "Production readiness": Good docs, tests, diagnostics and maximum similarity - with the flisp parser, with the goal of getting the new parser into `Core`. -* Learning from the latest ideas about composable parsing and data structures - from outside Julia. In particular the implementation of `rust-analyzer` is - very clean, well documented, and was a great source of inspiration. -* Composability of tree data structures — I feel like the trees should be - layered somehow with a really lightweight [green tree](#raw-syntax-tree--green-tree) - at the most basic level, similar to Roslyn or rust-analyzer. In comparison, - CSTParser uses a more heavyweight non-layered data structure. Alternatively or - additionally, have a common tree API with many concrete task-specific - implementations. - -A big benefit of the JuliaSyntax parser is that it separates the parser code -from the tree data structures entirely, which should give a lot of flexibility -in experimenting with various tree representations. - -I also want JuliaSyntax to tackle macro expansion and other lowering steps, and -provide APIs for this which can be used by both the core language and the -editor tooling. - -### tree-sitter-julia - -Using a modern production-ready parser generator like `tree-sitter` is an -interesting option and some progress has already been made in -[tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia). -But I feel like the grammars for parser generators are only marginally more -expressive than writing the parser by hand, after accounting for the effort -spent on the weird edge cases of a real language and writing the parser's tests -and "supporting code". - -On the other hand, a hand-written parser is completely flexible and can be -mutually understood with the reference implementation, so I chose that approach -for JuliaSyntax. 
- -# Resources - -## Julia issues - -Here's a few links to relevant Julia issues. - -#### Macro expansion - -* Automatic hygiene for macros https://github.com/JuliaLang/julia/pull/6910 — - would be interesting to implement this in a new frontend. - -#### Lowering - -* A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201 — - some of this should be ported. (Last commit at https://github.com/JuliaLang/julia/tree/df61138fcf97d03dcbbba10e962571af9700db56/ ) -* The closure capture problem https://github.com/JuliaLang/julia/issues/15276 — - would be interesting to see whether we can tackle some of the harder cases in - a new implementation. - -## C# Roslyn - -[Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) -* [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) -* [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) - - -## Rust-analyzer - -`rust-analyzer` seems to be very close to what I'm building here, and has come -to the same conclusions on green tree layout with explicit trivia nodes. Their -document on internals -[here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md) -is great. Points of note: - -* They have *three* trees! - 1. Green trees exactly like mine (pretty much all the same design - decisions, including trivia storage). Though note that the team are still - [toying with](https://github.com/rust-analyzer/rust-analyzer/issues/6584) - the idea of using the Roslyn model of trivia. - 2. Untyped red syntax trees somewhat like mine, but much more minimal. For - example, these don't attempt to reorder children. - 3. A typed AST layer with a type for each expression head. 
The AST searches - for children by dynamically traversing the child list each time, rather - than having a single canonical ordering or remembering the placement of - children which the parser knew. -* "Parser does not see whitespace nodes. Instead, they are attached to the - tree in the TreeSink layer." This may be relevant to us - it's a pain to - attach whitespace to otherwise significant tokens, and inefficient to - allocate and pass around a dynamic list of whitespace trivia. -* "In practice, incremental reparsing doesn't actually matter much for IDE - use-cases, parsing from scratch seems to be fast enough." (I wonder why - they've implemented incremental parsing then?) -* There's various comments about macros... Rust macro expansion seems quite - different from Julia (it appears it may be interleaved with parsing??) - -In general I think it's unclear whether we want typed ASTs in Julia and we -particularly need to deal with the fact that `Expr` is the existing public -interface. Could we have `Expr2` wrap `SyntaxNode`? - -* A related very useful set of blog posts which discuss using the rust syntax - tree library (rowan) for representing of a non-rust toy language is here - https://dev.to/cad97/lossless-syntax-trees-280c - -Not all the design decisions in `rust-analyzer` are finalized but the -[architecture document](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/architecture.md) -is a fantastic source of design inspiration. - -Highlights: -* "The parser is independent of the particular tree structure and particular - representation of the tokens. It transforms one flat stream of events into - another flat stream of events." This seems great, let's adopt it! -* TODO - -## RSLint - -[RSLint](https://rslint.org/dev) is a linter for javascript, built in Rust. It -uses the same parsing infrastructure and green tree libraries `rust-analyzer`. 
-There's an excellent and friendly high level overview of how all this works in -the rslint [parsing devdocs](https://rslint.org/dev/parsing.html). - -Points of note: - -* Backtracking and restarting the parser on error is actually quite simple in - the architecture we (mostly) share with `rust-analyzer`: - > ... events allow us to cheaply backtrack the parser by simply draining - > the events and resetting the token source cursor back to some place. - -* The section on [error - recovery](https://rslint.org/dev/parsing.html#error-recovery) is interesting; - they talk about various error recovery strategies. - -## Diagnostics - -The paper [P2429 - Concepts Error Messages for -Humans](https://wg21.tartanllama.xyz/P2429%20-%20Concepts%20Error%20Messages%20for%20Humans.pdf) -is C++ centric, but has a nice review of quality error reporting in various -compilers including Elm, ReasonML, Flow, D and Rust. - -Some Rust-specific resources: -* [rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html) -* The source of the Rust compiler's diagnostics system: - - The [`println!` macro](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_builtin_macros/src/format.rs) - shows how these can be emitted from macros - - The parser's [diagnostics.rs](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_parse/src/parser/diagnostics.rs) - -## General resources about parsing - -* [Modern parser generator](https://matklad.github.io/2018/06/06/modern-parser-generator.html) - has a lot of practical notes on writing parsers. Highlights: - - Encourages writing tests for handwritten parsers as inline comments - - Mentions Pratt parsers for simple operator precedence parsing. 
Good articles: - - [From Aleksey Kladov (matklad - the main rust-analyzer author, etc)](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) - - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) - - Some discussion of error recovery - -* Some notes about stateful lexers for parsing shell-like string interpolations: - http://www.oilshell.org/blog/2017/12/17.html - - -# Design notes - -The following are some fairly disorganized design notes covering a mixture of -things which have already been done and musings about further work. - -## Prototyping approach - -The tree datastructure design here is tricky: - -1. The symbolic part of compilation (the compiler frontend) incrementally - abstracts and transforms the source text, but errors along the way should - refer back to the source. - - The tree must be a lossless representation of the source text - - Some aspects of the source text (comments, most whitespace) are irrelevant - to parsing. - - More aspects of the source text are irrelevant after we have an abstract - syntax tree of the surface syntax. Some good examples here are the - parentheses in `2*(x + y)` and the explicit vs implicit multiplication - symbol in `2*x` vs `2x`. - -2. There's various type of *analyses* -- There's many useful ways to augment a syntax tree depending on use case. -- Analysis algorithms should be able to act on any tree type, ignoring - but carrying augmentations which they don't know about. - -Having so many use cases suggests it might be best to have several different -tree types with a common interface rather than one main abstract syntax tree -type. But it seems useful to figure this out by prototyping several important -work flows: - -* Syntax transformations - - Choose some macros to implement. This is a basic test of mixing source - trees from different files while preserving precise source locations. 
- (Done in .) -* Formatting - - Re-indent a file. This tests the handling of syntax trivia. -* Refactoring - - A pass to rename local variables. This tests how information from further - down the compilation pipeline can be attached to the syntax tree and used - to modify the source code. -* Precise error reporting in lowering - - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment - location `[a, b]`". But at a precise source location. - - Try something several layers deeper inside lowering? For example "macro - definition not allowed inside a local scope" -* Incremental reparsing - - Reparse a source file, given a byte range replacement - - -## Tree design - -### Raw syntax tree / Green tree - -Raw syntax tree (or ["Green tree"](https://ericlippert.com/2012/06/08/red-green-trees/) -in the terminology from Roslyn) - -We want GreenNode to be -* *structurally minimal* — For efficiency and generality -* *immutable* — For efficiency (& thread safety) -* *complete* — To preserve parser knowledge -* *token agnostic* — To allow use with any source language - -The simplest idea possible is to have: -* Leaf nodes are a single token -* Children are in source order - -Call represents a challenge for the AST vs Green tree in terms of node -placement / iteration for infix operators vs normal prefix function calls. - -- The normal problem of `a + 1` vs `+(a, 1)` -- Or worse, `a + 1 + 2` vs `+(a, 1, 2)` - -Clearly in the AST's *interface* we need to abstract over this placement. For -example with something like the normal Julia AST's iteration order. - -### Abstract syntax tree - -By pointing to green tree nodes, AST nodes become traceable back to the original -source. - -Unlike most languages, designing a new AST is tricky because the existing -`Expr` is a very public API used in every macro expansion. User-defined -macro expansions interpose between the source text and lowering, and using -`Expr` looses source information in many ways. 
There seem to be a few ways forward:
In some cases like parsing infix - expressions with a missing tail, emitting a zero width error token can lead - to a fully formed parse tree without the productions up the stack needing to - participate in recovery. -* A token which is disallowed in current context. Eg, `=` in parse_atom, or a - closing token inside an infix expression. Here we can emit a `K"error"`, but - we can't descend further into the parse tree; we must pop several recursive - frames off. Seems tricky! - -A typical structure is as follows: - -```julia -function parse_foo(ps) - mark = position(ps) - parse_bar(ps) # What if this fails? - if peek(ps) == K"some-token" - bump(ps) - parse_baz(ps) # What if this fails? - emit(ps, mark, K"foo") - end -end -``` - -Emitting plain error tokens are good in unfinished infix expressions: - -```julia -begin - a = x + -end -``` - -The "missing end" problem is tricky, as the intermediate syntax is valid; the -problem is often only obvious until we get to EOF. - -Missing end -```julia -function f() - begin - a = 10 -end - -# <-- Indentation would be wrong if g() was an inner function of f. -function g() -end -``` - -It seems like ideal error recovery would need to backtrack in this case. For -example: - -- Pop back to the frame which was parsing `f()` -- Backtrack through the parse events until we find a function with indentation - mismatched to the nesting of the parent. -- Reset ParseStream to a parsing checkpoint before `g()` was called -- Emit error and exit the function parsing `f()` -- Restart parsing -- Somehow make sure all of this can't result in infinite recursion 😅 - -Missing commas or closing brackets in nested structures also present the -existing parser with a problem. - -```julia -f(a, - g(b, - c # -- missing comma? - d), - e) -``` - -Again the local indentation might tell a story - -```julia -f(a, - g(b, - c # -- missing closing `)` ? - d) -``` - -But not always! - -```julia -f(a, - g(b, - c # -- missing closing `,` ? 
- d)) -``` - -Another particularly difficult problem for diagnostics in the current system is -broken parentheses or double quotes in string interpolations, especially when -nested. - -# Fun research questions - -### Parser Recovery - -Can we learn fast and reasonably accurate recovery heuristics for when the -parser encounters broken syntax, rather than hand-coding these? How would we -set the parser up so that training works and injecting the model is -nonintrusive? If the model is embedded in and works together with the parser, -can it be made compact enough that training is fast and the model itself is -tiny? - -### Formatting - -Given source and syntax tree, can we regress/learn a generative model of -indentation from the syntax tree? Source formatting involves a big pile of -heuristics to get something which "looks nice"... and ML systems have become -very good at heuristics. Also, we've got huge piles of training data — just -choose some high quality, tastefully hand-formatted libraries. +various use cases. Converting to `Expr` is always possible and will be stable +if that helps for your use case. # Getting involved @@ -1286,7 +30,7 @@ the labels `intro issue` or `bug` might be a good place to start. Also watching the 2022 JuliaCon talk and reading this document is probably good for an overview. -As of March 2023, we've got really good positional tracking within the source, +As of May 2023, we've got really good positional tracking within the source, but JuliaSyntax really needs a better system for parser recovery before the errors are really nice. This requires some research. 
For example, you could read up on how rust-analyzer does recovery, or rslint - both these are diff --git a/JuliaSyntax/docs/Manifest.toml b/JuliaSyntax/docs/Manifest.toml index 32615300909ab..a84b595dd0c76 100644 --- a/JuliaSyntax/docs/Manifest.toml +++ b/JuliaSyntax/docs/Manifest.toml @@ -1,92 +1,119 @@ # This file is machine-generated - editing it directly is not advised -[[ANSIColoredPrinters]] +julia_version = "1.9.0" +manifest_format = "2.0" +project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f" + +[[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" version = "0.0.1" -[[Base64]] +[[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" -[[Dates]] +[[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" -[[DocStringExtensions]] +[[deps.DocStringExtensions]] deps = ["LibGit2"] -git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.6" +version = "0.9.3" -[[Documenter]] +[[deps.Documenter]] deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "f425293f7e0acaf9144de6d731772de156676233" +git-tree-sha1 = "58fea7c536acd71f3eef6be3b21c0df5f3df88fd" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.10" +version = "0.27.24" -[[IOCapture]] +[[deps.IOCapture]] deps = ["Logging", "Random"] -git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" +git-tree-sha1 = "d75853a0bdbfb1ac815478bacd89cd27b550ace6" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.2.2" +version = "0.2.3" -[[InteractiveUtils]] +[[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[JSON]] +[[deps.JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 
= "8076680b162ada2a031f707ac7b4953e30667a37" +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.2" +version = "0.21.4" -[[LibGit2]] +[[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" -[[Logging]] +[[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[Markdown]] +[[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" -[[Mmap]] +[[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" -[[NetworkOptions]] +[[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" -[[Parsers]] -deps = ["Dates"] -git-tree-sha1 = "92f91ba9e5941fc781fecf5494ac1da87bdac775" +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "a5aef8d4a6e8d81f171b2bd4be5265b01384c74c" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.2.0" +version = "2.5.10" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "259e206946c293698122f63e2b513a7c99a244e8" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.1.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.0" -[[Printf]] +[[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[REPL]] +[[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" -[[Random]] -deps = ["Serialization"] +[[deps.Random]] +deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[SHA]] +[[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" -[[Serialization]] +[[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -[[Sockets]] +[[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" -[[Test]] +[[deps.TOML]] +deps = ["Dates"] +uuid = 
"fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[[Unicode]] +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl index f753c467440ad..ab2417e752c9b 100644 --- a/JuliaSyntax/docs/make.jl +++ b/JuliaSyntax/docs/make.jl @@ -4,9 +4,13 @@ makedocs(; modules=[JuliaSyntax], format=Documenter.HTML(), pages=[ - "Overview" => "index.md", - "API Reference" => "reference.md", - "Design Discussion" => "design.md", + "Overview" => "index.md" + "How To" => "howto.md" + "Reference" => [ + "reference.md" + "api.md" + ] + "Design Discussion" => "design.md" ], repo="https://github.com/c42f/JuliaSyntax.jl/blob/{commit}{path}#L{line}", sitename="JuliaSyntax.jl", diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md new file mode 100644 index 0000000000000..c7678890167b2 --- /dev/null +++ b/JuliaSyntax/docs/src/api.md @@ -0,0 +1,57 @@ +# API Reference + +## Parsing + +```@docs +JuliaSyntax.parsestmt +JuliaSyntax.parseall +JuliaSyntax.parseatom +``` + +### Low level parsing API + +The `ParseStream` interface which provides a low-level stream-like I/O +interface for writing the parser. The parser does not depend on or produce any +concrete tree data structure as part of the parsing phase but the output spans +can be post-processed into various tree data structures as required. + +```@docs +JuliaSyntax.parse! 
+JuliaSyntax.ParseStream +``` + +## Tokenization + +```@docs +JuliaSyntax.tokenize +JuliaSyntax.untokenize +JuliaSyntax.Token +``` + +## Source file handling + +```@docs +JuliaSyntax.SourceFile +JuliaSyntax.highlight +JuliaSyntax.sourcetext +JuliaSyntax.source_line +JuliaSyntax.source_location +``` + +## Expression heads/kinds + +```@docs +JuliaSyntax.@K_str +JuliaSyntax.kind +JuliaSyntax.head +JuliaSyntax.flags +``` + +see also predicates related to `flags`. + +## Syntax tree types + +```@docs +JuliaSyntax.SyntaxNode +JuliaSyntax.GreenNode +``` diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md index a0d2d12947548..737dc31b7d2bb 100644 --- a/JuliaSyntax/docs/src/design.md +++ b/JuliaSyntax/docs/src/design.md @@ -1,2 +1,847 @@ -# Design discussion +# Design discussion and developer documentation + +## Goals + +* Lossless parsing of Julia code with precise source mapping +* Production quality error recovery, reporting and unit testing +* Parser structure similar to Julia's flisp-based parser +* Speedy enough for interactive editing +* "Compilation as an API" to support all sorts of tooling +* Grow to encompass the rest of the compiler frontend: macro expansion, + desugaring and other lowering steps. +* Replace Julia's flisp-based reference frontend + +## Design Opinions + +* Parser implementation should be independent from tree data structures. So + we have the `ParseStream` interface. +* Tree data structures should be *layered* to balance losslessness with + abstraction and generality. So we have `SyntaxNode` (an AST) layered on top + of `GreenNode` (a lossless parse tree). We might need other tree types later. +* Fancy parser generators still seem marginal for production compilers. We use + a boring but flexible recursive descent parser. + +# Parser implementation + +Our goal is to losslessly represent the source text with a tree; this may be +called a "lossless syntax tree". 
(This is sometimes called a "concrete syntax +tree", but that term has also been used for the parse tree of the full formal +grammar for a language including any grammar hacks required to solve +ambiguities, etc. So we avoid this term.) + +`JuliaSyntax` uses a mostly recursive descent parser which closely +follows the high level structure of the flisp reference parser. This makes the +code familiar and reduces porting bugs. It also gives a lot of flexibility for +designing the diagnostics, tree data structures, compatibility with different +Julia versions, etc. I didn't choose a parser generator as they still seem +marginal for production compilers — for the parsing itself they don't seem +*greatly* more expressive and they can be less flexible for the important +"auxiliary" code which needs to be written in either case. + +### Lexing + +We use a hand-written lexer (a heavily modified version of +[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl)) +* Newline-containing whitespace is emitted as a separate kind +* Tokens inside string interpolations are emitted separately from the string +* Strings delimiters are separate tokens and the actual string always has the + `String` kind +* Additional contextual keywords (`as`, `var`, `doc`) have been added and + moved to a subcategory of keywords. +* Nonterminal kinds were added (though these should probably be factored out again) +* Various bugs fixed and additions for newer Julia versions + +### Parsing with ParseStream + +The main parser innovation is the `ParseStream` interface which provides a +stream-like I/O interface for writing the parser. The parser does not +depend on or produce any concrete tree data structure as part of the parsing +phase but the output spans can be post-processed into various tree data +structures as required. This is like the design of rust-analyzer though with a +simpler implementation. 
+ +Parsing proceeds by recursive descent; + +* The parser consumes a flat list of lexed tokens as *input* using `peek()` to + examine tokens and `bump()` to consume them. +* The parser produces a flat list of text spans as *output* using `bump()` to + transfer tokens to the output and `position()`/`emit()` for nonterminal ranges. +* Diagnostics are emitted as separate text spans +* Whitespace and comments are automatically `bump()`ed and don't need to be + handled explicitly. The exception is syntactically relevant newlines in space + sensitive mode. +* Parser modes are passed down the call tree using `ParseState`. + +The output spans track the byte range, a syntax "kind" stored as an integer +tag, and some flags. The kind tag makes the spans a [sum +type](https://blog.waleedkhan.name/union-vs-sum-types/) but where the type is +tracked explicitly outside of Julia's type system. + +For lossless parsing the output spans must cover the entire input text. Using +`bump()`, `position()` and `emit()` in a natural way also ensures that: +* Spans are cleanly nested with children contained entirely within their parents +* Sibling spans are emitted in source order +* Parent spans are emitted after all their children. + +These properties make the output spans naturally isomorphic to a +["green tree"](#raw-syntax-tree--green-tree) +in the terminology of C#'s Roslyn compiler. + +### Tree construction + +The `build_tree` function performs a depth-first traversal of the `ParseStream` +output spans allowing it to be assembled into a concrete tree data structure, +for example using the `GreenNode` data type. We further build on top of this to +define `build_tree` for the AST type `SyntaxNode` and for normal Julia `Expr`. + +### Error recovery + +The goal of the parser is to produce well-formed hierarchical structure from +the source text.
For interactive tools we need this to work even when the +source text contains errors; it's the job of the parser to include the recovery +heuristics to make this work. + +Concretely, the parser in `JuliaSyntax` should always produce a green tree +which is *well formed* in the sense that `GreenNode`s of a given `Kind` have +well-defined layout of children. This means the `GreenNode` to `SyntaxNode` +transformation is deterministic and tools can assume they're working with a +"mostly valid" AST. + +What does "mostly valid" mean? We allow the tree to contain the following types +of error nodes: + +* Missing tokens or nodes may be **added** as placeholders when they're needed + to complete a piece of syntax. For example, we could parse `a + (b *` as + `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder error node. +* A sequence of unexpected tokens may be **removed** by collecting + them as children of an error node and treating them as syntax trivia during + AST construction. For example, `a + b end * c` could be parsed as the green + tree `(call-i a + b (error-t end * c))`, and turned into the AST `(call + a b)`. + +We want to encode both these cases in a way which is simplest for downstream +tools to use. This is an open question, but for now we use `K"error"` as the +kind, with the `TRIVIA_FLAG` set for unexpected syntax. + +# Syntax trees + +Julia's `Expr` abstract syntax tree can't store precise source locations or +deal with syntax trivia like whitespace or comments. So we need some new tree +types in `JuliaSyntax`. 
+ +JuliaSyntax currently deals in three types of trees: +* `GreenNode` is a minimal *lossless syntax tree* where + - Nodes store a kind and length in bytes, but no text + - Syntax trivia are included in the list of children + - Children are strictly in source order +* `SyntaxNode` is an *abstract syntax tree* which has + - An absolute position and pointer to the source text + - Children strictly in source order + - Leaf nodes store values, not text + - Trivia are ignored, but there is a 1:1 mapping of non-trivia nodes to the + associated `GreenTree` nodes. +* `Expr` is used as a conversion target for compatibility + +## More about syntax kinds + +We generally track the type of syntax nodes with a syntax "kind", stored +explicitly in each node an integer tag. This effectively makes the node type a +[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system +sense, but with the type tracked explicitly outside of Julia's type system. + +Managing the type explicitly brings a few benefits: +* Code and data structures for manipulating syntax nodes is always concretely + typed from the point of view of the compiler. +* We control the data layout and can pack the kind into very few bits along + with other flags bits, as desired. +* Predicates such as `is_operator` can be extremely efficient, given that we + know the meaning of the kind's bits. +* The kind can be applied to several different tree data structures, or + manipulated by itself. +* Pattern matching code is efficient when the full set of kinds is closed and + known during compilation. + +There's arguably a few downsides: +* Normal Julia dispatch can't express dispatch over syntax kind. Luckily, + a pattern matching macro can provide a very elegant way of expressing such + algorithms over a non-extensible set of kinds, so this is not a big problem. +* Different node kinds could come with different data fields, but a syntax + tree must have generic fields to cater for all kinds. 
(Consider as an analogy + the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic + `head` and `args` fields.) This could be a disadvantage for code which + processes one specific kind but for generic code processing many kinds + having a generic but *concrete* data layout should be faster. + +# Differences from the flisp parser + +_See also the [§ Comparisons to other packages](#comparisons-to-other-packages) section._ + +Practically the flisp parser is not quite a classic [recursive descent +parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it +often looks back and modifies the output tree it has already produced. We've +tried to eliminate this pattern in favor of lookahead where possible because + +* It works poorly when the parser is emitting a stream of node spans with + strict source ordering constraints. +* It's confusing to reason about this kind of code + +However, on occasion it seems to solve genuine ambiguities where Julia code +can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=` +ambiguity within parentheses. In these cases we put up with using the +functions `look_behind` and `reset_node!()`. + +## Code structure + +Large structural changes were generally avoided while porting. In particular, +nearly all function names for parsing productions are the same with `-` +replaced by `_` and predicates prefixed by `is_`. + +Some notable differences: + +* `parse-arglist` and a parts of `parse-paren-` have been combined into a + general function `parse_brackets`. This function deals with all the odd + corner cases of how the AST is emitted when mixing `,` and `;` within + parentheses. 
In particular regard to: + - Determining whether `;` are block syntax separators or keyword parameters + - Determining whether to emit `parameter` sections based on context + - Emitting key-value pairs either as `kw` or `=` depending on context +* The way that `parse-resword` is entered has been rearranged to avoid parsing + reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we + detect reserved words and enter `parse_resword` earlier. + +## Flisp parser bugs + +Here's some behaviors which seem to be bugs. (Some of these we replicate in the +name of compatibility, perhaps with a warning.) + +* Macro module paths allow calls which gives weird stateful semantics! + ``` + b() = rand() > 0.5 ? Base : Core + b().@info "hi" + ``` +* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd + broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should + probably be rejected. +* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where + keyword parameters are separated by commas. A tuple is produced instead. +* `const` and `global` allow chained assignment, but the right hand side is not + constant. `a` const here but not `b`. + ``` + const a = b = 1 + ``` +* Parsing the `ncat` array concatenation syntax within braces gives + strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as + `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy + to how `{a b}` produces `(bracescat (row a b))`. +* `export a, \n $b` is rejected, but `export a, \n b` parses fine. +* In try-catch-finally, the `finally` clause is allowed before the `catch`, but + always executes afterward. (Presumably was this a mistake? It seems pretty awful!) +* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is + correctly parsed as `Expr(:vect)` (maybe fixed in 1.7?) +* `f(x for x in in xs)` is accepted, and parsed very strangely. 
+* Octal escape sequences saturate rather than being reported as errors. Eg, + `"\777"` results in `"\xff"`. This is inconsistent with + `Base.parse(::Type{Int}, ...)` +* Leading dots in import paths with operator-named modules are parsed into + dotted operators rather than a relative path. Ie, we have `import .⋆` parsing + to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency + with the parsing of `import .A`. +* Looking back on the output disregards grouping parentheses which can lead to + odd results in some cases. For example, `f(((((x=1)))))` parses as a keyword + call to function `f` with the keyword `x=1`, but arguably it should be an + assignment. +* Hexfloat literals can have a trailing `f` for example, `0x1p1f` + but this doesn't do anything. In the `flisp` C code such cases are treated as + Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925 + but this has never been officially supported in Julia. It seems this bug + arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent + digits, all of which are detected as invalid except for a trailing `f` when + processed by `isnumtok_base`. +* `begin` and `end` are not parsed as keywords when indexing. Typed comprehensions + initially look the same, but can be distinguished from indexing once we handle + a `for` token; it is safe to treat `begin` and `end` as keywords afterwards. The + reference parser *only* handles this well when there's a newline before `for`: + ```julia + Any[foo(i) + for i in x if begin + true + end + ] + ``` + works, while + ```julia + Any[foo(i) for i in x if begin + true + end + ] + ``` + does not. JuliaSyntax handles both cases. + +## Parsing / AST oddities and warts + +### Questionable allowed forms + +There's various allowed syntaxes which are fairly easily detected in the +parser, but which will be rejected later during lowering. 
To allow building +DSLs this is fine and good but some such allowed syntaxes don't seem very +useful, even for DSLs: + +* `macro (x) end` is allowed but there are no anonymous macros. +* `abstract type A < B end` and other subtype comparisons are allowed, but + only `A <: B` makes sense. +* `x where {S T}` produces `(where x (bracescat (row S T)))`. This seems pretty weird! +* `[x for outer x in xs]` parses, but `outer` makes no real sense in this + context (and using this form is a lowering error) + +### `kw` and `=` inconsistencies + +There's many apparent inconsistencies between how `kw` and `=` are used when +parsing `key=val` pairs inside parentheses. + +* Inconsistent parsing of tuple keyword args inside vs outside of dot calls + ```julia + (a=1,) # (tuple (= a 1)) + f.(a=1) # (tuple (kw a 1)) + ``` +* Mixtures of `,` and `;` in calls give nested parameter AST which parses + strangely, and is kind-of-horrible to use. + ```julia + # (tuple (parameters (parameters e f) c d) a b) + (a,b; c,d; e,f) + ``` +* Long-form anonymous functions have argument lists which are parsed + as tuples (or blocks!) rather than argument lists and this mess appears to be + papered over as part of lowering. For example, in `function (a;b) end` the + `(a;b)` is parsed as a block! This leads to more inconsistency in the use of + `kw` for keywords. + + +### Other oddities + +* Operators with suffixes don't seem to always be parsed consistently as the + same operator without a suffix. Unclear whether this is by design or mistake. + For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` + +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. + I suppose this is somewhat useful for AST consumers, but reversing the source + order is pretty weird and inconvenient when moving to a lossless parser.
+ +* `let` bindings might be stored in a block, or they might not be, depending on + special cases: + ``` + # Special cases not in a block + let x=1 ; end ==> (let (= x 1) (block)) + let x::1 ; end ==> (let (:: x 1) (block)) + let x ; end ==> (let x (block)) + + # In a block + let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + let x+=1 ; end ==> (let (block (+= x 1)) (block)) + ``` + +* The `elseif` condition is always in a block but not the `if` condition. + Presumably because of the need to add a line number node in the flisp parser + `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` + +* Spaces are allowed between import dots — `import . .A` is allowed, and + parsed the same as `import ..A` + +* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` + can't be a normal identifier. + +* The raw string escaping rules are *super* confusing for backslashes near + the end of the string: `raw"\\\\ "` contains four backslashes, whereas + `raw"\\\\"` contains only two. However this was an intentional feature to + allow all strings to be represented and it's unclear whether the situation + can be improved. + +* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and + `@S {a b}` parse. Conversely, `@S[a b]` parses. + +* Macro names and invocations are post-processed from the output of + `parse-atom` / `parse-call`, which leads to some surprising and questionable + constructs which "work": + - Absurdities like `@(((((a))))) x ==> (macrocall @a x)` + - Infix macros!? `@(x + y) ==> (macrocall @+ x y)` (ok, kinda cute and has + some weird logic to it... but what?) + - Similarly additional parentheses are allowed `@(f(x)) ==> (macrocall @f x)` + +* Allowing `@` first in macro module paths (eg `@A.B.x` instead of `A.B.@x`) + seems like unnecessary variation in syntax. 
It makes parsing valid macro + module paths more complex and leads to oddities like `@$.x y ==> (macrocall + ($ (quote x)) y` where the `$` is first parsed as a macro name, but turns out + to be the module name after the `.` is parsed. But `$` can never be a valid + module name in normal Julia code so this makes no sense. + +* Triple quoted `var"""##"""` identifiers are allowed. But it's not clear these + are required or desired given that they come with the complex triple-quoted + string deindentation rules. + +* Deindentation of triple quoted strings with mismatched whitespace is weird + when there's nothing but whitespace. For example, we have + `"\"\"\"\n \n \n \"\"\"" ==> "\n \n"` so the middle line of whitespace + here isn't dedented but the other two longer lines are?? Here it seems more + consistent that either (a) the middle line should be deindented completely, + or (b) all lines should be dedented only one character, as that's the + matching prefix. + +* Parsing of anonymous function arguments is somewhat inconsistent. + `function (xs...) \n body end` parses the argument list as `(... xs)`, whereas + `function (x) \n body end` parses the argument list as `(tuple x)`. + +* The difference between multidimensional vs flattened iterators is subtle, and + perhaps too syntactically permissive. For example, + - `[(x,y) for x * in 1:10, y in 1:10]` is a multidimensional iterator + - `[(x,y) for x * in 1:10 for y in 1:10]` is a flattened iterator + - `[(x,y) for x in 1:10, y in 1:10 if y < x]` is a flattened iterator + + It's this last case which seems problematic (why not *require* the second + form as a more explicit way to indicate flattening?). It's not even pretty + printed correctly: + ``` + julia> :([(x,y) for x in 1:10, y in 1:10 if y < x]) + :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))]) + ``` + +* The character `'` may be written without escaping as `'''` rather than + requiring the form `'\''`. 
+ +# Comparisons to other packages + +### Official Julia compiler + +_See also the [§ Differences from the flisp parser](#differences-from-the-flisp-parser) section._ + +The official Julia compiler frontend lives in the Julia source tree. It's +mostly contained in just a few files: +* The parser in [src/julia-parser.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-parser.scm) +* Macro expansion in [src/ast.c](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/ast.c) and [src/macroexpand.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/macroexpand.scm) +* Syntax lowering in [src/julia-syntax.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-syntax.scm) +* The flisp runtime and C extensions for Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp) +* Supporting utility functions in a few other `.scm` and `.c` files. + +There's two issues with the official reference frontend which suggest a rewrite. + +First, there's no support for precise source locations and the existing data +structures (bare flisp lists) can't easily be extended to add these. Fixing +this would require changes to nearly all of the code. + +Second, it's written in flisp: an aesthetically pleasing, minimal but obscure +implementation of Scheme. Learning Scheme is actually a good way to appreciate +some of Julia's design inspiration, but it's quite a barrier for developers of +Julia language tooling. (Flisp has no user-level documentation but non-schemers +can refer to the [Racket documentation](https://docs.racket-lang.org) which is +quite compatible for basic things.) In addition to the social factors, having +the embedded flisp interpreter and runtime with its own separate data +structures and FFI is complex and inefficient.
+ +### JuliaParser.jl + +[JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl) +was a direct port of Julia's flisp reference parser, but was abandoned around +Julia 0.5 or so. Furthermore, it doesn't support lossless parsing, and adding +that feature would amount to a full rewrite. Given its divergence with the flisp +reference parser since Julia-0.5, it seemed better just to start anew from the +reference parser instead. + +### Tokenize.jl + +[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) +is a fast lexer for Julia code. The code from Tokenize has been +imported and used in JuliaSyntax, with some major modifications as discussed in +the [lexer implementation](#lexing) section. + +### CSTParser.jl + +[CSTParser.jl](https://github.com/julia-vscode/CSTParser.jl) +is a ([mostly?](https://github.com/domluna/JuliaFormatter.jl/issues/52#issuecomment-529945126)) +lossless parser with goals quite similar to JuliaParser. It is used extensively +in the VSCode / LanguageServer / JuliaFormatter ecosystem. CSTParser is very +useful, but I do find the implementation hard to understand, and I wanted to try +a fresh approach with a focus on: + +* "Production readiness": Good docs, tests, diagnostics and maximum similarity + with the flisp parser, with the goal of getting the new parser into `Core`. +* Learning from the latest ideas about composable parsing and data structures + from outside Julia. In particular the implementation of `rust-analyzer` is + very clean, well documented, and was a great source of inspiration. +* Composability of tree data structures — I feel like the trees should be + layered somehow with a really lightweight [green tree](#raw-syntax-tree--green-tree) + at the most basic level, similar to Roslyn or rust-analyzer. In comparison, + CSTParser uses a more heavyweight non-layered data structure. Alternatively or + additionally, have a common tree API with many concrete task-specific + implementations. 
+ +A big benefit of the JuliaSyntax parser is that it separates the parser code +from the tree data structures entirely, which should give a lot of flexibility +in experimenting with various tree representations. + +I also want JuliaSyntax to tackle macro expansion and other lowering steps, and +provide APIs for this which can be used by both the core language and the +editor tooling. + +### tree-sitter-julia + +Using a modern production-ready parser generator like `tree-sitter` is an +interesting option and some progress has already been made in +[tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia). +But I feel like the grammars for parser generators are only marginally more +expressive than writing the parser by hand, after accounting for the effort +spent on the weird edge cases of a real language and writing the parser's tests +and "supporting code". + +On the other hand, a hand-written parser is completely flexible and can be +mutually understood with the reference implementation, so I chose that approach +for JuliaSyntax. + +# Resources + +## Julia issues + +Here's a few links to relevant Julia issues. + +#### Macro expansion + +* Automatic hygiene for macros https://github.com/JuliaLang/julia/pull/6910 — + would be interesting to implement this in a new frontend. + +#### Lowering + +* A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201 — + some of this should be ported. (Last commit at https://github.com/JuliaLang/julia/tree/df61138fcf97d03dcbbba10e962571af9700db56/ ) +* The closure capture problem https://github.com/JuliaLang/julia/issues/15276 — + would be interesting to see whether we can tackle some of the harder cases in + a new implementation. 
+ +## C# Roslyn + +[Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) +* [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) +* [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) + + +## Rust-analyzer + +`rust-analyzer` seems to be very close to what I'm building here, and has come +to the same conclusions on green tree layout with explicit trivia nodes. Their +document on internals +[here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md) +is great. Points of note: + +* They have *three* trees! + 1. Green trees exactly like mine (pretty much all the same design + decisions, including trivia storage). Though note that the team are still + [toying with](https://github.com/rust-analyzer/rust-analyzer/issues/6584) + the idea of using the Roslyn model of trivia. + 2. Untyped red syntax trees somewhat like mine, but much more minimal. For + example, these don't attempt to reorder children. + 3. A typed AST layer with a type for each expression head. The AST searches + for children by dynamically traversing the child list each time, rather + than having a single canonical ordering or remembering the placement of + children which the parser knew. +* "Parser does not see whitespace nodes. Instead, they are attached to the + tree in the TreeSink layer." This may be relevant to us - it's a pain to + attach whitespace to otherwise significant tokens, and inefficient to + allocate and pass around a dynamic list of whitespace trivia. +* "In practice, incremental reparsing doesn't actually matter much for IDE + use-cases, parsing from scratch seems to be fast enough." (I wonder why + they've implemented incremental parsing then?) +* There's various comments about macros... Rust macro expansion seems quite + different from Julia (it appears it may be interleaved with parsing??) 
+ +In general I think it's unclear whether we want typed ASTs in Julia and we +particularly need to deal with the fact that `Expr` is the existing public +interface. Could we have `Expr2` wrap `SyntaxNode`? + +* A related very useful set of blog posts which discuss using the rust syntax + tree library (rowan) for representing of a non-rust toy language is here + https://dev.to/cad97/lossless-syntax-trees-280c + +Not all the design decisions in `rust-analyzer` are finalized but the +[architecture document](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/architecture.md) +is a fantastic source of design inspiration. + +Highlights: +* "The parser is independent of the particular tree structure and particular + representation of the tokens. It transforms one flat stream of events into + another flat stream of events." This seems great, let's adopt it! +* TODO + +## RSLint + +[RSLint](https://rslint.org/dev) is a linter for javascript, built in Rust. It +uses the same parsing infrastructure and green tree libraries `rust-analyzer`. +There's an excellent and friendly high level overview of how all this works in +the rslint [parsing devdocs](https://rslint.org/dev/parsing.html). + +Points of note: + +* Backtracking and restarting the parser on error is actually quite simple in + the architecture we (mostly) share with `rust-analyzer`: + > ... events allow us to cheaply backtrack the parser by simply draining + > the events and resetting the token source cursor back to some place. + +* The section on [error + recovery](https://rslint.org/dev/parsing.html#error-recovery) is interesting; + they talk about various error recovery strategies. + +## Diagnostics + +The paper [P2429 - Concepts Error Messages for +Humans](https://wg21.tartanllama.xyz/P2429%20-%20Concepts%20Error%20Messages%20for%20Humans.pdf) +is C++ centric, but has a nice review of quality error reporting in various +compilers including Elm, ReasonML, Flow, D and Rust. 
+ +Some Rust-specific resources: +* [rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html) +* The source of the Rust compiler's diagnostics system: + - The [`println!` macro](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_builtin_macros/src/format.rs) + shows how these can be emitted from macros + - The parser's [diagnostics.rs](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_parse/src/parser/diagnostics.rs) + +## General resources about parsing + +* [Modern parser generator](https://matklad.github.io/2018/06/06/modern-parser-generator.html) + has a lot of practical notes on writing parsers. Highlights: + - Encourages writing tests for handwritten parsers as inline comments + - Mentions Pratt parsers for simple operator precedence parsing. Good articles: + - [From Aleksey Kladov (matklad - the main rust-analyzer author, etc)](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) + - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) + - Some discussion of error recovery + +* Some notes about stateful lexers for parsing shell-like string interpolations: + http://www.oilshell.org/blog/2017/12/17.html + + +# Design notes + +The following are some fairly disorganized design notes covering a mixture of +things which have already been done and musings about further work. + +## Prototyping approach + +The tree datastructure design here is tricky: + +1. The symbolic part of compilation (the compiler frontend) incrementally + abstracts and transforms the source text, but errors along the way should + refer back to the source. + - The tree must be a lossless representation of the source text + - Some aspects of the source text (comments, most whitespace) are irrelevant + to parsing. 
+ - More aspects of the source text are irrelevant after we have an abstract + syntax tree of the surface syntax. Some good examples here are the + parentheses in `2*(x + y)` and the explicit vs implicit multiplication + symbol in `2*x` vs `2x`. + +2. There's various type of *analyses* +- There's many useful ways to augment a syntax tree depending on use case. +- Analysis algorithms should be able to act on any tree type, ignoring + but carrying augmentations which they don't know about. + +Having so many use cases suggests it might be best to have several different +tree types with a common interface rather than one main abstract syntax tree +type. But it seems useful to figure this out by prototyping several important +work flows: + +* Syntax transformations + - Choose some macros to implement. This is a basic test of mixing source + trees from different files while preserving precise source locations. + (Done in .) +* Formatting + - Re-indent a file. This tests the handling of syntax trivia. +* Refactoring + - A pass to rename local variables. This tests how information from further + down the compilation pipeline can be attached to the syntax tree and used + to modify the source code. +* Precise error reporting in lowering + - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment + location `[a, b]`". But at a precise source location. + - Try something several layers deeper inside lowering? 
For example "macro
+   definition not allowed inside a local scope"
+* Incremental reparsing
+  - Reparse a source file, given a byte range replacement
+
+
+## Tree design
+
+### Raw syntax tree / Green tree
+
+Raw syntax tree (or ["Green tree"](https://ericlippert.com/2012/06/08/red-green-trees/)
+in the terminology from Roslyn)
+
+We want GreenNode to be
+* *structurally minimal* — For efficiency and generality
+* *immutable* — For efficiency (& thread safety)
+* *complete* — To preserve parser knowledge
+* *token agnostic* — To allow use with any source language
+
+The simplest idea possible is to have:
+* Leaf nodes are a single token
+* Children are in source order
+
+Call represents a challenge for the AST vs Green tree in terms of node
+placement / iteration for infix operators vs normal prefix function calls.
+
+- The normal problem of `a + 1` vs `+(a, 1)`
+- Or worse, `a + 1 + 2` vs `+(a, 1, 2)`
+
+Clearly in the AST's *interface* we need to abstract over this placement. For
+example with something like the normal Julia AST's iteration order.
+
+### Abstract syntax tree
+
+By pointing to green tree nodes, AST nodes become traceable back to the original
+source.
+
+Unlike most languages, designing a new AST is tricky because the existing
+`Expr` is a very public API used in every macro expansion. User-defined
+macro expansions interpose between the source text and lowering, and using
+`Expr` loses source information in many ways.
+
+There seems to be a few ways forward:
+* Maybe we can give `Expr` some new semi-hidden fields to point back to the
+  green tree nodes that the `Expr` or its `args` list came from?
+* We can use the existing `Expr` during macro expansion and try to recover
+  source information after macro expansion using heuristics. Likely the
+  presence of correct hygiene can help with this.
+* Introducing a new AST would be possible if it were opt-in for some
+  hypothetical "new-style macros" only. Fixing hygiene should go along with
+  this. 
Design challenge: How do we make manipulating expressions reasonable + when literals need to carry source location? + +One option which may help bridge between locationless ASTs and something new +may be to have wrappers for the small number of literal types we need to cover. +For example: + +```julia +SourceSymbol <: AbstractSymbol +SourceInt <: Integer +SourceString <: AbstractString +``` + +Having source location attached to symbols would potentially solve most of the +hygiene problem. There's still the problem of macro helper functions which use +symbol literals; we can't very well be changing the meaning of `:x`! Perhaps +the trick there is to try capturing the current module at the location of the +interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to +`Core._expr(:call, :+, :y, x)`, but it could expand it to something like +`Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? + +## Parsing + +### Error recovery + +Some disorganized musings about error recovery + +Different types of errors seem to occur... + +* Disallowed syntax (such as lack of spaces in conditional expressions) + where we can reasonably just continue parsing and emit the node with an error + flag which is otherwise fully formed. In some cases like parsing infix + expressions with a missing tail, emitting a zero width error token can lead + to a fully formed parse tree without the productions up the stack needing to + participate in recovery. +* A token which is disallowed in current context. Eg, `=` in parse_atom, or a + closing token inside an infix expression. Here we can emit a `K"error"`, but + we can't descend further into the parse tree; we must pop several recursive + frames off. Seems tricky! + +A typical structure is as follows: + +```julia +function parse_foo(ps) + mark = position(ps) + parse_bar(ps) # What if this fails? + if peek(ps) == K"some-token" + bump(ps) + parse_baz(ps) # What if this fails? 
+        emit(ps, mark, K"foo")
+    end
+end
+```
+
+Emitting plain error tokens is good in unfinished infix expressions:
+
+```julia
+begin
+    a = x +
+end
+```
+
+The "missing end" problem is tricky, as the intermediate syntax is valid; the
+problem often only becomes obvious once we get to EOF.
+
+Missing end
+```julia
+function f()
+    begin
+        a = 10
+end
+
+# <-- Indentation would be wrong if g() was an inner function of f.
+function g()
+end
+```
+
+It seems like ideal error recovery would need to backtrack in this case. For
+example:
+
+- Pop back to the frame which was parsing `f()`
+- Backtrack through the parse events until we find a function with indentation
+  mismatched to the nesting of the parent.
+- Reset ParseStream to a parsing checkpoint before `g()` was called
+- Emit error and exit the function parsing `f()`
+- Restart parsing
+- Somehow make sure all of this can't result in infinite recursion 😅
+
+Missing commas or closing brackets in nested structures also present the
+existing parser with a problem.
+
+```julia
+f(a,
+  g(b,
+    c # -- missing comma?
+    d),
+  e)
+```
+
+Again the local indentation might tell a story
+
+```julia
+f(a,
+  g(b,
+    c # -- missing closing `)` ?
+  d)
+```
+
+But not always!
+
+```julia
+f(a,
+  g(b,
+    c # -- missing closing `,` ?
+  d))
+```
+
+Another particularly difficult problem for diagnostics in the current system is
+broken parentheses or double quotes in string interpolations, especially when
+nested.
+
+# Fun research questions
+
+### Parser Recovery
+
+Can we learn fast and reasonably accurate recovery heuristics for when the
+parser encounters broken syntax, rather than hand-coding these? How would we
+set the parser up so that training works and injecting the model is
+nonintrusive? If the model is embedded in and works together with the parser,
+can it be made compact enough that training is fast and the model itself is
+tiny? 
+
+### Formatting
+
+Given source and syntax tree, can we regress/learn a generative model of
+indentation from the syntax tree? Source formatting involves a big pile of
+heuristics to get something which "looks nice"... and ML systems have become
+very good at heuristics. Also, we've got huge piles of training data — just
+choose some high quality, tastefully hand-formatted libraries.
diff --git a/JuliaSyntax/docs/src/howto.md b/JuliaSyntax/docs/src/howto.md
new file mode 100644
index 0000000000000..0de9e69ad976d
--- /dev/null
+++ b/JuliaSyntax/docs/src/howto.md
@@ -0,0 +1,38 @@
+# How-To
+
+This section contains brief recipes for particular tasks.
+
+## Use JuliaSyntax as the default parser
+
+To use JuliaSyntax as the default Julia parser for the REPL and to `include()`
+files, parse code with `Meta.parse()`, etc, put the following in your
+startup.jl file:
+
+```julia
+using JuliaSyntax
+JuliaSyntax.enable_in_core!()
+```
+
+This works well in Julia 1.9 but in Julia 1.8 will cause some startup latency.
+To reduce that you can create a custom system image by running the code in
+`./sysimage/compile.jl` as a Julia script (or directly using the shell, on
+unix). Then use `julia -J $resulting_sysimage`.
+
+Using a custom sysimage has the advantage that package precompilation will also
+go through the JuliaSyntax parser.
+
+### VSCode
+
+To use JuliaSyntax as the default parser for Julia within VSCode, add the
+following to your `startup.jl` file:
+
+```julia
+import JuliaSyntax
+JuliaSyntax.enable_in_core!()
+```
+
+To reduce startup latency you can combine with a custom system image as described in
+the [Julia VScode docs](https://www.julia-vscode.org/docs/dev/userguide/compilesysimage/#Creating-a-sysimage-for-the-active-environment),
+combined with the precompile execution file in `sysimage/precompile_exec.jl` in the source tree.
+For additional detail see the discussion in [issue #128](https://github.com/JuliaLang/JuliaSyntax.jl/issues/128). 
+ diff --git a/JuliaSyntax/docs/src/index.md b/JuliaSyntax/docs/src/index.md index a8605a79474d3..add8907a330d1 100644 --- a/JuliaSyntax/docs/src/index.md +++ b/JuliaSyntax/docs/src/index.md @@ -1,2 +1,80 @@ # JuliaSyntax.jl +A Julia compiler frontend, written in Julia. + +A [talk from JuliaCon 2022](https://youtu.be/CIiGng9Brrk) covered some aspects +of this package. + +## Examples + +Here's what parsing of a small piece of code currently looks like in various +forms. We'll use the `JuliaSyntax.parsestmt` function to demonstrate, there's also +`JuliaSyntax.parse!` offering more fine-grained control. + +First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means +the `call` has the infix `-i` flag): + +```julia +julia> using JuliaSyntax + +julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl") +line:col│ tree │ file_name + 1:1 │[call-i] │foo.jl + 1:1 │ [parens] + 1:2 │ [call-i] + 1:2 │ x + 1:4 │ + + 1:6 │ y + 1:8 │ * + 1:9 │ z +``` + +Internally this has a full representation of all syntax trivia (whitespace and +comments) as can be seen with the more raw ["green tree"](#raw-syntax-tree--green-tree) +representation with `GreenNode`. Here ranges on the left are byte ranges, and +`✔` flags nontrivia tokens. Note that the parentheses are trivia in the tree +representation, despite being important for parsing. 
+ +```julia +julia> text = "(x + y)*z" + greentree = parsestmt(JuliaSyntax.GreenNode, text) + 1:9 │[call] + 1:7 │ [parens] + 1:1 │ ( + 2:6 │ [call] + 2:2 │ Identifier ✔ + 3:3 │ Whitespace + 4:4 │ + ✔ + 5:5 │ Whitespace + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ * ✔ + 9:9 │ Identifier ✔ +``` + +`GreenNode` stores only byte ranges, but the token strings can be shown by +supplying the source text string: + +```julia +julia> show(stdout, MIME"text/plain"(), greentree, text) + 1:9 │[call] + 1:7 │ [parens] + 1:1 │ ( "(" + 2:6 │ [call] + 2:2 │ Identifier ✔ "x" + 3:3 │ Whitespace " " + 4:4 │ + ✔ "+" + 5:5 │ Whitespace " " + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ * ✔ "*" + 9:9 │ Identifier ✔ "z" +``` + +Julia `Expr` can also be produced: + +```julia +julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z") +:((x + y) * z) +``` + diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 1d966e4eb3389..22322b55381d5 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -1,7 +1,308 @@ -# API Reference +# Syntax Trees -## Parsing code +This section describes the syntax trees produced by JuliaSyntax, mainly in +terms of their similarities and differences with the `Expr` tree data +structures used since Julia 0.1. -```@docs +## JuliaSyntax trees vs `Expr` + +The tree structure of `GreenNode`/`SyntaxNode` is similar to Julia's `Expr` +data structure but there are various differences: + +### Source ordered children + +The children of our trees are strictly in source order. This has many +consequences in places where `Expr` reorders child expressions. + +* Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`. +* Generators are represented in source order as a single node rather than multiple nested flatten and generator expressions. 
+
+### No `LineNumberNode`s
+
+Our syntax nodes inherently store source position, so there's no need for the
+`LineNumberNode`s used by `Expr`.
+
+### More consistent / less redundant `block`s
+
+Sometimes `Expr` needs redundant block constructs to store `LineNumberNode`s,
+but we don't need these. Also in cases which do use blocks we try to use them
+consistently.
+
+* No block is used on the right hand side of short form function syntax
+* No block is used for the conditional in `elseif`
+* No block is used for the body of anonymous functions after the `->`
+* `let` argument lists always use a block regardless of number or form of bindings
+
+### Faithful representation of the source text / avoid premature lowering
+
+Some cases of "premature lowering" have been removed, preferring to represent
+the source text more closely.
+
+* `K"macrocall"` - allow users to easily distinguish macrocalls with parentheses from those without them (#218)
+* Grouping parentheses are represented with a node of kind `K"parens"` (#222)
+* The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`.
+* Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85)
+* `global const` and `const global` are not normalized by the parser. 
This is done in `Expr` conversion (#130) +* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) +* `@.` is not lowered to `@__dot__` inside the parser (#146) +* Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) +* Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) +* `return` without a value has zero children, rather than lowering to `return nothing` (#220) + +### Containers for string-like constructs + +String-like constructs always come within a container node, not as a single +token. These are useful for tooling which works with the tokens of the source +text. Also separating the delimiters from the text they delimit removes a whole +class of tokenization errors and lets the parser deal with them. + +* string always use `K"string"` to wrap strings, even when they only contain a single string chunk (#94) +* char literals are wrapped in the `K"char"` kind, containing the character literal string along with their delimiters (#121) +* backticks use the `K"cmdstring"` kind +* `var""` syntax uses `K"var"` as the head (#127) +* The parser splits triple quoted strings into string chunks interspersed with whitespace trivia + +### Improvements for AST inconsistencies + +* Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90) +* Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. 
*) x y)` (#240)
+* The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103)
+* Unadorned postfix adjoint is parsed as `call` rather than as a syntactic operator for consistency with suffixed versions like `x'ᵀ` (#124)
+
+### Improvements to awkward AST forms
+
+* Frankentuples with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133)
+* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234)
+* The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244)
+* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220)
+* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators).
+
+
+## More detail on tree differences
+
+### Generators
+
+Flattened generators are uniquely problematic because the Julia AST doesn't
+respect a key rule we normally expect: that the children of an AST node are a
+*contiguous* range in the source text. For example, the `for`s in
+`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to
+mean
+
+```
+for x in xs
+for y in ys
+  push!(xy, collection)
+```
+
+so the `xy` prefix is in the *body* of the innermost for loop. 
Following this, +the standard Julia AST is like so: + +``` +(flatten + (generator + (generator + xy + (= y ys)) + (= x xs))) +``` + +however, note that if this tree were flattened, the order would be +`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the +source order. + +However, our green tree is strictly source-ordered, so we must deviate from the +Julia AST. We deal with this by grouping cartesian products of iterators +(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and +use the presence of multiple iterator blocks rather than the `flatten` head to +distinguish flattened iterators. The nested flattens and generators of `Expr` +forms are reconstructed later. In this form the tree structure resembles the +source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as + +``` +(generator + xy + (= x xs) + (= y ys)) +``` + +And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as + +``` +(generator + xy + (cartesian_iterator + (= x xs) + (= y ys))) +``` + +### Whitespace trivia inside strings + +For triple quoted strings, the indentation isn't part of the string data so +should also be excluded from the string content within the green tree. That is, +it should be treated as separate whitespace trivia tokens. With this separation +things like formatting should be much easier. The same reasoning goes for +escaping newlines and following whitespace with backslashes in normal strings. + +Detecting string trivia during parsing means that string content is split over +several tokens. Here we wrap these in the K"string" kind (as is already used +for interpolations). The individual chunks can then be reassembled during Expr +construction. (A possible alternative might be to reuse the K"String" and +K"CmdString" kinds for groups of string chunks (without interpolation).) + +Take as an example the following Julia fragment. 
+ +```julia +x = """ + $a + b""" +``` + +Here this is parsed as `(= x (string-s a "\n" "b"))` (the `-s` flag in +`string-s` means "triple quoted string") + +Looking at the green tree, we see the indentation before the `$a` and `b` are +marked as trivia: + +``` +julia> text = "x = \"\"\"\n \$a\n b\"\"\"" + show(stdout, MIME"text/plain"(), parseall(GreenNode, text, rule=:statement), text) + 1:23 │[=] + 1:1 │ Identifier ✔ "x" + 2:2 │ Whitespace " " + 3:3 │ = "=" + 4:4 │ Whitespace " " + 5:23 │ [string] + 5:7 │ """ "\"\"\"" + 8:8 │ String "\n" + 9:12 │ Whitespace " " + 13:13 │ $ "\$" + 14:14 │ Identifier ✔ "a" + 15:15 │ String ✔ "\n" + 16:19 │ Whitespace " " + 20:20 │ String ✔ "b" + 21:23 │ """ "\"\"\"" +``` + +### String nodes always wrapped in `K"string"` or `K"cmdstring"` + +All strings are surrounded by a node of kind `K"string"`, even non-interpolated +literals, so `"x"` parses as `(string "x")`. This makes string handling simpler +and more systematic because interpolations and triple strings with embedded +trivia don't need to be treated differently. It also gives a container in which +to attach the delimiting quotes. + +The same goes for command strings which are always wrapped in `K"cmdstring"` +regardless of whether they have multiple pieces (due to triple-quoted +dedenting) or otherwise. + +### No desugaring of the closure in do blocks + +The reference parser represents `do` syntax with a closure for the second +argument. That is, + +```julia +f(x) do y + body +end +``` + +becomes `(do (call f x) (-> (tuple y) (block body)))` in the reference parser. + +However, the nested closure with `->` head is implied here rather than present +in the surface syntax, which suggests this is a premature desugaring step. +Instead we emit the flatter structure `(do (call f x) (tuple y) (block body))`. + + +## Tree structure reference + +This section may eventually contain a full description of the Julia AST. For +now, we describe a few of the more subtle features. 
+ +### Concatenation syntax + +Concatenation syntax comes in two syntax forms: +* The traditional `hcat`/`vcat`/`row` which deal with concatenation or matrix + construction along dimensions one and two. +* The new `ncat`/`nrow` syntax which deals with concatenation or array + construction along arbitrary dimensions. + +We write `ncat-3` for concatenation along the third dimension. (The `3` is +stored in the head flags for `SyntaxNode` trees, and in the first `arg` for +`Expr` trees.) Semantically the new syntax can work like the old: +* `ncat-1` is the same as `vcat` +* `ncat-2` is the same as `hcat` +* `row` is the same as `nrow-2` + +#### Vertical concatenation (dimension 1) + +Vertical concatenation along dimension 1 can be done with semicolons or newlines + +```julia +julia> print_tree(:([a + b])) +Expr(:vcat) +├─ :a +└─ :b + +julia> print_tree(:([a ; b])) +Expr(:vcat) +├─ :a +└─ :b +``` + +#### Horizontal concatenation (dimension 2) + +For horizontal concatenation along dimension 2, use spaces or double semicolons + +```julia +julia> print_tree(:([a b])) +Expr(:hcat) +├─ :a +└─ :b + +julia> print_tree(:([a ;; b])) +Expr(:ncat) +├─ 2 +├─ :a +└─ :b +``` + +#### Mixed concatenation + +Concatenation along dimensions 1 and 2 can be done with spaces and single +semicolons or newlines, producing a mixture of `vcat` and `row` expressions: + +```julia +julia> print_tree(:([a b + c d])) +# OR +julia> print_tree(:([a b ; c d])) +Expr(:vcat) +├─ Expr(:row) +│ ├─ :a +│ └─ :b +└─ Expr(:row) + ├─ :c + └─ :d +``` + +General n-dimensional concatenation results in nested `ncat` and `nrow`, for +example + +```julia +julia> print_tree(:([a ; b ;; c ; d ;;; x])) +Expr(:ncat) +├─ 3 +├─ Expr(:nrow) +│ ├─ 2 +│ ├─ Expr(:nrow) +│ │ ├─ 1 +│ │ ├─ :a +│ │ └─ :b +│ └─ Expr(:nrow) +│ ├─ 1 +│ ├─ :c +│ └─ :d +└─ :x ``` diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 37c814c2d6035..28b3f3fb37518 100644 --- a/JuliaSyntax/src/green_tree.jl +++ 
b/JuliaSyntax/src/green_tree.jl @@ -2,9 +2,8 @@ GreenNode(head, span) GreenNode(head, children...) -A "green tree" in Roslyn (C# compiler) terminology is a lossless syntax tree -which overlays all the source text. The most basic properties of a green tree -are that: +A "green tree" is a lossless syntax tree which overlays all the source text. +The most basic properties of a green tree are that: * Nodes cover a contiguous span of bytes in the text * Sibling nodes are ordered in the same order as the text @@ -20,22 +19,6 @@ As implementation choices, we choose that: * For simplicity and uniformity, leaf nodes cover a single token in the source. This is like rust-analyzer, but different from Roslyn where leaves can include syntax trivia. - -Design principles: -* Tree should remember what the lexer and parser knew about the source code -* Be position-independent so nodes can be interned and reused -* Be a low level textural overlay which is language independent. - -Design alternatives to explore: -* Maybe allow some loss of local parser state if it can be derived again - quickly? Particularly in the ordering of children. -* Store strings for tokens? (Surprisingly, rust-analyzer does this. It could be - efficient if the strings or nodes are interned for the parsing session?) -* Never construct this tree? Instead serialize it to Vector{UInt8} in an - efficient but compact format? Could this be more flexible with storing parser - state and beat the interning approach? We could also store the source tokens - in the serialization and discard the source text. (Caveat - unclear that this - could deal with incremental parsing...) """ struct GreenNode{Head} head::Head diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 307e7aa518c37..db16205898a20 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -290,7 +290,7 @@ end const _default_parser = _has_v1_6_hooks ? 
Core._parse : nothing """ - enable_in_core!([enable=true; freeze_world_age, debug_filename]) + enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing]) Connect the JuliaSyntax parser to the Julia runtime so that it replaces the flisp parser for all parsing work. That is, JuliaSyntax will be used for diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index e8ce3a503f1bd..9558f9bf9c449 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -44,6 +44,13 @@ struct SyntaxData <: AbstractSyntaxData val::Any end +""" + SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; + keep_parens=false, position::Integer=1) + +An AST node with a similar layout to `Expr`. Typically constructed from source +text by calling one of the parser API functions such as [`parseall`](@ref) +""" const SyntaxNode = TreeNode{SyntaxData} # Value of an error node with no children From dd449cd37879f25376a38d22287bad9535e1275c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 29 May 2023 20:01:35 +1000 Subject: [PATCH 0681/1109] Fix URLs in doc make.jl --- JuliaSyntax/docs/make.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl index ab2417e752c9b..79376b46fc5dc 100644 --- a/JuliaSyntax/docs/make.jl +++ b/JuliaSyntax/docs/make.jl @@ -12,12 +12,12 @@ makedocs(; ] "Design Discussion" => "design.md" ], - repo="https://github.com/c42f/JuliaSyntax.jl/blob/{commit}{path}#L{line}", + repo="https://github.com/JuliaLang/JuliaSyntax.jl/blob/{commit}{path}#L{line}", sitename="JuliaSyntax.jl", - authors = "Chris Foster and contributors: https://github.com/c42f/JuliaSyntax.jl/graphs/contributors" + authors = "Claire Foster and contributors: https://github.com/JuliaLang/JuliaSyntax.jl/graphs/contributors" ) deploydocs(; - repo="github.com/c42f/JuliaSyntax.jl", + repo="github.com/JuliaLang/JuliaSyntax.jl", push_preview=true ) From 
c9bb3f3f127314824cec0cafa9e68d8e345c965f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 3 Jun 2023 12:29:09 +1000 Subject: [PATCH 0682/1109] Add trailing line numbers to loops in Expr conversion (JuliaLang/JuliaSyntax.jl#298) These trailing line numbers are used to attribute coverage to the `end` of the loop for parts of the loop header which run there after lowering. --- JuliaSyntax/src/expr.jl | 7 +++++++ JuliaSyntax/test/expr.jl | 43 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index e39400d1fdd96..681a642cdfc7c 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -195,6 +195,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end loc = source_location(LineNumberNode, source, first(srcrange)) + endloc = source_location(LineNumberNode, source, last(srcrange)) _fixup_Expr_children!(head, loc, args) @@ -270,6 +271,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if @isexpr(a1, :cartesian_iterator) args[1] = Expr(:block, a1.args...) end + # Add extra line number node for the `end` of the block. This may seem + # useless but it affects code coverage. + push!(args[2].args, endloc) + elseif k == K"while" + # Line number node for the `end` of the block as in `for` loops. 
+ push!(args[2].args, endloc) elseif k in KSet"tuple vect braces" # Move parameters blocks to args[1] _reorder_parameters!(args, 1) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 98ab04f7484ab..9d6d29dfcf53c 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -158,6 +158,43 @@ ) ) end + + @testset "Loops" begin + @test parsestmt("for x=xs\n\nend") == + Expr(:for, + Expr(:(=), :x, :xs), + Expr(:block, + LineNumberNode(1), + LineNumberNode(3) + ) + ) + @test parsestmt("for x=xs\ny\nend") == + Expr(:for, + Expr(:(=), :x, :xs), + Expr(:block, + LineNumberNode(2), + :y, + LineNumberNode(3) + ) + ) + @test parsestmt("while cond\n\nend") == + Expr(:while, + :cond, + Expr(:block, + LineNumberNode(1), + LineNumberNode(3) + ) + ) + @test parsestmt("while cond\ny\nend") == + Expr(:while, + :cond, + Expr(:block, + LineNumberNode(2), + :y, + LineNumberNode(3) + ) + ) + end end @testset "Short form function line numbers" begin @@ -213,7 +250,8 @@ Expr(:(=), :i, :is), Expr(:block, LineNumberNode(1), - :body + :body, + LineNumberNode(1) ) ) @test parsestmt("for i=is, j=js\nbody\nend") == @@ -224,7 +262,8 @@ ), Expr(:block, LineNumberNode(2), - :body + :body, + LineNumberNode(3), ) ) end From 908923acfe54b51ababbcd9a4c879d55aab55dc4 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 3 Jun 2023 17:50:33 +1000 Subject: [PATCH 0683/1109] Small fix to make JuliaSyntax work when Int === Int32 (JuliaLang/JuliaSyntax.jl#299) Also add 32 bit arches to CI. 
--- JuliaSyntax/.github/workflows/CI.yml | 15 ++++++++++++++- JuliaSyntax/src/syntax_tree.jl | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index faa5a2eafa55d..588a3264adc7a 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -22,6 +22,7 @@ jobs: - '1.5' - '1.6' - '1.7' + - '1.8' - '1' - 'nightly' os: @@ -29,9 +30,10 @@ jobs: - macOS-latest - windows-latest arch: + - x86 - x64 exclude: - # Test all OS's on + # Test all OS's and arch possibilities on # - 1.0 # - 1.6 # - 1 @@ -44,12 +46,23 @@ jobs: - {os: 'macOS-latest', version: '1.4'} - {os: 'macOS-latest', version: '1.5'} - {os: 'macOS-latest', version: '1.7'} + - {os: 'macOS-latest', version: '1.8'} + # MacOS not available on x86 + - {os: 'macOS-latest', arch: 'x86'} - {os: 'windows-latest', version: '1.1'} - {os: 'windows-latest', version: '1.2'} - {os: 'windows-latest', version: '1.3'} - {os: 'windows-latest', version: '1.4'} - {os: 'windows-latest', version: '1.5'} - {os: 'windows-latest', version: '1.7'} + - {os: 'windows-latest', version: '1.8'} + - {os: 'ubuntu-latest', version: '1.1', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.2', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.3', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.4', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.5', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.7', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.8', arch: 'x86'} steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9558f9bf9c449..87fa6c27fb88b 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -79,7 +79,7 @@ function _to_SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, if !is_trivia(rawchild) || is_error(rawchild) push!(cs, _to_SyntaxNode(source, rawchild, pos, 
keep_parens)) end - pos += rawchild.span + pos += Int(rawchild.span) end if !keep_parens && kind(raw) == K"parens" && length(cs) == 1 return cs[1] From 85579932c09f155f83678cf268e0491d294efd72 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 3 Jun 2023 20:05:24 +1000 Subject: [PATCH 0684/1109] Add a few missing docstrings for API docs (JuliaLang/JuliaSyntax.jl#301) --- JuliaSyntax/docs/make.jl | 3 ++- JuliaSyntax/docs/src/api.md | 6 +++++- JuliaSyntax/src/parse_stream.jl | 18 ++++++++++++++++++ JuliaSyntax/src/source_files.jl | 4 ++++ JuliaSyntax/src/syntax_tree.jl | 6 ++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl index 79376b46fc5dc..c6dca55563025 100644 --- a/JuliaSyntax/docs/make.jl +++ b/JuliaSyntax/docs/make.jl @@ -14,7 +14,8 @@ makedocs(; ], repo="https://github.com/JuliaLang/JuliaSyntax.jl/blob/{commit}{path}#L{line}", sitename="JuliaSyntax.jl", - authors = "Claire Foster and contributors: https://github.com/JuliaLang/JuliaSyntax.jl/graphs/contributors" + authors = "Claire Foster and contributors: https://github.com/JuliaLang/JuliaSyntax.jl/graphs/contributors", + strict = Documenter.except(:missing_docs) ) deploydocs(; diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index c7678890167b2..045d64207752e 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -13,11 +13,13 @@ JuliaSyntax.parseatom The `ParseStream` interface which provides a low-level stream-like I/O interface for writing the parser. The parser does not depend on or produce any concrete tree data structure as part of the parsing phase but the output spans -can be post-processed into various tree data structures as required. +can be post-processed into various tree data structures as required using +[`JuliaSyntax.build_tree`](@ref). ```@docs JuliaSyntax.parse! 
JuliaSyntax.ParseStream +JuliaSyntax.build_tree ``` ## Tokenization @@ -41,6 +43,8 @@ JuliaSyntax.source_location ## Expression heads/kinds ```@docs +JuliaSyntax.Kind +JuliaSyntax.SyntaxHead JuliaSyntax.@K_str JuliaSyntax.kind JuliaSyntax.head diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index c41cc3bfdde48..d21260bafa1ad 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -69,12 +69,30 @@ end has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 #------------------------------------------------------------------------------- +""" + SyntaxHead(kind, flags) + +A `SyntaxHead` combines the [`Kind`](@ref) of a syntactic construct with a set +of flags. The kind defines the broad "type" of the syntactic construct, while +the flag bits compactly store more detailed information about the construct. +""" struct SyntaxHead kind::Kind flags::RawFlags end kind(head::SyntaxHead) = head.kind + +""" + flags(x) + +Return the flag bits of a syntactic construct. Prefer to query these with the +predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`, +`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`, +`is_decorated`. + +Or extract numeric portion of the flags with `numeric_flags`. +""" flags(head::SyntaxHead) = head.flags function Base.summary(head::SyntaxHead) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index c8e2a3983884b..e15a419c5b35c 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -40,6 +40,10 @@ function _source_line_index(source::SourceFile, byte_index) return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1 end _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 + +""" +Get the line number at the given byte index. 
+""" source_line(source::SourceFile, byte_index) = _source_line(source, _source_line_index(source, byte_index)) """ diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 87fa6c27fb88b..62fe8eb382782 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -96,6 +96,12 @@ haschildren(node::TreeNode) = node.children !== nothing children(node::TreeNode) = (c = node.children; return c === nothing ? () : c) +""" + head(x) + +Get the [`SyntaxHead`](@ref) of a node of a tree or other syntax-related data +structure. +""" head(node::AbstractSyntaxNode) = head(node.raw) span(node::AbstractSyntaxNode) = span(node.raw) From 9cd7939e408bda8ae910a6f92a4fb0637f0327af Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 6 Jun 2023 20:45:39 +1000 Subject: [PATCH 0685/1109] Improvements to General registry test tools (JuliaLang/JuliaSyntax.jl#302) * Use a separate directory for tar files vs unpacked packages * Avoid crashing with overlong corrupt symlinks in packages * When updating the package cache, delete older versions of the same packages. 
* Accept a path in the check_all_packages script --- JuliaSyntax/test/test_utils.jl | 2 +- JuliaSyntax/tools/check_all_packages.jl | 13 +++++--- JuliaSyntax/tools/untar_packages.jl | 42 +++++++++++++++++++------ 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index de80ed1a96200..7b218f6096e7a 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -211,7 +211,7 @@ function find_source_in_path(basedir) src_list = String[] for (root, dirs, files) in walkdir(basedir) append!(src_list, (joinpath(root, f) for f in files - if endswith(f, ".jl") && isfile(joinpath(root,f)))) + if endswith(f, ".jl") && (p = joinpath(root,f); !islink(p) && isfile(p)))) end src_list end diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 0fad0c30d2612..08b49e680e334 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -8,8 +8,9 @@ using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization include("../test/test_utils.jl") include("../test/fuzz_test.jl") -pkgspath = joinpath(@__DIR__, "pkgs") -source_paths = find_source_in_path(pkgspath) +srcpath = isempty(ARGS) ? 
joinpath(@__DIR__, "pkgs") : ARGS[1] +source_paths = find_source_in_path(srcpath) + file_count = length(source_paths) exception_count = 0 @@ -25,8 +26,12 @@ Logging.with_logger(TerminalLogger()) do @logprogress ifile/file_count time_ms=round((time() - t0)/ifile*1000, digits = 2) text = read(fpath, String) expr_cache = fpath*".Expr" - #e2 = JuliaSyntax.fl_parseall(text) - e2 = open(deserialize, fpath*".Expr") + e2 = if isfile(expr_cache) + open(deserialize, fpath*".Expr") + else + @warn "Expr cache not found, parsing using reference parser" expr_cache maxlog=1 + JuliaSyntax.fl_parseall(text, filename=fpath) + end @assert Meta.isexpr(e2, :toplevel) try e1 = JuliaSyntax.parseall(Expr, text, filename=fpath, ignore_warnings=true) diff --git a/JuliaSyntax/tools/untar_packages.jl b/JuliaSyntax/tools/untar_packages.jl index 7d2507dc3e622..2c6986890bb63 100644 --- a/JuliaSyntax/tools/untar_packages.jl +++ b/JuliaSyntax/tools/untar_packages.jl @@ -2,16 +2,37 @@ using Serialization using JuliaSyntax pkgspath = joinpath(@__DIR__, "pkgs") +tarspath = joinpath(@__DIR__, "pkg_tars") -for tars in Iterators.partition(readdir(pkgspath), 50) - @sync for tar in tars - endswith(tar, ".tgz") || continue +mkpath(pkgspath) +mkpath(tarspath) + +tar_info = [(m = match(r"(.*)_(\d+\.\d+\.\d+.*)\.tgz$", f); (f, m[1], VersionNumber(m[2]))) + for f in readdir(tarspath) if endswith(f, ".tgz")] + +tar_maxver = Dict{String,VersionNumber}() +for (_,name,ver) in tar_info + v = get(tar_maxver, name, v"0.0.0") + if v < ver + tar_maxver[name] = ver + end +end + +@info "# Untarring packages" + +for tinfos in Iterators.partition(tar_info, 50) + @sync for (tarname, pkgname, pkgver) in tinfos @async begin - dir = joinpath(@__DIR__, "pkgs", replace(tar, r"\.tgz$" => "")) - if !isdir(dir) || !isdir(joinpath(dir, "src")) + dir = joinpath(pkgspath, "$(pkgname)_$(pkgver)") + if pkgver != tar_maxver[pkgname] + if isdir(dir) + # Clean up old packages + rm(dir; recursive=true, force=true) + end + elseif !isdir(dir) 
|| !isdir(joinpath(dir, "src")) rm(dir; recursive=true, force=true) mkpath(dir) - tar_path = joinpath(@__DIR__, "pkgs", tar) + tar_path = joinpath(tarspath, tarname) try run(`tar -xf $tar_path -C $dir`) catch err @@ -22,20 +43,21 @@ for tars in Iterators.partition(readdir(pkgspath), 50) end end -@info "Parsing files with reference parser" +@info "# Parsing files with reference parser" -let i = 0 +let i = 0, tot_files = 0 for (r, _, files) in walkdir(pkgspath) for f in files + tot_files += 1 endswith(f, ".jl") || continue fpath = joinpath(r, f) outpath = joinpath(r, f*".Expr") - if isfile(fpath) + if !islink(fpath) && isfile(fpath) && !isfile(outpath) code = read(fpath, String) fl_ex = JuliaSyntax.fl_parseall(code, filename=fpath) i += 1 if i % 100 == 0 - @info "$i files parsed" + @info "$i/$tot_files files parsed" end open(outpath, "w") do io serialize(io, fl_ex) From bf59985ea935b040c1251af53a288d5876f045fa Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Jun 2023 10:53:51 +1000 Subject: [PATCH 0686/1109] Normalize `global (x,y)` syntax during `Expr` conversion (JuliaLang/JuliaSyntax.jl#303) The reference parser accepts this syntax and normalizes it eagerly. So we should do the same rather than making it part of fuzzy Expr comparison later. --- JuliaSyntax/src/expr.jl | 14 ++++++++++---- JuliaSyntax/test/expr.jl | 4 ++++ JuliaSyntax/test/test_utils.jl | 5 +---- JuliaSyntax/test/test_utils_tests.jl | 5 ----- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 681a642cdfc7c..09416874fb51d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -411,10 +411,16 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end end elseif k == K"local" || k === K"global" - if length(args) == 1 && (a1 = args[1]; @isexpr(a1, :const)) - # Normalize `local const` to `const local` - args[1] = Expr(headsym, (a1::Expr).args...) 
- headsym = :const + if length(args) == 1 + a1 = args[1] + if @isexpr(a1, :const) + # Normalize `local const` to `const local` + args[1] = Expr(headsym, (a1::Expr).args...) + headsym = :const + elseif @isexpr(a1, :tuple) + # Normalize `global (x, y)` to `global x, y` + args = a1.args + end end elseif k == K"return" && isempty(args) push!(args, nothing) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 9d6d29dfcf53c..073cd29dcabf3 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -670,6 +670,10 @@ @test parsestmt("const x = 1") == Expr(:const, Expr(:(=), :x, 1)) @test parsestmt("global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1)) @test parsestmt("global x += 1") == Expr(:global, Expr(:+=, :x, 1)) + + # Parsing of global/local with + @test parsestmt("global (x,y)") == Expr(:global, :x, :y) + @test parsestmt("local (x,y)") == Expr(:local, :x, :y) end @testset "tuples" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 7b218f6096e7a..e3228eaa79bb6 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -133,10 +133,7 @@ function exprs_roughly_equal(fl_ex, ex) return false end h = ex.head - if (h == :global || h == :local) && length(args) == 1 && Meta.isexpr(args[1], :tuple) - # Allow invalid syntax like `global (x, y)` - args = args[1].args - elseif h == :function && Meta.isexpr(fl_args[1], :block) + if h == :function && Meta.isexpr(fl_args[1], :block) blockargs = filter(x->!(x isa LineNumberNode), fl_args[1].args) posargs = blockargs[1:max(0, length(blockargs))] kwargs = blockargs[2:end] diff --git a/JuliaSyntax/test/test_utils_tests.jl b/JuliaSyntax/test/test_utils_tests.jl index c0a0f3cd8ab34..8c68f068a10ed 100644 --- a/JuliaSyntax/test/test_utils_tests.jl +++ b/JuliaSyntax/test/test_utils_tests.jl @@ -1,11 +1,6 @@ # Tests for the test_utils go here to allow the utils to be included on their # own without invoking the tests. 
@testset "Reference parser bugs" begin - # `global (x,y)` - @test exprs_roughly_equal(Expr(:global, :x, :y), - Expr(:global, Expr(:tuple, :x, :y))) - @test exprs_roughly_equal(Expr(:local, :x, :y), - Expr(:local, Expr(:tuple, :x, :y))) # `0x1.8p0f` @test exprs_roughly_equal(1.5, Expr(:call, :*, 1.5, :f)) From ff8f7cae7aa340d6b3781caf19b9e8a0cf16cb34 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 9 Jun 2023 04:03:05 +1000 Subject: [PATCH 0687/1109] Allow parsing of custom string types in core parser hook (JuliaLang/JuliaSyntax.jl#304) The simplest method of doing this is to convert to String, so that's what we do here, using invokelatest. --- JuliaSyntax/src/hooks.jl | 4 ++++ JuliaSyntax/test/hooks.jl | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index db16205898a20..63a44628e43e2 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -143,6 +143,10 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # The C entry points will pass us this form. (ptr,len) = code code = String(unsafe_wrap(Array, ptr, len)) + elseif !(code isa String || code isa SubString || code isa Vector{UInt8}) + # For non-Base string types, convert to UTF-8 encoding, using an + # invokelatest to avoid world age issues. 
+ code = Base.invokelatest(String, code) end if !isnothing(_debug_log[]) print(_debug_log[], """ diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index c999793c053a6..8665492da2dd1 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -76,6 +76,19 @@ @test_throws JuliaSyntax.ParseError Meta.parse("[x)") end + # Check custom string types defined in a world age later than + # enable_in_core!() can be passed to Meta.parse() + mystr = @eval begin + struct MyString <: AbstractString + x::String + end + Base.String(s::MyString) = s.x + Base.ncodeunits(s::MyString) = ncodeunits(s.x) + + MyString("hi") + end + @test Meta.parse(mystr) == :hi + JuliaSyntax.enable_in_core!(false) end From fb5a703d9be3c0cf0b49aa469d8a6967073207d9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 10 Jun 2023 06:53:17 +1000 Subject: [PATCH 0688/1109] Add error for invalid string interpolation syntax (JuliaLang/JuliaSyntax.jl#306) Commas and semicolons are not allowed within string interpolation brackets. For example `"$(x, digits=2)"` is reserved syntax. 
--- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/parser.jl | 8 +++++++- JuliaSyntax/test/parser.jl | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index d21260bafa1ad..f37a1f590feb4 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -534,7 +534,7 @@ function peek_full_token(stream::ParseStream, n::Integer=1; end """ - peek_behind(ps; skip_trivia=true) + peek_behind(ps; skip_trivia=true, skip_parens=true) peek_behind(ps, pos::ParseStreamPosition) Return information about a span which was previously inserted into the output, diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 72fbd1d3f4f60..385c0da216a93 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2097,7 +2097,8 @@ function parse_function_signature(ps::ParseState, is_function::Bool) is_empty_tuple = peek(ps, skip_newlines=true) == K")" opts = parse_brackets(ps, K")") do _, _, _, _ _parsed_call = was_eventually_call(ps) - _is_anon_func = peek(ps, 2) ∉ KSet"( ." && !_parsed_call + t2 = peek_token(ps, 2) + _is_anon_func = kind(t2) ∉ KSet"( ." 
&& !_parsed_call return (needs_parameters = _is_anon_func, is_anon_func = _is_anon_func, parsed_call = _parsed_call) @@ -3178,7 +3179,12 @@ function parse_string(ps::ParseState, raw::Bool) if k == K"(" # "a $(x + y) b" ==> (string "a " (parens (call-i x + y)) " b") # "hi$("ho")" ==> (string "hi" (parens (string "ho"))) + m = position(ps) parse_atom(ps) + if peek_behind(ps, skip_parens=false).kind != K"parens" + # "$(x,y)" ==> (string (error (tuple-p x y))) + emit(ps, m, K"error", error="invalid interpolation syntax") + end elseif k == K"var" # var identifiers disabled in strings # "$var" ==> (string var) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b6a8d836e4cea..a1e71ce4a0675 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -869,6 +869,8 @@ tests = [ "\"\"\"\n\$x\n a\"\"\"" => "(string-s x \"\\n\" \" a\")" "\"a \$(x + y) b\"" => "(string \"a \" (parens (call-i x + y)) \" b\")" "\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" + "\"\$(x,y)\"" => "(string (error (tuple-p x y)))" + "\"\$(x;y)\"" => "(string (error (block-p x y)))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" From e20afd9e2ad1522498867e68015febd58551c59c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 10 Jun 2023 08:31:34 +1000 Subject: [PATCH 0689/1109] Also disallow bare generators in interpolation syntax (JuliaLang/JuliaSyntax.jl#307) This should be the only other case which needs to be disallowed - the reference parser uses parse_eq_star here and that's also what's used within parse_brackets with `,` `;` and `for` being the only allowed continuation tokens. Also refine error recovery a little and add additional tests. 
--- JuliaSyntax/src/parser.jl | 11 ++++++++--- JuliaSyntax/test/diagnostics.jl | 3 +++ JuliaSyntax/test/parser.jl | 7 +++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 385c0da216a93..675ac34c9615c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3180,11 +3180,16 @@ function parse_string(ps::ParseState, raw::Bool) # "a $(x + y) b" ==> (string "a " (parens (call-i x + y)) " b") # "hi$("ho")" ==> (string "hi" (parens (string "ho"))) m = position(ps) - parse_atom(ps) - if peek_behind(ps, skip_parens=false).kind != K"parens" - # "$(x,y)" ==> (string (error (tuple-p x y))) + bump(ps, TRIVIA_FLAG) + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + return (needs_parameters=false, + simple_interp=!had_commas && num_semis == 0 && num_subexprs == 1) + end + if !opts.simple_interp || peek_behind(ps, skip_parens=false).kind == K"generator" + # "$(x,y)" ==> (string (parens (error x y))) emit(ps, m, K"error", error="invalid interpolation syntax") end + emit(ps, m, K"parens") elseif k == K"var" # var identifiers disabled in strings # "$var" ==> (string var) diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 87180d0dddca2..9bba7bb30eab1 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -96,6 +96,9 @@ end Diagnostic(2, 19, :error, "try without catch or finally") Diagnostic(20, 19, :error, "Expected `end`") ] + + @test diagnostic("\"\$(x,y)\"") == + Diagnostic(3, 7, :error, "invalid interpolation syntax") end @testset "parser warnings" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a1e71ce4a0675..a7f3103ab4907 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -869,8 +869,11 @@ tests = [ "\"\"\"\n\$x\n a\"\"\"" => "(string-s x \"\\n\" \" a\")" "\"a \$(x + y) b\"" => "(string \"a \" (parens (call-i x + y)) \" b\")" 
"\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" - "\"\$(x,y)\"" => "(string (error (tuple-p x y)))" - "\"\$(x;y)\"" => "(string (error (block-p x y)))" + "\"\$(x,y)\"" => "(string (parens (error x y)))" + "\"\$(x;y)\"" => "(string (parens (error x y)))" + "\"\$(x for y in z)\"" => "(string (parens (error (generator x (= y z)))))" + "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (= y z)))))" + "\"\$(xs...)\"" => "(string (parens (... xs)))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" "\"\$outer\"" => "(string outer)" From aa12a80b04563afe8bb4b400e113f9f5ab3a6eca Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 11 Jun 2023 08:32:20 +1000 Subject: [PATCH 0690/1109] Parse `function (:*=(f))() end` syntax compatibly (JuliaLang/JuliaSyntax.jl#309) Slightly modify the anonymous function arg list disambiguation rules so that the function signature in this syntax parses as (call (call (quote *=) f)) The package NiLang uses this, so we'd like to keep it working. --- JuliaSyntax/src/parser.jl | 27 +++++++++++++------------ JuliaSyntax/test/parser.jl | 1 + JuliaSyntax/tools/check_all_packages.jl | 4 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 675ac34c9615c..df17899bf9f0e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2064,6 +2064,7 @@ end function parse_function_signature(ps::ParseState, is_function::Bool) is_anon_func = false parsed_call = false + needs_parse_call = true mark = position(ps) if !is_function @@ -2082,29 +2083,28 @@ function parse_function_signature(ps::ParseState, is_function::Bool) end else if peek(ps) != K"(" + # function f() end ==> (function (call f)) parse_unary_prefix(ps) else - # When an initial parenthesis is present, we might either have - # * the function name in parens, followed by (args...) 
- # * an anonymous function argument list in parens - # * the whole function declaration in parens - # - # This should somewhat parse as in parse_paren() (this is what - # the flisp parser does), but that results in weird parsing of - # keyword parameters. So we peek at a following `(` instead to - # distinguish the cases here. + # When an initial parenthesis is present, we need to distinguish + # between + # * The function name in parens, followed by (args...) + # * An anonymous function argument list in parens + # * The whole function declaration, in parens bump(ps, TRIVIA_FLAG) is_empty_tuple = peek(ps, skip_newlines=true) == K")" opts = parse_brackets(ps, K")") do _, _, _, _ _parsed_call = was_eventually_call(ps) - t2 = peek_token(ps, 2) - _is_anon_func = kind(t2) ∉ KSet"( ." && !_parsed_call + _needs_parse_call = peek(ps, 2) ∈ KSet"( ." + _is_anon_func = !_needs_parse_call && !_parsed_call return (needs_parameters = _is_anon_func, is_anon_func = _is_anon_func, - parsed_call = _parsed_call) + parsed_call = _parsed_call, + needs_parse_call = _needs_parse_call) end is_anon_func = opts.is_anon_func parsed_call = opts.parsed_call + needs_parse_call = opts.needs_parse_call if is_anon_func # function (x) body end ==> (function (tuple-p x) (block body)) # function (x::f()) end ==> (function (tuple-p (::-i x (call f))) (block)) @@ -2122,6 +2122,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (:)() end ==> (function (call (parens :)) (block)) # function (x::T)() end ==> (function (call (parens (::-i x T))) (block)) # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) + # function (:*=(f))() end ==> (function (call (parens (call (quote-: *=) f))) (block)) emit(ps, mark, K"parens", PARENS_FLAG) end end @@ -2142,7 +2143,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) if peek(ps, skip_newlines=true) == K"end" && !is_anon_func && !parsed_call return false end - if !is_anon_func && !parsed_call 
+ if needs_parse_call # Parse function argument list # function f(x,y) end ==> (function (call f x y) (block)) # function f{T}() end ==> (function (call (curly f T)) (block)) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a7f3103ab4907..ccad1d9aff396 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -568,6 +568,7 @@ tests = [ "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" "function (f::T{g(i)})() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))" "function (::T)() end" => "(function (call (parens (::-pre T))) (block))" + "function (:*=(f))() end" => "(function (call (parens (call (quote-: *=) f))) (block))" "function begin() end" => "(function (call (error begin)) (block))" "function f() end" => "(function (call f) (block))" "function type() end" => "(function (call type) (block))" diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 08b49e680e334..0dd993d5ccef1 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -8,8 +8,8 @@ using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization include("../test/test_utils.jl") include("../test/fuzz_test.jl") -srcpath = isempty(ARGS) ? joinpath(@__DIR__, "pkgs") : ARGS[1] -source_paths = find_source_in_path(srcpath) +srcpaths = isempty(ARGS) ? [joinpath(@__DIR__, "pkgs")] : abspath.(ARGS) +source_paths = vcat(find_source_in_path.(srcpaths)...) file_count = length(source_paths) From 7c5d7899320cd63c9ce148e564e895d2ce5626a6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 17 Jun 2023 09:09:02 +1000 Subject: [PATCH 0691/1109] Fix line numbers with source fragments (JuliaLang/JuliaSyntax.jl#310) When parsing source code fragments incrementally with * `Meta.parse(str, index)` or * `parsestmt(str, index)` we must avoid scanning the rest of `str` for line numbers for efficiency. 
In this mode, the user is expected to provide `first_line` to "manually" specify which line number we're counting from. Admittedly this is a bit clunky and should be integrated better with SourceFile (which should also be renamed - see issue JuliaLang/JuliaSyntax.jl#190) but for now seems to be the most consistent way to approach things here. As part of the refactoring here, switch over to using `Vector{UInt8}` for literal parsing which makes parsing to `ParseStream` and `GreenNode` around 10% faster. --- JuliaSyntax/docs/src/api.md | 1 + JuliaSyntax/prototypes/simple_parser.jl | 2 +- JuliaSyntax/src/expr.jl | 13 +- JuliaSyntax/src/hooks.jl | 25 +--- JuliaSyntax/src/kinds.jl | 2 + JuliaSyntax/src/literal_parsing.jl | 172 ++++++++++++------------ JuliaSyntax/src/parse_stream.jl | 48 +++---- JuliaSyntax/src/parser_api.jl | 8 +- JuliaSyntax/src/precompile.jl | 1 + JuliaSyntax/src/source_files.jl | 51 ++++--- JuliaSyntax/src/syntax_tree.jl | 19 ++- JuliaSyntax/src/utils.jl | 10 +- JuliaSyntax/test/literal_parsing.jl | 5 +- JuliaSyntax/test/parser.jl | 21 +-- JuliaSyntax/test/parser_api.jl | 10 ++ JuliaSyntax/test/source_files.jl | 26 +++- JuliaSyntax/test/syntax_tree.jl | 7 + JuliaSyntax/test/test_utils.jl | 1 + 18 files changed, 230 insertions(+), 192 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index 045d64207752e..8b7723870536f 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -38,6 +38,7 @@ JuliaSyntax.highlight JuliaSyntax.sourcetext JuliaSyntax.source_line JuliaSyntax.source_location +JuliaSyntax.source_line_range ``` ## Expression heads/kinds diff --git a/JuliaSyntax/prototypes/simple_parser.jl b/JuliaSyntax/prototypes/simple_parser.jl index 918f12ed5e210..06a408a26860a 100644 --- a/JuliaSyntax/prototypes/simple_parser.jl +++ b/JuliaSyntax/prototypes/simple_parser.jl @@ -140,7 +140,7 @@ end function parse_and_show(production::Function, code) st = ParseStream(code) production(st) - t = 
JuliaSyntax.build_tree(GreenNode, st, wrap_toplevel_as_kind=K"error") + t = JuliaSyntax.build_tree(GreenNode, st) show(stdout, MIME"text/plain"(), t, code, show_trivia=true) if !isempty(st.diagnostics) println() diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 09416874fb51d..845939ace0a03 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -68,7 +68,7 @@ function _strip_parens(ex) end end -function _leaf_to_Expr(source, head, srcrange, node) +function _leaf_to_Expr(source, txtbuf, head, srcrange, node) k = kind(head) if k == K"core_@cmd" return GlobalRef(Core, Symbol("@cmd")) @@ -79,7 +79,7 @@ function _leaf_to_Expr(source, head, srcrange, node) Expr(:error) : Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") else - val = isnothing(node) ? parse_julia_literal(source, head, srcrange) : node.val + val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : node.val if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr @@ -457,7 +457,9 @@ end function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...) 
- source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) + source = SourceFile(sourcetext(stream), first_index=first_byte(stream), + filename=filename, first_line=first_line) + txtbuf = textbuf(stream) args = Any[] childranges = UnitRange{Int}[] childheads = SyntaxHead[] @@ -467,7 +469,7 @@ function build_tree(::Type{Expr}, stream::ParseStream; end k = kind(head) if isnothing(nodechildren) - ex = _leaf_to_Expr(source, head, srcrange, nothing) + ex = _leaf_to_Expr(source, txtbuf, head, srcrange, nothing) else resize!(childranges, length(nodechildren)) resize!(childheads, length(nodechildren)) @@ -487,7 +489,8 @@ end function _to_expr(node::SyntaxNode) if !haschildren(node) - return _leaf_to_Expr(node.source, head(node), range(node), node) + offset, txtbuf = _unsafe_wrap_substring(sourcetext(node.source)) + return _leaf_to_Expr(node.source, txtbuf, head(node), range(node) .+ offset, node) end cs = children(node) args = Any[_to_expr(c) for c in cs] diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 63a44628e43e2..c50e89b9411da 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -157,10 +157,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti write(_debug_log[], code) end - io = IOBuffer(code) - seek(io, offset) - - stream = ParseStream(io) + stream = ParseStream(code, offset+1) if options === :statement || options === :atom # To copy the flisp parser driver: # * Parsing atoms consumes leading trivia @@ -179,9 +176,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end if any_error(stream) - tree = build_tree(SyntaxNode, stream, - wrap_toplevel_as_kind=K"None", first_line=lineno, - filename=filename) + tree = build_tree(SyntaxNode, stream, first_line=lineno, filename=filename) tag = _incomplete_tag(tree, lastindex(code)) if _has_v1_10_hooks exc = ParseError(stream, filename=filename, first_line=lineno, @@ -233,13 +228,7 @@ function 
core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # # show_diagnostics(stdout, stream.diagnostics, code) # - ex = build_tree(Expr, stream; filename=filename, - wrap_toplevel_as_kind=K"None", first_line=lineno) - if @isexpr(ex, :None) - # The None wrapping is only to give somewhere for trivia to be - # attached; unwrap! - ex = only(ex.args) - end + ex = build_tree(Expr, stream; filename=filename, first_line=lineno) end # Note the next byte in 1-based indexing is `last_byte(stream) + 1` but @@ -291,15 +280,13 @@ else Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e end -const _default_parser = _has_v1_6_hooks ? Core._parse : nothing - """ enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing]) Connect the JuliaSyntax parser to the Julia runtime so that it replaces the flisp parser for all parsing work. That is, JuliaSyntax will be used for -`include()` `Meta.parse()`, the REPL, etc. To disable, set use -`enable_in_core!(false)`. +`include()` `Meta.parse()`, the REPL, etc. To reset to the reference parser, +use `enable_in_core!(false)`. Keyword arguments: * `freeze_world_age` - Use a fixed world age for the parser to prevent @@ -322,7 +309,7 @@ function enable_in_core!(enable=true; freeze_world_age = true, world_age = freeze_world_age ? 
Base.get_world_counter() : typemax(UInt) _set_core_parse_hook(fix_world_age(core_parser_hook, world_age)) else - _set_core_parse_hook(_default_parser) + _set_core_parse_hook(Core.Compiler.fl_parse) end nothing end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 6dcc35576da8d..54f37e88e5b0d 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -914,6 +914,8 @@ const _kind_names = "cartesian_iterator" "comprehension" "typed_comprehension" + # Container for a single statement/atom plus any trivia and errors + "wrapper" "END_SYNTAX_KINDS" ] diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index 67a7af6a230eb..5cd610ce1c87c 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -68,7 +68,7 @@ Like `Base.parse(Union{Float64,Float32}, str)`, but permits float underflow Parse a Float64. str[firstind:lastind] must be a valid floating point literal string. If the value is outside Float64 range. """ -function parse_float_literal(::Type{T}, str::String, +function parse_float_literal(::Type{T}, str::Union{String,SubString,Vector{UInt8}}, firstind::Integer, endind::Integer) where {T} # force specialize with where {T} strsize = endind - firstind bufsz = 50 @@ -172,70 +172,68 @@ end #------------------------------------------------------------------------------- -is_indentation(c) = c == ' ' || c == '\t' - """ Process Julia source code escape sequences for raw strings """ -function unescape_raw_string(io::IO, str::AbstractString, is_cmd::Bool) - delim = is_cmd ? '`' : '"' - i = firstindex(str) - lastidx = lastindex(str) - while i <= lastidx - c = str[i] - if c != '\\' - if c == '\r' +function unescape_raw_string(io::IO, txtbuf::Vector{UInt8}, + firstind, endind, is_cmd::Bool) + delim = is_cmd ? 
u8"`" : u8"\"" + i = firstind + while i < endind + c = txtbuf[i] + if c != u8"\\" + if c == u8"\r" # convert literal \r and \r\n in strings to \n (issue #11988) - if i+1 <= lastidx && str[i+1] == '\n' + if i+1 < endind && txtbuf[i+1] == u8"\n" i += 1 end - c = '\n' + c = u8"\n" end write(io, c) - i = nextind(str, i) + i += 1 continue end # Process \ escape sequences j = i - while j <= lastidx && str[j] == '\\' + while j < endind && txtbuf[j] == u8"\\" j += 1 end nbackslash = j - i - if (j <= lastidx && str[j] == delim) || j > lastidx + if (j < endind && txtbuf[j] == delim) || j >= endind # Backslashes before a delimiter must also be escaped nbackslash = div(nbackslash,2) end for k = 1:nbackslash - write(io, '\\') + write(io, u8"\\") end i = j - if i <= lastidx - write(io, str[i]) - i = nextind(str, i) + if i < endind + write(io, txtbuf[i]) + i += 1 end end end """ Process Julia source code escape sequences for non-raw strings. -`str` should be passed without delimiting quotes. +`txtbuf` should be passed without delimiting quotes. """ -function unescape_julia_string(io::IO, str::AbstractString, +function unescape_julia_string(io::IO, txtbuf::Vector{UInt8}, firstind, endind, diagnostics) had_error = false i = firstind while i < endind - c = str[i] - if c != '\\' - if c == '\r' + c = txtbuf[i] + if c != u8"\\" + if c == u8"\r" # convert literal \r and \r\n in strings to \n (issue #11988) - if i+1 < endind && str[i+1] == '\n' + if i+1 < endind && txtbuf[i+1] == u8"\n" i += 1 end - c = '\n' + c = u8"\n" end write(io, c) - i = nextind(str, i) + i = nextind(txtbuf, i) continue end # Process \ escape sequences. See also Base.unescape_string which some @@ -248,20 +246,20 @@ function unescape_julia_string(io::IO, str::AbstractString, had_error = true break end - c = str[i] - if c == 'x' || c == 'u' || c == 'U' + c = txtbuf[i] + if c == u8"x" || c == u8"u" || c == u8"U" n = k = 0 - m = c == 'x' ? 2 : - c == 'u' ? 4 : 8 + m = c == u8"x" ? 2 : + c == u8"u" ? 
4 : 8 while (k += 1) <= m && i+1 < endind - nc = str[i+1] - n = '0' <= nc <= '9' ? n<<4 + (nc-'0') : - 'a' <= nc <= 'f' ? n<<4 + (nc-'a'+10) : - 'A' <= nc <= 'F' ? n<<4 + (nc-'A'+10) : break + nc = txtbuf[i+1] + n = u8"0" <= nc <= u8"9" ? n<<4 + (nc-u8"0") : + u8"a" <= nc <= u8"f" ? n<<4 + (nc-u8"a"+10) : + u8"A" <= nc <= u8"F" ? n<<4 + (nc-u8"A"+10) : break i += 1 end if k == 1 || n > 0x10ffff - u = m == 4 ? 'u' : 'U' + u = m == 4 ? u8"u" : u8"U" msg = (m == 2) ? "invalid hex escape sequence" : "invalid unicode escape sequence" emit_diagnostic(diagnostics, escstart:i, error=msg) @@ -273,12 +271,12 @@ function unescape_julia_string(io::IO, str::AbstractString, print(io, Char(n)) end end - elseif '0' <= c <= '7' + elseif u8"0" <= c <= u8"7" k = 1 - n = c-'0' + n = Int(c - u8"0") while (k += 1) <= 3 && i+1 < endind - c = str[i+1] - n = ('0' <= c <= '7') ? n<<3 + c-'0' : break + c = txtbuf[i+1] + n = (u8"0" <= c <= u8"7") ? n<<3 + c-u8"0" : break i += 1 end if n > 255 @@ -290,20 +288,20 @@ function unescape_julia_string(io::IO, str::AbstractString, end else u = # C escapes - c == 'n' ? '\n' : - c == 't' ? '\t' : - c == 'r' ? '\r' : - c == 'e' ? '\e' : - c == 'b' ? '\b' : - c == 'f' ? '\f' : - c == 'v' ? '\v' : - c == 'a' ? '\a' : + c == u8"n" ? u8"\n" : + c == u8"t" ? u8"\t" : + c == u8"r" ? u8"\r" : + c == u8"e" ? u8"\e" : + c == u8"b" ? u8"\b" : + c == u8"f" ? u8"\f" : + c == u8"v" ? u8"\v" : + c == u8"a" ? u8"\a" : # Literal escapes allowed in Julia source - c == '\\' ? '\\' : - c == '\'' ? '\'' : - c == '"' ? '"' : - c == '$' ? '$' : - c == '`' ? '`' : + c == u8"\\" ? u8"\\" : + c == u8"'" ? u8"'" : + c == u8"\"" ? u8"\"" : + c == u8"$" ? u8"$" : + c == u8"`" ? 
u8"`" : nothing if isnothing(u) emit_diagnostic(diagnostics, escstart:i, @@ -313,7 +311,10 @@ function unescape_julia_string(io::IO, str::AbstractString, write(io, u) end end - i = nextind(str, i) + # For non-ascii characters we may not be in the middle of the UTF-8 + # encoding for that char, but this doesn't matter because unescaping + # only relies on the ascii subset. + i += 1 end return had_error end @@ -362,43 +363,56 @@ end #------------------------------------------------------------------------------- -function parse_julia_literal(source, head::SyntaxHead, srcrange) +function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) # Leaf node k = kind(head) - val_str = view(source, srcrange) # Any errors parsing literals are represented as ErrorVal() - this can # happen when the user sets `ignore_errors=true` during parsing. - val = if k == K"Integer" - parse_int_literal(val_str) - elseif k == K"Float" - v, code = parse_float_literal(Float64, source.code, first(srcrange), + if k == K"Float" + v, code = parse_float_literal(Float64, txtbuf, first(srcrange), last(srcrange)+1) - (code === :ok || code === :underflow) ? v : ErrorVal() + return (code === :ok || code === :underflow) ? v : ErrorVal() elseif k == K"Float32" - v, code = parse_float_literal(Float32, source.code, first(srcrange), + v, code = parse_float_literal(Float32, txtbuf, first(srcrange), last(srcrange)+1) - (code === :ok || code === :underflow) ? v : ErrorVal() - elseif k in KSet"BinInt OctInt HexInt" - parse_uint_literal(val_str, k) - elseif k == K"true" - true - elseif k == K"false" - false + return (code === :ok || code === :underflow) ? v : ErrorVal() elseif k == K"Char" io = IOBuffer() - had_error = unescape_julia_string(io, source.code, first(srcrange), + had_error = unescape_julia_string(io, txtbuf, first(srcrange), last(srcrange)+1, Diagnostic[]) if had_error - ErrorVal() + return ErrorVal() else seek(io, 0) c = read(io, Char) - eof(io) ? c : ErrorVal() + return eof(io) ? 
c : ErrorVal() + end + elseif k in KSet"String CmdString" + io = IOBuffer() + had_error = false + if has_flags(head, RAW_STRING_FLAG) + unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, + k == K"CmdString") + else + had_error = unescape_julia_string(io, txtbuf, first(srcrange), + last(srcrange)+1, Diagnostic[]) end + return had_error ? ErrorVal() : String(take!(io)) + elseif k == K"true" + return true + elseif k == K"false" + return false + end + + val_str = String(txtbuf[srcrange]) + if k == K"Integer" + parse_int_literal(val_str) + elseif k in KSet"BinInt OctInt HexInt" + parse_uint_literal(val_str, k) elseif k == K"Identifier" if has_flags(head, RAW_STRING_FLAG) io = IOBuffer() - unescape_raw_string(io, val_str, false) + unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false) Symbol(normalize_identifier(String(take!(io)))) else Symbol(normalize_identifier(val_str)) @@ -406,16 +420,6 @@ function parse_julia_literal(source, head::SyntaxHead, srcrange) elseif is_keyword(k) # This should only happen for tokens nested inside errors Symbol(val_str) - elseif k in KSet"String CmdString" - io = IOBuffer() - had_error = false - if has_flags(head, RAW_STRING_FLAG) - unescape_raw_string(io, val_str, k == K"CmdString") - else - had_error = unescape_julia_string(io, source.code, first(srcrange), - last(srcrange)+1, Diagnostic[]) - end - had_error ? ErrorVal() : String(take!(io)) elseif is_operator(k) isempty(srcrange) ? 
Symbol(untokenize(k)) : # synthetic invisible tokens diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index f37a1f590feb4..ac7f9d7f9a861 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -905,7 +905,7 @@ end # ParseStream Post-processing function validate_tokens(stream::ParseStream) - text = sourcetext(stream) + txtbuf = textbuf(stream) toks = stream.tokens charbuf = IOBuffer() for i = 2:length(toks) @@ -922,13 +922,13 @@ function validate_tokens(stream::ParseStream) elseif k == K"Float" || k == K"Float32" underflow0 = false if k == K"Float" - x, code = parse_float_literal(Float64, text, fbyte, nbyte) + x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte) # jl_strtod_c can return "underflow" even for valid cases such # as `5e-324` where the source is an exact representation of # `x`. So only warn when underflowing to zero. underflow0 = code === :underflow && x == 0 else - x, code = parse_float_literal(Float32, text, fbyte, nbyte) + x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte) underflow0 = code === :underflow && x == 0 end if code === :ok @@ -944,7 +944,7 @@ function validate_tokens(stream::ParseStream) elseif k == K"Char" @assert fbyte < nbyte # Already handled in the parser truncate(charbuf, 0) - had_error = unescape_julia_string(charbuf, text, fbyte, + had_error = unescape_julia_string(charbuf, txtbuf, fbyte, nbyte, stream.diagnostics) if had_error error_kind = K"ErrorInvalidEscapeSequence" @@ -958,19 +958,18 @@ function validate_tokens(stream::ParseStream) end end elseif k == K"String" && !has_flags(t, RAW_STRING_FLAG) - had_error = unescape_julia_string(devnull, text, fbyte, + had_error = unescape_julia_string(devnull, txtbuf, fbyte, nbyte, stream.diagnostics) if had_error error_kind = K"ErrorInvalidEscapeSequence" end elseif is_error(k) && k != K"error" # Emit messages for non-generic token errors - # - textrange = fbyte:prevind(text, nbyte) + tokstr = 
String(txtbuf[tokrange]) msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter" - "$(_token_error_descriptions[k]) $(repr(text[fbyte]))" + "$(_token_error_descriptions[k]) $(repr(tokstr[1]))" elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting" - "$(_token_error_descriptions[k]) $(repr(text[textrange]))" + "$(_token_error_descriptions[k]) $(repr(tokstr))" else _token_error_descriptions[k] end @@ -990,8 +989,7 @@ end # API for extracting results from ParseStream """ - build_tree(make_node::Function, ::Type{StackEntry}, stream::ParseStream; - wrap_toplevel_as_kind=nothing, kws...) + build_tree(make_node::Function, ::Type{StackEntry}, stream::ParseStream; kws...) Construct a tree from a ParseStream using depth-first traversal. `make_node` must have the signature @@ -1002,14 +1000,13 @@ where `children` is either `nothing` for leaf nodes or an iterable of the children of type `StackEntry` for internal nodes. `StackEntry` may be a node type, but also may include other information required during building the tree. -A single node which covers the input is expected, but if the ParseStream has -multiple nodes at the top level, `wrap_toplevel_as_kind` may be used to wrap -them in a single node. +If the ParseStream has multiple nodes at the top level, `K"wrapper"` is used to +wrap them in a single node. The tree here is constructed depth-first in postorder. """ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; - wrap_toplevel_as_kind=nothing, kws...) where NodeType + kws...) 
where NodeType stack = Vector{NamedTuple{(:first_token,:node),Tuple{Int,NodeType}}}() tokens = stream.tokens @@ -1068,15 +1065,11 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; end if length(stack) == 1 return only(stack).node - elseif !isnothing(wrap_toplevel_as_kind) - # Mostly for debugging + else srcrange = (stream.tokens[1].next_byte: stream.tokens[end].next_byte - 1) children = (x.node for x in stack) - return make_node(SyntaxHead(wrap_toplevel_as_kind, EMPTY_FLAGS), - srcrange, children) - else - error("Found multiple nodes at top level") + return make_node(SyntaxHead(K"wrapper", EMPTY_FLAGS), srcrange, children) end end @@ -1092,21 +1085,22 @@ state for further parsing. """ function sourcetext(stream::ParseStream; steal_textbuf=false) root = stream.text_root - # The following works for SubString but makes the return type of this - # method type unstable. + # The following kinda works but makes the return type of this method type + # unstable. (Also codeunit(root) == UInt8 doesn't imply UTF-8 encoding?) # if root isa AbstractString && codeunit(root) == UInt8 # return root - if root isa String - return root + str = if root isa String || root isa SubString + root elseif steal_textbuf - return String(stream.textbuf) + String(stream.textbuf) else # Safe default for other cases is to copy the buffer. 
Technically this # could possibly be avoided in some situations, but might have side # effects such as mutating stream.text_root or stealing the storage of # stream.textbuf - return String(copy(stream.textbuf)) + String(copy(stream.textbuf)) end + SubString(str, first_byte(stream), thisind(str, last_byte(stream))) end """ diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index ffc6538fd24f2..cc47dacc44c7b 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -74,7 +74,6 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :all bump_trivia(stream, skip_newlines=true) - empty!(stream) end parse!(stream; rule=rule) if need_eof @@ -87,12 +86,7 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version= (!ignore_warnings && !isempty(stream.diagnostics)) throw(ParseError(stream, filename=filename, first_line=first_line)) end - # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind - # mess that we've got here. - # * It's kind of required for GreenNode, as GreenNode only records spans, - # not absolute positions. - # * Dropping it would be ok for SyntaxNode and Expr... - tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename, first_line=first_line, kws...) + tree = build_tree(T, stream; filename=filename, first_line=first_line, kws...) 
tree, last_byte(stream) + 1 end diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index 6f8d010c88904..922be3540dc07 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -2,6 +2,7 @@ let filename = joinpath(@__DIR__, "literal_parsing.jl") text = read(filename, String) parseall(Expr, text) + parseall(SyntaxNode, text) if _has_v1_6_hooks enable_in_core!() Meta.parse("1 + 2") diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index e15a419c5b35c..283f6d1fa1425 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -1,24 +1,30 @@ """ - SourceFile(code [; filename=nothing, first_line=1]) + SourceFile(code [; filename=nothing, first_line=1, first_index=1]) -A UTF-8 source code string with associated file name and line number. +UTF-8 source text with associated file name and line number, storing the +character indices of the start of each line. `first_line` and `first_index` +can be used to specify the line number and index of the first character of +`code` within a larger piece of source text. -`SourceFile` stores the character positions of line starts to facilitate indexing. +`SourceFile` may be indexed via `getindex` or `view` to get a string. Line +information for a byte offset can be looked up via the `source_line`, +`source_location` and `source_line_range` functions. """ struct SourceFile - # We use `code::String` for now but it could be some other UTF-8 based - # string data structure with byte-based indexing. - # - # For example a rope data structure may be good for incremental editing - # https://en.wikipedia.org/wiki/Rope_(data_structure) - code::String + # TODO: Rename SourceFile -> SourceText / SourceChunk / SourceIndex / SourceLineIndex ? 
+ # See https://github.com/JuliaLang/JuliaSyntax.jl/issues/190 + code::SubString + # Offset of `code` within a larger chunk of source text + byte_offset::Int filename::Union{Nothing,String} + # first_column::Int ?? first_line::Int # String index of start of every line line_starts::Vector{Int} end -function SourceFile(code::AbstractString; filename=nothing, first_line=1) +function SourceFile(code::AbstractString; filename=nothing, first_line=1, + first_index=1) line_starts = Int[1] for i in eachindex(code) # The line is considered to start after the `\n` @@ -27,7 +33,7 @@ function SourceFile(code::AbstractString; filename=nothing, first_line=1) if isempty(code) || last(code) != '\n' push!(line_starts, ncodeunits(code)+1) end - SourceFile(code, filename, first_line, line_starts) + SourceFile(code, first_index-1, filename, first_line, line_starts) end function SourceFile(; filename, kwargs...) @@ -36,7 +42,7 @@ end # Get line number of the given byte within the code function _source_line_index(source::SourceFile, byte_index) - lineidx = searchsortedlast(source.line_starts, byte_index) + lineidx = searchsortedlast(source.line_starts, byte_index - source.byte_offset) return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1 end _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 @@ -44,7 +50,8 @@ _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 """ Get the line number at the given byte index. """ -source_line(source::SourceFile, byte_index) = _source_line(source, _source_line_index(source, byte_index)) +source_line(source::SourceFile, byte_index) = + _source_line(source, _source_line_index(source, byte_index)) """ Get line number and character within the line at the given byte index. 
@@ -53,7 +60,7 @@ function source_location(source::SourceFile, byte_index) lineidx = _source_line_index(source, byte_index) i = source.line_starts[lineidx] column = 1 - while i < byte_index + while i < byte_index - source.byte_offset i = nextind(source.code, i) column += 1 end @@ -92,32 +99,32 @@ function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) end function Base.getindex(source::SourceFile, rng::AbstractUnitRange) - i = first(rng) + i = first(rng) - source.byte_offset # Convert byte range into unicode String character range. # Assumes valid unicode! (SubString doesn't give us a reliable way to opt # out of the valid unicode check. The SubString{String} inner constructor # has some @boundscheck, but using @inbounds depends on inlining choices.) - j = prevind(source.code, last(rng)+1) + j = prevind(source.code, last(rng) + 1 - source.byte_offset) source.code[i:j] end # TODO: Change view() here to `sourcetext` ? function Base.view(source::SourceFile, rng::AbstractUnitRange) - i = first(rng) - j = prevind(source.code, last(rng)+1) + i = first(rng) - source.byte_offset + j = prevind(source.code, last(rng) + 1 - source.byte_offset) SubString(source.code, i, j) end function Base.getindex(source::SourceFile, i::Int) - source.code[i] + source.code[i - source.byte_offset] end function Base.thisind(source::SourceFile, i::Int) - thisind(source.code, i) + thisind(source.code, i - source.byte_offset) end -Base.firstindex(source::SourceFile) = firstindex(source.code) -Base.lastindex(source::SourceFile) = lastindex(source.code) +Base.firstindex(source::SourceFile) = firstindex(source.code) + source.byte_offset +Base.lastindex(source::SourceFile) = lastindex(source.code) + source.byte_offset """ sourcetext(source::SourceFile) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 62fe8eb382782..9614a2794638f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -61,15 +61,18 @@ Base.show(io::IO, ::ErrorVal) 
= printstyled(io, "✘", color=:light_red) function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; keep_parens=false, position::Integer=1) - _to_SyntaxNode(source, raw, convert(Int, position), keep_parens) + offset, txtbuf = _unsafe_wrap_substring(sourcetext(source)) + _to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens) end -function _to_SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, +function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, + raw::GreenNode{SyntaxHead}, position::Int, keep_parens::Bool) if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. - val = parse_julia_literal(source, head(raw), position:position + span(raw) - 1) + valrange = position:position + span(raw) - 1 + val = parse_julia_literal(txtbuf, head(raw), valrange .+ offset) return SyntaxNode(nothing, nothing, SyntaxData(source, raw, position, val)) else cs = SyntaxNode[] @@ -77,13 +80,16 @@ function _to_SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}, for (i,rawchild) in enumerate(children(raw)) # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. 
if !is_trivia(rawchild) || is_error(rawchild) - push!(cs, _to_SyntaxNode(source, rawchild, pos, keep_parens)) + push!(cs, _to_SyntaxNode(source, txtbuf, offset, rawchild, pos, keep_parens)) end pos += Int(rawchild.span) end if !keep_parens && kind(raw) == K"parens" && length(cs) == 1 return cs[1] end + if kind(raw) == K"wrapper" && length(cs) == 1 + return cs[1] + end node = SyntaxNode(nothing, cs, SyntaxData(source, raw, position, nothing)) for c in cs c.parent = node @@ -130,7 +136,8 @@ function interpolate_literal(node::SyntaxNode, val) SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) end -function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent, show_byte_offsets) +function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, + indent, show_byte_offsets) fname = node.source.filename line, col = source_location(node.source, node.position) posstr = "$(lpad(line, 4)):$(rpad(col,3))│" @@ -216,7 +223,7 @@ function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, first_line=1, keep_parens=false, kws...) green_tree = build_tree(GreenNode, stream; kws...) 
source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) - SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens) + SyntaxNode(source, green_tree, position=1, keep_parens=keep_parens) end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index f30f06f99498e..cee9689e27984 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -3,7 +3,7 @@ if VERSION < v"1.1" isnothing(x) = x === nothing end if VERSION < v"1.4" - function only(x::AbstractVector) + function only(x::Union{AbstractVector,AbstractString}) if length(x) != 1 error("Collection must contain exactly 1 element") end @@ -14,6 +14,8 @@ if VERSION < v"1.5" import Base.peek end +_unsafe_wrap_substring(s) = (s.offset, unsafe_wrap(Vector{UInt8}, s.string)) + #-------------------------------------------------- # # Internal error, used as assertion failure for cases we expect can't happen. @@ -48,6 +50,12 @@ function remove_linenums!(ex::Expr) return ex end +# String macro to get the UInt8 code of an ascii character +macro u8_str(str) + c = str == "\\" ? 
'\\' : only(unescape_string(str)) + isascii(c) || error("Non-ascii character in u8_str") + codepoint(c) % UInt8 +end #------------------------------------------------------------------------------- # Text printing/display utils diff --git a/JuliaSyntax/test/literal_parsing.jl b/JuliaSyntax/test/literal_parsing.jl index 7c4d1012090a9..42fcbc44ef97a 100644 --- a/JuliaSyntax/test/literal_parsing.jl +++ b/JuliaSyntax/test/literal_parsing.jl @@ -153,7 +153,7 @@ end function unesc(str, firstind=firstindex(str), endind=lastindex(str)+1; diagnostics=false) io = IOBuffer() ds = JuliaSyntax.Diagnostic[] - unescape_julia_string(io, str, firstind, endind, ds) + unescape_julia_string(io, Vector{UInt8}(str), firstind, endind, ds) if diagnostics ds else @@ -204,7 +204,8 @@ end function unesc_raw(str, is_cmd) io = IOBuffer() - JuliaSyntax.unescape_raw_string(io, str, is_cmd) + JuliaSyntax.unescape_raw_string(io, Vector{UInt8}(str), + firstindex(str), lastindex(str)+1, is_cmd) return String(take!(io)) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index ccad1d9aff396..161323fcfcadc 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,22 +1,14 @@ """ Parse string to SyntaxNode tree and show as an sexpression """ -function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", expr=false) +function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6") stream = ParseStream(code, version=v) production(ParseState(stream)) JuliaSyntax.validate_tokens(stream) - t = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") + t = build_tree(GreenNode, stream) source = SourceFile(code) s = SyntaxNode(source, t, keep_parens=true) - if expr - JuliaSyntax.remove_linenums!(Expr(s)) - else - if kind(s) == K"None" - join([sprint(show, MIME("text/x.sexpression"), c) for c in children(s)], ' ') - else - sprint(show, MIME("text/x.sexpression"), s) - end - end + return sprint(show, MIME("text/x.sexpression"), s) end function 
test_parse(production, input, output) @@ -55,7 +47,7 @@ tests = [ "a;;;b;;" => "(toplevel-; a b)" """ "x" a ; "y" b """ => """(toplevel-; (doc (string "x") a) (doc (string "y") b))""" - "x y" => "x (error-t y)" + "x y" => "(wrapper x (error-t y))" ], JuliaSyntax.parse_eq => [ # parse_assignment @@ -411,7 +403,7 @@ tests = [ "A.@x a" => "(macrocall (. A (quote @x)) a)" "@A.B.@x a" => "(macrocall (. (. A (quote B)) (quote (error-t) @x)) a)" # .' discontinued - "f.'" => "f (error-t ')" + "f.'" => "(wrapper f (error-t '))" # Field/property syntax "f.x.y" => "(. (. f (quote x)) (quote y))" "x .y" => "(. x (error-t) (quote y))" @@ -812,6 +804,7 @@ tests = [ "`cmd`" => "(macrocall core_@cmd (cmdstring-r \"cmd\"))" "```cmd```" => "(macrocall core_@cmd (cmdstring-s-r \"cmd\"))" # literals + "true" => "true" "42" => "42" "1.0e-1000" => "0.0" "0x123456789abcdefp+0" => "8.19855292164869e16" @@ -975,7 +968,7 @@ parsestmt_test_specs = [ # The following are currently broken but at least the parser shouldn't # crash. 
- "x in' '" => "(call-i x in (char (error))) (error-t ')" + "x in' '" => "(wrapper (call-i x in (char (error))) (error-t '))" ] @testset "Parser does not crash on broken code" begin diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 1a78000e2f66f..4ceb8b584592c 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -85,6 +85,16 @@ @test parsestmt(Expr, SubString("α+x\ny"), 1) == (:(α+x), 5) @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2) @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4) + + @test parseatom(Expr, SubString("x+1.0"), 3) == (1.0, 6) + @test parseatom(Expr, SubString("x+\"\n\""), 3) == ("\n", 6) + + # Line numbers are relative to the start of the string we're currently + # parsing + @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 1) == + (Expr(:block, LineNumberNode(2), :a), 12) + @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 12) == + (Expr(:block, LineNumberNode(3), :b), 24) end @testset "error/warning handling" begin diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 5c113a7353196..c6329cfa106b8 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -26,19 +26,37 @@ @test source_location(SourceFile(; filename=path), 1) == (1,1) @test source_location(SourceFile(; filename=path, first_line=7), 1) == (7,1) end + + # byte offset + @test source_location(SourceFile("a\nbb\nccc\ndddd", first_index=10), 13) == (2,2) + @test source_line(SourceFile("a\nbb\nccc\ndddd", first_index=10), 15) == 3 + + # source_line convenience function + @test source_line(SourceFile("a\nb\n"), 2) == 1 + @test source_line(SourceFile("a\nb\n"), 3) == 2 end @testset "SourceFile position indexing" begin @test SourceFile("a\nb\n")[1:2] == "a\n" @test SourceFile("a\nb\n")[3:end] == "b\n" - if Base.VERSION >= v"1.4" - # Protect the `[begin` from being viewed by the parser on older Julia versions - @test 
eval(Meta.parse("""SourceFile("a\nb\n")[begin:end]""")) == "a\nb\n" - end # unicode @test SourceFile("αβ")[1:2] == "α" @test SourceFile("αβ")[3] == 'β' + + # offsets + sf = SourceFile("abcd", first_index=10) + @test firstindex(sf) == 10 + @test lastindex(sf) == 13 + @test sf[10] == 'a' + @test sf[10:11] == "ab" + @test view(sf, 10:11) == "ab" + + if Base.VERSION >= v"1.4" + # Protect the `[begin` from being viewed by the parser on older Julia versions + @test eval(Meta.parse("SourceFile(\"a\nb\n\")[begin:end]")) == "a\nb\n" + @test eval(Meta.parse("SourceFile(\"abcd\", first_index=10)[begin+1:end-1]")) == "bc" + end end @testset "SourceFile printing and text extraction" begin diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index c0cbf54763b43..32efac76222af 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -73,4 +73,11 @@ end 1:5 │ 5:5 │ b 2:3 │ 10:10 │ c """ + + t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) + @test sprint(show, MIME("text/plain"), t) == """ + line:col│ tree │ file_name + 1:1 │[block] + 1:7 │ b + """ end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index e3228eaa79bb6..540086fffa337 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -8,6 +8,7 @@ using .JuliaSyntax: Diagnostic, SourceFile, source_location, + source_line, parse!, parsestmt, parseall, From 5653d836ad399d74d0a7b85a88f6b2363a80e9bc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 17 Jun 2023 14:55:15 +1000 Subject: [PATCH 0692/1109] Add mailmap file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🏳️‍⚧️ --- JuliaSyntax/.mailmap | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 JuliaSyntax/.mailmap diff --git a/JuliaSyntax/.mailmap b/JuliaSyntax/.mailmap new file mode 100644 index 0000000000000..a16a55c644b31 --- /dev/null +++ b/JuliaSyntax/.mailmap @@ -0,0 +1,2 @@ +Claire Foster +Claire 
Foster From 1fc2a5bf66ce0fe3665ef2be5096e2e82e20363d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 17 Jun 2023 15:27:11 +1000 Subject: [PATCH 0693/1109] Bump version to 0.4.4 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index e199f0a859bfd..a786e8f4b065b 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "0.4.3" +version = "0.4.4" [compat] julia = "1.0" From 383286a31e26168608b2ae9badb570adf9568e76 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 19 Jun 2023 09:11:46 +1000 Subject: [PATCH 0694/1109] Cleanup: rename textbuf -> unsafe_textbuf --- JuliaSyntax/src/diagnostics.jl | 5 ----- JuliaSyntax/src/expr.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 14 +++++++++++--- JuliaSyntax/src/parser.jl | 24 ++++++++++++------------ 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 717f38921d446..c84fa0ac91c49 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -91,11 +91,6 @@ function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text: show_diagnostics(io, diagnostics, SourceFile(text)) end -function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, - byterange::AbstractUnitRange; kws...) - push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...)) -end - function any_error(diagnostics::AbstractVector{Diagnostic}) any(is_error(d) for d in diagnostics) end diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 845939ace0a03..0aa73a9486a1c 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -459,7 +459,7 @@ function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...) 
source = SourceFile(sourcetext(stream), first_index=first_byte(stream), filename=filename, first_line=first_line) - txtbuf = textbuf(stream) + txtbuf = unsafe_textbuf(stream) args = Any[] childranges = UnitRange{Int}[] childheads = SyntaxHead[] diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index ac7f9d7f9a861..825fdb7973e64 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -901,11 +901,16 @@ function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, emit_diagnostic(stream, fbyte:lbyte; kws...) end +function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, + byterange::AbstractUnitRange; kws...) + push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...)) +end + #------------------------------------------------------------------------------- # ParseStream Post-processing function validate_tokens(stream::ParseStream) - txtbuf = textbuf(stream) + txtbuf = unsafe_textbuf(stream) toks = stream.tokens charbuf = IOBuffer() for i = 2:length(toks) @@ -1104,11 +1109,14 @@ function sourcetext(stream::ParseStream; steal_textbuf=false) end """ - textbuf(stream) + unsafe_textbuf(stream) Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`. + +!!! warning + The caller must hold a reference to `stream` while using textbuf """ -textbuf(stream) = stream.textbuf +unsafe_textbuf(stream) = stream.textbuf first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel token last_byte(stream::ParseStream) = _next_byte(stream)-1 diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index df17899bf9f0e..a59839ca7927c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -125,8 +125,8 @@ function emit_diagnostic(ps::ParseState, args...; kws...) emit_diagnostic(ps.stream, args...; kws...) 
end -function textbuf(ps::ParseState) - textbuf(ps.stream) +function unsafe_textbuf(ps::ParseState) + unsafe_textbuf(ps.stream) end function first_child_position(ps::ParseState, pos::ParseStreamPosition) @@ -3143,7 +3143,7 @@ function parse_brackets(after_parse::Function, return opts end -is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t')) +_is_indentation(b::UInt8) = (b == u8" " || b == u8"\t") # Parse a string, embedded interpolations and deindent triple quoted strings # by marking indentation characters as whitespace trivia. @@ -3157,7 +3157,7 @@ function parse_string(ps::ParseState, raw::Bool) indent_ref_i = 0 indent_ref_len = typemax(Int) indent_chunks = acquire_positions(ps.stream) - buf = textbuf(ps) + txtbuf = unsafe_textbuf(ps) chunk_flags = raw ? RAW_STRING_FLAG : EMPTY_FLAGS bump(ps, TRIVIA_FLAG) first_chunk = true @@ -3212,10 +3212,10 @@ function parse_string(ps::ParseState, raw::Bool) if triplestr && first_chunk && span(t) <= 2 && begin s = span(t) - b = buf[last_byte(t)] + b = txtbuf[last_byte(t)] # Test whether the string is a single logical newline - (s == 1 && (b == UInt8('\n') || b == UInt8('\r'))) || - (s == 2 && (buf[first_byte(t)] == UInt8('\r') && b == UInt8('\n'))) + (s == 1 && (b == u8"\n" || b == u8"\r")) || + (s == 2 && (txtbuf[first_byte(t)] == u8"\r" && b == u8"\n")) end # First line of triple string is a newline only: mark as trivia. # """\nx""" ==> (string-s "x") @@ -3253,8 +3253,8 @@ function parse_string(ps::ParseState, raw::Bool) # """\n $a \n $b""" ==> (string-s a " \n" b) # """\n $a\n $b\n""" ==> (string-s " " a "\n" " " b "\n") # - if prev_chunk_newline && (b = buf[first_byte(t)]; - b != UInt8('\n') && b != UInt8('\r')) + if prev_chunk_newline && (b = txtbuf[first_byte(t)]; + b != u8"\n" && b != u8"\r") # Compute length of longest common prefix of mixed # spaces and tabs, in bytes # @@ -3267,7 +3267,7 @@ function parse_string(ps::ParseState, raw::Bool) # No indentation found yet. 
Find indentation we'll # use as a reference i = first_byte(t) - 1 - while i < last_byte(t) && is_indentation(buf[i+1]) + while i < last_byte(t) && _is_indentation(txtbuf[i+1]) i += 1 end indent_ref_i = first_byte(t) @@ -3277,7 +3277,7 @@ function parse_string(ps::ParseState, raw::Bool) # shortening length if necessary. j = 0 while j < span(t) && j < indent_ref_len - if buf[j + first_byte(t)] != buf[j + indent_ref_i] + if txtbuf[j + first_byte(t)] != txtbuf[j + indent_ref_i] break end j += 1 @@ -3287,7 +3287,7 @@ function parse_string(ps::ParseState, raw::Bool) # Prepare a place for indentiation trivia, if necessary push!(indent_chunks, bump_invisible(ps, K"TOMBSTONE")) end - b = buf[last_byte(t)] + b = txtbuf[last_byte(t)] prev_chunk_newline = b == UInt8('\n') || b == UInt8('\r') end bump(ps, chunk_flags) From eb49c18c4b11c705310dd905be6806ecbc3e4b3b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 20 Jun 2023 12:12:32 +1000 Subject: [PATCH 0695/1109] Fixes for `SourceFile` byte offsets This fixes a crash formatting error messages when core_parse_hook is used to parse a piece of broken code with a nontrivial byte offset. * `SourceFile` held by `SyntaxNode` preserves the indexing of the original string passed to the `parse*()` functions. 
* Fix `source_line_range` and `thisind` accordingly --- JuliaSyntax/src/expr.jl | 3 +-- JuliaSyntax/src/parse_stream.jl | 4 +++ JuliaSyntax/src/parser_api.jl | 2 +- JuliaSyntax/src/source_files.jl | 5 ++-- JuliaSyntax/src/syntax_tree.jl | 11 ++++++--- JuliaSyntax/test/hooks.jl | 42 +++++++++++++++++++++----------- JuliaSyntax/test/source_files.jl | 15 ++++++++++-- JuliaSyntax/test/syntax_tree.jl | 6 +++++ JuliaSyntax/test/test_utils.jl | 1 + 9 files changed, 64 insertions(+), 25 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 0aa73a9486a1c..bade833ed3823 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -457,8 +457,7 @@ end function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...) - source = SourceFile(sourcetext(stream), first_index=first_byte(stream), - filename=filename, first_line=first_line) + source = SourceFile(stream, filename=filename, first_line=first_line) txtbuf = unsafe_textbuf(stream) args = Any[] childranges = UnitRange{Int}[] diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 825fdb7973e64..2e99adaa9ff51 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1108,6 +1108,10 @@ function sourcetext(stream::ParseStream; steal_textbuf=false) SubString(str, first_byte(stream), thisind(str, last_byte(stream))) end +function SourceFile(stream::ParseStream; kws...) + return SourceFile(sourcetext(stream); first_index=first_byte(stream), kws...) +end + """ unsafe_textbuf(stream) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index cc47dacc44c7b..6eca864d0fc36 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -11,7 +11,7 @@ struct ParseError <: Exception end function ParseError(stream::ParseStream; incomplete_tag=:none, kws...) - source = SourceFile(sourcetext(stream); kws...) + source = SourceFile(stream; kws...) 
ParseError(source, stream.diagnostics, incomplete_tag) end diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 283f6d1fa1425..2ba33e1de6de2 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -76,7 +76,8 @@ function source_line_range(source::SourceFile, byte_index; lineidx = _source_line_index(source, byte_index) fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1 - fbyte,lbyte + return (fbyte + source.byte_offset, + lbyte + source.byte_offset) end function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) @@ -120,7 +121,7 @@ function Base.getindex(source::SourceFile, i::Int) end function Base.thisind(source::SourceFile, i::Int) - thisind(source.code, i - source.byte_offset) + thisind(source.code, i - source.byte_offset) + source.byte_offset end Base.firstindex(source::SourceFile) = firstindex(source.code) + source.byte_offset diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 9614a2794638f..a2df524d59ba0 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -61,8 +61,11 @@ Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; keep_parens=false, position::Integer=1) - offset, txtbuf = _unsafe_wrap_substring(sourcetext(source)) - _to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens) + GC.@preserve source begin + raw_offset, txtbuf = _unsafe_wrap_substring(source.code) + offset = raw_offset - source.byte_offset + _to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens) + end end function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, @@ -222,8 +225,8 @@ Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.position, d function build_tree(::Type{SyntaxNode}, 
stream::ParseStream; filename=nothing, first_line=1, keep_parens=false, kws...) green_tree = build_tree(GreenNode, stream; kws...) - source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line) - SyntaxNode(source, green_tree, position=1, keep_parens=keep_parens) + source = SourceFile(stream, filename=filename, first_line=first_line) + SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens) end #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 8665492da2dd1..3ed34209151a6 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -1,3 +1,15 @@ +function _unwrap_parse_error(core_hook_result) + @test Meta.isexpr(core_hook_result[1], :error, 1) + err = core_hook_result[1].args[1] + if JuliaSyntax._has_v1_10_hooks + @test err isa Meta.ParseError + return err.detail + else + @test err isa JuliaSyntax.ParseError + return err + end +end + @testset "Hooks for Core integration" begin @testset "whitespace parsing" begin @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) @@ -19,26 +31,28 @@ @test ex.args[2] == LineNumberNode(2, "otherfile") # Errors also propagate file & lineno - err = JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement)[1].args[1] - if JuliaSyntax._has_v1_10_hooks - @test err isa Meta.ParseError - err = err.detail - else - @test err isa JuliaSyntax.ParseError - end + err = _unwrap_parse_error( + JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement) + ) @test err isa JuliaSyntax.ParseError @test err.source.filename == "f1" @test err.source.first_line == 1 - err = JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement)[1].args[1] - if JuliaSyntax._has_v1_10_hooks - @test err isa Meta.ParseError - err = err.detail - else - @test err isa JuliaSyntax.ParseError - end + err = _unwrap_parse_error( + JuliaSyntax.core_parser_hook("[x)", "f2", 
2, 0, :statement) + ) @test err isa JuliaSyntax.ParseError @test err.source.filename == "f2" @test err.source.first_line == 2 + + # Errors including nontrivial offset indices + err = _unwrap_parse_error( + JuliaSyntax.core_parser_hook("a\nh{x)\nb", "test.jl", 1, 2, :statement) + ) + @test err isa JuliaSyntax.ParseError + @test err.source.first_line == 1 + @test err.diagnostics[1].first_byte == 6 + @test err.diagnostics[1].last_byte == 5 + @test err.diagnostics[1].message == "Expected `}`" end @testset "toplevel errors" begin diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index c6329cfa106b8..374a858ba14f9 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -28,8 +28,14 @@ end # byte offset - @test source_location(SourceFile("a\nbb\nccc\ndddd", first_index=10), 13) == (2,2) - @test source_line(SourceFile("a\nbb\nccc\ndddd", first_index=10), 15) == 3 + sf = SourceFile("a\nbb\nccc\ndddd", first_index=10) + @test source_location(sf, 13) == (2,2) + @test source_line(sf, 15) == 3 + @test source_line_range(sf, 10) == (10,11) + @test source_line_range(sf, 11) == (10,11) + @test source_line_range(sf, 12) == (12,14) + @test source_line_range(sf, 14) == (12,14) + @test source_line_range(sf, 15) == (15,18) # source_line convenience function @test source_line(SourceFile("a\nb\n"), 2) == 1 @@ -52,6 +58,11 @@ end @test sf[10:11] == "ab" @test view(sf, 10:11) == "ab" + @test thisind(SourceFile("xαx", first_index=10), 10) == 10 + @test thisind(SourceFile("xαx", first_index=10), 11) == 11 + @test thisind(SourceFile("xαx", first_index=10), 12) == 11 + @test thisind(SourceFile("xαx", first_index=10), 13) == 13 + if Base.VERSION >= v"1.4" # Protect the `[begin` from being viewed by the parser on older Julia versions @test eval(Meta.parse("SourceFile(\"a\nb\n\")[begin:end]")) == "a\nb\n" diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 32efac76222af..78ebb1843151b 100644 --- 
a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -49,6 +49,12 @@ @test length(children(node)) == 2 node[2] = parsestmt(SyntaxNode, "y") @test sourcetext(child(node, 2)) == "y" + + # SyntaxNode with offsets + t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) + @test t.position == 13 + @test child(t,1).position == 19 + @test child(t,1).val == :b end @testset "SyntaxNode pretty printing" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 540086fffa337..c8d87ac5e1e1e 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -9,6 +9,7 @@ using .JuliaSyntax: SourceFile, source_location, source_line, + source_line_range, parse!, parsestmt, parseall, From 8db497750703ede145d21869e456bde61166ace7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 20 Jun 2023 21:07:20 +1000 Subject: [PATCH 0696/1109] Bump version to 0.4.5 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index a786e8f4b065b..03e9b770a0cbd 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "0.4.4" +version = "0.4.5" [compat] julia = "1.0" From c11a90603fbda30e709d0ec2951df5d75be604af Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 26 Jun 2023 13:08:49 +1000 Subject: [PATCH 0697/1109] Fix diagnostics covering trailing text (JuliaLang/JuliaSyntax.jl#317) Diagnostics may sometimes cover trailing text which wasn't consumed. For example, `parseatom(")")` doesn't consume the errant trailing ')', but the diagnostic refers to this character. In this case, `SourceFile` needs to cover the location of the diagnostic in addition to any text which is consumed. 
As part of this, mark `sourcetext(::ParseStream)` as deprecated, in favour of constructing a `SourceFile` to wrap things up more neatly. Also fix a bug in `highlight()` for empty `SourceFile`s. --- JuliaSyntax/src/parse_stream.jl | 29 +++++++++++++++++------------ JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/src/source_files.jl | 4 +++- JuliaSyntax/test/diagnostics.jl | 15 ++++++++++++--- JuliaSyntax/test/source_files.jl | 3 +++ 5 files changed, 37 insertions(+), 17 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 2e99adaa9ff51..fcd35e3189360 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -331,7 +331,7 @@ function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) end function show_diagnostics(io::IO, stream::ParseStream) - show_diagnostics(io, stream.diagnostics, sourcetext(stream)) + show_diagnostics(io, stream.diagnostics, SourceFile(stream)) end # We manage a pool of stream positions as parser working space @@ -1078,17 +1078,8 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; end end -""" - sourcetext(stream::ParseStream; steal_textbuf=true) - -Return the source text being parsed by this `ParseStream` as a UTF-8 encoded -string. - -If `steal_textbuf==true`, this is permitted to steal the content of the -stream's text buffer. Note that this leaves the `ParseStream` in an invalid -state for further parsing. -""" function sourcetext(stream::ParseStream; steal_textbuf=false) + Base.depwarn("Use of `sourcetext(::ParseStream)` is deprecated. Use `SourceFile(stream)` instead", :sourcetext) root = stream.text_root # The following kinda works but makes the return type of this method type # unstable. (Also codeunit(root) == UInt8 doesn't imply UTF-8 encoding?) @@ -1109,7 +1100,21 @@ function sourcetext(stream::ParseStream; steal_textbuf=false) end function SourceFile(stream::ParseStream; kws...) 
- return SourceFile(sourcetext(stream); first_index=first_byte(stream), kws...) + fbyte = first_byte(stream) + lbyte = last_byte(stream) + if !isempty(stream.diagnostics) + lbyte = max(lbyte, maximum(last_byte(d) for d in stream.diagnostics)) + end + # See also sourcetext() + srcroot = stream.text_root + str = if srcroot isa String + SubString(srcroot, fbyte, thisind(srcroot, lbyte)) + elseif srcroot isa SubString{String} + SubString(srcroot, fbyte, thisind(srcroot, lbyte)) + else + SubString(String(stream.textbuf[fbyte:lbyte])) + end + return SourceFile(str; first_index=first_byte(stream), kws...) end """ diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index a59839ca7927c..9ca609d8b7c20 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3561,7 +3561,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) msg = leading_kind == K"EndMarker" ? "premature end of input" : "unexpected `$(untokenize(leading_kind))`" - bump_invisible(ps, K"error", error=msg) + emit_diagnostic(ps, error=msg) + bump_invisible(ps, K"error") else bump(ps, error="invalid syntax atom") end diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 2ba33e1de6de2..72811077b7c5a 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -219,7 +219,9 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; print(io, source[x:p-1]) _printstyled(io, hitext; bgcolor=color) print(io, source[q+1:d]) - source[thisind(source, d)] == '\n' || print(io, "\n") + if d >= firstindex(source) && source[thisind(source, d)] != '\n' + print(io, "\n") + end _print_marker_line(io, source[a:p-1], hitext, true, true, marker_line_color, note, notecolor) else # x -------------- diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 9bba7bb30eab1..423bb8824c0d4 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -1,6 +1,6 @@ -function 
diagnostic(str; only_first=false, allow_multiple=false) +function diagnostic(str; only_first=false, allow_multiple=false, rule=:all) stream = ParseStream(str) - parse!(stream) + parse!(stream, rule=rule) if allow_multiple stream.diagnostics else @@ -52,7 +52,11 @@ end Diagnostic(6, 5, :error, "invalid macro name") @test diagnostic("a, , b") == - Diagnostic(4, 3, :error, "unexpected `,`") + Diagnostic(4, 4, :error, "unexpected `,`") + @test diagnostic(")", allow_multiple=true) == [ + Diagnostic(1, 1, :error, "unexpected `)`") + Diagnostic(1, 1, :error, "extra tokens after end of expression") + ] @test diagnostic("if\nfalse\nend") == Diagnostic(3, 3, :error, "missing condition in `if`") @@ -99,6 +103,11 @@ end @test diagnostic("\"\$(x,y)\"") == Diagnostic(3, 7, :error, "invalid interpolation syntax") + + @test diagnostic("", rule=:statement) == + Diagnostic(1, 0, :error, "premature end of input") + @test diagnostic("", rule=:atom) == + Diagnostic(1, 0, :error, "premature end of input") end @testset "parser warnings" begin diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 374a858ba14f9..9a1548f1009d5 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -115,6 +115,9 @@ end @test sprint(highlight, SourceFile("a\nα"), 1:4) == "┌\na\nα\n┘" @test sprint(highlight, SourceFile("a\nb\nα"), 3:3) == "a\nb\n╙\nα" + # empty files + @test sprint(highlight, SourceFile(""), 1:0) == "└" + # Multi-line ranges @test sprint(highlight, src, 1:7) == """ ┌─── From a64867b5ed8bdd824bbbb390f3509efbb82aa8f3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 2 Jul 2023 06:59:53 +1000 Subject: [PATCH 0698/1109] Fix parse error with broken do syntax (JuliaLang/JuliaSyntax.jl#323) --- JuliaSyntax/src/expr.jl | 7 +++++-- JuliaSyntax/test/expr.jl | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index bade833ed3823..bfc2f391e857d 100644 --- 
a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -395,8 +395,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # as inert QuoteNode rather than in `Expr(:quote)` quasiquote return QuoteNode(a1) end - elseif k == K"do" - @check length(args) == 3 + elseif k == K"do" && length(args) == 3 return Expr(:do, args[1], Expr(:->, args[2], args[3])) elseif k == K"let" a1 = args[1] @@ -439,6 +438,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[i] = ai.value end end + elseif k == K"wrapper" + # This should only happen for errors wrapped next to what should have + # been single statements or atoms - represent these as blocks. + headsym = :block end return Expr(headsym, args...) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 073cd29dcabf3..8e6e37eac5f15 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -700,6 +700,8 @@ LineNumberNode(1), Expr(:error, :b)) @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) + @test parsestmt("x do", ignore_errors=true) == + Expr(:block, :x, Expr(:error, Expr(:do))) end @testset "import" begin From 06da7add48c2da88ae48a3061e0524ca26771513 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 2 Jul 2023 09:50:24 +1000 Subject: [PATCH 0699/1109] Clean up README a bit Add some links to external documents and update the status section a bit. Fixes JuliaLang/JuliaSyntax.jl#312. --- JuliaSyntax/README.md | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index ad49ef6d14c12..2865b154a299d 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -10,14 +10,19 @@ more information. ### Status -JuliaSyntax.jl is highly compatible with the Julia reference parser: It parses -all of Base, the standard libraries and General registry. 
Some minor difference -remain where we've decided to fix bugs or strange behaviors in the reference -parser. +JuliaSyntax.jl is used as the new default Julia parser in Julia 1.10. +It's highly compatible with Julia's older +[femtolisp-based parser](https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm) - +It parses all of Base, the standard libraries and General registry. Some minor +difference remain where we've decided to fix bugs or strange behaviors in the +reference parser. -The tree data structures are usable but their APIs will evolve as we try out -various use cases. Converting to `Expr` is always possible and will be stable -if that helps for your use case. +The AST and tree data structures are usable but their APIs will evolve as we +try out various use cases. Parsing to the standard `Expr` AST is always +possible and will be stable. + +The intention is to extend this library over time to cover more of the Julia +compiler frontend. # Getting involved @@ -27,8 +32,10 @@ https://github.com/JuliaLang/JuliaSyntax.jl/issues and choosing a small issue or two to work on to familiarize yourself with the code. Anything marked with the labels `intro issue` or `bug` might be a good place to start. -Also watching the 2022 JuliaCon talk and reading this document is probably good -for an overview. +Also watching the [2022 JuliaCon talk](https://www.youtube.com/watch?v=CIiGng9Brrk) +and reading the [design](https://julialang.github.io/JuliaSyntax.jl/dev/design/) and +[reference](https://julialang.github.io/JuliaSyntax.jl/dev/reference/) +documentation should be good for an overview. 
As of May 2023, we've got really good positional tracking within the source, but JuliaSyntax really needs a better system for parser recovery before the From bdfe6309656e77959f898ec0bab09c54cd1a7918 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Jul 2023 14:24:03 +1000 Subject: [PATCH 0700/1109] Update .gitignore --- JuliaSyntax/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore index c5f3e51da58b8..4681ba2d4e249 100644 --- a/JuliaSyntax/.gitignore +++ b/JuliaSyntax/.gitignore @@ -1,4 +1,6 @@ /Manifest.toml /tools/pkgs +/tools/pkg_tars /tools/logs.txt +/docs/build *.cov From 7712eebbe0620bd54146812afe3d9cff2f4ef167 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 9 Jul 2023 07:17:45 +1000 Subject: [PATCH 0701/1109] AST: parse `a.b` as `(. a b)` (JuliaLang/JuliaSyntax.jl#325) I was working a bit on macro expansion - particularly `quote` (quasiquote) expansion with `$` interpolations - and I've found that it's weird and inconvenient that we parse `a.b` into `(. a (quote b))`. Specifically, the part that's weird here is that we emit `(quote b)` for the field name even though this is "not quote syntax": this should not yield a syntax literal during lowering, and is thus a semantic mismatch with actual quote syntax of the form `:(a + b)` or `quote a+b end`. * Why is this a problem? It means we need special rules to distinguish actual syntax literals from field names. * But can we really change this? Surely this AST form had a purpose? Yes! A long time ago Julia supported `a.(b)` syntax to mean `getfield(a, b)`, which would naturally have been parsed as `(. a b)`. However this was deprecated as part of adding broadcast syntax in https://github.com/JuliaLang/julia/pull/15032 Here we simplify by parsing `a.b` as `(. a b)` instead, with the second argument implied to be a field name. 
--- JuliaSyntax/docs/src/reference.md | 1 + JuliaSyntax/src/expr.jl | 15 +++++-- JuliaSyntax/src/parser.jl | 66 ++++++++++++++----------------- JuliaSyntax/test/expr.jl | 13 ++++-- JuliaSyntax/test/parser.jl | 60 ++++++++++++++-------------- 5 files changed, 81 insertions(+), 74 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 22322b55381d5..67ced3f1498a0 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -64,6 +64,7 @@ class of tokenization errors and lets the parser deal with them. ### Improvements for AST inconsistencies +* Field access syntax like `a.b` is parsed as `(. a b)` rather than `(. a (quote b))` to avoid the inconsistency between this and actual quoted syntax literals like `:(b)` and `quote b end` ([#342](https://github.com/JuliaLang/JuliaSyntax.jl/issues/324)) * Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90) * Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. *) x y)` (#240) * The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index bfc2f391e857d..f674b984e2bbc 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -259,10 +259,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[1] = Symbol(".", args[1]) end end - elseif k == K"." && length(args) == 1 && is_operator(childheads[1]) - # Hack: Here we preserve the head of the operator to determine whether - # we need to coalesce it with the dot into a single symbol later on. - args[1] = (childheads[1], args[1]) + elseif k == K"." 
+ if length(args) == 2 + a2 = args[2] + if !@isexpr(a2, :quote) && !(a2 isa QuoteNode) + args[2] = QuoteNode(a2) + end + elseif length(args) == 1 && is_operator(childheads[1]) + # Hack: Here we preserve the head of the operator to determine whether + # we need to coalesce it with the dot into a single symbol later on. + args[1] = (childheads[1], args[1]) + end elseif k == K"ref" || k == K"curly" # Move parameters blocks to args[2] _reorder_parameters!(args, 2) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9ca609d8b7c20..042e811d8c405 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1437,7 +1437,7 @@ end # * Adjoint suffix like a' # * String macros like a"str" b"""str""" c`str` d```str``` # -# f(a).g(b) ==> (call (. (call f a) (quote g)) b) +# f(a).g(b) ==> (call (. (call f a) g) b) # # flisp: parse-call-chain, parse-call-with-initial-ex function parse_call_chain(ps::ParseState, mark, is_macrocall=false) @@ -1448,7 +1448,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end # source range of the @-prefixed part of a macro macro_atname_range = nothing - # $A.@x ==> (macrocall (. ($ A) (quote @x))) + # $A.@x ==> (macrocall (. ($ A) @x)) maybe_strmac = true # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. @@ -1470,22 +1470,22 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # [@foo x] ==> (vect (macrocall @foo x)) # [@foo] ==> (vect (macrocall @foo)) # @var"#" a ==> (macrocall (var @#) a) - # A.@x y ==> (macrocall (. A (quote @x)) y) - # A.@var"#" a ==> (macrocall (. A (quote (var @#))) a) + # A.@x y ==> (macrocall (. A @x) y) + # A.@var"#" a ==> (macrocall (. A (var @#)) a) # @+x y ==> (macrocall @+ x y) - # A.@.x ==> (macrocall (. A (quote @.)) x) + # A.@.x ==> (macrocall (. A @.) 
x) fix_macro_name_kind!(ps, macro_name_position) let ps = with_space_sensitive(ps) # Space separated macro arguments - # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) - # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) + # A.@foo a b ==> (macrocall (. A @foo) a b) + # @A.foo a b ==> (macrocall (. A @foo) a b) n_args = parse_space_separated_exprs(ps) is_doc_macro = peek_behind(ps, macro_name_position).orig_kind == K"doc" if is_doc_macro && n_args == 1 # Parse extended @doc args on next line # @doc x\ny ==> (macrocall @doc x y) - # A.@doc x\ny ==> (macrocall (. A (quote @doc)) doc x y) - # @A.doc x\ny ==> (macrocall (. A (quote @doc)) doc x y) + # A.@doc x\ny ==> (macrocall (. A @doc) doc x y) + # @A.doc x\ny ==> (macrocall (. A @doc) doc x y) # @doc x y\nz ==> (macrocall @doc x y) # # Excluded cases @@ -1518,8 +1518,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) - # A.@x(y) ==> (macrocall-p (. A (quote @x)) y) - # A.@x(y).z ==> (. (macrocall-p (. A (quote @x)) y) (quote z)) + # A.@x(y) ==> (macrocall-p (. A @x) y) + # A.@x(y).z ==> (. (macrocall-p (. A @x) y) z) fix_macro_name_kind!(ps, macro_name_position) is_macrocall = false macro_atname_range = nothing @@ -1535,8 +1535,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @S[a,b] ==> (macrocall @S (vect a b)) # @S[a b] ==> (macrocall @S (hcat a b)) # @S[a; b] ==> (macrocall @S (vcat a b)) - # A.@S[a] ==> (macrocall (. A (quote @S)) (vect a)) - # @S[a].b ==> (. (macrocall @S (vect a)) (quote b)) + # A.@S[a] ==> (macrocall (. A @S) (vect a)) + # @S[a].b ==> (. (macrocall @S (vect a)) b) #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) fix_macro_name_kind!(ps, macro_name_position) @@ -1565,14 +1565,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) check_ncat_compat(ps, mark, ckind) end elseif k == K"." - # x .y ==> (. 
x (error-t) (quote y)) + # x .y ==> (. x (error-t) y) bump_disallowed_space(ps) emark = position(ps) if !isnothing(macro_atname_range) # Allow `@` in macrocall only in first and last position - # A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x))) - # @A.B.x ==> (macrocall (. (. A (quote B)) (quote @x))) - # A.@B.x ==> (macrocall (. (. A (error-t) B) (quote @x))) + # A.B.@x ==> (macrocall (. (. A B) @x)) + # @A.B.x ==> (macrocall (. (. A B) @x)) + # A.@B.x ==> (macrocall (. (. A B (error-t)) @x)) emit_diagnostic(ps, macro_atname_range..., error="`@` must appear on first or last macro name component") bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") @@ -1603,28 +1603,23 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, m, K"quote", COLON_QUOTE) emit(ps, mark, K".") elseif k == K"$" - # f.$x ==> (. f (inert ($ x))) - # f.$(x+y) ==> (. f (inert ($ (call + x y)))) - # A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x))) - # @A.$x a ==> (macrocall (. A (inert (error x))) a) + # f.$x ==> (. f ($ x)) + # f.$(x+y) ==> (. f ($ (call + x y))) + # A.$B.@x ==> (macrocall (. (. A ($ B)) @x)) + # @A.$x a ==> (macrocall (. A (error x)) a) m = position(ps) bump(ps, TRIVIA_FLAG) parse_atom(ps) emit(ps, m, K"$") macro_name_position = position(ps) - # We need `inert` rather than `quote` here for subtle reasons: - # We need the expression expander to "see through" the quote - # around the `$x` in `:(f.$x)`, so that the `$x` is expanded - # even though it's double quoted. - emit(ps, m, K"inert") emit(ps, mark, K".") elseif k == K"@" # A macro call after some prefix A has been consumed - # A.@x ==> (macrocall (. A (quote @x))) - # A.@x a ==> (macrocall (. A (quote @x)) a) + # A.@x ==> (macrocall (. A @x)) + # A.@x a ==> (macrocall (. A @x) a) m = position(ps) if is_macrocall - # @A.B.@x a ==> (macrocall (. (. A (quote B)) (quote (error-t) @x)) a) + # @A.B.@x a ==> (macrocall (. (. 
A B) (error-t) @x) a) bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path") else bump(ps, TRIVIA_FLAG) @@ -1633,7 +1628,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_macro_name(ps) macro_name_position = position(ps) macro_atname_range = (m, position(ps)) - emit(ps, m, K"quote") emit(ps, mark, K".") elseif k == K"'" # TODO: Reclaim dotted postfix operators :-) @@ -1643,12 +1637,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) error="the .' operator for transpose is discontinued") else # Field/property syntax - # f.x.y ==> (. (. f (quote x)) (quote y)) - m = position(ps) + # f.x.y ==> (. (. f x) y) parse_atom(ps, false) macro_name_position = position(ps) maybe_strmac_1 = true - emit(ps, m, K"quote") emit(ps, mark, K".") end elseif k == K"'" && !preceding_whitespace(t) @@ -1665,8 +1657,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_call_arglist(ps, K"}") if is_macrocall # @S{a,b} ==> (macrocall S (braces a b)) - # A.@S{a} ==> (macrocall (. A (quote @S)) (braces a)) - # @S{a}.b ==> (. (macrocall @S (braces a)) (quote b)) + # A.@S{a} ==> (macrocall (. A @S) (braces a)) + # @S{a}.b ==> (. (macrocall @S (braces a)) b) fix_macro_name_kind!(ps, macro_name_position) emit(ps, m, K"braces") emit(ps, mark, K"macrocall") @@ -2118,7 +2110,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function ()(x) end ==> (function (call (tuple-p) x) (block)) emit(ps, mark, K"tuple", PARENS_FLAG) else - # function (A).f() end ==> (function (call (. (parens A) (quote f))) (block)) + # function (A).f() end ==> (function (call (. 
(parens A) f)) (block)) # function (:)() end ==> (function (call (parens :)) (block)) # function (x::T)() end ==> (function (call (parens (::-i x T))) (block)) # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) @@ -2147,7 +2139,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # Parse function argument list # function f(x,y) end ==> (function (call f x y) (block)) # function f{T}() end ==> (function (call (curly f T)) (block)) - # function A.f() end ==> (function (call (. A (quote f))) (block)) + # function A.f() end ==> (function (call (. A f)) (block)) parse_call_chain(ps, mark) if peek_behind(ps).kind != K"call" # function f body end ==> (function (error f) (block body)) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 8e6e37eac5f15..96e711d7084cd 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -17,9 +17,6 @@ # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 @test parseatom(":true") == Expr(:quote, true) - - # Handling of K"inert" - @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b))) end @testset "Line numbers" begin @@ -386,6 +383,16 @@ Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) end + @testset "Field access syntax" begin + @test parsestmt("a.b") == Expr(:., :a, QuoteNode(:b)) + @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b))) + @test parsestmt("a.:b") == Expr(:., :a, QuoteNode(:b)) + @test parsestmt("a.@b x") == Expr(:macrocall, + Expr(:., :a, QuoteNode(Symbol("@b"))), + LineNumberNode(1), + :x) + end + @testset "dotcall / dotted operators" begin @test parsestmt("f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) @test parsestmt("f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 161323fcfcadc..9291c7f678ba0 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -312,8 +312,8 @@ tests = [ 
"\$f(x)" => "(call (\$ f) x)" ".&(x,y)" => "(call (. &) x y)" # parse_call_chain - "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" - "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" + "f(a).g(b)" => "(call (. (call f a) g) b)" + "\$A.@x" => "(macrocall (. (\$ A) @x))" # non-errors in space sensitive contexts "[f (x)]" => "(hcat f (parens x))" @@ -322,16 +322,16 @@ tests = [ "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo (parens x))" "@foo (x,y)" => "(macrocall @foo (tuple-p x y))" - "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" - "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" + "A.@foo a b" => "(macrocall (. A @foo) a b)" + "@A.foo a b" => "(macrocall (. A @foo) a b)" "[@foo x]" => "(vect (macrocall @foo x))" "[@foo]" => "(vect (macrocall @foo))" "@var\"#\" a" => "(macrocall (var @#) a)" "@(A) x" => "(macrocall (parens @A) x)" - "A.@x y" => "(macrocall (. A (quote @x)) y)" - "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" + "A.@x y" => "(macrocall (. A @x) y)" + "A.@var\"#\" a"=> "(macrocall (. A (var @#)) a)" "@+x y" => "(macrocall @+ x y)" - "A.@.x" => "(macrocall (. A (quote @.)) x)" + "A.@.x" => "(macrocall (. A @.) x)" # Macro names "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. x)" @@ -339,8 +339,8 @@ tests = [ "@[x] y z" => "(macrocall (error (vect x)) y z)" # Special @doc parsing rules "@doc x\ny" => "(macrocall @doc x y)" - "A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)" - "@A.doc x\ny" => "(macrocall (. A (quote @doc)) x y)" + "A.@doc x\ny" => "(macrocall (. A @doc) x y)" + "@A.doc x\ny" => "(macrocall (. A @doc) x y)" "@doc x y\nz" => "(macrocall @doc x y)" "@doc x\n\ny" => "(macrocall @doc x)" "@doc x\nend" => "(macrocall @doc x)" @@ -352,8 +352,8 @@ tests = [ "(a=1)()" => "(call (parens (= a 1)))" "f (a)" => "(call f (error-t) a)" "@x(a, b)" => "(macrocall-p @x a b)" - "A.@x(y)" => "(macrocall-p (. A (quote @x)) y)" - "A.@x(y).z" => "(. (macrocall-p (. 
A (quote @x)) y) (quote z))" + "A.@x(y)" => "(macrocall-p (. A @x) y)" + "A.@x(y).z" => "(. (macrocall-p (. A @x) y) z)" # do "f() do\nend" => "(do (call f) (tuple) (block))" "f() do ; body end" => "(do (call f) (tuple) (block body))" @@ -364,8 +364,8 @@ tests = [ "@S[a,b]" => "(macrocall @S (vect a b))" "@S[a b]" => "(macrocall @S (hcat a b))" "@S[a; b]" => "(macrocall @S (vcat a b))" - "A.@S[a]" => "(macrocall (. A (quote @S)) (vect a))" - "@S[a].b" => "(. (macrocall @S (vect a)) (quote b))" + "A.@S[a]" => "(macrocall (. A @S) (vect a))" + "@S[a].b" => "(. (macrocall @S (vect a)) b)" ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" ((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))" "a[i]" => "(ref a i)" @@ -383,9 +383,9 @@ tests = [ # Dotted forms # Allow `@` in macrocall only in first and last position - "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" - "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" - "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" + "A.B.@x" => "(macrocall (. (. A B) @x))" + "@A.B.x" => "(macrocall (. (. A B) @x))" + "A.@B.x" => "(macrocall (. (. A B) (error-t) @x))" "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" "f.(a,b)" => "(dotcall f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" @@ -395,27 +395,27 @@ tests = [ "A.:+" => "(. A (quote-: +))" "A.:.+" => "(. A (quote-: (. +)))" "A.: +" => "(. A (quote-: (error-t) +))" - "f.\$x" => "(. f (inert (\$ x)))" - "f.\$(x+y)" => "(. f (inert (\$ (parens (call-i x + y)))))" - "A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))" - "@A.\$x a" => "(macrocall (. A (inert (error x))) a)" - "A.@x" => "(macrocall (. A (quote @x)))" - "A.@x a" => "(macrocall (. A (quote @x)) a)" - "@A.B.@x a" => "(macrocall (. (. A (quote B)) (quote (error-t) @x)) a)" + "f.\$x" => "(. f (\$ x))" + "f.\$(x+y)" => "(. f (\$ (parens (call-i x + y))))" + "A.\$B.@x" => "(macrocall (. (. 
A (\$ B)) @x))" + "@A.\$x a" => "(macrocall (. A (error x)) a)" + "A.@x" => "(macrocall (. A @x))" + "A.@x a" => "(macrocall (. A @x) a)" + "@A.B.@x a" => "(macrocall (. (. A B) (error-t) @x) a)" # .' discontinued "f.'" => "(wrapper f (error-t '))" # Field/property syntax - "f.x.y" => "(. (. f (quote x)) (quote y))" - "x .y" => "(. x (error-t) (quote y))" + "f.x.y" => "(. (. f x) y)" + "x .y" => "(. x (error-t) y)" # Adjoint "f'" => "(call-post f ')" "f'ᵀ" => "(call-post f 'ᵀ)" # Curly calls "S {a}" => "(curly S (error-t) a)" - "A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))" + "A.@S{a}" => "(macrocall (. A @S) (braces a))" "@S{a,b}" => "(macrocall @S (braces a b))" - "A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))" - "@S{a}.b" => "(. (macrocall @S (braces a)) (quote b))" + "A.@S{a}" => "(macrocall (. A @S) (braces a))" + "@S{a}.b" => "(. (macrocall @S (braces a)) b)" "S{a,b}" => "(curly S a b)" # String macros "x\"str\"" => """(macrocall @x_str (string-r "str"))""" @@ -554,7 +554,7 @@ tests = [ "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" "function ()(x) end" => "(function (call (tuple-p) x) (block))" - "function (A).f() end" => "(function (call (. (parens A) (quote f))) (block))" + "function (A).f() end" => "(function (call (. (parens A) f)) (block))" "function (:)() end" => "(function (call (parens :)) (block))" "function (x::T)() end"=> "(function (call (parens (::-i x T))) (block))" "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" @@ -575,7 +575,7 @@ tests = [ # Function argument list "function f(x,y) end" => "(function (call f x y) (block))" "function f{T}() end" => "(function (call (curly f T)) (block))" - "function A.f() end" => "(function (call (. A (quote f))) (block))" + "function A.f() end" => "(function (call (. 
A f)) (block))" "function f body end" => "(function (error f) (block body))" "function f()::T end" => "(function (::-i (call f) T) (block))" "function f()::g(T) end" => "(function (::-i (call f) (call g T)) (block))" From 780da439d16225aca506f8ab8ec35435d0181a3b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 9 Jul 2023 07:35:01 +1000 Subject: [PATCH 0702/1109] AST: Rearrange `do` to sit inside `call`/`macrocall` (JuliaLang/JuliaSyntax.jl#322) `do` syntax is represented in `Expr` with the `do` outside the call. This makes some sense syntactically (do appears as "an operator" after the function call). However semantically this nesting is awkward because the lambda represented by the do block is passed to the call. This same problem occurs for the macro form `@f(x) do \n body end` where the macro expander needs a special rule to expand nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the expression which are passed to this macro call rather than passing the expressions up the tree. In this PR, we change the parsing of @f(x, y) do a, b\n body\n end f(x, y) do a, b\n body\n end to tack the `do` onto the end of the call argument list: (macrocall @f x y (do (tuple a b) body)) (call f x y (do (tuple a b) body)) This achieves the following desirable properties 1. Content of `do` is nested inside the call which improves the match between AST and semantics 2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro 3. In the future, a macro can detect when it's being passed do syntax rather than lambda syntax 4. `do` head is used uniformly for both call and macrocall 5. We preserve the source ordering properties we need for the green tree. 
--- JuliaSyntax/docs/src/reference.md | 42 ++++++++++++++++++++----------- JuliaSyntax/src/expr.jl | 23 +++++++++++++++-- JuliaSyntax/src/parser.jl | 17 +++++++------ JuliaSyntax/test/expr.jl | 32 +++++++++++++++++++++-- JuliaSyntax/test/parser.jl | 9 ++++--- 5 files changed, 93 insertions(+), 30 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 67ced3f1498a0..bb9d3959aa4db 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -43,7 +43,7 @@ the source text more closely. * The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`. * Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) * `global const` and `const global` are not normalized by the parser. This is done in `Expr` conversion (#130) -* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) +* [`do` syntax](#Do-blocks) is nested as the last child of the call which the `do` lambda will be passed to (#98, #322) * `@.` is not lowered to `@__dot__` inside the parser (#146) * Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) * Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) @@ -78,7 +78,6 @@ class of tokenization errors and lets the parser deal with them. * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) * Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). 
- ## More detail on tree differences ### Generators @@ -196,23 +195,38 @@ The same goes for command strings which are always wrapped in `K"cmdstring"` regardless of whether they have multiple pieces (due to triple-quoted dedenting) or otherwise. -### No desugaring of the closure in do blocks +### Do blocks -The reference parser represents `do` syntax with a closure for the second -argument. That is, +`do` syntax is represented in the `Expr` AST with the `do` outside the call. +This makes some sense syntactically (do appears as "an operator" after the +function call). -```julia -f(x) do y - body -end -``` +However semantically this nesting is awkward because the lambda represented by +the do block is passed to the call. This same problem occurs for the macro form +`@f(x) do \n body end` where the macro expander needs a special rule to expand +nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the +expression which are passed to this macro call rather than passing the +expressions up the tree. + +The implied closure is also lowered to a nested `Expr(:->)` expression, though +it this somewhat premature to do this during parsing. + +To resolve these problems we parse + + @f(x, y) do a, b\n body\n end + f(x, y) do a, b\n body\n end -becomes `(do (call f x) (-> (tuple y) (block body)))` in the reference parser. +by tacking the `do` onto the end of the call argument list: -However, the nested closure with `->` head is implied here rather than present -in the surface syntax, which suggests this is a premature desugaring step. -Instead we emit the flatter structure `(do (call f x) (tuple y) (block body))`. + (macrocall @f x y (do (tuple a b) body)) + (call f x y (do (tuple a b) body)) +This achieves the following desirable properties +1. Content of `do` is nested inside the call which improves the match between AST and semantics +2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro +3. 
In the future, a macro can detect when it's being passed do syntax rather than lambda syntax +4. `do` head is used uniformly for both call and macrocall +5. We preserve the source ordering properties we need for the green tree. ## Tree structure reference diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index f674b984e2bbc..d5cef886bf1f3 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -184,6 +184,16 @@ function _fixup_Expr_children!(head, loc, args) return args end +# Remove the `do` block from the final position in a function/macro call arg list +function _extract_do_lambda!(args) + if length(args) > 1 && Meta.isexpr(args[end], :do_lambda) + do_ex = pop!(args)::Expr + return Expr(:->, do_ex.args...) + else + return nothing + end +end + # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) @@ -217,8 +227,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end end elseif k == K"macrocall" + do_lambda = _extract_do_lambda!(args) _reorder_parameters!(args, 2) insert!(args, 2, loc) + if do_lambda isa Expr + return Expr(:do, Expr(headsym, args...), do_lambda) + end elseif k == K"block" || (k == K"toplevel" && !has_flags(head, TOPLEVEL_SEMICOLONS_FLAG)) if isempty(args) push!(args, loc) @@ -247,6 +261,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, popfirst!(args) headsym = Symbol("'") end + do_lambda = _extract_do_lambda!(args) # Move parameters blocks to args[2] _reorder_parameters!(args, 2) if headsym === :dotcall @@ -259,6 +274,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[1] = Symbol(".", args[1]) end end + if do_lambda isa Expr + return Expr(:do, Expr(headsym, args...), do_lambda) + end elseif k == K"." 
if length(args) == 2 a2 = args[2] @@ -402,8 +420,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # as inert QuoteNode rather than in `Expr(:quote)` quasiquote return QuoteNode(a1) end - elseif k == K"do" && length(args) == 3 - return Expr(:do, args[1], Expr(:->, args[2], args[3])) + elseif k == K"do" + # Temporary head which is picked up by _extract_do_lambda + headsym = :do_lambda elseif k == K"let" a1 = args[1] if @isexpr(a1, :block) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 042e811d8c405..0cc9383fb5810 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1510,12 +1510,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") - emit(ps, mark, is_macrocall ? K"macrocall" : K"call", - is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if peek(ps) == K"do" - # f(x) do y body end ==> (do (call f x) (tuple y) (block body)) - parse_do(ps, mark) + # f(x) do y body end ==> (call f x (do (tuple y) (block body))) + parse_do(ps) end + emit(ps, mark, is_macrocall ? K"macrocall" : K"call", + is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) # A.@x(y) ==> (macrocall-p (. A @x) y) @@ -2266,18 +2266,19 @@ function parse_catch(ps::ParseState) end # flisp: parse-do -function parse_do(ps::ParseState, mark) +function parse_do(ps::ParseState) + mark = position(ps) bump(ps, TRIVIA_FLAG) # do ps = normal_context(ps) m = position(ps) if peek(ps) in KSet"NewlineWs ;" - # f() do\nend ==> (do (call f) (tuple) (block)) - # f() do ; body end ==> (do (call f) (tuple) (block body)) + # f() do\nend ==> (call f (do (tuple) (block))) + # f() do ; body end ==> (call f (do (tuple) (block body))) # this trivia needs to go into the tuple due to the way position() # works. 
bump(ps, TRIVIA_FLAG) else - # f() do x, y\n body end ==> (do (call f) (tuple x y) (block body)) + # f() do x, y\n body end ==> (call f (do (tuple x y) (block body))) parse_comma_separated(ps, parse_range) end emit(ps, m, K"tuple") diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 96e711d7084cd..810390c8fb585 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -296,11 +296,39 @@ @testset "do block conversion" begin @test parsestmt("f(x) do y\n body end") == - Expr(:do, Expr(:call, :f, :x), + Expr(:do, + Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, LineNumberNode(2), :body))) + + @test parsestmt("@f(x) do y body end") == + Expr(:do, + Expr(:macrocall, Symbol("@f"), LineNumberNode(1), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + @test parsestmt("f(x; a=1) do y body end") == + Expr(:do, + Expr(:call, :f, Expr(:parameters, Expr(:kw, :a, 1)), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + # Test calls with do inside them + @test parsestmt("g(f(x) do y\n body end)") == + Expr(:call, + :g, + Expr(:do, + Expr(:call, :f, :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(2), + :body)))) end @testset "= to Expr(:kw) conversion" begin @@ -708,7 +736,7 @@ @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) @test parsestmt("x do", ignore_errors=true) == - Expr(:block, :x, Expr(:error, Expr(:do))) + Expr(:block, :x, Expr(:error, Expr(:do_lambda))) end @testset "import" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 9291c7f678ba0..992284c9c46b0 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -355,10 +355,11 @@ tests = [ "A.@x(y)" => "(macrocall-p (. A @x) y)" "A.@x(y).z" => "(. (macrocall-p (. 
A @x) y) z)" # do - "f() do\nend" => "(do (call f) (tuple) (block))" - "f() do ; body end" => "(do (call f) (tuple) (block body))" - "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" - "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" + "f() do\nend" => "(call f (do (tuple) (block)))" + "f() do ; body end" => "(call f (do (tuple) (block body)))" + "f() do x, y\n body end" => "(call f (do (tuple x y) (block body)))" + "f(x) do y body end" => "(call f x (do (tuple y) (block body)))" + "@f(x) do y body end" => "(macrocall-p @f x (do (tuple y) (block body)))" # square brackets "@S[a,b]" => "(macrocall @S (vect a b))" From 9255df7b43d0b701dcdfb9ccf8b883cc63126059 Mon Sep 17 00:00:00 2001 From: "Sergio A. Vargas" Date: Mon, 10 Jul 2023 23:27:22 -0500 Subject: [PATCH 0703/1109] Fix code block indentation (JuliaLang/JuliaSyntax.jl#326) The code block has to be indented at the same level as the list elements, otherwise Documenter won't parse it correctly. --- JuliaSyntax/docs/src/design.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md index 737dc31b7d2bb..1644883cc91c8 100644 --- a/JuliaSyntax/docs/src/design.md +++ b/JuliaSyntax/docs/src/design.md @@ -327,16 +327,16 @@ parsing `key=val` pairs inside parentheses. 
* `let` bindings might be stored in a block, or they might not be, depending on special cases: - ``` - # Special cases not in a block - let x=1 ; end ==> (let (= x 1) (block)) - let x::1 ; end ==> (let (:: x 1) (block)) - let x ; end ==> (let x (block)) - - # In a block - let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) - let x+=1 ; end ==> (let (block (+= x 1)) (block)) - ``` + ```julia + # Special cases not in a block + let x=1 ; end # ==> (let (= x 1) (block)) + let x::1 ; end # ==> (let (:: x 1) (block)) + let x ; end # ==> (let x (block)) + + # In a block + let x=1,y=2 ; end # ==> (let (block (= x 1) (= y 2) (block))) + let x+=1 ; end # ==> (let (block (+= x 1)) (block)) + ``` * The `elseif` condition is always in a block but not the `if` condition. Presumably because of the need to add a line number node in the flisp parser From ad69b5677056d29406811f040246d4e2cf7e1d4d Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sun, 30 Jul 2023 12:03:30 -0400 Subject: [PATCH 0704/1109] Add public keyword (JuliaLang/JuliaSyntax.jl#320) Add public as a contextual keyword that is parsed as a keyword only when it is both at the top-level and not followed by `(`, `=`, or `[`. Aside from this, the `public` keyword uses the same syntax as the `export` keyword and lowers analogously. Emit a warning when parsing `public` at the top-level followed by a `(`, `=`, or `[`. 
Co-authored-by: Claire Foster --- JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/parser.jl | 26 ++++++++++++++++++++++---- JuliaSyntax/src/tokenize.jl | 1 + JuliaSyntax/test/diagnostics.jl | 10 +++++++--- JuliaSyntax/test/parser.jl | 33 +++++++++++++++++++++++++++++++-- JuliaSyntax/test/tokenize.jl | 1 + 6 files changed, 63 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 54f37e88e5b0d..6de2f26a0cd2f 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -69,6 +69,7 @@ const _kind_names = "mutable" "outer" "primitive" + "public" "type" "var" "END_CONTEXTUAL_KEYWORDS" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0cc9383fb5810..96009f25618ac 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -482,7 +482,7 @@ end # flisp: parse-stmts function parse_stmts(ps::ParseState) mark = position(ps) - do_emit = parse_Nary(ps, parse_docstring, (K";",), (K"NewlineWs",)) + do_emit = parse_Nary(ps, parse_public, (K";",), (K"NewlineWs",)) # check for unparsed junk after an expression junk_mark = position(ps) while peek(ps) ∉ KSet"EndMarker NewlineWs" @@ -499,6 +499,24 @@ function parse_stmts(ps::ParseState) end end +# Parse `public foo, bar` +# +# We *only* call this from toplevel contexts (file and module level) for +# compatibility. In the future we should probably make public a full fledged +# keyword like `export`. +function parse_public(ps::ParseState) + if ps.stream.version >= (1, 11) && peek(ps) == K"public" + if peek(ps, 2) ∈ KSet"( = [" + # this branch is for compatibility with use of public as a non-keyword. + # it should be removed at some point. 
+ emit_diagnostic(ps, warning="using public as an identifier is deprecated") + else + return parse_resword(ps) + end + end + parse_docstring(ps) +end + # Parse docstrings attached by a space or single newline # # flisp: parse-docstring @@ -1958,11 +1976,11 @@ function parse_resword(ps::ParseState) end # module A \n a \n b \n end ==> (module A (block a b)) # module A \n "x"\na \n end ==> (module A (block (doc (string "x") a))) - parse_block(ps, parse_docstring) + parse_block(ps, parse_public) bump_closing_token(ps, K"end") emit(ps, mark, K"module", word == K"baremodule" ? BARE_MODULE_FLAG : EMPTY_FLAGS) - elseif word == K"export" + elseif word in KSet"export public" # export a ==> (export a) # export @a ==> (export @a) # export a, \n @b ==> (export a @b) @@ -1971,7 +1989,7 @@ function parse_resword(ps::ParseState) # export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b)))) bump(ps, TRIVIA_FLAG) parse_comma_separated(ps, x->parse_atsym(x, false)) - emit(ps, mark, K"export") + emit(ps, mark, word) elseif word in KSet"import using" parse_imports(ps) elseif word == K"do" diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 7f54a980bf342..739a24c6ff1a9 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -1344,6 +1344,7 @@ K"let", K"local", K"macro", K"module", +K"public", K"quote", K"return", K"struct", diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 423bb8824c0d4..ea2feb37a69f6 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -1,5 +1,5 @@ -function diagnostic(str; only_first=false, allow_multiple=false, rule=:all) - stream = ParseStream(str) +function diagnostic(str; only_first=false, allow_multiple=false, rule=:all, version=v"1.6") + stream = ParseStream(str; version=version) parse!(stream, rule=rule) if allow_multiple stream.diagnostics @@ -127,8 +127,12 @@ end Diagnostic(10, 13, :warning, "parentheses are not required here") @test 
diagnostic("export (x)") == Diagnostic(8, 10, :warning, "parentheses are not required here") - @test diagnostic("export :x") == + @test diagnostic("export :x") == Diagnostic(8, 9, :error, "expected identifier") + @test diagnostic("public = 4", version=v"1.11") == + diagnostic("public[7] = 5", version=v"1.11") == + diagnostic("public() = 6", version=v"1.11") == + Diagnostic(1, 6, :warning, "using public as an identifier is deprecated") end @testset "diagnostics for literal parsing" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 992284c9c46b0..efb183d33c011 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -17,13 +17,22 @@ function test_parse(production, input, output) else opts = NamedTuple() end - @test parse_to_sexpr_str(production, input; opts...) == output + parsed = parse_to_sexpr_str(production, input; opts...) + if output isa Regex # Could be AbstractPattern, but that type was added in Julia 1.6. + @test match(output, parsed) !== nothing + else + @test parsed == output + end end function test_parse(inout::Pair) test_parse(JuliaSyntax.parse_toplevel, inout...) end +const PARSE_ERROR = r"\(error-t " + +with_version(v::VersionNumber, (i,o)::Pair) = ((;v=v), i) => o + # TODO: # * Extract the following test cases from the source itself. 
# * Use only the green tree to generate the S-expressions @@ -435,7 +444,7 @@ tests = [ "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" - # + # ], JuliaSyntax.parse_resword => [ # In normal_context @@ -934,6 +943,26 @@ tests = [ "10.0e1000'" => "(ErrorNumericOverflow)" "10.0f100'" => "(ErrorNumericOverflow)" ], + JuliaSyntax.parse_stmts => with_version.(v"1.11", [ + "function f(public)\n public + 3\nend" => "(function (call f public) (block (call-i public + 3)))" + "public A, B" => "(public A B)" + "if true \n public *= 4 \n end" => "(if true (block (*= public 4)))" + "module Mod\n public A, B \n end" => "(module Mod (block (public A B)))" + "module Mod2\n a = 3; b = 6; public a, b\n end" => "(module Mod2 (block (= a 3) (= b 6) (public a b)))" + "a = 3; b = 6; public a, b" => "(toplevel-; (= a 3) (= b 6) (public a b))" + "begin \n public A, B \n end" => PARSE_ERROR + "if true \n public A, B \n end" => PARSE_ERROR + "public export=true foo, bar" => PARSE_ERROR # but these may be + "public experimental=true foo, bar" => PARSE_ERROR # supported soon ;) + "public(x::String) = false" => "(= (call public (::-i x String)) false)" + "module M; export @a; end" => "(module M (block (export @a)))" + "module M; public @a; end" => "(module M (block (public @a)))" + "module M; export ⤈; end" => "(module M (block (export ⤈)))" + "module M; public ⤈; end" => "(module M (block (public ⤈)))" + "public = 4" => "(= public 4)" + "public[7] = 5" => "(= (ref public 7) 5)" + "public() = 6" => "(= (call public) 6)" + ]), JuliaSyntax.parse_docstring => [ """ "notdoc" ] """ => "(string \"notdoc\")" """ "notdoc" \n] """ => "(string \"notdoc\")" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 61bbbb95be178..07972c9850a23 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -932,6 +932,7 @@ const all_kws = Set([ 
"local", "macro", "module", + "public", "quote", "return", "struct", From 4615a2a244509789e4fb031d0ba261f608de8660 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 3 Aug 2023 00:28:58 -0400 Subject: [PATCH 0705/1109] Make `SourceFile.code` infer concretely (JuliaLang/JuliaSyntax.jl#337) --- JuliaSyntax/src/source_files.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 72811077b7c5a..06cae0085c248 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -13,7 +13,7 @@ information for a byte offset can be looked up via the `source_line`, struct SourceFile # TODO: Rename SourceFile -> SourceText / SourceChunk / SourceIndex / SourceLineIndex ? # See https://github.com/JuliaLang/JuliaSyntax.jl/issues/190 - code::SubString + code::SubString{String} # Offset of `code` within a larger chunk of source text byte_offset::Int filename::Union{Nothing,String} From 974a1d60a5e08acc00de9b072483cbadf270ebd8 Mon Sep 17 00:00:00 2001 From: Xianda Sun <5433119+sunxd3@users.noreply.github.com> Date: Sat, 12 Aug 2023 07:22:54 +0100 Subject: [PATCH 0706/1109] Remove `num_tokens` argument from `bump_glue` (JuliaLang/JuliaSyntax.jl#338) --- JuliaSyntax/src/parse_stream.jl | 4 ++-- JuliaSyntax/src/parser.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index fcd35e3189360..78571d56c1235 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -731,12 +731,12 @@ This is for use in special circumstances where the parser needs to resolve lexing ambiguities. There's no special whitespace handling — bump any whitespace if necessary with bump_trivia. 
""" -function bump_glue(stream::ParseStream, kind, flags, num_tokens) +function bump_glue(stream::ParseStream, kind, flags) i = stream.lookahead_index h = SyntaxHead(kind, flags) push!(stream.tokens, SyntaxToken(h, kind, false, stream.lookahead[i+1].next_byte)) - stream.lookahead_index += num_tokens + stream.lookahead_index += 2 stream.peek_count = 0 return position(stream) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 96009f25618ac..d39429cb8ed2c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1204,7 +1204,7 @@ function parse_unary(ps::ParseState) # -1.0f0 ==> -1.0f0 # -2*x ==> (call-i -2 * x) # +0xff ==> 0xff - bump_glue(ps, kind(t2), EMPTY_FLAGS, 2) + bump_glue(ps, kind(t2), EMPTY_FLAGS) end return end From 49bad4d88cdf79f2c7209daacd66aa216de0e7b9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 13 Aug 2023 16:43:58 +1000 Subject: [PATCH 0707/1109] Only show the first parse error (JuliaLang/JuliaSyntax.jl#344) Parser recovery commonly results in several errors which refer to much the same location in the broken source file and are not useful to the user. Currently the most useful error is the first one, so this PR trims down error printing to only show that one. 
--- JuliaSyntax/src/parser_api.jl | 8 +++++- JuliaSyntax/test/parser_api.jl | 46 +++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 6eca864d0fc36..51548a995bc11 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -17,7 +17,13 @@ end function Base.showerror(io::IO, err::ParseError) println(io, "ParseError:") - show_diagnostics(io, err.diagnostics, err.source) + # Only show the first parse error for now - later errors are often + # misleading due to the way recovery works + i = findfirst(is_error, err.diagnostics) + if isnothing(i) + i = lastindex(err.diagnostics) + end + show_diagnostics(io, err.diagnostics[1:i], err.source) end """ diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 4ceb8b584592c..9e05dee151e5a 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -117,7 +117,7 @@ end @testset "ParseError printing" begin try - JuliaSyntax.parsestmt(JuliaSyntax.SyntaxNode, "a -- b -- c", filename="somefile.jl") + parsestmt(SyntaxNode, "a -- b -- c", filename="somefile.jl") @assert false "error should be thrown" catch exc @test exc isa JuliaSyntax.ParseError @@ -125,20 +125,48 @@ end ParseError: # Error @ somefile.jl:1:3 a -- b -- c - # └┘ ── invalid operator - # Error @ somefile.jl:1:8 - a -- b -- c - # └┘ ── invalid operator""" + # └┘ ── invalid operator""" @test occursin("Stacktrace:\n", sprint(showerror, exc, catch_backtrace())) file_url = JuliaSyntax._file_url("somefile.jl") @test sprint(showerror, exc, context=:color=>true) == """ ParseError: \e[90m# Error @ \e[0;0m\e]8;;$file_url#1:3\e\\\e[90msomefile.jl:1:3\e[0;0m\e]8;;\e\\ a \e[48;2;120;70;70m--\e[0;0m b -- c - \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m - \e[90m# Error @ \e[0;0m\e]8;;$file_url#1:8\e\\\e[90msomefile.jl:1:8\e[0;0m\e]8;;\e\\ - a -- b \e[48;2;120;70;70m--\e[0;0m c - \e[90m# └┘ ── 
\e[0;0m\e[91minvalid operator\e[0;0m""" + \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" + end + + try + # Test that warnings are printed first followed by only the first error + parsestmt(SyntaxNode, """ + @(a) + x -- y + z -- y""", filename="somefile.jl") + @assert false "error should be thrown" + catch exc + @test exc isa JuliaSyntax.ParseError + @test sprint(showerror, exc) == """ + ParseError: + # Warning @ somefile.jl:1:2 + @(a) + #└─┘ ── parenthesizing macro names is unnecessary + # Error @ somefile.jl:2:1 + @(a) + x + ╙ ── unexpected text after parsing statement""" + end + + try + # Test that initial warnings are always printed + parsestmt(SyntaxNode, """ + @(a)""", filename="somefile.jl") + @assert false "error should be thrown" + catch exc + @test exc isa JuliaSyntax.ParseError + @test sprint(showerror, exc) == """ + ParseError: + # Warning @ somefile.jl:1:2 + @(a) + #└─┘ ── parenthesizing macro names is unnecessary""" end end From 7363c2114a67458007aac3a0384159387c8d3f79 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 13 Aug 2023 16:55:12 +1000 Subject: [PATCH 0708/1109] Bump version to 0.4.6 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 03e9b770a0cbd..2e50985469900 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "0.4.5" +version = "0.4.6" [compat] julia = "1.0" From c0bbb4458f35e37b993dfd894c3db7fb24aebdc3 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sun, 27 Aug 2023 20:01:59 -0500 Subject: [PATCH 0709/1109] Fix typo in docstring --- JuliaSyntax/src/hooks.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index c50e89b9411da..a93b234632243 100644 --- a/JuliaSyntax/src/hooks.jl +++ 
b/JuliaSyntax/src/hooks.jl @@ -285,7 +285,7 @@ end Connect the JuliaSyntax parser to the Julia runtime so that it replaces the flisp parser for all parsing work. That is, JuliaSyntax will be used for -`include()` `Meta.parse()`, the REPL, etc. To reset to the reference parser, +`include()`, `Meta.parse()`, the REPL, etc. To reset to the reference parser, use `enable_in_core!(false)`. Keyword arguments: From 03324306d00f73ae11cb3fd5093600bef47005c8 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Fri, 15 Sep 2023 11:43:13 -0500 Subject: [PATCH 0710/1109] add a "not your falut" hint to the fallback to flisp error --- JuliaSyntax/src/hooks.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index a93b234632243..4c828a3ee457d 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -255,7 +255,8 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti #-#-#- """) end - @error("JuliaSyntax parser failed — falling back to flisp!", + @error("JuliaSyntax parser failed — falling back to flisp!\n"* + "This is probably not your fault, please submit a bug report (https://github.com/JuliaLang/JuliaSyntax.jl/issues)", exception=(exc,catch_backtrace()), offset=offset, code=code) From fa2db082996deeca3d26605510a6eb0aa3fc440f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 15 Oct 2023 16:53:28 +1000 Subject: [PATCH 0711/1109] Fix SyntaxNode->Expr conversions of unterminated cmd strings (JuliaLang/JuliaSyntax.jl#367) --- JuliaSyntax/src/expr.jl | 3 +-- JuliaSyntax/src/parser.jl | 6 ++---- JuliaSyntax/test/expr.jl | 7 +++++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index d5cef886bf1f3..64692eb765dac 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -138,10 +138,9 @@ function _string_to_Expr(k, args) # If there's a single string remaining after joining, we unwrap # to give 
a string literal. # """\n a\n b""" ==> "a\nb" - # k == K"cmdstring" follows this branch return only(args2) else - @check k == K"string" + # This only happens when k == K"string" or when an error has occurred. return Expr(:string, args2...) end end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d39429cb8ed2c..266d58e5982ca 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -349,10 +349,8 @@ end # Parser # # The definitions and top-level comments here were copied to match the -# structure of Julia's official flisp-based parser. -# -# This is to make both codebases mutually understandable and make porting -# changes simple. +# structure of Julia's previous flisp-based parser to make both codebases +# mutually understandable and make porting changes simple. # # The `parse_*` functions are listed here roughly in order of increasing # precedence (lowest to highest binding power). A few helper functions are diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 810390c8fb585..a349a3a1680a2 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -286,6 +286,13 @@ "a\nb") @test parsestmt("\"\"\"\n a\n \$x\n b\n c\"\"\"") == Expr(:string, "a\n", :x, "\nb\nc") + # Incomplete cases + @test parsestmt("`x", ignore_errors=true) == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), + Expr(:string, "x", Expr(:error))) + @test parsestmt("`", ignore_errors=true) == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), + Expr(:string, Expr(:error))) end @testset "Char conversions" begin From e7f9d6c0e86e8d62f626b96fc82d9222832e33bb Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Oct 2023 07:49:23 -0500 Subject: [PATCH 0712/1109] Use triple quotes --- JuliaSyntax/src/hooks.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 4c828a3ee457d..aec9b5ff5a4a0 100644 --- 
a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -255,8 +255,8 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti #-#-#- """) end - @error("JuliaSyntax parser failed — falling back to flisp!\n"* - "This is probably not your fault, please submit a bug report (https://github.com/JuliaLang/JuliaSyntax.jl/issues)", + @error("""JuliaSyntax parser failed — falling back to flisp! + This is probably not your fault, please submit a bug report (https://github.com/JuliaLang/JuliaSyntax.jl/issues)""", exception=(exc,catch_backtrace()), offset=offset, code=code) From d5ea10ce0ec8608b23aa8d11cf4e99b7f47999f4 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Oct 2023 09:28:50 -0500 Subject: [PATCH 0713/1109] Update src/hooks.jl Co-authored-by: Sebastian Pfitzner --- JuliaSyntax/src/hooks.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index aec9b5ff5a4a0..6bd86b3058b97 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -256,7 +256,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti """) end @error("""JuliaSyntax parser failed — falling back to flisp! - This is probably not your fault, please submit a bug report (https://github.com/JuliaLang/JuliaSyntax.jl/issues)""", + This is not your fault. Please submit a bug report to https://github.com/JuliaLang/JuliaSyntax.jl/issues""", exception=(exc,catch_backtrace()), offset=offset, code=code) From dfa6955d9da5b96a92368316ddd4b094201cd66f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 19 Oct 2023 20:54:35 +1000 Subject: [PATCH 0714/1109] Better recovery when parsing empty characters (JuliaLang/JuliaSyntax.jl#369) Consume the trailing ' in empty characters '' for better error recovery when `ignore_errors=true` is set. 
--- JuliaSyntax/src/parser.jl | 1 + JuliaSyntax/test/parser.jl | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 266d58e5982ca..2f23ed1c914c8 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3402,6 +3402,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) if k == K"'" # '' ==> (char (error)) bump_invisible(ps, K"error", error="empty character literal") + bump(ps, TRIVIA_FLAG) elseif k == K"EndMarker" # ' ==> (char (error)) bump_invisible(ps, K"error", error="unterminated character literal") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index efb183d33c011..6db8c08b0fbe8 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -990,15 +990,19 @@ parsestmt_test_specs = [ "x in'c'" => "(call-i x in (char 'c'))" "1where'c'" => "(where 1 (char 'c'))" ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" + # Empty character consumes trailing ' delimiter (ideally this could be + # tested above but we don't require the input stream to be consumed in the + # unit tests there. + "''" => "(char (error))" # The following may not be ideal error recovery! But at least the parser # shouldn't crash "@(x y)" => "(macrocall (parens @x (error-t y)))" "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" - # The following are currently broken but at least the parser shouldn't + # The following is currently broken but at least the parser shouldn't # crash. 
- "x in' '" => "(wrapper (call-i x in (char (error))) (error-t '))" + "x in' '" => "(call-i x in (char (error)))" ] @testset "Parser does not crash on broken code" begin From be909ed3a4433160cc7a1233d0799decbb2796dc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 22 Oct 2023 16:28:17 +1000 Subject: [PATCH 0715/1109] Always `emit()` nodes with zero children, don't `bump()` them (JuliaLang/JuliaSyntax.jl#371) Cleanup weird cases where the parser would `bump()` interior nodes into the output stream rather than `emit()`ing them. Emitting all interior nodes explicitly means we can remove some special cases which occurred during tree building. As part of this, fix some errors converting broken expressions like `x var"y"` from `SyntaxNode` to `Expr`. --- JuliaSyntax/src/expr.jl | 6 ++++-- JuliaSyntax/src/literal_parsing.jl | 21 +++++++++++++++------ JuliaSyntax/src/parse_stream.jl | 6 ++---- JuliaSyntax/src/parser.jl | 3 ++- JuliaSyntax/src/syntax_tree.jl | 8 +------- JuliaSyntax/test/expr.jl | 6 +++++- JuliaSyntax/test/parser.jl | 4 ++-- JuliaSyntax/test/test_utils.jl | 1 + 8 files changed, 32 insertions(+), 23 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 64692eb765dac..1dbfc4cd3ac67 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -196,8 +196,10 @@ end # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) - if k == K"var" || k == K"char" - @check length(args) == 1 + if (k == K"var" || k == K"char") && length(args) == 1 + # Ideally we'd like `@check length(args) == 1` as an invariant for all + # K"var" and K"char" nodes, but this discounts having embedded error + # nodes when ignore_errors=true is set. 
return args[1] elseif k == K"string" || k == K"cmdstring" return _string_to_Expr(k, args) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index 5cd610ce1c87c..a027985ae62f7 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -1,3 +1,12 @@ +""" +Nontrivia tokens (leaf nodes / literals) which are malformed are parsed into +ErrorVal when `ignore_errors=true` during parsing. +""" +struct ErrorVal +end + +Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) + #------------------------------------------------------------------------------- # This file contains utility functions for converting undecorated source # strings into Julia values. For example, string->number, string unescaping, etc. @@ -364,7 +373,6 @@ end #------------------------------------------------------------------------------- function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) - # Leaf node k = kind(head) # Any errors parsing literals are represented as ErrorVal() - this can # happen when the user sets `ignore_errors=true` during parsing. @@ -404,6 +412,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) return false end + # TODO: Avoid allocating temporary String here val_str = String(txtbuf[srcrange]) if k == K"Integer" parse_int_literal(val_str) @@ -417,9 +426,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) else Symbol(normalize_identifier(val_str)) end - elseif is_keyword(k) - # This should only happen for tokens nested inside errors - Symbol(val_str) elseif is_operator(k) isempty(srcrange) ? 
Symbol(untokenize(k)) : # synthetic invisible tokens @@ -436,9 +442,12 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) Symbol("core_@cmd") elseif is_syntax_kind(head) nothing + elseif is_keyword(k) + # This should only happen for tokens nested inside errors + Symbol(val_str) else - # FIXME: this allows us to recover from trivia is_error nodes - # that we insert below + # Other kinds should only happen for tokens nested inside errors + # TODO: Consolidate this with the is_keyword() above? Something else? ErrorVal() end end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 78571d56c1235..dcbb52aff665b 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1021,7 +1021,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; while true last_token = j <= lastindex(ranges) ? ranges[j].last_token : lastindex(tokens) - # Process tokens to nodes for all tokens used by the next internal node + # Process tokens to leaf nodes for all tokens used by the next internal node while i <= last_token t = tokens[i] if kind(t) == K"TOMBSTONE" @@ -1031,9 +1031,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; srcrange = (stream.tokens[i-1].next_byte: stream.tokens[i].next_byte - 1) h = head(t) - children = (is_syntax_kind(h) || is_keyword(h)) ? 
- (stack[n].node for n=1:0) : nothing - node = make_node(h, srcrange, children) + node = make_node(h, srcrange, nothing) if !isnothing(node) push!(stack, (first_token=i, node=node)) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2f23ed1c914c8..c246a3327069a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1955,7 +1955,8 @@ function parse_resword(ps::ParseState) elseif word in KSet"break continue" # break ==> (break) # continue ==> (continue) - bump(ps) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, word) k = peek(ps) if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol)) recover(is_closer_or_newline, ps, TRIVIA_FLAG, diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index a2df524d59ba0..02ef17f483aec 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -53,12 +53,6 @@ text by calling one of the parser API functions such as [`parseall`](@ref) """ const SyntaxNode = TreeNode{SyntaxData} -# Value of an error node with no children -struct ErrorVal -end - -Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) - function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; keep_parens=false, position::Integer=1) GC.@preserve source begin @@ -71,7 +65,7 @@ end function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, raw::GreenNode{SyntaxHead}, position::Int, keep_parens::Bool) - if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) + if !haschildren(raw) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. 
valrange = position:position + span(raw) - 1 diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index a349a3a1680a2..1a943044815f8 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -743,7 +743,11 @@ @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) @test parsestmt("x do", ignore_errors=true) == - Expr(:block, :x, Expr(:error, Expr(:do_lambda))) + Expr(:block, :x, Expr(:error, :do)) + @test parsestmt("x var\"y\"", ignore_errors=true) == + Expr(:block, :x, Expr(:error, :var, ErrorVal(), "y", ErrorVal())) + @test parsestmt("var\"y", ignore_errors=true) == + Expr(:var, :y, Expr(:error)) end @testset "import" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 6db8c08b0fbe8..8f85bbeb261e9 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -491,7 +491,7 @@ tests = [ ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))" "struct A end" => "(struct A (block))" - "struct try end" => "(struct (error (try)) (block))" + "struct try end" => "(struct (error try) (block))" # return "return\nx" => "(return)" "return)" => "(return)" @@ -503,7 +503,7 @@ tests = [ # module/baremodule "module A end" => "(module A (block))" "baremodule A end" => "(module-bare A (block))" - "module do \n end" => "(module (error (do)) (block))" + "module do \n end" => "(module (error do) (block))" "module \$A end" => "(module (\$ A) (block))" "module A \n a \n b \n end" => "(module A (block a b))" """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))""" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index c8d87ac5e1e1e..e85621f1f3900 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -19,6 +19,7 @@ using .JuliaSyntax: # Nodes GreenNode, SyntaxNode, + ErrorVal, # Node inspection kind, flags, From 
50a22cb0e4346c34c4632327d4bfc24d307e5288 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 29 Oct 2023 16:38:53 +1000 Subject: [PATCH 0716/1109] Fix diagnostics printing when `pwd()` doesn't exist (JuliaLang/JuliaSyntax.jl#373) Computing the file URL for pretty printing using `abspath` can fail when the current working directory doesn't exist. This is a quick fix for this issue (which is incredibly confusing/disruptive if the user does manage to enter a nonexistant working directory!) A more complete fix would avoid ever looking at the working directory when printing diagnostics, instead requiring the caller to pass in a richer definition of the "location of source code" than the mere file name as a string. However getting something sensible working there is (a) breaking and (b) unclear on may details. So just patching this up quickly seems good for now. --- JuliaSyntax/src/diagnostics.jl | 32 ++++++++++++++++++++++++-------- JuliaSyntax/test/diagnostics.jl | 21 +++++++++++++++++++++ 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index c84fa0ac91c49..9a5ea96149b23 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -44,13 +44,26 @@ Base.range(d::Diagnostic) = first_byte(d):last_byte(d) # Make relative path into a file URL function _file_url(filename) - @static if Sys.iswindows() - # TODO: Test this with windows terminal - path = replace(abspath(filename), '\\'=>'/') - else - path = abspath(filename) + try + @static if Sys.iswindows() + # TODO: Test this with windows terminal + path = replace(abspath(filename), '\\'=>'/') + else + path = abspath(filename) + end + return "file://$(path)" + catch exc + # abspath may fail if working directory doesn't exist + # TODO: It seems rather non-ideal to have the behavior here depend on + # the state of the local filesystem. And yet links in diagnostics seem + # useful. 
+ # + # Ideally it'd be up to the caller to provide some notion of the + # "absolute location" of the source code resource when SourceFile is + # constructed. This is often not related to the local filesystem - it + # could be in memory, a fragment embedded in another file, etc etc. + return nothing end - "file://$(path)" end function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) @@ -64,8 +77,11 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) file_href = nothing if !isnothing(filename) locstr = "$filename:$linecol" - if !startswith(filename, "REPL[") - file_href = _file_url(filename)*"#$linecol" + if !startswith(filename, "REPL[") && get(io, :color, false) + url = _file_url(filename) + if !isnothing(url) + file_href = url*"#$linecol" + end end else locstr = "line $linecol" diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index ea2feb37a69f6..66fe4fda180d7 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -218,4 +218,25 @@ end # Error @ line 1:8 a -- b -- c # └┘ ── invalid operator""" + + stream = JuliaSyntax.ParseStream("a -- b") + JuliaSyntax.parse!(stream) + fname = "test.jl" + sf = SourceFile(stream, filename=fname) + url = JuliaSyntax._file_url(fname) + @test sprint(JuliaSyntax.show_diagnostics, stream.diagnostics, sf, + context=:color=>true) == """ + \e[90m# Error @ \e[0;0m\e]8;;$url#1:3\e\\\e[90mtest.jl:1:3\e[0;0m\e]8;;\e\\ + a \e[48;2;120;70;70m--\e[0;0m b + \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" + + if Sys.isunix() + mktempdir() do tempdirname + cd(tempdirname) do + rm(tempdirname) + # Test _file_url doesn't fail with nonexistant directories + @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistant__", "test.jl"))) + end + end + end end From e9d43e6ce01b0d054c5133092e352d3a0b9a2ef1 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Tue, 31 Oct 2023 03:55:42 +0100 Subject: [PATCH 0717/1109] Fix Expr conversion of 
erroneous operator dot call (JuliaLang/JuliaSyntax.jl#374) --- JuliaSyntax/src/expr.jl | 7 +++++-- JuliaSyntax/test/expr.jl | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 1dbfc4cd3ac67..21c98bb9925cd 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -266,13 +266,16 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # Move parameters blocks to args[2] _reorder_parameters!(args, 2) if headsym === :dotcall + funcname = args[1] if is_prefix_call(head) headsym = :. - args = Any[args[1], Expr(:tuple, args[2:end]...)] + args = Any[funcname, Expr(:tuple, args[2:end]...)] else # operator calls headsym = :call - args[1] = Symbol(".", args[1]) + if funcname isa Symbol + args[1] = Symbol(:., funcname) + end # else funcname could be an Expr(:error), just propagate it end end if do_lambda isa Expr diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 1a943044815f8..71849da14289e 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -463,6 +463,9 @@ @test parsestmt("f(.+)") == Expr(:call, :f, Expr(:., :+)) @test parsestmt("(a, .+)") == Expr(:tuple, :a, Expr(:., :+)) @test parsestmt("A.:.+") == Expr(:., :A, QuoteNode(Symbol(".+"))) + + # Issue #341 + @test parsestmt("./x", ignore_errors=true) == Expr(:call, Expr(:error, Expr(:., :/)), :x) end @testset "let" begin From 3f5ac1820fdab4dc60b93d2396644bf64f530b19 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Wed, 1 Nov 2023 08:26:34 -0400 Subject: [PATCH 0718/1109] handle ZWJ and emoji sequences, don't break identifiers within graphemes (JuliaLang/JuliaSyntax.jl#372) * handle ZWJ and emoji sequences * forbid ZWNJ at end * fix tests on Julia < 1.5 * ascii fast path * fix for earlier Julia versions * Update test/tokenize.jl --- JuliaSyntax/src/tokenize.jl | 22 ++++++++++++++++++++-- JuliaSyntax/test/diagnostics.jl | 2 +- JuliaSyntax/test/tokenize.jl | 6 ++++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 739a24c6ff1a9..9c19c04008d2d 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -1284,13 +1284,31 @@ function lex_backtick(l::Lexer) end const MAX_KW_LENGTH = 10 +const ascii_is_identifier_char = Bool[is_identifier_char(Char(b)) for b=0x00:0x7f] function lex_identifier(l::Lexer, c) h = simple_hash(c, UInt64(0)) n = 1 + ascii = isascii(c) + graphemestate = Ref(Int32(ascii)) # all ASCII id chars are UTF8PROC_BOUNDCLASS_OTHER + graphemestate_peek = Ref(zero(Int32)) while true pc, ppc = dpeekchar(l) - if (pc == '!' && ppc == '=') || !is_identifier_char(pc) - break + ascii = ascii && isascii(pc) + if ascii # fast path + pc_byte = pc % UInt8 + @inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1] + break + end + elseif Unicode.isgraphemebreak!(graphemestate, c, pc) + if (pc == '!' 
&& ppc == '=') || !is_identifier_char(pc) + break + end + elseif pc in ('\u200c','\u200d') # ZWNJ/ZWJ control characters + # ZWJ/ZWNJ only within grapheme sequences, not at end + graphemestate_peek[] = graphemestate[] + if Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc) + break + end end c = readchar(l) h = simple_hash(c, h) diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 66fe4fda180d7..ae7aae7d655ab 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -7,7 +7,7 @@ function diagnostic(str; only_first=false, allow_multiple=false, rule=:all, vers if !only_first @test length(stream.diagnostics) == 1 end - return stream.diagnostics[1] + return isempty(stream.diagnostics) ? nothing : stream.diagnostics[1] end end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 07972c9850a23..26ab044a617e4 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -44,12 +44,14 @@ end end # testset @testset "tokenize unicode" begin - str = "𝘋 =2β" + # FIXME: rm VERSION check once we implement our own is_identifier_char + emoji = VERSION < v"1.5" ? "😄" : "\U1F3F3\UFE0F\U200D\U1F308" # 🏳️‍🌈 requires newer Unicode + str = "𝘋 =2"*emoji for s in [str, IOBuffer(str)] l = tokenize(s) kinds = [K"Identifier", K"Whitespace", K"=", K"Integer", K"Identifier", K"EndMarker"] - token_strs = ["𝘋", " ", "=", "2", "β", ""] + token_strs = ["𝘋", " ", "=", "2", emoji, ""] for (i, n) in enumerate(l) @test kind(n) == kinds[i] @test untokenize(n, str) == token_strs[i] From 2caddbabb4369ba89392cab1cc8ee40e0df32fe7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 4 Nov 2023 05:55:23 +1000 Subject: [PATCH 0719/1109] Generalize fuzz testing tools (JuliaLang/JuliaSyntax.jl#379) This rearrangement allows us to fuzz test the hooks (which use the low level parser API) as well as the high level parser API. 
--- JuliaSyntax/Project.toml | 3 +- JuliaSyntax/test/diagnostics.jl | 11 ++-- JuliaSyntax/test/fuzz_test.jl | 96 ++++++++++++++++++++++----------- JuliaSyntax/test/test_utils.jl | 48 +++++++++++++++++ 4 files changed, 121 insertions(+), 37 deletions(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 2e50985469900..6ffbaa4007738 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -9,7 +9,8 @@ julia = "1.0" [deps] [extras] +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "Logging"] diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index ae7aae7d655ab..647741d5a1e48 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -231,12 +231,11 @@ end \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" if Sys.isunix() - mktempdir() do tempdirname - cd(tempdirname) do - rm(tempdirname) - # Test _file_url doesn't fail with nonexistant directories - @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistant__", "test.jl"))) - end + tempdirname = mktempdir() + cd(tempdirname) do + rm(tempdirname) + # Test _file_url doesn't fail with nonexistant directories + @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistant__", "test.jl"))) end end end diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index f792b2c68c230..b441938157469 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -1,5 +1,7 @@ using JuliaSyntax using JuliaSyntax: tokenize +import Logging +import Test # Parser fuzz testing tools. @@ -758,6 +760,7 @@ const cutdown_tokens = [ "\t" "\n" "x" + "β" "@" "," ";" @@ -884,33 +887,36 @@ const cutdown_tokens = [ ] #------------------------------------------------------------------------------- - -# The parser should never throw an exception. 
To test whether this is true, -# try passing randomly generated bad input data into it. -function _fuzz_test(bad_input_iter) - error_strings = [] - for str in bad_input_iter - try - JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true); - catch exc - !(exc isa InterruptException) || rethrow() - rstr = reduce_text(str, parser_throws_exception) - @error "Parser threw exception" rstr exception=current_exceptions() - push!(error_strings, rstr) - end +# Parsing functions for use with fuzz_test + +function try_parseall_failure(str) + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true); + return nothing + catch exc + !(exc isa InterruptException) || rethrow() + rstr = reduce_text(str, parser_throws_exception) + @error "Parser threw exception" rstr exception=current_exceptions() + return rstr end - return error_strings end -""" -Fuzz test parser against all tuples of length `N` with elements taken from -`tokens`. -""" -function fuzz_tokens(tokens, N) - iter = (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...)) - _fuzz_test(iter) +function try_hook_failure(str) + try + test_logger = Test.TestLogger() + Logging.with_logger(test_logger) do + Meta_parseall(str) + end + if !isempty(test_logger.logs) + return str + end + catch exc + return str + end + return nothing end +#------------------------------------------------------------------------------- """Delete `nlines` adjacent lines from code, at `niters` randomly chosen positions""" function delete_lines(lines, nlines, niters) selection = trues(length(lines)) @@ -953,29 +959,59 @@ function delete_tokens(code, tokens, ntokens, niters) end #------------------------------------------------------------------------------- -# Fuzzer functions +# Generators for "potentially bad input" + +""" +Fuzz test parser against all tuples of length `N` with elements taken from +`tokens`. 
+""" +function product_token_fuzz(tokens, N) + (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...)) +end """ Fuzz test parser against randomly generated binary strings """ -function fuzz_binary(nbytes, N) - bad_strs = _fuzz_test(String(rand(UInt8, nbytes)) for _ in 1:N) - reduce_text.(bad_strs, parser_throws_exception) +function random_binary_fuzz(nbytes, N) + (String(rand(UInt8, nbytes)) for _ in 1:N) end """ Fuzz test by deleting random lines of some given source `code` """ -function fuzz_lines(code, N; nlines=10, niters=10) +function deleted_line_fuzz(code, N; nlines=10, niters=10) lines = split(code, '\n') - _fuzz_test(delete_lines(lines, nlines, niters) for _=1:N) + (delete_lines(lines, nlines, niters) for _=1:N) end """ Fuzz test by deleting random tokens from given source `code` """ -function fuzz_tokens(code, N; ntokens=10, niters=10) +function deleted_token_fuzz(code, N; ntokens=10, niters=10) ts = tokenize(code) - _fuzz_test(delete_tokens(code, ts, ntokens, niters) for _=1:N) + (delete_tokens(code, ts, ntokens, niters) for _=1:N) end +""" +Fuzz test a parsing function by trying it with many "bad" input strings. + +`try_parsefail` should return `nothing` when the parser succeeds, and return a +string (or reduced string) when parsing succeeds. 
+""" +function fuzz_test(try_parsefail::Function, bad_input_iter) + error_strings = [] + for str in bad_input_iter + res = try_parsefail(str) + if !isnothing(res) + push!(error_strings, res) + end + end + return error_strings +end + + +# Examples +# +# fuzz_test(try_hook_failure, product_token_fuzz(cutdown_tokens, 2)) +# fuzz_test(try_parseall_failure, product_token_fuzz(cutdown_tokens, 2)) + diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index e85621f1f3900..69915af2f5b1a 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -422,3 +422,51 @@ function parse_sexpr(code) end +#------------------------------------------------------------------------------- +# Tools copied from Base.Meta which call core_parser_hook as if called by +# Meta.parse(), but without installing the global hook. + +function _Meta_parse_string(text::AbstractString, filename::AbstractString, + lineno::Integer, index::Integer, options) + if index < 1 || index > ncodeunits(text) + 1 + throw(BoundsError(text, index)) + end + ex, offset::Int = JuliaSyntax.core_parser_hook(text, filename, lineno, index-1, options) + ex, offset+1 +end + +function Meta_parse(str::AbstractString, pos::Integer; + filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true) + ex, pos = _Meta_parse_string(str, String(filename), 1, pos, greedy ? 
:statement : :atom) + if raise && Meta.isexpr(ex, :error) + err = ex.args[1] + if err isa String + err = Meta.ParseError(err) # For flisp parser + end + throw(err) + end + return ex, pos +end + +function Meta_parse(str::AbstractString; + filename="none", raise::Bool=true, depwarn::Bool=true) + ex, pos = Meta_parse(str, 1; filename=filename, greedy=true, raise=raise, depwarn=depwarn) + if Meta.isexpr(ex, :error) + return ex + end + if pos <= ncodeunits(str) + raise && throw(Meta.ParseError("extra token after end of expression")) + return Expr(:error, "extra token after end of expression") + end + return ex +end + +function Meta_parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1) + return _Meta_parse_string(text, String(filename), lineno, pos, :atom) +end + +function Meta_parseall(text::AbstractString; filename="none", lineno=1) + ex,_ = _Meta_parse_string(text, String(filename), lineno, 1, :all) + return ex +end + From b3cf3e031666dd9567132f001dafc03978a4e4b7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 6 Nov 2023 18:23:43 +1000 Subject: [PATCH 0720/1109] Fix crash with empty macro name when parsing `"@("` (JuliaLang/JuliaSyntax.jl#382) --- JuliaSyntax/src/parser.jl | 3 +++ JuliaSyntax/test/parser.jl | 2 ++ 2 files changed, 5 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index c246a3327069a..3993d76b8f004 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2316,6 +2316,9 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not elseif k == K"parens" # @(A) x ==> (macrocall (parens @A) x) macro_name_position = first_child_position(ps, macro_name_position) + if macro_name_position == NO_POSITION + return + end k = peek_behind(ps, macro_name_position).kind elseif k == K"error" # Error already reported in parse_macro_name diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 8f85bbeb261e9..d5b54f0828682 100644 --- 
a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -999,6 +999,8 @@ parsestmt_test_specs = [ # shouldn't crash "@(x y)" => "(macrocall (parens @x (error-t y)))" "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" + "@(" => "(macrocall (parens (error-t)))" + "x = @(" => "(= x (macrocall (parens (error-t))))" # The following is currently broken but at least the parser shouldn't # crash. From 80713b6c21c613aae3cfbbb0d33f85bd7ed7de8a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 7 Nov 2023 15:56:37 +1000 Subject: [PATCH 0721/1109] Fix error in hooks when parsing incomplete `x.` (JuliaLang/JuliaSyntax.jl#385) Also add another fuzzing tool --- JuliaSyntax/src/hooks.jl | 2 ++ JuliaSyntax/test/fuzz_test.jl | 4 ++++ JuliaSyntax/test/hooks.jl | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 6bd86b3058b97..c87d32b0a03c7 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -127,6 +127,8 @@ function _has_nested_error(ex) else return any(_has_nested_error(e) for e in ex.args) end + elseif ex isa QuoteNode + return _has_nested_error(ex.value) else return false end diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index b441938157469..b05c4247991ea 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -969,6 +969,10 @@ function product_token_fuzz(tokens, N) (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...)) end +function random_token_fuzz(tokens, ntokens, ntries) + (join(rand(tokens, ntokens)) for _ in 1:ntries) +end + """ Fuzz test parser against randomly generated binary strings """ diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 3ed34209151a6..99ec96d764b56 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -66,6 +66,10 @@ end LineNumberNode(4, "somefile"), ] @test Meta.isexpr(ex.args[6], :error) + + ex = 
JuliaSyntax.core_parser_hook("x.", "somefile", 0, 0, :all)[1] + @test ex.head == :toplevel + @test ex.args[2].head == :incomplete end @testset "enable_in_core!" begin From 7585c95207f6f5e35f7ecc9dded1ae8626ae5cfb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 7 Nov 2023 21:29:16 +1000 Subject: [PATCH 0722/1109] Disallow newline between contextual keyword pairs in parentheses (JuliaLang/JuliaSyntax.jl#386) Ensure that we never treat things like `"mutable\nstruct"` as a mutable struct definition, even within parentheses where newline whitespace is insignificant. --- JuliaSyntax/src/parser.jl | 2 +- JuliaSyntax/test/parser.jl | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 3993d76b8f004..75400b8c6da57 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -253,7 +253,7 @@ function peek_initial_reserved_words(ps::ParseState) if is_initial_reserved_word(ps, k) return true elseif is_contextual_keyword(k) - k2 = peek(ps,2) + k2 = peek(ps, 2, skip_newlines=false) return (k == K"mutable" && k2 == K"struct") || (k == K"primitive" && k2 == K"type") || (k == K"abstract" && k2 == K"type") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d5b54f0828682..51d5ab77ca570 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -986,6 +986,9 @@ end parsestmt_test_specs = [ # whitespace before keywords in space-insensitive mode "(y::\nif x z end)" => "(parens (::-i y (if x (block z))))" + # Contextual keyword pairs inside parentheses + "(abstract type X end)" => "(parens (abstract X))" + "(mutable struct X end)" => "(parens (struct-mut X (block)))" # parsing of tricky primes "x in'c'" => "(call-i x in (char 'c'))" "1where'c'" => "(where 1 (char 'c'))" @@ -1001,6 +1004,9 @@ parsestmt_test_specs = [ "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" "@(" => "(macrocall (parens 
(error-t)))" "x = @(" => "(= x (macrocall (parens (error-t))))" + # Contextual keyword pairs must not be separated by newlines even within parens + "(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))" + "(mutable\nstruct X end)" => "(wrapper (parens mutable (error-t struct X)) (error-t end ✘))" # The following is currently broken but at least the parser shouldn't # crash. From 281dc61db17943d751fddd403ef6b9e77f85bed5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 10 Nov 2023 04:10:07 +1000 Subject: [PATCH 0723/1109] Fix highlighting of ranges which start with non-ascii chars (JuliaLang/JuliaSyntax.jl#387) This could cause the parser hook to crash on certain inputs. --- JuliaSyntax/src/source_files.jl | 9 +++++++-- JuliaSyntax/test/source_files.jl | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 06cae0085c248..a8051a59ba0e6 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -124,6 +124,10 @@ function Base.thisind(source::SourceFile, i::Int) thisind(source.code, i - source.byte_offset) + source.byte_offset end +function Base.nextind(source::SourceFile, i::Integer) + nextind(source.code, i - source.byte_offset) + source.byte_offset +end + Base.firstindex(source::SourceFile) = firstindex(source.code) + source.byte_offset Base.lastindex(source::SourceFile) = lastindex(source.code) + source.byte_offset @@ -218,7 +222,8 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; hitext = source[p:q] print(io, source[x:p-1]) _printstyled(io, hitext; bgcolor=color) - print(io, source[q+1:d]) + #print(io, source[q+1:d]) + print(io, source[nextind(source,q):d]) if d >= firstindex(source) && source[thisind(source, d)] != '\n' print(io, "\n") end @@ -249,7 +254,7 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; print(io, "⋮\n") _printstyled(io, source[z:q]; 
bgcolor=color) end - print(io, source[q+1:d]) + print(io, source[nextind(source, q):d]) source[thisind(source, d)] == '\n' || print(io, "\n") qline = source[c:q] _print_marker_line(io, "", qline, true, false, marker_line_color, note, notecolor) diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 9a1548f1009d5..0e36b7fe31103 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -103,6 +103,9 @@ end @test sprint(highlight, src, 3:4) == "abcd\n# └┘\nαβγδ\n+-*/" @test sprint(highlight, src, 4:4) == "abcd\n# ╙\nαβγδ\n+-*/" @test sprint(highlight, src, 5:5) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 6:6) == "abcd\nαβγδ\n╙\n+-*/" + @test sprint(highlight, src, 6:9) == "abcd\nαβγδ\n└┘\n+-*/" + @test sprint(highlight, src, 8:8) == "abcd\nαβγδ\n#╙\n+-*/" # multi-byte chars @test sprint(highlight, src, 8:13) == """ @@ -149,6 +152,18 @@ end αβγδ #┘ +-*/""" + @test sprint(highlight, src, 6:15) == """ + abcd + ┌─── + αβγδ + +-*/ + ┘""" + @test sprint(highlight, src, 8:15) == """ + abcd + #┌── + αβγδ + +-*/ + ┘""" @test sprint(highlight, src, 1:18) == """ ┌─── abcd From 28f77a887deb5d2d265762e176043ee786ae7931 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 10 Nov 2023 19:03:37 +1000 Subject: [PATCH 0724/1109] Fix crash when parsing malformed `function(where` (JuliaLang/JuliaSyntax.jl#388) When word operators are parsed as atoms (ie, identifiers), the kind should be remapped as such. This fixes a crash when parsing `function(where` because `was_eventually_call` assumes that a kind of `K"where"` implies an internal node. 
--- JuliaSyntax/src/parser.jl | 3 +++ JuliaSyntax/test/fuzz_test.jl | 6 +++++- JuliaSyntax/test/parser.jl | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 75400b8c6da57..f273702550bfc 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3475,6 +3475,9 @@ function parse_atom(ps::ParseState, check_identifiers=true) # xx ==> xx # x₁ ==> x₁ bump(ps) + elseif is_word_operator(leading_kind) + # where=1 ==> (= where 1) + bump(ps, remap_kind=K"Identifier") elseif is_operator(leading_kind) # + ==> + # .+ ==> (. +) diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index b05c4247991ea..15bfa79de2dfb 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -905,7 +905,11 @@ function try_hook_failure(str) try test_logger = Test.TestLogger() Logging.with_logger(test_logger) do - Meta_parseall(str) + try + Meta_parseall(str) + catch exc + exc isa Meta.ParseError || exc isa JuliaSyntax.ParseError || rethrow() + end end if !isempty(test_logger.logs) return str diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 51d5ab77ca570..59182508d50ea 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1004,6 +1004,7 @@ parsestmt_test_specs = [ "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" "@(" => "(macrocall (parens (error-t)))" "x = @(" => "(= x (macrocall (parens (error-t))))" + "function(where" => "(function (tuple-p where (error-t)) (block (error)) (error-t))" # Contextual keyword pairs must not be separated by newlines even within parens "(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))" "(mutable\nstruct X end)" => "(wrapper (parens mutable (error-t struct X)) (error-t end ✘))" From 67c6aa102690bb1ee61bf6fb42f267d6587821e5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 21 Nov 2023 20:32:18 
+1000 Subject: [PATCH 0725/1109] Fix bug parsing unary subtypes with newlines (JuliaLang/JuliaSyntax.jl#393) In expressions like `"a +\n\n<:"` the presence of `peek(ps, skip_newlines=true)` was inconsistent with the use of `bump()` without `skip_newlines`. It seems that we didn't need skip_newlines at all in parse_unary_subtype, so do this to be consistent with parsing of other operators. --- JuliaSyntax/src/parser.jl | 8 ++++---- JuliaSyntax/test/parser.jl | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index f273702550bfc..2c74821a27bfc 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -442,7 +442,7 @@ function parse_toplevel(ps::ParseState) # a \n \n ==> (toplevel a) # Empty files # ==> (toplevel) - bump_trivia(ps, skip_newlines=true) + bump_trivia(ps) break else parse_stmts(ps) @@ -1027,7 +1027,7 @@ end # # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) - t = peek_token(ps, skip_newlines=true) + t = peek_token(ps) if is_type_operator(t) k2 = peek(ps, 2) if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" @@ -1060,7 +1060,7 @@ function parse_where_chain(ps0::ParseState, mark) ps = ParseState(ps0, where_enabled=false) while peek(ps) == K"where" bump(ps, TRIVIA_FLAG) # where - bump_trivia(ps, skip_newlines=true) + bump_trivia(ps) k = peek(ps) if k == K"{" # x where \n {T} ==> (where x (braces T)) @@ -3457,7 +3457,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) if preceding_whitespace(t) # : foo ==> (quote-: (error-t) foo) # :\nfoo ==> (quote-: (error-t) foo) - bump_trivia(ps, TRIVIA_FLAG, skip_newlines=true, + bump_trivia(ps, TRIVIA_FLAG, error="whitespace not allowed after `:` used for quoting") end # Being inside quote makes keywords into identifiers at the diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 59182508d50ea..63244274561eb 100644 --- a/JuliaSyntax/test/parser.jl +++ 
b/JuliaSyntax/test/parser.jl @@ -993,6 +993,9 @@ parsestmt_test_specs = [ "x in'c'" => "(call-i x in (char 'c'))" "1where'c'" => "(where 1 (char 'c'))" ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" + # unary subtype ops and newlines + "a +\n\n<:" => "(call-i a + <:)" + "for\n\n<:" => "(for (= <: (error (error-t))) (block (error)) (error-t))" # Empty character consumes trailing ' delimiter (ideally this could be # tested above but we don't require the input stream to be consumed in the # unit tests there. From f46b60e5d16ee061b455f11e0ef6b810d305ca42 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 23 Nov 2023 21:03:41 +1000 Subject: [PATCH 0726/1109] Fix crashes due to lexing ambiguity of string delimiters (JuliaLang/JuliaSyntax.jl#394) There are some lexing ambituities in primes vs cmd delimiters. We break these with a simple rule in the lexer but there's edge cases of invalid or extremely strange syntax where this can be inconsistent with the parser. The following were some such cases which caused an assertion error in the parser. "var\"#\"``\$" "x in'``\$" This change avoids crashing in those cases, emitting an error instead. See also JuliaLang/JuliaSyntax.jl#25 --- JuliaSyntax/src/parser.jl | 25 ++++++++++++++++++++----- JuliaSyntax/test/parser.jl | 8 ++++++-- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2c74821a27bfc..0f0aab1aec359 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -301,6 +301,10 @@ function is_both_unary_and_binary(t) ) end +function is_string_macro_suffix(k) + k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k) +end + # flisp: invalid-identifier? 
function is_valid_identifier(k) k = kind(k) @@ -1707,7 +1711,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_string(ps, true) t = peek_token(ps) k = kind(t) - if !preceding_whitespace(t) && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)) + if !preceding_whitespace(t) && is_string_macro_suffix(k) # Macro sufficies can include keywords and numbers # x"s"y ==> (macrocall @x_str (string-r "s") "y") # x"s"end ==> (macrocall @x_str (string-r "s") "end") @@ -2344,7 +2348,7 @@ function parse_macro_name(ps::ParseState) # @! x ==> (macrocall @! x) # @.. x ==> (macrocall @.. x) # @$ x ==> (macrocall @$ x) - # @var"#" x ==> (macrocall (var #) @$ x) + # @var"#" x ==> (macrocall (var @#) x) bump_disallowed_space(ps) mark = position(ps) parse_atom(ps, false) @@ -3182,7 +3186,13 @@ function parse_string(ps::ParseState, raw::Bool) t = peek_full_token(ps) k = kind(t) if k == K"$" - @assert !raw # The lexer detects raw strings separately + if raw + # FIXME: This case is actually a tokenization error: + # The `K"$"` token should not occur when a raw string + # is being parsed, but this would require the lexer to know + # about the parse state. (see also parse_atom) + break + end if prev_chunk_newline # """\n$x\n a""" ==> (string-s x "\n" " a") indent_ref_i = first_byte(t) @@ -3526,11 +3536,16 @@ function parse_atom(ps::ParseState, check_identifiers=true) # var"x"+ ==> x # var"x") ==> x # var"x"( ==> x - else + elseif is_string_macro_suffix(k) # var"x"end ==> (var x (error-t)) # var"x"1 ==> (var x (error-t)) # var"x"y ==> (var x (error-t)) - bump(ps, TRIVIA_FLAG, error="suffix not allowed after var\"...\" syntax") + bump(ps, TRIVIA_FLAG, error="suffix not allowed after `var\"...\"` syntax") + elseif k == K"`" || k == K"\"" || k == K"\"\"\"" || k == K"```" + # Disallow `var"#""str". To allow this we'd need to fix `raw` + # detection in lex_quote to be consistent with the parser. 
+ bump_invisible(ps, K"error", TRIVIA_FLAG, + error="`var\"...\"` syntax not supported as string macro name") end emit(ps, mark, K"var") elseif check_identifiers && is_closing_token(ps, leading_kind) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 63244274561eb..3ac3ed36719a8 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1012,9 +1012,13 @@ parsestmt_test_specs = [ "(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))" "(mutable\nstruct X end)" => "(wrapper (parens mutable (error-t struct X)) (error-t end ✘))" - # The following is currently broken but at least the parser shouldn't - # crash. + # Lexer vs parser: issues detecting which tokens are string delimiters and + # detecting raw vs non-raw strings. The old parser was tightly coupled to + # the lexer and the parser state was used to disambiguate these cases. "x in' '" => "(call-i x in (char (error)))" + "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (macrocall core_@cmd (cmdstring-r (error-t)))) \$ (error)))" + "var\"#\"`str`" => "(juxtapose (var # (error-t)) (macrocall core_@cmd (cmdstring-r \"str\")))" + "var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))" ] @testset "Parser does not crash on broken code" begin From 0f2f26d1f057355888c46ea3afd9c8fd51ae462c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 5 Dec 2023 21:01:22 +1000 Subject: [PATCH 0727/1109] Improve error message for missing closing tokens (JuliaLang/JuliaSyntax.jl#397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a missing closing token like `)`, `]` or `}` is encountered we want the "Expected `)`" error to point to a location one past the last valid token, not to the trailing error tokens. 
For example from JuliaLang/JuliaSyntax.jl#349 here's a poor error message from the existing code: ERROR: ParseError: # Error @ REPL[53]:15:5 ylims!(p, (0, last(ylims(p))) xlabel!(p, "Contig length cutoff (kbp)") # └─────────────────────────────────────┘ ── Expected `)` After this change, the error location instead points to the end of the last valid line: ERROR: ParseError: # Error @ REPL[53]:15:5 ylims!(p, (0, last(ylims(p))) # └── Expected `)` --- JuliaSyntax/src/parser.jl | 16 +++++++++------- JuliaSyntax/test/diagnostics.jl | 9 ++++++--- JuliaSyntax/test/hooks.jl | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0f0aab1aec359..34666b59cf58b 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -141,15 +141,20 @@ end # # Crude recovery heuristic: bump any tokens which aren't block or bracket # closing tokens. -function bump_closing_token(ps, closing_kind) +function bump_closing_token(ps, closing_kind, alternative_closer_hint=nothing) # todo: Refactor with recover() ? - bump_trivia(ps) if peek(ps) == closing_kind + bump_trivia(ps) bump(ps, TRIVIA_FLAG) return end + errmsg = "Expected `$(untokenize(closing_kind))`" + if !isnothing(alternative_closer_hint) + errmsg *= alternative_closer_hint + end # We didn't find the closing token. Read ahead in the stream mark = position(ps) + emit_diagnostic(ps, mark, mark, error=errmsg) while true k = peek(ps) if is_closing_token(ps, k) && !(k in KSet", ;") @@ -158,8 +163,7 @@ function bump_closing_token(ps, closing_kind) bump(ps) end # mark as trivia => ignore in AST. 
- emit(ps, mark, K"error", TRIVIA_FLAG, - error="Expected `$(untokenize(closing_kind))`") + emit(ps, mark, K"error", TRIVIA_FLAG) if peek(ps) == closing_kind bump(ps, TRIVIA_FLAG) end @@ -3101,7 +3105,6 @@ function parse_brackets(after_parse::Function, had_splat = false param_start = nothing while true - bump_trivia(ps) k = peek(ps) if k == closing_kind break @@ -3127,7 +3130,6 @@ function parse_brackets(after_parse::Function, end t = peek_token(ps, skip_newlines=true) k = kind(t) - bump_trivia(ps) if k == K"," had_commas = true bump(ps, TRIVIA_FLAG) @@ -3156,7 +3158,7 @@ function parse_brackets(after_parse::Function, end end release_positions(ps.stream, params_positions) - bump_closing_token(ps, closing_kind) + bump_closing_token(ps, closing_kind, " or `,`") return opts end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 647741d5a1e48..1d1f9e5dae008 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -64,14 +64,17 @@ end Diagnostic(16, 16, :error, "missing condition in `elseif`") @test diagnostic("f(x::V) where {V) = x", allow_multiple=true) == [ - Diagnostic(17, 16, :error, "Expected `}`") + Diagnostic(17, 16, :error, "Expected `}` or `,`") Diagnostic(17, 21, :error, "extra tokens after end of expression") ] @test diagnostic("[1)", allow_multiple=true) == [ - Diagnostic(3, 2, :error, "Expected `]`") + Diagnostic(3, 2, :error, "Expected `]` or `,`") Diagnostic(3, 3, :error, "extra tokens after end of expression") ] - + @test diagnostic("f(x, y #=hi=#\ng(z)") == Diagnostic(7, 6, :error, "Expected `)` or `,`") + @test diagnostic("(x, y \nz") == Diagnostic(6, 5, :error, "Expected `)` or `,`") + @test diagnostic("function f(x, y \nz end") == Diagnostic(16, 15, :error, "Expected `)` or `,`") + @test diagnostic("sin. 
(1)") == Diagnostic(5, 5, :error, "whitespace is not allowed here") @test diagnostic("x [i]") == diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 99ec96d764b56..d5944a04ccff5 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -52,7 +52,7 @@ end @test err.source.first_line == 1 @test err.diagnostics[1].first_byte == 6 @test err.diagnostics[1].last_byte == 5 - @test err.diagnostics[1].message == "Expected `}`" + @test err.diagnostics[1].message == "Expected `}` or `,`" end @testset "toplevel errors" begin From fdfaf1d95ff8d87d05293d46a8dfaf348ca07aff Mon Sep 17 00:00:00 2001 From: Yuto Horikawa Date: Sat, 16 Mar 2024 11:45:54 +0900 Subject: [PATCH 0728/1109] Update docs (JuliaLang/JuliaSyntax.jl#400) * update docs CI to use Julia v1.10 * update packages in `docs/Manifest.toml` * enable `warnonly=true` and add `repolink` * update code block language * add compat for Documenter * fix doc/Manifest.toml --- JuliaSyntax/.github/workflows/CI.yml | 2 +- JuliaSyntax/docs/Manifest.toml | 161 ++++++++++++++++++++++++--- JuliaSyntax/docs/Project.toml | 3 + JuliaSyntax/docs/make.jl | 6 +- JuliaSyntax/docs/src/design.md | 6 +- JuliaSyntax/docs/src/reference.md | 8 +- JuliaSyntax/src/expr.jl | 2 +- 7 files changed, 164 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 588a3264adc7a..985f9603f3387 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -115,7 +115,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1.9' + version: '1.10' - run: julia --project=docs -e ' using Pkg; Pkg.develop(PackageSpec(; path=pwd())); diff --git a/JuliaSyntax/docs/Manifest.toml b/JuliaSyntax/docs/Manifest.toml index a84b595dd0c76..ce4d6bed870e8 100644 --- a/JuliaSyntax/docs/Manifest.toml +++ b/JuliaSyntax/docs/Manifest.toml @@ -1,14 +1,26 @@ # This file is machine-generated - editing it 
directly is not advised -julia_version = "1.9.0" +julia_version = "1.10.0" manifest_format = "2.0" -project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f" +project_hash = "46b5b82f24e4b5d97afc2843032730b022086b31" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" version = "0.0.1" +[[deps.AbstractTrees]] +git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.4" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -23,10 +35,36 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[deps.Documenter]] -deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "58fea7c536acd71f3eef6be3b21c0df5f3df88fd" +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "Test", "Unicode"] +git-tree-sha1 = "2613dbec8f4748273bbe30ba71fd5cb369966bac" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.24" +version = "1.2.1" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.5.0+0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = 
"51764e6c2e84c37055e846c516e9015b4a291c7d" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.0" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "bb8f7cc77ec1152414b2af6db533d9471cfbb2d1" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.42.0+0" [[deps.IOCapture]] deps = ["Logging", "Random"] @@ -38,16 +76,56 @@ version = "0.2.3" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + [[deps.JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.4" +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.2.2" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.4.0+0" + [[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.6.4+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] 
+git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+0" + [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -55,30 +133,61 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.2" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+1" + [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.1.10" + [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.12+0" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] -git-tree-sha1 = "a5aef8d4a6e8d81f171b2bd4be5265b01384c74c" +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.5.10" +version = "2.8.1" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.10.0" [[deps.PrecompileTools]] deps = ["Preferences"] -git-tree-sha1 = "259e206946c293698122f63e2b513a7c99a244e8" +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -version = "1.1.1" +version = 
"1.2.0" [[deps.Preferences]] deps = ["TOML"] -git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1" +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.4.0" +version = "1.4.1" [[deps.Printf]] deps = ["Unicode"] @@ -89,9 +198,15 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[deps.Random]] -deps = ["SHA", "Serialization"] +deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" + [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" @@ -107,6 +222,11 @@ deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" version = "1.0.3" +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -117,3 +237,18 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.52.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" diff --git a/JuliaSyntax/docs/Project.toml b/JuliaSyntax/docs/Project.toml index dfa65cd107d06..1814eb3304f3c 100644 --- a/JuliaSyntax/docs/Project.toml +++ b/JuliaSyntax/docs/Project.toml @@ -1,2 +1,5 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "1" diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl 
index c6dca55563025..5c3a094ba8691 100644 --- a/JuliaSyntax/docs/make.jl +++ b/JuliaSyntax/docs/make.jl @@ -2,7 +2,9 @@ using Documenter, JuliaSyntax makedocs(; modules=[JuliaSyntax], - format=Documenter.HTML(), + format=Documenter.HTML( + repolink="https://github.com/JuliaLang/JuliaSyntax.jl" + ), pages=[ "Overview" => "index.md" "How To" => "howto.md" @@ -15,7 +17,7 @@ makedocs(; repo="https://github.com/JuliaLang/JuliaSyntax.jl/blob/{commit}{path}#L{line}", sitename="JuliaSyntax.jl", authors = "Claire Foster and contributors: https://github.com/JuliaLang/JuliaSyntax.jl/graphs/contributors", - strict = Documenter.except(:missing_docs) + warnonly = true ) deploydocs(; diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md index 1644883cc91c8..0f7e2a4f178b4 100644 --- a/JuliaSyntax/docs/src/design.md +++ b/JuliaSyntax/docs/src/design.md @@ -214,7 +214,7 @@ Here's some behaviors which seem to be bugs. (Some of these we replicate in the name of compatibility, perhaps with a warning.) * Macro module paths allow calls which gives weird stateful semantics! - ``` + ```julia b() = rand() > 0.5 ? Base : Core b().@info "hi" ``` @@ -225,7 +225,7 @@ name of compatibility, perhaps with a warning.) keyword parameters are separated by commas. A tuple is produced instead. * `const` and `global` allow chained assignment, but the right hand side is not constant. `a` const here but not `b`. - ``` + ```julia const a = b = 1 ``` * Parsing the `ncat` array concatenation syntax within braces gives @@ -397,7 +397,7 @@ parsing `key=val` pairs inside parentheses. It's this last case which seems problematic (why not *require* the second form as a more explicit way to indicate flattening?). 
It's not even pretty printed correctly: - ``` + ```julia-repl julia> :([(x,y) for x in 1:10, y in 1:10 if y < x]) :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))]) ``` diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index bb9d3959aa4db..2ae2cef10b1f7 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -252,7 +252,7 @@ stored in the head flags for `SyntaxNode` trees, and in the first `arg` for Vertical concatenation along dimension 1 can be done with semicolons or newlines -```julia +```julia-repl julia> print_tree(:([a b])) Expr(:vcat) @@ -269,7 +269,7 @@ Expr(:vcat) For horizontal concatenation along dimension 2, use spaces or double semicolons -```julia +```julia-repl julia> print_tree(:([a b])) Expr(:hcat) ├─ :a @@ -287,7 +287,7 @@ Expr(:ncat) Concatenation along dimensions 1 and 2 can be done with spaces and single semicolons or newlines, producing a mixture of `vcat` and `row` expressions: -```julia +```julia-repl julia> print_tree(:([a b c d])) # OR @@ -304,7 +304,7 @@ Expr(:vcat) General n-dimensional concatenation results in nested `ncat` and `nrow`, for example -```julia +```julia-repl julia> print_tree(:([a ; b ;; c ; d ;;; x])) Expr(:ncat) ├─ 3 diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 21c98bb9925cd..4ca0be0222ca9 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -8,7 +8,7 @@ Type inference friendly replacement for `Meta.isexpr`. 
When using the pattern -``` +```julia if @isexpr(ex, headsym) body end From c6e172d8b9a01364532a31df3d69d72e08c53c3a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 25 Mar 2024 13:30:38 +1000 Subject: [PATCH 0729/1109] Initial commit From 76b045328a6ff5d804f28aab9c0c5d39cd4cff3d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 25 Mar 2024 13:30:47 +1000 Subject: [PATCH 0730/1109] Files generated by PkgTemplates PkgTemplates version: 0.7.46 --- JuliaLowering/.github/workflows/CI.yml | 37 +++++++++++++++++++ .../.github/workflows/CompatHelper.yml | 16 ++++++++ JuliaLowering/.github/workflows/TagBot.yml | 31 ++++++++++++++++ JuliaLowering/.gitignore | 1 + JuliaLowering/LICENSE | 21 +++++++++++ JuliaLowering/Project.toml | 13 +++++++ JuliaLowering/README.md | 3 ++ JuliaLowering/src/JuliaLowering.jl | 5 +++ JuliaLowering/test/runtests.jl | 6 +++ 9 files changed, 133 insertions(+) create mode 100644 JuliaLowering/.github/workflows/CI.yml create mode 100644 JuliaLowering/.github/workflows/CompatHelper.yml create mode 100644 JuliaLowering/.github/workflows/TagBot.yml create mode 100644 JuliaLowering/.gitignore create mode 100644 JuliaLowering/LICENSE create mode 100644 JuliaLowering/Project.toml create mode 100644 JuliaLowering/README.md create mode 100644 JuliaLowering/src/JuliaLowering.jl create mode 100644 JuliaLowering/test/runtests.jl diff --git a/JuliaLowering/.github/workflows/CI.yml b/JuliaLowering/.github/workflows/CI.yml new file mode 100644 index 0000000000000..e081dae8470c7 --- /dev/null +++ b/JuliaLowering/.github/workflows/CI.yml @@ -0,0 +1,37 @@ +name: CI +on: + push: + branches: + - main + tags: ['*'] + pull_request: + workflow_dispatch: +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. 
+ group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.0' + - '1.11' + - 'nightly' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 diff --git a/JuliaLowering/.github/workflows/CompatHelper.yml b/JuliaLowering/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000000000..cba9134c670f0 --- /dev/null +++ b/JuliaLowering/.github/workflows/CompatHelper.yml @@ -0,0 +1,16 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/JuliaLowering/.github/workflows/TagBot.yml b/JuliaLowering/.github/workflows/TagBot.yml new file mode 100644 index 0000000000000..2bacdb87e004b --- /dev/null +++ b/JuliaLowering/.github/workflows/TagBot.yml @@ -0,0 +1,31 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: + inputs: + lookback: + default: 3 +permissions: + actions: read + checks: read + contents: write + deployments: read + issues: read + discussions: read + packages: read + pages: read + pull-requests: read + repository-projects: read + security-events: read + statuses: read +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 
'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/JuliaLowering/.gitignore b/JuliaLowering/.gitignore new file mode 100644 index 0000000000000..b067eddee4ee0 --- /dev/null +++ b/JuliaLowering/.gitignore @@ -0,0 +1 @@ +/Manifest.toml diff --git a/JuliaLowering/LICENSE b/JuliaLowering/LICENSE new file mode 100644 index 0000000000000..482e394ee2bba --- /dev/null +++ b/JuliaLowering/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Claire Foster and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml new file mode 100644 index 0000000000000..88483488a13b2 --- /dev/null +++ b/JuliaLowering/Project.toml @@ -0,0 +1,13 @@ +name = "JuliaLowering" +uuid = "f3c80556-a63f-4383-b822-37d64f81a311" +authors = ["Claire Foster and contributors"] +version = "1.0.0-DEV" + +[compat] +julia = "1" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md new file mode 100644 index 0000000000000..7d8408a3e615a --- /dev/null +++ b/JuliaLowering/README.md @@ -0,0 +1,3 @@ +# JuliaLowering + +[![Build Status](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml?query=branch%3Amain) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl new file mode 100644 index 0000000000000..65ab2a054ff55 --- /dev/null +++ b/JuliaLowering/src/JuliaLowering.jl @@ -0,0 +1,5 @@ +module JuliaLowering + +# Write your package code here. + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl new file mode 100644 index 0000000000000..b9824e61b277c --- /dev/null +++ b/JuliaLowering/test/runtests.jl @@ -0,0 +1,6 @@ +using JuliaLowering +using Test + +@testset "JuliaLowering.jl" begin + # Write your tests here. +end From 49667ca8d5186e44062ca59d1c85ce5756adb9f8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 26 Mar 2024 09:46:45 +1000 Subject: [PATCH 0731/1109] Initial code, copied from prototype in JuliaSyntax repo branch Requires a custom branch of JuliaSyntax to run... 
--- JuliaLowering/LICENSE | 2 +- JuliaLowering/Project.toml | 3 + JuliaLowering/README.md | 217 ++++++++++++ JuliaLowering/src/JuliaLowering.jl | 15 +- JuliaLowering/src/desugaring.jl | 477 ++++++++++++++++++++++++++ JuliaLowering/src/linear_ir.jl | 504 ++++++++++++++++++++++++++++ JuliaLowering/src/scope_analysis.jl | 274 +++++++++++++++ JuliaLowering/src/syntax_graph.jl | 432 ++++++++++++++++++++++++ JuliaLowering/src/utils.jl | 112 +++++++ JuliaLowering/test/lowering.jl | 81 +++++ 10 files changed, 2115 insertions(+), 2 deletions(-) create mode 100644 JuliaLowering/src/desugaring.jl create mode 100644 JuliaLowering/src/linear_ir.jl create mode 100644 JuliaLowering/src/scope_analysis.jl create mode 100644 JuliaLowering/src/syntax_graph.jl create mode 100644 JuliaLowering/src/utils.jl create mode 100644 JuliaLowering/test/lowering.jl diff --git a/JuliaLowering/LICENSE b/JuliaLowering/LICENSE index 482e394ee2bba..5732c3014feb3 100644 --- a/JuliaLowering/LICENSE +++ b/JuliaLowering/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Claire Foster and contributors +Copyright (c) 2024 Julia Computing and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml index 88483488a13b2..d7b465c86fc43 100644 --- a/JuliaLowering/Project.toml +++ b/JuliaLowering/Project.toml @@ -3,6 +3,9 @@ uuid = "f3c80556-a63f-4383-b822-37d64f81a311" authors = ["Claire Foster and contributors"] version = "1.0.0-DEV" +[deps] +JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" + [compat] julia = "1" diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 7d8408a3e615a..4a123fe543313 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -1,3 +1,220 @@ # JuliaLowering [![Build 
Status](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml?query=branch%3Amain) + +Experimental port of Julia's code "lowering" compiler passes into Julia. + +Lowering comprises four symbolic simplification steps +* Syntax desugaring - simplifying the rich surface syntax down to a small + number of forms. +* Scope analysis - analyzing identifier names used in the code to discover + local variables, closure captures, and associate global variables to the + appropriate module. +* Closure conversion - convert closures to types and deal with captured + variables efficiently where possible. +* Flattening to linear IR - convert code in hierarchical tree form to a + flat array of statements and control flow into gotos. + +## Goals + +This work is intended to +* Bring precise code provenance to Julia's lowered form (and eventually + downstream in type inference, stack traces, etc). This has many benefits + - Talk to users precisely about their code via character-precise error and + diagnostic messages from lowering + - Greatly simplify the implementation of critical tools like Revise.jl + which rely on analyzing how the user's source maps to the compiler's data + structures + - Allow tools like JuliaInterpreter to use type-inferred and optimized + code, with the potential for huge speed improvements. +* Bring improvements for macro authors + - Prototype "automatic hygiene" (no more need for `esc()`!) + - Precise author-defined error reporting from macros + - Sketch better interfaces for syntax trees (hopefully!) + +# Design Notes + +A disorganized collection of design notes :) + +## Syntax trees + +Want something something better than `JuliaSyntax.SyntaxNode`! `SyntaxTree` and +`SyntaxGraph` provide this. (These will probably end up in `JuliaSyntax`.) + +We want to allow arbitrary attributes to be attached to tree nodes by analysis +passes. 
This separates the analysis pass implementation from the data +structure, allowing passes which don't know about each other to act on a shared +data structure. + +Design and implementation inspiration comes in several analogies: + +Analogy 1: the ECS (Entity-Component-System) pattern for computer game design. +This pattern is highly successful because it separates game logic (systems) +from game objects (entities) by providing flexible storage +* Compiler passes are "systems" +* AST tree nodes are "entities" +* Node attributes are "components" + +Analogy 2: The AoS to SoA transformation. But here we've got a kind of +tree-of-structs-with-optional-attributes to struct-of-Dicts transformation. +The data alignment / packing efficiency and concrete type safe storage benefits +are similar. + +Analogy 3: Graph algorithms which represent graphs as a compact array of node +ids and edges with integer indices, rather than using a linked data structure. + +## Julia's existing lowering implementation + +### How does macro expansion work? + +`macroexpand(m::Module, x)` calls `jl_macroexpand` in ast.c: + +``` +jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule) +{ + expr = jl_copy_ast(expr); + expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_world_counter, 0); + expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule); + return expr; +} +``` + +First we copy the AST here. This is mostly a trivial deep copy of `Expr`s and +shallow copy of their non-`Expr` children, except for when they contain +embedded `CodeInfo/phi/phic` nodes which are also deep copied. + +Second we expand macros recursively by calling + +`jl_expand_macros(expr, inmodule, macroctx, onelevel, world, throw_load_error)` + +This relies on state indexed by `inmodule` and `world`, which gives it some +funny properties: +* `module` expressions can't be expanded: macro expansion depends on macro + lookup within the module, but we can't do that without `eval`. 
+ +Expansion proceeds from the outermost to innermost macros. So macros see any +macro calls or quasiquote (`quote/$`) in their children as unexpanded forms. + +Things which are expanded: +* `quote` is expanded using flisp code in `julia-bq-macro` + - symbol / ssavalue -> `QuoteNode` (inert) + - atom -> itself + - at depth zero, `$` expands to its content + - Expressions `x` without `$` expand to `(copyast (inert x))` + - Other expressions containing a `$` expand to a call to `_expr` with all the + args mapped through `julia-bq-expand-`. Roughly! + - Special handling exists for multi-splatting arguments as in `quote quote $$(x...) end end` +* `macrocall` proceeds with + - Expand with `jl_invoke_julia_macro` + - Call `eval` on the macro name (!!) to get the macro function. Look up + the method. + - Set up arguments for the macro calling convention + - Wraps errors in macro invocation in `LoadError` + - Returns the expression, as well as the module at + which that method of that macro was defined and `LineNumberNode` where + the macro was invoked in the source. + - Deep copy the AST + - Recursively expand child macros in the context of the module where the + macrocall method was defined + - Wrap the result in `(hygienic-scope ,result ,newctx.m ,lineinfo)` (except + for special case optimizations) +* `hygenic-scope` expands `args[1]` with `jl_expand_macros`, with the module + of expansion set to `args[2]`. Ie, it's the `Expr` representation of the + module and expression arguments to `macroexpand`. The way this returns + either `hygenic-scope` or unwraps is a bit confusing. +* "`do` macrocalls" have their own special handling because the macrocall is + the child of the `do`. This seems like a mess!! 
+ + +### Scope resolution + +Scopes are documented in the Juila documentation on [Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/) + +This pass disambiguates variables which have the same name in different scopes +and fills in the list of local variables within each lambda. + +#### Which data is needed to define a scope? + +As scope is a collection of variable names by category: +* `argument` - arguments to a lambda +* `local` - variables declared local (at top level) or implicitly local (in lambdas) or desugared to local-def +* `global` - variables declared global (in lambdas) or implicitly global (at top level) +* `static-parameter` - lambda type arguments from `where` clauses + +#### How does scope resolution work? + +We traverse the AST starting at the root paying attention to certian nodes: +* Nodes representing identifiers (Identifier, operators, var) + - If a variable exists in the table, it's *replaced* with the value in the table. + - If it doesn't exist, it becomes an `outerref` +* Variable scoping constructs: `local`, `local-def` + - collected by scope-block + - removed during traversal +* Scope metadata `softscope`, `hardscope` - just removed +* New scopes + - `lambda` creates a new scope containing itself and its arguments, + otherwise copying the parent scope. It resolves the body with that new scope. + - `scope-block` is really complicated - see below +* Scope queries `islocal`, `locals` + - `islocal` - statically expand to true/false based on whether var name is a local var + - `locals` - return list of locals - see `@locals` + - `require-existing-local` - somewhat like `islocal`, but allows globals + too (whaa?! naming) and produces a lowering error immediately if variable + is not known. Should be called `require-in-scope` ?? +* `break-block`, `symbolicgoto`, `symboliclabel` need special handling because + one of their arguments is a non-quoted symbol. 
+* Add static parameters for generated functions `with-static-parameters` +* `method` - special handling for static params + +`scope-block` is the complicated bit. It's processed by +* Searching the expressions within the block for any `local`, `local-def`, + `global` and assigned vars. Searching doesn't recurse into `lambda`, + `scope-block`, `module` and `toplevel` +* Building lists of implicit locals or globals (depending on whether we're in a + top level thunk) +* Figuring out which local variables need to be renamed. This is any local variable + with a name which has already occurred in processing one of the previous scope blocks +* Check any conflicting local/global decls and soft/hard scope +* Build new scope with table of renames +* Resolve the body with the new scope, applying the renames + + +### Lowered IR + +See https://docs.julialang.org/en/v1/devdocs/ast/#Lowered-form + +#### CodeInfo + +```julia +mutable struct CodeInfo + code::Vector{Any} # IR statements + codelocs::Vector{Int32} # `length(code)` Vector of indices into `linetable` + ssavaluetypes::Any # `length(code)` or Vector of inferred types after opt + ssaflags::Vector{UInt32} # flag for every statement in `code` + # 0 if meta statement + # inbounds_flag - 1 bit (LSB) + # inline_flag - 1 bit + # noinline_flag - 1 bit + # ... 
other 8 flags which are defined in compiler/optimize.jl + # effects_flags - 9 bits + method_for_inference_limit_heuristics::Any + linetable::Any + slotnames::Vector{Symbol} # names of parameters and local vars used in the code + slotflags::Vector{UInt8} # vinfo flags from flisp + slottypes::Any # nothing (used by typeinf) + rettype::Any # Any (used by typeinf) + parent::Any # nothing (used by typeinf) + edges::Any + min_world::UInt64 + max_world::UInt64 + inferred::Bool + propagate_inbounds::Bool + has_fcall::Bool + nospecializeinfer::Bool + inlining::UInt8 + constprop::UInt8 + purity::UInt16 + inlining_cost::UInt16 +end +``` + diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 65ab2a054ff55..a4f6cd513f55f 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -1,5 +1,18 @@ module JuliaLowering -# Write your package code here. +using JuliaSyntax + +using JuliaSyntax: SyntaxHead, highlight, Kind, GreenNode, @KSet_str +using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags +using JuliaSyntax: filename, first_byte, last_byte, source_location + +using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call + +include("syntax_graph.jl") +include("utils.jl") + +include("desugaring.jl") +include("scope_analysis.jl") +include("linear_ir.jl") end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl new file mode 100644 index 0000000000000..4529781b38830 --- /dev/null +++ b/JuliaLowering/src/desugaring.jl @@ -0,0 +1,477 @@ +""" +Unique symbolic identity for a variable within a `DesugaringContext` +""" +const VarId = Int + +struct SSAVar + id::VarId +end + +struct LambdaInfo + # TODO: Make this concretely typed? 
+ args::SyntaxList + ret_var::Union{Nothing,SyntaxTree} +end + +abstract type AbstractLoweringContext end + +struct DesugaringContext{GraphType} <: AbstractLoweringContext + graph::GraphType + next_var_id::Ref{VarId} +end + +function DesugaringContext() + graph = SyntaxGraph() + ensure_attributes!(graph, + kind=Kind, syntax_flags=UInt16, green_tree=GreenNode, + source_pos=Int, source=Union{SourceRef,NodeId}, + value=Any, name_val=String, + scope_type=Symbol, # :hard or :soft + var_id=VarId, + lambda_info=LambdaInfo) + DesugaringContext(freeze_attrs(graph), Ref{VarId}(1)) +end + +#------------------------------------------------------------------------------- +# AST creation utilities +_node_id(ex::NodeId) = ex +_node_id(ex::SyntaxTree) = ex.id + +_node_ids() = () +_node_ids(c, cs...) = (_node_id(c), _node_ids(cs...)...) + +function _makenode(graph::SyntaxGraph, srcref, head, children; attrs...) + id = newnode!(graph) + if kind(head) in (K"Identifier", K"core", K"top", K"SSAValue", K"Value", K"slot") || is_literal(head) + @assert length(children) == 0 + else + setchildren!(graph, id, children) + end + setattr!(graph, id; source=srcref.id, attrs...) + sethead!(graph, id, head) + return SyntaxTree(graph, id) +end + +function makenode(graph::SyntaxGraph, srcref, head, children...; attrs...) + _makenode(graph, srcref, head, children; attrs...) +end + +function makenode(ctx::AbstractLoweringContext, srcref, head, children::SyntaxTree...; attrs...) + _makenode(ctx.graph, srcref, head, _node_ids(children...); attrs...) +end + +function makenode(ctx::AbstractLoweringContext, srcref, head, children::SyntaxList; attrs...) + ctx.graph === children.graph || error("Mismatching graphs") + _makenode(ctx.graph, srcref, head, children.ids; attrs...) +end + +function mapchildren(f, ctx, ex) + cs = SyntaxList(ctx) + for e in children(ex) + push!(cs, f(e)) + end + ex2 = makenode(ctx, ex, head(ex), cs) + # Copy all attributes. 
+ # TODO: Make this type stable and efficient + for v in values(ex.graph.attributes) + if haskey(v, ex.id) + v[ex2.id] = v[ex.id] + end + end + return ex2 +end + +function new_var_id(ctx::AbstractLoweringContext) + id = ctx.next_var_id[] + ctx.next_var_id[] += 1 + return id +end + +# Create a new SSA variable +function ssavar(ctx::AbstractLoweringContext, srcref) + id = makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) + return id +end + +# Assign `ex` to an SSA variable. +# Return (variable, assignment_node) +function assign_tmp(ctx::AbstractLoweringContext, ex) + var = ssavar(ctx, ex) + assign_var = makenode(ctx, ex, K"=", var, ex) + var, assign_var +end + +# Convenience functions to create leaf nodes referring to identifiers within +# the Core and Top modules. +core_ref(ctx, ex, name) = makenode(ctx, ex, K"core", name_val=name) +Any_type(ctx, ex) = core_ref(ctx, ex, "Any") +svec_type(ctx, ex) = core_ref(ctx, ex, "svec") +nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") +unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") + +top_ref(ctx, ex, name) = makenode(ctx, ex, K"top", name_val=name) + +#------------------------------------------------------------------------------- +# Predicates and accessors working on expression trees + +function is_quoted(ex) + kind(ex) in KSet"quote top core globalref outerref break inert + meta inbounds inline noinline loopinfo" +end + +function is_sym_decl(x) + k = kind(x) + k == K"Identifier" || k == K"::" +end + +# Identifier made of underscores +function is_placeholder(ex) + kind(ex) == K"Identifier" && all(==('_'), ex.name_val) +end + +function is_eventually_call(ex::SyntaxTree) + k = kind(ex) + return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1])) +end + +function is_function_def(ex) + k = kind(ex) + return k == K"function" || k == K"->" || + (k == K"=" && numchildren(ex) == 2 && is_eventually_call(ex[1])) +end + +function identifier_name(ex) + kind(ex) == K"var" ? 
ex[1] : ex +end + +function is_valid_name(ex) + n = identifier_name(ex).name_val + n !== "ccall" && n !== "cglobal" +end + +function decl_var(ex) + kind(ex) == K"::" ? ex[1] : ex +end + +# given a complex assignment LHS, return the symbol that will ultimately be assigned to +function assigned_name(ex) + k = kind(ex) + if (k == K"call" || k == K"curly" || k == K"where") || (k == K"::" && is_eventually_call(ex)) + assigned_name(ex[1]) + else + ex + end +end + +#------------------------------------------------------------------------------- +# Lowering Pass 1 - basic desugaring +function expand_assignment(ctx, ex) +end + +function expand_condition(ctx, ex) + if head(ex) == K"block" || head(ex) == K"||" || head(ex) == K"&&" + # || and && get special lowering so that they compile directly to jumps + # rather than first computing a bool and then jumping. + error("TODO expand_condition") + end + expand_forms(ctx, ex) +end + +function expand_let(ctx, ex) + scope_type = get(ex, :scope_type, :hard) + blk = ex[2] + if numchildren(ex[1]) == 0 # TODO: Want to use !haschildren(ex[1]) but this doesn't work... + return makenode(ctx, ex, K"block", blk; + scope_type=scope_type) + end + for binding in Iterators.reverse(children(ex[1])) + kb = kind(binding) + if is_sym_decl(kb) + blk = makenode(ctx, ex, K"block", + makenode(ctx, ex, K"local", binding), + blk; + scope_type=scope_type + ) + elseif kb == K"=" && numchildren(binding) == 2 + lhs = binding[1] + rhs = binding[2] + if is_sym_decl(lhs) + tmp, tmpdef = assign_tmp(ctx, rhs) + blk = makenode(ctx, binding, K"block", + tmpdef, + makenode(ctx, ex, K"block", + makenode(ctx, lhs, K"local_def", lhs), # TODO: Use K"local" with attr? 
+ makenode(ctx, rhs, K"=", decl_var(lhs), tmp), + blk; + scope_type=scope_type + ) + ) + else + TODO("Functions and multiple assignment") + end + else + throw(LoweringError(binding, "Invalid binding in let")) + continue + end + end + return blk +end + +function expand_call(ctx, ex) + cs = expand_forms(ctx, children(ex)) + if is_infix_op_call(ex) || is_postfix_op_call(ex) + cs[1], cs[2] = cs[2], cs[1] + end + # TODO: keywords + makenode(ctx, ex, K"call", cs...) +end + +# Strip variable type declarations from within a `local` or `global`, returning +# the stripped expression. Works recursively with complex left hand side +# assignments containing tuple destructuring. Eg, given +# (x::T, (y::U, z)) +# strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z) +# and return (x, (y, z)) +function strip_decls!(ctx, stmts, declkind, ex) + k = kind(ex) + if k == K"Identifier" + push!(stmts, makenode(ctx, ex, declkind, ex)) + ex + elseif k == K"::" + @chk numchildren(ex) == 2 + name = ex[1] + @chk kind(name) == K"Identifier" + push!(stmts, makenode(ctx, ex, declkind, name)) + push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) + name + elseif k == K"tuple" || k == K"parameters" + cs = SyntaxList(ctx) + for e in children(ex) + push!(cs, strip_decls!(ctx, stmts, declkind, e)) + end + makenode(ctx, ex, k, cs) + end +end + +# local x, (y=2), z => local x; local y; y = 2; local z +function expand_decls(ctx, ex) + declkind = kind(ex) + stmts = SyntaxList(ctx) + for binding in children(ex) + kb = kind(binding) + if is_function_def(binding) + push!(stmts, makenode(ctx, binding, declkind, assigned_name(binding))) + push!(stmts, binding) + elseif is_prec_assignment(kb) + lhs = strip_decls!(ctx, stmts, declkind, binding[1]) + push!(stmts, makenode(ctx, binding, kb, lhs, binding[2])) + elseif is_sym_decl(binding) + strip_decls!(ctx, stmts, declkind, binding) + else + throw(LoweringError("invalid syntax in variable declaration")) + end + end + makenode(ctx, ex, K"block", 
stmts) +end + +function analyze_function_arg(full_ex) + name = nothing + type = nothing + default = nothing + is_slurp = false + is_nospecialize = false + ex = full_ex + while true + k = kind(ex) + if k == K"Identifier" || k == K"tuple" + name = ex + break + elseif k == K"::" + @chk numchildren(ex) in (1,2) + if numchildren(ex) == 1 + type = ex[1] + else + name = ex[1] + type = ex[2] + end + break + elseif k == K"..." + @chk full_ex !is_slurp + @chk numchildren(ex) == 1 + is_slurp = true + ex = ex[1] + elseif k == K"meta" + @chk ex[1].name_val == "nospecialize" + is_nospecialize = true + ex = ex[2] + elseif k == K"=" + @chk full_ex isnothing(default) && !is_slurp + default = ex[2] + ex = ex[1] + else + throw(LoweringError(ex, "Invalid function argument")) + end + end + return (name=name, + type=type, + default=default, + is_slurp=is_slurp, + is_nospecialize=is_nospecialize) +end + +function expand_function_def(ctx, ex) + @chk numchildren(ex) in (1,2) + name = ex[1] + if kind(name) == K"where" + TODO("where handling") + end + return_type = nothing + if kind(name) == K"::" + @chk numchildren(name) == 2 + return_type = name[2] + name = name[1] + end + if numchildren(ex) == 1 && is_identifier(name) # TODO: Or name as globalref + if !is_valid_name(name) + throw(LoweringError(name, "Invalid function name")) + end + return makenode(ctx, ex, K"method", identifier_name(name)) + elseif kind(name) == K"call" + callex = name + body = ex[2] + # TODO + # static params + # nospecialize + # argument destructuring + # dotop names + # overlays + + # Add self argument where necessary + args = name[2:end] + name = name[1] + if kind(name) == K"::" + if numchildren(name) == 1 + farg = makenode(ctx, name, K"::", + makenode(ctx, name, K"Identifier", name_val="#self#"), + name[1]) + else + TODO("Fixme type") + farg = name + end + function_name = nothing_(ctx, ex) + else + if !is_valid_name(name) + throw(LoweringError(name, "Invalid function name")) + end + farg = makenode(ctx, name, K"::", 
+ makenode(ctx, name, K"Identifier", name_val="#self#"), + makenode(ctx, name, K"call", core_ref(ctx, name, "Typeof"), name)) + function_name = name + end + + # preamble is arbitrary code which computes + # svec(types, sparms, location) + + arg_names = SyntaxList(ctx) + arg_types = SyntaxList(ctx) + for (i,arg) in enumerate(args) + info = analyze_function_arg(arg) + aname = (isnothing(info.name) || is_placeholder(info.name)) ? + unused(ctx, arg) : info.name + push!(arg_names, aname) + atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) + @assert !info.is_nospecialize # TODO + @assert !isnothing(info.name) && kind(info.name) == K"Identifier" # TODO + if info.is_slurp + if i != length(args) + throw(LoweringError(arg, "`...` may only be used for the last function argument")) + end + atype = makenode(K"curly", core_ref(ctx, arg, "Vararg"), arg) + end + push!(arg_types, atype) + end + + preamble = makenode(ctx, ex, K"call", + svec_type(ctx, callex), + makenode(ctx, callex, K"call", + svec_type(ctx, name), + arg_types...), + makenode(ctx, callex, K"Value", value=source_location(LineNumberNode, callex)) + ) + if !isnothing(return_type) + ret_var, ret_assign = assign_tmp(ctx, return_type) + body = makenode(ctx, body, K"block", + ret_assign, + body, + scope_type=:hard) + else + ret_var = nothing + body = makenode(ctx, body, K"block", + body, + scope_type=:hard) + end + lambda = makenode(ctx, body, K"lambda", body, + lambda_info=LambdaInfo(arg_names, ret_var)) + makenode(ctx, ex, K"block", + makenode(ctx, ex, K"method", + function_name, + preamble, + lambda), + makenode(ctx, ex, K"unnecessary", function_name)) + elseif kind(name) == K"tuple" + TODO(name, "Anon function lowering") + else + throw(LoweringError(name, "Bad function definition")) + end +end + +function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) + k = kind(ex) + if k == K"call" + expand_call(ctx, ex) + elseif k == K"function" + expand_forms(ctx, expand_function_def(ctx, ex)) + elseif k == 
K"let" + return expand_forms(ctx, expand_let(ctx, ex)) + elseif k == K"local" || k == K"global" + if numchildren(ex) == 1 && kind(ex[1]) == K"Identifier" + # Don't recurse when already simplified - `local x`, etc + ex + else + expand_forms(ctx, expand_decls(ctx, ex)) # FIXME + end + elseif is_operator(k) && !haschildren(ex) + return makenode(ctx, ex, K"Identifier", name_val=ex.name_val) + elseif k == K"char" || k == K"var" + @chk numchildren(ex) == 1 + return ex[1] + elseif k == K"string" + if numchildren(ex) == 1 && kind(ex[1]) == K"String" + return ex[1] + else + makenode(ctx, ex, K"call", top_ref(ctx, ex, "string"), expand_forms(children(ex))...) + end + elseif k == K"tuple" + # TODO: named tuples + makenode(ctx, ex, K"call", core_ref(ctx, ex, "tuple"), expand_forms(ctx, children(ex))...) + elseif !haschildren(ex) + return ex + else + if k == K"=" + @chk numchildren(ex) == 2 + if kind(ex[1]) ∉ (K"Identifier", K"SSAValue") + TODO(ex, "destructuring assignment") + end + end + mapchildren(e->expand_forms(ctx,e), ctx, ex) + end +end + +function expand_forms(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) + res = SyntaxList(ctx) + for e in exs + push!(res, expand_forms(ctx, e)) + end + res +end + diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl new file mode 100644 index 0000000000000..874693105575c --- /dev/null +++ b/JuliaLowering/src/linear_ir.jl @@ -0,0 +1,504 @@ +#------------------------------------------------------------------------------- +# Lowering pass 4: Flatten to linear IR + +function is_simple_atom(ex) + k = kind(ex) + # FIXME +# (or (number? x) (string? x) (char? x) +# (and (pair? x) (memq (car x) '(ssavalue null true false thismodule))) +# (eq? (typeof x) 'julia_value))) + is_number(k) || k == K"String" || k == K"Char" +end + +# N.B.: This assumes that resolve-scopes has run, so outerref is equivalent to +# a global in the current scope. 
+function is_valid_ir_argument(ex) + k = kind(ex) + return is_simple_atom(ex) + # FIXME || + #(k == K"outerref" && nothrow_julia_global(ex[1])) || + #(k == K"globalref" && nothrow_julia_global(ex)) || + #(k == K"quote" || k = K"inert" || k == K"top" || + #k == K"core" || k == K"slot" || k = K"static_parameter") +end + +""" +Context for creating linear IR. + +One of these is created per lambda expression to flatten the body down to +linear IR. +""" +struct LinearIRContext{GraphType} <: AbstractLoweringContext + graph::GraphType + code::SyntaxList{GraphType, Vector{NodeId}} + next_var_id::Ref{Int} + return_type::Union{Nothing,NodeId} + var_info::Dict{VarId,VarInfo} + mod::Module +end + +function LinearIRContext(ctx::ScopeResolutionContext, mod, return_type) + LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, + return_type, ctx.var_info, mod) +end + +function LinearIRContext(ctx::LinearIRContext, return_type) + LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, + return_type, ctx.var_info, ctx.mod) +end + +function is_valid_body_ir_argument(ex) + is_valid_ir_argument(ex) && return true + return false + # FIXME + k = kind(ex) + return k == K"Identifier" && # Arguments are always defined slots + TODO("vinfo-table stuff") +end + +function is_simple_arg(ex) + k = kind(ex) + return is_simple_atom(ex) || k == K"Identifier" || k == K"quote" || k == K"inert" || + k == K"top" || k == K"core" || k == K"globalref" || k == K"outerref" +end + +function is_single_assign_var(ctx::LinearIRContext, ex) + return false # FIXME + id = ex.var_id + # return id in ctx.lambda_args || +end + +function is_const_read_arg(ctx, ex) + k = kind(ex) + return is_simple_atom(ex) || + is_single_assign_var(ctx, ex) || + k == K"quote" || k == K"inert" || k == K"top" || k == K"core" +end + +function is_valid_ir_rvalue(lhs, rhs) + return kind(lhs) == K"SSAValue" || + is_valid_ir_argument(rhs) || + (kind(lhs) == K"Identifier" && + # FIXME: add: splatnew isdefined invoke 
cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref + kind(rhs) in KSet"new the_exception call foreigncall") +end + +# evaluate the arguments of a call, creating temporary locations as needed +function compile_args(ctx, args) + # First check if all the arguments as simple (and therefore side-effect free). + # Otherwise, we need to use ssa values for all arguments to ensure proper + # left-to-right evaluation semantics. + all_simple = all(is_simple_arg, args) + args_out = SyntaxList(ctx) + for arg in args + arg_val = compile(ctx, arg, true, false) + if (all_simple || is_const_read_arg(ctx, arg_val)) && is_valid_body_ir_argument(arg_val) + push!(args_out, arg_val) + else + push!(args_out, emit_assign_tmp(ctx, arg_val)) + end + end + return args_out +end + +function emit(ctx::LinearIRContext, ex) + push!(ctx.code, ex) + return ex +end + +function emit(ctx::LinearIRContext, srcref, k, args...) + emit(ctx, makenode(ctx, srcref, k, args...)) +end + +# Emit computation of ex, assigning the result to an ssavar and returning that +function emit_assign_tmp(ctx::LinearIRContext, ex) + # TODO: We could replace this with an index into the code array right away? + tmp = makenode(ctx, ex, K"SSAValue", var_id=ctx.next_var_id[]) + ctx.next_var_id[] += 1 + emit(ctx, ex, K"=", tmp, ex) + return tmp +end + +function emit_return(ctx, srcref, ex) + if isnothing(ex) + return + end + # TODO: return type handling + # TODO: exception stack handling + # returning lambda directly is needed for @generated + if !(is_valid_ir_argument(ex) || head(ex) == K"lambda") + ex = emit_assign_tmp(ctx, ex) + end + # TODO: if !isnothing(ctx.return_type) ... 
+ emit(ctx, srcref, K"return", ex) +end + +function emit_assignment(ctx, srcref, lhs, rhs) + if !isnothing(rhs) + if is_valid_ir_rvalue(lhs, rhs) + emit(ctx, srcref, K"=", lhs, rhs) + else + r = emit_assign_tmp(ctx, rhs) + emit(ctx, srcref, K"=", lhs, r) + end + else + # in unreachable code (such as after return); still emit the assignment + # so that the structure of those uses is preserved + emit(ctx, rhs, K"=", lhs, nothing_(ctx, srcref)) + nothing + end +end + +# This pass behaves like an interpreter on the given code. +# To perform stateful operations, it calls `emit` to record that something +# needs to be done. In value position, it returns an expression computing +# the needed value. +# +# TODO: is it ok to return `nothing` if we have no value in some sense +function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) + k = kind(ex) + if k == K"Identifier" || is_literal(k) || k == K"SSAValue" || k == K"quote" || k == K"inert" || + k == K"top" || k == K"core" || k == K"Value" + # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall + if in_tail_pos + emit_return(ctx, ex, ex) + elseif needs_value + if is_placeholder(ex) + # TODO: ensure outterref, globalref work here + throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) + end + ex + else + if k == K"Identifier" + emit(ctx, ex) # keep symbols for undefined-var checking + end + nothing + end + elseif k == K"call" + # TODO k ∈ splatnew foreigncall cfunction new_opaque_closure cglobal + args = compile_args(ctx, children(ex)) + callex = makenode(ctx, ex, k, args) + if in_tail_pos + emit_return(ctx, ex, callex) + elseif needs_value + callex + else + emit(ctx, callex) + nothing + end + elseif k == K"=" + lhs = ex[1] + # TODO: Handle underscore + rhs = compile(ctx, ex[2], true, false) + # TODO look up arg-map for renaming if lhs was reassigned + if needs_value && !isnothing(rhs) + r = 
emit_assign_tmp(ctx, rhs) + emit(ctx, ex, K"=", lhs, r) + if in_tail_pos + emit_return(ctx, ex, r) + else + r + end + else + emit_assignment(ctx, ex, lhs, rhs) + end + elseif k == K"block" + nc = numchildren(ex) + for i in 1:nc + islast = i == nc + compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) + end + elseif k == K"return" + compile(ctx, ex[1], true, true) + nothing + elseif k == K"method" + # TODO + # throw(LoweringError(ex, + # "Global method definition needs to be placed at the top level, or use `eval`")) + if numchildren(ex) == 1 + if in_tail_pos + emit_return(ctx, ex, ex) + elseif needs_value + ex + else + emit(ctx, ex) + end + else + @chk numchildren(ex) == 3 + fname = ex[1] + sig = compile(ctx, ex[2], true, false) + if !is_valid_ir_argument(sig) + sig = emit_assign_tmp(ctx, sig) + end + lam = ex[3] + if kind(lam) == K"lambda" + lam = compile_lambda(ctx, lam) + else + # lam = emit_assign_tmp(ctx, compile(ctx, lam, true, false)) + TODO(lam, "non-lambda method argument??") + end + emit(ctx, ex, K"method", fname, sig, lam) + @assert !needs_value && !in_tail_pos + nothing + end + elseif k == K"lambda" + lam = compile_lambda(ctx, ex) + if in_tail_pos + emit_return(ctx, ex, lam) + elseif needs_value + lam + else + emit(ctx, lam) + end + elseif k == K"global" + if needs_value + throw(LoweringError(ex, "misplaced `global` declaration")) + end + emit(ctx, ex) + nothing + elseif k == K"local_def" || k == K"local" + nothing + else + throw(LoweringError(ex, "Invalid syntax")) + end +end + + +#------------------------------------------------------------------------------- + +# Recursively renumber an expression within linear IR +# flisp: renumber-stuff +function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) + k = kind(ex) + if k == K"Identifier" + id = ex.var_id + slot_id = get(slot_rewrites, id, nothing) + if !isnothing(slot_id) + makenode(ctx, ex, K"slot"; var_id=slot_id) + else + # TODO: look up any static parameters + ex + end + 
elseif k == K"outerref" || k == K"meta" + TODO(ex, "_renumber $k") + elseif is_literal(k) || is_quoted(k) || k == K"global" + ex + elseif k == K"SSAValue" + makenode(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[ex.var_id]) + elseif k == K"goto" || k == K"enter" || k == K"gotoifnot" + TODO(ex, "_renumber $k") + # elseif k == K"lambda" + # renumber_lambda(ctx, ex) + else + mapchildren(ctx, ex) do e + _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) + end + # TODO: foreigncall error check: + # "ccall function name and library expression cannot reference local variables" + end +end + +function _ir_to_expr() +end + +# flisp: renumber-lambda, compact-ir +function renumber_body(ctx, input_code, slot_rewrites) + # Step 1: Remove any assignments to SSA variables, record the indices of labels + ssa_rewrites = Dict{VarId,VarId}() + label_table = Dict{String,Int}() + code = SyntaxList(ctx) + for ex in input_code + k = kind(ex) + ex_out = nothing + if k == K"=" && kind(ex[1]) == K"SSAValue" + lhs_id = ex[1].var_id + if kind(ex[2]) == K"SSAValue" + # For SSA₁ = SSA₂, record that all uses of SSA₁ should be replaced by SSA₂ + ssa_rewrites[lhs_id] = ssa_rewrites[ex[2].var_id] + else + # Otherwise, record which `code` index this SSA value refers to + ssa_rewrites[lhs_id] = length(code) + 1 + ex_out = ex[2] + end + elseif k == K"label" + label_table[ex.name_val] = length(code) + 1 + else + ex_out = ex + end + if !isnothing(ex_out) + push!(code, ex_out) + end + end + + # Step 2: + # * Translate any SSA uses and labels into indices in the code table + # * Translate locals into slot indices + for i in 1:length(code) + code[i] = _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, code[i]) + end + code +end + +function to_ir_expr(ex) + k = kind(ex) + if is_literal(k) + ex.value + elseif k == K"core" + GlobalRef(Core, Symbol(ex.name_val)) + elseif k == K"top" + GlobalRef(Base, Symbol(ex.name_val)) + elseif k == K"Identifier" + # Implicitly refers to name in parent module + # 
TODO: Should we even have plain identifiers at this point or should + # they all effectively be resolved into GlobalRef earlier? + Symbol(ex.name_val) + elseif k == K"slot" + Core.SlotNumber(ex.var_id) + elseif k == K"SSAValue" + Core.SSAValue(ex.var_id) + elseif k == K"return" + Core.ReturnNode(to_ir_expr(ex[1])) + elseif is_quoted(k) + TODO(ex, "Convert SyntaxTree to Expr") + else + # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ + # + # call invoke static_parameter `=` method struct_type abstract_type + # primitive_type global const new splatnew isdefined the_exception + # enter leave pop_exception inbounds boundscheck loopinfo copyast meta + # foreigncall new_opaque_closure lambda + head = k == K"call" ? :call : + k == K"=" ? :(=) : + k == K"method" ? :method : + k == K"global" ? :global : + k == K"const" ? :const : + nothing + if isnothing(head) + TODO(ex, "Unhandled form") + end + Expr(head, map(to_ir_expr, children(ex))...) + end +end + +# Convert our data structures to CodeInfo +function to_code_info(input_code, mod, funcname, var_info, slot_rewrites) + # Convert code to Expr and record low res locations in table + num_stmts = length(input_code) + code = Vector{Any}(undef, num_stmts) + codelocs = Vector{Int32}(undef, num_stmts) + linetable_map = Dict{Tuple{Int,String}, Int32}() + linetable = Any[] + for i in 1:length(code) + code[i] = to_ir_expr(input_code[i]) + fname = filename(input_code[i]) + lineno, _ = source_location(input_code[i]) + loc = (lineno, fname) + codelocs[i] = get!(linetable_map, loc) do + inlined_at = 0 # FIXME: nonzero for expanded macros + full_loc = Core.LineInfoNode(mod, Symbol(funcname), Symbol(fname), + Int32(lineno), Int32(inlined_at)) + push!(linetable, full_loc) + length(linetable) + end + end + + # FIXME + ssaflags = zeros(UInt32, length(code)) + + nslots = length(slot_rewrites) + slotnames = Vector{Symbol}(undef, nslots) + slot_rename_inds = Dict{String,Int}() + slotflags = Vector{UInt8}(undef, nslots) 
+ for (id,i) in slot_rewrites + info = var_info[id] + name = info.name + ni = get(slot_rename_inds, name, 0) + slot_rename_inds[name] = ni + 1 + if ni > 0 + name = "$name@$ni" + end + slotnames[i] = Symbol(name) + slotflags[i] = 0x00 # FIXME!! + end + + _CodeInfo( + code, + codelocs, + num_stmts, # ssavaluetypes (why put num_stmts in here??) + ssaflags, + nothing, # method_for_inference_limit_heuristics + linetable, + slotnames, + slotflags, + nothing, # slottypes + Any, # rettype + nothing, # parent + nothing, # edges + Csize_t(1), # min_world + typemax(Csize_t), # max_world + false, # inferred + false, # propagate_inbounds + false, # has_fcall + false, # nospecializeinfer + 0x00, # inlining + 0x00, # constprop + 0x0000, # purity + 0xffff, # inlining_cost + ) +end + +function renumber_lambda(ctx, lambda_info, code) + slot_rewrites = Dict{VarId,Tuple{Kind,Int}}() + # lambda arguments become K"slot"; type parameters become K"static_parameter" + info = ex.lambda_info + for (i,arg) in enumerate(info.args) + slot_rewrites[arg.var_id] = i + end + # TODO: add static_parameter here also + renumber_body(ctx, code, slot_rewrites) +end + +# flisp: compile-body +function compile_body(ctx, ex) + compile(ctx, ex, true, true) + # TODO: Fix any gotos + # TODO: Filter out any newvar nodes where the arg is definitely initialized +end + +function _add_slots!(slot_rewrites, var_info, var_ids) + n = length(slot_rewrites) + 1 + for id in var_ids + info = var_info[id] + if info.islocal + slot_rewrites[id] = n + n += 1 + end + end + slot_rewrites +end + +function compile_lambda(outer_ctx, ex) + info = ex.lambda_info + return_type = nothing # FIXME + # TODO: Add assignments for reassigned arguments to body using info.args + ctx = LinearIRContext(outer_ctx, return_type) + compile_body(ctx, ex[1]) + slot_rewrites = Dict{VarId,Int}() + _add_slots!(slot_rewrites, ctx.var_info, (a.var_id for a in info.args)) + _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_vars) + code = renumber_body(ctx, 
ctx.code, slot_rewrites) + to_code_info(code, ctx.mod, "none", ctx.var_info, slot_rewrites) +end + +function compile_toplevel(outer_ctx, mod, ex) + return_type = nothing + ctx = LinearIRContext(outer_ctx, mod, return_type) + compile_body(ctx, ex) + slot_rewrites = Dict{VarId,Int}() + _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_vars) + code = renumber_body(ctx, ctx.code, slot_rewrites) + to_code_info(code, mod, "top-level scope", ctx.var_info, slot_rewrites) + #var_info = nothing # FIXME + #makenode(ctx, ex, K"Value"; value=LambdaIR(SyntaxList(ctx), ctx.code, var_info)) +end + diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl new file mode 100644 index 0000000000000..1bfdc7f551783 --- /dev/null +++ b/JuliaLowering/src/scope_analysis.jl @@ -0,0 +1,274 @@ +# Lowering pass 2: analyze scopes (passes 2/3 in flisp code) +# +# This pass analyzes the names (variables/constants etc) used in scopes +# +# This pass records information about variables used by closure conversion. +# finds which variables are assigned or captured, and records variable +# type declarations. 
+# +# This info is recorded by setting the second argument of `lambda` expressions +# in-place to +# (var-info-lst captured-var-infos ssavalues static_params) +# where var-info-lst is a list of var-info records + +#------------------------------------------------------------------------------- +# AST traversal functions - useful for performing non-recursive AST traversals +function _schedule_traverse(stack, e) + push!(stack, e) + return nothing +end +function _schedule_traverse(stack, es::Union{Tuple,AbstractVector,Base.Generator}) + append!(stack, es) + return nothing +end + +function traverse_ast(f, exs) + todo = SyntaxList(first(exs).graph) + append!(todo, exs) + while !isempty(todo) + f(pop!(todo), e->_schedule_traverse(todo, e)) + end +end + +function traverse_ast(f, ex::SyntaxTree) + traverse_ast(f, (ex,)) +end + +function find_in_ast(f, ex::SyntaxTree) + todo = SyntaxList(ex.graph) + push!(todo, ex) + while !isempty(todo) + e1 = pop!(todo) + res = f(e1, e->_schedule_traverse(todo, e)) + if !isnothing(res) + return res + end + end + return nothing +end + +# NB: This only really works after expand_forms has already processed assignments. +function find_scope_vars(ex, children_only) + assigned_vars = Set{String}() + # TODO: + # local_vars + local_def_vars = Set{String}() + # global_vars + used_vars = Set{String}() + traverse_ast(children_only ? 
children(ex) : ex) do e, traverse + k = kind(e) + if k == K"Identifier" + push!(used_vars, e.name_val) + elseif !haschildren(e) || hasattr(e, :scope_type) || is_quoted(k) || + k in KSet"lambda module toplevel" + return + elseif k == K"local_def" + push!(local_def_vars, e[1].name_val) + # elseif k == K"method" TODO static parameters + elseif k == K"=" + v = decl_var(e[1]) + if !(kind(v) in KSet"SSAValue globalref outerref" || is_placeholder(v)) + push!(assigned_vars, v.name_val) + end + traverse(e[2]) + else + traverse(children(e)) + end + end + return assigned_vars, local_def_vars, used_vars +end + +function find_decls(decl_kind, ex) + vars = Vector{typeof(ex)}() + traverse_ast(ex) do e, traverse + k = kind(e) + if !haschildren(e) || is_quoted(k) || k in KSet"lambda scope_block module toplevel" + return + elseif k == decl_kind + v = decl_var(e[1]) + if !is_placeholder(v) + push!(vars, decl_var(v)) + end + else + traverse(children(e)) + end + end + var_names = [v.name_val for v in vars] + return unique(var_names) +end + +# Determine whether decl_kind is in the scope of `ex` +# +# flisp: find-scope-decl +function has_scope_decl(decl_kind, ex) + find_in_ast(ex) do e, traverse + k = kind(e) + if !haschildren(e) || is_quoted(k) || k in KSet"lambda scope_block module toplevel" + return + elseif k == decl_kind + return e + else + traverse(children(ex)) + end + end +end + +# struct LambdaVars +# # For analyze-variables pass +# # var_info_lst::Set{Tuple{Symbol,Symbol}} # ish? +# # captured_var_infos ?? 
+# # ssalabels::Set{SSAValue} +# # static_params::Set{Symbol} +# end + +# Mirror of flisp scope info structure +# struct ScopeInfo +# lambda_vars::Union{LambdaLocals,LambdaInfo} +# parent::Union{Nothing,ScopeBlockInfo} +# args::Set{Symbol} +# locals::Set{Symbol} +# globals::Set{Symbol} +# static_params::Set{Symbol} +# renames::Dict{Symbol,Symbol} +# implicit_globals::Set{Symbol} +# warn_vars::Set{Symbol} +# is_soft::Bool +# is_hard::Bool +# table::Dict{Symbol,Any} +# end + +""" +Metadata about a variable name - whether it's a local, etc +""" +struct VarInfo + name::String + islocal::Bool # Local variable (if unset, variable is global) + isarg::Bool # Is a function argument + is_single_assign::Bool # Single assignment +end + +struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext + graph::GraphType + next_var_id::Ref{VarId} + # Stack of name=>id mappings for each scope, innermost scope last. + var_id_stack::Vector{Dict{String,VarId}} + # Stack of var `id`s for lambda (or toplevel thunk) being processed, innermost last. + lambda_vars::Vector{Set{VarId}} + # Metadata about variables. There's only one map for this, as var_id is is + # unique across the context, even for same-named vars in unrelated local + # scopes. 
+ var_info::Dict{VarId,VarInfo} +end + +function ScopeResolutionContext(ctx::DesugaringContext) + graph = ensure_attributes(ctx.graph, lambda_vars=Set{VarId}) + ScopeResolutionContext(graph, ctx.next_var_id, + Vector{Dict{String,VarId}}(), + [Set{VarId}()], + Dict{VarId,VarInfo}()) +end + +function lookup_var(ctx, name) + for i in lastindex(ctx.var_id_stack):-1:1 + ids = ctx.var_id_stack[i] + id = get(ids, name, nothing) + if !isnothing(id) + return id + end + end + return nothing +end + +function new_var(ctx, name; isarg=false, islocal=isarg) + id = new_var_id(ctx) + ctx.var_info[id] = VarInfo(name, islocal, isarg, false) + push!(last(ctx.lambda_vars), id) + id +end + +function resolve_scope!(f::Function, ctx, ex, is_toplevel) + id_map = Dict{String,VarId}() + is_hard_scope = get(ex, :scope_type, :hard) == :hard + assigned, local_def_vars, used_vars = find_scope_vars(ex, !is_toplevel) + for name in local_def_vars + id_map[name] = new_var(ctx, name, islocal=true) + end + for name in assigned + if !haskey(id_map, name) && isnothing(lookup_var(ctx, name)) + # Previously unknown assigned vars are impicit locals or globals + id_map[name] = new_var(ctx, name, islocal=!is_toplevel) + end + end + outer_scope = is_toplevel ? 
id_map : ctx.var_id_stack[1] + for name in used_vars + if !haskey(id_map, name) && isnothing(lookup_var(ctx, name)) + # Identifiers which weren't discovered further up the stack are + # newly discovered globals + outer_scope[name] = new_var(ctx, name, islocal=false) + end + end + push!(ctx.var_id_stack, id_map) + res = f(ctx) + pop!(ctx.var_id_stack) + return res +end + +resolve_scopes!(ctx::DesugaringContext, ex) = resolve_scopes!(ScopeResolutionContext(ctx), ex) + +function resolve_scopes!(ctx::ScopeResolutionContext, ex) + resolve_scope!(ctx, ex, true) do cx + resolve_scopes_!(cx, ex) + end + setattr!(ctx.graph, ex.id, lambda_vars=only(ctx.lambda_vars)) + SyntaxTree(ctx.graph, ex.id) +end + +function resolve_scopes_!(ctx, ex) + k = kind(ex) + if k == K"Identifier" + if is_placeholder(ex) + return # FIXME - make these K"placeholder"? + end + # TODO: Maybe we shouldn't do this in place?? + setattr!(ctx.graph, ex.id, var_id=lookup_var(ctx, ex.name_val)) + elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" + return + elseif k == K"global" + TODO("global") + elseif k == K"local" + TODO("local") + # TODO + # elseif require_existing_local + # elseif locals # return Dict of locals + # elseif islocal + elseif k == K"lambda" + # TODO: Lambda captures! 
+ info = ex.lambda_info + id_map = Dict{String,VarId}() + for a in info.args + id_map[a.name_val] = new_var(ctx, a.name_val, isarg=true) + end + push!(ctx.var_id_stack, id_map) + for a in info.args + resolve_scopes!(ctx, a) + end + vars = Set{VarId}() + setattr!(ctx.graph, ex.id, lambda_vars=vars) + push!(ctx.lambda_vars, vars) + resolve_scopes_!(ctx, ex[1]) + pop!(ctx.lambda_vars) + pop!(ctx.var_id_stack) + elseif k == K"block" && hasattr(ex, :scope_type) + resolve_scope!(ctx, ex, false) do cx + for e in children(ex) + resolve_scopes_!(cx, e) + end + end + else + for e in children(ex) + resolve_scopes_!(ctx, e) + end + end + ex +end + diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl new file mode 100644 index 0000000000000..f739ef2c1e552 --- /dev/null +++ b/JuliaLowering/src/syntax_graph.jl @@ -0,0 +1,432 @@ +const NodeId = Int + +""" +Directed graph with arbitrary attributes on nodes. Used here for representing +one or several syntax trees. +""" +struct SyntaxGraph{Attrs} + edge_ranges::Vector{UnitRange{Int}} + edges::Vector{NodeId} + attributes::Attrs +end + +SyntaxGraph() = SyntaxGraph{Dict{Symbol,Any}}(Vector{UnitRange{Int}}(), + Vector{NodeId}(), Dict{Symbol,Any}()) + +# "Freeze" attribute names and types, encoding them in the type of the returned +# SyntaxGraph. +function freeze_attrs(graph::SyntaxGraph) + frozen_attrs = (; pairs(graph.attributes)...) + SyntaxGraph(graph.edge_ranges, graph.edges, frozen_attrs) +end + +function _show_attrs(io, attributes::Dict) + show(io, MIME("text/plain"), attributes) +end +function _show_attrs(io, attributes::NamedTuple) + show(io, MIME("text/plain"), Dict(pairs(attributes)...)) +end + +function Base.show(io::IO, ::MIME"text/plain", graph::SyntaxGraph) + print(io, typeof(graph), + " with $(length(graph.edge_ranges)) vertices, $(length(graph.edges)) edges, and attributes:\n") + _show_attrs(io, graph.attributes) +end + +function ensure_attributes!(graph::SyntaxGraph; kws...) 
+ for (k,v) in pairs(kws) + @assert k isa Symbol + @assert v isa Type + if haskey(graph.attributes, k) + v0 = valtype(graph.attributes[k]) + v == v0 || throw(ErrorException("Attribute type mismatch $v != $v0")) + else + graph.attributes[k] = Dict{NodeId,v}() + end + end +end + +function ensure_attributes(graph::SyntaxGraph; kws...) + g = SyntaxGraph(graph.edge_ranges, graph.edges, Dict(pairs(graph.attributes)...)) + ensure_attributes!(g; kws...) + freeze_attrs(g) +end + +function newnode!(graph::SyntaxGraph) + push!(graph.edge_ranges, 0:-1) # Invalid range start => leaf node + return length(graph.edge_ranges) +end + +function setchildren!(graph::SyntaxGraph, id, children::NodeId...) + setchildren!(graph, id, children) +end + +function setchildren!(graph::SyntaxGraph, id, children) + n = length(graph.edges) + graph.edge_ranges[id] = n+1:(n+length(children)) + # TODO: Reuse existing edges if possible + append!(graph.edges, children) +end + +function JuliaSyntax.haschildren(graph::SyntaxGraph, id) + first(graph.edge_ranges[id]) > 0 +end + +function JuliaSyntax.numchildren(graph::SyntaxGraph, id) + length(graph.edge_ranges[id]) +end + +function JuliaSyntax.children(graph::SyntaxGraph, id) + @view graph.edges[graph.edge_ranges[id]] +end + +function JuliaSyntax.child(graph::SyntaxGraph, id::NodeId, i::Integer) + graph.edges[graph.edge_ranges[id][i]] +end + +function getattr(graph::SyntaxGraph{<:Dict}, name::Symbol) + getfield(graph, :attributes)[name] +end + +function getattr(graph::SyntaxGraph{<:NamedTuple}, name::Symbol) + getfield(getfield(graph, :attributes), name) +end + +function getattr(graph::SyntaxGraph, name::Symbol, default) + get(getfield(graph, :attributes), name, default) +end + +# FIXME: Probably terribly non-inferrable? +function setattr!(graph::SyntaxGraph, id; attrs...) 
+ for (k,v) in pairs(attrs) + getattr(graph, k)[id] = v + end +end + +function Base.getproperty(graph::SyntaxGraph, name::Symbol) + # FIXME: Remove access to internals + name === :edge_ranges && return getfield(graph, :edge_ranges) + name === :edges && return getfield(graph, :edges) + name === :attributes && return getfield(graph, :attributes) + return getattr(graph, name) +end + +function sethead!(graph, id::NodeId, h::SyntaxHead) + graph.kind[id] = kind(h) + f = flags(h) + if f != 0 + graph.syntax_flags[id] = f + end +end + +function sethead!(graph, id::NodeId, k::Kind) + graph.kind[id] = k +end + +function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) + id = newnode!(graph) + sethead!(graph, id, head(node)) + if !isnothing(node.val) + v = node.val + if v isa Symbol + setattr!(graph, id, name_val=string(v)) + else + setattr!(graph, id, value=v) + end + end + setattr!(graph, id, source=SourceRef(node.source, node.position, node.raw)) + if haschildren(node) + cs = map(children(node)) do n + _convert_nodes(graph, n) + end + setchildren!(graph, id, cs) + end + return id +end + +#------------------------------------------------------------------------------- +struct SyntaxTree{GraphType} + graph::GraphType + id::NodeId +end + +function Base.getproperty(tree::SyntaxTree, name::Symbol) + # FIXME: Remove access to internals + name === :graph && return getfield(tree, :graph) + name === :id && return getfield(tree, :id) + id = getfield(tree, :id) + return get(getproperty(getfield(tree, :graph), name), id) do + error("Property `$name[$id]` not found") + end +end + +function Base.propertynames(tree::SyntaxTree) + attrnames(tree) +end + +function Base.get(tree::SyntaxTree, name::Symbol, default) + attr = getattr(getfield(tree, :graph), name, nothing) + return isnothing(attr) ? 
default : + get(attr, getfield(tree, :id), default) +end + +function Base.getindex(tree::SyntaxTree, i::Integer) + child(tree, i) +end + +function Base.getindex(tree::SyntaxTree, r::UnitRange) + (child(tree, i) for i in r) +end + +Base.firstindex(tree::SyntaxTree) = 1 +Base.lastindex(tree::SyntaxTree) = numchildren(tree) + +function hasattr(tree::SyntaxTree, name::Symbol) + attr = getattr(tree.graph, name, nothing) + return !isnothing(attr) && haskey(attr, tree.id) +end + +function attrnames(tree::SyntaxTree) + attrs = tree.graph.attributes + [name for (name, value) in pairs(attrs) if haskey(value, tree.id)] +end + +# JuliaSyntax tree API + +function JuliaSyntax.haschildren(tree::SyntaxTree) + haschildren(tree.graph, tree.id) +end + +function JuliaSyntax.numchildren(tree::SyntaxTree) + numchildren(tree.graph, tree.id) +end + +function JuliaSyntax.children(tree::SyntaxTree) + SyntaxList(tree.graph, children(tree.graph, tree.id)) +end + +function JuliaSyntax.child(tree::SyntaxTree, i::Integer) + SyntaxTree(tree.graph, child(tree.graph, tree.id, i)) +end + +function JuliaSyntax.head(tree::SyntaxTree) + SyntaxHead(kind(tree), flags(tree)) +end + +function JuliaSyntax.kind(tree::SyntaxTree) + tree.kind +end + +function JuliaSyntax.flags(tree::SyntaxTree) + get(tree, :syntax_flags, 0x0000) +end + + +# Reference to bytes within a source file +struct SourceRef + file::SourceFile + first_byte::Int + # TODO: Do we need the green node, or would last_byte suffice? 
+ green_tree::GreenNode +end + +JuliaSyntax.first_byte(src::SourceRef) = src.first_byte +JuliaSyntax.last_byte(src::SourceRef) = src.first_byte + span(src.green_tree) - 1 +JuliaSyntax.filename(src::SourceRef) = filename(src.file) +JuliaSyntax.source_location(::Type{LineNumberNode}, src::SourceRef) = source_location(LineNumberNode, src.file, src.first_byte) +JuliaSyntax.source_location(src::SourceRef) = source_location(src.file, src.first_byte) + +function Base.show(io::IO, ::MIME"text/plain", src::SourceRef) + highlight(io, src.file, first_byte(src):last_byte(src), note="these are the bytes you're looking for 😊", context_lines_inner=20) +end + +function sourceref(tree::SyntaxTree) + sources = tree.graph.source + id = tree.id + while true + s = sources[id] + if s isa SourceRef + return s + else + id = s::NodeId + end + end +end + +JuliaSyntax.filename(tree::SyntaxTree) = return filename(sourceref(tree)) +JuliaSyntax.source_location(::Type{LineNumberNode}, tree::SyntaxTree) = source_location(LineNumberNode, sourceref(tree)) +JuliaSyntax.source_location(tree::SyntaxTree) = source_location(sourceref(tree)) +JuliaSyntax.first_byte(tree::SyntaxTree) = first_byte(sourceref(tree)) +JuliaSyntax.last_byte(tree::SyntaxTree) = last_byte(sourceref(tree)) + +function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) + ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=Union{SourceRef,NodeId}, + value=Any, name_val=String) + id = _convert_nodes(graph, node) + return SyntaxTree(graph, id) +end + +function SyntaxTree(node::SyntaxNode) + return SyntaxTree(SyntaxGraph(), node) +end + +attrsummary(name, value) = string(name) +attrsummary(name, value::Number) = "$name=$value" + +function _value_string(ex) + k = kind(ex) + str = k == K"Identifier" || is_operator(k) ? ex.name_val : + k == K"SSAValue" ? "ssa" : + k == K"core" ? "core.$(ex.name_val)" : + k == K"top" ? "top.$(ex.name_val)" : + k == K"slot" ? 
"slot" : + repr(get(ex, :value, nothing)) + id = get(ex, :var_id, nothing) + if !isnothing(id) + idstr = replace(string(id), + "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄", + "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") + str = "$(str).$idstr" + end + if k == K"slot" + # TODO: Ideally shouldn't need to rewrap the id here... + srcex = SyntaxTree(ex.graph, ex.source) + str = "$(str)/$(srcex.name_val)" + end + return str +end + +function _show_syntax_tree(io, current_filename, node, indent, show_byte_offsets) + if hasattr(node, :source) + fname = filename(node) + line, col = source_location(node) + posstr = "$(lpad(line, 4)):$(rpad(col,3))" + if show_byte_offsets + posstr *= "│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))" + end + else + fname = nothing + posstr = " " + if show_byte_offsets + posstr *= "│ " + end + end + val = get(node, :value, nothing) + nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : _value_string(node) + + treestr = string(indent, nodestr) + + std_attrs = Set([:name_val,:value,:kind,:syntax_flags,:source,:var_id]) + attrstr = join([attrsummary(n, getproperty(node, n)) for n in attrnames(node) if n ∉ std_attrs], ",") + if !isempty(attrstr) + treestr = string(rpad(treestr, 40), "│ $attrstr") + end + + # Add filename if it's changed from the previous node + if fname != current_filename[] && !isnothing(fname) + #println(io, "# ", fname) + treestr = string(rpad(treestr, 80), "│$fname") + current_filename[] = fname + end + println(io, posstr, "│", treestr) + if haschildren(node) + new_indent = indent*" " + for n in children(node) + _show_syntax_tree(io, current_filename, n, new_indent, show_byte_offsets) + end + end +end + +function Base.show(io::IO, ::MIME"text/plain", tree::SyntaxTree; show_byte_offsets=false) + println(io, "line:col│ tree │ attributes | file_name") + _show_syntax_tree(io, Ref{Union{Nothing,String}}(nothing), tree, "", show_byte_offsets) +end + +function _show_syntax_tree_sexpr(io, ex) + if 
!haschildren(ex) + if is_error(ex) + print(io, "(", untokenize(head(ex)), ")") + else + print(io, _value_string(ex)) + end + else + print(io, "(", untokenize(head(ex))) + first = true + for n in children(ex) + print(io, ' ') + _show_syntax_tree_sexpr(io, n) + first = false + end + print(io, ')') + end +end + +function Base.show(io::IO, ::MIME"text/x.sexpression", node::SyntaxTree) + _show_syntax_tree_sexpr(io, node) +end + +function Base.show(io::IO, node::SyntaxTree) + _show_syntax_tree_sexpr(io, node) +end + +#------------------------------------------------------------------------------- +# Lightweight vector of nodes ids with associated pointer to graph stored separately. +struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree} + graph::GraphType + ids::NodeIdVecType +end + +function SyntaxList(graph::SyntaxGraph, ids::AbstractVector{NodeId}) + SyntaxList{typeof(graph), typeof(ids)}(graph, ids) +end + +SyntaxList(graph::SyntaxGraph) = SyntaxList(graph, Vector{NodeId}()) +SyntaxList(ctx) = SyntaxList(ctx.graph) + +Base.size(v::SyntaxList) = size(v.ids) + +Base.IndexStyle(::Type{<:SyntaxList}) = IndexLinear() + +Base.getindex(v::SyntaxList, i::Int) = SyntaxTree(v.graph, v.ids[i]) + +function Base.setindex!(v::SyntaxList, tree::SyntaxTree, i::Int) + v.graph === tree.graph || error("Mismatching syntax graphs") + v.ids[i] = tree.id +end + +function Base.setindex!(v::SyntaxList, id::NodeId, i::Int) + v.ids[i] = id +end + +function Base.push!(v::SyntaxList, tree::SyntaxTree) + v.graph === tree.graph || error("Mismatching syntax graphs") + push!(v.ids, tree.id) +end + +function Base.append!(v::SyntaxList, exs) + for e in exs + push!(v, e) + end + v +end + +function Base.append!(v::SyntaxList, exs::SyntaxList) + v.graph === exs.graph || error("Mismatching syntax graphs") + append!(v.ids, exs.ids) + v +end + +function Base.push!(v::SyntaxList, id::NodeId) + push!(v.ids, id) +end + +function Base.pop!(v::SyntaxList) + SyntaxTree(v.graph, pop!(v.ids)) 
+end + +#------------------------------------------------------------------------------- + +function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) + SyntaxTree(build_tree(SyntaxNode, stream; kws...)) +end + diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl new file mode 100644 index 0000000000000..87136cbc73392 --- /dev/null +++ b/JuliaLowering/src/utils.jl @@ -0,0 +1,112 @@ +# Error handling + +TODO(msg) = throw(ErrorException("Lowering TODO: $msg")) +TODO(ex, msg) = throw(LoweringError(ex, "Lowering TODO: $msg")) + +# Errors found during lowering will result in LoweringError being thrown to +# indicate the syntax causing the error. +struct LoweringError <: Exception + ex::SyntaxTree + msg::String +end + +function Base.showerror(io::IO, exc::LoweringError) + print(io, "LoweringError:\n") + src = sourceref(exc.ex) + highlight(io, src.file, first_byte(src):last_byte(src), note=exc.msg) +end + +function _chk_code(ex, cond) + cond_str = string(cond) + quote + ex = $(esc(ex)) + @assert ex isa SyntaxTree + try + ok = $(esc(cond)) + if !ok + throw(LoweringError(ex, "Expected `$($cond_str)`")) + end + catch + throw(LoweringError(ex, "Structure error evaluating `$($cond_str)`")) + end + end +end + +# Internal error checking macro. +# Check a condition involving an expression, throwing a LoweringError if it +# doesn't evaluate to true. Does some very simple pattern matching to attempt +# to extract the expression variable from the left hand side. +macro chk(cond) + ex = cond + while true + if ex isa Symbol + break + elseif ex.head == :call + ex = ex.args[2] + elseif ex.head == :ref + ex = ex.args[1] + elseif ex.head == :. 
+ ex = ex.args[1] + elseif ex.head in (:(==), :(in), :<, :>) + ex = ex.args[1] + else + error("Can't analyze $cond") + end + end + _chk_code(ex, cond) +end + +macro chk(ex, cond) + _chk_code(ex, cond) +end + + +#------------------------------------------------------------------------------- +# CodeInfo constructor. TODO: Should be in Core? +function _CodeInfo(code, + codelocs, + ssavaluetypes, + ssaflags, + method_for_inference_limit_heuristics, + linetable, + slotnames, + slotflags, + slottypes, + rettype, + parent, + edges, + min_world, + max_world, + inferred, + propagate_inbounds, + has_fcall, + nospecializeinfer, + inlining, + constprop, + purity, + inlining_cost) + @eval $(Expr(:new, :(Core.CodeInfo), + convert(Vector{Any}, code), + convert(Vector{Int32}, codelocs), + convert(Any, ssavaluetypes), + convert(Vector{UInt32}, ssaflags), + convert(Any, method_for_inference_limit_heuristics), + convert(Any, linetable), + convert(Vector{Symbol}, slotnames), + convert(Vector{UInt8}, slotflags), + convert(Any, slottypes), + convert(Any, rettype), + convert(Any, parent), + convert(Any, edges), + convert(UInt64, min_world), + convert(UInt64, max_world), + convert(Bool, inferred), + convert(Bool, propagate_inbounds), + convert(Bool, has_fcall), + convert(Bool, nospecializeinfer), + convert(UInt8, inlining), + convert(UInt8, constprop), + convert(UInt16, purity), + convert(UInt16, inlining_cost))) +end + diff --git a/JuliaLowering/test/lowering.jl b/JuliaLowering/test/lowering.jl new file mode 100644 index 0000000000000..cce88637c1a10 --- /dev/null +++ b/JuliaLowering/test/lowering.jl @@ -0,0 +1,81 @@ +# Just some hacking + +using JuliaSyntax +using JuliaLowering + +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref + +src = """ +let + y = 1 + x = 2 + let x = sin(x) + y = x + end + (x, y) +end +""" + +# src = """ +# let +# local x, (y = 2), (w::T = ww), q::S +# end +# """ + +# src = 
""" +# function foo(x::f(T), y::w(let ; S end)) +# "a \$("b \$("c")")" +# end +# """ + +# src = """ +# let +# function f() Int end +# function foo(y::f(a)) +# y +# end +# end +# """ + + +# src = """ +# x + y +# """ + +t = parsestmt(SyntaxNode, src, filename="foo.jl") + +ctx = JuliaLowering.DesugaringContext() + +t2 = SyntaxTree(ctx.graph, t) + +t3 = JuliaLowering.expand_forms(ctx, t2) + +ctx2 = JuliaLowering.ScopeResolutionContext(ctx) + +t4 = JuliaLowering.resolve_scopes!(ctx2, t3) + +@info "Resolved scopes" t4 + +code = JuliaLowering.compile_toplevel(ctx2, Main, t4) + +@info "Code" code + + +# flisp parts to do +# let +# desugar/let => 76 +# desugar/func => ~100 (partial) +# desugar/call => 70 +# handle-scopes => 195 +# handle-scopes/scope-block => 99 +# handle-scopes/locals => 16 +# linear-ir => 250 (partial, approximate) +# linear-ir/func => 22 + + +# Syntax tree ideas: Want following to work? +# This can be fully inferrable! +# +# t2[3].bindings[1].lhs.string +# t2[3].body[1].signature + From 199fcbdbffbe8059d465fa399d2a403aaceee8f5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Apr 2024 07:00:36 +1000 Subject: [PATCH 0732/1109] Full scope resolution + basic method lowering This contains a mostly complete implementation of scope resolution, including soft/hard scope rules at top level. Still needs a lot of testing. Also make lowering of very basic method defintions work. 
--- JuliaLowering/README.md | 13 + JuliaLowering/src/JuliaLowering.jl | 4 +- JuliaLowering/src/desugaring.jl | 19 +- JuliaLowering/src/linear_ir.jl | 204 ++++++------- JuliaLowering/src/scope_analysis.jl | 457 +++++++++++++++++----------- JuliaLowering/test/lowering.jl | 59 ++-- 6 files changed, 435 insertions(+), 321 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 4a123fe543313..477b818ffa3fe 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -178,6 +178,19 @@ We traverse the AST starting at the root paying attention to certian nodes: * Build new scope with table of renames * Resolve the body with the new scope, applying the renames +### Intermediate forms used in lowering + +* `local-def` - flisp code explains this as + - "a local that we know has an assignment that dominates all usages" + - "local declaration of a defined variable" + +There's also this comment in https://github.com/JuliaLang/julia/issues/22314: + +> mark the [...] variable as local-def, which would prevent it from getting Core.Boxed during the closure conversion it'll be detected as known-SSA + +But maybe that's confusing. It seems like `local-def` is a local which lowering +asserts is "always defined" / "definitely initialized before use". But it's not +necessarily single-assign, so not SSA. 
### Lowered IR diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index a4f6cd513f55f..ac7c62a1f61a9 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -4,9 +4,9 @@ using JuliaSyntax using JuliaSyntax: SyntaxHead, highlight, Kind, GreenNode, @KSet_str using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags -using JuliaSyntax: filename, first_byte, last_byte, source_location +using JuliaSyntax: filename, first_byte, last_byte, source_location, span -using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call +using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error include("syntax_graph.jl") include("utils.jl") diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 4529781b38830..6530ba976a5da 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -8,9 +8,11 @@ struct SSAVar end struct LambdaInfo - # TODO: Make this concretely typed? + # TODO: Make SyntaxList concretely typed? 
args::SyntaxList + static_parameters::SyntaxList ret_var::Union{Nothing,SyntaxTree} + is_toplevel_thunk::Bool end abstract type AbstractLoweringContext end @@ -345,6 +347,7 @@ function expand_function_def(ctx, ex) # argument destructuring # dotop names # overlays + static_parameters = SyntaxList(ctx) # Add self argument where necessary args = name[2:end] @@ -368,6 +371,7 @@ function expand_function_def(ctx, ex) makenode(ctx, name, K"call", core_ref(ctx, name, "Typeof"), name)) function_name = name end + args = pushfirst!(collect(args), farg) # preamble is arbitrary code which computes # svec(types, sparms, location) @@ -396,23 +400,22 @@ function expand_function_def(ctx, ex) makenode(ctx, callex, K"call", svec_type(ctx, name), arg_types...), - makenode(ctx, callex, K"Value", value=source_location(LineNumberNode, callex)) + makenode(ctx, callex, K"call", + svec_type(ctx, name)), # FIXME sparams + makenode(ctx, callex, K"Value", value=QuoteNode(source_location(LineNumberNode, callex))) ) if !isnothing(return_type) ret_var, ret_assign = assign_tmp(ctx, return_type) body = makenode(ctx, body, K"block", ret_assign, - body, - scope_type=:hard) + body) else ret_var = nothing - body = makenode(ctx, body, K"block", - body, - scope_type=:hard) end lambda = makenode(ctx, body, K"lambda", body, - lambda_info=LambdaInfo(arg_names, ret_var)) + lambda_info=LambdaInfo(arg_names, static_parameters, ret_var, false)) makenode(ctx, ex, K"block", + makenode(ctx, ex, K"method", function_name), makenode(ctx, ex, K"method", function_name, preamble, diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 874693105575c..c75ee1ff5af27 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -37,12 +37,7 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext mod::Module end -function LinearIRContext(ctx::ScopeResolutionContext, mod, return_type) - LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, - return_type, 
ctx.var_info, mod) -end - -function LinearIRContext(ctx::LinearIRContext, return_type) +function LinearIRContext(ctx, return_type) LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, return_type, ctx.var_info, ctx.mod) end @@ -211,6 +206,14 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif k == K"return" compile(ctx, ex[1], true, true) nothing + elseif k == K"unnecessary" + # `unnecessary` marks expressions generated by lowering that + # do not need to be evaluated if their value is unused. + if needs_value + compile(ctx, ex[1], needs_value, in_tail_pos) + else + nothing + end elseif k == K"method" # TODO # throw(LoweringError(ex, @@ -287,8 +290,8 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) makenode(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[ex.var_id]) elseif k == K"goto" || k == K"enter" || k == K"gotoifnot" TODO(ex, "_renumber $k") - # elseif k == K"lambda" - # renumber_lambda(ctx, ex) + elseif k == K"lambda" + ex else mapchildren(ctx, ex) do e _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) @@ -298,9 +301,6 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) end end -function _ir_to_expr() -end - # flisp: renumber-lambda, compact-ir function renumber_body(ctx, input_code, slot_rewrites) # Step 1: Remove any assignments to SSA variables, record the indices of labels @@ -339,49 +339,51 @@ function renumber_body(ctx, input_code, slot_rewrites) code end -function to_ir_expr(ex) - k = kind(ex) - if is_literal(k) - ex.value - elseif k == K"core" - GlobalRef(Core, Symbol(ex.name_val)) - elseif k == K"top" - GlobalRef(Base, Symbol(ex.name_val)) - elseif k == K"Identifier" - # Implicitly refers to name in parent module - # TODO: Should we even have plain identifiers at this point or should - # they all effectively be resolved into GlobalRef earlier? 
- Symbol(ex.name_val) - elseif k == K"slot" - Core.SlotNumber(ex.var_id) - elseif k == K"SSAValue" - Core.SSAValue(ex.var_id) - elseif k == K"return" - Core.ReturnNode(to_ir_expr(ex[1])) - elseif is_quoted(k) - TODO(ex, "Convert SyntaxTree to Expr") - else - # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ - # - # call invoke static_parameter `=` method struct_type abstract_type - # primitive_type global const new splatnew isdefined the_exception - # enter leave pop_exception inbounds boundscheck loopinfo copyast meta - # foreigncall new_opaque_closure lambda - head = k == K"call" ? :call : - k == K"=" ? :(=) : - k == K"method" ? :method : - k == K"global" ? :global : - k == K"const" ? :const : - nothing - if isnothing(head) - TODO(ex, "Unhandled form") +# flisp: compile-body +function compile_body(ctx, ex) + compile(ctx, ex, true, true) + # TODO: Fix any gotos + # TODO: Filter out any newvar nodes where the arg is definitely initialized +end + +function _add_slots!(slot_rewrites, var_info, var_ids) + n = length(slot_rewrites) + 1 + for id in var_ids + info = var_info[id] + if info.kind == :local || info.kind == :argument + slot_rewrites[id] = n + n += 1 end - Expr(head, map(to_ir_expr, children(ex))...) 
end + slot_rewrites end + +function compile_lambda(outer_ctx, ex) + lambda_info = ex.lambda_info + return_type = nothing # FIXME + # TODO: Add assignments for reassigned arguments to body using lambda_info.args + ctx = LinearIRContext(outer_ctx, return_type) + compile_body(ctx, ex[1]) + slot_rewrites = Dict{VarId,Int}() + _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) + _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_locals) + @info "" slot_rewrites + code = renumber_body(ctx, ctx.code, slot_rewrites) + makenode(ctx, ex, K"lambda", + makenode(ctx, ex[1], K"block", code), + lambda_info=lambda_info, + slot_rewrites=slot_rewrites + ) +end + + +#------------------------------------------------------------------------------- +# Conversion to Expr + CodeInfo + # Convert our data structures to CodeInfo -function to_code_info(input_code, mod, funcname, var_info, slot_rewrites) +function to_code_info(ex, in_mod, funcname, var_info, slot_rewrites) + input_code = children(ex) # Convert code to Expr and record low res locations in table num_stmts = length(input_code) code = Vector{Any}(undef, num_stmts) @@ -389,13 +391,13 @@ function to_code_info(input_code, mod, funcname, var_info, slot_rewrites) linetable_map = Dict{Tuple{Int,String}, Int32}() linetable = Any[] for i in 1:length(code) - code[i] = to_ir_expr(input_code[i]) + code[i] = to_expr(in_mod, var_info, input_code[i]) fname = filename(input_code[i]) lineno, _ = source_location(input_code[i]) loc = (lineno, fname) codelocs[i] = get!(linetable_map, loc) do inlined_at = 0 # FIXME: nonzero for expanded macros - full_loc = Core.LineInfoNode(mod, Symbol(funcname), Symbol(fname), + full_loc = Core.LineInfoNode(in_mod, Symbol(funcname), Symbol(fname), Int32(lineno), Int32(inlined_at)) push!(linetable, full_loc) length(linetable) @@ -447,58 +449,56 @@ function to_code_info(input_code, mod, funcname, var_info, slot_rewrites) ) end -function renumber_lambda(ctx, lambda_info, code) - 
slot_rewrites = Dict{VarId,Tuple{Kind,Int}}() - # lambda arguments become K"slot"; type parameters become K"static_parameter" - info = ex.lambda_info - for (i,arg) in enumerate(info.args) - slot_rewrites[arg.var_id] = i - end - # TODO: add static_parameter here also - renumber_body(ctx, code, slot_rewrites) -end - -# flisp: compile-body -function compile_body(ctx, ex) - compile(ctx, ex, true, true) - # TODO: Fix any gotos - # TODO: Filter out any newvar nodes where the arg is definitely initialized -end - -function _add_slots!(slot_rewrites, var_info, var_ids) - n = length(slot_rewrites) + 1 - for id in var_ids - info = var_info[id] - if info.islocal - slot_rewrites[id] = n - n += 1 +function to_expr(in_mod, var_info, ex) + k = kind(ex) + if is_literal(k) + ex.value + elseif k == K"core" + GlobalRef(Core, Symbol(ex.name_val)) + elseif k == K"top" + GlobalRef(Base, Symbol(ex.name_val)) + elseif k == K"Identifier" + # Implicitly refers to name in parent module + # TODO: Should we even have plain identifiers at this point or should + # they all effectively be resolved into GlobalRef earlier? + Symbol(ex.name_val) + elseif k == K"slot" + Core.SlotNumber(ex.var_id) + elseif k == K"SSAValue" + Core.SSAValue(ex.var_id) + elseif k == K"return" + Core.ReturnNode(to_expr(in_mod, var_info, ex[1])) + elseif is_quoted(k) + TODO(ex, "Convert SyntaxTree to Expr") + elseif k == K"lambda" + funcname = ex.lambda_info.is_toplevel_thunk ? 
+ "top-level scope" : + "none" # FIXME + ir = to_code_info(ex[1], in_mod, funcname, var_info, ex.slot_rewrites) + if ex.lambda_info.is_toplevel_thunk + Expr(:thunk, ir) + else + ir + end + elseif k == K"Value" + ex.value + else + # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ + # + # call invoke static_parameter `=` method struct_type abstract_type + # primitive_type global const new splatnew isdefined the_exception + # enter leave pop_exception inbounds boundscheck loopinfo copyast meta + # foreigncall new_opaque_closure lambda + head = k == K"call" ? :call : + k == K"=" ? :(=) : + k == K"method" ? :method : + k == K"global" ? :global : + k == K"const" ? :const : + nothing + if isnothing(head) + TODO(ex, "Unhandled form for kind $k") end + Expr(head, map(e->to_expr(in_mod, var_info, e), children(ex))...) end - slot_rewrites -end - -function compile_lambda(outer_ctx, ex) - info = ex.lambda_info - return_type = nothing # FIXME - # TODO: Add assignments for reassigned arguments to body using info.args - ctx = LinearIRContext(outer_ctx, return_type) - compile_body(ctx, ex[1]) - slot_rewrites = Dict{VarId,Int}() - _add_slots!(slot_rewrites, ctx.var_info, (a.var_id for a in info.args)) - _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_vars) - code = renumber_body(ctx, ctx.code, slot_rewrites) - to_code_info(code, ctx.mod, "none", ctx.var_info, slot_rewrites) -end - -function compile_toplevel(outer_ctx, mod, ex) - return_type = nothing - ctx = LinearIRContext(outer_ctx, mod, return_type) - compile_body(ctx, ex) - slot_rewrites = Dict{VarId,Int}() - _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_vars) - code = renumber_body(ctx, ctx.code, slot_rewrites) - to_code_info(code, mod, "top-level scope", ctx.var_info, slot_rewrites) - #var_info = nothing # FIXME - #makenode(ctx, ex, K"Value"; value=LambdaIR(SyntaxList(ctx), ctx.code, var_info)) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 
1bfdc7f551783..a664248f6377c 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -13,214 +13,296 @@ #------------------------------------------------------------------------------- # AST traversal functions - useful for performing non-recursive AST traversals -function _schedule_traverse(stack, e) - push!(stack, e) - return nothing -end -function _schedule_traverse(stack, es::Union{Tuple,AbstractVector,Base.Generator}) - append!(stack, es) - return nothing -end - -function traverse_ast(f, exs) - todo = SyntaxList(first(exs).graph) - append!(todo, exs) - while !isempty(todo) - f(pop!(todo), e->_schedule_traverse(todo, e)) - end -end - -function traverse_ast(f, ex::SyntaxTree) - traverse_ast(f, (ex,)) -end +# function _schedule_traverse(stack, e) +# push!(stack, e) +# return nothing +# end +# function _schedule_traverse(stack, es::Union{Tuple,AbstractVector,Base.Generator}) +# append!(stack, es) +# return nothing +# end +# +# function traverse_ast(f, exs) +# todo = SyntaxList(first(exs).graph) +# append!(todo, exs) +# while !isempty(todo) +# f(pop!(todo), e->_schedule_traverse(todo, e)) +# end +# end +# +# function traverse_ast(f, ex::SyntaxTree) +# traverse_ast(f, (ex,)) +# end +# +# function find_in_ast(f, ex::SyntaxTree) +# todo = SyntaxList(ex.graph) +# push!(todo, ex) +# while !isempty(todo) +# e1 = pop!(todo) +# res = f(e1, e->_schedule_traverse(todo, e)) +# if !isnothing(res) +# return res +# end +# end +# return nothing +# end -function find_in_ast(f, ex::SyntaxTree) - todo = SyntaxList(ex.graph) - push!(todo, ex) - while !isempty(todo) - e1 = pop!(todo) - res = f(e1, e->_schedule_traverse(todo, e)) - if !isnothing(res) - return res - end - end - return nothing -end -# NB: This only really works after expand_forms has already processed assignments. 
-function find_scope_vars(ex, children_only) - assigned_vars = Set{String}() - # TODO: - # local_vars - local_def_vars = Set{String}() - # global_vars - used_vars = Set{String}() - traverse_ast(children_only ? children(ex) : ex) do e, traverse - k = kind(e) - if k == K"Identifier" - push!(used_vars, e.name_val) - elseif !haschildren(e) || hasattr(e, :scope_type) || is_quoted(k) || - k in KSet"lambda module toplevel" - return - elseif k == K"local_def" - push!(local_def_vars, e[1].name_val) - # elseif k == K"method" TODO static parameters - elseif k == K"=" - v = decl_var(e[1]) - if !(kind(v) in KSet"SSAValue globalref outerref" || is_placeholder(v)) - push!(assigned_vars, v.name_val) - end - traverse(e[2]) - else - traverse(children(e)) +#------------------------------------------------------------------------------- +function _find_scope_vars!(assignments, locals, globals, used_names, ex) + k = kind(ex) + if k == K"Identifier" + push!(used_names, ex.name_val) + elseif !haschildren(ex) || hasattr(ex, :scope_type) || is_quoted(k) || + k in KSet"lambda module toplevel" + return + elseif k == K"local" || k == K"local_def" + name = ex[1].name_val + get!(locals, name, ex) + elseif k == K"global" + name = ex[1].name_val + get!(globals, name, ex) + # elseif k == K"method" TODO static parameters + elseif k == K"=" + v = decl_var(ex[1]) + if !(kind(v) in KSet"SSAValue globalref outerref" || is_placeholder(v)) + get!(assignments, v.name_val, v) end - end - return assigned_vars, local_def_vars, used_vars -end - -function find_decls(decl_kind, ex) - vars = Vector{typeof(ex)}() - traverse_ast(ex) do e, traverse - k = kind(e) - if !haschildren(e) || is_quoted(k) || k in KSet"lambda scope_block module toplevel" - return - elseif k == decl_kind - v = decl_var(e[1]) - if !is_placeholder(v) - push!(vars, decl_var(v)) - end - else - traverse(children(e)) + _find_scope_vars!(assignments, locals, globals, used_names, ex[2]) + else + for e in children(ex) + 
_find_scope_vars!(assignments, locals, globals, used_names, e) end end - var_names = [v.name_val for v in vars] - return unique(var_names) end -# Determine whether decl_kind is in the scope of `ex` +# Find names of all identifiers used in the given expression, grouping them +# into sets by type. # -# flisp: find-scope-decl -function has_scope_decl(decl_kind, ex) - find_in_ast(ex) do e, traverse - k = kind(e) - if !haschildren(e) || is_quoted(k) || k in KSet"lambda scope_block module toplevel" - return - elseif k == decl_kind - return e - else - traverse(children(ex)) - end +# NB: This only works propery after expand_forms has already processed assignments +function find_scope_vars(ex) + ExT = typeof(ex) + assignments = Dict{String,ExT}() + locals = Dict{String,ExT}() + globals = Dict{String,ExT}() + used_names = Set{String}() + for e in children(ex) + _find_scope_vars!(assignments, locals, globals, used_names, e) end + return assignments, locals, globals, used_names end -# struct LambdaVars -# # For analyze-variables pass -# # var_info_lst::Set{Tuple{Symbol,Symbol}} # ish? -# # captured_var_infos ?? 
-# # ssalabels::Set{SSAValue} -# # static_params::Set{Symbol} -# end - -# Mirror of flisp scope info structure -# struct ScopeInfo -# lambda_vars::Union{LambdaLocals,LambdaInfo} -# parent::Union{Nothing,ScopeBlockInfo} -# args::Set{Symbol} -# locals::Set{Symbol} -# globals::Set{Symbol} -# static_params::Set{Symbol} -# renames::Dict{Symbol,Symbol} -# implicit_globals::Set{Symbol} -# warn_vars::Set{Symbol} -# is_soft::Bool -# is_hard::Bool -# table::Dict{Symbol,Any} -# end - """ Metadata about a variable name - whether it's a local, etc """ struct VarInfo - name::String - islocal::Bool # Local variable (if unset, variable is global) - isarg::Bool # Is a function argument - is_single_assign::Bool # Single assignment + name::String # Variable name + kind::Symbol # :local :global :argument :static_parameter + is_single_assign::Bool # Single assignment + is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) +end + +struct ScopeInfo + # True if scope is part of top level code, or a non-lambda scope nested + # inside top level code. Thus requiring special scope resolution rules. + in_toplevel_thunk::Bool + # Soft/hard scope. For top level thunks only + is_soft::Bool + is_hard::Bool + # Map from variable names to IDs which appear in this scope but not in the + # parent scope + var_ids::Dict{String,VarId} + # Variables used by the enclosing lambda + lambda_locals::Set{VarId} end struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext graph::GraphType next_var_id::Ref{VarId} + mod::Module + # name=>id mappings for all discovered global vars + global_vars::Dict{String,VarId} # Stack of name=>id mappings for each scope, innermost scope last. - var_id_stack::Vector{Dict{String,VarId}} - # Stack of var `id`s for lambda (or toplevel thunk) being processed, innermost last. - lambda_vars::Vector{Set{VarId}} + scope_stack::Vector{ScopeInfo} # Metadata about variables. 
There's only one map for this, as var_id is is # unique across the context, even for same-named vars in unrelated local # scopes. var_info::Dict{VarId,VarInfo} + # Variables which were implicitly global due to being assigned to in top + # level code top level + implicit_toplevel_globals::Set{String} end -function ScopeResolutionContext(ctx::DesugaringContext) - graph = ensure_attributes(ctx.graph, lambda_vars=Set{VarId}) - ScopeResolutionContext(graph, ctx.next_var_id, - Vector{Dict{String,VarId}}(), - [Set{VarId}()], - Dict{VarId,VarInfo}()) +function ScopeResolutionContext(ctx::DesugaringContext, mod::Module) + # FIXME: Add slot_rewrites later + graph = ensure_attributes(ctx.graph, lambda_locals=Set{VarId}, slot_rewrites=Dict{VarId,Int}) + ScopeResolutionContext(graph, + ctx.next_var_id, + mod, + Dict{String,VarId}(), + Vector{ScopeInfo}(), + Dict{VarId,VarInfo}(), + Set{String}()) end -function lookup_var(ctx, name) - for i in lastindex(ctx.var_id_stack):-1:1 - ids = ctx.var_id_stack[i] +function lookup_var(ctx, name::String, exclude_toplevel_globals=false) + for i in lastindex(ctx.scope_stack):-1:1 + ids = ctx.scope_stack[i].var_ids id = get(ids, name, nothing) - if !isnothing(id) + if !isnothing(id) && (!exclude_toplevel_globals || + i > 1 || ctx.var_info[id].kind != :global) return id end end - return nothing + return exclude_toplevel_globals ? nothing : get(ctx.global_vars, name, nothing) +end + +function current_scope(ctx) + last(ctx.scope_stack) +end + +function var_kind(ctx, id::VarId) + ctx.var_info[id].kind end -function new_var(ctx, name; isarg=false, islocal=isarg) - id = new_var_id(ctx) - ctx.var_info[id] = VarInfo(name, islocal, isarg, false) - push!(last(ctx.lambda_vars), id) +function var_kind(ctx, name::String, exclude_toplevel_globals=false) + id = lookup_var(ctx, name, exclude_toplevel_globals) + isnothing(id) ? nothing : ctx.var_info[id].kind +end + +function new_var(ctx, name, kind, is_ambiguous_local=false) + id = kind === :global ? 
get(ctx.global_vars, name, nothing) : nothing + if isnothing(id) + id = new_var_id(ctx) + ctx.var_info[id] = VarInfo(name, kind, false, is_ambiguous_local) + end + if kind === :global + ctx.global_vars[name] = id + end id end -function resolve_scope!(f::Function, ctx, ex, is_toplevel) - id_map = Dict{String,VarId}() - is_hard_scope = get(ex, :scope_type, :hard) == :hard - assigned, local_def_vars, used_vars = find_scope_vars(ex, !is_toplevel) - for name in local_def_vars - id_map[name] = new_var(ctx, name, islocal=true) +# Analyze identifier usage within a scope, adding all newly discovered +# identifiers to ctx.var_info and constructing a lookup table from identifier +# names to their variable IDs +function make_scope(ctx, ex, scope_type, lambda_info) + parentscope = isempty(ctx.scope_stack) ? nothing : current_scope(ctx) + is_outer_lambda_scope = kind(ex) == K"lambda" + is_toplevel = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk + in_toplevel_thunk = is_toplevel || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) + + assignments, locals, globals, used = find_scope_vars(ex) + + # Create new lookup table for variables in this scope which differ from the + # parent scope. 
+ var_ids = Dict{String,VarId}() + + # Add lambda arguments + if !isnothing(lambda_info) + for a in lambda_info.args + var_ids[a.name_val] = new_var(ctx, a.name_val, :argument) + end + for a in lambda_info.static_parameters + var_ids[a.name_val] = new_var(ctx, a.name_val, :static_parameter) + end end - for name in assigned - if !haskey(id_map, name) && isnothing(lookup_var(ctx, name)) - # Previously unknown assigned vars are impicit locals or globals - id_map[name] = new_var(ctx, name, islocal=!is_toplevel) + + # Add explicit locals + for (name,e) in pairs(locals) + if haskey(globals, name) + throw(LoweringError(e, "Variable `$name` declared both local and global")) + elseif haskey(var_ids, name) + vk = ctx.var_info[var_ids[name]].kind + if vk === :argument && is_outer_lambda_scope + throw(LoweringError(e, "local variable name `$name` conflicts with an argument")) + elseif vk === :static_parameter + throw(LoweringError(e, "local variable name `$name` conflicts with a static parameter")) + end + elseif var_kind(ctx, name) === :static_parameter + throw(LoweringError(e, "local variable name `$name` conflicts with a static parameter")) end + var_ids[name] = new_var(ctx, name, :local) end - outer_scope = is_toplevel ? 
id_map : ctx.var_id_stack[1] - for name in used_vars - if !haskey(id_map, name) && isnothing(lookup_var(ctx, name)) - # Identifiers which weren't discovered further up the stack are - # newly discovered globals - outer_scope[name] = new_var(ctx, name, islocal=false) + + # Add explicit globals + for (name,e) in pairs(globals) + if haskey(var_ids, name) + vk = ctx.var_info[var_ids[name]].kind + if vk === :argument && is_outer_lambda_scope + throw(LoweringError(e, "global variable name `$name` conflicts with an argument")) + elseif vk === :static_parameter + throw(LoweringError(e, "global variable name `$name` conflicts with a static parameter")) + end + elseif var_kind(ctx, name) === :static_parameter + throw(LoweringError(e, "global variable name `$name` conflicts with a static parameter")) end + var_ids[name] = new_var(ctx, name, :global) end - push!(ctx.var_id_stack, id_map) - res = f(ctx) - pop!(ctx.var_id_stack) - return res -end -resolve_scopes!(ctx::DesugaringContext, ex) = resolve_scopes!(ScopeResolutionContext(ctx), ex) + # Compute implicit locals and globals + if is_toplevel + is_hard_scope = false + is_soft_scope = false -function resolve_scopes!(ctx::ScopeResolutionContext, ex) - resolve_scope!(ctx, ex, true) do cx - resolve_scopes_!(cx, ex) + # All non-local assignments are implicitly global at top level + for (name,e) in assignments + if !haskey(locals, name) + push!(ctx.implicit_toplevel_globals, name) + end + end + else + is_hard_scope = in_toplevel_thunk && (parentscope.is_hard || scope_type === :hard) + is_soft_scope = in_toplevel_thunk && !is_hard_scope && + (scope_type === :neutral ? parentscope.is_soft : scope_type === :soft) + + # Outside top level code, most assignments create local variables implicitly + for (name,e) in assignments + vk = haskey(var_ids, name) ? 
+ ctx.var_info[var_ids[name]].kind : + var_kind(ctx, name, true) + if vk === :static_parameter + throw(LoweringError(e, "local variable name `$name` conflicts with a static parameter")) + elseif vk !== nothing + continue + end + # Assignment is to a newly discovered variable name + is_ambiguous_local = false + if in_toplevel_thunk && !is_hard_scope + # In a top level thunk but *inside* a nontrivial scope + if (name in ctx.implicit_toplevel_globals || isdefined(ctx.mod, Symbol(name))) + # Special scope rules to make assignments to globals work + # like assignments to locals do inside a function. + if is_soft_scope + # Soft scope (eg, for loop in REPL) => treat as a global + new_var(ctx, name, :global) + continue + else + # Ambiguous case (eg, nontrivial scopes in package top level code) + # => Treat as local but generate warning when assigned to + is_ambiguous_local = true + end + end + end + var_ids[name] = new_var(ctx, name, :local, is_ambiguous_local) + end + end + + for name in used + if lookup_var(ctx, name) === nothing + # Add other newly discovered identifiers as globals + new_var(ctx, name, :global) + end end - setattr!(ctx.graph, ex.id, lambda_vars=only(ctx.lambda_vars)) - SyntaxTree(ctx.graph, ex.id) + + lambda_locals = is_outer_lambda_scope ? Set{VarId}() : parentscope.lambda_locals + for id in values(var_ids) + vk = var_kind(ctx, id) + if vk === :local + push!(lambda_locals, id) + end + end + + return ScopeInfo(in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_locals) end function resolve_scopes_!(ctx, ex) @@ -230,40 +312,40 @@ function resolve_scopes_!(ctx, ex) return # FIXME - make these K"placeholder"? end # TODO: Maybe we shouldn't do this in place?? 
- setattr!(ctx.graph, ex.id, var_id=lookup_var(ctx, ex.name_val)) + id = lookup_var(ctx, ex.name_val) + setattr!(ctx.graph, ex.id, var_id=id) elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" return - elseif k == K"global" - TODO("global") - elseif k == K"local" - TODO("local") # TODO + # elseif k == K"global" + # elseif k == K"local" # elseif require_existing_local # elseif locals # return Dict of locals # elseif islocal elseif k == K"lambda" - # TODO: Lambda captures! - info = ex.lambda_info - id_map = Dict{String,VarId}() - for a in info.args - id_map[a.name_val] = new_var(ctx, a.name_val, isarg=true) + lambda_info = ex.lambda_info + scope = make_scope(ctx, ex, nothing, lambda_info) + push!(ctx.scope_stack, scope) + # Resolve args and static parameters so that variable IDs get pushed + # back into the original tree (not required for downstream processing) + for a in lambda_info.args + resolve_scopes!(ctx, a) end - push!(ctx.var_id_stack, id_map) - for a in info.args + for a in lambda_info.static_parameters resolve_scopes!(ctx, a) end - vars = Set{VarId}() - setattr!(ctx.graph, ex.id, lambda_vars=vars) - push!(ctx.lambda_vars, vars) - resolve_scopes_!(ctx, ex[1]) - pop!(ctx.lambda_vars) - pop!(ctx.var_id_stack) + for e in children(ex) + resolve_scopes_!(ctx, e) + end + pop!(ctx.scope_stack) + setattr!(ctx.graph, ex.id, lambda_locals=scope.lambda_locals) elseif k == K"block" && hasattr(ex, :scope_type) - resolve_scope!(ctx, ex, false) do cx - for e in children(ex) - resolve_scopes_!(cx, e) - end + scope = make_scope(ctx, ex, ex.scope_type, nothing) + push!(ctx.scope_stack, scope) + for e in children(ex) + resolve_scopes_!(ctx, e) end + pop!(ctx.scope_stack) else for e in children(ex) resolve_scopes_!(ctx, e) @@ -272,3 +354,10 @@ function resolve_scopes_!(ctx, ex) ex end +function resolve_scopes!(ctx::ScopeResolutionContext, ex) + thunk = makenode(ctx, ex, K"lambda", ex; + lambda_info=LambdaInfo(SyntaxList(ctx), SyntaxList(ctx), nothing, true)) + 
resolve_scopes_!(ctx, thunk) + return thunk +end + diff --git a/JuliaLowering/test/lowering.jl b/JuliaLowering/test/lowering.jl index cce88637c1a10..b84777b18c4d2 100644 --- a/JuliaLowering/test/lowering.jl +++ b/JuliaLowering/test/lowering.jl @@ -5,17 +5,6 @@ using JuliaLowering using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref -src = """ -let - y = 1 - x = 2 - let x = sin(x) - y = x - end - (x, y) -end -""" - # src = """ # let # local x, (y = 2), (w::T = ww), q::S @@ -28,14 +17,27 @@ end # end # """ -# src = """ -# let -# function f() Int end -# function foo(y::f(a)) -# y -# end -# end -# """ +src = """ +let + y = 1 + x = 2 + let x = 3 + y = x + 1 + end + (x, y) +end +""" + +src = """ +begin + function f(x) + y = x + 1 + "hello world", x, y + end + + f(1) +end +""" # src = """ @@ -45,21 +47,28 @@ end t = parsestmt(SyntaxNode, src, filename="foo.jl") ctx = JuliaLowering.DesugaringContext() - t2 = SyntaxTree(ctx.graph, t) +@info "Input code" t2 t3 = JuliaLowering.expand_forms(ctx, t2) +@info "Desugared" t3 -ctx2 = JuliaLowering.ScopeResolutionContext(ctx) - +in_mod = Main # Module(:Foo) +ctx2 = JuliaLowering.ScopeResolutionContext(ctx, in_mod) t4 = JuliaLowering.resolve_scopes!(ctx2, t3) - @info "Resolved scopes" t4 -code = JuliaLowering.compile_toplevel(ctx2, Main, t4) +t5 = JuliaLowering.compile_lambda(ctx2, t4) + +@info "Linear IR" t5 + +t6 = JuliaLowering.to_expr(in_mod, ctx2.var_info, t5) -@info "Code" code +x = 100 +y = 200 +@info "CodeInfo" t6 +@info "Eval" Base.eval(in_mod, t6) # flisp parts to do # let From 7280abae1f1025c457793098cf971dca0172143c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Apr 2024 08:26:57 +1000 Subject: [PATCH 0733/1109] Add "try it out" note to readme + fix is_identifier --- JuliaLowering/README.md | 7 +++++++ JuliaLowering/src/desugaring.jl | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/JuliaLowering/README.md 
b/JuliaLowering/README.md index 477b818ffa3fe..e16dda797d4b0 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -32,6 +32,13 @@ This work is intended to - Precise author-defined error reporting from macros - Sketch better interfaces for syntax trees (hopefully!) +## Trying it out + +Note this is a very early work in progress; most things probably don't work! + +1. Check out the caf/lowering-2 branch of JuliaSyntax.jl +2. Run the demo `include("test/lowering.jl")` + # Design Notes A disorganized collection of design notes :) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 6530ba976a5da..865165b2073b3 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -131,6 +131,11 @@ function is_placeholder(ex) kind(ex) == K"Identifier" && all(==('_'), ex.name_val) end +function is_identifier(x) + k = kind(x) + k == K"Identifier" || k == K"var" || is_operator(k) || is_macro_name(k) +end + function is_eventually_call(ex::SyntaxTree) k = kind(ex) return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1])) From ec6ab83693f68b718e2ab7308b547c04446a95ee Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Apr 2024 11:28:11 +1000 Subject: [PATCH 0734/1109] Fix attribution in license --- JuliaLowering/LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaLowering/LICENSE b/JuliaLowering/LICENSE index 5732c3014feb3..62a00d053a428 100644 --- a/JuliaLowering/LICENSE +++ b/JuliaLowering/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Julia Computing and contributors +Copyright (c) 2024 JuliaHub and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 1c56c792ed7c64ed98a79d17236abde17f53fc94 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 7 Apr 2024 15:55:29 +1000 Subject: [PATCH 0735/1109] Move lowering kinds here 
and out of JuliaSyntax --- JuliaLowering/src/JuliaLowering.jl | 57 ++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index ac7c62a1f61a9..7c64bf060ce12 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -8,6 +8,59 @@ using JuliaSyntax: filename, first_byte, last_byte, source_location, span using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error +function _insert_kinds() + JuliaSyntax.insert_kinds!(JuliaLowering, 1, [ + "BEGIN_LOWERING_KINDS" + # Compiler metadata hints + "meta" + "extension" + # A literal Julia value of any kind, as might be inserted by the AST + # during macro expansion + "Value" + "inbounds" + "inline" + "noinline" + "loopinfo" + # Identifier for a value which is only assigned once + "SSAValue" + # Scope expressions `(hygienic_scope ex s)` mean `ex` should be + # interpreted as being in scope `s`. + "hygienic_scope" + # Various heads harvested from flisp lowering. 
+ # (TODO: May or may not need all these - assess later) + "break_block" + "scope_block" + "local_def" + "_while" + "_do_while" + "with_static_parameters" + "top" + "core" + "toplevel_butfirst" + "thunk" + "lambda" + "moved_local" + "the_exception" + "foreigncall" + "new" + "globalref" + "outerref" + "enter" + "leave" + "label" + "goto" + "gotoifnot" + "trycatchelse" + "tryfinally" + "method" + "slot" + "unnecessary" + "decl" + "END_LOWERING_KINDS" + ]) +end +_insert_kinds() + include("syntax_graph.jl") include("utils.jl") @@ -15,4 +68,8 @@ include("desugaring.jl") include("scope_analysis.jl") include("linear_ir.jl") +function __init__() + _insert_kinds() +end + end From e0ddbe05170632ead536c15c5979a4f42f0e9c04 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 8 Apr 2024 15:34:19 +1000 Subject: [PATCH 0736/1109] Fix ids for global assignments + various other fixes --- JuliaLowering/src/desugaring.jl | 7 +++++-- JuliaLowering/src/scope_analysis.jl | 11 ++++++----- JuliaLowering/src/syntax_graph.jl | 3 ++- JuliaLowering/test/lowering.jl | 20 +++++++++++++++++--- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 865165b2073b3..717849d5d6a0f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -44,6 +44,7 @@ _node_ids(c, cs...) = (_node_id(c), _node_ids(cs...)...) function _makenode(graph::SyntaxGraph, srcref, head, children; attrs...) id = newnode!(graph) + # TODO: Having this list of kinds seeems hacky? if kind(head) in (K"Identifier", K"core", K"top", K"SSAValue", K"Value", K"slot") || is_literal(head) @assert length(children) == 0 else @@ -58,11 +59,13 @@ function makenode(graph::SyntaxGraph, srcref, head, children...; attrs...) _makenode(graph, srcref, head, children; attrs...) end -function makenode(ctx::AbstractLoweringContext, srcref, head, children::SyntaxTree...; attrs...) 
+function makenode(ctx::Union{AbstractLoweringContext,SyntaxTree}, + srcref, head, children::SyntaxTree...; attrs...) _makenode(ctx.graph, srcref, head, _node_ids(children...); attrs...) end -function makenode(ctx::AbstractLoweringContext, srcref, head, children::SyntaxList; attrs...) +function makenode(ctx::Union{AbstractLoweringContext,SyntaxTree}, + srcref, head, children::SyntaxList; attrs...) ctx.graph === children.graph || error("Mismatching graphs") _makenode(ctx.graph, srcref, head, children.ids; attrs...) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index a664248f6377c..7763dacbf71ea 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -247,6 +247,7 @@ function make_scope(ctx, ex, scope_type, lambda_info) # All non-local assignments are implicitly global at top level for (name,e) in assignments if !haskey(locals, name) + new_var(ctx, name, :global) push!(ctx.implicit_toplevel_globals, name) end end @@ -305,7 +306,7 @@ function make_scope(ctx, ex, scope_type, lambda_info) return ScopeInfo(in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_locals) end -function resolve_scopes_!(ctx, ex) +function _resolve_scopes!(ctx, ex) k = kind(ex) if k == K"Identifier" if is_placeholder(ex) @@ -335,7 +336,7 @@ function resolve_scopes_!(ctx, ex) resolve_scopes!(ctx, a) end for e in children(ex) - resolve_scopes_!(ctx, e) + _resolve_scopes!(ctx, e) end pop!(ctx.scope_stack) setattr!(ctx.graph, ex.id, lambda_locals=scope.lambda_locals) @@ -343,12 +344,12 @@ function resolve_scopes_!(ctx, ex) scope = make_scope(ctx, ex, ex.scope_type, nothing) push!(ctx.scope_stack, scope) for e in children(ex) - resolve_scopes_!(ctx, e) + _resolve_scopes!(ctx, e) end pop!(ctx.scope_stack) else for e in children(ex) - resolve_scopes_!(ctx, e) + _resolve_scopes!(ctx, e) end end ex @@ -357,7 +358,7 @@ end function resolve_scopes!(ctx::ScopeResolutionContext, ex) thunk = makenode(ctx, ex, 
K"lambda", ex; lambda_info=LambdaInfo(SyntaxList(ctx), SyntaxList(ctx), nothing, true)) - resolve_scopes_!(ctx, thunk) + _resolve_scopes!(ctx, thunk) return thunk end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index f739ef2c1e552..4e45641d4cf83 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -129,6 +129,7 @@ function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) if !isnothing(node.val) v = node.val if v isa Symbol + # TODO: Fixes in JuliaSyntax to avoid ever converting to Symbol setattr!(graph, id, name_val=string(v)) else setattr!(graph, id, value=v) @@ -262,7 +263,7 @@ JuliaSyntax.last_byte(tree::SyntaxTree) = last_byte(sourceref(tree)) function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=Union{SourceRef,NodeId}, value=Any, name_val=String) - id = _convert_nodes(graph, node) + id = _convert_nodes(freeze_attrs(graph), node) return SyntaxTree(graph, id) end diff --git a/JuliaLowering/test/lowering.jl b/JuliaLowering/test/lowering.jl index b84777b18c4d2..2c65cdd43865f 100644 --- a/JuliaLowering/test/lowering.jl +++ b/JuliaLowering/test/lowering.jl @@ -3,7 +3,17 @@ using JuliaSyntax using JuliaLowering -using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode + +function wrapscope(ex, scope_type) + makenode(ex, ex, K"block", ex; scope_type=scope_type) +end + +function softscope_test(ex) + wrapscope(wrapscope(ex, :neutral), :soft) +end + +#------------------------------------------------------------------------------- # src = """ # let @@ -39,6 +49,9 @@ begin end """ +# src = """ +# x = 1 +# """ # src = """ # x + y @@ -48,6 +61,7 @@ t = parsestmt(SyntaxNode, src, filename="foo.jl") 
ctx = JuliaLowering.DesugaringContext() t2 = SyntaxTree(ctx.graph, t) +# t2 = softscope_test(t2) @info "Input code" t2 t3 = JuliaLowering.expand_forms(ctx, t2) @@ -64,10 +78,10 @@ t5 = JuliaLowering.compile_lambda(ctx2, t4) t6 = JuliaLowering.to_expr(in_mod, ctx2.var_info, t5) -x = 100 -y = 200 @info "CodeInfo" t6 +x = 100 +y = 200 @info "Eval" Base.eval(in_mod, t6) # flisp parts to do From 5eb9e5a7289be27f008bf673194f999a0405cd5c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 11 Apr 2024 07:02:01 +1000 Subject: [PATCH 0737/1109] Clean up pass system a bit Return contexts from pass functions, in addition to the expression so callers of a pass don't need to figure out how to create the context for that pass. Feels a bit better, but still unclear that this is great. Also be a bit more careful about reparenting SyntaxTree to the new graph as attributes are added by passes. This still feels somewhat dubious as any old SyntaxList data structures attached as attributes will not see the added attributes. 
--- JuliaLowering/src/JuliaLowering.jl | 2 ++ JuliaLowering/src/desugaring.jl | 18 ++++++++---- JuliaLowering/src/linear_ir.jl | 24 +++++++++------ JuliaLowering/src/scope_analysis.jl | 13 ++++++--- JuliaLowering/src/syntax_graph.jl | 23 ++++++++++++++- JuliaLowering/test/lowering.jl | 45 +++++++++-------------------- 6 files changed, 74 insertions(+), 51 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 7c64bf060ce12..ebc7292b63a58 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -64,6 +64,8 @@ _insert_kinds() include("syntax_graph.jl") include("utils.jl") +abstract type AbstractLoweringContext end + include("desugaring.jl") include("scope_analysis.jl") include("linear_ir.jl") diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 717849d5d6a0f..46a641b1a02ec 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -15,15 +15,13 @@ struct LambdaInfo is_toplevel_thunk::Bool end -abstract type AbstractLoweringContext end - struct DesugaringContext{GraphType} <: AbstractLoweringContext graph::GraphType next_var_id::Ref{VarId} end -function DesugaringContext() - graph = SyntaxGraph() +function DesugaringContext(ctx) + graph = syntax_graph(ctx) ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, green_tree=GreenNode, source_pos=Int, source=Union{SourceRef,NodeId}, @@ -86,6 +84,10 @@ function mapchildren(f, ctx, ex) return ex2 end +function syntax_graph(ctx::AbstractLoweringContext) + ctx.graph +end + function new_var_id(ctx::AbstractLoweringContext) id = ctx.next_var_id[] ctx.next_var_id[] += 1 @@ -460,7 +462,7 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) if numchildren(ex) == 1 && kind(ex[1]) == K"String" return ex[1] else - makenode(ctx, ex, K"call", top_ref(ctx, ex, "string"), expand_forms(children(ex))...) 
+ makenode(ctx, ex, K"call", top_ref(ctx, ex, "string"), expand_forms(ctx, children(ex))...) end elseif k == K"tuple" # TODO: named tuples @@ -486,3 +488,9 @@ function expand_forms(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) res end +function expand_forms(ex::SyntaxTree) + ctx = DesugaringContext(ex) + res = expand_forms(ctx, reparent(ctx, ex)) + ctx, res +end + diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index c75ee1ff5af27..a8edf9ca76fdb 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -262,7 +262,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif k == K"local_def" || k == K"local" nothing else - throw(LoweringError(ex, "Invalid syntax")) + throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) end end @@ -368,7 +368,6 @@ function compile_lambda(outer_ctx, ex) slot_rewrites = Dict{VarId,Int}() _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_locals) - @info "" slot_rewrites code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"lambda", makenode(ctx, ex[1], K"block", code), @@ -377,12 +376,19 @@ function compile_lambda(outer_ctx, ex) ) end +function linearize_ir(ctx, ex) + graph = ensure_attributes(ctx.graph, slot_rewrites=Dict{VarId,Int}) + _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, + nothing, ctx.var_info, ctx.mod) + res = compile_lambda(_ctx, reparent(_ctx, ex)) + _ctx, res +end #------------------------------------------------------------------------------- # Conversion to Expr + CodeInfo # Convert our data structures to CodeInfo -function to_code_info(ex, in_mod, funcname, var_info, slot_rewrites) +function to_code_info(ex, mod, funcname, var_info, slot_rewrites) input_code = children(ex) # Convert code to Expr and record low res locations in table num_stmts = length(input_code) @@ -391,13 +397,13 @@ function to_code_info(ex, 
in_mod, funcname, var_info, slot_rewrites) linetable_map = Dict{Tuple{Int,String}, Int32}() linetable = Any[] for i in 1:length(code) - code[i] = to_expr(in_mod, var_info, input_code[i]) + code[i] = to_expr(mod, var_info, input_code[i]) fname = filename(input_code[i]) lineno, _ = source_location(input_code[i]) loc = (lineno, fname) codelocs[i] = get!(linetable_map, loc) do inlined_at = 0 # FIXME: nonzero for expanded macros - full_loc = Core.LineInfoNode(in_mod, Symbol(funcname), Symbol(fname), + full_loc = Core.LineInfoNode(mod, Symbol(funcname), Symbol(fname), Int32(lineno), Int32(inlined_at)) push!(linetable, full_loc) length(linetable) @@ -449,7 +455,7 @@ function to_code_info(ex, in_mod, funcname, var_info, slot_rewrites) ) end -function to_expr(in_mod, var_info, ex) +function to_expr(mod, var_info, ex) k = kind(ex) if is_literal(k) ex.value @@ -467,14 +473,14 @@ function to_expr(in_mod, var_info, ex) elseif k == K"SSAValue" Core.SSAValue(ex.var_id) elseif k == K"return" - Core.ReturnNode(to_expr(in_mod, var_info, ex[1])) + Core.ReturnNode(to_expr(mod, var_info, ex[1])) elseif is_quoted(k) TODO(ex, "Convert SyntaxTree to Expr") elseif k == K"lambda" funcname = ex.lambda_info.is_toplevel_thunk ? "top-level scope" : "none" # FIXME - ir = to_code_info(ex[1], in_mod, funcname, var_info, ex.slot_rewrites) + ir = to_code_info(ex[1], mod, funcname, var_info, ex.slot_rewrites) if ex.lambda_info.is_toplevel_thunk Expr(:thunk, ir) else @@ -498,7 +504,7 @@ function to_expr(in_mod, var_info, ex) if isnothing(head) TODO(ex, "Unhandled form for kind $k") end - Expr(head, map(e->to_expr(in_mod, var_info, e), children(ex))...) + Expr(head, map(e->to_expr(mod, var_info, e), children(ex))...) 
end end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 7763dacbf71ea..26a29073e51cf 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -129,13 +129,12 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext # scopes. var_info::Dict{VarId,VarInfo} # Variables which were implicitly global due to being assigned to in top - # level code top level + # level code implicit_toplevel_globals::Set{String} end -function ScopeResolutionContext(ctx::DesugaringContext, mod::Module) - # FIXME: Add slot_rewrites later - graph = ensure_attributes(ctx.graph, lambda_locals=Set{VarId}, slot_rewrites=Dict{VarId,Int}) +function ScopeResolutionContext(ctx, mod::Module) + graph = ensure_attributes(ctx.graph, lambda_locals=Set{VarId}) ScopeResolutionContext(graph, ctx.next_var_id, mod, @@ -362,3 +361,9 @@ function resolve_scopes!(ctx::ScopeResolutionContext, ex) return thunk end +function resolve_scopes!(ctx::DesugaringContext, mod::Module, ex) + ctx2 = ScopeResolutionContext(ctx, mod) + res = resolve_scopes!(ctx2, reparent(ctx2, ex)) + ctx2, res +end + diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 4e45641d4cf83..953d842d762b2 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -145,6 +145,13 @@ function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) return id end +""" + syntax_graph(ctx) + +Return `SyntaxGraph` associated with `ctx` +""" +syntax_graph(graph::SyntaxGraph) = graph + #------------------------------------------------------------------------------- struct SyntaxTree{GraphType} graph::GraphType @@ -370,6 +377,18 @@ function Base.show(io::IO, node::SyntaxTree) _show_syntax_tree_sexpr(io, node) end +function reparent(ctx, ex::SyntaxTree) + # Ensure `ex` has the same parent graph, in a somewhat loose sense. + # Could relax by copying if necessary? 
+ # In that case, would we copy all the attributes? That would have slightly + # different semantics. + graph = syntax_graph(ctx) + @assert graph.edge_ranges === ex.graph.edge_ranges + SyntaxTree(graph, ex.id) +end + +syntax_graph(ex::SyntaxTree) = ex.graph + #------------------------------------------------------------------------------- # Lightweight vector of nodes ids with associated pointer to graph stored separately. struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree} @@ -384,6 +403,8 @@ end SyntaxList(graph::SyntaxGraph) = SyntaxList(graph, Vector{NodeId}()) SyntaxList(ctx) = SyntaxList(ctx.graph) +syntax_graph(lst::SyntaxList) = lst.graph + Base.size(v::SyntaxList) = size(v.ids) Base.IndexStyle(::Type{<:SyntaxList}) = IndexLinear() @@ -428,6 +449,6 @@ end #------------------------------------------------------------------------------- function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) - SyntaxTree(build_tree(SyntaxNode, stream; kws...)) + SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) end diff --git a/JuliaLowering/test/lowering.jl b/JuliaLowering/test/lowering.jl index 2c65cdd43865f..7dd4eaefd0e2a 100644 --- a/JuliaLowering/test/lowering.jl +++ b/JuliaLowering/test/lowering.jl @@ -57,44 +57,25 @@ end # x + y # """ -t = parsestmt(SyntaxNode, src, filename="foo.jl") +ex = parsestmt(SyntaxTree, src, filename="foo.jl") +# t = softscope_test(t) +@info "Input code" ex -ctx = JuliaLowering.DesugaringContext() -t2 = SyntaxTree(ctx.graph, t) -# t2 = softscope_test(t2) -@info "Input code" t2 +in_mod = Main +ctx, ex_desugar = JuliaLowering.expand_forms(ex) +@info "Desugared" ex_desugar -t3 = JuliaLowering.expand_forms(ctx, t2) -@info "Desugared" t3 +ctx2, ex_scoped = JuliaLowering.resolve_scopes!(ctx, in_mod, ex_desugar) +@info "Resolved scopes" ex_scoped -in_mod = Main # Module(:Foo) -ctx2 = JuliaLowering.ScopeResolutionContext(ctx, in_mod) -t4 = JuliaLowering.resolve_scopes!(ctx2, t3) 
-@info "Resolved scopes" t4 - -t5 = JuliaLowering.compile_lambda(ctx2, t4) - -@info "Linear IR" t5 - -t6 = JuliaLowering.to_expr(in_mod, ctx2.var_info, t5) - -@info "CodeInfo" t6 +ctx3, ex_compiled = JuliaLowering.linearize_ir(ctx2, ex_scoped) +@info "Linear IR" ex_compiled +ex_expr = JuliaLowering.to_expr(in_mod, ctx2.var_info, ex_compiled) +@info "CodeInfo" ex_expr x = 100 y = 200 -@info "Eval" Base.eval(in_mod, t6) - -# flisp parts to do -# let -# desugar/let => 76 -# desugar/func => ~100 (partial) -# desugar/call => 70 -# handle-scopes => 195 -# handle-scopes/scope-block => 99 -# handle-scopes/locals => 16 -# linear-ir => 250 (partial, approximate) -# linear-ir/func => 22 - +@info "Eval" Base.eval(in_mod, ex_expr) # Syntax tree ideas: Want following to work? # This can be fully inferrable! From 23b89abdddc75bd94f3d3b7ad43f8434aaaf15aa Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 15 Apr 2024 05:51:27 +1000 Subject: [PATCH 0738/1109] Driver functions: `eval2` `include2` and `lower` --- JuliaLowering/README.md | 44 ++++++++ JuliaLowering/src/JuliaLowering.jl | 5 + JuliaLowering/src/desugaring.jl | 10 +- JuliaLowering/src/eval.jl | 176 +++++++++++++++++++++++++++++ JuliaLowering/src/linear_ir.jl | 141 +++-------------------- 5 files changed, 245 insertions(+), 131 deletions(-) create mode 100644 JuliaLowering/src/eval.jl diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index e16dda797d4b0..064d2870640a5 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -238,3 +238,47 @@ mutable struct CodeInfo end ``` +### Notes on toplevel-only forms and eval-related functions + +In the current Julia runtime, + +`Base.eval()` +- Uses `jl_toplevel_eval_in` which calls `jl_toplevel_eval_flex` + +`jl_toplevel_eval_flex(mod, ex)` +- Lowers if necessay +- Evaluates certain blessed top level forms + * `:.` + * `:module` + * `:using` + * `:import` + * `:public` + * `:export` + * `:global` + * `:const` + * `:toplevel` + * `:error` + * 
`:incomplete` + * Identifier and literals +- Otherwise expects `Expr(:thunk)` + * Use codegen "where necessary/profitable" (eg ccall, has_loops etc) + * Otherwise interpret via `jl_interpret_toplevel_thunk` + +Should we reimplement eval of the above blessed top level forms in Julia? +Pros: +- Semantically sound. Lowering should do syntax checking in things like `Expr(:using)` +- Precise lowering error messages +- Replaces more Expr usage +- Replaces a whole pile of C code with significantly less Julia code +- Lowering output becomes more consistently imperative +Cons: +- Lots more code to write +- May need to invent intermediate data structures to replace `Expr` +- Bootstrap? +- Some forms require creating toplevel thunks + +In general, we'd be replacing current *declarative* lowering targets like +`Expr(:using)` with an *imperative* call to a `Core` API instead. The call and +the setup of its arguments would need to go in a thunk. We've currently got an +odd mixture of imperative and declarative lowered code. + diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index ebc7292b63a58..6bf359f0915ae 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -8,6 +8,8 @@ using JuliaSyntax: filename, first_byte, last_byte, source_location, span using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error +# The following kinds are used in intermediate forms by lowering but are not +# part of the surface syntax function _insert_kinds() JuliaSyntax.insert_kinds!(JuliaLowering, 1, [ "BEGIN_LOWERING_KINDS" @@ -17,6 +19,7 @@ function _insert_kinds() # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" + # TODO: Use `meta` for inbounds and loopinfo etc? 
"inbounds" "inline" "noinline" @@ -70,6 +73,8 @@ include("desugaring.jl") include("scope_analysis.jl") include("linear_ir.jl") +include("eval.jl") + function __init__() _insert_kinds() end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 46a641b1a02ec..44c857227ac16 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -445,7 +445,7 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) elseif k == K"function" expand_forms(ctx, expand_function_def(ctx, ex)) elseif k == K"let" - return expand_forms(ctx, expand_let(ctx, ex)) + expand_forms(ctx, expand_let(ctx, ex)) elseif k == K"local" || k == K"global" if numchildren(ex) == 1 && kind(ex[1]) == K"Identifier" # Don't recurse when already simplified - `local x`, etc @@ -454,13 +454,13 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) expand_forms(ctx, expand_decls(ctx, ex)) # FIXME end elseif is_operator(k) && !haschildren(ex) - return makenode(ctx, ex, K"Identifier", name_val=ex.name_val) + makenode(ctx, ex, K"Identifier", name_val=ex.name_val) elseif k == K"char" || k == K"var" @chk numchildren(ex) == 1 - return ex[1] + ex[1] elseif k == K"string" if numchildren(ex) == 1 && kind(ex[1]) == K"String" - return ex[1] + ex[1] else makenode(ctx, ex, K"call", top_ref(ctx, ex, "string"), expand_forms(ctx, children(ex))...) end @@ -468,7 +468,7 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) # TODO: named tuples makenode(ctx, ex, K"call", core_ref(ctx, ex, "tuple"), expand_forms(ctx, children(ex))...) 
elseif !haschildren(ex) - return ex + ex else if k == K"=" @chk numchildren(ex) == 2 diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl new file mode 100644 index 0000000000000..76cc05cd1721b --- /dev/null +++ b/JuliaLowering/src/eval.jl @@ -0,0 +1,176 @@ +function lower(mod, ex) + ctx1, ex1 = expand_forms(ex) + ctx2, ex2 = resolve_scopes!(ctx1, mod, ex1) + ctx3, ex3 = linearize_ir(ctx2, ex2) + ex3 +end + +# Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the +# Julia runtime + +function to_code_info(ex, mod, funcname, var_info, slot_rewrites) + input_code = children(ex) + # Convert code to Expr and record low res locations in table + num_stmts = length(input_code) + code = Vector{Any}(undef, num_stmts) + codelocs = Vector{Int32}(undef, num_stmts) + linetable_map = Dict{Tuple{Int,String}, Int32}() + linetable = Any[] + for i in 1:length(code) + code[i] = to_lowered_expr(mod, var_info, input_code[i]) + fname = filename(input_code[i]) + lineno, _ = source_location(input_code[i]) + loc = (lineno, fname) + codelocs[i] = get!(linetable_map, loc) do + inlined_at = 0 # FIXME: nonzero for expanded macros + full_loc = Core.LineInfoNode(mod, Symbol(funcname), Symbol(fname), + Int32(lineno), Int32(inlined_at)) + push!(linetable, full_loc) + length(linetable) + end + end + + # FIXME + ssaflags = zeros(UInt32, length(code)) + + nslots = length(slot_rewrites) + slotnames = Vector{Symbol}(undef, nslots) + slot_rename_inds = Dict{String,Int}() + slotflags = Vector{UInt8}(undef, nslots) + for (id,i) in slot_rewrites + info = var_info[id] + name = info.name + ni = get(slot_rename_inds, name, 0) + slot_rename_inds[name] = ni + 1 + if ni > 0 + name = "$name@$ni" + end + slotnames[i] = Symbol(name) + slotflags[i] = 0x00 # FIXME!! + end + + _CodeInfo( + code, + codelocs, + num_stmts, # ssavaluetypes (why put num_stmts in here??) 
+ ssaflags, + nothing, # method_for_inference_limit_heuristics + linetable, + slotnames, + slotflags, + nothing, # slottypes + Any, # rettype + nothing, # parent + nothing, # edges + Csize_t(1), # min_world + typemax(Csize_t), # max_world + false, # inferred + false, # propagate_inbounds + false, # has_fcall + false, # nospecializeinfer + 0x00, # inlining + 0x00, # constprop + 0x0000, # purity + 0xffff, # inlining_cost + ) +end + +function to_lowered_expr(mod, var_info, ex) + k = kind(ex) + if is_literal(k) + ex.value + elseif k == K"core" + GlobalRef(Core, Symbol(ex.name_val)) + elseif k == K"top" + GlobalRef(Base, Symbol(ex.name_val)) + elseif k == K"Identifier" + # Implicitly refers to name in parent module + # TODO: Should we even have plain identifiers at this point or should + # they all effectively be resolved into GlobalRef earlier? + Symbol(ex.name_val) + elseif k == K"slot" + Core.SlotNumber(ex.var_id) + elseif k == K"SSAValue" + Core.SSAValue(ex.var_id) + elseif k == K"return" + Core.ReturnNode(to_lowered_expr(mod, var_info, ex[1])) + elseif is_quoted(k) + TODO(ex, "Convert SyntaxTree to Expr") + elseif k == K"lambda" + funcname = ex.lambda_info.is_toplevel_thunk ? + "top-level scope" : + "none" # FIXME + ir = to_code_info(ex[1], mod, funcname, var_info, ex.slot_rewrites) + if ex.lambda_info.is_toplevel_thunk + Expr(:thunk, ir) + else + ir + end + elseif k == K"Value" + ex.value + else + # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ + # + # call invoke static_parameter `=` method struct_type abstract_type + # primitive_type global const new splatnew isdefined the_exception + # enter leave pop_exception inbounds boundscheck loopinfo copyast meta + # foreigncall new_opaque_closure lambda + head = k == K"call" ? :call : + k == K"=" ? :(=) : + k == K"method" ? :method : + k == K"global" ? :global : + k == K"const" ? 
:const : + nothing + if isnothing(head) + TODO(ex, "Unhandled form for kind $k") + end + Expr(head, map(e->to_lowered_expr(mod, var_info, e), children(ex))...) + end +end + +#------------------------------------------------------------------------------- +function eval2(mod, exs::AbstractVector) + res = nothing + for e in exs + res = eval2(mod, e) + end + return res +end + +# Like eval(), but uses code lowering defined by this package +function eval2(mod, ex::SyntaxTree) + k = kind(ex) + if k == K"toplevel" + return eval2(mod, children(ex)) + elseif k == K"module" + m2 = Module(ex[1].name_val) + eval2(m2, children(ex[2])) + return m2 + end + linear_ir = lower(mod, ex) + expr_form = to_lowered_expr(mod, linear_ir.var_info, linear_ir) + Base.eval(mod, expr_form) +end + +#------------------------------------------------------------------------------- +function include2(mod, filename) + path, prev = Base._include_dependency(mod, filename) + code = read(path, String) + tls = task_local_storage() + tls[:SOURCE_PATH] = path + try + return include_string(mod, code; filename=path) + finally + if prev === nothing + delete!(tls, :SOURCE_PATH) + else + tls[:SOURCE_PATH] = prev + end + end +end + +function include_string(mod, str; filename=nothing) + eval2(mod, parseall(SyntaxTree, str; filename=filename)) +end + + diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index a8edf9ca76fdb..b9483658e4d75 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -259,6 +259,15 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing + elseif k == K"module" || k == K"toplevel" + # Both these forms can't be lowered here; they need to just be quoted + # and passed through to a call to eval2. + # TODO: Is compile() the right place to do this? 
+ # TODO: Restrict to toplevel only + call = makenode(ctx, ex, K"call", + makenode(ctx, ex, K"Value", JuliaLowering.eval2), + makenode(ctx, ex, K"Value", ex)) + compile(ctx, call, needs_value, in_tail_pos) elseif k == K"local_def" || k == K"local" nothing else @@ -358,7 +367,6 @@ function _add_slots!(slot_rewrites, var_info, var_ids) slot_rewrites end - function compile_lambda(outer_ctx, ex) lambda_info = ex.lambda_info return_type = nothing # FIXME @@ -377,134 +385,15 @@ function compile_lambda(outer_ctx, ex) end function linearize_ir(ctx, ex) - graph = ensure_attributes(ctx.graph, slot_rewrites=Dict{VarId,Int}) + graph = ensure_attributes(ctx.graph, + slot_rewrites=Dict{VarId,Int}, + var_info=Dict{VarId,VarInfo}) + # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently + # required to call reparent() ... _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, nothing, ctx.var_info, ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) + setattr!(graph, res.id, var_info=ctx.var_info) _ctx, res end -#------------------------------------------------------------------------------- -# Conversion to Expr + CodeInfo - -# Convert our data structures to CodeInfo -function to_code_info(ex, mod, funcname, var_info, slot_rewrites) - input_code = children(ex) - # Convert code to Expr and record low res locations in table - num_stmts = length(input_code) - code = Vector{Any}(undef, num_stmts) - codelocs = Vector{Int32}(undef, num_stmts) - linetable_map = Dict{Tuple{Int,String}, Int32}() - linetable = Any[] - for i in 1:length(code) - code[i] = to_expr(mod, var_info, input_code[i]) - fname = filename(input_code[i]) - lineno, _ = source_location(input_code[i]) - loc = (lineno, fname) - codelocs[i] = get!(linetable_map, loc) do - inlined_at = 0 # FIXME: nonzero for expanded macros - full_loc = Core.LineInfoNode(mod, Symbol(funcname), Symbol(fname), - Int32(lineno), Int32(inlined_at)) - push!(linetable, full_loc) - length(linetable) - end - end - - 
# FIXME - ssaflags = zeros(UInt32, length(code)) - - nslots = length(slot_rewrites) - slotnames = Vector{Symbol}(undef, nslots) - slot_rename_inds = Dict{String,Int}() - slotflags = Vector{UInt8}(undef, nslots) - for (id,i) in slot_rewrites - info = var_info[id] - name = info.name - ni = get(slot_rename_inds, name, 0) - slot_rename_inds[name] = ni + 1 - if ni > 0 - name = "$name@$ni" - end - slotnames[i] = Symbol(name) - slotflags[i] = 0x00 # FIXME!! - end - - _CodeInfo( - code, - codelocs, - num_stmts, # ssavaluetypes (why put num_stmts in here??) - ssaflags, - nothing, # method_for_inference_limit_heuristics - linetable, - slotnames, - slotflags, - nothing, # slottypes - Any, # rettype - nothing, # parent - nothing, # edges - Csize_t(1), # min_world - typemax(Csize_t), # max_world - false, # inferred - false, # propagate_inbounds - false, # has_fcall - false, # nospecializeinfer - 0x00, # inlining - 0x00, # constprop - 0x0000, # purity - 0xffff, # inlining_cost - ) -end - -function to_expr(mod, var_info, ex) - k = kind(ex) - if is_literal(k) - ex.value - elseif k == K"core" - GlobalRef(Core, Symbol(ex.name_val)) - elseif k == K"top" - GlobalRef(Base, Symbol(ex.name_val)) - elseif k == K"Identifier" - # Implicitly refers to name in parent module - # TODO: Should we even have plain identifiers at this point or should - # they all effectively be resolved into GlobalRef earlier? - Symbol(ex.name_val) - elseif k == K"slot" - Core.SlotNumber(ex.var_id) - elseif k == K"SSAValue" - Core.SSAValue(ex.var_id) - elseif k == K"return" - Core.ReturnNode(to_expr(mod, var_info, ex[1])) - elseif is_quoted(k) - TODO(ex, "Convert SyntaxTree to Expr") - elseif k == K"lambda" - funcname = ex.lambda_info.is_toplevel_thunk ? 
- "top-level scope" : - "none" # FIXME - ir = to_code_info(ex[1], mod, funcname, var_info, ex.slot_rewrites) - if ex.lambda_info.is_toplevel_thunk - Expr(:thunk, ir) - else - ir - end - elseif k == K"Value" - ex.value - else - # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ - # - # call invoke static_parameter `=` method struct_type abstract_type - # primitive_type global const new splatnew isdefined the_exception - # enter leave pop_exception inbounds boundscheck loopinfo copyast meta - # foreigncall new_opaque_closure lambda - head = k == K"call" ? :call : - k == K"=" ? :(=) : - k == K"method" ? :method : - k == K"global" ? :global : - k == K"const" ? :const : - nothing - if isnothing(head) - TODO(ex, "Unhandled form for kind $k") - end - Expr(head, map(e->to_expr(mod, var_info, e), children(ex))...) - end -end - From b7e32642bee272c7bb13d9465ee02257b0009f90 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 21 Apr 2024 06:43:14 +1000 Subject: [PATCH 0739/1109] Move kinds into kinds.jl --- JuliaLowering/src/JuliaLowering.jl | 57 ++--------------------------- JuliaLowering/src/kinds.jl | 58 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 55 deletions(-) create mode 100644 JuliaLowering/src/kinds.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 6bf359f0915ae..0bcb47d518f05 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -3,65 +3,12 @@ module JuliaLowering using JuliaSyntax using JuliaSyntax: SyntaxHead, highlight, Kind, GreenNode, @KSet_str -using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags +using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags, has_flags using JuliaSyntax: filename, first_byte, last_byte, source_location, span using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error -# The following kinds are 
used in intermediate forms by lowering but are not -# part of the surface syntax -function _insert_kinds() - JuliaSyntax.insert_kinds!(JuliaLowering, 1, [ - "BEGIN_LOWERING_KINDS" - # Compiler metadata hints - "meta" - "extension" - # A literal Julia value of any kind, as might be inserted by the AST - # during macro expansion - "Value" - # TODO: Use `meta` for inbounds and loopinfo etc? - "inbounds" - "inline" - "noinline" - "loopinfo" - # Identifier for a value which is only assigned once - "SSAValue" - # Scope expressions `(hygienic_scope ex s)` mean `ex` should be - # interpreted as being in scope `s`. - "hygienic_scope" - # Various heads harvested from flisp lowering. - # (TODO: May or may not need all these - assess later) - "break_block" - "scope_block" - "local_def" - "_while" - "_do_while" - "with_static_parameters" - "top" - "core" - "toplevel_butfirst" - "thunk" - "lambda" - "moved_local" - "the_exception" - "foreigncall" - "new" - "globalref" - "outerref" - "enter" - "leave" - "label" - "goto" - "gotoifnot" - "trycatchelse" - "tryfinally" - "method" - "slot" - "unnecessary" - "decl" - "END_LOWERING_KINDS" - ]) -end +include("kinds.jl") _insert_kinds() include("syntax_graph.jl") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl new file mode 100644 index 0000000000000..6b5759f898043 --- /dev/null +++ b/JuliaLowering/src/kinds.jl @@ -0,0 +1,58 @@ + +# The following kinds are used in intermediate forms by lowering but are not +# part of the surface syntax +function _insert_kinds() + JuliaSyntax.insert_kinds!(JuliaLowering, 1, [ + "BEGIN_LOWERING_KINDS" + # Compiler metadata hints + "meta" + "extension" + # A literal Julia value of any kind, as might be inserted by the AST + # during macro expansion + "Value" + # Quoted symbol. Used to distinguish Symbol literals from AST + # literals of kind K"Identifier" + "Symbol" + # TODO: Use `meta` for inbounds and loopinfo etc? 
+ "inbounds" + "inline" + "noinline" + "loopinfo" + # Identifier for a value which is only assigned once + "SSAValue" + # Scope expressions `(hygienic_scope ex s)` mean `ex` should be + # interpreted as being in scope `s`. + "hygienic_scope" + # Various heads harvested from flisp lowering. + # (TODO: May or may not need all these - assess later) + "break_block" + "scope_block" + "local_def" + "_while" + "_do_while" + "with_static_parameters" + "top" + "core" + "toplevel_butfirst" + "thunk" + "lambda" + "moved_local" + "the_exception" + "foreigncall" + "new" + "globalref" + "outerref" + "enter" + "leave" + "label" + "goto" + "gotoifnot" + "trycatchelse" + "tryfinally" + "method" + "slot" + "unnecessary" + "decl" + "END_LOWERING_KINDS" + ]) +end From b4bd1fe2ca17044dbde6257ab266b4ec841cea92 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 21 Apr 2024 06:45:19 +1000 Subject: [PATCH 0740/1109] @ ast macro to make constructing syntax fragments simpler --- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/ast.jl | 235 ++++++++++++++++++++++++++++ JuliaLowering/src/desugaring.jl | 224 ++++++++++---------------- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 6 +- JuliaLowering/src/syntax_graph.jl | 42 +++-- JuliaLowering/test/lowering.jl | 33 ++-- JuliaLowering/test/runtests.jl | 44 +++++- 8 files changed, 412 insertions(+), 175 deletions(-) create mode 100644 JuliaLowering/src/ast.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 0bcb47d518f05..2505344c819ec 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -12,6 +12,7 @@ include("kinds.jl") _insert_kinds() include("syntax_graph.jl") +include("ast.jl") include("utils.jl") abstract type AbstractLoweringContext end diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl new file mode 100644 index 0000000000000..26fcddd0acbc9 --- /dev/null +++ b/JuliaLowering/src/ast.jl @@ -0,0 +1,235 @@ 
+# AST creation utilities +_node_id(ex::NodeId) = ex +_node_id(ex::SyntaxTree) = ex.id + +_node_ids() = () +_node_ids(::Nothing, cs...) = _node_ids(cs...) +_node_ids(c, cs...) = (_node_id(c), _node_ids(cs...)...) + +function _makenode(graph::SyntaxGraph, srcref, head, children; attrs...) + id = newnode!(graph) + # TODO: Having this list seeems hacky? Use makeleaf everywhere instead. + if isnothing(children) || kind(head) in (K"Identifier", K"core", K"top", K"SSAValue", K"Value", K"slot") || is_literal(head) + @assert isnothing(children) || length(children) == 0 + else + setchildren!(graph, id, children) + end + srcref_attr = srcref isa SyntaxTree ? srcref.id : srcref + setattr!(graph, id; source=srcref_attr, attrs...) + sethead!(graph, id, head) + return SyntaxTree(graph, id) +end + +function makenode(ctx, srcref, head, children::SyntaxTree...; attrs...) + _makenode(syntax_graph(ctx), srcref, head, _node_ids(children...); attrs...) +end + +function makenode(ctx::Union{AbstractLoweringContext,SyntaxTree}, + srcref, head, children::SyntaxList; attrs...) + graph = syntax_graph(ctx) + syntax_graph(ctx) === syntax_graph(children) || error("Mismatching graphs") + _makenode(graph, srcref, head, children.ids; attrs...) +end + +function makeleaf(ctx, srcref, kind, value; kws...) + graph = syntax_graph(ctx) + if kind == K"Identifier" || kind == K"core" || kind == K"top" + _makenode(graph, srcref, kind, nothing; name_val=value, kws...) + elseif kind == K"SSAValue" + _makenode(graph, srcref, kind, nothing; var_id=value, kws...) + else + val = kind == K"Integer" ? convert(Int, value) : + kind == K"Float" ? convert(Float64, value) : + kind == K"String" ? convert(String, value) : + kind == K"Symbol" ? convert(String, value) : + kind == K"Char" ? convert(Char, value) : + kind == K"Value" ? value : + error("Unexpected leaf kind `$kind`") + _makenode(graph, srcref, kind, nothing; value=val, kws...) 
+ end +end + +function makeleaf(ctx, srcref, kind) + _makenode(syntax_graph(ctx), srcref, kind, nothing) +end + + +function _match_kind_ex(defs, srcref, ex) + kws = [] + if Meta.isexpr(ex, :call) + kind = esc(ex.args[1]) + args = ex.args[2:end] + if Meta.isexpr(args[1], :parameters) + kws = map(esc, args[1].args) + popfirst!(args) + end + while length(args) >= 1 && Meta.isexpr(args[end], :kw) + pushfirst!(kws, esc(pop!(args))) + end + if length(args) == 1 + srcref = Symbol("srcref_$(length(defs))") + ref_ex = if Meta.isexpr(args[1], :macrocall) && args[1].args[1] == Symbol("@HERE") + QuoteNode(args[1].args[2]) + else + esc(args[1]) + end + push!(defs, :($srcref = $ref_ex)) + elseif length(args) > 1 + error("Unexpected: extra srcref argument in `$ex`?") + end + else + kind = esc(ex) + end + kind, srcref, kws +end + +function _expand_ast_tree(defs, ctx, srcref, tree) + if Meta.isexpr(tree, :(::)) + # Leaf node + kind, srcref, kws = _match_kind_ex(defs, srcref, tree.args[2]) + :(makeleaf($ctx, $srcref, $kind, $(esc(tree.args[1])), $(kws...))) + elseif Meta.isexpr(tree, (:vcat, :hcat)) + # Interior node + flatargs = [] + for a in tree.args + if Meta.isexpr(a, :row) + append!(flagargs, a.args) + else + push!(flatargs, a) + end + end + kind, srcref, kws = _match_kind_ex(defs, srcref, flatargs[1]) + children = map(a->_expand_ast_tree(defs, ctx, srcref, a), flatargs[2:end]) + :(makenode($ctx, $srcref, $kind, $(children...), $(kws...))) + elseif Meta.isexpr(tree, :(=)) + lhs = esc(tree.args[1]) + rhs = _expand_ast_tree(defs, ctx, srcref, tree.args[2]) + ssadef = Symbol("ssadef$(length(defs))") + push!(defs, :(($lhs, $ssadef) = assign_tmp($ctx, $rhs))) + ssadef + elseif Meta.isexpr(tree, :if) + Expr(:if, esc(tree.args[1]), + map(a->_expand_ast_tree(defs, ctx, srcref, a), tree.args[2:end])...) + elseif Meta.isexpr(tree, (:block, :tuple)) + Expr(tree.head, map(a->_expand_ast_tree(defs, ctx, srcref, a), tree.args)...) 
+ else + esc(tree) + end +end + +""" + @ast ctx srcref tree + +Syntactic s-expression shorthand for constructing a `SyntaxTree` AST. + +* `ctx` - SyntaxGraph context +* `srcref` - Reference to the source code from which this AST was derived. + +The `tree` contains syntax of the following forms: +* `[kind child₁ child₂]` - construct an interior node with children +* `value :: kind` - construct a leaf node +* `var=ex` - Set `var=ssavar(...)` and return an assignment node `\$var=ex`. + `var` may be used outside `@ast` +* `cond ? ex1 : ex2` - Conditional; `ex1` and `ex2` will be recursively expanded. + `if ... end` and `if ... else ... end` also work with this. + +Any `kind` can be replaced with an expression of the form +* `kind(srcref)` - override the source reference for this node and its children +* `kind(attr=val)` - set an additional attribute +* `kind(srcref; attr₁=val₁, attr₂=val₂)` - the general form + +In any place `srcref` is used, the special form `@HERE` can be used to instead +to indicate that the "primary" location of the source is the location where +`@HERE` occurs. + + +# Examples + +``` +@ast ctx srcref [ + K"toplevel" + [K"using" + [K"importpath" + "Base" ::K"Identifier"(src) + ] + ] + [K"function" + [K"call" + "eval" ::K"Identifier" + "x" ::K"Identifier" + ] + [K"call" + "eval" ::K"core" + mn.name_val ::K"Identifier" + "x" ::K"Identifier" + ] + ] +] +``` +""" +macro ast(ctx, srcref, tree) + defs = [] + push!(defs, :(ctx = $(esc(ctx)))) + push!(defs, :(srcref = $(esc(srcref)))) + ex = _expand_ast_tree(defs, :ctx, :srcref, tree) + quote + $(defs...) + $ex + end +end + +function mapchildren(f, ctx, ex) + if haschildren(ex) + cs = SyntaxList(ctx) + for e in children(ex) + push!(cs, f(e)) + end + ex2 = makenode(ctx, ex, head(ex), cs) + else + ex2 = makeleaf(ctx, ex, head(ex)) + end + # Copy all attributes. 
+ # TODO: Make this type stable and efficient + for v in values(ex.graph.attributes) + if haskey(v, ex.id) + v[ex2.id] = v[ex.id] + end + end + return ex2 +end + +# Convenience functions to create leaf nodes referring to identifiers within +# the Core and Top modules. +core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) +Any_type(ctx, ex) = core_ref(ctx, ex, "Any") +svec_type(ctx, ex) = core_ref(ctx, ex, "svec") +nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") +unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") + +top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) + +#------------------------------------------------------------------------------- +function syntax_graph(ctx::AbstractLoweringContext) + ctx.graph +end + +function new_var_id(ctx::AbstractLoweringContext) + id = ctx.next_var_id[] + ctx.next_var_id[] += 1 + return id +end + +# Create a new SSA variable +function ssavar(ctx::AbstractLoweringContext, srcref) + id = makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) + return id +end + +# Assign `ex` to an SSA variable. +# Return (variable, assignment_node) +function assign_tmp(ctx::AbstractLoweringContext, ex) + var = ssavar(ctx, ex) + assign_var = makenode(ctx, ex, K"=", var, ex) + var, assign_var +end + diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 44c857227ac16..d5f469451e26f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -33,91 +33,6 @@ function DesugaringContext(ctx) end #------------------------------------------------------------------------------- -# AST creation utilities -_node_id(ex::NodeId) = ex -_node_id(ex::SyntaxTree) = ex.id - -_node_ids() = () -_node_ids(c, cs...) = (_node_id(c), _node_ids(cs...)...) - -function _makenode(graph::SyntaxGraph, srcref, head, children; attrs...) - id = newnode!(graph) - # TODO: Having this list of kinds seeems hacky? 
- if kind(head) in (K"Identifier", K"core", K"top", K"SSAValue", K"Value", K"slot") || is_literal(head) - @assert length(children) == 0 - else - setchildren!(graph, id, children) - end - setattr!(graph, id; source=srcref.id, attrs...) - sethead!(graph, id, head) - return SyntaxTree(graph, id) -end - -function makenode(graph::SyntaxGraph, srcref, head, children...; attrs...) - _makenode(graph, srcref, head, children; attrs...) -end - -function makenode(ctx::Union{AbstractLoweringContext,SyntaxTree}, - srcref, head, children::SyntaxTree...; attrs...) - _makenode(ctx.graph, srcref, head, _node_ids(children...); attrs...) -end - -function makenode(ctx::Union{AbstractLoweringContext,SyntaxTree}, - srcref, head, children::SyntaxList; attrs...) - ctx.graph === children.graph || error("Mismatching graphs") - _makenode(ctx.graph, srcref, head, children.ids; attrs...) -end - -function mapchildren(f, ctx, ex) - cs = SyntaxList(ctx) - for e in children(ex) - push!(cs, f(e)) - end - ex2 = makenode(ctx, ex, head(ex), cs) - # Copy all attributes. - # TODO: Make this type stable and efficient - for v in values(ex.graph.attributes) - if haskey(v, ex.id) - v[ex2.id] = v[ex.id] - end - end - return ex2 -end - -function syntax_graph(ctx::AbstractLoweringContext) - ctx.graph -end - -function new_var_id(ctx::AbstractLoweringContext) - id = ctx.next_var_id[] - ctx.next_var_id[] += 1 - return id -end - -# Create a new SSA variable -function ssavar(ctx::AbstractLoweringContext, srcref) - id = makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) - return id -end - -# Assign `ex` to an SSA variable. -# Return (variable, assignment_node) -function assign_tmp(ctx::AbstractLoweringContext, ex) - var = ssavar(ctx, ex) - assign_var = makenode(ctx, ex, K"=", var, ex) - var, assign_var -end - -# Convenience functions to create leaf nodes referring to identifiers within -# the Core and Top modules. 
-core_ref(ctx, ex, name) = makenode(ctx, ex, K"core", name_val=name) -Any_type(ctx, ex) = core_ref(ctx, ex, "Any") -svec_type(ctx, ex) = core_ref(ctx, ex, "svec") -nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") -unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") - -top_ref(ctx, ex, name) = makenode(ctx, ex, K"top", name_val=name) - #------------------------------------------------------------------------------- # Predicates and accessors working on expression trees @@ -176,7 +91,22 @@ function assigned_name(ex) end #------------------------------------------------------------------------------- -# Lowering Pass 1 - basic desugaring +# TODO: Lowering pass 1.1: +# Aim of this pass is to do some super simple normalizations to make +# desugaring-proper easier to write. The kinds of things like identifier +# normalization which would require extra logic to pervade the remaining +# desugaring. +# +# * Identifier normalization +# - Strip var"" +# - Operator -> Identifier if necessary +# * Strip "container" nodes +# - K"char" +# - K"parens" nodes +# * Quasiquote expansion + +#------------------------------------------------------------------------------- +# Lowering Pass 1.2 - desugaring function expand_assignment(ctx, ex) end @@ -193,31 +123,32 @@ function expand_let(ctx, ex) scope_type = get(ex, :scope_type, :hard) blk = ex[2] if numchildren(ex[1]) == 0 # TODO: Want to use !haschildren(ex[1]) but this doesn't work... 
- return makenode(ctx, ex, K"block", blk; - scope_type=scope_type) + return @ast ctx ex [K"scope_block"(scope_type=scope_type) blk] end for binding in Iterators.reverse(children(ex[1])) kb = kind(binding) if is_sym_decl(kb) - blk = makenode(ctx, ex, K"block", - makenode(ctx, ex, K"local", binding), - blk; - scope_type=scope_type - ) + blk = @ast ctx ex [ + K"scope_block"(scope_type=scope_type) + [K"local" binding] + blk + ] elseif kb == K"=" && numchildren(binding) == 2 lhs = binding[1] rhs = binding[2] if is_sym_decl(lhs) - tmp, tmpdef = assign_tmp(ctx, rhs) - blk = makenode(ctx, binding, K"block", - tmpdef, - makenode(ctx, ex, K"block", - makenode(ctx, lhs, K"local_def", lhs), # TODO: Use K"local" with attr? - makenode(ctx, rhs, K"=", decl_var(lhs), tmp), - blk; - scope_type=scope_type - ) - ) + blk = @ast ctx binding [ + K"block" + tmp=rhs + [K"scope_block"(ex, scope_type=scope_type) + [K"local_def"(lhs) lhs] # TODO: Use K"local" with attr? + [K"="(rhs) + decl_var(lhs) + tmp + ] + blk + ] + ] else TODO("Functions and multiple assignment") end @@ -235,7 +166,7 @@ function expand_call(ctx, ex) cs[1], cs[2] = cs[2], cs[1] end # TODO: keywords - makenode(ctx, ex, K"call", cs...) + @ast ctx ex [K"call" cs...] 
end # Strip variable type declarations from within a `local` or `global`, returning @@ -347,7 +278,7 @@ function expand_function_def(ctx, ex) if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) end - return makenode(ctx, ex, K"method", identifier_name(name)) + return @ast ctx ex [K"method" name] elseif kind(name) == K"call" callex = name body = ex[2] @@ -364,9 +295,10 @@ function expand_function_def(ctx, ex) name = name[1] if kind(name) == K"::" if numchildren(name) == 1 - farg = makenode(ctx, name, K"::", - makenode(ctx, name, K"Identifier", name_val="#self#"), - name[1]) + farg = @ast ctx name [K"::" + "#self#"::K"Identifier" + name[1] + ] else TODO("Fixme type") farg = name @@ -376,9 +308,13 @@ function expand_function_def(ctx, ex) if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) end - farg = makenode(ctx, name, K"::", - makenode(ctx, name, K"Identifier", name_val="#self#"), - makenode(ctx, name, K"call", core_ref(ctx, name, "Typeof"), name)) + farg = @ast ctx name [K"::" + "#self#"::K"Identifier" + [K"call" + "Typeof"::K"core" + name + ] + ] function_name = name end args = pushfirst!(collect(args), farg) @@ -400,37 +336,45 @@ function expand_function_def(ctx, ex) if i != length(args) throw(LoweringError(arg, "`...` may only be used for the last function argument")) end - atype = makenode(K"curly", core_ref(ctx, arg, "Vararg"), arg) + atype = @ast ctx arg [K"curly" "Vararg"::K"core" arg] end push!(arg_types, atype) end - preamble = makenode(ctx, ex, K"call", - svec_type(ctx, callex), - makenode(ctx, callex, K"call", - svec_type(ctx, name), - arg_types...), - makenode(ctx, callex, K"call", - svec_type(ctx, name)), # FIXME sparams - makenode(ctx, callex, K"Value", value=QuoteNode(source_location(LineNumberNode, callex))) - ) + preamble = @ast ctx callex [ + K"call" + "svec" ::K"core" + [K"call" + "svec" ::K"core" + arg_types... 
+ ] + [K"call" + "svec" ::K"core" + # FIXME sparams + ] + QuoteNode(source_location(LineNumberNode, callex))::K"Value" + ] if !isnothing(return_type) - ret_var, ret_assign = assign_tmp(ctx, return_type) - body = makenode(ctx, body, K"block", - ret_assign, - body) + body = @ast ctx body [ + K"block" + ret_var=return_type + body + ] else ret_var = nothing end - lambda = makenode(ctx, body, K"lambda", body, - lambda_info=LambdaInfo(arg_names, static_parameters, ret_var, false)) - makenode(ctx, ex, K"block", - makenode(ctx, ex, K"method", function_name), - makenode(ctx, ex, K"method", - function_name, - preamble, - lambda), - makenode(ctx, ex, K"unnecessary", function_name)) + @ast ctx ex [ + K"block" + [K"method" function_name] + [K"method" + function_name + preamble + [K"lambda"(body, lambda_info=LambdaInfo(arg_names, static_parameters, ret_var, false)) + body + ] + ] + [K"unnecessary" function_name] + ] elseif kind(name) == K"tuple" TODO(name, "Anon function lowering") else @@ -454,7 +398,7 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) expand_forms(ctx, expand_decls(ctx, ex)) # FIXME end elseif is_operator(k) && !haschildren(ex) - makenode(ctx, ex, K"Identifier", name_val=ex.name_val) + makeleaf(ctx, ex, K"Identifier", ex.name_val) elseif k == K"char" || k == K"var" @chk numchildren(ex) == 1 ex[1] @@ -462,11 +406,17 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) if numchildren(ex) == 1 && kind(ex[1]) == K"String" ex[1] else - makenode(ctx, ex, K"call", top_ref(ctx, ex, "string"), expand_forms(ctx, children(ex))...) + @ast ctx ex [K"call" + "string"::K"top" + expand_forms(ctx, children(ex))... + ] end elseif k == K"tuple" # TODO: named tuples - makenode(ctx, ex, K"call", core_ref(ctx, ex, "tuple"), expand_forms(ctx, children(ex))...) + @ast ctx ex [K"call" + "tuple"::K"core" + expand_forms(ctx, children(ex))... 
+ ] elseif !haschildren(ex) ex else diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index b9483658e4d75..2645b0226c107 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -197,7 +197,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else emit_assignment(ctx, ex, lhs, rhs) end - elseif k == K"block" + elseif k == K"block" || k == K"scope_block" nc = numchildren(ex) for i in 1:nc islast = i == nc diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 26a29073e51cf..fc837dd7236b9 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -53,8 +53,8 @@ function _find_scope_vars!(assignments, locals, globals, used_names, ex) k = kind(ex) if k == K"Identifier" push!(used_names, ex.name_val) - elseif !haschildren(ex) || hasattr(ex, :scope_type) || is_quoted(k) || - k in KSet"lambda module toplevel" + elseif !haschildren(ex) || is_quoted(k) || + k in KSet"scope_block lambda module toplevel" return elseif k == K"local" || k == K"local_def" name = ex[1].name_val @@ -339,7 +339,7 @@ function _resolve_scopes!(ctx, ex) end pop!(ctx.scope_stack) setattr!(ctx.graph, ex.id, lambda_locals=scope.lambda_locals) - elseif k == K"block" && hasattr(ex, :scope_type) + elseif k == K"scope_block" scope = make_scope(ctx, ex, ex.scope_type, nothing) push!(ctx.scope_stack, scope) for e in children(ex) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 953d842d762b2..22b81ae81afcb 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -44,6 +44,7 @@ function ensure_attributes!(graph::SyntaxGraph; kws...) graph.attributes[k] = Dict{NodeId,v}() end end + graph end function ensure_attributes(graph::SyntaxGraph; kws...) 
@@ -244,24 +245,33 @@ JuliaSyntax.filename(src::SourceRef) = filename(src.file) JuliaSyntax.source_location(::Type{LineNumberNode}, src::SourceRef) = source_location(LineNumberNode, src.file, src.first_byte) JuliaSyntax.source_location(src::SourceRef) = source_location(src.file, src.first_byte) +# TODO: Adding these methods to support LineNumberNode is kind of hacky but we +# can remove these after JuliaLowering becomes self-bootstrapping for macros +# and we a proper SourceRef for @ast's @HERE form. +JuliaSyntax.first_byte(src::LineNumberNode) = 0 +JuliaSyntax.last_byte(src::LineNumberNode) = 0 +JuliaSyntax.filename(src::LineNumberNode) = string(src.file) +JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode) = src +JuliaSyntax.source_location(src::LineNumberNode) = (src.line, 0) + function Base.show(io::IO, ::MIME"text/plain", src::SourceRef) highlight(io, src.file, first_byte(src):last_byte(src), note="these are the bytes you're looking for 😊", context_lines_inner=20) end function sourceref(tree::SyntaxTree) sources = tree.graph.source - id = tree.id + id::NodeId = tree.id while true - s = sources[id] - if s isa SourceRef - return s + s = get(sources, id, nothing) + if s isa NodeId + id = s else - id = s::NodeId + return s end end end -JuliaSyntax.filename(tree::SyntaxTree) = return filename(sourceref(tree)) +JuliaSyntax.filename(tree::SyntaxTree) = filename(sourceref(tree)) JuliaSyntax.source_location(::Type{LineNumberNode}, tree::SyntaxTree) = source_location(LineNumberNode, sourceref(tree)) JuliaSyntax.source_location(tree::SyntaxTree) = source_location(sourceref(tree)) JuliaSyntax.first_byte(tree::SyntaxTree) = first_byte(sourceref(tree)) @@ -305,12 +315,13 @@ function _value_string(ex) end function _show_syntax_tree(io, current_filename, node, indent, show_byte_offsets) - if hasattr(node, :source) - fname = filename(node) - line, col = source_location(node) + sr = sourceref(node) + if !isnothing(sr) + fname = filename(sr) + line, col = 
source_location(sr) posstr = "$(lpad(line, 4)):$(rpad(col,3))" if show_byte_offsets - posstr *= "│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))" + posstr *= "│$(lpad(first_byte(sr),6)):$(rpad(last_byte(sr),6))" end else fname = nothing @@ -389,6 +400,11 @@ end syntax_graph(ex::SyntaxTree) = ex.graph +function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) + SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) +end + + #------------------------------------------------------------------------------- # Lightweight vector of nodes ids with associated pointer to graph stored separately. struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree} @@ -446,9 +462,3 @@ function Base.pop!(v::SyntaxList) SyntaxTree(v.graph, pop!(v.ids)) end -#------------------------------------------------------------------------------- - -function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) - SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) -end - diff --git a/JuliaLowering/test/lowering.jl b/JuliaLowering/test/lowering.jl index 7dd4eaefd0e2a..3b17cee0f6eb8 100644 --- a/JuliaLowering/test/lowering.jl +++ b/JuliaLowering/test/lowering.jl @@ -29,25 +29,25 @@ end src = """ let - y = 1 - x = 2 - let x = 3 - y = x + 1 + y = 0 + x = 1 + let x = x + 1 + y = x end (x, y) end """ -src = """ -begin - function f(x) - y = x + 1 - "hello world", x, y - end - - f(1) -end -""" +# src = """ +# begin +# function f(x) +# y = x + 1 +# "hello world", x, y +# end +# +# f(1) +# end +# """ # src = """ # x = 1 @@ -71,11 +71,12 @@ ctx2, ex_scoped = JuliaLowering.resolve_scopes!(ctx, in_mod, ex_desugar) ctx3, ex_compiled = JuliaLowering.linearize_ir(ctx2, ex_scoped) @info "Linear IR" ex_compiled -ex_expr = JuliaLowering.to_expr(in_mod, ctx2.var_info, ex_compiled) +ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx2.var_info, ex_compiled) @info "CodeInfo" ex_expr x = 100 y = 200 -@info "Eval" 
Base.eval(in_mod, ex_expr) +eval_result = Base.eval(in_mod, ex_expr) +@info "Eval" eval_result # Syntax tree ideas: Want following to work? # This can be fully inferrable! diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index b9824e61b277c..89a2365914450 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -1,6 +1,46 @@ -using JuliaLowering using Test +using JuliaLowering +using JuliaLowering: @ast + @testset "JuliaLowering.jl" begin - # Write your tests here. + +# Basic end-to-end / smoke tests + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, +""" +let + y = 0 + x = 1 + let x = x + 1 + y = x + end + (x, y) +end +""") == (1, 2) + + +@test JuliaLowering.include_string(test_mod, """ +begin + function f(x) + y = x + 1 + "hi", x, y + end + + f(1) +end +""") == ("hi", 1, 2) + + +JuliaLowering.include_string(test_mod, """ + x = 101 + y = 202 +""") +@test test_mod.x == 101 +@test test_mod.y == 202 +@test JuliaLowering.include_string(test_mod, "x + y") == 303 + + end From 032a926bede5d6cf40d93797463e2d775a1e2be7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 22 Apr 2024 11:36:57 +1000 Subject: [PATCH 0741/1109] Cleanup `eval()` and `include()` implementations * Extends `Core.eval` rather than having our own separate `eval2` * Use `include` rather than `include2` as the public API --- JuliaLowering/src/JuliaLowering.jl | 29 +++++++++------- JuliaLowering/src/eval.jl | 53 +++++++++++++++++------------- JuliaLowering/src/linear_ir.jl | 4 +-- 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 2505344c819ec..1e7581d4835d8 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -1,4 +1,11 @@ -module JuliaLowering +baremodule JuliaLowering + +# ^ Use baremodule because we're implementing `Base.include` and `Core.eval`. 
+using Base +# We define a separate _include() for use in this module to avoid mixing method +# tables with the public `JuliaLowering.include()` API +_include(path::AbstractString) = Base.include(JuliaLowering, path) +using Core: eval using JuliaSyntax @@ -8,20 +15,20 @@ using JuliaSyntax: filename, first_byte, last_byte, source_location, span using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error -include("kinds.jl") -_insert_kinds() +abstract type AbstractLoweringContext end -include("syntax_graph.jl") -include("ast.jl") -include("utils.jl") +_include("kinds.jl") +_insert_kinds() -abstract type AbstractLoweringContext end +_include("syntax_graph.jl") +_include("ast.jl") +_include("utils.jl") -include("desugaring.jl") -include("scope_analysis.jl") -include("linear_ir.jl") +_include("desugaring.jl") +_include("scope_analysis.jl") +_include("linear_ir.jl") -include("eval.jl") +_include("eval.jl") function __init__() _insert_kinds() diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 76cc05cd1721b..8189ce95ea60b 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -129,37 +129,40 @@ function to_lowered_expr(mod, var_info, ex) end #------------------------------------------------------------------------------- -function eval2(mod, exs::AbstractVector) - res = nothing - for e in exs - res = eval2(mod, e) - end - return res -end - -# Like eval(), but uses code lowering defined by this package -function eval2(mod, ex::SyntaxTree) +# Our version of eval takes our own data structures +function Core.eval(mod::Module, ex::SyntaxTree) k = kind(ex) if k == K"toplevel" - return eval2(mod, children(ex)) - elseif k == K"module" - m2 = Module(ex[1].name_val) - eval2(m2, children(ex[2])) - return m2 + x = nothing + for e in children(ex) + x = eval(mod, e) + end + return x end linear_ir = lower(mod, ex) expr_form = to_lowered_expr(mod, linear_ir.var_info, linear_ir) - 
Base.eval(mod, expr_form) + eval(mod, expr_form) end -#------------------------------------------------------------------------------- -function include2(mod, filename) - path, prev = Base._include_dependency(mod, filename) +""" + include(mod::Module, path::AbstractString) + +Evaluate the contents of the input source file in the global scope of module +`mod`. Every module (except those defined with baremodule) has its own +definition of `include()` omitting the `mod` argument, which evaluates the file +in that module. Returns the result of the last evaluated expression of the +input file. During including, a task-local include path is set to the directory +containing the file. Nested calls to include will search relative to that path. +This function is typically used to load source interactively, or to combine +files in packages that are broken into multiple source files. +""" +function include(mod::Module, path::AbstractString) + path, prev = Base._include_dependency(mod, path) code = read(path, String) tls = task_local_storage() tls[:SOURCE_PATH] = path try - return include_string(mod, code; filename=path) + return include_string(mod, code, path) finally if prev === nothing delete!(tls, :SOURCE_PATH) @@ -169,8 +172,12 @@ function include2(mod, filename) end end -function include_string(mod, str; filename=nothing) - eval2(mod, parseall(SyntaxTree, str; filename=filename)) -end +""" + include_string(mod::Module, code::AbstractString, filename::AbstractString="string") +Like `include`, except reads code from the given string rather than from a file. 
+""" +function include_string(mod::Module, code::AbstractString, filename::AbstractString="string") + eval(mod, parseall(SyntaxTree, code; filename=filename)) +end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 2645b0226c107..4e9bafbd17a8f 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -261,11 +261,11 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) nothing elseif k == K"module" || k == K"toplevel" # Both these forms can't be lowered here; they need to just be quoted - # and passed through to a call to eval2. + # and passed through to a call to eval. # TODO: Is compile() the right place to do this? # TODO: Restrict to toplevel only call = makenode(ctx, ex, K"call", - makenode(ctx, ex, K"Value", JuliaLowering.eval2), + makenode(ctx, ex, K"Value", JuliaLowering.eval), makenode(ctx, ex, K"Value", ex)) compile(ctx, call, needs_value, in_tail_pos) elseif k == K"local_def" || k == K"local" From a451a8ca9e10308a1264cfe862b63b9a91cd07c9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 22 Apr 2024 15:49:00 +1000 Subject: [PATCH 0742/1109] Improve lowering of module / toplevel / import / using Lower `module`, `import` and `using` to an explicit runtime function call rather than a bare expression. This imperative approach allows us to remove module, import and using expressions from the lowered IR. As part of this the `import` and `using` expressions have their syntax checked by lowering rather than later in the runtime. Lowering of `module` results in a quoted `toplevel` expression for later consumption by `eval`. This expression also includes the standard definitions of `eval()` and `import()` rather than having these defined by a special lowering step at runtime. 
--- JuliaLowering/src/ast.jl | 20 ++- JuliaLowering/src/desugaring.jl | 177 +++++++++++++++++++- JuliaLowering/src/eval.jl | 123 +++++++++++++- JuliaLowering/src/kinds.jl | 3 - JuliaLowering/src/linear_ir.jl | 18 +- JuliaLowering/src/scope_analysis.jl | 8 +- JuliaLowering/src/syntax_graph.jl | 10 +- JuliaLowering/src/utils.jl | 54 +----- JuliaLowering/test/{lowering.jl => demo.jl} | 35 +++- JuliaLowering/test/runtests.jl | 49 ++++++ 10 files changed, 402 insertions(+), 95 deletions(-) rename JuliaLowering/test/{lowering.jl => demo.jl} (75%) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 26fcddd0acbc9..10852db89b000 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -20,7 +20,7 @@ function _makenode(graph::SyntaxGraph, srcref, head, children; attrs...) return SyntaxTree(graph, id) end -function makenode(ctx, srcref, head, children::SyntaxTree...; attrs...) +function makenode(ctx, srcref, head, children::Union{Nothing,SyntaxTree}...; attrs...) _makenode(syntax_graph(ctx), srcref, head, _node_ids(children...); attrs...) 
end @@ -53,6 +53,13 @@ function makeleaf(ctx, srcref, kind) _makenode(syntax_graph(ctx), srcref, kind, nothing) end +function _match_srcref(ex) + if Meta.isexpr(ex, :macrocall) && ex.args[1] == Symbol("@HERE") + QuoteNode(ex.args[2]) + else + esc(ex) + end +end function _match_kind_ex(defs, srcref, ex) kws = [] @@ -68,12 +75,7 @@ function _match_kind_ex(defs, srcref, ex) end if length(args) == 1 srcref = Symbol("srcref_$(length(defs))") - ref_ex = if Meta.isexpr(args[1], :macrocall) && args[1].args[1] == Symbol("@HERE") - QuoteNode(args[1].args[2]) - else - esc(args[1]) - end - push!(defs, :($srcref = $ref_ex)) + push!(defs, :($srcref = $(_match_srcref(args[1])))) elseif length(args) > 1 error("Unexpected: extra srcref argument in `$ex`?") end @@ -138,7 +140,7 @@ Any `kind` can be replaced with an expression of the form * `kind(attr=val)` - set an additional attribute * `kind(srcref; attr₁=val₁, attr₂=val₂)` - the general form -In any place `srcref` is used, the special form `@HERE` can be used to instead +In any place `srcref` is used, the special form `@HERE()` can be used to instead to indicate that the "primary" location of the source is the location where `@HERE` occurs. @@ -170,7 +172,7 @@ to indicate that the "primary" location of the source is the location where macro ast(ctx, srcref, tree) defs = [] push!(defs, :(ctx = $(esc(ctx)))) - push!(defs, :(srcref = $(esc(srcref)))) + push!(defs, :(srcref = $(_match_srcref(srcref)))) ex = _expand_ast_tree(defs, :ctx, :srcref, tree) quote $(defs...) 
diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index d5f469451e26f..bc8d517406674 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -18,18 +18,19 @@ end struct DesugaringContext{GraphType} <: AbstractLoweringContext graph::GraphType next_var_id::Ref{VarId} + mod::Module end -function DesugaringContext(ctx) +function DesugaringContext(ctx, mod) graph = syntax_graph(ctx) ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, green_tree=GreenNode, - source_pos=Int, source=Union{SourceRef,NodeId}, + source_pos=Int, source=SourceAttrType, value=Any, name_val=String, scope_type=Symbol, # :hard or :soft var_id=VarId, lambda_info=LambdaInfo) - DesugaringContext(freeze_attrs(graph), Ref{VarId}(1)) + DesugaringContext(freeze_attrs(graph), Ref{VarId}(1), mod) end #------------------------------------------------------------------------------- @@ -382,6 +383,154 @@ function expand_function_def(ctx, ex) end end +function _append_importpath(ctx, path_spec, path) + prev_was_dot = true + for component in children(path) + k = kind(component) + if k == K"quote" + # Permit quoted path components as in + # import A.(:b).:c + component = component[1] + end + @chk kind(component) in (K"Identifier", K".") + name = component.name_val + is_dot = kind(component) == K"." 
+ if is_dot && !prev_was_dot + throw(LoweringError(component, "invalid import path: `.` in identifier path")) + end + prev_was_dot = is_dot + push!(path_spec, @ast(ctx, component, name::K"String")) + end + path_spec +end + +function expand_import(ctx, ex) + is_using = kind(ex) == K"using" + if kind(ex[1]) == K":" + # import M: x.y as z, w + # (import (: (importpath M) (as (importpath x y) z) (importpath w))) + # => + # (call module_import + # false + # (call core.svec "M") + # (call core.svec 2 "x" "y" "z" 1 "w" "w")) + @chk numchildren(ex[1]) >= 2 + from = ex[1][1] + @chk kind(from) == K"importpath" + from_path = @ast ctx from [K"call" + "svec"::K"core" + _append_importpath(ctx, SyntaxList(ctx), from)... + ] + paths = ex[1][2:end] + else + # import A.B + # (using (importpath A B)) + # (call module_import true nothing (call core.svec 1 "w")) + @chk numchildren(ex) >= 1 + from_path = nothing_(ctx, ex) + paths = children(ex) + end + path_spec = SyntaxList(ctx) + for path in paths + as_name = nothing + if kind(path) == K"as" + @chk numchildren(path) == 2 + as_name = path[2] + @chk kind(as_name) == K"Identifier" + path = path[1] + end + @chk kind(path) == K"importpath" + push!(path_spec, @ast(ctx, path, numchildren(path)::K"Integer")) + _append_importpath(ctx, path_spec, path) + push!(path_spec, isnothing(as_name) ? nothing_(ctx, ex) : + @ast(ctx, as_name, as_name.name_val::K"String")) + end + @ast ctx ex [ + K"call" + module_import ::K"Value" + ctx.mod ::K"Value" + is_using ::K"Value" + from_path + [K"call" + "svec"::K"core" + path_spec... 
+ ] + ] +end + +function expand_module(ctx::DesugaringContext, ex::SyntaxTree) + modname_ex = ex[1] + @chk kind(modname_ex) == K"Identifier" + modname = modname_ex.name_val + + std_defs = if !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) + @ast ctx (@HERE) [ + K"block" + [K"using" + [K"importpath" + "Base" ::K"Identifier" + ] + ] + [K"function" + [K"call" + "eval" ::K"Identifier" + "x" ::K"Identifier" + ] + [K"call" + "eval" ::K"core" + modname ::K"Identifier" + "x" ::K"Identifier" + ] + ] + [K"function" + [K"call" + "include" ::K"Identifier" + "x" ::K"Identifier" + ] + [K"call" + "_call_latest" ::K"core" + "include" ::K"top" + modname ::K"Identifier" + "x" ::K"Identifier" + ] + ] + [K"function" + [K"call" + "include" ::K"Identifier" + [K"::" + "mapexpr" ::K"Identifier" + "Function" ::K"top" + ] + "x" ::K"Identifier" + ] + [K"call" + "_call_latest" ::K"core" + "include" ::K"top" + "mapexpr" ::K"Identifier" + modname ::K"Identifier" + "x" ::K"Identifier" + ] + ] + ] + end + + body = ex[2] + @chk kind(body) == K"block" + + @ast ctx ex [ + K"call" + eval_module ::K"Value" + ctx.mod ::K"Value" + modname ::K"String" + [K"inert"(body) + [K"toplevel" + std_defs + children(body)... + ] + ] + ] +end + function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) k = kind(ex) if k == K"call" @@ -417,6 +566,24 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) "tuple"::K"core" expand_forms(ctx, children(ex))... ] + elseif k == K"module" + # TODO: check-toplevel + expand_module(ctx, ex) + elseif k == K"import" || k == K"using" + # TODO: check-toplevel + expand_import(ctx, ex) + elseif k == K"export" || k == K"public" + TODO(ex) + elseif k == K"toplevel" + # The toplevel form can't be lowered here - it needs to just be quoted + # and passed through to a call to eval. 
+ # TODO: check-toplevel + @ast ctx ex [ + K"call" + eval ::K"Value" + ctx.mod ::K"Value" + [K"inert" ex] + ] elseif !haschildren(ex) ex else @@ -438,8 +605,8 @@ function expand_forms(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) res end -function expand_forms(ex::SyntaxTree) - ctx = DesugaringContext(ex) +function expand_forms(mod::Module, ex::SyntaxTree) + ctx = DesugaringContext(ex, mod) res = expand_forms(ctx, reparent(ctx, ex)) ctx, res end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 8189ce95ea60b..07b94cd52e91c 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,13 +1,60 @@ function lower(mod, ex) - ctx1, ex1 = expand_forms(ex) - ctx2, ex2 = resolve_scopes!(ctx1, mod, ex1) + ctx1, ex1 = expand_forms(mod, ex) + ctx2, ex2 = resolve_scopes!(ctx1, ex1) ctx3, ex3 = linearize_ir(ctx2, ex2) ex3 end +# CodeInfo constructor. TODO: Should be in Core? +function _CodeInfo(code, + codelocs, + ssavaluetypes, + ssaflags, + method_for_inference_limit_heuristics, + linetable, + slotnames, + slotflags, + slottypes, + rettype, + parent, + edges, + min_world, + max_world, + inferred, + propagate_inbounds, + has_fcall, + nospecializeinfer, + inlining, + constprop, + purity, + inlining_cost) + @eval $(Expr(:new, :(Core.CodeInfo), + convert(Vector{Any}, code), + convert(Vector{Int32}, codelocs), + convert(Any, ssavaluetypes), + convert(Vector{UInt32}, ssaflags), + convert(Any, method_for_inference_limit_heuristics), + convert(Any, linetable), + convert(Vector{Symbol}, slotnames), + convert(Vector{UInt8}, slotflags), + convert(Any, slottypes), + convert(Any, rettype), + convert(Any, parent), + convert(Any, edges), + convert(UInt64, min_world), + convert(UInt64, max_world), + convert(Bool, inferred), + convert(Bool, propagate_inbounds), + convert(Bool, has_fcall), + convert(Bool, nospecializeinfer), + convert(UInt8, inlining), + convert(UInt8, constprop), + convert(UInt16, purity), + convert(UInt16, inlining_cost))) 
+end + # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime - function to_code_info(ex, mod, funcname, var_info, slot_rewrites) input_code = children(ex) # Convert code to Expr and record low res locations in table @@ -95,7 +142,11 @@ function to_lowered_expr(mod, var_info, ex) elseif k == K"return" Core.ReturnNode(to_lowered_expr(mod, var_info, ex[1])) elseif is_quoted(k) - TODO(ex, "Convert SyntaxTree to Expr") + if k == K"inert" + QuoteNode(ex[1]) + else + TODO(ex, "Convert SyntaxTree to Expr") + end elseif k == K"lambda" funcname = ex.lambda_info.is_toplevel_thunk ? "top-level scope" : @@ -128,6 +179,70 @@ function to_lowered_expr(mod, var_info, ex) end end +#------------------------------------------------------------------------------- +# Runtime support functions called by lowering + +# Construct new bare module including only the "default names" +# +# using Core +# const modname = modval +# public modname +# +# And run statments in the toplevel expression `body` +function eval_module(parentmod, modname, body) + # Here we just use `eval()` with an Expr. + # If we wanted to avoid this we'd need to reproduce a lot of machinery from + # jl_eval_module_expr() + # + # 1. Register / deparent toplevel modules + # 2. Set binding in parent module + # 3. Deal with replacing modules + # * Warn if replacing + # * Root old module being replaced + # 4. Run __init__ + # * Also run __init__ for any children after parent is defined + # mod = @ccall jl_new_module(Symbol(modname)::Symbol, parentmod::Module)::Any + # ... + name = Symbol(modname) + eval(parentmod, :( + baremodule $name + $eval($name, $body) + end + )) +end + +function module_import(into_mod::Module, is_using::Bool, + from_mod::Union{Nothing,Core.SimpleVector}, paths::Core.SimpleVector) + # For now, this function converts our lowered representation back to Expr + # and calls eval() to avoid replicating all of the fiddly logic in + # jl_toplevel_eval_flex. 
+ # FIXME: ccall Julia runtime functions directly? + # * jl_module_using jl_module_use_as + # * import_module jl_module_import_as + path_args = [] + i = 1 + while i < length(paths) + nsyms = paths[i]::Int + n = i + nsyms + path = Expr(:., [Symbol(paths[i+j]::String) for j = 1:nsyms]...) + as_name = paths[i+nsyms+1] + push!(path_args, isnothing(as_name) ? path : + Expr(:as, path, Symbol(as_name))) + i += nsyms + 2 + end + ex = if isnothing(from_mod) + Expr(is_using ? :using : :import, + path_args...) + else + from_path = Expr(:., [Symbol(s::String) for s in from_mod]...) + Expr(is_using ? :using : :import, + Expr(:(:), from_path, path_args...)) + end + eval(into_mod, ex) + nothing +end + + #------------------------------------------------------------------------------- # Our version of eval takes our own data structures function Core.eval(mod::Module, ex::SyntaxTree) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 6b5759f898043..db6ed8de0026e 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -10,9 +10,6 @@ function _insert_kinds() # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" - # Quoted symbol. Used to distinguish Symbol literals from AST - # literals of kind K"Identifier" - "Symbol" # TODO: Use `meta` for inbounds and loopinfo etc? 
"inbounds" "inline" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 4e9bafbd17a8f..e2ddabc9c25f0 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -32,14 +32,15 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext graph::GraphType code::SyntaxList{GraphType, Vector{NodeId}} next_var_id::Ref{Int} + is_toplevel_thunk::Bool return_type::Union{Nothing,NodeId} var_info::Dict{VarId,VarInfo} mod::Module end -function LinearIRContext(ctx, return_type) +function LinearIRContext(ctx, is_toplevel_thunk, return_type) LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, - return_type, ctx.var_info, ctx.mod) + is_toplevel_thunk, return_type, ctx.var_info, ctx.mod) end function is_valid_body_ir_argument(ex) @@ -259,15 +260,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing - elseif k == K"module" || k == K"toplevel" - # Both these forms can't be lowered here; they need to just be quoted - # and passed through to a call to eval. - # TODO: Is compile() the right place to do this? - # TODO: Restrict to toplevel only - call = makenode(ctx, ex, K"call", - makenode(ctx, ex, K"Value", JuliaLowering.eval), - makenode(ctx, ex, K"Value", ex)) - compile(ctx, call, needs_value, in_tail_pos) elseif k == K"local_def" || k == K"local" nothing else @@ -371,7 +363,7 @@ function compile_lambda(outer_ctx, ex) lambda_info = ex.lambda_info return_type = nothing # FIXME # TODO: Add assignments for reassigned arguments to body using lambda_info.args - ctx = LinearIRContext(outer_ctx, return_type) + ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, return_type) compile_body(ctx, ex[1]) slot_rewrites = Dict{VarId,Int}() _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) @@ -391,7 +383,7 @@ function linearize_ir(ctx, ex) # TODO: Cleanup needed - `_ctx` is just a dummy context here. 
But currently # required to call reparent() ... _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, - nothing, ctx.var_info, ctx.mod) + false, nothing, ctx.var_info, ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res.id, var_info=ctx.var_info) _ctx, res diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index fc837dd7236b9..dae51d01bd24b 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -133,11 +133,11 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext implicit_toplevel_globals::Set{String} end -function ScopeResolutionContext(ctx, mod::Module) +function ScopeResolutionContext(ctx) graph = ensure_attributes(ctx.graph, lambda_locals=Set{VarId}) ScopeResolutionContext(graph, ctx.next_var_id, - mod, + ctx.mod, Dict{String,VarId}(), Vector{ScopeInfo}(), Dict{VarId,VarInfo}(), @@ -361,8 +361,8 @@ function resolve_scopes!(ctx::ScopeResolutionContext, ex) return thunk end -function resolve_scopes!(ctx::DesugaringContext, mod::Module, ex) - ctx2 = ScopeResolutionContext(ctx, mod) +function resolve_scopes!(ctx::DesugaringContext, ex) + ctx2 = ScopeResolutionContext(ctx) res = resolve_scopes!(ctx2, reparent(ctx2, ex)) ctx2, res end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 22b81ae81afcb..6f265de8c1b58 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -81,6 +81,10 @@ function JuliaSyntax.children(graph::SyntaxGraph, id) @view graph.edges[graph.edge_ranges[id]] end +function JuliaSyntax.children(graph::SyntaxGraph, id, r::UnitRange) + @view graph.edges[graph.edge_ranges[id][r]] +end + function JuliaSyntax.child(graph::SyntaxGraph, id::NodeId, i::Integer) graph.edges[graph.edge_ranges[id][i]] end @@ -184,7 +188,7 @@ function Base.getindex(tree::SyntaxTree, i::Integer) end function Base.getindex(tree::SyntaxTree, r::UnitRange) - (child(tree, i) for i in r) 
+ SyntaxList(tree.graph, children(tree.graph, tree.id, r)) end Base.firstindex(tree::SyntaxTree) = 1 @@ -277,8 +281,10 @@ JuliaSyntax.source_location(tree::SyntaxTree) = source_location(sourceref(tree)) JuliaSyntax.first_byte(tree::SyntaxTree) = first_byte(sourceref(tree)) JuliaSyntax.last_byte(tree::SyntaxTree) = last_byte(sourceref(tree)) +const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId} + function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) - ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=Union{SourceRef,NodeId}, + ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, value=Any, name_val=String) id = _convert_nodes(freeze_attrs(graph), node) return SyntaxTree(graph, id) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 87136cbc73392..81714a8e1b5ff 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -1,7 +1,7 @@ # Error handling -TODO(msg) = throw(ErrorException("Lowering TODO: $msg")) -TODO(ex, msg) = throw(LoweringError(ex, "Lowering TODO: $msg")) +TODO(msg::AbstractString) = throw(ErrorException("Lowering TODO: $msg")) +TODO(ex::SyntaxTree, msg="") = throw(LoweringError(ex, "Lowering TODO: $msg")) # Errors found during lowering will result in LoweringError being thrown to # indicate the syntax causing the error. @@ -60,53 +60,3 @@ macro chk(ex, cond) _chk_code(ex, cond) end - -#------------------------------------------------------------------------------- -# CodeInfo constructor. TODO: Should be in Core? 
-function _CodeInfo(code, - codelocs, - ssavaluetypes, - ssaflags, - method_for_inference_limit_heuristics, - linetable, - slotnames, - slotflags, - slottypes, - rettype, - parent, - edges, - min_world, - max_world, - inferred, - propagate_inbounds, - has_fcall, - nospecializeinfer, - inlining, - constprop, - purity, - inlining_cost) - @eval $(Expr(:new, :(Core.CodeInfo), - convert(Vector{Any}, code), - convert(Vector{Int32}, codelocs), - convert(Any, ssavaluetypes), - convert(Vector{UInt32}, ssaflags), - convert(Any, method_for_inference_limit_heuristics), - convert(Any, linetable), - convert(Vector{Symbol}, slotnames), - convert(Vector{UInt8}, slotflags), - convert(Any, slottypes), - convert(Any, rettype), - convert(Any, parent), - convert(Any, edges), - convert(UInt64, min_world), - convert(UInt64, max_world), - convert(Bool, inferred), - convert(Bool, propagate_inbounds), - convert(Bool, has_fcall), - convert(Bool, nospecializeinfer), - convert(UInt8, inlining), - convert(UInt8, constprop), - convert(UInt16, purity), - convert(UInt16, inlining_cost))) -end - diff --git a/JuliaLowering/test/lowering.jl b/JuliaLowering/test/demo.jl similarity index 75% rename from JuliaLowering/test/lowering.jl rename to JuliaLowering/test/demo.jl index 3b17cee0f6eb8..a482406355d2a 100644 --- a/JuliaLowering/test/lowering.jl +++ b/JuliaLowering/test/demo.jl @@ -6,7 +6,7 @@ using JuliaLowering using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode function wrapscope(ex, scope_type) - makenode(ex, ex, K"block", ex; scope_type=scope_type) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) end function softscope_test(ex) @@ -14,6 +14,7 @@ function softscope_test(ex) end #------------------------------------------------------------------------------- +# Demos of the prototype # src = """ # let @@ -57,15 +58,42 @@ end # x + y # """ +src = """ +module A + function f(x)::Int + x + 1 
+ end + + b = f(2) +end +""" + +src = """ +function f() +end +""" + +src = """ +# import A.B: C.c as d, E.e as f +# import JuliaLowering +using JuliaLowering +""" + +src = """ +module A + z = 1 + 1 +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") # t = softscope_test(t) @info "Input code" ex in_mod = Main -ctx, ex_desugar = JuliaLowering.expand_forms(ex) +ctx, ex_desugar = JuliaLowering.expand_forms(in_mod, ex) @info "Desugared" ex_desugar -ctx2, ex_scoped = JuliaLowering.resolve_scopes!(ctx, in_mod, ex_desugar) +ctx2, ex_scoped = JuliaLowering.resolve_scopes!(ctx, ex_desugar) @info "Resolved scopes" ex_scoped ctx3, ex_compiled = JuliaLowering.linearize_ir(ctx2, ex_scoped) @@ -73,6 +101,7 @@ ctx3, ex_compiled = JuliaLowering.linearize_ir(ctx2, ex_scoped) ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx2.var_info, ex_compiled) @info "CodeInfo" ex_expr + x = 100 y = 200 eval_result = Base.eval(in_mod, ex_expr) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 89a2365914450..9b0627f09b19a 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -1,6 +1,7 @@ using Test using JuliaLowering +using JuliaSyntax using JuliaLowering: @ast @testset "JuliaLowering.jl" begin @@ -42,5 +43,53 @@ JuliaLowering.include_string(test_mod, """ @test test_mod.y == 202 @test JuliaLowering.include_string(test_mod, "x + y") == 303 +# module +A = JuliaLowering.include_string(test_mod, """ +module A + function g() + return "hi" + end +end +""", "module_test") +@test A isa Module +@test A.g() == "hi" +@test A.include isa Function +@test A.Base === Base +@test A.eval(:(x = -1)) == -1 && A.x == -1 + +B = JuliaLowering.include_string(test_mod, """ +baremodule B +end +""", "baremodule_test") +@test B.Core === Core +@test !isdefined(B, :include) +@test !isdefined(B, :Base) + +# using / import +JuliaLowering.include_string(test_mod, """ + using JuliaSyntax + using JuliaLowering: SyntaxTree + using JuliaLowering: SyntaxTree 
as st + import JuliaLowering: SyntaxTree as st1, SyntaxTree as st2 +""") +@test test_mod.SyntaxTree === JuliaLowering.SyntaxTree +@test test_mod.st === JuliaLowering.SyntaxTree +@test test_mod.st1 === JuliaLowering.SyntaxTree +@test test_mod.st2 === JuliaLowering.SyntaxTree +@test test_mod.parsestmt === JuliaSyntax.parsestmt + +C = JuliaLowering.include_string(test_mod, """ +module C + module D + function f() + "hi" + end + end + module E + using ...C.D: f + end +end +""") +@test C.D.f === C.E.f end From ac8e6acd8f3032f765058f00e5dff35ef32fa681 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 28 Apr 2024 07:00:14 +1000 Subject: [PATCH 0743/1109] Expansion of syntax quoting & interpolation Syntax interpolation generates an `InterpolationContext` at runtime, using it to copy and splice pieces of AST together with interpolated non-AST values which get inserted as the `K"Value"` kind. Also included here are the start of some test tools to test ASTs produced as part of expansion. --- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/ast.jl | 15 ++-- JuliaLowering/src/desugaring.jl | 127 +++++++++++++++++++++++++---- JuliaLowering/src/eval.jl | 61 +++++++++++++- JuliaLowering/src/linear_ir.jl | 4 +- JuliaLowering/src/syntax_graph.jl | 6 ++ JuliaLowering/test/demo.jl | 10 +++ JuliaLowering/test/runtests.jl | 27 ++++++ JuliaLowering/test/utils.jl | 83 +++++++++++++++++++ 9 files changed, 308 insertions(+), 27 deletions(-) create mode 100644 JuliaLowering/test/utils.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 1e7581d4835d8..0edc5fdc13659 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -11,7 +11,7 @@ using JuliaSyntax using JuliaSyntax: SyntaxHead, highlight, Kind, GreenNode, @KSet_str using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags, has_flags -using JuliaSyntax: filename, first_byte, last_byte, source_location, span +using 
JuliaSyntax: filename, first_byte, last_byte, source_location, span, sourcetext using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 10852db89b000..c8041dc44ee76 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -41,7 +41,6 @@ function makeleaf(ctx, srcref, kind, value; kws...) val = kind == K"Integer" ? convert(Int, value) : kind == K"Float" ? convert(Float64, value) : kind == K"String" ? convert(String, value) : - kind == K"Symbol" ? convert(String, value) : kind == K"Char" ? convert(Char, value) : kind == K"Value" ? value : error("Unexpected leaf kind `$kind`") @@ -49,8 +48,8 @@ function makeleaf(ctx, srcref, kind, value; kws...) end end -function makeleaf(ctx, srcref, kind) - _makenode(syntax_graph(ctx), srcref, kind, nothing) +function makeleaf(ctx, srcref, kind; kws...) + _makenode(syntax_graph(ctx), srcref, kind, nothing; kws...) end function _match_srcref(ex) @@ -190,10 +189,9 @@ function mapchildren(f, ctx, ex) else ex2 = makeleaf(ctx, ex, head(ex)) end - # Copy all attributes. - # TODO: Make this type stable and efficient - for v in values(ex.graph.attributes) - if haskey(v, ex.id) + # TODO: Make this faster? + for (k,v) in pairs(ex2.graph.attributes) + if (k !== :source && k !== :kind && k !== :syntax_flags) && haskey(v, ex.id) v[ex2.id] = v[ex.id] end end @@ -223,8 +221,7 @@ end # Create a new SSA variable function ssavar(ctx::AbstractLoweringContext, srcref) - id = makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) - return id + makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) end # Assign `ex` to an SSA variable. 
diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index bc8d517406674..4ec38ac1ac01f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -92,19 +92,115 @@ function assigned_name(ex) end #------------------------------------------------------------------------------- -# TODO: Lowering pass 1.1: -# Aim of this pass is to do some super simple normalizations to make -# desugaring-proper easier to write. The kinds of things like identifier -# normalization which would require extra logic to pervade the remaining -# desugaring. -# -# * Identifier normalization -# - Strip var"" -# - Operator -> Identifier if necessary -# * Strip "container" nodes -# - K"char" -# - K"parens" nodes -# * Quasiquote expansion +# Lowering pass 1.1: Simple normalizations and quote expansion +function _contains_active_interp(ex, depth) + k = kind(ex) + if k == K"$" && depth == 0 + return true + end + inner_depth = k == K"quote" ? depth + 1 : + k == K"$" ? depth - 1 : + depth + return any(_contains_active_interp(c, inner_depth) for c in children(ex)) +end + +function expand_interpolation(ctx, interp_ctx_var, ex) + @ast ctx ex [K"call" + interpolate_value::K"Value" + interp_ctx_var + ex::K"Value" + expand_forms_0(ctx, ex) + ] +end + +function expand_quote_content(ctx, interp_ctx_var, ex, depth) + if !_contains_active_interp(ex, depth) + return @ast ctx ex [K"call" + interpolate_copy_ast::K"Value" + interp_ctx_var + ex::K"Value" + ] + end + + # We have an interpolation deeper in the tree somewhere - expand to an + # expression + inner_depth = kind(ex) == K"quote" ? depth + 1 : + kind(ex) == K"$" ? 
depth - 1 : + depth + expanded_children = SyntaxList(ctx) + for e in children(ex) + if kind(e) == K"$" && inner_depth == 0 + for x in children(e) + push!(expanded_children, expand_interpolation(ctx, interp_ctx_var, x)) + end + else + push!(expanded_children, expand_quote_content(ctx, interp_ctx_var, e, inner_depth)) + end + end + + return @ast ctx ex [K"call" + interpolate_node::K"Value" + interp_ctx_var + ex::K"Value" + expanded_children... + ] +end + +function expand_quote(ctx, ex) + interp_ctx_var = ssavar(ctx, ex) + expanded = if kind(ex) == K"$" + @chk numchildren(ex) == 1 + e1 = ex[1] + if kind(e1) == K"..." + throw(LoweringError(e1, "`...` expression outside of call")) + end + expand_interpolation(ctx, interp_ctx_var, e1) + else + expand_quote_content(ctx, interp_ctx_var, ex, 0) + end + @ast ctx ex [K"block" + [K"=" + interp_ctx_var + [K"call" + InterpolationContext::K"Value" + ctx.mod::K"Value" + ] + ] + expanded + ] +end + +""" +The aim of this pass is to do some super simple normalizations to make +desugaring-proper easier to write. The kinds of things like identifier +normalization which would require extra logic to pervade the remaining +desugaring. +""" +function expand_forms_0(ctx::DesugaringContext, ex::SyntaxTree) + k = kind(ex) + if k == K"var" || k == K"char" || k == K"parens" + # Strip "container" nodes + @chk numchildren(ex) == 1 + ex[1] + elseif is_operator(k) && !haschildren(ex) # FIXME do in JuliaSyntax? 
+ @ast ctx ex ex.name_val::K"Identifier" + elseif k == K"quote" + @chk numchildren(ex) == 1 + expand_quote(ctx, ex[1]) + elseif !haschildren(ex) + ex + else + mapchildren(e->expand_forms_0(ctx,e), ctx, ex) + end +end + +function expand_forms_0(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) + res = SyntaxList(ctx) + for e in exs + push!(res, expand_forms_0(ctx, e)) + end + res +end #------------------------------------------------------------------------------- # Lowering Pass 1.2 - desugaring @@ -607,7 +703,8 @@ end function expand_forms(mod::Module, ex::SyntaxTree) ctx = DesugaringContext(ex, mod) - res = expand_forms(ctx, reparent(ctx, ex)) - ctx, res + ex1 = expand_forms_0(ctx, reparent(ctx, ex)) + ex2 = expand_forms(ctx, ex1) + ctx, ex2 end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 07b94cd52e91c..b3966a5ac324a 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -143,7 +143,7 @@ function to_lowered_expr(mod, var_info, ex) Core.ReturnNode(to_lowered_expr(mod, var_info, ex[1])) elseif is_quoted(k) if k == K"inert" - QuoteNode(ex[1]) + ex[1] else TODO(ex, "Convert SyntaxTree to Expr") end @@ -182,6 +182,64 @@ end #------------------------------------------------------------------------------- # Runtime support functions called by lowering +struct InterpolationContext{Graph} <: AbstractLoweringContext + mod::Module + graph::Graph +end + +function InterpolationContext(mod) + graph = SyntaxGraph() + ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String, mod=Module) + InterpolationContext(mod, freeze_attrs(graph)) +end + +# Produce interpolated node for `$x` syntax +function interpolate_value(ctx, srcref, x) + if x isa SyntaxTree + if x.graph === ctx.graph + x + else + copy_ast(ctx, x) + end + else + makeleaf(ctx, sourceref(srcref), K"Value", x) + end +end + +# Produce node corresponding to `srcref` when there was an interpolation among +# `children` 
+function interpolate_node(ctx::InterpolationContext, srcref, children...) + makenode(ctx, sourceref(srcref), head(srcref), children...; mod=ctx.mod) +end + +function copy_ast(ctx, ex; new_attrs...) + if haschildren(ex) + cs = SyntaxList(ctx) + for e in children(ex) + push!(cs, interpolate_copy_ast(ctx, e)) + end + ex2 = makenode(ctx, sourceref(ex), head(ex), cs; new_attrs...) + else + ex2 = makeleaf(ctx, sourceref(ex), head(ex); new_attrs...) + end + for (name,attr) in pairs(ex.graph.attributes) + if (name !== :source && name !== :kind && name !== :syntax_flags) && + haskey(attr, ex.id) + attr2 = getattr(ex2.graph, name, nothing) + if !isnothing(attr2) + attr2[ex2.id] = attr[ex.id] + end + end + end + return ex2 +end + +# Copy AST `ex` into `ctx.graph` +function interpolate_copy_ast(ctx, ex) + copy_ast(ctx, ex; mod=ctx.mod) +end + # Construct new bare module including only the "default names" # # using Core @@ -211,6 +269,7 @@ function eval_module(parentmod, modname, body) )) end +# Evaluate content of `import` or `using` statement function module_import(into_mod::Module, is_using::Bool, from_mod::Union{Nothing,Core.SimpleVector}, paths::Core.SimpleVector) # For now, this function converts our lowered representation back to Expr diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index e2ddabc9c25f0..351398a78aa9a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -200,10 +200,12 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"block" || k == K"scope_block" nc = numchildren(ex) + res = nothing for i in 1:nc islast = i == nc - compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) + res = compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) end + res elseif k == K"return" compile(ctx, ex[1], true, true) nothing diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 6f265de8c1b58..9804dd35b8bb4 100644 --- 
a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -204,6 +204,10 @@ function attrnames(tree::SyntaxTree) [name for (name, value) in pairs(attrs) if haskey(value, tree.id)] end +function setattr!(ex::SyntaxTree; attrs...) + setattr!(ex.graph, ex.id; attrs...) +end + # JuliaSyntax tree API function JuliaSyntax.haschildren(tree::SyntaxTree) @@ -248,6 +252,7 @@ JuliaSyntax.last_byte(src::SourceRef) = src.first_byte + span(src.green_tree) - JuliaSyntax.filename(src::SourceRef) = filename(src.file) JuliaSyntax.source_location(::Type{LineNumberNode}, src::SourceRef) = source_location(LineNumberNode, src.file, src.first_byte) JuliaSyntax.source_location(src::SourceRef) = source_location(src.file, src.first_byte) +JuliaSyntax.sourcetext(src::SourceRef) = src.file[first_byte(src):last_byte(src)] # TODO: Adding these methods to support LineNumberNode is kind of hacky but we # can remove these after JuliaLowering becomes self-bootstrapping for macros @@ -280,6 +285,7 @@ JuliaSyntax.source_location(::Type{LineNumberNode}, tree::SyntaxTree) = source_l JuliaSyntax.source_location(tree::SyntaxTree) = source_location(sourceref(tree)) JuliaSyntax.first_byte(tree::SyntaxTree) = first_byte(sourceref(tree)) JuliaSyntax.last_byte(tree::SyntaxTree) = last_byte(sourceref(tree)) +JuliaSyntax.sourcetext(tree::SyntaxTree) = sourcetext(sourceref(tree)) const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId} diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index a482406355d2a..23dc5d21b3244 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -85,6 +85,16 @@ module A end """ +src = """ +begin + x = 10 + y = :(g(z)) + quote + f(\$(x+1), \$y) + end +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") # t = softscope_test(t) @info "Input code" ex diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 9b0627f09b19a..a276cb4415b76 100644 --- a/JuliaLowering/test/runtests.jl +++ 
b/JuliaLowering/test/runtests.jl @@ -2,8 +2,11 @@ using Test using JuliaLowering using JuliaSyntax +using JuliaSyntax: sourcetext using JuliaLowering: @ast +include("utils.jl") + @testset "JuliaLowering.jl" begin # Basic end-to-end / smoke tests @@ -92,4 +95,28 @@ end """) @test C.D.f === C.E.f +# Syntax quoting & interpolation +ex = JuliaLowering.include_string(test_mod, """ +begin + x = 10 + y = :(g(z)) + quote + f(\$(x+1), \$y) + end +end +""") +@test ex ~ @ast_ [K"block" + [K"call" + "f"::K"Identifier" + 11::K"Value" + [K"call" + "g"::K"Identifier" + "z"::K"Identifier" + ] + ] +] +@test sourcetext(ex[1]) == "f(\$(x+1), \$y)" +@test sourcetext(ex[1][2]) == "x+1" +@test sourcetext(ex[1][3]) == "g(z)" + end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl new file mode 100644 index 0000000000000..006b37fa738fd --- /dev/null +++ b/JuliaLowering/test/utils.jl @@ -0,0 +1,83 @@ +using JuliaLowering: + SyntaxGraph, newnode!, ensure_attributes!, + Kind, SourceRef, SyntaxTree, NodeId, + makenode, makeleaf, setattr!, sethead!, + haschildren, numchildren, children + +function _ast_test_graph() + graph = SyntaxGraph() + ensure_attributes!(graph, + kind=Kind, source=Union{SourceRef,NodeId,LineNumberNode}, + var_id=Int, value=Any, name_val=String) +end + +function _source_node(graph, src) + id = newnode!(graph) + sethead!(graph, id, K"None") + setattr!(graph, id, source=src) + SyntaxTree(graph, id) +end + +macro ast_(tree) + defs = [] + ex = JuliaLowering._expand_ast_tree(defs, :graph, :srcref, tree) + quote + graph = _ast_test_graph() + srcref = _source_node(graph, $(QuoteNode(__source__))) + $(defs...) 
+ $ex + end +end + +function ~(ex1, ex2) + if kind(ex1) != kind(ex2) || haschildren(ex1) != haschildren(ex2) + return false + end + if haschildren(ex1) + if numchildren(ex1) != numchildren(ex2) + return false + end + return all(c1 ~ c2 for (c1,c2) in zip(children(ex1), children(ex2))) + else + return get(ex1, :value, nothing) == get(ex2, :value, nothing) && + get(ex1, :name_val, nothing) == get(ex2, :name_val, nothing) + end +end + + +#------------------------------------------------------------------------------- +function _format_as_ast_macro(io, ex, indent) + k = kind(ex) + kind_str = repr(k) + if haschildren(ex) + println(io, indent, "[", kind_str) + ind2 = indent*" " + for c in children(ex) + _format_as_ast_macro(io, c, ind2) + end + println(io, indent, "]") + else + val_str = if k == K"Identifier" || k == K"core" || k == K"top" + repr(ex.name_val) + elseif k == K"SSAValue" + repr(ex.var_id) + else + repr(get(ex, :value, nothing)) + end + println(io, indent, val_str, "::", kind_str) + end +end + +function format_as_ast_macro(io::IO, ex) + print(io, "@ast_ ") + _format_as_ast_macro(io, ex, "") +end + +""" + format_as_ast_macro(ex) + +Format AST `ex` as a Juila source code call to the `@ast_` macro for generating +test case comparisons with the `~` function. +""" +format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex) + From c0663cb105e3618725e1587d8a46b72ec271f3eb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 30 Apr 2024 14:48:48 +1000 Subject: [PATCH 0744/1109] Make mapchildren() allocation-free where possible. 
--- JuliaLowering/src/ast.jl | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index c8041dc44ee76..f153045b34005 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -180,15 +180,30 @@ macro ast(ctx, srcref, tree) end function mapchildren(f, ctx, ex) - if haschildren(ex) - cs = SyntaxList(ctx) - for e in children(ex) - push!(cs, f(e)) + if !haschildren(ex) + return ex + end + orig_children = children(ex) + cs = nothing + for (i,e) in enumerate(orig_children) + c = f(e) + if isnothing(cs) + if c == e + continue + else + cs = SyntaxList(ctx) + append!(cs, orig_children[1:i-1]) + end end - ex2 = makenode(ctx, ex, head(ex), cs) - else - ex2 = makeleaf(ctx, ex, head(ex)) + push!(cs::SyntaxList, c) + end + if isnothing(cs) + # This function should be allocation-free if no children were changed + # by the mapping. + return ex end + cs::SyntaxList + ex2 = makenode(ctx, ex, head(ex), cs) # TODO: Make this faster? for (k,v) in pairs(ex2.graph.attributes) if (k !== :source && k !== :kind && k !== :syntax_flags) && haskey(v, ex.id) From 6dffe8d8a91d7e9f71111cf0c8610969d6f50ff8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 13 May 2024 22:24:35 +1000 Subject: [PATCH 0745/1109] Automatic macro hygiene based on "scope layers" When expanding macros * Any identifiers passed to the macro are tagged with the scope layer they were defined within. * A new unique scope layer is generated for the macro invocation, and any names in the syntax produced by the macro are tagged with this layer. Subsequently, the `(name,scope_layer)` pairs are used rather than plain names when resolving bindings. This simple scheme is powerful enough to allow macros to call macros and this composition to work by default. It even allows macros to emit recursive calls to themselves in the code they return. 
More experimentation is needed to determine whether it's a complete fit for Julia's needs. --- JuliaLowering/README.md | 119 +++++++++-- JuliaLowering/src/JuliaLowering.jl | 5 +- JuliaLowering/src/ast.jl | 217 ++++++++++++++++--- JuliaLowering/src/desugaring.jl | 257 ++++------------------- JuliaLowering/src/eval.jl | 58 ++--- JuliaLowering/src/kinds.jl | 2 + JuliaLowering/src/linear_ir.jl | 16 +- JuliaLowering/src/macro_expansion.jl | 303 +++++++++++++++++++++++++++ JuliaLowering/src/scope_analysis.jl | 160 ++++++++------ JuliaLowering/src/syntax_graph.jl | 22 +- JuliaLowering/src/utils.jl | 46 +--- JuliaLowering/test/demo.jl | 175 +++++++++++----- JuliaLowering/test/runtests.jl | 116 +++++++++- 13 files changed, 1012 insertions(+), 484 deletions(-) create mode 100644 JuliaLowering/src/macro_expansion.jl diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 064d2870640a5..4f093dbd347cd 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -2,18 +2,10 @@ [![Build Status](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml?query=branch%3Amain) -Experimental port of Julia's code "lowering" compiler passes into Julia. - -Lowering comprises four symbolic simplification steps -* Syntax desugaring - simplifying the rich surface syntax down to a small - number of forms. -* Scope analysis - analyzing identifier names used in the code to discover - local variables, closure captures, and associate global variables to the - appropriate module. -* Closure conversion - convert closures to types and deal with captured - variables efficiently where possible. -* Flattening to linear IR - convert code in hierarchical tree form to a - flat array of statements and control flow into gotos. +JuliaLowering.jl is an experimental port of Julia's code lowering compiler +passes, written in Julia itself. 
"Code lowering" is the set of compiler passes +which *symbolically* transform and simplify Julia's syntax prior to type +inference. ## Goals @@ -41,7 +33,20 @@ Note this is a very early work in progress; most things probably don't work! # Design Notes -A disorganized collection of design notes :) +Lowering has five symbolic simplification passes: + +1. Macro expansion - expanding user-defined syntactic constructs by running the + user's macros. This pass also includes a small amount of other symbolic + simplification. +2. Syntax desugaring - simplifying Julia's rich surface syntax down to a small + number of syntactic forms. +3. Scope analysis - analyzing identifier names used in the code to discover + local variables, closure captures, and associate global variables to the + appropriate module. +4. Closure conversion - convert closures to types and deal with captured + variables efficiently where possible. +5. Flattening to linear IR - convert code in hierarchical tree form to a + flat array of statements; convert control flow into gotos. ## Syntax trees @@ -70,6 +75,70 @@ are similar. Analogy 3: Graph algorithms which represent graphs as a compact array of node ids and edges with integer indices, rather than using a linked data structure. +## Hygiene + +### Problems with Hygiene in Julia's exiting macro system + +To write correct hygienic macros in Julia (as of 2024), macro authors must use +`esc()` on any any syntax passed to the macro so that passed identifiers escape +to the macro caller scope. However + +* This is not automatic and the correct use of `esc()` is one of the things + that new macro authors find most confusing. (My impression, based on various + people complaining about how confusing `esc()` is.) +* `esc()` wraps expressions in `Expr(:escape)`, but this doesn't work well when + macros pass such escaped syntax to an inner macro call. 
As discussed in + [Julia issue #37691](https://github.com/JuliaLang/julia/issues/37691), macros + in Julia's existing system are not composable by default. Writing + composable macros in the existing system would require preserving the escape + nesting depth when recursing into any macro argument nested expressions. + Almost no macro author knows how to do this and is prepared to pay for the + complexity of getting it right. + +The requirement to use `esc()` stems from Julia's pervasive use of the simple +`Expr` data structure which represents a unadorned AST in which names are plain +symbols. For example, a macro call `@foo x` gets passed the symbol `:x` +which is just a name without any information attached to indicate that it came +from the scope where `@foo` was called. + +### Hygiene in JuliaLowering + +In JuliaLowering we make hygiene automatic and remove `esc()` by combining names +with scope information. In the language of the paper [*Towards the Essence of +Hygiene*](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) +by Michael Adams, this combination is called a "syntax object". In +JuliaLowering our representation is the tuple `(name,scope_layer)`, also called +`VarId` in the scope resolution pass. + +JuliaLowering's macro expander attaches a unique *scope layer* to each +identifier in a piece of syntax. A "scope layer" is an integer identifer +combined with the module in which the syntax was created. + +When expanding macros, + +* Any identifiers passed to the macro are tagged with the scope layer they were + defined within. +* A new unique scope layer is generated for the macro invocation, and any names + in the syntax produced by the macro are tagged with this layer. + +Subsequently, the `(name,scope_layer)` pairs are used when resolving bindings. +This ensures that, by default, we satisfy the basic rules for hygenic macros +discussed in Adams' paper: + +1. 
A macro can't insert a binding that can capture references other than those + inserted by the macro. +2. A macro can't insert a reference that can be captured by bindings other than + those inserted by the macro. + +TODO: Write more here... + +#### References + +* [Toward Fearless Macros](https://lambdaland.org/posts/2023-10-17_fearless_macros) - + a blog post by Ashton Wiersdorf +* [Towards the Essence of Hygiene](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) - a paper by Michael Adams +* [Bindings as sets of scopes](https://www-old.cs.utah.edu/plt/scope-sets/) - a description of Racket's scope set mechanism by Matthew Flatt + ## Julia's existing lowering implementation ### How does macro expansion work? @@ -264,9 +333,10 @@ In the current Julia runtime, * Use codegen "where necessary/profitable" (eg ccall, has_loops etc) * Otherwise interpret via `jl_interpret_toplevel_thunk` -Should we reimplement eval of the above blessed top level forms in Julia? +Should we lower the above blessed top level forms to julia runtime calls? Pros: -- Semantically sound. Lowering should do syntax checking in things like `Expr(:using)` +- Semantically sound. Lowering should do syntax checking in things like + `Expr(:using)` rather than doing this in the runtime support functions. - Precise lowering error messages - Replaces more Expr usage - Replaces a whole pile of C code with significantly less Julia code @@ -282,3 +352,22 @@ In general, we'd be replacing current *declarative* lowering targets like the setup of its arguments would need to go in a thunk. We've currently got an odd mixture of imperative and declarative lowered code. + +## Notes on racket's hygiene + +People look at [Racket](https://racket-lang.org/) as an example of a very +complete system of hygienic macros. We should learn from them, but keeping in +mind that Racket's macro system is more inherently more complicated. 
Racket's +current approach to hygiene is described in an [accessible talk](https://www.youtube.com/watch?v=Or_yKiI3Ha4) +and in more depth in [a paper](https://www-old.cs.utah.edu/plt/publications/popl16-f.pdf). + +Some differences which makes Racket's macro expander different from Julia: + +* Racket allows *local* definitions of macros. Macro code can be embedded in an + inner lexical scope and capture locals from that scope, but still needs to be + executed at compile time. Julia supports macros at top level scope only. +* Racket goes to great lengths to execute the minimal package code necessary to + expand macros; the "pass system". Julia just executes all top level + statements in order when precompiling a package. +* As a lisp, Racket's surface syntax is dramatically simpler and more uniform + diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 0edc5fdc13659..688d7fc1cdc30 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -9,14 +9,12 @@ using Core: eval using JuliaSyntax -using JuliaSyntax: SyntaxHead, highlight, Kind, GreenNode, @KSet_str +using JuliaSyntax: highlight, Kind, @KSet_str using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags, has_flags using JuliaSyntax: filename, first_byte, last_byte, source_location, span, sourcetext using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error -abstract type AbstractLoweringContext end - _include("kinds.jl") _insert_kinds() @@ -24,6 +22,7 @@ _include("syntax_graph.jl") _include("ast.jl") _include("utils.jl") +_include("macro_expansion.jl") _include("desugaring.jl") _include("scope_analysis.jl") _include("linear_ir.jl") diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index f153045b34005..0b8c7bd39d05d 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -1,3 +1,17 @@ 
+#------------------------------------------------------------------------------- +abstract type AbstractLoweringContext end + +function syntax_graph(ctx::AbstractLoweringContext) + ctx.graph +end + +function new_var_id(ctx::AbstractLoweringContext) + id = ctx.next_var_id[] + ctx.next_var_id[] += 1 + return id +end + +#------------------------------------------------------------------------------- # AST creation utilities _node_id(ex::NodeId) = ex _node_id(ex::SyntaxTree) = ex.id @@ -33,7 +47,7 @@ end function makeleaf(ctx, srcref, kind, value; kws...) graph = syntax_graph(ctx) - if kind == K"Identifier" || kind == K"core" || kind == K"top" + if kind == K"Identifier" || kind == K"core" || kind == K"top" || kind == K"Symbol" || kind == K"globalref" _makenode(graph, srcref, kind, nothing; name_val=value, kws...) elseif kind == K"SSAValue" _makenode(graph, srcref, kind, nothing; var_id=value, kws...) @@ -52,6 +66,32 @@ function makeleaf(ctx, srcref, kind; kws...) _makenode(syntax_graph(ctx), srcref, kind, nothing; kws...) end +# Convenience functions to create leaf nodes referring to identifiers within +# the Core and Top modules. +core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) +Any_type(ctx, ex) = core_ref(ctx, ex, "Any") +svec_type(ctx, ex) = core_ref(ctx, ex, "svec") +nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") +unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") + +top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) + +# Create a new SSA variable +function ssavar(ctx::AbstractLoweringContext, srcref) + makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) +end + +# Assign `ex` to an SSA variable. 
+# Return (variable, assignment_node) +function assign_tmp(ctx::AbstractLoweringContext, ex) + var = ssavar(ctx, ex) + assign_var = makenode(ctx, ex, K"=", var, ex) + var, assign_var +end + + +#------------------------------------------------------------------------------- +# @ast macro function _match_srcref(ex) if Meta.isexpr(ex, :macrocall) && ex.args[1] == Symbol("@HERE") QuoteNode(ex.args[2]) @@ -89,7 +129,12 @@ function _expand_ast_tree(defs, ctx, srcref, tree) # Leaf node kind, srcref, kws = _match_kind_ex(defs, srcref, tree.args[2]) :(makeleaf($ctx, $srcref, $kind, $(esc(tree.args[1])), $(kws...))) - elseif Meta.isexpr(tree, (:vcat, :hcat)) + elseif Meta.isexpr(tree, :call) && tree.args[1] === :(=>) + # Leaf node with copied attributes + kind = esc(tree.args[3]) + srcref = esc(tree.args[2]) + :(mapleaf($ctx, $srcref, $kind)) + elseif Meta.isexpr(tree, (:vcat, :hcat, :vect)) # Interior node flatargs = [] for a in tree.args @@ -128,7 +173,9 @@ Syntactic s-expression shorthand for constructing a `SyntaxTree` AST. The `tree` contains syntax of the following forms: * `[kind child₁ child₂]` - construct an interior node with children -* `value :: kind` - construct a leaf node +* `value :: kind` - construct a leaf node +* `ex => kind` - convert a leaf node to the given `kind`, copying attributes + from it and also using `ex` as the source reference. * `var=ex` - Set `var=ssavar(...)` and return an assignment node `\$var=ex`. `var` may be used outside `@ast` * `cond ? ex1 : ex2` - Conditional; `ex1` and `ex2` will be recursively expanded. @@ -161,7 +208,7 @@ to indicate that the "primary" location of the source is the location where ] [K"call" "eval" ::K"core" - mn.name_val ::K"Identifier" + mn =>K"Identifier" "x" ::K"Identifier" ] ] @@ -179,6 +226,25 @@ macro ast(ctx, srcref, tree) end end +#------------------------------------------------------------------------------- +# Mapping and copying of AST nodes +function copy_attrs!(dest, src) + # TODO: Make this faster? 
+ for (k,v) in pairs(dest.graph.attributes) + if (k !== :source && k !== :kind && k !== :syntax_flags) && haskey(v, src.id) + v[dest.id] = v[src.id] + end + end +end + +function mapleaf(ctx, src, kind) + ex = _makenode(syntax_graph(ctx), src, kind, nothing) + # TODO: Value coersion might be broken here due to use of `name_val` vs + # `value` vs ... ? + copy_attrs!(ex, src) + ex +end + function mapchildren(f, ctx, ex) if !haschildren(ex) return ex @@ -204,46 +270,133 @@ function mapchildren(f, ctx, ex) end cs::SyntaxList ex2 = makenode(ctx, ex, head(ex), cs) - # TODO: Make this faster? - for (k,v) in pairs(ex2.graph.attributes) - if (k !== :source && k !== :kind && k !== :syntax_flags) && haskey(v, ex.id) - v[ex2.id] = v[ex.id] + copy_attrs!(ex2, ex) + return ex2 +end + +# Copy AST `ex` into `ctx` +function copy_ast(ctx, ex) + if haschildren(ex) + cs = SyntaxList(ctx) + for e in children(ex) + push!(cs, copy_ast(ctx, e)) + end + ex2 = makenode(ctx, sourceref(ex), head(ex), cs) + else + ex2 = makeleaf(ctx, sourceref(ex), head(ex)) + end + for (name,attr) in pairs(ex.graph.attributes) + if (name !== :source && name !== :kind && name !== :syntax_flags) && + haskey(attr, ex.id) + attr2 = getattr(ex2.graph, name, nothing) + if !isnothing(attr2) + attr2[ex2.id] = attr[ex.id] + end end end return ex2 end -# Convenience functions to create leaf nodes referring to identifiers within -# the Core and Top modules. 
-core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) -Any_type(ctx, ex) = core_ref(ctx, ex, "Any") -svec_type(ctx, ex) = core_ref(ctx, ex, "svec") -nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") -unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") +#------------------------------------------------------------------------------- +# Predicates and accessors working on expression trees -top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) +function is_quoted(ex) + kind(ex) in KSet"quote top core globalref outerref break inert + meta inbounds inline noinline loopinfo" +end -#------------------------------------------------------------------------------- -function syntax_graph(ctx::AbstractLoweringContext) - ctx.graph +function is_sym_decl(x) + k = kind(x) + k == K"Identifier" || k == K"::" end -function new_var_id(ctx::AbstractLoweringContext) - id = ctx.next_var_id[] - ctx.next_var_id[] += 1 - return id +# Identifier made of underscores +function is_placeholder(ex) + kind(ex) == K"Identifier" && all(==('_'), ex.name_val) end -# Create a new SSA variable -function ssavar(ctx::AbstractLoweringContext, srcref) - makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) +function is_identifier(x) + k = kind(x) + k == K"Identifier" || k == K"var" || is_operator(k) || is_macro_name(k) end -# Assign `ex` to an SSA variable. -# Return (variable, assignment_node) -function assign_tmp(ctx::AbstractLoweringContext, ex) - var = ssavar(ctx, ex) - assign_var = makenode(ctx, ex, K"=", var, ex) - var, assign_var +function is_eventually_call(ex::SyntaxTree) + k = kind(ex) + return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1])) +end + +function is_function_def(ex) + k = kind(ex) + return k == K"function" || k == K"->" || + (k == K"=" && numchildren(ex) == 2 && is_eventually_call(ex[1])) +end + +function identifier_name(ex) + kind(ex) == K"var" ? 
ex[1] : ex +end + +function is_valid_name(ex) + n = identifier_name(ex).name_val + n !== "ccall" && n !== "cglobal" +end + +function decl_var(ex) + kind(ex) == K"::" ? ex[1] : ex +end + +# given a complex assignment LHS, return the symbol that will ultimately be assigned to +function assigned_name(ex) + k = kind(ex) + if (k == K"call" || k == K"curly" || k == K"where") || (k == K"::" && is_eventually_call(ex)) + assigned_name(ex[1]) + else + ex + end +end + +#------------------------------------------------------------------------------- +# @chk: AST structure checking tool +function _chk_code(ex, cond) + cond_str = string(cond) + quote + ex = $(esc(ex)) + @assert ex isa SyntaxTree + try + ok = $(esc(cond)) + if !ok + throw(LoweringError(ex, "Expected `$($cond_str)`")) + end + catch + throw(LoweringError(ex, "Structure error evaluating `$($cond_str)`")) + end + end +end + +# Internal error checking macro. +# Check a condition involving an expression, throwing a LoweringError if it +# doesn't evaluate to true. Does some very simple pattern matching to attempt +# to extract the expression variable from the left hand side. +macro chk(cond) + ex = cond + while true + if ex isa Symbol + break + elseif ex.head == :call + ex = ex.args[2] + elseif ex.head == :ref + ex = ex.args[1] + elseif ex.head == :. + ex = ex.args[1] + elseif ex.head in (:(==), :(in), :<, :>) + ex = ex.args[1] + else + error("Can't analyze $cond") + end + end + _chk_code(ex, cond) +end + +macro chk(ex, cond) + _chk_code(ex, cond) end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 4ec38ac1ac01f..a5c8dfa53199e 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1,11 +1,4 @@ -""" -Unique symbolic identity for a variable within a `DesugaringContext` -""" -const VarId = Int - -struct SSAVar - id::VarId -end +# Lowering Pass 2 - syntax desugaring struct LambdaInfo # TODO: Make SyntaxList concretely typed? 
@@ -18,193 +11,19 @@ end struct DesugaringContext{GraphType} <: AbstractLoweringContext graph::GraphType next_var_id::Ref{VarId} + scope_layers::Vector{ScopeLayer} mod::Module end -function DesugaringContext(ctx, mod) - graph = syntax_graph(ctx) - ensure_attributes!(graph, - kind=Kind, syntax_flags=UInt16, green_tree=GreenNode, - source_pos=Int, source=SourceAttrType, - value=Any, name_val=String, - scope_type=Symbol, # :hard or :soft - var_id=VarId, - lambda_info=LambdaInfo) - DesugaringContext(freeze_attrs(graph), Ref{VarId}(1), mod) -end - -#------------------------------------------------------------------------------- -#------------------------------------------------------------------------------- -# Predicates and accessors working on expression trees - -function is_quoted(ex) - kind(ex) in KSet"quote top core globalref outerref break inert - meta inbounds inline noinline loopinfo" -end - -function is_sym_decl(x) - k = kind(x) - k == K"Identifier" || k == K"::" -end - -# Identifier made of underscores -function is_placeholder(ex) - kind(ex) == K"Identifier" && all(==('_'), ex.name_val) -end - -function is_identifier(x) - k = kind(x) - k == K"Identifier" || k == K"var" || is_operator(k) || is_macro_name(k) -end - -function is_eventually_call(ex::SyntaxTree) - k = kind(ex) - return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1])) -end - -function is_function_def(ex) - k = kind(ex) - return k == K"function" || k == K"->" || - (k == K"=" && numchildren(ex) == 2 && is_eventually_call(ex[1])) -end - -function identifier_name(ex) - kind(ex) == K"var" ? ex[1] : ex -end - -function is_valid_name(ex) - n = identifier_name(ex).name_val - n !== "ccall" && n !== "cglobal" -end - -function decl_var(ex) - kind(ex) == K"::" ? 
ex[1] : ex -end - -# given a complex assignment LHS, return the symbol that will ultimately be assigned to -function assigned_name(ex) - k = kind(ex) - if (k == K"call" || k == K"curly" || k == K"where") || (k == K"::" && is_eventually_call(ex)) - assigned_name(ex[1]) - else - ex - end -end - -#------------------------------------------------------------------------------- -# Lowering pass 1.1: Simple normalizations and quote expansion -function _contains_active_interp(ex, depth) - k = kind(ex) - if k == K"$" && depth == 0 - return true - end - inner_depth = k == K"quote" ? depth + 1 : - k == K"$" ? depth - 1 : - depth - return any(_contains_active_interp(c, inner_depth) for c in children(ex)) -end - -function expand_interpolation(ctx, interp_ctx_var, ex) - @ast ctx ex [K"call" - interpolate_value::K"Value" - interp_ctx_var - ex::K"Value" - expand_forms_0(ctx, ex) - ] -end - -function expand_quote_content(ctx, interp_ctx_var, ex, depth) - if !_contains_active_interp(ex, depth) - return @ast ctx ex [K"call" - interpolate_copy_ast::K"Value" - interp_ctx_var - ex::K"Value" - ] - end - - # We have an interpolation deeper in the tree somewhere - expand to an - # expression - inner_depth = kind(ex) == K"quote" ? depth + 1 : - kind(ex) == K"$" ? depth - 1 : - depth - expanded_children = SyntaxList(ctx) - for e in children(ex) - if kind(e) == K"$" && inner_depth == 0 - for x in children(e) - push!(expanded_children, expand_interpolation(ctx, interp_ctx_var, x)) - end - else - push!(expanded_children, expand_quote_content(ctx, interp_ctx_var, e, inner_depth)) - end - end - - return @ast ctx ex [K"call" - interpolate_node::K"Value" - interp_ctx_var - ex::K"Value" - expanded_children... - ] -end - -function expand_quote(ctx, ex) - interp_ctx_var = ssavar(ctx, ex) - expanded = if kind(ex) == K"$" - @chk numchildren(ex) == 1 - e1 = ex[1] - if kind(e1) == K"..." 
- throw(LoweringError(e1, "`...` expression outside of call")) - end - expand_interpolation(ctx, interp_ctx_var, e1) - else - expand_quote_content(ctx, interp_ctx_var, ex, 0) - end - @ast ctx ex [K"block" - [K"=" - interp_ctx_var - [K"call" - InterpolationContext::K"Value" - ctx.mod::K"Value" - ] - ] - expanded - ] -end - -""" -The aim of this pass is to do some super simple normalizations to make -desugaring-proper easier to write. The kinds of things like identifier -normalization which would require extra logic to pervade the remaining -desugaring. -""" -function expand_forms_0(ctx::DesugaringContext, ex::SyntaxTree) - k = kind(ex) - if k == K"var" || k == K"char" || k == K"parens" - # Strip "container" nodes - @chk numchildren(ex) == 1 - ex[1] - elseif is_operator(k) && !haschildren(ex) # FIXME do in JuliaSyntax? - @ast ctx ex ex.name_val::K"Identifier" - elseif k == K"quote" - @chk numchildren(ex) == 1 - expand_quote(ctx, ex[1]) - elseif !haschildren(ex) - ex - else - mapchildren(e->expand_forms_0(ctx,e), ctx, ex) - end -end - -function expand_forms_0(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) - res = SyntaxList(ctx) - for e in exs - push!(res, expand_forms_0(ctx, e)) - end - res -end - -#------------------------------------------------------------------------------- -# Lowering Pass 1.2 - desugaring -function expand_assignment(ctx, ex) +function DesugaringContext(ctx) + graph = ensure_attributes(syntax_graph(ctx), + kind=Kind, syntax_flags=UInt16, + source=SourceAttrType, + value=Any, name_val=String, + scope_type=Symbol, # :hard or :soft + var_id=VarId, + lambda_info=LambdaInfo) + DesugaringContext(graph, ctx.next_var_id, ctx.scope_layers, ctx.current_layer.mod) end function expand_condition(ctx, ex) @@ -213,7 +32,7 @@ function expand_condition(ctx, ex) # rather than first computing a bool and then jumping. 
error("TODO expand_condition") end - expand_forms(ctx, ex) + expand_forms_2(ctx, ex) end function expand_let(ctx, ex) @@ -258,7 +77,7 @@ function expand_let(ctx, ex) end function expand_call(ctx, ex) - cs = expand_forms(ctx, children(ex)) + cs = expand_forms_2(ctx, children(ex)) if is_infix_op_call(ex) || is_postfix_op_call(ex) cs[1], cs[2] = cs[2], cs[1] end @@ -562,12 +381,12 @@ function expand_module(ctx::DesugaringContext, ex::SyntaxTree) std_defs = if !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) @ast ctx (@HERE) [ K"block" - [K"using" + [K"using"(@HERE) [K"importpath" "Base" ::K"Identifier" ] ] - [K"function" + [K"function"(@HERE) [K"call" "eval" ::K"Identifier" "x" ::K"Identifier" @@ -578,7 +397,7 @@ function expand_module(ctx::DesugaringContext, ex::SyntaxTree) "x" ::K"Identifier" ] ] - [K"function" + [K"function"(@HERE) [K"call" "include" ::K"Identifier" "x" ::K"Identifier" @@ -590,7 +409,7 @@ function expand_module(ctx::DesugaringContext, ex::SyntaxTree) "x" ::K"Identifier" ] ] - [K"function" + [K"function"(@HERE) [K"call" "include" ::K"Identifier" [K"::" @@ -627,20 +446,35 @@ function expand_module(ctx::DesugaringContext, ex::SyntaxTree) ] end -function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) +""" +Lowering pass 2 - desugaring + +This pass simplifies expressions by expanding complicated syntax sugar into a +small set of core syntactic forms. For example, field access syntax `a.b` is +expanded to a function call `getproperty(a, :b)`. +""" +function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) k = kind(ex) if k == K"call" expand_call(ctx, ex) + elseif k == K"." 
+ @chk numchildren(ex) == 2 + @chk kind(ex[2]) == K"Identifier" + @ast ctx ex [K"call" + "getproperty"::K"top" + ex[1] + ex[2]=>K"Symbol" + ] elseif k == K"function" - expand_forms(ctx, expand_function_def(ctx, ex)) + expand_forms_2(ctx, expand_function_def(ctx, ex)) elseif k == K"let" - expand_forms(ctx, expand_let(ctx, ex)) + expand_forms_2(ctx, expand_let(ctx, ex)) elseif k == K"local" || k == K"global" if numchildren(ex) == 1 && kind(ex[1]) == K"Identifier" # Don't recurse when already simplified - `local x`, etc ex else - expand_forms(ctx, expand_decls(ctx, ex)) # FIXME + expand_forms_2(ctx, expand_decls(ctx, ex)) # FIXME end elseif is_operator(k) && !haschildren(ex) makeleaf(ctx, ex, K"Identifier", ex.name_val) @@ -653,14 +487,14 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) else @ast ctx ex [K"call" "string"::K"top" - expand_forms(ctx, children(ex))... + expand_forms_2(ctx, children(ex))... ] end elseif k == K"tuple" # TODO: named tuples @ast ctx ex [K"call" "tuple"::K"core" - expand_forms(ctx, children(ex))... + expand_forms_2(ctx, children(ex))... 
] elseif k == K"module" # TODO: check-toplevel @@ -689,22 +523,21 @@ function expand_forms(ctx::DesugaringContext, ex::SyntaxTree) TODO(ex, "destructuring assignment") end end - mapchildren(e->expand_forms(ctx,e), ctx, ex) + mapchildren(e->expand_forms_2(ctx,e), ctx, ex) end end -function expand_forms(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) +function expand_forms_2(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) res = SyntaxList(ctx) for e in exs - push!(res, expand_forms(ctx, e)) + push!(res, expand_forms_2(ctx, e)) end res end -function expand_forms(mod::Module, ex::SyntaxTree) - ctx = DesugaringContext(ex, mod) - ex1 = expand_forms_0(ctx, reparent(ctx, ex)) - ex2 = expand_forms(ctx, ex1) - ctx, ex2 +function expand_forms_2(ctx, ex::SyntaxTree) + ctx1 = DesugaringContext(ctx) + ex1 = expand_forms_2(ctx1, reparent(ctx1, ex)) + ctx1, ex1 end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index b3966a5ac324a..2c6e96bcedb5c 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,8 +1,9 @@ -function lower(mod, ex) - ctx1, ex1 = expand_forms(mod, ex) - ctx2, ex2 = resolve_scopes!(ctx1, ex1) - ctx3, ex3 = linearize_ir(ctx2, ex2) - ex3 +function lower(mod::Module, ex) + ctx1, ex1 = expand_forms_1(mod, ex) + ctx2, ex2 = expand_forms_2(ctx1, ex1) + ctx3, ex3 = resolve_scopes!(ctx2, ex2) + ctx4, ex4 = linearize_ir(ctx3, ex3) + ex4 end # CodeInfo constructor. TODO: Should be in Core? @@ -130,11 +131,20 @@ function to_lowered_expr(mod, var_info, ex) GlobalRef(Core, Symbol(ex.name_val)) elseif k == K"top" GlobalRef(Base, Symbol(ex.name_val)) + elseif k == K"globalref" + if mod === ex.mod + # Implicitly refers to name in parent module. + Symbol(ex.name_val) + else + GlobalRef(ex.mod, Symbol(ex.name_val)) + end elseif k == K"Identifier" # Implicitly refers to name in parent module # TODO: Should we even have plain identifiers at this point or should # they all effectively be resolved into GlobalRef earlier? 
Symbol(ex.name_val) + elseif k == K"Symbol" + QuoteNode(Symbol(ex.name_val)) elseif k == K"slot" Core.SlotNumber(ex.var_id) elseif k == K"SSAValue" @@ -183,15 +193,14 @@ end # Runtime support functions called by lowering struct InterpolationContext{Graph} <: AbstractLoweringContext - mod::Module graph::Graph end -function InterpolationContext(mod) +function InterpolationContext() graph = SyntaxGraph() ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, - value=Any, name_val=String, mod=Module) - InterpolationContext(mod, freeze_attrs(graph)) + value=Any, name_val=String) + InterpolationContext(freeze_attrs(graph)) end # Produce interpolated node for `$x` syntax @@ -210,34 +219,7 @@ end # Produce node corresponding to `srcref` when there was an interpolation among # `children` function interpolate_node(ctx::InterpolationContext, srcref, children...) - makenode(ctx, sourceref(srcref), head(srcref), children...; mod=ctx.mod) -end - -function copy_ast(ctx, ex; new_attrs...) - if haschildren(ex) - cs = SyntaxList(ctx) - for e in children(ex) - push!(cs, interpolate_copy_ast(ctx, e)) - end - ex2 = makenode(ctx, sourceref(ex), head(ex), cs; new_attrs...) - else - ex2 = makeleaf(ctx, sourceref(ex), head(ex); new_attrs...) - end - for (name,attr) in pairs(ex.graph.attributes) - if (name !== :source && name !== :kind && name !== :syntax_flags) && - haskey(attr, ex.id) - attr2 = getattr(ex2.graph, name, nothing) - if !isnothing(attr2) - attr2[ex2.id] = attr[ex.id] - end - end - end - return ex2 -end - -# Copy AST `ex` into `ctx.graph` -function interpolate_copy_ast(ctx, ex) - copy_ast(ctx, ex; mod=ctx.mod) + makenode(ctx, sourceref(srcref), head(srcref), children...) 
end # Construct new bare module including only the "default names" @@ -275,7 +257,7 @@ function module_import(into_mod::Module, is_using::Bool, # For now, this function converts our lowered representation back to Expr # and calls eval() to avoid replicating all of the fiddly logic in # jl_toplevel_eval_flex. - # FIXME: ccall Julia runtime functions directly? + # TODO: ccall Julia runtime functions directly? # * jl_module_using jl_module_use_as # * import_module jl_module_import_as path_args = [] diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index db6ed8de0026e..3df5aff471ebe 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -10,6 +10,8 @@ function _insert_kinds() # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" + # A (quoted) `Symbol` + "Symbol" # TODO: Use `meta` for inbounds and loopinfo etc? "inbounds" "inline" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 351398a78aa9a..9188a28ab50a0 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------- -# Lowering pass 4: Flatten to linear IR +# Lowering pass 5: Flatten to linear IR function is_simple_atom(ex) k = kind(ex) @@ -154,7 +154,7 @@ end function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"Identifier" || is_literal(k) || k == K"SSAValue" || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"Value" + k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall if in_tail_pos emit_return(ctx, ex, ex) @@ -166,7 +166,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) ex else if k == K"Identifier" - emit(ctx, ex) # keep symbols for undefined-var checking + emit(ctx, 
ex) # keep identifiers for undefined-var checking end nothing end @@ -283,7 +283,12 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) makenode(ctx, ex, K"slot"; var_id=slot_id) else # TODO: look up any static parameters - ex + info = ctx.var_info[id] + if info.kind === :global + makeleaf(ctx, ex, K"globalref", ex.name_val, mod=info.mod) + else + TODO(ex, "Identifier which is not a slot or global?") + end end elseif k == K"outerref" || k == K"meta" TODO(ex, "_renumber $k") @@ -381,7 +386,8 @@ end function linearize_ir(ctx, ex) graph = ensure_attributes(ctx.graph, slot_rewrites=Dict{VarId,Int}, - var_info=Dict{VarId,VarInfo}) + var_info=Dict{VarId,VarInfo}, + mod=Module) # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently # required to call reparent() ... _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl new file mode 100644 index 0000000000000..9c4e24a8c2369 --- /dev/null +++ b/JuliaLowering/src/macro_expansion.jl @@ -0,0 +1,303 @@ +# Lowering pass 1: Macro expansion, simple normalizations and quote expansion + +""" +Unique symbolic identity for a variable +""" +const VarId = Int + +const LayerId = Int + +""" +A `ScopeLayer` is a mechanism for automatic hygienic macros; every identifier +is assigned to a particular layer and can only match against bindings which are +themselves part of that layer. + +Normal code contains a single scope layer, whereas each macro expansion +generates a new layer. 
+""" +struct ScopeLayer + id::LayerId + mod::Module +end + +struct MacroExpansionContext{GraphType} <: AbstractLoweringContext + graph::GraphType + next_var_id::Ref{VarId} + scope_layers::Vector{ScopeLayer} + current_layer::ScopeLayer +end + +function MacroExpansionContext(ctx, mod::Module) + graph = ensure_attributes(syntax_graph(ctx), + var_id=VarId, + scope_layer=LayerId) + layers = Vector{ScopeLayer}() + MacroExpansionContext(graph, Ref{VarId}(1), layers, new_scope_layer(layers, mod)) +end + +function new_scope_layer(layers, mod::Module) + layer = ScopeLayer(length(layers)+1, mod) + push!(layers, layer) + return layer +end + +#-------------------------------------------------- +function _contains_active_interp(ex, depth) + k = kind(ex) + if k == K"$" && depth == 0 + return true + end + inner_depth = k == K"quote" ? depth + 1 : + k == K"$" ? depth - 1 : + depth + return any(_contains_active_interp(c, inner_depth) for c in children(ex)) +end + +function expand_interpolation(ctx, interp_ctx_var, ex) + @ast ctx ex [K"call" + interpolate_value::K"Value" + interp_ctx_var + [K"inert" ex] + expand_forms_1(ctx, ex) + ] +end + +# TODO: Rewrite this recursive expansion to happen partially at +# runtime rather than entirely in lowering? That is, we'd expand to +# +# interpolate_expression(ex, val1, val2, ...) +# +# where `ex` is an inert version of the quoted block and `val1, val2, ...` are +# the expressions within `$` escaping. +# +# Advantages: +# * Much more compact lowered AST +# * Clearer lowered AST - `ex` only appears once, rather than many times +# * Smaller runtime API surface area +# +# Disadvantages: +# * Recursive traversal and processing of quote depth appears both here and in +# the runtime. But can unify the traversal code? 
+# +# Beware tricky expansion gotchas like getting the meaning of the following correct +# +# x = 42 +# macro m() +# simplequote +# println(x) +# quote +# $x, x +# end +# end +# end +# +function expand_quote_content(ctx, interp_ctx_var, ex, depth) + if !_contains_active_interp(ex, depth) + return @ast ctx ex [K"call" + copy_ast::K"Value" + interp_ctx_var + ex::K"Value" + ] + end + + # We have an interpolation deeper in the tree somewhere - expand to an + # expression + inner_depth = kind(ex) == K"quote" ? depth + 1 : + kind(ex) == K"$" ? depth - 1 : + depth + expanded_children = SyntaxList(ctx) + for e in children(ex) + if kind(e) == K"$" && inner_depth == 0 + for x in children(e) + push!(expanded_children, expand_interpolation(ctx, interp_ctx_var, x)) + end + else + push!(expanded_children, expand_quote_content(ctx, interp_ctx_var, e, inner_depth)) + end + end + + return @ast ctx ex [K"call" + interpolate_node::K"Value" + interp_ctx_var + [K"inert" ex] + expanded_children... + ] +end + +function expand_quote(ctx, ex) + interp_ctx_var = ssavar(ctx, ex) + expanded = if kind(ex) == K"$" + @chk numchildren(ex) == 1 + e1 = ex[1] + if kind(e1) == K"..." 
+ throw(LoweringError(e1, "`...` expression outside of call")) + end + expand_interpolation(ctx, interp_ctx_var, e1) + else + expand_quote_content(ctx, interp_ctx_var, ex, 0) + end + @ast ctx ex [K"block" + [K"=" + interp_ctx_var + [K"call" + InterpolationContext::K"Value" + ] + ] + expanded + ] +end + +#-------------------------------------------------- +struct MacroContext <: AbstractLoweringContext + graph::SyntaxGraph + macroname::SyntaxTree + mod::Module +end + +struct MacroExpansionError + context::Union{Nothing,MacroContext} + ex::SyntaxTree + msg::String +end + +function MacroExpansionError(ex::SyntaxTree, msg::AbstractString) + MacroExpansionError(nothing, ex, msg) +end + +function Base.showerror(io::IO, exc::MacroExpansionError) + print(io, "MacroExpansionError") + ctx = exc.context + if !isnothing(ctx) + print(io, " while expanding ", ctx.macroname, + " in module ", ctx.mod) + end + print(io, ":\n") + src = sourceref(exc.ex) + highlight(io, src.file, first_byte(src):last_byte(src), note=exc.msg) +end + +function maybe_set_scope_layer!(ex, id) + k = kind(ex) + if (k == K"Identifier" || k == K"MacroName" || (is_operator(k) && !haschildren(ex))) && + !hasattr(ex, :scope_layer) + setattr!(ex; scope_layer=id) + end +end + +function set_scope_layer_recursive!(ex, id) + k = kind(ex) + if k == K"module" || k == K"toplevel" + return + end + if haschildren(ex) + for c in children(ex) + set_scope_layer_recursive!(c, id) + end + else + maybe_set_scope_layer!(ex, id) + end + ex +end + +function eval_macro_name(ctx, ex) + # `ex1` might contain a nontrivial mix of scope layers so we can't just + # `eval()` it, as it's already been partially lowered by this point. + # Instead, we repeat the latter parts of `lower()` here. 
+ ex1 = expand_forms_1(ctx, ex) + ctx2, ex2 = expand_forms_2(ctx, ex1) + ctx3, ex3 = resolve_scopes!(ctx2, ex2) + ctx4, ex4 = linearize_ir(ctx3, ex3) + mod = ctx.current_layer.mod + expr_form = to_lowered_expr(mod, ex4.var_info, ex4) + eval(mod, expr_form) +end + +function expand_macro(ctx, ex) + @assert kind(ex) == K"macrocall" + + macname = ex[1] + macfunc = eval_macro_name(ctx, macname) + # Macro call arguments may be either + # * Unprocessed by the macro expansion pass + # * Previously processed, but spliced into a further macro call emitted by + # a macro expansion. + # In either case, we need to set any unset scope layers before passing the + # arguments to the macro call. + macro_args = [set_scope_layer_recursive!(e, ctx.current_layer.id) + for e in children(ex)[2:end]] + mctx = MacroContext(ctx.graph, macname, ctx.current_layer.mod) + expanded = try + # TODO: Allow invoking old-style macros for compat + invokelatest(macfunc, mctx, macro_args...) + catch exc + if exc isa MacroExpansionError + # Add context to the error. + # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? + rethrow(MacroExpansionError(mctx, ex.ex, exc.msg)) + else + throw(MacroExpansionError(mctx, ex, "Error expanding macro")) + end + end + + if expanded isa SyntaxTree + new_layer = new_scope_layer(ctx.scope_layers, parentmodule(macfunc)) + ctx2 = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) + expand_forms_1(ctx2, expanded) + else + @ast ctx ex expanded::K"Value" + end +end + +""" +Lowering pass 1 + +This pass contains some simple expansion to make the rest of desugaring easier +to write and expands user defined macros. Macros see the surface syntax, so +need to be dealt with before other lowering. 
+ +* Does identifier normalization +* Strips semantically irrelevant "container" nodes like parentheses +* Expands macros +* Processes quoted syntax turning `K"quote"` into `K"inert"` (eg, expanding + interpolations) +""" +function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) + maybe_set_scope_layer!(ex, ctx.current_layer.id) + k = kind(ex) + if k == K"Identifier" + # TODO: Insert is_placeholder() transformation here. + ex + elseif k == K"var" || k == K"char" || k == K"parens" + # Strip "container" nodes + @chk numchildren(ex) == 1 + expand_forms_1(ctx, ex[1]) + elseif k == K"MacroName" + @ast ctx ex ex=>K"Identifier" + elseif is_operator(k) && !haschildren(ex) # TODO: do in JuliaSyntax? + @ast ctx ex ex=>K"Identifier" + elseif k == K"quote" + @chk numchildren(ex) == 1 + expand_quote(ctx, ex[1]) + elseif k == K"module" || k == K"toplevel" || k == K"inert" + ex + elseif k == K"macrocall" + expand_macro(ctx, ex) + elseif !haschildren(ex) + ex + else + mapchildren(e->expand_forms_1(ctx,e), ctx, ex) + end +end + +function expand_forms_1(ctx::MacroExpansionContext, exs::Union{Tuple,AbstractVector}) + res = SyntaxList(ctx) + for e in exs + push!(res, expand_forms_1(ctx, e)) + end + res +end + +function expand_forms_1(mod::Module, ex::SyntaxTree) + ctx = MacroExpansionContext(ex, mod) + ctx, expand_forms_1(ctx, reparent(ctx, ex)) +end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index dae51d01bd24b..2b3108553dd37 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -1,4 +1,4 @@ -# Lowering pass 2: analyze scopes (passes 2/3 in flisp code) +# Lowering pass 3: analyze scopes (passes 2/3 in flisp code) # # This pass analyzes the names (variables/constants etc) used in scopes # @@ -52,21 +52,19 @@ function _find_scope_vars!(assignments, locals, globals, used_names, ex) k = kind(ex) if k == K"Identifier" - push!(used_names, ex.name_val) + push!(used_names, VarKey(ex)) elseif 
!haschildren(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return elseif k == K"local" || k == K"local_def" - name = ex[1].name_val - get!(locals, name, ex) + get!(locals, VarKey(ex[1]), ex) elseif k == K"global" - name = ex[1].name_val - get!(globals, name, ex) + get!(globals, VarKey(ex[1]), ex) # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) if !(kind(v) in KSet"SSAValue globalref outerref" || is_placeholder(v)) - get!(assignments, v.name_val, v) + get!(assignments, VarKey(v), v) end _find_scope_vars!(assignments, locals, globals, used_names, ex[2]) else @@ -79,24 +77,45 @@ end # Find names of all identifiers used in the given expression, grouping them # into sets by type. # -# NB: This only works propery after expand_forms has already processed assignments +# NB: This only works propery after desugaring has already processed assignments function find_scope_vars(ex) ExT = typeof(ex) - assignments = Dict{String,ExT}() - locals = Dict{String,ExT}() - globals = Dict{String,ExT}() - used_names = Set{String}() + assignments = Dict{VarKey,ExT}() + locals = Dict{VarKey,ExT}() + globals = Dict{VarKey,ExT}() + used_names = Set{VarKey}() for e in children(ex) _find_scope_vars!(assignments, locals, globals, used_names, e) end return assignments, locals, globals, used_names end +""" +Key to use when looking up variables, composed of the name and scope layer. +""" +struct VarKey + name::String + layer::LayerId +end + +# Identifiers produced by lowering will have the following layer by default. 
+# +# To make new mutable variables without colliding names, lowering can +# - generate new var_id's directly (like the gensyms used by the old system) +# - create additional layers, though this may be unnecessary +const _lowering_internal_layer = -1 + +function VarKey(ex::SyntaxTree) + @chk kind(ex) == K"Identifier" + VarKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) +end + """ Metadata about a variable name - whether it's a local, etc """ struct VarInfo - name::String # Variable name + name::String + mod::Union{Nothing,Module} kind::Symbol # :local :global :argument :static_parameter is_single_assign::Bool # Single assignment is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) @@ -111,7 +130,7 @@ struct ScopeInfo is_hard::Bool # Map from variable names to IDs which appear in this scope but not in the # parent scope - var_ids::Dict{String,VarId} + var_ids::Dict{VarKey,VarId} # Variables used by the enclosing lambda lambda_locals::Set{VarId} end @@ -120,8 +139,9 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext graph::GraphType next_var_id::Ref{VarId} mod::Module + scope_layers::Vector{ScopeLayer} # name=>id mappings for all discovered global vars - global_vars::Dict{String,VarId} + global_vars::Dict{VarKey,VarId} # Stack of name=>id mappings for each scope, innermost scope last. scope_stack::Vector{ScopeInfo} # Metadata about variables. 
There's only one map for this, as var_id is is @@ -130,7 +150,7 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext var_info::Dict{VarId,VarInfo} # Variables which were implicitly global due to being assigned to in top # level code - implicit_toplevel_globals::Set{String} + implicit_toplevel_globals::Set{VarKey} end function ScopeResolutionContext(ctx) @@ -138,22 +158,23 @@ function ScopeResolutionContext(ctx) ScopeResolutionContext(graph, ctx.next_var_id, ctx.mod, - Dict{String,VarId}(), + ctx.scope_layers, + Dict{VarKey,VarId}(), Vector{ScopeInfo}(), Dict{VarId,VarInfo}(), - Set{String}()) + Set{VarKey}()) end -function lookup_var(ctx, name::String, exclude_toplevel_globals=false) +function lookup_var(ctx, varkey::VarKey, exclude_toplevel_globals=false) for i in lastindex(ctx.scope_stack):-1:1 ids = ctx.scope_stack[i].var_ids - id = get(ids, name, nothing) + id = get(ids, varkey, nothing) if !isnothing(id) && (!exclude_toplevel_globals || i > 1 || ctx.var_info[id].kind != :global) return id end end - return exclude_toplevel_globals ? nothing : get(ctx.global_vars, name, nothing) + return exclude_toplevel_globals ? nothing : get(ctx.global_vars, varkey, nothing) end function current_scope(ctx) @@ -164,19 +185,20 @@ function var_kind(ctx, id::VarId) ctx.var_info[id].kind end -function var_kind(ctx, name::String, exclude_toplevel_globals=false) - id = lookup_var(ctx, name, exclude_toplevel_globals) +function var_kind(ctx, varkey::VarKey, exclude_toplevel_globals=false) + id = lookup_var(ctx, varkey, exclude_toplevel_globals) isnothing(id) ? nothing : ctx.var_info[id].kind end -function new_var(ctx, name, kind, is_ambiguous_local=false) - id = kind === :global ? get(ctx.global_vars, name, nothing) : nothing +function new_var(ctx, varkey::VarKey, kind::Symbol, is_ambiguous_local=false) + id = kind === :global ? 
get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) id = new_var_id(ctx) - ctx.var_info[id] = VarInfo(name, kind, false, is_ambiguous_local) + mod = kind === :global ? ctx.scope_layers[varkey.layer].mod : nothing + ctx.var_info[id] = VarInfo(varkey.name, mod, kind, false, is_ambiguous_local) end if kind === :global - ctx.global_vars[name] = id + ctx.global_vars[varkey] = id end id end @@ -184,7 +206,7 @@ end # Analyze identifier usage within a scope, adding all newly discovered # identifiers to ctx.var_info and constructing a lookup table from identifier # names to their variable IDs -function make_scope(ctx, ex, scope_type, lambda_info) +function analyze_scope(ctx, ex, scope_type, lambda_info) parentscope = isempty(ctx.scope_stack) ? nothing : current_scope(ctx) is_outer_lambda_scope = kind(ex) == K"lambda" is_toplevel = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk @@ -194,48 +216,50 @@ function make_scope(ctx, ex, scope_type, lambda_info) # Create new lookup table for variables in this scope which differ from the # parent scope. 
- var_ids = Dict{String,VarId}() + var_ids = Dict{VarKey,VarId}() # Add lambda arguments if !isnothing(lambda_info) for a in lambda_info.args - var_ids[a.name_val] = new_var(ctx, a.name_val, :argument) + vk = VarKey(a) + var_ids[vk] = new_var(ctx, vk, :argument) end for a in lambda_info.static_parameters - var_ids[a.name_val] = new_var(ctx, a.name_val, :static_parameter) + vk = VarKey(a) + var_ids[vk] = new_var(ctx, vk, :static_parameter) end end # Add explicit locals - for (name,e) in pairs(locals) - if haskey(globals, name) - throw(LoweringError(e, "Variable `$name` declared both local and global")) - elseif haskey(var_ids, name) - vk = ctx.var_info[var_ids[name]].kind + for (varkey,e) in pairs(locals) + if haskey(globals, varkey) + throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global")) + elseif haskey(var_ids, varkey) + vk = ctx.var_info[var_ids[varkey]].kind if vk === :argument && is_outer_lambda_scope - throw(LoweringError(e, "local variable name `$name` conflicts with an argument")) + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with an argument")) elseif vk === :static_parameter - throw(LoweringError(e, "local variable name `$name` conflicts with a static parameter")) + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) end - elseif var_kind(ctx, name) === :static_parameter - throw(LoweringError(e, "local variable name `$name` conflicts with a static parameter")) + elseif var_kind(ctx, varkey) === :static_parameter + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) end - var_ids[name] = new_var(ctx, name, :local) + var_ids[varkey] = new_var(ctx, varkey, :local) end # Add explicit globals - for (name,e) in pairs(globals) - if haskey(var_ids, name) - vk = ctx.var_info[var_ids[name]].kind + for (varkey,e) in pairs(globals) + if haskey(var_ids, varkey) + vk = ctx.var_info[var_ids[varkey]].kind if vk === :argument 
&& is_outer_lambda_scope - throw(LoweringError(e, "global variable name `$name` conflicts with an argument")) + throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with an argument")) elseif vk === :static_parameter - throw(LoweringError(e, "global variable name `$name` conflicts with a static parameter")) + throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) end elseif var_kind(ctx, name) === :static_parameter - throw(LoweringError(e, "global variable name `$name` conflicts with a static parameter")) + throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) end - var_ids[name] = new_var(ctx, name, :global) + var_ids[varkey] = new_var(ctx, varkey, :global) end # Compute implicit locals and globals @@ -244,10 +268,10 @@ function make_scope(ctx, ex, scope_type, lambda_info) is_soft_scope = false # All non-local assignments are implicitly global at top level - for (name,e) in assignments - if !haskey(locals, name) - new_var(ctx, name, :global) - push!(ctx.implicit_toplevel_globals, name) + for (varkey,e) in assignments + if !haskey(locals, varkey) + new_var(ctx, varkey, :global) + push!(ctx.implicit_toplevel_globals, varkey) end end else @@ -256,25 +280,28 @@ function make_scope(ctx, ex, scope_type, lambda_info) (scope_type === :neutral ? parentscope.is_soft : scope_type === :soft) # Outside top level code, most assignments create local variables implicitly - for (name,e) in assignments - vk = haskey(var_ids, name) ? - ctx.var_info[var_ids[name]].kind : - var_kind(ctx, name, true) + for (varkey,e) in assignments + vk = haskey(var_ids, varkey) ? 
+ ctx.var_info[var_ids[varkey]].kind : + var_kind(ctx, varkey, true) if vk === :static_parameter - throw(LoweringError(e, "local variable name `$name` conflicts with a static parameter")) + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) elseif vk !== nothing continue end # Assignment is to a newly discovered variable name is_ambiguous_local = false if in_toplevel_thunk && !is_hard_scope + # FIXME: Scope resolution: find module for varkey # In a top level thunk but *inside* a nontrivial scope - if (name in ctx.implicit_toplevel_globals || isdefined(ctx.mod, Symbol(name))) + var_mod = ctx.scope_layers[varkey.layer].mod + if (varkey in ctx.implicit_toplevel_globals || + isdefined(var_mod, Symbol(varkey.name))) # Special scope rules to make assignments to globals work # like assignments to locals do inside a function. if is_soft_scope # Soft scope (eg, for loop in REPL) => treat as a global - new_var(ctx, name, :global) + new_var(ctx, varkey, :global) continue else # Ambiguous case (eg, nontrivial scopes in package top level code) @@ -283,14 +310,14 @@ function make_scope(ctx, ex, scope_type, lambda_info) end end end - var_ids[name] = new_var(ctx, name, :local, is_ambiguous_local) + var_ids[varkey] = new_var(ctx, varkey, :local, is_ambiguous_local) end end - for name in used - if lookup_var(ctx, name) === nothing + for varkey in used + if lookup_var(ctx, varkey) === nothing # Add other newly discovered identifiers as globals - new_var(ctx, name, :global) + new_var(ctx, varkey, :global) end end @@ -311,8 +338,7 @@ function _resolve_scopes!(ctx, ex) if is_placeholder(ex) return # FIXME - make these K"placeholder"? end - # TODO: Maybe we shouldn't do this in place?? 
- id = lookup_var(ctx, ex.name_val) + id = lookup_var(ctx, VarKey(ex)) setattr!(ctx.graph, ex.id, var_id=id) elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" return @@ -324,7 +350,7 @@ function _resolve_scopes!(ctx, ex) # elseif islocal elseif k == K"lambda" lambda_info = ex.lambda_info - scope = make_scope(ctx, ex, nothing, lambda_info) + scope = analyze_scope(ctx, ex, nothing, lambda_info) push!(ctx.scope_stack, scope) # Resolve args and static parameters so that variable IDs get pushed # back into the original tree (not required for downstream processing) @@ -340,7 +366,7 @@ function _resolve_scopes!(ctx, ex) pop!(ctx.scope_stack) setattr!(ctx.graph, ex.id, lambda_locals=scope.lambda_locals) elseif k == K"scope_block" - scope = make_scope(ctx, ex, ex.scope_type, nothing) + scope = analyze_scope(ctx, ex, ex.scope_type, nothing) push!(ctx.scope_stack, scope) for e in children(ex) _resolve_scopes!(ctx, e) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 9804dd35b8bb4..b9f3e527341ab 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -116,7 +116,7 @@ function Base.getproperty(graph::SyntaxGraph, name::Symbol) return getattr(graph, name) end -function sethead!(graph, id::NodeId, h::SyntaxHead) +function sethead!(graph, id::NodeId, h::JuliaSyntax.SyntaxHead) graph.kind[id] = kind(h) f = flags(h) if f != 0 @@ -227,7 +227,7 @@ function JuliaSyntax.child(tree::SyntaxTree, i::Integer) end function JuliaSyntax.head(tree::SyntaxTree) - SyntaxHead(kind(tree), flags(tree)) + JuliaSyntax.SyntaxHead(kind(tree), flags(tree)) end function JuliaSyntax.kind(tree::SyntaxTree) @@ -244,7 +244,7 @@ struct SourceRef file::SourceFile first_byte::Int # TODO: Do we need the green node, or would last_byte suffice? 
- green_tree::GreenNode + green_tree::JuliaSyntax.GreenNode end JuliaSyntax.first_byte(src::SourceRef) = src.first_byte @@ -263,8 +263,16 @@ JuliaSyntax.filename(src::LineNumberNode) = string(src.file) JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode) = src JuliaSyntax.source_location(src::LineNumberNode) = (src.line, 0) +function JuliaSyntax.highlight(io::IO, src::LineNumberNode; note="") + print(io, src, " - ", note, "\n") +end + +function JuliaSyntax.highlight(io::IO, src::SourceRef; kws...) + highlight(io, src.file, first_byte(src):last_byte(src); kws...) +end + function Base.show(io::IO, ::MIME"text/plain", src::SourceRef) - highlight(io, src.file, first_byte(src):last_byte(src), note="these are the bytes you're looking for 😊", context_lines_inner=20) + highlight(io, src; note="these are the bytes you're looking for 😊", context_lines_inner=20) end function sourceref(tree::SyntaxTree) @@ -305,10 +313,12 @@ attrsummary(name, value::Number) = "$name=$value" function _value_string(ex) k = kind(ex) - str = k == K"Identifier" || is_operator(k) ? ex.name_val : + str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : k == K"SSAValue" ? "ssa" : k == K"core" ? "core.$(ex.name_val)" : k == K"top" ? "top.$(ex.name_val)" : + k == K"Symbol" ? ":$(ex.name_val)" : + k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : k == K"slot" ? 
"slot" : repr(get(ex, :value, nothing)) id = get(ex, :var_id, nothing) @@ -429,7 +439,7 @@ function SyntaxList(graph::SyntaxGraph, ids::AbstractVector{NodeId}) end SyntaxList(graph::SyntaxGraph) = SyntaxList(graph, Vector{NodeId}()) -SyntaxList(ctx) = SyntaxList(ctx.graph) +SyntaxList(ctx) = SyntaxList(syntax_graph(ctx)) syntax_graph(lst::SyntaxList) = lst.graph diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 81714a8e1b5ff..ebd97cfabd5a0 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -13,50 +13,6 @@ end function Base.showerror(io::IO, exc::LoweringError) print(io, "LoweringError:\n") src = sourceref(exc.ex) - highlight(io, src.file, first_byte(src):last_byte(src), note=exc.msg) -end - -function _chk_code(ex, cond) - cond_str = string(cond) - quote - ex = $(esc(ex)) - @assert ex isa SyntaxTree - try - ok = $(esc(cond)) - if !ok - throw(LoweringError(ex, "Expected `$($cond_str)`")) - end - catch - throw(LoweringError(ex, "Structure error evaluating `$($cond_str)`")) - end - end -end - -# Internal error checking macro. -# Check a condition involving an expression, throwing a LoweringError if it -# doesn't evaluate to true. Does some very simple pattern matching to attempt -# to extract the expression variable from the left hand side. -macro chk(cond) - ex = cond - while true - if ex isa Symbol - break - elseif ex.head == :call - ex = ex.args[2] - elseif ex.head == :ref - ex = ex.args[1] - elseif ex.head == :. 
- ex = ex.args[1] - elseif ex.head in (:(==), :(in), :<, :>) - ex = ex.args[1] - else - error("Can't analyze $cond") - end - end - _chk_code(ex, cond) -end - -macro chk(ex, cond) - _chk_code(ex, cond) + highlight(io, src; note=exc.msg) end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 23dc5d21b3244..6fdf0ae6f3d03 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -5,14 +5,6 @@ using JuliaLowering using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode -function wrapscope(ex, scope_type) - makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) -end - -function softscope_test(ex) - wrapscope(wrapscope(ex, :neutral), :soft) -end - #------------------------------------------------------------------------------- # Demos of the prototype @@ -39,87 +31,164 @@ let end """ +src = """ +begin + function f(x) + nothing + end + + f(1) +end +""" + # src = """ -# begin -# function f(x) -# y = x + 1 -# "hello world", x, y +# x + y +# """ + +# src = """ +# module A +# function f(x)::Int +# x + 1 # end # -# f(1) +# b = f(2) # end # """ # src = """ -# x = 1 +# function f() +# end # """ - +# # src = """ -# x + y +# # import A.B: C.c as d, E.e as f +# # import JuliaLowering +# using JuliaLowering +# """ +# +# src = """ +# module A +# z = 1 + 1 +# end +# """ +# +# src = """ +# begin +# x = 10 +# y = :(g(z)) +# quote +# f(\$(x+1), \$y) +# end +# end # """ -src = """ -module A - function f(x)::Int - x + 1 +module M + using JuliaLowering: @ast, @chk + using JuliaSyntax + + const someglobal = "global in M" + + # TODO: macrocall in macro call + # module A + # function var"@bar"(mctx, ex) + # end + # end + + # Macro with local variables + function var"@foo"(mctx, ex) + # :(let x = "local in @asdf expansion" + # (x, someglobal, $ex) + # end) + @ast mctx (@HERE) [K"let" + [K"block"(@HERE) + [K"="(@HERE) + "x"::K"Identifier"(@HERE) + "local in 
@asdf expansion"::K"String"(@HERE) + ] + ] + [K"block"(@HERE) + [K"tuple"(@HERE) + "x"::K"Identifier"(@HERE) + "someglobal"::K"Identifier"(@HERE) + ex + ] + ] + ] end - b = f(2) + # Recursive macro call + function var"@recursive"(mctx, N) + @chk kind(N) == K"Integer" + Nval = N.value::Int + if Nval < 1 + return N + end + # quote + # x = $N + # (@recursive $(Nval-1), x) + # end + @ast mctx (@HERE) [K"block" + [K"="(@HERE) + "x"::K"Identifier"(@HERE) + N + ] + [K"tuple"(@HERE) + "x"::K"Identifier"(@HERE) + [K"macrocall"(@HERE) + "@recursive"::K"Identifier" + (Nval-1)::K"Integer" + ] + ] + ] + end end -""" -src = """ -function f() -end -""" +# src = """ +# let +# x = 42 +# M.@foo x +# end +# """ src = """ -# import A.B: C.c as d, E.e as f -# import JuliaLowering -using JuliaLowering +M.@recursive 3 """ src = """ -module A - z = 1 + 1 +begin + x = 2 end """ -src = """ -begin - x = 10 - y = :(g(z)) - quote - f(\$(x+1), \$y) - end +function wrapscope(ex, scope_type) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) +end + +function softscope_test(ex) + g = JuliaLowering.ensure_attributes(ex.graph, scope_type=Symbol) + wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) end -""" -ex = parsestmt(SyntaxTree, src, filename="foo.jl") -# t = softscope_test(t) +ex = softscope_test(parsestmt(SyntaxTree, src, filename="foo.jl")) @info "Input code" ex in_mod = Main -ctx, ex_desugar = JuliaLowering.expand_forms(in_mod, ex) +ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) +# @info "Macro expanded" ex_macroexpand + +ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) @info "Desugared" ex_desugar -ctx2, ex_scoped = JuliaLowering.resolve_scopes!(ctx, ex_desugar) +ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) @info "Resolved scopes" ex_scoped -ctx3, ex_compiled = JuliaLowering.linearize_ir(ctx2, ex_scoped) +ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) @info "Linear IR" ex_compiled 
-ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx2.var_info, ex_compiled) +ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) @info "CodeInfo" ex_expr -x = 100 -y = 200 +x = 1 eval_result = Base.eval(in_mod, ex_expr) @info "Eval" eval_result -# Syntax tree ideas: Want following to work? -# This can be fully inferrable! -# -# t2[3].bindings[1].lhs.string -# t2[3].body[1].signature - diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index a276cb4415b76..3b1d0dce84d34 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -13,6 +13,8 @@ include("utils.jl") test_mod = Module() +#------------------------------------------------------------------------------- +# Scopes @test JuliaLowering.include_string(test_mod, """ let @@ -25,7 +27,36 @@ let end """) == (1, 2) +JuliaLowering.include_string(test_mod, """ + x = 101 + y = 202 +""") +@test test_mod.x == 101 +@test test_mod.y == 202 +@test JuliaLowering.include_string(test_mod, "x + y") == 303 + +# wrap expression in scope block of `scope_type` +function wrapscope(ex, scope_type) + g = JuliaLowering.ensure_attributes(ex.graph, scope_type=Symbol) + ex = JuliaLowering.reparent(g, ex) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) +end +assign_z_2 = parsestmt(SyntaxTree, "begin z = 2 end", filename="foo.jl") +JuliaLowering.eval(test_mod, :(z=1)) +@test test_mod.z == 1 +# neutral (eg, for loops) and hard (eg, let) scopes create a new binding for z +JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :neutral)) +@test test_mod.z == 1 +JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) +@test test_mod.z == 1 +# but wrapping neutral scope in soft scope uses the existing binding in test_mod +JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) +@test test_mod.z == 2 + + +#------------------------------------------------------------------------------- +# Functions @test 
JuliaLowering.include_string(test_mod, """ begin function f(x) @@ -38,14 +69,7 @@ end """) == ("hi", 1, 2) -JuliaLowering.include_string(test_mod, """ - x = 101 - y = 202 -""") -@test test_mod.x == 101 -@test test_mod.y == 202 -@test JuliaLowering.include_string(test_mod, "x + y") == 303 - +#------------------------------------------------------------------------------- # module A = JuliaLowering.include_string(test_mod, """ module A @@ -68,6 +92,7 @@ end @test !isdefined(B, :include) @test !isdefined(B, :Base) +#------------------------------------------------------------------------------- # using / import JuliaLowering.include_string(test_mod, """ using JuliaSyntax @@ -95,6 +120,7 @@ end """) @test C.D.f === C.E.f +#------------------------------------------------------------------------------- # Syntax quoting & interpolation ex = JuliaLowering.include_string(test_mod, """ begin @@ -119,4 +145,78 @@ end @test sourcetext(ex[1][2]) == "x+1" @test sourcetext(ex[1][3]) == "g(z)" +#------------------------------------------------------------------------------- +# Macro expansion + +Base.eval(test_mod, :( +module M + using JuliaLowering: @ast, @chk + using JuliaSyntax + + const someglobal = "global in module M" + + # Macro with local variables + function var"@foo"(mctx, ex) + # TODO + # :(let x = "local in @foo expansion" + # (x, someglobal, $ex) + # end) + @ast mctx (@HERE) [K"let" + [K"block"(@HERE) + [K"="(@HERE) + "x"::K"Identifier"(@HERE) + "`x` from @foo"::K"String"(@HERE) + ] + ] + [K"block"(@HERE) + [K"tuple"(@HERE) + "x"::K"Identifier"(@HERE) + "someglobal"::K"Identifier"(@HERE) + ex + ] + ] + ] + end + + # Recursive macro call + function var"@recursive"(mctx, N) + @chk kind(N) == K"Integer" + Nval = N.value::Int + if Nval < 1 + return N + end + # TODO + # quote + # x = $N + # (@recursive $(Nval-1), x) + # end + @ast mctx (@HERE) [K"block" + [K"="(@HERE) + "x"::K"Identifier"(@HERE) + N + ] + [K"tuple"(@HERE) + "x"::K"Identifier"(@HERE) + [K"macrocall"(@HERE) 
+ "@recursive"::K"Identifier" + (Nval-1)::K"Integer" + ] + ] + ] + end +end +)) + +@test JuliaLowering.include_string(test_mod, """ +let + x = "`x` from outer scope" + M.@foo x +end +""") == ("`x` from @foo", "global in module M", "`x` from outer scope") + + +@test JuliaLowering.include_string(test_mod, """ +M.@recursive 3 +""") == (3, (2, (1, 0))) + end From abeb3876bd4acaa0a902440497ec7404a5cf87ec Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 15 May 2024 18:24:52 +1000 Subject: [PATCH 0746/1109] Lowering of macro definitions --- JuliaLowering/src/ast.jl | 90 ++++++++++--------- JuliaLowering/src/desugaring.jl | 60 +++++++++++-- JuliaLowering/src/eval.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 6 ++ JuliaLowering/src/syntax_graph.jl | 8 ++ JuliaLowering/test/demo.jl | 90 ++++++++++--------- JuliaLowering/test/runtests.jl | 129 ++++++++++++++++----------- 7 files changed, 245 insertions(+), 140 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 0b8c7bd39d05d..30bf50f931f46 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -331,19 +331,28 @@ function is_function_def(ex) (k == K"=" && numchildren(ex) == 2 && is_eventually_call(ex[1])) end -function identifier_name(ex) - kind(ex) == K"var" ? ex[1] : ex -end - function is_valid_name(ex) n = identifier_name(ex).name_val n !== "ccall" && n !== "cglobal" end +function identifier_name(ex) + kind(ex) == K"var" ? ex[1] : ex +end + function decl_var(ex) kind(ex) == K"::" ? 
ex[1] : ex end +# Remove empty parameters block, eg, in the arg list of `f(x, y;)` +function remove_empty_parameters(args) + i = length(args) + while i > 0 && kind(args[i]) == K"parameters" && numchildren(args[i]) == 0 + i -= 1 + end + args[1:i] +end + # given a complex assignment LHS, return the symbol that will ultimately be assigned to function assigned_name(ex) k = kind(ex) @@ -355,48 +364,49 @@ function assigned_name(ex) end #------------------------------------------------------------------------------- -# @chk: AST structure checking tool -function _chk_code(ex, cond) - cond_str = string(cond) +# @chk: Basic AST structure checking tool +# +# Check a condition involving an expression, throwing a LoweringError if it +# doesn't evaluate to true. Does some very simple pattern matching to attempt +# to extract the expression variable from the left hand side. +# +# Forms: +# @chk pred(ex) +# @chk pred(ex) msg +# @chk pred(ex) (msg_display_ex, msg) +macro chk(cond, msg=nothing) + if Meta.isexpr(msg, :tuple) + ex = msg.args[1] + msg = msg.args[2] + else + ex = cond + while true + if ex isa Symbol + break + elseif ex.head == :call + ex = ex.args[2] + elseif ex.head == :ref + ex = ex.args[1] + elseif ex.head == :. + ex = ex.args[1] + elseif ex.head in (:(==), :(in), :<, :>) + ex = ex.args[1] + else + error("Can't analyze $cond") + end + end + end quote ex = $(esc(ex)) @assert ex isa SyntaxTree - try - ok = $(esc(cond)) - if !ok - throw(LoweringError(ex, "Expected `$($cond_str)`")) - end + ok = try + $(esc(cond)) catch - throw(LoweringError(ex, "Structure error evaluating `$($cond_str)`")) + false end - end -end - -# Internal error checking macro. -# Check a condition involving an expression, throwing a LoweringError if it -# doesn't evaluate to true. Does some very simple pattern matching to attempt -# to extract the expression variable from the left hand side. 
-macro chk(cond) - ex = cond - while true - if ex isa Symbol - break - elseif ex.head == :call - ex = ex.args[2] - elseif ex.head == :ref - ex = ex.args[1] - elseif ex.head == :. - ex = ex.args[1] - elseif ex.head in (:(==), :(in), :<, :>) - ex = ex.args[1] - else - error("Can't analyze $cond") + if !ok + throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg)))) end end - _chk_code(ex, cond) -end - -macro chk(ex, cond) - _chk_code(ex, cond) end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a5c8dfa53199e..bf2472c78064f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -155,7 +155,7 @@ function analyze_function_arg(full_ex) end break elseif k == K"..." - @chk full_ex !is_slurp + @chk !is_slurp (full_ex,"nested `...` in function argument") @chk numchildren(ex) == 1 is_slurp = true ex = ex[1] @@ -298,6 +298,43 @@ function expand_function_def(ctx, ex) end end +function _make_macro_name(ctx, name) + @chk kind(name) == K"Identifier" (name, "invalid macro name") + ex = mapleaf(ctx, name, K"Identifier") + ex.name_val = "@$(name.name_val)" + ex +end + +# flisp: expand-macro-def +function expand_macro_def(ctx, ex) + @chk numchildren(ex) >= 1 (ex,"invalid macro definition") + if numchildren(ex) == 1 + name = ex[1] + # macro with zero methods + # `macro m end` + return @ast ctx ex [K"function" _make_macro_name(ctx, name)] + end + # TODO: Making this manual pattern matching robust is such a pain!!! 
+ sig = ex[1] + @chk (kind(sig) == K"call" && numchildren(sig) >= 1) (sig, "invalid macro signature") + name = sig[1] + args = remove_empty_parameters(children(sig)) + @chk kind(args[end]) != K"parameters" (args[end], "macros cannot accept keyword arguments") + ret = @ast ctx ex [K"function" + [K"call"(sig) + _make_macro_name(ctx, name) + [K"::" + "__context__"::K"Identifier"(scope_layer=name.scope_layer) + MacroContext::K"Value" + ] + # flisp: We don't mark these @nospecialize because all arguments to + # new macros will be of type SyntaxTree + args[2:end]... + ] + ex[2] + ] +end + function _append_importpath(ctx, path_spec, path) prev_was_dot = true for component in children(path) @@ -460,13 +497,17 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) elseif k == K"." @chk numchildren(ex) == 2 @chk kind(ex[2]) == K"Identifier" - @ast ctx ex [K"call" - "getproperty"::K"top" - ex[1] - ex[2]=>K"Symbol" - ] + expand_forms_2(ctx, + @ast ctx ex [K"call" + "getproperty"::K"top" + ex[1] + ex[2]=>K"Symbol" + ] + ) elseif k == K"function" expand_forms_2(ctx, expand_function_def(ctx, ex)) + elseif k == K"macro" + expand_forms_2(ctx, expand_macro_def(ctx, ex)) elseif k == K"let" expand_forms_2(ctx, expand_let(ctx, ex)) elseif k == K"local" || k == K"global" @@ -504,6 +545,11 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) expand_import(ctx, ex) elseif k == K"export" || k == K"public" TODO(ex) + elseif k == K"ref" + if numchildren(ex) > 2 + TODO(ex, "ref expansion") + end + expand_forms_2(ctx, @ast ctx ex [K"call" "getindex"::K"top" ex[1] ex[2]]) elseif k == K"toplevel" # The toplevel form can't be lowered here - it needs to just be quoted # and passed through to a call to eval. 
@@ -514,6 +560,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) ctx.mod ::K"Value" [K"inert" ex] ] + elseif k == K"inert" + ex elseif !haschildren(ex) ex else diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 2c6e96bcedb5c..6e4179f4545c1 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -199,7 +199,7 @@ end function InterpolationContext() graph = SyntaxGraph() ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, - value=Any, name_val=String) + value=Any, name_val=String, scope_layer=LayerId) InterpolationContext(freeze_attrs(graph)) end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 9c4e24a8c2369..24b59c8fee5e3 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -240,6 +240,12 @@ function expand_macro(ctx, ex) end if expanded isa SyntaxTree + if syntax_graph(expanded) !== syntax_graph(ctx) + # If the macro has produced syntax outside the macro context, copy it over. + # TODO: Do we expect this always to happen? What is the API for access + # to the macro expansion context? 
+ expanded = copy_ast(ctx, expanded) + end new_layer = new_scope_layer(ctx.scope_layers, parentmodule(macfunc)) ctx2 = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) expand_forms_1(ctx2, expanded) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index b9f3e527341ab..c6731b0496b5c 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -173,6 +173,10 @@ function Base.getproperty(tree::SyntaxTree, name::Symbol) end end +function Base.setproperty!(tree::SyntaxTree, name::Symbol, val) + return setattr!(tree.graph, tree.id; name=>val) +end + function Base.propertynames(tree::SyntaxTree) attrnames(tree) end @@ -449,6 +453,10 @@ Base.IndexStyle(::Type{<:SyntaxList}) = IndexLinear() Base.getindex(v::SyntaxList, i::Int) = SyntaxTree(v.graph, v.ids[i]) +function Base.getindex(v::SyntaxList, r::UnitRange) + SyntaxList(v.graph, view(v.ids, r)) +end + function Base.setindex!(v::SyntaxList, tree::SyntaxTree, i::Int) v.graph === tree.graph || error("Mismatching syntax graphs") v.ids[i] = tree.id diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 6fdf0ae6f3d03..9e4c321e7b616 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -82,42 +82,38 @@ end # end # """ +JuliaLowering.include_string(Main, """ module M - using JuliaLowering: @ast, @chk + using JuliaLowering: JuliaLowering, @ast, @chk using JuliaSyntax - const someglobal = "global in M" + # Introspection + macro __MODULE__() + __context__.mod + end + + macro __FILE__() + JuliaLowering.filename(__context__.macroname) + end + + macro __LINE__() + JuliaLowering.source_location(__context__.macroname)[1] + end - # TODO: macrocall in macro call - # module A - # function var"@bar"(mctx, ex) - # end - # end + someglobal = "global in module M" # Macro with local variables - function var"@foo"(mctx, ex) - # :(let x = "local in @asdf expansion" - # (x, someglobal, $ex) - # end) - @ast mctx 
(@HERE) [K"let" - [K"block"(@HERE) - [K"="(@HERE) - "x"::K"Identifier"(@HERE) - "local in @asdf expansion"::K"String"(@HERE) - ] - ] - [K"block"(@HERE) - [K"tuple"(@HERE) - "x"::K"Identifier"(@HERE) - "someglobal"::K"Identifier"(@HERE) - ex - ] - ] - ] + macro foo(ex) + :(let x = "`x` from @foo" + (x, someglobal, \$ex) + end) end +end +""") +Base.eval(M, quote # Recursive macro call - function var"@recursive"(mctx, N) + function var"@recursive"(__context__, N) @chk kind(N) == K"Integer" Nval = N.value::Int if Nval < 1 @@ -127,7 +123,7 @@ module M # x = $N # (@recursive $(Nval-1), x) # end - @ast mctx (@HERE) [K"block" + @ast __context__ (@HERE) [K"block" [K"="(@HERE) "x"::K"Identifier"(@HERE) N @@ -141,6 +137,15 @@ module M ] ] end +end) + +function wrapscope(ex, scope_type) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) +end + +function softscope_test(ex) + g = JuliaLowering.ensure_attributes(ex.graph, scope_type=Symbol) + wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) end # src = """ @@ -154,27 +159,26 @@ src = """ M.@recursive 3 """ -src = """ -begin - x = 2 -end -""" - -function wrapscope(ex, scope_type) - makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) -end +# src = """ +# macro mmm(a; b=2) +# end +# macro A.b(ex) +# end +# """ -function softscope_test(ex) - g = JuliaLowering.ensure_attributes(ex.graph, scope_type=Symbol) - wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) -end +# TODO: +# "hygiene bending" / (being unhygenic, or bending hygiene to the context of a +# macro argument on purpose) +# * bend to macro name to get to parent layer? 
+# * already needed in `#self#` argument -ex = softscope_test(parsestmt(SyntaxTree, src, filename="foo.jl")) +ex = parsestmt(SyntaxTree, src, filename="foo.jl") +#ex = softscope_test(ex) @info "Input code" ex in_mod = Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) -# @info "Macro expanded" ex_macroexpand +@info "Macro expanded" ex_macroexpand ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) @info "Desugared" ex_desugar diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 3b1d0dce84d34..0e378f4c29984 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -145,67 +145,58 @@ end @test sourcetext(ex[1][2]) == "x+1" @test sourcetext(ex[1][3]) == "g(z)" +# Test expression flags are preserved during interpolation +@test JuliaSyntax.is_infix_op_call(JuliaLowering.include_string(test_mod, """ +let + x = 1 + :(\$x + \$x) +end +""")) + #------------------------------------------------------------------------------- # Macro expansion -Base.eval(test_mod, :( +JuliaLowering.include_string(test_mod, """ module M - using JuliaLowering: @ast, @chk + using JuliaLowering: JuliaLowering, @ast, @chk using JuliaSyntax - const someglobal = "global in module M" + # Introspection + macro __MODULE__() + __context__.mod + end - # Macro with local variables - function var"@foo"(mctx, ex) - # TODO - # :(let x = "local in @foo expansion" - # (x, someglobal, $ex) - # end) - @ast mctx (@HERE) [K"let" - [K"block"(@HERE) - [K"="(@HERE) - "x"::K"Identifier"(@HERE) - "`x` from @foo"::K"String"(@HERE) - ] - ] - [K"block"(@HERE) - [K"tuple"(@HERE) - "x"::K"Identifier"(@HERE) - "someglobal"::K"Identifier"(@HERE) - ex - ] - ] - ] + macro __FILE__() + JuliaLowering.filename(__context__.macroname) end - # Recursive macro call - function var"@recursive"(mctx, N) - @chk kind(N) == K"Integer" - Nval = N.value::Int - if Nval < 1 - return N - end - # TODO - # quote - # x = $N - # (@recursive $(Nval-1), x) - # 
end - @ast mctx (@HERE) [K"block" - [K"="(@HERE) - "x"::K"Identifier"(@HERE) - N - ] - [K"tuple"(@HERE) - "x"::K"Identifier"(@HERE) - [K"macrocall"(@HERE) - "@recursive"::K"Identifier" - (Nval-1)::K"Integer" - ] - ] - ] + macro __LINE__() + JuliaLowering.source_location(__context__.macroname)[1] + end + + someglobal = "global in module M" + + # Macro with local variables + macro foo(ex) + :(let x = "`x` from @foo" + (x, someglobal, \$ex) + end) end + + # # Recursive macro call + # # TODO: Need branching! + # macro recursive(N) + # Nval = N.value #::Int + # if Nval < 1 + # return N + # end + # quote + # x = \$N + # (@recursive \$(Nval-1), x) + # end + # end end -)) +""") @test JuliaLowering.include_string(test_mod, """ let @@ -214,9 +205,47 @@ let end """) == ("`x` from @foo", "global in module M", "`x` from outer scope") +@test JuliaLowering.include_string(test_mod, """ +#line1 +(M.@__MODULE__(), M.@__FILE__(), M.@__LINE__()) +""", "foo.jl") == (test_mod, "foo.jl", 2) + +Base.eval(test_mod.M, :( +# Recursive macro call +function var"@recursive"(mctx, N) + @chk kind(N) == K"Integer" + Nval = N.value::Int + if Nval < 1 + return N + end + @ast mctx (@HERE) [K"block" + [K"="(@HERE) + "x"::K"Identifier"(@HERE) + N + ] + [K"tuple"(@HERE) + "x"::K"Identifier"(@HERE) + [K"macrocall"(@HERE) + "@recursive"::K"Identifier" + (Nval-1)::K"Integer" + ] + ] + ] +end +)) @test JuliaLowering.include_string(test_mod, """ M.@recursive 3 """) == (3, (2, (1, 0))) +@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +macro mmm(a; b=2) +end +""") + +@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +macro A.b(ex) +end +""") + end From d3a2502f3d2d040eeecf5954dfeedcaaf9034fd8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 18 May 2024 15:56:07 +1000 Subject: [PATCH 0747/1109] Make assignments in macro expansions implicitly local When expanding a macro such as ```julia macro m() quote x = 100 end end ``` defined in 
module `M` in a module `X` we need to make some decisions: * Is `x` global or local? In which module? * Is x renamed? Like other symbols referenced in code emitted by `@m`, it seems that `x` should "belong to `M`". When `@m` is expanded at top level in `X`, "uniform" top level semantics would imply `M.x` is set, but this seems unlikely to be what the macro author intended - it changes the meaning of `@m` drastically depending on whether it's expanded within a scope block or not, eg, `begin @m end` vs `let ; @m end`. The current macro expander renames `x` to some internal variable name like `var"JuliaLang/JuliaLowering.jl#1376#x"`, but still sets it at top level in `X`. This is also unlikely to be useful - it creates an unnecessary junk GC root to a value which will never be referenced again. Instead, this change makes all such assignments to symbols generated by the macro implicitly local. --- JuliaLowering/src/desugaring.jl | 2 ++ JuliaLowering/src/macro_expansion.jl | 9 ++++---- JuliaLowering/src/scope_analysis.jl | 34 +++++++++++++++++----------- JuliaLowering/test/demo.jl | 32 +++++++++++++++++++------- JuliaLowering/test/runtests.jl | 29 +++++++++++++++++++++++- 5 files changed, 80 insertions(+), 26 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index bf2472c78064f..3c35617c4c64c 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -537,6 +537,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) "tuple"::K"core" expand_forms_2(ctx, children(ex))... ] + elseif k == K"$" + throw(LoweringError(ex, "`\$` expression outside quote")) elseif k == K"module" # TODO: check-toplevel expand_module(ctx, ex) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 24b59c8fee5e3..25c4d8d02dc38 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -18,6 +18,7 @@ generates a new layer. 
struct ScopeLayer id::LayerId mod::Module + is_macro_expansion::Bool end struct MacroExpansionContext{GraphType} <: AbstractLoweringContext @@ -32,11 +33,11 @@ function MacroExpansionContext(ctx, mod::Module) var_id=VarId, scope_layer=LayerId) layers = Vector{ScopeLayer}() - MacroExpansionContext(graph, Ref{VarId}(1), layers, new_scope_layer(layers, mod)) + MacroExpansionContext(graph, Ref{VarId}(1), layers, new_scope_layer(layers, mod, false)) end -function new_scope_layer(layers, mod::Module) - layer = ScopeLayer(length(layers)+1, mod) +function new_scope_layer(layers, mod::Module, is_macro_expansion) + layer = ScopeLayer(length(layers)+1, mod, is_macro_expansion) push!(layers, layer) return layer end @@ -246,7 +247,7 @@ function expand_macro(ctx, ex) # to the macro expansion context? expanded = copy_ast(ctx, expanded) end - new_layer = new_scope_layer(ctx.scope_layers, parentmodule(macfunc)) + new_layer = new_scope_layer(ctx.scope_layers, parentmodule(macfunc), true) ctx2 = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) expand_forms_1(ctx2, expanded) else diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 2b3108553dd37..365fe684b94b9 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -190,6 +190,7 @@ function var_kind(ctx, varkey::VarKey, exclude_toplevel_globals=false) isnothing(id) ? nothing : ctx.var_info[id].kind end +# FIXME: This name is a misnomer now. It's more like "maybe_new_var" ... function new_var(ctx, varkey::VarKey, kind::Symbol, is_ambiguous_local=false) id = kind === :global ? 
get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) @@ -221,12 +222,12 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # Add lambda arguments if !isnothing(lambda_info) for a in lambda_info.args - vk = VarKey(a) - var_ids[vk] = new_var(ctx, vk, :argument) + varkey = VarKey(a) + var_ids[varkey] = new_var(ctx, varkey, :argument) end for a in lambda_info.static_parameters - vk = VarKey(a) - var_ids[vk] = new_var(ctx, vk, :static_parameter) + varkey = VarKey(a) + var_ids[varkey] = new_var(ctx, varkey, :static_parameter) end end @@ -256,7 +257,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) elseif vk === :static_parameter throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) end - elseif var_kind(ctx, name) === :static_parameter + elseif var_kind(ctx, varkey) === :static_parameter throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) end var_ids[varkey] = new_var(ctx, varkey, :global) @@ -267,11 +268,19 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) is_hard_scope = false is_soft_scope = false - # All non-local assignments are implicitly global at top level + # Assignments are implicitly global at top level, unless they come from + # a macro expansion for (varkey,e) in assignments - if !haskey(locals, varkey) - new_var(ctx, varkey, :global) - push!(ctx.implicit_toplevel_globals, varkey) + vk = haskey(var_ids, varkey) ? 
+ ctx.var_info[var_ids[varkey]].kind : + var_kind(ctx, varkey, true) + if vk === nothing + if ctx.scope_layers[varkey.layer].is_macro_expansion + var_ids[varkey] = new_var(ctx, varkey, :local) + else + new_var(ctx, varkey, :global) + push!(ctx.implicit_toplevel_globals, varkey) + end end end else @@ -292,11 +301,10 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # Assignment is to a newly discovered variable name is_ambiguous_local = false if in_toplevel_thunk && !is_hard_scope - # FIXME: Scope resolution: find module for varkey # In a top level thunk but *inside* a nontrivial scope - var_mod = ctx.scope_layers[varkey.layer].mod - if (varkey in ctx.implicit_toplevel_globals || - isdefined(var_mod, Symbol(varkey.name))) + layer = ctx.scope_layers[varkey.layer] + if !layer.is_macro_expansion && (varkey in ctx.implicit_toplevel_globals || + isdefined(layer.mod, Symbol(varkey.name))) # Special scope rules to make assignments to globals work # like assignments to locals do inside a function. 
if is_soft_scope diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9e4c321e7b616..e84debfbaa153 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -104,10 +104,19 @@ module M # Macro with local variables macro foo(ex) - :(let x = "`x` from @foo" + :(begin + x = "`x` from @foo" (x, someglobal, \$ex) end) end + + a_global = nothing + + macro set_a_global(val) + :(begin + global a_global = \$val + end) + end end """) @@ -148,17 +157,24 @@ function softscope_test(ex) wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) end -# src = """ -# let -# x = 42 -# M.@foo x -# end -# """ +src = """ +begin + x = 42 + M.@foo x +end +""" src = """ -M.@recursive 3 +begin + M.@set_a_global 42 + M.a_global +end """ +# src = """ +# M.@recursive 3 +# """ + # src = """ # macro mmm(a; b=2) # end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 0e378f4c29984..68a3f0fd109c9 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -178,11 +178,25 @@ module M # Macro with local variables macro foo(ex) - :(let x = "`x` from @foo" + :(begin + x = "`x` from @foo" (x, someglobal, \$ex) end) end + # Set `a_global` in M + macro set_a_global(val) + :(begin + global a_global = \$val + end) + end + + macro set_other_global(ex, val) + :(begin + global \$ex = \$val + end) + end + # # Recursive macro call # # TODO: Need branching! 
# macro recursive(N) @@ -204,12 +218,25 @@ let M.@foo x end """) == ("`x` from @foo", "global in module M", "`x` from outer scope") +@test !isdefined(test_mod.M, :x) @test JuliaLowering.include_string(test_mod, """ #line1 (M.@__MODULE__(), M.@__FILE__(), M.@__LINE__()) """, "foo.jl") == (test_mod, "foo.jl", 2) +@test !isdefined(test_mod.M, :a_global) +@test JuliaLowering.include_string(test_mod, """ +begin + M.@set_a_global 42 + M.a_global +end +""") == 42 + +JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 100") +@test !isdefined(test_mod.M, :global_in_test_mod) +@test test_mod.global_in_test_mod == 100 + Base.eval(test_mod.M, :( # Recursive macro call function var"@recursive"(mctx, N) From 7d1160d60376045076c508ed29a0f451834804aa Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 May 2024 15:42:58 +1000 Subject: [PATCH 0748/1109] Update CodeInfo layout to Julia version 1.12.0-DEV.512 --- JuliaLowering/README.md | 6 +- JuliaLowering/src/eval.jl | 192 +++++++++++++++++++++----------------- 2 files changed, 110 insertions(+), 88 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 4f093dbd347cd..ab359de8567ca 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -28,8 +28,10 @@ This work is intended to Note this is a very early work in progress; most things probably don't work! -1. Check out the caf/lowering-2 branch of JuliaSyntax.jl -2. Run the demo `include("test/lowering.jl")` +1. Use a recent dev version of Julia (need at least version 1.12.0-DEV.512) +2. Check out the caf/lowering-2 branch of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) +3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) +4. 
Run the demo `include("test/demo.jl")` # Design Notes diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 6e4179f4545c1..2552efd376a4a 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -6,79 +6,74 @@ function lower(mod::Module, ex) ex4 end -# CodeInfo constructor. TODO: Should be in Core? -function _CodeInfo(code, - codelocs, - ssavaluetypes, - ssaflags, - method_for_inference_limit_heuristics, - linetable, - slotnames, - slotflags, - slottypes, - rettype, - parent, - edges, - min_world, - max_world, - inferred, - propagate_inbounds, - has_fcall, - nospecializeinfer, - inlining, - constprop, - purity, - inlining_cost) - @eval $(Expr(:new, :(Core.CodeInfo), - convert(Vector{Any}, code), - convert(Vector{Int32}, codelocs), - convert(Any, ssavaluetypes), - convert(Vector{UInt32}, ssaflags), - convert(Any, method_for_inference_limit_heuristics), - convert(Any, linetable), - convert(Vector{Symbol}, slotnames), - convert(Vector{UInt8}, slotflags), - convert(Any, slottypes), - convert(Any, rettype), - convert(Any, parent), - convert(Any, edges), - convert(UInt64, min_world), - convert(UInt64, max_world), - convert(Bool, inferred), - convert(Bool, propagate_inbounds), - convert(Bool, has_fcall), - convert(Bool, nospecializeinfer), - convert(UInt8, inlining), - convert(UInt8, constprop), - convert(UInt16, purity), - convert(UInt16, inlining_cost))) +_CodeInfo_need_ver = v"1.12.0-DEV.512" +if VERSION < _CodeInfo_need_ver + function _CodeInfo(args...) + error("Constructing a CodeInfo using JuliaLowering currently requires Julia version $_CodeInfo_need_ver or greater") + end +else + # debuginfo changed completely as of https://github.com/JuliaLang/julia/pull/52415 + # nargs / isva was added as of https://github.com/JuliaLang/julia/pull/54341 + # CodeInfo constructor. 
TODO: Should be in Core + let + fns = fieldnames(Core.CodeInfo) + fts = fieldtypes(Core.CodeInfo) + conversions = [:(convert($t, $n)) for (t,n) in zip(fts, fns)] + + expected_fns = (:code, :debuginfo, :ssavaluetypes, :ssaflags, :slotnames, :slotflags, :slottypes, :parent, :method_for_inference_limit_heuristics, :edges, :min_world, :max_world, :nargs, :propagate_inbounds, :has_fcall, :nospecializeinfer, :isva, :inlining, :constprop, :purity, :inlining_cost) + expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt64, UInt64, UInt64, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16) + + code = if fns != expected_fns || fts != expected_fts + :(function _CodeInfo(args...) + error("Unrecognized CodeInfo layout: Maybe version $VERSION is to new for this version of JuliaLowering?") + end) + else + :(function _CodeInfo($(fns...)) + $(Expr(:new, :(Core.CodeInfo), conversions...)) + end) + end + + eval(@__MODULE__, code) + end +end + +function ir_debug_info(ex) + code = children(ex) + # Record low resolution locations in debug info + num_stmts = length(code) + codelocs = zeros(Int32, 3*num_stmts) + + topfile = Symbol(filename(ex)) + topline,_ = source_location(ex) + + edges = Core.DebugInfo[] + + for i in 1:num_stmts + line,_ = source_location(code[i]) + # TODO: Macro inlining stack filename(code[i]) + codelocs[3*i-2] = line + codelocs[3*i-1] = 0 # Index into edges + codelocs[3*i ] = 0 # Index into edges[linetable] + end + + codelocs = @ccall jl_compress_codelocs(topline::Int32, codelocs::Any, + num_stmts::Csize_t)::String + edges = Core.svec(edges...) 
+ Core.DebugInfo(topfile, nothing, edges, codelocs) end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function to_code_info(ex, mod, funcname, var_info, slot_rewrites) +function to_code_info(ex, mod, funcname, nargs, var_info, slot_rewrites) input_code = children(ex) - # Convert code to Expr and record low res locations in table - num_stmts = length(input_code) - code = Vector{Any}(undef, num_stmts) - codelocs = Vector{Int32}(undef, num_stmts) - linetable_map = Dict{Tuple{Int,String}, Int32}() - linetable = Any[] - for i in 1:length(code) - code[i] = to_lowered_expr(mod, var_info, input_code[i]) - fname = filename(input_code[i]) - lineno, _ = source_location(input_code[i]) - loc = (lineno, fname) - codelocs[i] = get!(linetable_map, loc) do - inlined_at = 0 # FIXME: nonzero for expanded macros - full_loc = Core.LineInfoNode(mod, Symbol(funcname), Symbol(fname), - Int32(lineno), Int32(inlined_at)) - push!(linetable, full_loc) - length(linetable) - end - end + code = Any[to_lowered_expr(mod, var_info, ex) for ex in input_code] + + debuginfo = ir_debug_info(ex) - # FIXME + # TODO: Set ssaflags based on call site annotations: + # - @inbounds annotations + # - call site @inline / @noinline + # - call site @assume_effects ssaflags = zeros(UInt32, length(code)) nslots = length(slot_rewrites) @@ -97,29 +92,53 @@ function to_code_info(ex, mod, funcname, var_info, slot_rewrites) slotflags[i] = 0x00 # FIXME!! 
end + # TODO: Set true for @propagate_inbounds + propagate_inbounds = false + # TODO: Set true if there's a foreigncall + has_fcall = false + # TODO: Set for @nospecializeinfer + nospecializeinfer = false + # TODO: Set based on @inline -> 0x01 or @noinline -> 0x02 + inlining = 0x00 + # TODO: Set based on @constprop :aggressive -> 0x01 or @constprop :none -> 0x02 + constprop = 0x00 + # TODO: Set based on Base.@assume_effects + purity = 0x0000 + + # The following CodeInfo fields always get their default values for + # uninferred code. + ssavaluetypes = length(code) # Why does the runtime code do this? + slottypes = nothing + parent = nothing + method_for_inference_limit_heuristics = nothing + edges = nothing + min_world = Csize_t(1) + max_world = typemax(Csize_t) + isva = false + inlining_cost = 0xffff + _CodeInfo( code, - codelocs, - num_stmts, # ssavaluetypes (why put num_stmts in here??) + debuginfo, + ssavaluetypes, ssaflags, - nothing, # method_for_inference_limit_heuristics - linetable, slotnames, slotflags, - nothing, # slottypes - Any, # rettype - nothing, # parent - nothing, # edges - Csize_t(1), # min_world - typemax(Csize_t), # max_world - false, # inferred - false, # propagate_inbounds - false, # has_fcall - false, # nospecializeinfer - 0x00, # inlining - 0x00, # constprop - 0x0000, # purity - 0xffff, # inlining_cost + slottypes, + parent, + method_for_inference_limit_heuristics, + edges, + min_world, + max_world, + nargs, + propagate_inbounds, + has_fcall, + nospecializeinfer, + isva, + inlining, + constprop, + purity, + inlining_cost ) end @@ -161,7 +180,8 @@ function to_lowered_expr(mod, var_info, ex) funcname = ex.lambda_info.is_toplevel_thunk ? 
"top-level scope" : "none" # FIXME - ir = to_code_info(ex[1], mod, funcname, var_info, ex.slot_rewrites) + nargs = length(ex.lambda_info.args) + ir = to_code_info(ex[1], mod, funcname, nargs, var_info, ex.slot_rewrites) if ex.lambda_info.is_toplevel_thunk Expr(:thunk, ir) else From 4d25a58c1dd453456f5909cc872be9bbbacdd4e5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 May 2024 15:48:38 +1000 Subject: [PATCH 0749/1109] Implement splatting into function calls --- JuliaLowering/src/desugaring.jl | 51 +++++++++++++++++++++++++---- JuliaLowering/src/scope_analysis.jl | 6 +--- JuliaLowering/test/runtests.jl | 18 ++++++++++ 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 3c35617c4c64c..92eca167745bf 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -76,13 +76,50 @@ function expand_let(ctx, ex) return blk end +# Wrap unsplatted arguments in `tuple`: +# `[a, b, xs..., c]` -> `[(a, b), xs, (c,)]` +function _wrap_unsplatted_args(ctx, call_ex, args) + wrapped = SyntaxList(ctx) + i = 1 + while i <= length(args) + if kind(args[i]) == K"..." + splatarg = args[i] + @chk numchildren(splatarg) == 1 + push!(wrapped, splatarg[1]) + else + i1 = i + # Find range of non-splatted args + while i < length(args) && kind(args[i+1]) != K"..." + i += 1 + end + push!(wrapped, @ast ctx call_ex [K"call" "tuple"::K"core" args[i1:i]...]) + end + i += 1 + end + wrapped +end + function expand_call(ctx, ex) - cs = expand_forms_2(ctx, children(ex)) - if is_infix_op_call(ex) || is_postfix_op_call(ex) - cs[1], cs[2] = cs[2], cs[1] + cs = children(ex) + if is_infix_op_call(ex) + @chk numchildren(ex) == 3 + cs = [cs[2], cs[1], cs[3]] + elseif is_postfix_op_call(ex) + @chk numchildren(ex) == 2 + cs = [cs[2], cs[1]] end # TODO: keywords - @ast ctx ex [K"call" cs...] + if any(kind(c) == K"..." 
for c in cs) + # Splatting, eg, `f(a, xs..., b)` + @ast ctx ex [K"call" + "_apply_iterate"::K"core" + "iterate"::K"top" + expand_forms_2(ctx, cs[1]) + expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, cs[2:end]))... + ] + else + @ast ctx ex [K"call" expand_forms_2(ctx, cs)...] + end end # Strip variable type declarations from within a `local` or `global`, returning @@ -533,10 +570,10 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) end elseif k == K"tuple" # TODO: named tuples - @ast ctx ex [K"call" + expand_forms_2(ctx, @ast ctx ex [K"call" "tuple"::K"core" - expand_forms_2(ctx, children(ex))... - ] + children(ex)... + ]) elseif k == K"$" throw(LoweringError(ex, "`\$` expression outside quote")) elseif k == K"module" diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 365fe684b94b9..9f43975cdadeb 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -177,10 +177,6 @@ function lookup_var(ctx, varkey::VarKey, exclude_toplevel_globals=false) return exclude_toplevel_globals ? nothing : get(ctx.global_vars, varkey, nothing) end -function current_scope(ctx) - last(ctx.scope_stack) -end - function var_kind(ctx, id::VarId) ctx.var_info[id].kind end @@ -208,7 +204,7 @@ end # identifiers to ctx.var_info and constructing a lookup table from identifier # names to their variable IDs function analyze_scope(ctx, ex, scope_type, lambda_info) - parentscope = isempty(ctx.scope_stack) ? nothing : current_scope(ctx) + parentscope = isempty(ctx.scope_stack) ? 
nothing : ctx.scope_stack[end] is_outer_lambda_scope = kind(ex) == K"lambda" is_toplevel = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk in_toplevel_thunk = is_toplevel || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 68a3f0fd109c9..5d89ce9616468 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -54,6 +54,24 @@ JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) @test test_mod.z == 2 +#------------------------------------------------------------------------------- +# Function calls +# Splatting +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + y = 2 + zs = (3,4) + w = 5 + (tuple(zs...), + tuple(zs..., w), + tuple(y, zs...), + tuple(x, y, zs..., w)) +end +""") == ((3,4), + (3,4,5), + (2,3,4), + (1,2,3,4,5)) #------------------------------------------------------------------------------- # Functions From 2294ecb3efb2620926b25384c880009b04b24eed Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 May 2024 19:53:40 +1000 Subject: [PATCH 0750/1109] Hygiene bending via `adopt_scope()` --- JuliaLowering/src/ast.jl | 26 ++++++++++++++++++++++- JuliaLowering/src/desugaring.jl | 3 ++- JuliaLowering/src/macro_expansion.jl | 31 +++++++++++++--------------- JuliaLowering/test/demo.jl | 22 ++++++++++++++------ JuliaLowering/test/runtests.jl | 17 +++++++++++++-- 5 files changed, 72 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 30bf50f931f46..8ec27ad952f04 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -1,6 +1,13 @@ #------------------------------------------------------------------------------- abstract type AbstractLoweringContext end +""" +Unique symbolic identity for a variable +""" +const VarId = Int + +const LayerId = Int + function 
syntax_graph(ctx::AbstractLoweringContext) ctx.graph end @@ -274,7 +281,9 @@ function mapchildren(f, ctx, ex) return ex2 end -# Copy AST `ex` into `ctx` +""" +Copy AST `ex` into `ctx` +""" function copy_ast(ctx, ex) if haschildren(ex) cs = SyntaxList(ctx) @@ -297,6 +306,21 @@ function copy_ast(ctx, ex) return ex2 end +""" + adopt_scope(ex, ref) + +Copy `ex`, adopting the scope layer of `ref`. +""" +function adopt_scope(ex, scope_layer::LayerId) + ex1 = copy_ast(ex, ex) + set_scope_layer_recursive!(ex1, scope_layer, true) + ex1 +end + +function adopt_scope(ex, ref::SyntaxTree) + adopt_scope(ex, ref.scope_layer) +end + #------------------------------------------------------------------------------- # Predicates and accessors working on expression trees diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 92eca167745bf..0e2ae44ecac5b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -357,11 +357,12 @@ function expand_macro_def(ctx, ex) name = sig[1] args = remove_empty_parameters(children(sig)) @chk kind(args[end]) != K"parameters" (args[end], "macros cannot accept keyword arguments") + context_arg = adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), name) ret = @ast ctx ex [K"function" [K"call"(sig) _make_macro_name(ctx, name) [K"::" - "__context__"::K"Identifier"(scope_layer=name.scope_layer) + context_arg MacroContext::K"Value" ] # flisp: We don't mark these @nospecialize because all arguments to diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 25c4d8d02dc38..37ee43f3e8ce9 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -1,12 +1,5 @@ # Lowering pass 1: Macro expansion, simple normalizations and quote expansion -""" -Unique symbolic identity for a variable -""" -const VarId = Int - -const LayerId = Int - """ A `ScopeLayer` is a mechanism for automatic hygienic macros; every identifier is assigned to 
a particular layer and can only match against bindings which are @@ -152,7 +145,11 @@ end struct MacroContext <: AbstractLoweringContext graph::SyntaxGraph macroname::SyntaxTree - mod::Module + scope_layer::ScopeLayer +end + +function adopt_scope(ex, ctx::MacroContext) + adopt_scope(ex, ctx.scope_layer.id) end struct MacroExpansionError @@ -170,32 +167,32 @@ function Base.showerror(io::IO, exc::MacroExpansionError) ctx = exc.context if !isnothing(ctx) print(io, " while expanding ", ctx.macroname, - " in module ", ctx.mod) + " in module ", ctx.scope_layer.mod) end print(io, ":\n") src = sourceref(exc.ex) highlight(io, src.file, first_byte(src):last_byte(src), note=exc.msg) end -function maybe_set_scope_layer!(ex, id) +function set_scope_layer!(ex, id, force) k = kind(ex) if (k == K"Identifier" || k == K"MacroName" || (is_operator(k) && !haschildren(ex))) && - !hasattr(ex, :scope_layer) + (force || !hasattr(ex, :scope_layer)) setattr!(ex; scope_layer=id) end end -function set_scope_layer_recursive!(ex, id) +function set_scope_layer_recursive!(ex, id, force) k = kind(ex) if k == K"module" || k == K"toplevel" return end if haschildren(ex) for c in children(ex) - set_scope_layer_recursive!(c, id) + set_scope_layer_recursive!(c, id, force) end else - maybe_set_scope_layer!(ex, id) + set_scope_layer!(ex, id, force) end ex end @@ -224,9 +221,9 @@ function expand_macro(ctx, ex) # a macro expansion. # In either case, we need to set any unset scope layers before passing the # arguments to the macro call. - macro_args = [set_scope_layer_recursive!(e, ctx.current_layer.id) + macro_args = [set_scope_layer_recursive!(e, ctx.current_layer.id, false) for e in children(ex)[2:end]] - mctx = MacroContext(ctx.graph, macname, ctx.current_layer.mod) + mctx = MacroContext(ctx.graph, macname, ctx.current_layer) expanded = try # TODO: Allow invoking old-style macros for compat invokelatest(macfunc, mctx, macro_args...) @@ -269,7 +266,7 @@ need to be dealt with before other lowering. 
interpolations) """ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) - maybe_set_scope_layer!(ex, ctx.current_layer.id) + set_scope_layer!(ex, ctx.current_layer.id, false) k = kind(ex) if k == K"Identifier" # TODO: Insert is_placeholder() transformation here. diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index e84debfbaa153..7d4ae1b17f33c 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -84,12 +84,12 @@ end JuliaLowering.include_string(Main, """ module M - using JuliaLowering: JuliaLowering, @ast, @chk + using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope using JuliaSyntax # Introspection macro __MODULE__() - __context__.mod + __context__.scope_layer.mod end macro __FILE__() @@ -110,19 +110,29 @@ module M end) end - a_global = nothing - macro set_a_global(val) :(begin global a_global = \$val end) end + + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + \$e1 = \$ex + end + end end """) Base.eval(M, quote + function var"@inert"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == K"quote" + @ast __context__ ex [K"inert" ex] + end + # Recursive macro call - function var"@recursive"(__context__, N) + function var"@recursive"(__context__::JuliaLowering.MacroContext, N) @chk kind(N) == K"Integer" Nval = N.value::Int if Nval < 1 @@ -153,7 +163,7 @@ function wrapscope(ex, scope_type) end function softscope_test(ex) - g = JuliaLowering.ensure_attributes(ex.graph, scope_type=Symbol) + g = ensure_attributes(ex.graph, scope_type=Symbol) wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 5d89ce9616468..4124e2d2c1e68 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -176,12 +176,12 @@ end JuliaLowering.include_string(test_mod, """ module M - using JuliaLowering: JuliaLowering, @ast, @chk + using 
JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope using JuliaSyntax # Introspection macro __MODULE__() - __context__.mod + __context__.scope_layer.mod end macro __FILE__() @@ -215,6 +215,14 @@ module M end) end + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + \$e1 = \$ex + nothing + end + end + # # Recursive macro call # # TODO: Need branching! # macro recursive(N) @@ -251,6 +259,11 @@ begin end """) == 42 +JuliaLowering.include_string(test_mod, """ +M.@set_global_in_parent "bent hygiene!" +""") +@test test_mod.sym_introduced_from_M == "bent hygiene!" + JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 100") @test !isdefined(test_mod.M, :global_in_test_mod) @test test_mod.global_in_test_mod == 100 From 02fe245122884a9ac9be0c31c673eb389fdc02b3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 May 2024 20:00:35 +1000 Subject: [PATCH 0751/1109] Reimplement quoted syntax interpolation Quoted syntax containing `$` interpolations can be expanded into a tree of nested expression generating code, but this generates a lot of code for nested expressions; effectively it's a "loop unrolling approach". Alternatively, we can just inert-quote the `$`-containing expression as a single expression in the source, separately strip out the interpolated values into their own expressions, and emit a call to a runtime function to do the interpolation. We take the second approach here; it results in much cleaner expanded expressions and keeps the original quoted syntax in a single data structure. 
--- JuliaLowering/src/eval.jl | 83 +++++++++++++++--- JuliaLowering/src/macro_expansion.jl | 122 +++++---------------------- JuliaLowering/src/syntax_graph.jl | 4 + JuliaLowering/test/demo.jl | 66 ++++++++++++++- JuliaLowering/test/runtests.jl | 42 +++++++++ 5 files changed, 204 insertions(+), 113 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 2552efd376a4a..105b1271f6271 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -214,28 +214,89 @@ end struct InterpolationContext{Graph} <: AbstractLoweringContext graph::Graph + values::Tuple + current_index::Ref{Int} end -function InterpolationContext() - graph = SyntaxGraph() - ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, - value=Any, name_val=String, scope_layer=LayerId) - InterpolationContext(freeze_attrs(graph)) +function _contains_active_interp(ex, depth) + k = kind(ex) + if k == K"$" && depth == 0 + return true + end + inner_depth = k == K"quote" ? depth + 1 : + k == K"$" ? depth - 1 : + depth + return any(_contains_active_interp(c, inner_depth) for c in children(ex)) end # Produce interpolated node for `$x` syntax -function interpolate_value(ctx, srcref, x) +function _interpolated_value(ctx, srcref, x) if x isa SyntaxTree - if x.graph === ctx.graph - x + x.graph === ctx.graph ? x : copy_ast(ctx, x) + else + makeleaf(ctx, srcref, K"Value", x) + end +end + +function _interpolate_ast(ctx::InterpolationContext, ex, depth) + if !_contains_active_interp(ex, depth) + return ex + end + + # We have an interpolation deeper in the tree somewhere - expand to an + # expression + inner_depth = kind(ex) == K"quote" ? depth + 1 : + kind(ex) == K"$" ? depth - 1 : + depth + expanded_children = SyntaxList(ctx) + for e in children(ex) + if kind(e) == K"$" && inner_depth == 0 + vals = ctx.values[ctx.current_index[]]::Tuple + ctx.current_index[] += 1 + for (i,v) in enumerate(vals) + srcref = numchildren(e) == 1 ? 
e[1] : e[i] + push!(expanded_children, _interpolated_value(ctx, srcref, v)) + end else - copy_ast(ctx, x) + push!(expanded_children, _interpolate_ast(ctx, e, inner_depth)) end - else - makeleaf(ctx, sourceref(srcref), K"Value", x) end + + makenode(ctx, ex, head(ex), expanded_children) end +function interpolate_ast(ex, values...) + if kind(ex) == K"$" + TODO(ex, "\$ in interpolate_ast") + end + # Construct graph for interpolation context. We inherit this from the macro + # context where possible by detecting it using __macro__ctx__. This feels + # hacky though. + # + # Perhaps we should use a ScopedValue for this instead or get it from + # __context__? Nothing feels great here. + graph = nothing + for vals in values + for v in vals + if v isa SyntaxTree && !isnothing(getattr(syntax_graph(v), :__macro_ctx__, nothing)) + graph = syntax_graph(v) + break + end + end + end + if isnothing(graph) + graph = SyntaxGraph() + ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String, scope_layer=LayerId) + end + ctx = InterpolationContext(graph, values, Ref(1)) + # We must copy the AST into our context to use it as the source reference + # of generated expressions. + ex1 = copy_ast(ctx, ex) + _interpolate_ast(ctx, ex1, 0) +end + + # Produce node corresponding to `srcref` when there was an interpolation among # `children` function interpolate_node(ctx::InterpolationContext, srcref, children...) 
diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 37ee43f3e8ce9..69ac55918e44a 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -24,7 +24,8 @@ end function MacroExpansionContext(ctx, mod::Module) graph = ensure_attributes(syntax_graph(ctx), var_id=VarId, - scope_layer=LayerId) + scope_layer=LayerId, + __macro_ctx__=Nothing) layers = Vector{ScopeLayer}() MacroExpansionContext(graph, Ref{VarId}(1), layers, new_scope_layer(layers, mod, false)) end @@ -36,108 +37,28 @@ function new_scope_layer(layers, mod::Module, is_macro_expansion) end #-------------------------------------------------- -function _contains_active_interp(ex, depth) - k = kind(ex) - if k == K"$" && depth == 0 - return true - end - inner_depth = k == K"quote" ? depth + 1 : - k == K"$" ? depth - 1 : - depth - return any(_contains_active_interp(c, inner_depth) for c in children(ex)) -end - -function expand_interpolation(ctx, interp_ctx_var, ex) - @ast ctx ex [K"call" - interpolate_value::K"Value" - interp_ctx_var - [K"inert" ex] - expand_forms_1(ctx, ex) - ] -end - -# TODO: Rewrite this recursive expansion to happen partially at -# runtime rather than entirely in lowering? That is, we'd expand to -# -# interpolate_expression(ex, val1, val2, ...) -# -# where `ex` is an inert version of the quoted block and `val1, val2, ...` are -# the expressions within `$` escaping. -# -# Advantages: -# * Much more compact lowered AST -# * Clearer lowered AST - `ex` only appears once, rather than many times -# * Smaller runtime API surface area -# -# Disadvantages: -# * Recursive traversal and processing of quote depth appears both here and in -# the runtime. But can unify the traversal code? 
-# -# Beware tricky expansion gotchas like getting the meaning of the following correct -# -# x = 42 -# macro m() -# simplequote -# println(x) -# quote -# $x, x -# end -# end -# end -# -function expand_quote_content(ctx, interp_ctx_var, ex, depth) - if !_contains_active_interp(ex, depth) - return @ast ctx ex [K"call" - copy_ast::K"Value" - interp_ctx_var - ex::K"Value" - ] - end - - # We have an interpolation deeper in the tree somewhere - expand to an - # expression - inner_depth = kind(ex) == K"quote" ? depth + 1 : - kind(ex) == K"$" ? depth - 1 : - depth - expanded_children = SyntaxList(ctx) - for e in children(ex) - if kind(e) == K"$" && inner_depth == 0 - for x in children(e) - push!(expanded_children, expand_interpolation(ctx, interp_ctx_var, x)) - end - else - push!(expanded_children, expand_quote_content(ctx, interp_ctx_var, e, inner_depth)) +# Expansion of quoted expressions +function collect_unquoted!(ctx, unquoted, ex, depth) + if kind(ex) == K"$" && depth == 0 + push!(unquoted, @ast ctx ex [K"tuple" children(ex)...]) + else + inner_depth = kind(ex) == K"quote" ? depth + 1 : + kind(ex) == K"$" ? depth - 1 : + depth + for e in children(ex) + collect_unquoted!(ctx, unquoted, e, inner_depth) end end - - return @ast ctx ex [K"call" - interpolate_node::K"Value" - interp_ctx_var - [K"inert" ex] - expanded_children... - ] + return unquoted end function expand_quote(ctx, ex) - interp_ctx_var = ssavar(ctx, ex) - expanded = if kind(ex) == K"$" - @chk numchildren(ex) == 1 - e1 = ex[1] - if kind(e1) == K"..." - throw(LoweringError(e1, "`...` expression outside of call")) - end - expand_interpolation(ctx, interp_ctx_var, e1) - else - expand_quote_content(ctx, interp_ctx_var, ex, 0) - end - @ast ctx ex [K"block" - [K"=" - interp_ctx_var - [K"call" - InterpolationContext::K"Value" - ] - ] - expanded + unquoted = SyntaxTree[] + collect_unquoted!(ctx, unquoted, ex, 0) + @ast ctx ex [K"call" + interpolate_ast::K"Value" + [K"inert" ex] + unquoted... 
] end @@ -246,7 +167,8 @@ function expand_macro(ctx, ex) end new_layer = new_scope_layer(ctx.scope_layers, parentmodule(macfunc), true) ctx2 = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) - expand_forms_1(ctx2, expanded) + # Add wrapper block for macro expansion provenance tracking + @ast ctx ex [K"block" expand_forms_1(ctx2, expanded)] else @ast ctx ex expanded::K"Value" end @@ -281,7 +203,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) @ast ctx ex ex=>K"Identifier" elseif k == K"quote" @chk numchildren(ex) == 1 - expand_quote(ctx, ex[1]) + expand_forms_1(ctx, expand_quote(ctx, ex[1])) elseif k == K"module" || k == K"toplevel" || k == K"inert" ex elseif k == K"macrocall" diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index c6731b0496b5c..a97ac015fa83d 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -424,6 +424,10 @@ function reparent(ctx, ex::SyntaxTree) SyntaxTree(graph, ex.id) end +function ensure_attributes(ex::SyntaxTree; kws...) + reparent(ensure_attributes(syntax_graph(ex); kws...), ex) +end + syntax_graph(ex::SyntaxTree) = ex.graph function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) 
diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 7d4ae1b17f33c..486e59cbc04f8 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -3,7 +3,10 @@ using JuliaSyntax using JuliaLowering -using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext + +using JuliaSyntaxFormatter +using JuliaSyntaxFormatter: FormatContext #------------------------------------------------------------------------------- # Demos of the prototype @@ -158,6 +161,16 @@ Base.eval(M, quote end end) +JuliaLowering.include_string(M, """ +xx = "xx in M" +macro test_inert_quote() + println(xx) + @inert quote + (\$xx, xx) + end +end +""") + function wrapscope(ex, scope_type) makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) end @@ -192,13 +205,60 @@ end # end # """ +src = """ +begin + x = 10 + y = 20 + let x = y + x + z = "some string \$x \$y" + + function f(y) + a = M.@foo z + "\$z \$y \$a \$x" + end + print(x) + end + print(x) +end +""" + +src = """ +M.@set_global_in_parent "bent hygiene!" +""" + +# src = """ +# begin +# M.@__LINE__ +# end +# """ + +# src = """@foo z""" + # TODO: # "hygiene bending" / (being unhygenic, or bending hygiene to the context of a # macro argument on purpose) # * bend to macro name to get to parent layer? # * already needed in `#self#` argument +function printsrc(ex; color_by=nothing, kws...) 
+ format_token_style = if !isnothing(color_by) + e->get(e, color_by, nothing) + else + e->nothing + end + print(JuliaSyntaxFormatter.format(ex; format_token_style, kws...)) +end + +function annotate_scopes(mod, ex) + ex = ensure_attributes(ex, var_id=Int) + ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex) + ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) + ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) + ex +end + ex = parsestmt(SyntaxTree, src, filename="foo.jl") +ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @info "Input code" ex @@ -212,13 +272,15 @@ ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) @info "Resolved scopes" ex_scoped +#printsrc(ex, color_by=:var_id) +#printsrc(ex_macroexpand, color_by=:scope_layer) + ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) @info "Linear IR" ex_compiled ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) @info "CodeInfo" ex_expr -x = 1 eval_result = Base.eval(in_mod, ex_expr) @info "Eval" eval_result diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 4124e2d2c1e68..282c22ac1c8ff 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -171,6 +171,48 @@ let end """)) +# interpolations at multiple depths +ex = JuliaLowering.include_string(test_mod, """ +let + args = (:x,:y) + quote + x = 1 + y = 2 + quote + f(\$\$(args...)) + end + end +end +""") +@test ex ~ @ast_ [K"block" + [K"=" + "x"::K"Identifier" + 1::K"Integer" + ] + [K"=" + "y"::K"Identifier" + 2::K"Integer" + ] + [K"quote" + [K"block" + [K"call" + "f"::K"Identifier" + [K"$" + "x"::K"Identifier" + "y"::K"Identifier" + ] + ] + ] + ] +] +@test sourcetext(ex[3][1][1][2]) == "\$\$(args...)" +@test sourcetext(ex[3][1][1][2][1]) == "x" +@test sourcetext(ex[3][1][1][2][2]) == "y" + +ex2 = JuliaLowering.eval(test_mod, ex) 
+@test sourcetext(ex2[1][2]) == "x" +@test sourcetext(ex2[1][3]) == "y" + #------------------------------------------------------------------------------- # Macro expansion From 9fde9839cf41beafd86183076be247238d0bae2f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 23 May 2024 16:52:23 +1000 Subject: [PATCH 0752/1109] Some cleanup to demo examples --- JuliaLowering/test/demo.jl | 153 +++++++++++------------------ JuliaLowering/test/demo_include.jl | 59 +++++++++++ 2 files changed, 118 insertions(+), 94 deletions(-) create mode 100644 JuliaLowering/test/demo_include.jl diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 486e59cbc04f8..db320de77ea14 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -8,6 +8,23 @@ using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attribu using JuliaSyntaxFormatter using JuliaSyntaxFormatter: FormatContext +function formatsrc(ex; color_by=nothing, kws...) + format_token_style = if !isnothing(color_by) + e->get(e, color_by, nothing) + else + e->nothing + end + Text(JuliaSyntaxFormatter.format(ex; format_token_style, kws...)) +end + +function annotate_scopes(mod, ex) + ex = ensure_attributes(ex, var_id=Int) + ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex) + ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) + ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) + ex +end + #------------------------------------------------------------------------------- # Demos of the prototype @@ -85,48 +102,7 @@ end # end # """ -JuliaLowering.include_string(Main, """ -module M - using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope - using JuliaSyntax - - # Introspection - macro __MODULE__() - __context__.scope_layer.mod - end - - macro __FILE__() - JuliaLowering.filename(__context__.macroname) - end - - macro __LINE__() - JuliaLowering.source_location(__context__.macroname)[1] - end - - someglobal = "global in module M" - 
- # Macro with local variables - macro foo(ex) - :(begin - x = "`x` from @foo" - (x, someglobal, \$ex) - end) - end - - macro set_a_global(val) - :(begin - global a_global = \$val - end) - end - - macro set_global_in_parent(ex) - e1 = adopt_scope(:(sym_introduced_from_M), __context__) - quote - \$e1 = \$ex - end - end -end -""") +JuliaLowering.include(Main, "demo_include.jl") Base.eval(M, quote function var"@inert"(__context__::JuliaLowering.MacroContext, ex) @@ -180,17 +156,33 @@ function softscope_test(ex) wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) end +# src = """ +# M.@test_inert_quote() +# """ + +# src = """ +# macro mmm(a; b=2) +# end +# macro A.b(ex) +# end +# """ + src = """ -begin - x = 42 - M.@foo x -end +M.@set_global_in_parent "bent hygiene!" """ +# src = """ +# begin +# M.@__LINE__ +# end +# """ + +# src = """@foo z""" + src = """ begin - M.@set_a_global 42 - M.a_global + x = 42 + M.@foo x end """ @@ -199,12 +191,16 @@ end # """ # src = """ -# macro mmm(a; b=2) -# end -# macro A.b(ex) +# begin +# M.@set_a_global 1000 +# M.a_global # end # """ +# src = """ +# M.@set_global_in_parent "bent hygiene!" +# """ + src = """ begin x = 10 @@ -222,41 +218,13 @@ begin end """ -src = """ -M.@set_global_in_parent "bent hygiene!" -""" - # src = """ # begin -# M.@__LINE__ +# x = -1 +# M.@baz x # end # """ -# src = """@foo z""" - -# TODO: -# "hygiene bending" / (being unhygenic, or bending hygiene to the context of a -# macro argument on purpose) -# * bend to macro name to get to parent layer? -# * already needed in `#self#` argument - -function printsrc(ex; color_by=nothing, kws...) 
- format_token_style = if !isnothing(color_by) - e->get(e, color_by, nothing) - else - e->nothing - end - print(JuliaSyntaxFormatter.format(ex; format_token_style, kws...)) -end - -function annotate_scopes(mod, ex) - ex = ensure_attributes(ex, var_id=Int) - ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex) - ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) - ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) - ex -end - ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @@ -264,23 +232,20 @@ ex = ensure_attributes(ex, var_id=Int) in_mod = Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) -@info "Macro expanded" ex_macroexpand +@info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) -@info "Desugared" ex_desugar +@info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) -@info "Resolved scopes" ex_scoped - -#printsrc(ex, color_by=:var_id) -#printsrc(ex_macroexpand, color_by=:scope_layer) +@info "Resolved scopes" formatsrc(ex_scoped, color_by=:var_id) ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) -@info "Linear IR" ex_compiled - -ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) -@info "CodeInfo" ex_expr - -eval_result = Base.eval(in_mod, ex_expr) -@info "Eval" eval_result +@info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) +# ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) +# @info "CodeInfo" ex_expr +# +# eval_result = Base.eval(in_mod, ex_expr) +# @info "Eval" eval_result +# diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl new file mode 100644 index 0000000000000..a1a3a0aadbba6 --- /dev/null +++ b/JuliaLowering/test/demo_include.jl @@ -0,0 +1,59 @@ +module M + using 
JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope + using JuliaSyntax + + # Introspection + macro __MODULE__() + __context__.scope_layer.mod + end + + macro __FILE__() + JuliaLowering.filename(__context__.macroname) + end + + macro __LINE__() + JuliaLowering.source_location(__context__.macroname)[1] + end + + module A + another_global = "global in A" + + macro bar(ex) + quote + x = "`x` in @bar" + (x, another_global, $ex) + end + end + end + + someglobal = "global in module M" + + # Macro with local variables + macro foo(ex) + quote + x = "`x` from @foo" + (x, someglobal, A.@bar $ex) + end + end + + macro set_a_global(val) + quote + global a_global = $val + end + end + + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + $e1 = $ex + end + end + + macro baz(ex) + quote + let $ex = 10 + $ex + end + end + end +end From 49395b10b5225b47d93293162e601d9ddb018cef Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 25 May 2024 07:38:30 +1000 Subject: [PATCH 0753/1109] Tool to highlight variable kind --- JuliaLowering/test/demo.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index db320de77ea14..2e5c4bcbd9070 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -8,11 +8,26 @@ using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attribu using JuliaSyntaxFormatter using JuliaSyntaxFormatter: FormatContext +# Extract variable kind for highlighting purposes +function var_kind(e) + id = get(e, :var_id, nothing) + if isnothing(id) + return nothing + end + info = get(ctx3.var_info, id, nothing) + if isnothing(info) + return nothing + end + return info.kind +end + function formatsrc(ex; color_by=nothing, kws...) 
- format_token_style = if !isnothing(color_by) + format_token_style = if isnothing(color_by) + e->nothing + elseif color_by isa Symbol e->get(e, color_by, nothing) else - e->nothing + color_by end Text(JuliaSyntaxFormatter.format(ex; format_token_style, kws...)) end From 243e869f3e86bbdaba29437e9830a191829e64fe Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 26 May 2024 06:44:20 +1000 Subject: [PATCH 0754/1109] Clean up makenode/makeleaf a bit --- JuliaLowering/src/ast.jl | 57 +++++++++++++++-------------- JuliaLowering/src/desugaring.jl | 3 +- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 4 +- JuliaLowering/src/syntax_graph.jl | 13 +++++-- 5 files changed, 44 insertions(+), 35 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 8ec27ad952f04..ed90a0e4b8756 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -23,41 +23,48 @@ end _node_id(ex::NodeId) = ex _node_id(ex::SyntaxTree) = ex.id -_node_ids() = () -_node_ids(::Nothing, cs...) = _node_ids(cs...) -_node_ids(c, cs...) = (_node_id(c), _node_ids(cs...)...) +_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_same_graph(graph, ex); ex.id) + +_node_ids(graph::SyntaxGraph) = () +_node_ids(graph::SyntaxGraph, ::Nothing, cs...) = _node_ids(graph, cs...) +_node_ids(graph::SyntaxGraph, c, cs...) = (_node_id(graph, c), _node_ids(graph, cs...)...) +_node_ids(graph::SyntaxGraph, cs::SyntaxList, cs1...) = (_node_ids(graph, cs...)..., _node_ids(graph, cs1...)...) +function _node_ids(graph::SyntaxGraph, cs::SyntaxList) + check_same_graph(graph, cs) + cs.ids +end -function _makenode(graph::SyntaxGraph, srcref, head, children; attrs...) +function makeleaf(graph::SyntaxGraph, srcref, head; attrs...) id = newnode!(graph) - # TODO: Having this list seeems hacky? Use makeleaf everywhere instead. 
- if isnothing(children) || kind(head) in (K"Identifier", K"core", K"top", K"SSAValue", K"Value", K"slot") || is_literal(head) - @assert isnothing(children) || length(children) == 0 - else - setchildren!(graph, id, children) - end - srcref_attr = srcref isa SyntaxTree ? srcref.id : srcref - setattr!(graph, id; source=srcref_attr, attrs...) + source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref + setattr!(graph, id; source=source, attrs...) sethead!(graph, id, head) return SyntaxTree(graph, id) end -function makenode(ctx, srcref, head, children::Union{Nothing,SyntaxTree}...; attrs...) - _makenode(syntax_graph(ctx), srcref, head, _node_ids(children...); attrs...) +function makenode(graph::SyntaxGraph, srcref, head, children...; attrs...) + id = newnode!(graph) + setchildren!(graph, id, _node_ids(graph, children...)) + source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref + setattr!(graph, id; source=source, attrs...) + sethead!(graph, id, head) + return SyntaxTree(graph, id) end -function makenode(ctx::Union{AbstractLoweringContext,SyntaxTree}, - srcref, head, children::SyntaxList; attrs...) - graph = syntax_graph(ctx) - syntax_graph(ctx) === syntax_graph(children) || error("Mismatching graphs") - _makenode(graph, srcref, head, children.ids; attrs...) +function makenode(ctx, srcref, head, children...; attrs...) + makenode(syntax_graph(ctx), srcref, head, children...; attrs...) +end + +function makeleaf(ctx, srcref, kind; kws...) + makeleaf(syntax_graph(ctx), srcref, kind; kws...) end function makeleaf(ctx, srcref, kind, value; kws...) graph = syntax_graph(ctx) if kind == K"Identifier" || kind == K"core" || kind == K"top" || kind == K"Symbol" || kind == K"globalref" - _makenode(graph, srcref, kind, nothing; name_val=value, kws...) + makeleaf(graph, srcref, kind; name_val=value, kws...) elseif kind == K"SSAValue" - _makenode(graph, srcref, kind, nothing; var_id=value, kws...) + makeleaf(graph, srcref, kind; var_id=value, kws...) 
else val = kind == K"Integer" ? convert(Int, value) : kind == K"Float" ? convert(Float64, value) : @@ -65,14 +72,10 @@ function makeleaf(ctx, srcref, kind, value; kws...) kind == K"Char" ? convert(Char, value) : kind == K"Value" ? value : error("Unexpected leaf kind `$kind`") - _makenode(graph, srcref, kind, nothing; value=val, kws...) + makeleaf(graph, srcref, kind; value=val, kws...) end end -function makeleaf(ctx, srcref, kind; kws...) - _makenode(syntax_graph(ctx), srcref, kind, nothing; kws...) -end - # Convenience functions to create leaf nodes referring to identifiers within # the Core and Top modules. core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) @@ -245,7 +248,7 @@ function copy_attrs!(dest, src) end function mapleaf(ctx, src, kind) - ex = _makenode(syntax_graph(ctx), src, kind, nothing) + ex = makeleaf(syntax_graph(ctx), src, kind) # TODO: Value coersion might be broken here due to use of `name_val` vs # `value` vs ... ? copy_attrs!(ex, src) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 0e2ae44ecac5b..386044c802399 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -357,12 +357,11 @@ function expand_macro_def(ctx, ex) name = sig[1] args = remove_empty_parameters(children(sig)) @chk kind(args[end]) != K"parameters" (args[end], "macros cannot accept keyword arguments") - context_arg = adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), name) ret = @ast ctx ex [K"function" [K"call"(sig) _make_macro_name(ctx, name) [K"::" - context_arg + adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), name) MacroContext::K"Value" ] # flisp: We don't mark these @nospecialize because all arguments to diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 9188a28ab50a0..ff1b522d9d559 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -150,7 +150,7 @@ end # needs to be done. 
In value position, it returns an expression computing # the needed value. # -# TODO: is it ok to return `nothing` if we have no value in some sense +# TODO: Is it ok to return `nothing` if we have no value in some sense? function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"Identifier" || is_literal(k) || k == K"SSAValue" || k == K"quote" || k == K"inert" || diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 9f43975cdadeb..bceb91dbd4423 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -359,10 +359,10 @@ function _resolve_scopes!(ctx, ex) # Resolve args and static parameters so that variable IDs get pushed # back into the original tree (not required for downstream processing) for a in lambda_info.args - resolve_scopes!(ctx, a) + _resolve_scopes!(ctx, a) end for a in lambda_info.static_parameters - resolve_scopes!(ctx, a) + _resolve_scopes!(ctx, a) end for e in children(ex) _resolve_scopes!(ctx, e) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index a97ac015fa83d..5dcb0b41187e5 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -157,6 +157,13 @@ Return `SyntaxGraph` associated with `ctx` """ syntax_graph(graph::SyntaxGraph) = graph +function check_same_graph(x, y) + if syntax_graph(x) !== syntax_graph(y) + @info "" syntax_graph(x) syntax_graph(y) x y + error("Mismatching syntax graphs") + end +end + #------------------------------------------------------------------------------- struct SyntaxTree{GraphType} graph::GraphType @@ -462,7 +469,7 @@ function Base.getindex(v::SyntaxList, r::UnitRange) end function Base.setindex!(v::SyntaxList, tree::SyntaxTree, i::Int) - v.graph === tree.graph || error("Mismatching syntax graphs") + check_same_graph(v, tree) v.ids[i] = tree.id end @@ -471,7 +478,7 @@ function Base.setindex!(v::SyntaxList, id::NodeId, i::Int) end function 
Base.push!(v::SyntaxList, tree::SyntaxTree) - v.graph === tree.graph || error("Mismatching syntax graphs") + check_same_graph(v, tree) push!(v.ids, tree.id) end @@ -483,7 +490,7 @@ function Base.append!(v::SyntaxList, exs) end function Base.append!(v::SyntaxList, exs::SyntaxList) - v.graph === exs.graph || error("Mismatching syntax graphs") + check_same_graph(v, exs) append!(v.ids, exs.ids) v end From 9a25f223c32eda397e42528ac045101cd62f1de2 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 May 2024 14:12:44 +1000 Subject: [PATCH 0755/1109] Clean up makenode/makeleaf to allow a prototype expression to copy attributes from --- JuliaLowering/src/ast.jl | 63 +++++++++++++++++++--------------- JuliaLowering/src/linear_ir.jl | 4 +-- JuliaLowering/test/demo.jl | 2 +- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index ed90a0e4b8756..8bc3658994aa4 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -34,48 +34,59 @@ function _node_ids(graph::SyntaxGraph, cs::SyntaxList) cs.ids end -function makeleaf(graph::SyntaxGraph, srcref, head; attrs...) +function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...) id = newnode!(graph) source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref + ex = SyntaxTree(graph, id) + copy_attrs!(ex, proto) setattr!(graph, id; source=source, attrs...) - sethead!(graph, id, head) - return SyntaxTree(graph, id) + return ex end -function makenode(graph::SyntaxGraph, srcref, head, children...; attrs...) +function makenode(graph::SyntaxGraph, srcref, proto, children...; attrs...) id = newnode!(graph) setchildren!(graph, id, _node_ids(graph, children...)) source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref + ex = SyntaxTree(graph, id) + copy_attrs!(ex, proto) setattr!(graph, id; source=source, attrs...) - sethead!(graph, id, head) return SyntaxTree(graph, id) end -function makenode(ctx, srcref, head, children...; attrs...) 
- makenode(syntax_graph(ctx), srcref, head, children...; attrs...) +function makenode(ctx, srcref, proto, children...; attrs...) + makenode(syntax_graph(ctx), srcref, proto, children...; attrs...) end -function makeleaf(ctx, srcref, kind; kws...) - makeleaf(syntax_graph(ctx), srcref, kind; kws...) +function makeleaf(ctx, srcref, proto; kws...) + makeleaf(syntax_graph(ctx), srcref, proto; kws...) end -function makeleaf(ctx, srcref, kind, value; kws...) +function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) - if kind == K"Identifier" || kind == K"core" || kind == K"top" || kind == K"Symbol" || kind == K"globalref" - makeleaf(graph, srcref, kind; name_val=value, kws...) - elseif kind == K"SSAValue" - makeleaf(graph, srcref, kind; var_id=value, kws...) + if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || k == K"globalref" + makeleaf(graph, srcref, k; name_val=value, kws...) + elseif k == K"SSAValue" + makeleaf(graph, srcref, k; var_id=value, kws...) else - val = kind == K"Integer" ? convert(Int, value) : - kind == K"Float" ? convert(Float64, value) : - kind == K"String" ? convert(String, value) : - kind == K"Char" ? convert(Char, value) : - kind == K"Value" ? value : - error("Unexpected leaf kind `$kind`") - makeleaf(graph, srcref, kind; value=val, kws...) + val = k == K"Integer" ? convert(Int, value) : + k == K"Float" ? convert(Float64, value) : + k == K"String" ? convert(String, value) : + k == K"Char" ? convert(Char, value) : + k == K"Value" ? value : + error("Unexpected leaf kind `$k`") + makeleaf(graph, srcref, k; value=val, kws...) end end +# TODO: Replace this with makeleaf variant? +function mapleaf(ctx, src, kind) + ex = makeleaf(syntax_graph(ctx), src, kind) + # TODO: Value coersion might be broken here due to use of `name_val` vs + # `value` vs ... ? + copy_attrs!(ex, src) + ex +end + # Convenience functions to create leaf nodes referring to identifiers within # the Core and Top modules. 
core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) @@ -88,7 +99,7 @@ top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) # Create a new SSA variable function ssavar(ctx::AbstractLoweringContext, srcref) - makenode(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) + makeleaf(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) end # Assign `ex` to an SSA variable. @@ -247,12 +258,8 @@ function copy_attrs!(dest, src) end end -function mapleaf(ctx, src, kind) - ex = makeleaf(syntax_graph(ctx), src, kind) - # TODO: Value coersion might be broken here due to use of `name_val` vs - # `value` vs ... ? - copy_attrs!(ex, src) - ex +function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}) + sethead!(dest.graph, dest.id, head) end function mapchildren(f, ctx, ex) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index ff1b522d9d559..801b75ca0adda 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -280,7 +280,7 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) id = ex.var_id slot_id = get(slot_rewrites, id, nothing) if !isnothing(slot_id) - makenode(ctx, ex, K"slot"; var_id=slot_id) + makeleaf(ctx, ex, K"slot"; var_id=slot_id) else # TODO: look up any static parameters info = ctx.var_info[id] @@ -295,7 +295,7 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) elseif is_literal(k) || is_quoted(k) || k == K"global" ex elseif k == K"SSAValue" - makenode(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[ex.var_id]) + makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[ex.var_id]) elseif k == K"goto" || k == K"enter" || k == K"gotoifnot" TODO(ex, "_renumber $k") elseif k == K"lambda" diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 2e5c4bcbd9070..6a455008094f9 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -243,7 +243,7 @@ end ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, 
var_id=Int) #ex = softscope_test(ex) -@info "Input code" ex +@info "Input code" formatsrc(ex) in_mod = Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) From c5fbd6f6574ddf356255e1bd33b3e53381c5fc41 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 May 2024 18:24:53 +1000 Subject: [PATCH 0756/1109] Initial lowering of underscore placeholders to K"Placeholder" --- JuliaLowering/src/ast.jl | 5 ---- JuliaLowering/src/desugaring.jl | 6 ++--- JuliaLowering/src/kinds.jl | 2 ++ JuliaLowering/src/linear_ir.jl | 35 ++++++++++++++------------- JuliaLowering/src/macro_expansion.jl | 7 ++++-- JuliaLowering/src/scope_analysis.jl | 5 +--- JuliaLowering/src/syntax_graph.jl | 36 +++++++++++++++++++++++----- JuliaLowering/test/demo.jl | 4 ++++ JuliaLowering/test/runtests.jl | 13 ++++++++++ 9 files changed, 77 insertions(+), 36 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 8bc3658994aa4..60cfc10714971 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -344,11 +344,6 @@ function is_sym_decl(x) k == K"Identifier" || k == K"::" end -# Identifier made of underscores -function is_placeholder(ex) - kind(ex) == K"Identifier" && all(==('_'), ex.name_val) -end - function is_identifier(x) k = kind(x) k == K"Identifier" || k == K"var" || is_operator(k) || is_macro_name(k) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 386044c802399..c5acab992681c 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -179,7 +179,7 @@ function analyze_function_arg(full_ex) ex = full_ex while true k = kind(ex) - if k == K"Identifier" || k == K"tuple" + if k == K"Identifier" || k == K"Placeholder" || k == K"tuple" name = ex break elseif k == K"::" @@ -279,7 +279,7 @@ function expand_function_def(ctx, ex) arg_types = SyntaxList(ctx) for (i,arg) in enumerate(args) info = analyze_function_arg(arg) - aname = (isnothing(info.name) || is_placeholder(info.name)) ? 
+ aname = (isnothing(info.name) || kind(info.name) == K"Placeholder") ? unused(ctx, arg) : info.name push!(arg_names, aname) atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) @@ -606,7 +606,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) else if k == K"=" @chk numchildren(ex) == 2 - if kind(ex[1]) ∉ (K"Identifier", K"SSAValue") + if kind(ex[1]) ∉ KSet"Identifier Placeholder SSAValue" TODO(ex, "destructuring assignment") end end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 3df5aff471ebe..c68348f51d835 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -10,6 +10,8 @@ function _insert_kinds() # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" + # An identifier composed entirely of underscores + "Placeholder" # A (quoted) `Symbol` "Symbol" # TODO: Use `meta` for inbounds and loopinfo etc? diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 801b75ca0adda..39d2c85de86de 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -154,15 +154,15 @@ end function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"Identifier" || is_literal(k) || k == K"SSAValue" || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" + k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall + if needs_value && k == K"Placeholder" + # TODO: ensure outterref, globalref work here + throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) + end if in_tail_pos emit_return(ctx, ex, ex) elseif needs_value - if is_placeholder(ex) - # TODO: ensure outterref, globalref work here - throw(LoweringError(ex, "all-underscore identifiers 
are write-only and their values cannot be used in expressions")) - end ex else if k == K"Identifier" @@ -184,19 +184,22 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"=" lhs = ex[1] - # TODO: Handle underscore - rhs = compile(ctx, ex[2], true, false) - # TODO look up arg-map for renaming if lhs was reassigned - if needs_value && !isnothing(rhs) - r = emit_assign_tmp(ctx, rhs) - emit(ctx, ex, K"=", lhs, r) - if in_tail_pos - emit_return(ctx, ex, r) + if kind(lhs) == K"Placeholder" + compile(ctx, ex[2], needs_value, in_tail_pos) + else + rhs = compile(ctx, ex[2], true, false) + # TODO look up arg-map for renaming if lhs was reassigned + if needs_value && !isnothing(rhs) + r = emit_assign_tmp(ctx, rhs) + emit(ctx, ex, K"=", lhs, r) + if in_tail_pos + emit_return(ctx, ex, r) + else + r + end else - r + emit_assignment(ctx, ex, lhs, rhs) end - else - emit_assignment(ctx, ex, lhs, rhs) end elseif k == K"block" || k == K"scope_block" nc = numchildren(ex) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 69ac55918e44a..03264abde5f0b 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -191,8 +191,11 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) set_scope_layer!(ex, ctx.current_layer.id, false) k = kind(ex) if k == K"Identifier" - # TODO: Insert is_placeholder() transformation here. 
- ex + if all(==('_'), ex.name_val) + @ast ctx ex ex=>K"Placeholder" + else + ex + end elseif k == K"var" || k == K"char" || k == K"parens" # Strip "container" nodes @chk numchildren(ex) == 1 diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index bceb91dbd4423..4b9b58608eede 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -63,7 +63,7 @@ function _find_scope_vars!(assignments, locals, globals, used_names, ex) # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) - if !(kind(v) in KSet"SSAValue globalref outerref" || is_placeholder(v)) + if !(kind(v) in KSet"SSAValue globalref outerref Placeholder") get!(assignments, VarKey(v), v) end _find_scope_vars!(assignments, locals, globals, used_names, ex[2]) @@ -339,9 +339,6 @@ end function _resolve_scopes!(ctx, ex) k = kind(ex) if k == K"Identifier" - if is_placeholder(ex) - return # FIXME - make these K"placeholder"? - end id = lookup_var(ctx, VarKey(ex)) setattr!(ctx.graph, ex.id, var_id=id) elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 5dcb0b41187e5..78c4329a79acd 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -164,6 +164,10 @@ function check_same_graph(x, y) end end +function similar_graph(x, y) + syntax_graph(x).edges === syntax_graph(y).edges +end + #------------------------------------------------------------------------------- struct SyntaxTree{GraphType} graph::GraphType @@ -299,6 +303,25 @@ function sourceref(tree::SyntaxTree) end end +function is_ancestor(ex, ancestor) + if !similar_graph(ex, ancestor) + return false + end + sources = ex.graph.source + id::NodeId = ex.id + while true + s = get(sources, id, nothing) + if s isa NodeId + id = s + if id == ancestor.id + return true + end + else + return false + end + end +end + 
JuliaSyntax.filename(tree::SyntaxTree) = filename(sourceref(tree)) JuliaSyntax.source_location(::Type{LineNumberNode}, tree::SyntaxTree) = source_location(LineNumberNode, sourceref(tree)) JuliaSyntax.source_location(tree::SyntaxTree) = source_location(sourceref(tree)) @@ -325,12 +348,13 @@ attrsummary(name, value::Number) = "$name=$value" function _value_string(ex) k = kind(ex) str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : - k == K"SSAValue" ? "ssa" : - k == K"core" ? "core.$(ex.name_val)" : - k == K"top" ? "top.$(ex.name_val)" : - k == K"Symbol" ? ":$(ex.name_val)" : - k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : - k == K"slot" ? "slot" : + k == K"Placeholder" ? ex.name_val : + k == K"SSAValue" ? "ssa" : + k == K"core" ? "core.$(ex.name_val)" : + k == K"top" ? "top.$(ex.name_val)" : + k == K"Symbol" ? ":$(ex.name_val)" : + k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : + k == K"slot" ? "slot" : repr(get(ex, :value, nothing)) id = get(ex, :var_id, nothing) if !isnothing(id) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 6a455008094f9..9fef0c0f4494d 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -240,6 +240,10 @@ end # end # """ +src = """ + _ = -1 +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 282c22ac1c8ff..ab18ecc91aeae 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -54,6 +54,19 @@ JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) @test test_mod.z == 2 +#------------------------------------------------------------------------------- +# Placeholders +@test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 + +assign_underscore = parsestmt(SyntaxTree, "_ + 1", filename="foo.jl") +exc 
= try + JuliaLowering.eval(test_mod, assign_underscore) +catch exc + exc +end +@test exc.msg == "all-underscore identifiers are write-only and their values cannot be used in expressions" +@test JuliaLowering.is_ancestor(exc.ex, assign_underscore[1]) + #------------------------------------------------------------------------------- # Function calls # Splatting From bc7ea1a2c521534fe403009be3737745510c54d5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Jun 2024 08:51:36 +1000 Subject: [PATCH 0757/1109] Macro expansion stack provenance This change allows nodes to have more than one source of provenance, represented as a tuple in the `source` attribute. We use this to attach the stack of macrocall expressions during macro expansion. --- JuliaLowering/src/ast.jl | 29 +++++--- JuliaLowering/src/eval.jl | 9 ++- JuliaLowering/src/macro_expansion.jl | 105 +++++++++++++++++---------- JuliaLowering/src/syntax_graph.jl | 70 +++++++++++++++++- JuliaLowering/test/demo.jl | 74 +++++++++---------- JuliaLowering/test/demo_include.jl | 16 ++++ 6 files changed, 208 insertions(+), 95 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 60cfc10714971..8e0646569d0fb 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -38,7 +38,7 @@ function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...) id = newnode!(graph) source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref ex = SyntaxTree(graph, id) - copy_attrs!(ex, proto) + copy_attrs!(ex, proto, true) setattr!(graph, id; source=source, attrs...) return ex end @@ -48,7 +48,7 @@ function makenode(graph::SyntaxGraph, srcref, proto, children...; attrs...) setchildren!(graph, id, _node_ids(graph, children...)) source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref ex = SyntaxTree(graph, id) - copy_attrs!(ex, proto) + copy_attrs!(ex, proto, true) setattr!(graph, id; source=source, attrs...) 
return SyntaxTree(graph, id) end @@ -249,25 +249,27 @@ end #------------------------------------------------------------------------------- # Mapping and copying of AST nodes -function copy_attrs!(dest, src) +function copy_attrs!(dest, src, all=false) # TODO: Make this faster? for (k,v) in pairs(dest.graph.attributes) - if (k !== :source && k !== :kind && k !== :syntax_flags) && haskey(v, src.id) + if (all || (k !== :source && k !== :kind && k !== :syntax_flags)) && haskey(v, src.id) v[dest.id] = v[src.id] end end end -function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}) - sethead!(dest.graph, dest.id, head) +function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}, all=false) + if all + sethead!(dest.graph, dest.id, head) + end end -function mapchildren(f, ctx, ex) +function mapchildren(f, ctx, ex; extra_attrs...) if !haschildren(ex) return ex end orig_children = children(ex) - cs = nothing + cs = isempty(extra_attrs) ? nothing : SyntaxList(ctx) for (i,e) in enumerate(orig_children) c = f(e) if isnothing(cs) @@ -282,12 +284,13 @@ function mapchildren(f, ctx, ex) end if isnothing(cs) # This function should be allocation-free if no children were changed - # by the mapping. + # by the mapping and there's no extra_attrs return ex end cs::SyntaxList ex2 = makenode(ctx, ex, head(ex), cs) copy_attrs!(ex2, ex) + setattr!(ex2; extra_attrs...) 
return ex2 end @@ -295,14 +298,18 @@ end Copy AST `ex` into `ctx` """ function copy_ast(ctx, ex) + srcref = ex.source + if srcref isa NodeId + srcref = copy_ast(ctx, SyntaxTree(syntax_graph(ex), srcref)) + end if haschildren(ex) cs = SyntaxList(ctx) for e in children(ex) push!(cs, copy_ast(ctx, e)) end - ex2 = makenode(ctx, sourceref(ex), head(ex), cs) + ex2 = makenode(ctx, srcref, head(ex), cs) else - ex2 = makeleaf(ctx, sourceref(ex), head(ex)) + ex2 = makeleaf(ctx, srcref, head(ex)) end for (name,attr) in pairs(ex.graph.attributes) if (name !== :source && name !== :kind && name !== :syntax_flags) && diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 105b1271f6271..effaa9e169b15 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -211,6 +211,7 @@ end #------------------------------------------------------------------------------- # Runtime support functions called by lowering +# TODO: Move to runtime.jl struct InterpolationContext{Graph} <: AbstractLoweringContext graph::Graph @@ -239,7 +240,7 @@ function _interpolated_value(ctx, srcref, x) end function _interpolate_ast(ctx::InterpolationContext, ex, depth) - if !_contains_active_interp(ex, depth) + if ctx.current_index[] > length(ctx.values) || !_contains_active_interp(ex, depth) return ex end @@ -270,15 +271,15 @@ function interpolate_ast(ex, values...) TODO(ex, "\$ in interpolate_ast") end # Construct graph for interpolation context. We inherit this from the macro - # context where possible by detecting it using __macro__ctx__. This feels + # context where possible by detecting it using __macro_ctx__. This feels # hacky though. # # Perhaps we should use a ScopedValue for this instead or get it from - # __context__? Nothing feels great here. + # the macro __context__? Nothing feels great here. 
graph = nothing for vals in values for v in vals - if v isa SyntaxTree && !isnothing(getattr(syntax_graph(v), :__macro_ctx__, nothing)) + if v isa SyntaxTree && hasattr(syntax_graph(v), :__macro_ctx__) graph = syntax_graph(v) break end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 03264abde5f0b..319310134254c 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -11,7 +11,7 @@ generates a new layer. struct ScopeLayer id::LayerId mod::Module - is_macro_expansion::Bool + is_macro_expansion::Bool # FIXME end struct MacroExpansionContext{GraphType} <: AbstractLoweringContext @@ -19,21 +19,7 @@ struct MacroExpansionContext{GraphType} <: AbstractLoweringContext next_var_id::Ref{VarId} scope_layers::Vector{ScopeLayer} current_layer::ScopeLayer -end - -function MacroExpansionContext(ctx, mod::Module) - graph = ensure_attributes(syntax_graph(ctx), - var_id=VarId, - scope_layer=LayerId, - __macro_ctx__=Nothing) - layers = Vector{ScopeLayer}() - MacroExpansionContext(graph, Ref{VarId}(1), layers, new_scope_layer(layers, mod, false)) -end - -function new_scope_layer(layers, mod::Module, is_macro_expansion) - layer = ScopeLayer(length(layers)+1, mod, is_macro_expansion) - push!(layers, layer) - return layer + expansion_stack::SyntaxList{GraphType} end #-------------------------------------------------- @@ -118,6 +104,21 @@ function set_scope_layer_recursive!(ex, id, force) ex end +function setup_macro_argument(ctx, ex, layer) + k = kind(ex) + scope_layer = get(ex, :scope_layer, layer.id) + if k == K"module" || k == K"toplevel" || k == K"inert" + makenode(ctx, ex, ex, children(ex); + scope_layer=scope_layer) + elseif haschildren(ex) + mapchildren(e->setup_macro_argument(ctx, e, layer), ctx, ex; + scope_layer=scope_layer) + else + makeleaf(ctx, ex, ex; + scope_layer=scope_layer) + end +end + function eval_macro_name(ctx, ex) # `ex1` might contain a nontrivial mix of scope layers so we can't 
just # `eval()` it, as it's already been partially lowered by this point. @@ -142,7 +143,7 @@ function expand_macro(ctx, ex) # a macro expansion. # In either case, we need to set any unset scope layers before passing the # arguments to the macro call. - macro_args = [set_scope_layer_recursive!(e, ctx.current_layer.id, false) + macro_args = [setup_macro_argument(ctx, e, ctx.current_layer) for e in children(ex)[2:end]] mctx = MacroContext(ctx.graph, macname, ctx.current_layer) expanded = try @@ -163,14 +164,31 @@ function expand_macro(ctx, ex) # If the macro has produced syntax outside the macro context, copy it over. # TODO: Do we expect this always to happen? What is the API for access # to the macro expansion context? + # + # TODO: Can expand_forms_1() do the copying? expanded = copy_ast(ctx, expanded) end - new_layer = new_scope_layer(ctx.scope_layers, parentmodule(macfunc), true) - ctx2 = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) - # Add wrapper block for macro expansion provenance tracking - @ast ctx ex [K"block" expand_forms_1(ctx2, expanded)] + new_layer = ScopeLayer(length(ctx.scope_layers)+1, parentmodule(macfunc), true) + push!(ctx.scope_layers, new_layer) + push!(ctx.expansion_stack, ex) + inner_ctx = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, + new_layer, ctx.expansion_stack) + expanded = expand_forms_1(inner_ctx, expanded) + pop!(ctx.expansion_stack) else - @ast ctx ex expanded::K"Value" + # To get `srcref` correct it's simplest to have a special case here for + # when the macro returns a non-AST. + srcref = (ex.id, Iterators.reverse(ctx.expansion_stack.ids)...) + expanded = @ast ctx srcref expanded::K"Value" + end + return expanded +end + +function macro_stack_srcref(ctx, ex) + if ctx.current_layer.is_macro_expansion + (ex.id, Iterators.reverse(ctx.expansion_stack.ids)...) + else + ex.id end end @@ -188,33 +206,29 @@ need to be dealt with before other lowering. 
interpolations) """ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) - set_scope_layer!(ex, ctx.current_layer.id, false) k = kind(ex) - if k == K"Identifier" - if all(==('_'), ex.name_val) - @ast ctx ex ex=>K"Placeholder" - else - ex - end + if k == K"Identifier" && all(==('_'), ex.name_val) + @ast ctx macro_stack_srcref(ctx,ex) ex=>K"Placeholder" + elseif k == K"Identifier" || k == K"MacroName" || + (is_operator(k) && !haschildren(ex)) # <- TODO: fix upstream + layerid = get(ex, :scope_layer, ctx.current_layer.id) + makeleaf(ctx, macro_stack_srcref(ctx,ex), ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" # Strip "container" nodes @chk numchildren(ex) == 1 expand_forms_1(ctx, ex[1]) - elseif k == K"MacroName" - @ast ctx ex ex=>K"Identifier" - elseif is_operator(k) && !haschildren(ex) # TODO: do in JuliaSyntax? - @ast ctx ex ex=>K"Identifier" elseif k == K"quote" @chk numchildren(ex) == 1 expand_forms_1(ctx, expand_quote(ctx, ex[1])) - elseif k == K"module" || k == K"toplevel" || k == K"inert" - ex elseif k == K"macrocall" expand_macro(ctx, ex) + elseif k == K"module" || k == K"toplevel" || k == K"inert" + # FIXME + makenode(ctx, macro_stack_srcref(ctx,ex), ex, children(ex)) elseif !haschildren(ex) - ex + makeleaf(ctx, macro_stack_srcref(ctx,ex), ex) else - mapchildren(e->expand_forms_1(ctx,e), ctx, ex) + mapchildren(e->expand_forms_1(ctx,e), ctx, ex; source=macro_stack_srcref(ctx,ex)) end end @@ -227,6 +241,19 @@ function expand_forms_1(ctx::MacroExpansionContext, exs::Union{Tuple,AbstractVec end function expand_forms_1(mod::Module, ex::SyntaxTree) - ctx = MacroExpansionContext(ex, mod) - ctx, expand_forms_1(ctx, reparent(ctx, ex)) + graph = ensure_attributes(syntax_graph(ex), + var_id=VarId, + scope_layer=LayerId, + macro_expansion=ScopeLayer, + __macro_ctx__=Nothing) + layers = ScopeLayer[ScopeLayer(1, mod, false)] + ctx = MacroExpansionContext(graph, Ref{VarId}(1), layers, layers[1], 
SyntaxList(graph)) + ex2 = expand_forms_1(ctx, reparent(ctx, ex)) + graph2 = delete_attributes(graph, :__macro_ctx__) + # TODO: Returning the context with pass-specific mutable data is a bad way + # to carry state into the next pass. + ctx2 = MacroExpansionContext(graph2, ctx.next_var_id, ctx.scope_layers, + ctx.current_layer, SyntaxList(graph2)) + return ctx2, reparent(ctx2, ex2) end + diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 78c4329a79acd..4133401fc2516 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -53,6 +53,14 @@ function ensure_attributes(graph::SyntaxGraph; kws...) freeze_attrs(g) end +function delete_attributes(graph::SyntaxGraph, attr_names...) + attributes = Dict(pairs(graph.attributes)...) + for name in attr_names + delete!(attributes, name) + end + SyntaxGraph(graph.edge_ranges, graph.edges, (; pairs(attributes)...)) +end + function newnode!(graph::SyntaxGraph) push!(graph.edge_ranges, 0:-1) # Invalid range start => leaf node return length(graph.edge_ranges) @@ -101,6 +109,10 @@ function getattr(graph::SyntaxGraph, name::Symbol, default) get(getfield(graph, :attributes), name, default) end +function hasattr(graph::SyntaxGraph, name::Symbol) + getattr(graph, name, nothing) !== nothing +end + # FIXME: Probably terribly non-inferrable? function setattr!(graph::SyntaxGraph, id; attrs...) 
for (k,v) in pairs(attrs) @@ -164,7 +176,7 @@ function check_same_graph(x, y) end end -function similar_graph(x, y) +function is_compatible_graph(x, y) syntax_graph(x).edges === syntax_graph(y).edges end @@ -290,11 +302,27 @@ function Base.show(io::IO, ::MIME"text/plain", src::SourceRef) highlight(io, src; note="these are the bytes you're looking for 😊", context_lines_inner=20) end +function _sourceref(sources, id) + i = 1 + while true + i += 1 + s = get(sources, id, nothing) + if s isa NodeId + id = s + else + return s + end + end +end + function sourceref(tree::SyntaxTree) sources = tree.graph.source id::NodeId = tree.id while true - s = get(sources, id, nothing) + s = _sourceref(sources, id) + if s isa Tuple + s = s[1] + end if s isa NodeId id = s else @@ -303,8 +331,42 @@ function sourceref(tree::SyntaxTree) end end +function show_expansion_stack(io::IO, exs) + for (i,ex) in enumerate(exs) + sr = sourceref(ex) + if i > 1 + JuliaSyntax._printstyled(io, "\n\n", fgcolor=:light_black) + first = false + end + note = length(exs) == 1 ? "in source here" : + i > 1 ? 
"expanded from here" : "in macro expansion" + highlight(io, sr, note=note) + end +end + +function expansion_stack(tree::SyntaxTree) + sources = tree.graph.source + id::NodeId = tree.id + while true + s = get(sources, id, nothing) + if s isa NodeId + id = s + else + refs = SyntaxList(tree) + if s isa Tuple + for i in s + push!(refs, SyntaxTree(tree.graph, i)) + end + else + push!(refs, SyntaxTree(tree.graph, id)) + end + return refs + end + end +end + function is_ancestor(ex, ancestor) - if !similar_graph(ex, ancestor) + if !is_compatible_graph(ex, ancestor) return false end sources = ex.graph.source @@ -329,7 +391,7 @@ JuliaSyntax.first_byte(tree::SyntaxTree) = first_byte(sourceref(tree)) JuliaSyntax.last_byte(tree::SyntaxTree) = last_byte(sourceref(tree)) JuliaSyntax.sourcetext(tree::SyntaxTree) = sourcetext(sourceref(tree)) -const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId} +const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId,Tuple} function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9fef0c0f4494d..098fefd3634a3 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -6,7 +6,6 @@ using JuliaLowering using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext using JuliaSyntaxFormatter -using JuliaSyntaxFormatter: FormatContext # Extract variable kind for highlighting purposes function var_kind(e) @@ -21,15 +20,8 @@ function var_kind(e) return info.kind end -function formatsrc(ex; color_by=nothing, kws...) - format_token_style = if isnothing(color_by) - e->nothing - elseif color_by isa Symbol - e->get(e, color_by, nothing) - else - color_by - end - Text(JuliaSyntaxFormatter.format(ex; format_token_style, kws...)) +function formatsrc(ex; kws...) 
+ Text(JuliaSyntaxFormatter.formatsrc(ex; kws...)) end function annotate_scopes(mod, ex) @@ -182,9 +174,9 @@ end # end # """ -src = """ -M.@set_global_in_parent "bent hygiene!" -""" +# src = """ +# M.@set_global_in_parent "bent hygiene!" +# """ # src = """ # begin @@ -216,22 +208,22 @@ end # M.@set_global_in_parent "bent hygiene!" # """ -src = """ -begin - x = 10 - y = 20 - let x = y + x - z = "some string \$x \$y" - - function f(y) - a = M.@foo z - "\$z \$y \$a \$x" - end - print(x) - end - print(x) -end -""" +# src = """ +# begin +# x = 10 +# y = 20 +# let x = y + x +# z = "some string \$x \$y" +# +# function f(y) +# a = M.@foo z +# "\$z \$y \$a \$x" +# end +# print(x) +# end +# print(x) +# end +# """ # src = """ # begin @@ -240,8 +232,15 @@ end # end # """ +# src = """ +# _ = -1 +# """ +src = """ +M.@make_module +""" + src = """ - _ = -1 +M.@nested_return_a_value """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") @@ -251,7 +250,8 @@ ex = ensure_attributes(ex, var_id=Int) in_mod = Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) -@info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) +# @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) +@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.expansion_stack(e)[2:end]) ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) @info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) @@ -262,9 +262,9 @@ ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) @info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) -# ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) -# @info "CodeInfo" ex_expr -# -# eval_result = Base.eval(in_mod, ex_expr) -# @info "Eval" eval_result -# +ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) +@info "CodeInfo" ex_expr + +eval_result = Base.eval(in_mod, ex_expr) 
+@info "Eval" eval_result + diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl index a1a3a0aadbba6..cab5131b4bc94 100644 --- a/JuliaLowering/test/demo_include.jl +++ b/JuliaLowering/test/demo_include.jl @@ -56,4 +56,20 @@ module M end end end + + macro make_module() + :(module X + blah = 10 + end) + end + + macro return_a_value() + 42 + end + + macro nested_return_a_value() + :( + @return_a_value + ) + end end From 5cbddf2de0fa2e8d9d2ff570514ad2ad9fa88d50 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Jun 2024 08:57:20 +1000 Subject: [PATCH 0758/1109] Some rudimentary lowering of `K"doc"` --- JuliaLowering/src/desugaring.jl | 21 ++++++++++++++++----- JuliaLowering/src/eval.jl | 15 +++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index c5acab992681c..984b298d9e58b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -215,7 +215,7 @@ function analyze_function_arg(full_ex) is_nospecialize=is_nospecialize) end -function expand_function_def(ctx, ex) +function expand_function_def(ctx, ex, docs) @chk numchildren(ex) in (1,2) name = ex[1] if kind(name) == K"where" @@ -318,7 +318,7 @@ function expand_function_def(ctx, ex) end @ast ctx ex [ K"block" - [K"method" function_name] + func = [K"method" function_name] [K"method" function_name preamble @@ -326,7 +326,15 @@ function expand_function_def(ctx, ex) body ] ] - [K"unnecessary" function_name] + if !isnothing(docs) + [K"call"(docs) + bind_docs!::K"Value" + func + docs[1] + method_metadata + ] + end + [K"unnecessary" func] ] elseif kind(name) == K"tuple" TODO(name, "Anon function lowering") @@ -527,7 +535,7 @@ This pass simplifies expressions by expanding complicated syntax sugar into a small set of core syntactic forms. For example, field access syntax `a.b` is expanded to a function call `getproperty(a, :b)`. 
""" -function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) +function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) k = kind(ex) if k == K"call" expand_call(ctx, ex) @@ -541,8 +549,11 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree) ex[2]=>K"Symbol" ] ) + elseif k == K"doc" + @chk numchildren(ex) == 2 + sig = expand_forms_2(ctx, ex[2], ex) elseif k == K"function" - expand_forms_2(ctx, expand_function_def(ctx, ex)) + expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) elseif k == K"macro" expand_forms_2(ctx, expand_macro_def(ctx, ex)) elseif k == K"let" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index effaa9e169b15..fb9761b877ce9 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -365,6 +365,21 @@ function module_import(into_mod::Module, is_using::Bool, nothing end +function bind_docs!(f::Function, docstr, method_metadata) + mod = parentmodule(f) + bind = Base.Docs.Binding(mod, nameof(f)) + full_sig = method_metadata[1] + arg_sig = Tuple{full_sig[2:end]...} + linenum = method_metadata[3] + metadata = Dict{Symbol, Any}( + :linenumber => linenum.line, + :module => mod, + ) + if !isnothing(linenum.file) + push!(metadata, :path => string(linenum.file)) + end + Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig) +end #------------------------------------------------------------------------------- # Our version of eval takes our own data structures From 216a419786102e9051c71499da5a3321d7d89d86 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Jun 2024 13:33:43 +1000 Subject: [PATCH 0759/1109] Move runtime functions to runtime.jl --- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/eval.jl | 172 ---------------------------- JuliaLowering/src/runtime.jl | 175 +++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 172 deletions(-) create mode 100644 JuliaLowering/src/runtime.jl diff --git a/JuliaLowering/src/JuliaLowering.jl 
b/JuliaLowering/src/JuliaLowering.jl index 688d7fc1cdc30..b472d0972ecc4 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -26,6 +26,7 @@ _include("macro_expansion.jl") _include("desugaring.jl") _include("scope_analysis.jl") _include("linear_ir.jl") +_include("runtime.jl") _include("eval.jl") diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index fb9761b877ce9..35a7d136aeb70 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -209,178 +209,6 @@ function to_lowered_expr(mod, var_info, ex) end end -#------------------------------------------------------------------------------- -# Runtime support functions called by lowering -# TODO: Move to runtime.jl - -struct InterpolationContext{Graph} <: AbstractLoweringContext - graph::Graph - values::Tuple - current_index::Ref{Int} -end - -function _contains_active_interp(ex, depth) - k = kind(ex) - if k == K"$" && depth == 0 - return true - end - inner_depth = k == K"quote" ? depth + 1 : - k == K"$" ? depth - 1 : - depth - return any(_contains_active_interp(c, inner_depth) for c in children(ex)) -end - -# Produce interpolated node for `$x` syntax -function _interpolated_value(ctx, srcref, x) - if x isa SyntaxTree - x.graph === ctx.graph ? x : copy_ast(ctx, x) - else - makeleaf(ctx, srcref, K"Value", x) - end -end - -function _interpolate_ast(ctx::InterpolationContext, ex, depth) - if ctx.current_index[] > length(ctx.values) || !_contains_active_interp(ex, depth) - return ex - end - - # We have an interpolation deeper in the tree somewhere - expand to an - # expression - inner_depth = kind(ex) == K"quote" ? depth + 1 : - kind(ex) == K"$" ? depth - 1 : - depth - expanded_children = SyntaxList(ctx) - for e in children(ex) - if kind(e) == K"$" && inner_depth == 0 - vals = ctx.values[ctx.current_index[]]::Tuple - ctx.current_index[] += 1 - for (i,v) in enumerate(vals) - srcref = numchildren(e) == 1 ? 
e[1] : e[i] - push!(expanded_children, _interpolated_value(ctx, srcref, v)) - end - else - push!(expanded_children, _interpolate_ast(ctx, e, inner_depth)) - end - end - - makenode(ctx, ex, head(ex), expanded_children) -end - -function interpolate_ast(ex, values...) - if kind(ex) == K"$" - TODO(ex, "\$ in interpolate_ast") - end - # Construct graph for interpolation context. We inherit this from the macro - # context where possible by detecting it using __macro_ctx__. This feels - # hacky though. - # - # Perhaps we should use a ScopedValue for this instead or get it from - # the macro __context__? Nothing feels great here. - graph = nothing - for vals in values - for v in vals - if v isa SyntaxTree && hasattr(syntax_graph(v), :__macro_ctx__) - graph = syntax_graph(v) - break - end - end - end - if isnothing(graph) - graph = SyntaxGraph() - ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, - value=Any, name_val=String, scope_layer=LayerId) - end - ctx = InterpolationContext(graph, values, Ref(1)) - # We must copy the AST into our context to use it as the source reference - # of generated expressions. - ex1 = copy_ast(ctx, ex) - _interpolate_ast(ctx, ex1, 0) -end - - -# Produce node corresponding to `srcref` when there was an interpolation among -# `children` -function interpolate_node(ctx::InterpolationContext, srcref, children...) - makenode(ctx, sourceref(srcref), head(srcref), children...) -end - -# Construct new bare module including only the "default names" -# -# using Core -# const modname = modval -# public modname -# -# And run statments in the toplevel expression `body` -function eval_module(parentmod, modname, body) - # Here we just use `eval()` with an Expr. - # If we wanted to avoid this we'd need to reproduce a lot of machinery from - # jl_eval_module_expr() - # - # 1. Register / deparent toplevel modules - # 2. Set binding in parent module - # 3. 
Deal with replacing modules - # * Warn if replacing - # * Root old module being replaced - # 4. Run __init__ - # * Also run __init__ for any children after parent is defined - # mod = @ccall jl_new_module(Symbol(modname)::Symbol, parentmod::Module)::Any - # ... - name = Symbol(modname) - eval(parentmod, :( - baremodule $name - $eval($name, $body) - end - )) -end - -# Evaluate content of `import` or `using` statement -function module_import(into_mod::Module, is_using::Bool, - from_mod::Union{Nothing,Core.SimpleVector}, paths::Core.SimpleVector) - # For now, this function converts our lowered representation back to Expr - # and calls eval() to avoid replicating all of the fiddly logic in - # jl_toplevel_eval_flex. - # TODO: ccall Julia runtime functions directly? - # * jl_module_using jl_module_use_as - # * import_module jl_module_import_as - path_args = [] - i = 1 - while i < length(paths) - nsyms = paths[i]::Int - n = i + nsyms - path = Expr(:., [Symbol(paths[i+j]::String) for j = 1:nsyms]...) - as_name = paths[i+nsyms+1] - push!(path_args, isnothing(as_name) ? path : - Expr(:as, path, Symbol(as_name))) - i += nsyms + 2 - end - ex = if isnothing(from_mod) - Expr(is_using ? :using : :import, - path_args...) - else - from_path = Expr(:., [Symbol(s::String) for s in from_mod]...) - Expr(is_using ? 
:using : :import, - Expr(:(:), from_path, path_args...)) - end - eval(into_mod, ex) - nothing -end - -function bind_docs!(f::Function, docstr, method_metadata) - mod = parentmodule(f) - bind = Base.Docs.Binding(mod, nameof(f)) - full_sig = method_metadata[1] - arg_sig = Tuple{full_sig[2:end]...} - linenum = method_metadata[3] - metadata = Dict{Symbol, Any}( - :linenumber => linenum.line, - :module => mod, - ) - if !isnothing(linenum.file) - push!(metadata, :path => string(linenum.file)) - end - Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig) -end - #------------------------------------------------------------------------------- # Our version of eval takes our own data structures function Core.eval(mod::Module, ex::SyntaxTree) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl new file mode 100644 index 0000000000000..c2561b29b2033 --- /dev/null +++ b/JuliaLowering/src/runtime.jl @@ -0,0 +1,175 @@ +# Runtime support functionality. +# +# Lowering generates code which uses these functions and types but it doesn't +# call them directly. +# +# These should probably move to `Core` at some point? + +struct InterpolationContext{Graph} <: AbstractLoweringContext + graph::Graph + values::Tuple + current_index::Ref{Int} +end + +function _contains_active_interp(ex, depth) + k = kind(ex) + if k == K"$" && depth == 0 + return true + end + inner_depth = k == K"quote" ? depth + 1 : + k == K"$" ? depth - 1 : + depth + return any(_contains_active_interp(c, inner_depth) for c in children(ex)) +end + +# Produce interpolated node for `$x` syntax +function _interpolated_value(ctx, srcref, x) + if x isa SyntaxTree + x.graph === ctx.graph ? 
x : copy_ast(ctx, x) + else + makeleaf(ctx, srcref, K"Value", x) + end +end + +function _interpolate_ast(ctx::InterpolationContext, ex, depth) + if ctx.current_index[] > length(ctx.values) || !_contains_active_interp(ex, depth) + return ex + end + + # We have an interpolation deeper in the tree somewhere - expand to an + # expression + inner_depth = kind(ex) == K"quote" ? depth + 1 : + kind(ex) == K"$" ? depth - 1 : + depth + expanded_children = SyntaxList(ctx) + for e in children(ex) + if kind(e) == K"$" && inner_depth == 0 + vals = ctx.values[ctx.current_index[]]::Tuple + ctx.current_index[] += 1 + for (i,v) in enumerate(vals) + srcref = numchildren(e) == 1 ? e[1] : e[i] + push!(expanded_children, _interpolated_value(ctx, srcref, v)) + end + else + push!(expanded_children, _interpolate_ast(ctx, e, inner_depth)) + end + end + + makenode(ctx, ex, head(ex), expanded_children) +end + +function interpolate_ast(ex, values...) + if kind(ex) == K"$" + TODO(ex, "\$ in interpolate_ast") + end + # Construct graph for interpolation context. We inherit this from the macro + # context where possible by detecting it using __macro_ctx__. This feels + # hacky though. + # + # Perhaps we should use a ScopedValue for this instead or get it from + # the macro __context__? Nothing feels great here. + graph = nothing + for vals in values + for v in vals + if v isa SyntaxTree && hasattr(syntax_graph(v), :__macro_ctx__) + graph = syntax_graph(v) + break + end + end + end + if isnothing(graph) + graph = SyntaxGraph() + ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String, scope_layer=LayerId) + end + ctx = InterpolationContext(graph, values, Ref(1)) + # We must copy the AST into our context to use it as the source reference + # of generated expressions. 
+ ex1 = copy_ast(ctx, ex) + _interpolate_ast(ctx, ex1, 0) +end + + +# Produce node corresponding to `srcref` when there was an interpolation among +# `children` +function interpolate_node(ctx::InterpolationContext, srcref, children...) + makenode(ctx, sourceref(srcref), head(srcref), children...) +end + +# Construct new bare module including only the "default names" +# +# using Core +# const modname = modval +# public modname +# +# And run statments in the toplevel expression `body` +function eval_module(parentmod, modname, body) + # Here we just use `eval()` with an Expr. + # If we wanted to avoid this we'd need to reproduce a lot of machinery from + # jl_eval_module_expr() + # + # 1. Register / deparent toplevel modules + # 2. Set binding in parent module + # 3. Deal with replacing modules + # * Warn if replacing + # * Root old module being replaced + # 4. Run __init__ + # * Also run __init__ for any children after parent is defined + # mod = @ccall jl_new_module(Symbol(modname)::Symbol, parentmod::Module)::Any + # ... + name = Symbol(modname) + eval(parentmod, :( + baremodule $name + $eval($name, $body) + end + )) +end + +# Evaluate content of `import` or `using` statement +function module_import(into_mod::Module, is_using::Bool, + from_mod::Union{Nothing,Core.SimpleVector}, paths::Core.SimpleVector) + # For now, this function converts our lowered representation back to Expr + # and calls eval() to avoid replicating all of the fiddly logic in + # jl_toplevel_eval_flex. + # TODO: ccall Julia runtime functions directly? + # * jl_module_using jl_module_use_as + # * import_module jl_module_import_as + path_args = [] + i = 1 + while i < length(paths) + nsyms = paths[i]::Int + n = i + nsyms + path = Expr(:., [Symbol(paths[i+j]::String) for j = 1:nsyms]...) + as_name = paths[i+nsyms+1] + push!(path_args, isnothing(as_name) ? path : + Expr(:as, path, Symbol(as_name))) + i += nsyms + 2 + end + ex = if isnothing(from_mod) + Expr(is_using ? 
:using : :import, + path_args...) + else + from_path = Expr(:., [Symbol(s::String) for s in from_mod]...) + Expr(is_using ? :using : :import, + Expr(:(:), from_path, path_args...)) + end + eval(into_mod, ex) + nothing +end + +function bind_docs!(f::Function, docstr, method_metadata) + mod = parentmodule(f) + bind = Base.Docs.Binding(mod, nameof(f)) + full_sig = method_metadata[1] + arg_sig = Tuple{full_sig[2:end]...} + linenum = method_metadata[3] + metadata = Dict{Symbol, Any}( + :linenumber => linenum.line, + :module => mod, + ) + if !isnothing(linenum.file) + push!(metadata, :path => string(linenum.file)) + end + Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig) +end + From 10dcef1516d037374f64f399bff6af2f7edd29e6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 14 Jun 2024 16:39:56 +1000 Subject: [PATCH 0760/1109] Record macro expansions in expression provenance Here we track expression provenance as a tree. Some operations like macro expansion and quote expansion have multiple sources of provenance for a single expression - here this is represented as a tuple of nodes in the `source` attribute. This allows us to extract the macro expansion stack for any expression by walking the provenance tree; we use this to fill in the proper debug info when converting to CodeInfo. 
--- JuliaLowering/README.md | 104 ++++++++++++++++++++++- JuliaLowering/src/ast.jl | 49 ++++++----- JuliaLowering/src/eval.jl | 68 +++++++++++---- JuliaLowering/src/macro_expansion.jl | 103 ++++++++++------------- JuliaLowering/src/runtime.jl | 25 +++--- JuliaLowering/src/syntax_graph.jl | 120 +++++++++++---------------- JuliaLowering/src/utils.jl | 49 +++++++++++ JuliaLowering/test/demo.jl | 83 +++++++++--------- JuliaLowering/test/demo_include.jl | 21 ++--- JuliaLowering/test/demo_include_2.jl | 12 +++ JuliaLowering/test/runtests.jl | 49 ++++++++++- 11 files changed, 442 insertions(+), 241 deletions(-) create mode 100644 JuliaLowering/test/demo_include_2.jl diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index ab359de8567ca..7644c5bf01394 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -77,6 +77,108 @@ are similar. Analogy 3: Graph algorithms which represent graphs as a compact array of node ids and edges with integer indices, rather than using a linked data structure. +## Provenance tracking + +Expression provenance is tracked through lowering by attaching provenance +information in the `source` attribute to every expression as it is generated. +For example when parsing a source file we have + +```julia +julia> ex = parsestmt(SyntaxTree, "a + b", filename="foo.jl") +SyntaxTree with attributes kind,value,name_val,syntax_flags,source +[call-i] │ + a │ + + │ + b │ + +julia> ex[3].source +a + b +# ╙ ── these are the bytes you're looking for 😊 +``` + +The `provenance` function should be used to look up the `source` attribute and +the `showprov` function used to inspect the content (this is preferred because +the encoding of `source` is an implementation detail). For example: + +```julia +julia> showprov(ex[3]) +a + b +# ╙ ── in source +# @ foo.jl:1 +``` + +During macro expansion and lowering provenance gets more complicated because an +expression can arise from multiple sources. 
For example, we want to keep track +of the entire stack of macro expansions an expression was generated by, while +also recording where it occurred in the original source file. + +For this, we use a tree data structure. Let's look at the following pair of +macros + +```julia +julia> JuliaLowering.include_string(Main, raw""" + module M + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end + end + """, "some_macros.jl") +``` + +The tree which arises from macro expanding this is pretty simple: + +```julia +julia> expanded = JuliaLowering.macroexpand(Main, parsestmt(SyntaxTree, "M.@outer()")) +SyntaxTree with attributes scope_layer,kind,value,var_id,name_val,syntax_flags,source +[tuple-p] │ + 1 │ + 2 │ +``` + +but the provenance information recorded for the second element `2` of this +tuple is not trivial; it includes the macro call expressions for `@inner` and +`@outer`. We can show this in tree form: + +```julia +julia> showprov(expanded[2], tree=true) +2 +├─ 2 +│ └─ @ some_macros.jl:3 +└─ (macrocall @inner) + ├─ (macrocall @inner) + │ └─ @ some_macros.jl:7 + └─ (macrocall-p (. M @outer)) + └─ @ foo.jl:1 +``` + +or as a more human readable flattened list highlighting of source ranges: + +```julia +module M + macro inner() + :(2) +# ╙ ── in source + end + +# @ some_macros.jl:3 + + + macro outer() + :((1, @inner)) +# └────┘ ── in macro expansion + end +end +# @ some_macros.jl:7 + +M.@outer() +└────────┘ ── in macro expansion +# @ foo.jl:1 +``` + ## Hygiene ### Problems with Hygiene in Julia's exiting macro system @@ -355,7 +457,7 @@ the setup of its arguments would need to go in a thunk. We've currently got an odd mixture of imperative and declarative lowered code. -## Notes on racket's hygiene +## Notes on Racket's hygiene People look at [Racket](https://racket-lang.org/) as an example of a very complete system of hygienic macros. 
We should learn from them, but keeping in diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 8e0646569d0fb..edd5271828f09 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -34,22 +34,24 @@ function _node_ids(graph::SyntaxGraph, cs::SyntaxList) cs.ids end +_unpack_srcref(graph, srcref::SyntaxTree) = _node_id(graph, srcref) +_unpack_srcref(graph, srcref::Tuple) = _node_ids(graph, srcref...) +_unpack_srcref(graph, srcref) = srcref + function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...) id = newnode!(graph) - source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref ex = SyntaxTree(graph, id) copy_attrs!(ex, proto, true) - setattr!(graph, id; source=source, attrs...) + setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...) return ex end function makenode(graph::SyntaxGraph, srcref, proto, children...; attrs...) id = newnode!(graph) setchildren!(graph, id, _node_ids(graph, children...)) - source = srcref isa SyntaxTree ? _node_id(graph, srcref) : srcref ex = SyntaxTree(graph, id) copy_attrs!(ex, proto, true) - setattr!(graph, id; source=source, attrs...) + setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...) return SyntaxTree(graph, id) end @@ -251,9 +253,13 @@ end # Mapping and copying of AST nodes function copy_attrs!(dest, src, all=false) # TODO: Make this faster? 
- for (k,v) in pairs(dest.graph.attributes) - if (all || (k !== :source && k !== :kind && k !== :syntax_flags)) && haskey(v, src.id) - v[dest.id] = v[src.id] + for (name, attr) in pairs(src.graph.attributes) + if (all || (name !== :source && name !== :kind && name !== :syntax_flags)) && + haskey(attr, src.id) + dest_attr = getattr(dest.graph, name, nothing) + if !isnothing(dest_attr) + dest_attr[dest.id] = attr[src.id] + end end end end @@ -298,27 +304,22 @@ end Copy AST `ex` into `ctx` """ function copy_ast(ctx, ex) - srcref = ex.source - if srcref isa NodeId - srcref = copy_ast(ctx, SyntaxTree(syntax_graph(ex), srcref)) - end + # TODO: Do we need to keep a mapping of node IDs to ensure we don't + # double-copy here in the case when some tree nodes are pointed to by + # multiple parents? (How much does this actually happen in practice?) + s = ex.source + # TODO: Figure out how to use provenance() here? + srcref = s isa NodeId ? copy_ast(ctx, SyntaxTree(ex.graph, s)) : + s isa Tuple ? map(i->copy_ast(ctx, SyntaxTree(ex.graph, i)), s) : + s if haschildren(ex) cs = SyntaxList(ctx) for e in children(ex) push!(cs, copy_ast(ctx, e)) end - ex2 = makenode(ctx, srcref, head(ex), cs) + ex2 = makenode(ctx, srcref, ex, cs) else - ex2 = makeleaf(ctx, srcref, head(ex)) - end - for (name,attr) in pairs(ex.graph.attributes) - if (name !== :source && name !== :kind && name !== :syntax_flags) && - haskey(attr, ex.id) - attr2 = getattr(ex2.graph, name, nothing) - if !isnothing(attr2) - attr2[ex2.id] = attr[ex.id] - end - end + ex2 = makeleaf(ctx, srcref, ex) end return ex2 end @@ -329,9 +330,7 @@ end Copy `ex`, adopting the scope layer of `ref`. 
""" function adopt_scope(ex, scope_layer::LayerId) - ex1 = copy_ast(ex, ex) - set_scope_layer_recursive!(ex1, scope_layer, true) - ex1 + set_scope_layer(ex, ex, scope_layer, true) end function adopt_scope(ex, ref::SyntaxTree) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 35a7d136aeb70..15b0c99cd58ff 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -6,6 +6,11 @@ function lower(mod::Module, ex) ex4 end +function macroexpand(mod::Module, ex) + ctx1, ex1 = expand_forms_1(mod, ex) + ex1 +end + _CodeInfo_need_ver = v"1.12.0-DEV.512" if VERSION < _CodeInfo_need_ver function _CodeInfo(args...) @@ -37,29 +42,58 @@ else end end +function _compress_debuginfo(info) + filename, edges, codelocs = info + edges = Core.svec(map(_compress_debuginfo, edges)...) + codelocs = @ccall jl_compress_codelocs((-1)::Int32, codelocs::Any, + div(length(codelocs),3)::Csize_t)::String + Core.DebugInfo(Symbol(filename), nothing, edges, codelocs) +end + function ir_debug_info(ex) code = children(ex) - # Record low resolution locations in debug info - num_stmts = length(code) - codelocs = zeros(Int32, 3*num_stmts) - topfile = Symbol(filename(ex)) - topline,_ = source_location(ex) + e1 = first(flattened_provenance(ex)) + topfile = filename(e1) - edges = Core.DebugInfo[] - - for i in 1:num_stmts - line,_ = source_location(code[i]) - # TODO: Macro inlining stack filename(code[i]) - codelocs[3*i-2] = line - codelocs[3*i-1] = 0 # Index into edges - codelocs[3*i ] = 0 # Index into edges[linetable] + current_codelocs_stack = [(topfile, [], Vector{Int32}())] + for i in 1:length(code) + locstk = [(filename(e), source_location(e)[1]) for e in flattened_provenance(code[i])] + for j in 1:max(length(locstk), length(current_codelocs_stack)) + if j > length(locstk) || (length(current_codelocs_stack) >= j && + current_codelocs_stack[j][1] != locstk[j][1]) + while length(current_codelocs_stack) >= j + info = pop!(current_codelocs_stack) + 
push!(last(current_codelocs_stack)[2], info) + end + end + if j > length(locstk) + break + elseif j > length(current_codelocs_stack) + push!(current_codelocs_stack, (locstk[j][1], [], Vector{Int32}())) + end + end + for (j, (file,line)) in enumerate(locstk) + fn, edges, codelocs = current_codelocs_stack[j] + @assert fn == file + if j < length(locstk) + edge_index = length(edges) + 1 + edge_codeloc_index = fld1(length(current_codelocs_stack[j+1][3]) + 1, 3) + else + edge_index = 0 + edge_codeloc_index = 0 + end + push!(codelocs, line) + push!(codelocs, edge_index) + push!(codelocs, edge_codeloc_index) + end + end + while length(current_codelocs_stack) > 1 + info = pop!(current_codelocs_stack) + push!(last(current_codelocs_stack)[2], info) end - codelocs = @ccall jl_compress_codelocs(topline::Int32, codelocs::Any, - num_stmts::Csize_t)::String - edges = Core.svec(edges...) - Core.DebugInfo(topfile, nothing, edges, codelocs) + _compress_debuginfo(only(current_codelocs_stack)) end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 319310134254c..d668d5ff3465d 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -19,7 +19,6 @@ struct MacroExpansionContext{GraphType} <: AbstractLoweringContext next_var_id::Ref{VarId} scope_layers::Vector{ScopeLayer} current_layer::ScopeLayer - expansion_stack::SyntaxList{GraphType} end #-------------------------------------------------- @@ -41,6 +40,19 @@ end function expand_quote(ctx, ex) unquoted = SyntaxTree[] collect_unquoted!(ctx, unquoted, ex, 0) + # Unlike user-defined macro expansion, we don't call append_sourceref for + # the entire expression produced by `quote` expansion. We could, but it + # seems unnecessary for `quote` because the surface syntax is a transparent + # representation of the expansion process. 
However, it's useful to add the + # extra srcref in a more targetted way for $ interpolations inside + # interpolate_ast, so we do that there. + # + # In principle, particular user-defined macros could opt into a similar + # mechanism. + # + # TODO: Should we try adding a srcref to the `quote` node only for the + # extra syntax generated by expand_quote so srcref essentially becomes + # (ex, @HERE) ? @ast ctx ex [K"call" interpolate_ast::K"Value" [K"inert" ex] @@ -77,41 +89,22 @@ function Base.showerror(io::IO, exc::MacroExpansionError) " in module ", ctx.scope_layer.mod) end print(io, ":\n") + # FIXME: src = sourceref(exc.ex) highlight(io, src.file, first_byte(src):last_byte(src), note=exc.msg) end -function set_scope_layer!(ex, id, force) +function set_scope_layer(ctx, ex, layer_id, force) k = kind(ex) - if (k == K"Identifier" || k == K"MacroName" || (is_operator(k) && !haschildren(ex))) && - (force || !hasattr(ex, :scope_layer)) - setattr!(ex; scope_layer=id) - end -end - -function set_scope_layer_recursive!(ex, id, force) - k = kind(ex) - if k == K"module" || k == K"toplevel" - return - end - if haschildren(ex) - for c in children(ex) - set_scope_layer_recursive!(c, id, force) - end - else - set_scope_layer!(ex, id, force) - end - ex -end - -function setup_macro_argument(ctx, ex, layer) - k = kind(ex) - scope_layer = get(ex, :scope_layer, layer.id) + scope_layer = force ? layer_id : get(ex, :scope_layer, layer_id) if k == K"module" || k == K"toplevel" || k == K"inert" makenode(ctx, ex, ex, children(ex); scope_layer=scope_layer) + elseif k == K"." + makenode(ctx, ex, ex, set_scope_layer(ctx, ex[1], layer_id, force), ex[2], + scope_layer=scope_layer) elseif haschildren(ex) - mapchildren(e->setup_macro_argument(ctx, e, layer), ctx, ex; + mapchildren(e->set_scope_layer(ctx, e, layer_id, force), ctx, ex; scope_layer=scope_layer) else makeleaf(ctx, ex, ex; @@ -143,7 +136,7 @@ function expand_macro(ctx, ex) # a macro expansion. 
# In either case, we need to set any unset scope layers before passing the # arguments to the macro call. - macro_args = [setup_macro_argument(ctx, e, ctx.current_layer) + macro_args = [set_scope_layer(ctx, e, ctx.current_layer.id, false) for e in children(ex)[2:end]] mctx = MacroContext(ctx.graph, macname, ctx.current_layer) expanded = try @@ -160,35 +153,35 @@ function expand_macro(ctx, ex) end if expanded isa SyntaxTree - if syntax_graph(expanded) !== syntax_graph(ctx) + if !is_compatible_graph(ctx, expanded) # If the macro has produced syntax outside the macro context, copy it over. # TODO: Do we expect this always to happen? What is the API for access # to the macro expansion context? - # - # TODO: Can expand_forms_1() do the copying? expanded = copy_ast(ctx, expanded) end + expanded = append_sourceref(ctx, expanded, ex) new_layer = ScopeLayer(length(ctx.scope_layers)+1, parentmodule(macfunc), true) push!(ctx.scope_layers, new_layer) - push!(ctx.expansion_stack, ex) - inner_ctx = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, - new_layer, ctx.expansion_stack) + inner_ctx = MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) expanded = expand_forms_1(inner_ctx, expanded) - pop!(ctx.expansion_stack) else - # To get `srcref` correct it's simplest to have a special case here for - # when the macro returns a non-AST. - srcref = (ex.id, Iterators.reverse(ctx.expansion_stack.ids)...) - expanded = @ast ctx srcref expanded::K"Value" + expanded = @ast ctx ex expanded::K"Value" end return expanded end -function macro_stack_srcref(ctx, ex) - if ctx.current_layer.is_macro_expansion - (ex.id, Iterators.reverse(ctx.expansion_stack.ids)...) +# Add a secondary source of provenance to each expression in the tree `ex`. +function append_sourceref(ctx, ex, secondary_prov) + srcref = (ex, secondary_prov) + if haschildren(ex) + if kind(ex) == K"macrocall" + makenode(ctx, srcref, ex, children(ex)...) 
+ else + makenode(ctx, srcref, ex, + map(e->append_sourceref(ctx, e, secondary_prov), children(ex))...) + end else - ex.id + makeleaf(ctx, srcref, ex) end end @@ -208,11 +201,11 @@ need to be dealt with before other lowering. function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" && all(==('_'), ex.name_val) - @ast ctx macro_stack_srcref(ctx,ex) ex=>K"Placeholder" + @ast ctx ex ex=>K"Placeholder" elseif k == K"Identifier" || k == K"MacroName" || (is_operator(k) && !haschildren(ex)) # <- TODO: fix upstream layerid = get(ex, :scope_layer, ctx.current_layer.id) - makeleaf(ctx, macro_stack_srcref(ctx,ex), ex, kind=K"Identifier", scope_layer=layerid) + makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" # Strip "container" nodes @chk numchildren(ex) == 1 @@ -223,37 +216,27 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) elseif k == K"macrocall" expand_macro(ctx, ex) elseif k == K"module" || k == K"toplevel" || k == K"inert" - # FIXME - makenode(ctx, macro_stack_srcref(ctx,ex), ex, children(ex)) + ex elseif !haschildren(ex) - makeleaf(ctx, macro_stack_srcref(ctx,ex), ex) + ex else - mapchildren(e->expand_forms_1(ctx,e), ctx, ex; source=macro_stack_srcref(ctx,ex)) - end -end - -function expand_forms_1(ctx::MacroExpansionContext, exs::Union{Tuple,AbstractVector}) - res = SyntaxList(ctx) - for e in exs - push!(res, expand_forms_1(ctx, e)) + mapchildren(e->expand_forms_1(ctx,e), ctx, ex) end - res end function expand_forms_1(mod::Module, ex::SyntaxTree) graph = ensure_attributes(syntax_graph(ex), var_id=VarId, scope_layer=LayerId, - macro_expansion=ScopeLayer, __macro_ctx__=Nothing) layers = ScopeLayer[ScopeLayer(1, mod, false)] - ctx = MacroExpansionContext(graph, Ref{VarId}(1), layers, layers[1], SyntaxList(graph)) + ctx = MacroExpansionContext(graph, Ref{VarId}(1), layers, layers[1]) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) graph2 = 
delete_attributes(graph, :__macro_ctx__) # TODO: Returning the context with pass-specific mutable data is a bad way # to carry state into the next pass. ctx2 = MacroExpansionContext(graph2, ctx.next_var_id, ctx.scope_layers, - ctx.current_layer, SyntaxList(graph2)) + ctx.current_layer) return ctx2, reparent(ctx2, ex2) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index c2561b29b2033..0af8e339a9d59 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -23,11 +23,14 @@ function _contains_active_interp(ex, depth) end # Produce interpolated node for `$x` syntax -function _interpolated_value(ctx, srcref, x) - if x isa SyntaxTree - x.graph === ctx.graph ? x : copy_ast(ctx, x) +function _interpolated_value(ctx, srcref, ex) + if ex isa SyntaxTree + if !is_compatible_graph(ctx, ex) + ex = copy_ast(ctx, ex) + end + append_sourceref(ctx, ex, srcref) else - makeleaf(ctx, srcref, K"Value", x) + makeleaf(ctx, srcref, K"Value", ex) end end @@ -47,7 +50,7 @@ function _interpolate_ast(ctx::InterpolationContext, ex, depth) vals = ctx.values[ctx.current_index[]]::Tuple ctx.current_index[] += 1 for (i,v) in enumerate(vals) - srcref = numchildren(e) == 1 ? e[1] : e[i] + srcref = numchildren(e) == 1 ? e : e[i] push!(expanded_children, _interpolated_value(ctx, srcref, v)) end else @@ -78,9 +81,8 @@ function interpolate_ast(ex, values...) end end if isnothing(graph) - graph = SyntaxGraph() - ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, - value=Any, name_val=String, scope_layer=LayerId) + graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String, scope_layer=LayerId) end ctx = InterpolationContext(graph, values, Ref(1)) # We must copy the AST into our context to use it as the source reference @@ -89,13 +91,6 @@ function interpolate_ast(ex, values...) 
_interpolate_ast(ctx, ex1, 0) end - -# Produce node corresponding to `srcref` when there was an interpolation among -# `children` -function interpolate_node(ctx::InterpolationContext, srcref, children...) - makenode(ctx, sourceref(srcref), head(srcref), children...) -end - # Construct new bare module including only the "default names" # # using Core diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 4133401fc2516..c30e2536b9f3d 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -27,6 +27,10 @@ function _show_attrs(io, attributes::NamedTuple) show(io, MIME("text/plain"), Dict(pairs(attributes)...)) end +function attrnames(graph::SyntaxGraph) + keys(graph.attributes) +end + function Base.show(io::IO, ::MIME"text/plain", graph::SyntaxGraph) print(io, typeof(graph), " with $(length(graph.edge_ranges)) vertices, $(length(graph.edges)) edges, and attributes:\n") @@ -113,7 +117,7 @@ function hasattr(graph::SyntaxGraph, name::Symbol) getattr(graph, name, nothing) !== nothing end -# FIXME: Probably terribly non-inferrable? +# TODO: Probably terribly non-inferrable? function setattr!(graph::SyntaxGraph, id; attrs...) for (k,v) in pairs(attrs) getattr(graph, k)[id] = v @@ -121,7 +125,7 @@ function setattr!(graph::SyntaxGraph, id; attrs...) end function Base.getproperty(graph::SyntaxGraph, name::Symbol) - # FIXME: Remove access to internals + # TODO: Remove access to internals? 
name === :edge_ranges && return getfield(graph, :edge_ranges) name === :edges && return getfield(graph, :edges) name === :attributes && return getfield(graph, :attributes) @@ -171,7 +175,6 @@ syntax_graph(graph::SyntaxGraph) = graph function check_same_graph(x, y) if syntax_graph(x) !== syntax_graph(y) - @info "" syntax_graph(x) syntax_graph(y) x y error("Mismatching syntax graphs") end end @@ -187,7 +190,7 @@ struct SyntaxTree{GraphType} end function Base.getproperty(tree::SyntaxTree, name::Symbol) - # FIXME: Remove access to internals + # TODO: Remove access to internals? name === :graph && return getfield(tree, :graph) name === :id && return getfield(tree, :id) id = getfield(tree, :id) @@ -302,15 +305,28 @@ function Base.show(io::IO, ::MIME"text/plain", src::SourceRef) highlight(io, src; note="these are the bytes you're looking for 😊", context_lines_inner=20) end + +function provenance(ex::SyntaxTree) + s = ex.source + if s isa NodeId + return (SyntaxTree(ex.graph, s),) + elseif s isa Tuple + return SyntaxTree.((ex.graph,), s) + else + return (s,) + end +end + + function _sourceref(sources, id) i = 1 while true i += 1 - s = get(sources, id, nothing) + s = sources[id] if s isa NodeId id = s else - return s + return s, id end end end @@ -319,7 +335,7 @@ function sourceref(tree::SyntaxTree) sources = tree.graph.source id::NodeId = tree.id while true - s = _sourceref(sources, id) + s, _ = _sourceref(sources, id) if s isa Tuple s = s[1] end @@ -331,40 +347,25 @@ function sourceref(tree::SyntaxTree) end end -function show_expansion_stack(io::IO, exs) - for (i,ex) in enumerate(exs) - sr = sourceref(ex) - if i > 1 - JuliaSyntax._printstyled(io, "\n\n", fgcolor=:light_black) - first = false +function _flattened_provenance(refs, graph, sources, id) + # TODO: Implement in terms of `provenance()`? + s, id2 = _sourceref(sources, id) + if s isa Tuple + for i in s + _flattened_provenance(refs, graph, sources, i) end - note = length(exs) == 1 ? "in source here" : - i > 1 ? 
"expanded from here" : "in macro expansion" - highlight(io, sr, note=note) + else + push!(refs, SyntaxTree(graph, id2)) end end -function expansion_stack(tree::SyntaxTree) - sources = tree.graph.source - id::NodeId = tree.id - while true - s = get(sources, id, nothing) - if s isa NodeId - id = s - else - refs = SyntaxList(tree) - if s isa Tuple - for i in s - push!(refs, SyntaxTree(tree.graph, i)) - end - else - push!(refs, SyntaxTree(tree.graph, id)) - end - return refs - end - end +function flattened_provenance(ex::SyntaxTree) + refs = SyntaxList(ex) + _flattened_provenance(refs, ex.graph, ex.graph.source, ex.id) + return reverse(refs) end + function is_ancestor(ex, ancestor) if !is_compatible_graph(ex, ancestor) return false @@ -433,51 +434,30 @@ function _value_string(ex) return str end -function _show_syntax_tree(io, current_filename, node, indent, show_byte_offsets) - sr = sourceref(node) - if !isnothing(sr) - fname = filename(sr) - line, col = source_location(sr) - posstr = "$(lpad(line, 4)):$(rpad(col,3))" - if show_byte_offsets - posstr *= "│$(lpad(first_byte(sr),6)):$(rpad(last_byte(sr),6))" - end - else - fname = nothing - posstr = " " - if show_byte_offsets - posstr *= "│ " - end - end - val = get(node, :value, nothing) - nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : _value_string(node) +function _show_syntax_tree(io, ex, indent) + val = get(ex, :value, nothing) + nodestr = haschildren(ex) ? 
"[$(untokenize(head(ex)))]" : _value_string(ex) treestr = string(indent, nodestr) std_attrs = Set([:name_val,:value,:kind,:syntax_flags,:source,:var_id]) - attrstr = join([attrsummary(n, getproperty(node, n)) for n in attrnames(node) if n ∉ std_attrs], ",") - if !isempty(attrstr) - treestr = string(rpad(treestr, 40), "│ $attrstr") - end + attrstr = join([attrsummary(n, getproperty(ex, n)) + for n in attrnames(ex) if n ∉ std_attrs], ",") + treestr = string(rpad(treestr, 40), "│ $attrstr") - # Add filename if it's changed from the previous node - if fname != current_filename[] && !isnothing(fname) - #println(io, "# ", fname) - treestr = string(rpad(treestr, 80), "│$fname") - current_filename[] = fname - end - println(io, posstr, "│", treestr) - if haschildren(node) + println(io, treestr) + if haschildren(ex) new_indent = indent*" " - for n in children(node) - _show_syntax_tree(io, current_filename, n, new_indent, show_byte_offsets) + for n in children(ex) + _show_syntax_tree(io, n, new_indent) end end end -function Base.show(io::IO, ::MIME"text/plain", tree::SyntaxTree; show_byte_offsets=false) - println(io, "line:col│ tree │ attributes | file_name") - _show_syntax_tree(io, Ref{Union{Nothing,String}}(nothing), tree, "", show_byte_offsets) +function Base.show(io::IO, ::MIME"text/plain", ex::SyntaxTree) + anames = join(string.(attrnames(syntax_graph(ex))), ",") + println(io, "SyntaxTree with attributes $anames") + _show_syntax_tree(io, ex, "") end function _show_syntax_tree_sexpr(io, ex) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index ebd97cfabd5a0..a6e8888d89bd8 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -12,7 +12,56 @@ end function Base.showerror(io::IO, exc::LoweringError) print(io, "LoweringError:\n") + # FIXME src = sourceref(exc.ex) highlight(io, src; note=exc.msg) end +#------------------------------------------------------------------------------- +function _show_provtree(io::IO, ex::SyntaxTree, 
indent) + print(io, ex, "\n") + prov = provenance(ex) + for (i, e) in enumerate(prov) + islast = i == length(prov) + printstyled(io, "$indent$(islast ? "└─ " : "├─ ")", color=:light_black) + inner_indent = indent * (islast ? " " : "│ ") + _show_provtree(io, e, inner_indent) + end +end + +function _show_provtree(io::IO, prov, indent) + fn = filename(prov) + line, _ = source_location(prov) + printstyled(io, "@ $fn:$line\n", color=:light_black) +end + +function showprov(io::IO, exs::Vector) + for (i,ex) in enumerate(Iterators.reverse(exs)) + sr = sourceref(ex) + if i > 1 + print(io, "\n\n") + end + k = kind(ex) + note = i > 1 && k == K"macrocall" ? "in macro expansion" : + i > 1 && k == K"$" ? "interpolated here" : + "in source" + highlight(io, sr, note=note) + + line, _ = source_location(sr) + locstr = "$(filename(sr)):$line" + JuliaSyntax._printstyled(io, "\n# @ $locstr", fgcolor=:light_black) + end +end + +function showprov(io::IO, ex::SyntaxTree; tree=false) + if tree + _show_provtree(io, ex, "") + else + showprov(io, flattened_provenance(ex)) + end +end + +function showprov(x; kws...) + showprov(stdout, x; kws...) 
+end + diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 098fefd3634a3..4659fab093b91 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -3,7 +3,7 @@ using JuliaSyntax using JuliaLowering -using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov using JuliaSyntaxFormatter @@ -47,17 +47,6 @@ end # end # """ -src = """ -let - y = 0 - x = 1 - let x = x + 1 - y = x - end - (x, y) -end -""" - src = """ begin function f(x) @@ -98,16 +87,16 @@ end # z = 1 + 1 # end # """ -# -# src = """ -# begin -# x = 10 -# y = :(g(z)) -# quote -# f(\$(x+1), \$y) -# end -# end -# """ + +src = raw""" +begin + x = 10 + y = :(g(z)) + quote + f($(x+1), $y) + end +end +""" JuliaLowering.include(Main, "demo_include.jl") @@ -186,13 +175,6 @@ end # src = """@foo z""" -src = """ -begin - x = 42 - M.@foo x -end -""" - # src = """ # M.@recursive 3 # """ @@ -235,13 +217,34 @@ end # src = """ # _ = -1 # """ -src = """ -M.@make_module -""" -src = """ -M.@nested_return_a_value -""" +# src = """ +# M.@make_module +# """ + +# src = """ +# M.@nested_return_a_value +# """ + +# src = """ +# function f(y) +# x = 42 + y +# M.@foo error(x) +# end +# """ + +# src = """ +# let +# y = 0 +# x = 1 +# let x = x + 1 +# y = x +# end +# (x, y) +# end +# """ + +#src = """M.@outer""" ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) @@ -250,17 +253,17 @@ ex = ensure_attributes(ex, var_id=Int) in_mod = Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) -# @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) -@info "Macro expanded" formatsrc(ex_macroexpand, 
color_by=e->JuliaLowering.expansion_stack(e)[2:end]) +@info "Macro expanded" ex_macroexpand formatsrc(ex_macroexpand, color_by=:scope_layer) +#@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.flattened_provenance(e)[1:end-1]) ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) -@info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) +@info "Desugared" ex_desugar formatsrc(ex_desugar, color_by=:scope_layer) ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) -@info "Resolved scopes" formatsrc(ex_scoped, color_by=:var_id) +@info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=:var_id) ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) -@info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) +@info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) @info "CodeInfo" ex_expr diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl index cab5131b4bc94..9838111522169 100644 --- a/JuliaLowering/test/demo_include.jl +++ b/JuliaLowering/test/demo_include.jl @@ -15,16 +15,8 @@ module M JuliaLowering.source_location(__context__.macroname)[1] end - module A - another_global = "global in A" - - macro bar(ex) - quote - x = "`x` in @bar" - (x, another_global, $ex) - end - end - end + # Macro with local variables + JuliaLowering.include(M, "demo_include_2.jl") someglobal = "global in module M" @@ -33,6 +25,7 @@ module M quote x = "`x` from @foo" (x, someglobal, A.@bar $ex) + #(x, someglobal, $ex, A.@bar($ex), A.@bar(x)) end end @@ -72,4 +65,12 @@ module M @return_a_value ) end + + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end end diff --git a/JuliaLowering/test/demo_include_2.jl b/JuliaLowering/test/demo_include_2.jl new file mode 100644 index 0000000000000..ee084f9b29c29 --- /dev/null +++ b/JuliaLowering/test/demo_include_2.jl @@ -0,0 +1,12 @@ + + module 
A + another_global = "global in A" + + macro bar(ex) + quote + x = "`x` in @bar" + (x, another_global, $ex) + end + end + end + diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index ab18ecc91aeae..0c21f4e3e2b5f 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -3,7 +3,7 @@ using Test using JuliaLowering using JuliaSyntax using JuliaSyntax: sourcetext -using JuliaLowering: @ast +using JuliaLowering: @ast, flattened_provenance, showprov include("utils.jl") @@ -173,8 +173,32 @@ end ] ] @test sourcetext(ex[1]) == "f(\$(x+1), \$y)" -@test sourcetext(ex[1][2]) == "x+1" -@test sourcetext(ex[1][3]) == "g(z)" +@test sourcetext(ex[1][2]) == "\$(x+1)" +@test sourcetext.(flattened_provenance(ex[1][3])) == ["\$y", "g(z)"] +@test sprint(io->showprov(io, ex[1][3], tree=true)) == raw""" + (call g z) + ├─ (call g z) + │ └─ @ string:3 + └─ ($ y) + └─ @ string:5 + """ +@test sprint(io->showprov(io, ex[1][3])) == raw""" + begin + x = 10 + y = :(g(z)) + # └──┘ ── in source + quote + f($(x+1), $y) + # @ string:3 + + y = :(g(z)) + quote + f($(x+1), $y) + # └┘ ── interpolated here + end + end + # @ string:5""" + # Test expression flags are preserved during interpolation @test JuliaSyntax.is_infix_op_call(JuliaLowering.include_string(test_mod, """ @@ -278,6 +302,14 @@ module M end end + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end + # # Recursive macro call # # TODO: Need branching! 
# macro recursive(N) @@ -351,6 +383,17 @@ end M.@recursive 3 """) == (3, (2, (1, 0))) +@test let + ex = parsestmt(SyntaxTree, "M.@outer()", filename="foo.jl") + expanded = JuliaLowering.macroexpand(test_mod, ex) + sourcetext.(flattened_provenance(expanded[2])) +end == [ + "M.@outer()" + "@inner" + "2" +] + + @test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ macro mmm(a; b=2) end From 4afe2cea5e85cd3eea0d6ec7d933381d5a38dcc6 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Sat, 15 Jun 2024 23:22:03 -0400 Subject: [PATCH 0761/1109] Update `Base.peek` docstring When looking at the help for `peek` in the repl, the current docstring looks like it applies to all `IO`, so I added `::ParseStream` to make it more specific. See also https://github.com/JuliaLang/julia/issues/54749 --- JuliaSyntax/src/parse_stream.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index dcbb52aff665b..412d02b4c3be1 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -475,7 +475,7 @@ end end """ - peek(stream [, n=1]; skip_newlines=false) + peek(stream::ParseStream [, n=1]; skip_newlines=false) Look ahead in the stream `n` tokens, returning the token kind. Comments and non-newline whitespace are skipped automatically. Whitespace containing a From 59120fbe322eee1d00fe6f21ae69af600ff1070a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 21 Jun 2024 16:34:33 +1000 Subject: [PATCH 0762/1109] Implement branching in desugaring and in IR flattening Desugar * `&&` and `||` chains * Ternary `a ? b : c` notation * Conditionals with chains of `&&` and `||` IR generation for `if ... elseif ... else ... end`, including all rules for conditionals including blocks. 
--- JuliaLowering/src/ast.jl | 4 +- JuliaLowering/src/desugaring.jl | 62 +++++++- JuliaLowering/src/eval.jl | 6 +- JuliaLowering/src/kinds.jl | 3 + JuliaLowering/src/linear_ir.jl | 145 ++++++++++++++++-- JuliaLowering/src/macro_expansion.jl | 7 +- JuliaLowering/src/syntax_graph.jl | 1 + JuliaLowering/src/utils.jl | 4 +- JuliaLowering/test/branching.jl | 212 +++++++++++++++++++++++++++ JuliaLowering/test/demo.jl | 45 ++---- JuliaLowering/test/demo_include.jl | 54 ++++++- JuliaLowering/test/runtests.jl | 2 + JuliaLowering/test/utils.jl | 7 + 13 files changed, 500 insertions(+), 52 deletions(-) create mode 100644 JuliaLowering/test/branching.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index edd5271828f09..f1d7074658a39 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -67,7 +67,8 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || k == K"globalref" makeleaf(graph, srcref, k; name_val=value, kws...) - elseif k == K"SSAValue" + elseif k == K"SSAValue" || k == K"label" + # FIXME? makeleaf(graph, srcref, k; var_id=value, kws...) else val = k == K"Integer" ? convert(Int, value) : @@ -75,6 +76,7 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) k == K"String" ? convert(String, value) : k == K"Char" ? convert(Char, value) : k == K"Value" ? value : + k == K"Bool" ? value : error("Unexpected leaf kind `$k`") makeleaf(graph, srcref, k; value=val, kws...) 
end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 984b298d9e58b..297ba91f29bf9 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -26,13 +26,39 @@ function DesugaringContext(ctx) DesugaringContext(graph, ctx.next_var_id, ctx.scope_layers, ctx.current_layer.mod) end +# Flatten nested && or || nodes and expand their children +function expand_cond_children(ctx, ex, cond_kind=kind(ex), flat_children=SyntaxList(ctx)) + for e in children(ex) + if kind(e) == cond_kind + expand_cond_children(ctx, e, cond_kind, flat_children) + else + push!(flat_children, expand_forms_2(ctx, e)) + end + end + flat_children +end + +# Expand condition in, eg, `if` or `while` function expand_condition(ctx, ex) - if head(ex) == K"block" || head(ex) == K"||" || head(ex) == K"&&" - # || and && get special lowering so that they compile directly to jumps - # rather than first computing a bool and then jumping. - error("TODO expand_condition") + isblock = kind(ex) == K"block" + test = isblock ? ex[end] : ex + k = kind(test) + if k == K"&&" || k == K"||" + # `||` and `&&` get special lowering so that they compile directly to + # jumps rather than first computing a bool and then jumping. + cs = expand_cond_children(ctx, test) + @assert length(cs) > 1 + test = makenode(ctx, test, k, cs) + else + test = expand_forms_2(ctx, test) + end + if isblock + # Special handling so that the rules for `&&` and `||` can be applied + # to the last statement of a block + @ast ctx ex [K"block" ex[1:end-1]... test] + else + test end - expand_forms_2(ctx, ex) end function expand_let(ctx, ex) @@ -549,6 +575,26 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ex[2]=>K"Symbol" ] ) + elseif k == K"?" 
+ @chk numchildren(ex) == 3 + expand_forms_2(ctx, @ast ctx ex [K"if" children(ex)...]) + elseif k == K"&&" || k == K"||" + @chk numchildren(ex) > 1 + cs = expand_cond_children(ctx, ex) + # Attributing correct provenance for `cs[1:end-1]` is tricky in cases + # like `a && (b && c)` because the expression constructed here arises + # from the source fragment `a && (b` which doesn't follow the tree + # structure. For now we attribute to the parent node. + cond = length(cs) == 2 ? + cs[1] : + makenode(ctx, ex, k, cs[1:end-1]) + # This transformation assumes the type assertion `cond::Bool` will be + # added by a later pass. + if k == K"&&" + @ast ctx ex [K"if" cond cs[end] false::K"Bool"] + else + @ast ctx ex [K"if" cond true::K"Bool" cs[end]] + end elseif k == K"doc" @chk numchildren(ex) == 2 sig = expand_forms_2(ctx, ex[2], ex) @@ -556,6 +602,12 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) elseif k == K"macro" expand_forms_2(ctx, expand_macro_def(ctx, ex)) + elseif k == K"if" || k == K"elseif" + @chk numchildren(ex) >= 2 + @ast ctx ex [k + expand_condition(ctx, ex[1]) + expand_forms_2(ctx, ex[2:end])... 
+ ] elseif k == K"let" expand_forms_2(ctx, expand_let(ctx, ex)) elseif k == K"local" || k == K"global" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 15b0c99cd58ff..e61fb0bfaf4fc 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -178,7 +178,7 @@ end function to_lowered_expr(mod, var_info, ex) k = kind(ex) - if is_literal(k) + if is_literal(k) || k == K"Bool" ex.value elseif k == K"core" GlobalRef(Core, Symbol(ex.name_val)) @@ -223,6 +223,10 @@ function to_lowered_expr(mod, var_info, ex) end elseif k == K"Value" ex.value + elseif k == K"goto" + Core.GotoNode(ex[1].var_id) + elseif k == K"gotoifnot" + Core.GotoIfNot(to_lowered_expr(mod, var_info, ex[1]), ex[2].var_id) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index c68348f51d835..647cfbecd4e3c 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -10,6 +10,9 @@ function _insert_kinds() # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" + # TODO: Emit "true" and "false" tokens as K"Bool" in parser to + # harmonize with K"Int" etc? 
+ "Bool" # An identifier composed entirely of underscores "Placeholder" # A (quoted) `Symbol` diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 39d2c85de86de..fffd30e48fbfb 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -32,15 +32,17 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext graph::GraphType code::SyntaxList{GraphType, Vector{NodeId}} next_var_id::Ref{Int} + next_label_id::Ref{Int} is_toplevel_thunk::Bool + lambda_locals::Set{VarId} return_type::Union{Nothing,NodeId} var_info::Dict{VarId,VarInfo} mod::Module end -function LinearIRContext(ctx, is_toplevel_thunk, return_type) - LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, - is_toplevel_thunk, return_type, ctx.var_info, ctx.mod) +function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) + LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, Ref(0), + is_toplevel_thunk, lambda_locals, return_type, ctx.var_info, ctx.mod) end function is_valid_body_ir_argument(ex) @@ -109,7 +111,7 @@ end # Emit computation of ex, assigning the result to an ssavar and returning that function emit_assign_tmp(ctx::LinearIRContext, ex) # TODO: We could replace this with an index into the code array right away? 
- tmp = makenode(ctx, ex, K"SSAValue", var_id=ctx.next_var_id[]) + tmp = makeleaf(ctx, ex, K"SSAValue", var_id=ctx.next_var_id[]) ctx.next_var_id[] += 1 emit(ctx, ex, K"=", tmp, ex) return tmp @@ -145,6 +147,79 @@ function emit_assignment(ctx, srcref, lhs, rhs) end end +function make_label(ctx, srcref) + id = ctx.next_label_id[] + ctx.next_label_id[] += 1 + makeleaf(ctx, srcref, K"label", id) +end + +# flisp: make&mark-label +function emit_label(ctx, srcref) + if !isempty(ctx.code) + # Use current label if available + e = ctx.code[end] + if kind(e) == K"label" + return e + end + end + l = make_label(ctx, srcref) + emit(ctx, l) + l +end + +function compile_condition_term(ctx, ex) + cond = compile(ctx, ex, true, false) + if !is_valid_body_ir_argument(cond) + cond = emit_assign_tmp(ctx, cond) + end + return cond +end + +# flisp: emit-cond +function compile_conditional(ctx, ex, false_label) + if kind(ex) == K"block" + for i in 1:numchildren(ex)-1 + compile(ctx, ex[i], false, false) + end + test = ex[end] + else + test = ex + end + k = kind(test) + if k == K"||" + true_label = make_label(ctx, test) + for (i,e) in enumerate(children(test)) + c = compile_condition_term(ctx, e) + if i < numchildren(test) + next_term_label = make_label(ctx, test) + # Jump over short circuit + emit(ctx, @ast ctx e [K"gotoifnot" c next_term_label]) + # Short circuit to true + emit(ctx, @ast ctx e [K"goto" true_label]) + emit(ctx, next_term_label) + else + emit(ctx, @ast ctx e [K"gotoifnot" c false_label]) + end + end + emit(ctx, true_label) + elseif k == K"&&" + for e in children(test) + c = compile_condition_term(ctx, e) + emit(ctx, @ast ctx e [K"gotoifnot" c false_label]) + end + else + c = compile_condition_term(ctx, test) + emit(ctx, @ast ctx test [K"gotoifnot" c false_label]) + end +end + +function new_mutable_var(ctx, srcref, name) + id = new_var_id(ctx) + ctx.var_info[id] = VarInfo(name, nothing, :local, false, false) + push!(ctx.lambda_locals, id) + makeleaf(ctx, srcref, 
K"Identifier", name, var_id=id) +end + # This pass behaves like an interpreter on the given code. # To perform stateful operations, it calls `emit` to record that something # needs to be done. In value position, it returns an expression computing @@ -154,7 +229,8 @@ end function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"Identifier" || is_literal(k) || k == K"SSAValue" || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" + k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" || + k == K"Bool" # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall if needs_value && k == K"Placeholder" # TODO: ensure outterref, globalref work here @@ -220,6 +296,46 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else nothing end + elseif k == K"if" || k == K"elseif" + @chk numchildren(ex) <= 3 + has_else = numchildren(ex) > 2 + else_label = make_label(ctx, ex) + compile_conditional(ctx, ex[1], else_label) + if in_tail_pos + compile(ctx, ex[2], needs_value, in_tail_pos) + emit(ctx, else_label) + if has_else + compile(ctx, ex[3], needs_value, in_tail_pos) + else + emit_return(ctx, ex, nothing_(ctx, ex)) + end + nothing + else + val = needs_value && new_mutable_var(ctx, ex, "if_val") + v1 = compile(ctx, ex[2], needs_value, in_tail_pos) + if needs_value + emit_assignment(ctx, ex, val, v1) + end + if has_else || needs_value + end_label = make_label(ctx, ex) + emit(ctx, @ast ctx ex [K"goto" end_label]) + else + end_label = nothing + end + emit(ctx, else_label) + v2 = if has_else + compile(ctx, ex[3], needs_value, in_tail_pos) + elseif needs_value + nothing_(ctx, ex) + end + if needs_value + emit_assignment(ctx, ex, val, v2) + end + if !isnothing(end_label) + emit(ctx, end_label) + end + val + end elseif k == K"method" # TODO # throw(LoweringError(ex, @@ -299,8 
+415,17 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) ex elseif k == K"SSAValue" makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[ex.var_id]) - elseif k == K"goto" || k == K"enter" || k == K"gotoifnot" + elseif k == K"enter" TODO(ex, "_renumber $k") + elseif k == K"goto" + @ast ctx ex [K"goto" + label_table[ex[1].var_id]::K"label" + ] + elseif k == K"gotoifnot" + @ast ctx ex [K"gotoifnot" + _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex[1]) + label_table[ex[2].var_id]::K"label" + ] elseif k == K"lambda" ex else @@ -316,7 +441,7 @@ end function renumber_body(ctx, input_code, slot_rewrites) # Step 1: Remove any assignments to SSA variables, record the indices of labels ssa_rewrites = Dict{VarId,VarId}() - label_table = Dict{String,Int}() + label_table = Dict{Int,Int}() code = SyntaxList(ctx) for ex in input_code k = kind(ex) @@ -332,7 +457,7 @@ function renumber_body(ctx, input_code, slot_rewrites) ex_out = ex[2] end elseif k == K"label" - label_table[ex.name_val] = length(code) + 1 + label_table[ex.var_id] = length(code) + 1 else ex_out = ex end @@ -373,7 +498,7 @@ function compile_lambda(outer_ctx, ex) lambda_info = ex.lambda_info return_type = nothing # FIXME # TODO: Add assignments for reassigned arguments to body using lambda_info.args - ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, return_type) + ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, ex.lambda_locals, return_type) compile_body(ctx, ex[1]) slot_rewrites = Dict{VarId,Int}() _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) @@ -394,7 +519,7 @@ function linearize_ir(ctx, ex) # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently # required to call reparent() ... 
_ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, - false, nothing, ctx.var_info, ctx.mod) + Ref(0), false, Set{VarId}(), nothing, ctx.var_info, ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res.id, var_info=ctx.var_info) _ctx, res diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d668d5ff3465d..b0d004d858adb 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -146,7 +146,7 @@ function expand_macro(ctx, ex) if exc isa MacroExpansionError # Add context to the error. # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? - rethrow(MacroExpansionError(mctx, ex.ex, exc.msg)) + rethrow(MacroExpansionError(mctx, ex, exc.msg)) else throw(MacroExpansionError(mctx, ex, "Error expanding macro")) end @@ -202,7 +202,10 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" && all(==('_'), ex.name_val) @ast ctx ex ex=>K"Placeholder" - elseif k == K"Identifier" || k == K"MacroName" || + elseif k == K"true" || k == K"false" + # FIXME: Move this upstream into JuliaSyntax + @ast ctx ex (k == K"true")::K"Bool" + elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || (is_operator(k) && !haschildren(ex)) # <- TODO: fix upstream layerid = get(ex, :scope_layer, ctx.current_layer.id) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index c30e2536b9f3d..4827cb79b69a4 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -413,6 +413,7 @@ function _value_string(ex) str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : k == K"Placeholder" ? ex.name_val : k == K"SSAValue" ? "ssa" : + k == K"label" ? "label" : k == K"core" ? "core.$(ex.name_val)" : k == K"top" ? "top.$(ex.name_val)" : k == K"Symbol" ? 
":$(ex.name_val)" : diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index a6e8888d89bd8..0fcb5d0a5cbaf 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -12,9 +12,11 @@ end function Base.showerror(io::IO, exc::LoweringError) print(io, "LoweringError:\n") - # FIXME src = sourceref(exc.ex) highlight(io, src; note=exc.msg) + + print(io, "\n\nDetailed provenance:\n") + showprov(io, exc.ex, tree=true) end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl new file mode 100644 index 0000000000000..3ad7aeeeb8431 --- /dev/null +++ b/JuliaLowering/test/branching.jl @@ -0,0 +1,212 @@ +# Branching + +@testset "branching" begin + +test_mod = Module() + +#------------------------------------------------------------------------------- +# Tail position +@test JuliaLowering.include_string(test_mod, """ +let a = true + if a + 1 + end +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + if a + 1 + end +end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +let a = true + if a + 1 + else + 2 + end +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + if a + 1 + else + 2 + end +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = true + if a + 1 + elseif b + 2 + else + 3 + end +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = false + if a + 1 + elseif b + 2 + else + 3 + end +end +""") === 3 + +#------------------------------------------------------------------------------- +# Value, not tail position + +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = if a + 1 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = if a + 1 + end + x +end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +let 
a = true + x = if a + 1 + else + 2 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = if a + 1 + else + 2 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = true + x = if a + 1 + elseif b + 2 + else + 3 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = false + x = if a + 1 + elseif b + 2 + else + 3 + end + x +end +""") === 3 + +#------------------------------------------------------------------------------- +# Side effects (not value or tail position) +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = nothing + if a + x = 1 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = nothing + if a + x = 1 + end + x +end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = nothing + if a + x = 1 + else + x = 2 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = nothing + if a + x = 1 + else + x = 2 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = true + x = nothing + if a + x = 1 + elseif b + x = 2 + else + x = 3 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = false + x = nothing + if a + x = 1 + elseif b + x = 2 + else + x = 3 + end + x +end +""") === 3 + +end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 4659fab093b91..7c770743a835c 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -102,34 +102,8 @@ JuliaLowering.include(Main, "demo_include.jl") Base.eval(M, quote function var"@inert"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == K"quote" - @ast __context__ ex [K"inert" ex] - end - - # Recursive macro call - function var"@recursive"(__context__::JuliaLowering.MacroContext, N) - @chk kind(N) == K"Integer" - 
Nval = N.value::Int - if Nval < 1 - return N - end - # quote - # x = $N - # (@recursive $(Nval-1), x) - # end - @ast __context__ (@HERE) [K"block" - [K"="(@HERE) - "x"::K"Identifier"(@HERE) - N - ] - [K"tuple"(@HERE) - "x"::K"Identifier"(@HERE) - [K"macrocall"(@HERE) - "@recursive"::K"Identifier" - (Nval-1)::K"Integer" - ] - ] - ] + @chk kind(ex) == JuliaSyntax.K"quote" + @ast __context__ ex [JuliaSyntax.K"inert" ex] end end) @@ -175,9 +149,9 @@ end # src = """@foo z""" -# src = """ -# M.@recursive 3 -# """ +src = """ +M.@recursive 3 +""" # src = """ # begin @@ -246,6 +220,15 @@ end #src = """M.@outer""" +src = """ +begin + local a, b + if a + b + end +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl index 9838111522169..47fcbb93d2d94 100644 --- a/JuliaLowering/test/demo_include.jl +++ b/JuliaLowering/test/demo_include.jl @@ -1,5 +1,5 @@ module M - using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope + using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode using JuliaSyntax # Introspection @@ -73,4 +73,56 @@ module M macro outer() :((1, @inner)) end + + macro K_str(str) + convert(JuliaSyntax.Kind, str[1].value) + end + + # Recursive macro call + macro recursive(N) + Nval = if kind(N) == K"Integer" || kind(N) == K"Value" + N.value + end + if !(Nval isa Integer) + throw(MacroExpansionError(N, "argument must be an integer")) + end + if Nval < 1 + return N + end + quote + x = $N + (@recursive($(Nval-1)), x) + end + end + + # function var"@recursive"(__context__::JuliaLowering.MacroContext, N) + # @chk kind(N) == K"Integer" + # Nval = N.value::Int + # if Nval < 1 + # return N + # end + # @ast __context__ (@HERE) [K"block" + # [K"="(@HERE) + # "x"::K"Identifier"(@HERE) + # N + # ] + # [K"tuple"(@HERE) + # "x"::K"Identifier"(@HERE) + # [K"macrocall"(@HERE) + # 
"@recursive"::K"Identifier" + # (Nval-1)::K"Integer" + # ] + # ] + # ] + # end + + # macro inert(ex) + # if kind(ex) != K"quote" + # throw(MacroExpansionError(ex, "expected quote")) + # end + # makenode(__context__, ex, + # makenode(__context__, ex, K"inert", ex)) + # @chk kind(ex) == JuliaSyntax.K"quote" + # @ast __context__ ex [JuliaSyntax.K"inert" ex] + # end end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 0c21f4e3e2b5f..12b18be49af07 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -404,4 +404,6 @@ macro A.b(ex) end """) +include("branching.jl") + end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 006b37fa738fd..c209db5268752 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -81,3 +81,10 @@ test case comparisons with the `~` function. """ format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex) +#------------------------------------------------------------------------------- + +# Parse and lower `src`, and print statements from the linear IR in text format +function ir_as_text(mod, src) + ex = JuliaLowering.lower(mod, parsestmt(SyntaxTree, src)) + join(string.(children(ex[1])), "\n") +end From 68e8cfd7fbfba9452e0eb18544cacb4f0c613469 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 21 Jun 2024 17:32:44 +1000 Subject: [PATCH 0763/1109] Ensure variable IDs and slots numbers are deterministic Remove the dependence on Dict iteration for var_id and slot number generation by sorting by VarKey when generating VarId, and VarId when generating slots. This ensures that integer identifiers in the IR are always deterministic. 
--- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 18 +++++- JuliaLowering/test/branching.jl | 88 +++++++++++++++++++++++++++++ JuliaLowering/test/demo.jl | 24 ++++---- 4 files changed, 117 insertions(+), 15 deletions(-) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index fffd30e48fbfb..54e3c6417736e 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -502,7 +502,7 @@ function compile_lambda(outer_ctx, ex) compile_body(ctx, ex[1]) slot_rewrites = Dict{VarId,Int}() _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) - _add_slots!(slot_rewrites, ctx.var_info, ex.lambda_locals) + _add_slots!(slot_rewrites, ctx.var_info, sort(collect(ex.lambda_locals))) code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"lambda", makenode(ctx, ex[1], K"block", code), diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 4b9b58608eede..b577aff18282d 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -87,6 +87,13 @@ function find_scope_vars(ex) for e in children(ex) _find_scope_vars!(assignments, locals, globals, used_names, e) end + + # Sort by key so that id generation is deterministic + assignments = sort(collect(pairs(assignments)), by=first) + locals = sort(collect(pairs(locals)), by=first) + globals = sort(collect(pairs(globals)), by=first) + used_names = sort(collect(used_names)) + return assignments, locals, globals, used_names end @@ -98,6 +105,10 @@ struct VarKey layer::LayerId end +function Base.isless(a::VarKey, b::VarKey) + (a.name, a.layer) < (b.name, b.layer) +end + # Identifiers produced by lowering will have the following layer by default. 
# # To make new mutable variables without colliding names, lowering can @@ -227,9 +238,10 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end + global_keys = Set(first(g) for g in globals) # Add explicit locals - for (varkey,e) in pairs(locals) - if haskey(globals, varkey) + for (varkey,e) in locals + if varkey in global_keys throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global")) elseif haskey(var_ids, varkey) vk = ctx.var_info[var_ids[varkey]].kind @@ -245,7 +257,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end # Add explicit globals - for (varkey,e) in pairs(globals) + for (varkey,e) in globals if haskey(var_ids, varkey) vk = ctx.var_info[var_ids[varkey]].kind if vk === :argument && is_outer_lambda_scope diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index 3ad7aeeeb8431..52ee6f4cb5a47 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -209,4 +209,92 @@ let a = false, b = false end """) === 3 +#------------------------------------------------------------------------------- +# Detailed lowering + +@test ir_as_text(test_mod, """ +begin + local a, b + if a + b + end +end +""") == """ +slot.₁/a +(gotoifnot ssa.₁ label.₅) +slot.₂/b +(return ssa.₃) +core.nothing +(return ssa.₅)""" + +@test ir_as_text(test_mod, """ +begin + local a, b, c + if a + b + end + c +end +""") == """ +slot.₁/a +(gotoifnot ssa.₁ label.₄) +slot.₂/b +slot.₃/c +(return ssa.₄)""" + +@test ir_as_text(test_mod, """ +begin + local a, b, c + if a + b + else + c + end +end +""") == """ +slot.₁/a +(gotoifnot ssa.₁ label.₅) +slot.₂/b +(return ssa.₃) +slot.₃/c +(return ssa.₅)""" + +@test ir_as_text(test_mod, """ +begin + local a, b, c, d + if a + b + else + c + end + d +end +""") == """ +slot.₁/a +(gotoifnot ssa.₁ label.₅) +slot.₂/b +(goto label.₆) +slot.₃/c +slot.₄/d +(return ssa.₆)""" + +# Blocks compile directly to branches +@test ir_as_text(test_mod, """ +begin + local a, b, c, 
d + if (a; b && c) + d + end +end +""") == """ +slot.₁/a +slot.₂/b +(gotoifnot ssa.₂ label.₈) +slot.₃/c +(gotoifnot ssa.₄ label.₈) +slot.₄/d +(return ssa.₆) +core.nothing +(return ssa.₈)""" + end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 7c770743a835c..063fcac2fc3dc 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -207,24 +207,26 @@ M.@recursive 3 # end # """ -# src = """ -# let -# y = 0 -# x = 1 -# let x = x + 1 -# y = x -# end -# (x, y) -# end -# """ +src = """ +let + y = 0 + x = 1 + let x = x + 1 + y = x + end + (x, y) +end +""" #src = """M.@outer""" src = """ begin - local a, b + local a, b, c if a b + else + c end end """ From 772fa269b374d919d48c8d2c46a8340a3650be60 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 22 Jun 2024 21:41:49 +1000 Subject: [PATCH 0764/1109] Rename private fields of SyntaxTree We'd like to be able to use `id` as an attribute name (and maybe `graph`??), so rename the internal fields to start with underscores. --- JuliaLowering/src/ast.jl | 20 ++--- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 6 +- JuliaLowering/src/syntax_graph.jl | 135 ++++++++++++++-------------- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/runtests.jl | 2 +- 6 files changed, 83 insertions(+), 84 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index f1d7074658a39..fcd354d542328 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -21,9 +21,9 @@ end #------------------------------------------------------------------------------- # AST creation utilities _node_id(ex::NodeId) = ex -_node_id(ex::SyntaxTree) = ex.id +_node_id(ex::SyntaxTree) = ex._id -_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_same_graph(graph, ex); ex.id) +_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_same_graph(graph, ex); ex._id) _node_ids(graph::SyntaxGraph) = () _node_ids(graph::SyntaxGraph, ::Nothing, cs...) 
= _node_ids(graph, cs...) @@ -164,7 +164,7 @@ function _expand_ast_tree(defs, ctx, srcref, tree) flatargs = [] for a in tree.args if Meta.isexpr(a, :row) - append!(flagargs, a.args) + append!(flatargs, a.args) else push!(flatargs, a) end @@ -255,12 +255,12 @@ end # Mapping and copying of AST nodes function copy_attrs!(dest, src, all=false) # TODO: Make this faster? - for (name, attr) in pairs(src.graph.attributes) + for (name, attr) in pairs(src._graph.attributes) if (all || (name !== :source && name !== :kind && name !== :syntax_flags)) && - haskey(attr, src.id) - dest_attr = getattr(dest.graph, name, nothing) + haskey(attr, src._id) + dest_attr = getattr(dest._graph, name, nothing) if !isnothing(dest_attr) - dest_attr[dest.id] = attr[src.id] + dest_attr[dest._id] = attr[src._id] end end end @@ -268,7 +268,7 @@ end function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}, all=false) if all - sethead!(dest.graph, dest.id, head) + sethead!(dest._graph, dest._id, head) end end @@ -311,8 +311,8 @@ function copy_ast(ctx, ex) # multiple parents? (How much does this actually happen in practice?) s = ex.source # TODO: Figure out how to use provenance() here? - srcref = s isa NodeId ? copy_ast(ctx, SyntaxTree(ex.graph, s)) : - s isa Tuple ? map(i->copy_ast(ctx, SyntaxTree(ex.graph, i)), s) : + srcref = s isa NodeId ? copy_ast(ctx, SyntaxTree(ex._graph, s)) : + s isa Tuple ? 
map(i->copy_ast(ctx, SyntaxTree(ex._graph, i)), s) : s if haschildren(ex) cs = SyntaxList(ctx) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 54e3c6417736e..44163faf80c17 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -521,7 +521,7 @@ function linearize_ir(ctx, ex) _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, Ref(0), false, Set{VarId}(), nothing, ctx.var_info, ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) - setattr!(graph, res.id, var_info=ctx.var_info) + setattr!(graph, res._id, var_info=ctx.var_info) _ctx, res end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index b577aff18282d..d2d10dc750cec 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -35,7 +35,7 @@ # end # # function find_in_ast(f, ex::SyntaxTree) -# todo = SyntaxList(ex.graph) +# todo = SyntaxList(ex._graph) # push!(todo, ex) # while !isempty(todo) # e1 = pop!(todo) @@ -352,7 +352,7 @@ function _resolve_scopes!(ctx, ex) k = kind(ex) if k == K"Identifier" id = lookup_var(ctx, VarKey(ex)) - setattr!(ctx.graph, ex.id, var_id=id) + setattr!(ctx.graph, ex._id, var_id=id) elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" return # TODO @@ -377,7 +377,7 @@ function _resolve_scopes!(ctx, ex) _resolve_scopes!(ctx, e) end pop!(ctx.scope_stack) - setattr!(ctx.graph, ex.id, lambda_locals=scope.lambda_locals) + setattr!(ctx.graph, ex._id, lambda_locals=scope.lambda_locals) elseif k == K"scope_block" scope = analyze_scope(ctx, ex, ex.scope_type, nothing) push!(ctx.scope_stack, scope) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 4827cb79b69a4..e2fcc6fab7586 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -185,87 +185,86 @@ end #------------------------------------------------------------------------------- struct SyntaxTree{GraphType} - 
graph::GraphType - id::NodeId + _graph::GraphType + _id::NodeId end -function Base.getproperty(tree::SyntaxTree, name::Symbol) - # TODO: Remove access to internals? - name === :graph && return getfield(tree, :graph) - name === :id && return getfield(tree, :id) - id = getfield(tree, :id) - return get(getproperty(getfield(tree, :graph), name), id) do - error("Property `$name[$id]` not found") +function Base.getproperty(ex::SyntaxTree, name::Symbol) + name === :_graph && return getfield(ex, :_graph) + name === :_id && return getfield(ex, :_id) + _id = getfield(ex, :_id) + return get(getproperty(getfield(ex, :_graph), name), _id) do + error("Property `$name[$_id]` not found") end end -function Base.setproperty!(tree::SyntaxTree, name::Symbol, val) - return setattr!(tree.graph, tree.id; name=>val) +function Base.setproperty!(ex::SyntaxTree, name::Symbol, val) + return setattr!(ex._graph, ex._id; name=>val) end -function Base.propertynames(tree::SyntaxTree) - attrnames(tree) +function Base.propertynames(ex::SyntaxTree) + attrnames(ex) end -function Base.get(tree::SyntaxTree, name::Symbol, default) - attr = getattr(getfield(tree, :graph), name, nothing) +function Base.get(ex::SyntaxTree, name::Symbol, default) + attr = getattr(getfield(ex, :_graph), name, nothing) return isnothing(attr) ? 
default : - get(attr, getfield(tree, :id), default) + get(attr, getfield(ex, :_id), default) end -function Base.getindex(tree::SyntaxTree, i::Integer) - child(tree, i) +function Base.getindex(ex::SyntaxTree, i::Integer) + child(ex, i) end -function Base.getindex(tree::SyntaxTree, r::UnitRange) - SyntaxList(tree.graph, children(tree.graph, tree.id, r)) +function Base.getindex(ex::SyntaxTree, r::UnitRange) + SyntaxList(ex._graph, children(ex._graph, ex._id, r)) end -Base.firstindex(tree::SyntaxTree) = 1 -Base.lastindex(tree::SyntaxTree) = numchildren(tree) +Base.firstindex(ex::SyntaxTree) = 1 +Base.lastindex(ex::SyntaxTree) = numchildren(ex) -function hasattr(tree::SyntaxTree, name::Symbol) - attr = getattr(tree.graph, name, nothing) - return !isnothing(attr) && haskey(attr, tree.id) +function hasattr(ex::SyntaxTree, name::Symbol) + attr = getattr(ex._graph, name, nothing) + return !isnothing(attr) && haskey(attr, ex._id) end -function attrnames(tree::SyntaxTree) - attrs = tree.graph.attributes - [name for (name, value) in pairs(attrs) if haskey(value, tree.id)] +function attrnames(ex::SyntaxTree) + attrs = ex._graph.attributes + [name for (name, value) in pairs(attrs) if haskey(value, ex._id)] end function setattr!(ex::SyntaxTree; attrs...) - setattr!(ex.graph, ex.id; attrs...) + setattr!(ex._graph, ex._id; attrs...) 
end # JuliaSyntax tree API -function JuliaSyntax.haschildren(tree::SyntaxTree) - haschildren(tree.graph, tree.id) +function JuliaSyntax.haschildren(ex::SyntaxTree) + haschildren(ex._graph, ex._id) end -function JuliaSyntax.numchildren(tree::SyntaxTree) - numchildren(tree.graph, tree.id) +function JuliaSyntax.numchildren(ex::SyntaxTree) + numchildren(ex._graph, ex._id) end -function JuliaSyntax.children(tree::SyntaxTree) - SyntaxList(tree.graph, children(tree.graph, tree.id)) +function JuliaSyntax.children(ex::SyntaxTree) + SyntaxList(ex._graph, children(ex._graph, ex._id)) end -function JuliaSyntax.child(tree::SyntaxTree, i::Integer) - SyntaxTree(tree.graph, child(tree.graph, tree.id, i)) +function JuliaSyntax.child(ex::SyntaxTree, i::Integer) + SyntaxTree(ex._graph, child(ex._graph, ex._id, i)) end -function JuliaSyntax.head(tree::SyntaxTree) - JuliaSyntax.SyntaxHead(kind(tree), flags(tree)) +function JuliaSyntax.head(ex::SyntaxTree) + JuliaSyntax.SyntaxHead(kind(ex), flags(ex)) end -function JuliaSyntax.kind(tree::SyntaxTree) - tree.kind +function JuliaSyntax.kind(ex::SyntaxTree) + ex.kind end -function JuliaSyntax.flags(tree::SyntaxTree) - get(tree, :syntax_flags, 0x0000) +function JuliaSyntax.flags(ex::SyntaxTree) + get(ex, :syntax_flags, 0x0000) end @@ -309,9 +308,9 @@ end function provenance(ex::SyntaxTree) s = ex.source if s isa NodeId - return (SyntaxTree(ex.graph, s),) + return (SyntaxTree(ex._graph, s),) elseif s isa Tuple - return SyntaxTree.((ex.graph,), s) + return SyntaxTree.((ex._graph,), s) else return (s,) end @@ -331,9 +330,9 @@ function _sourceref(sources, id) end end -function sourceref(tree::SyntaxTree) - sources = tree.graph.source - id::NodeId = tree.id +function sourceref(ex::SyntaxTree) + sources = ex._graph.source + id::NodeId = ex._id while true s, _ = _sourceref(sources, id) if s isa Tuple @@ -361,7 +360,7 @@ end function flattened_provenance(ex::SyntaxTree) refs = SyntaxList(ex) - _flattened_provenance(refs, ex.graph, ex.graph.source, 
ex.id) + _flattened_provenance(refs, ex._graph, ex._graph.source, ex._id) return reverse(refs) end @@ -370,13 +369,13 @@ function is_ancestor(ex, ancestor) if !is_compatible_graph(ex, ancestor) return false end - sources = ex.graph.source - id::NodeId = ex.id + sources = ex._graph.source + id::NodeId = ex._id while true s = get(sources, id, nothing) if s isa NodeId id = s - if id == ancestor.id + if id == ancestor._id return true end else @@ -385,12 +384,12 @@ function is_ancestor(ex, ancestor) end end -JuliaSyntax.filename(tree::SyntaxTree) = filename(sourceref(tree)) -JuliaSyntax.source_location(::Type{LineNumberNode}, tree::SyntaxTree) = source_location(LineNumberNode, sourceref(tree)) -JuliaSyntax.source_location(tree::SyntaxTree) = source_location(sourceref(tree)) -JuliaSyntax.first_byte(tree::SyntaxTree) = first_byte(sourceref(tree)) -JuliaSyntax.last_byte(tree::SyntaxTree) = last_byte(sourceref(tree)) -JuliaSyntax.sourcetext(tree::SyntaxTree) = sourcetext(sourceref(tree)) +JuliaSyntax.filename(ex::SyntaxTree) = filename(sourceref(ex)) +JuliaSyntax.source_location(::Type{LineNumberNode}, ex::SyntaxTree) = source_location(LineNumberNode, sourceref(ex)) +JuliaSyntax.source_location(ex::SyntaxTree) = source_location(sourceref(ex)) +JuliaSyntax.first_byte(ex::SyntaxTree) = first_byte(sourceref(ex)) +JuliaSyntax.last_byte(ex::SyntaxTree) = last_byte(sourceref(ex)) +JuliaSyntax.sourcetext(ex::SyntaxTree) = sourcetext(sourceref(ex)) const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId,Tuple} @@ -429,7 +428,7 @@ function _value_string(ex) end if k == K"slot" # TODO: Ideally shouldn't need to rewrap the id here... - srcex = SyntaxTree(ex.graph, ex.source) + srcex = SyntaxTree(ex._graph, ex.source) str = "$(str)/$(srcex.name_val)" end return str @@ -494,15 +493,15 @@ function reparent(ctx, ex::SyntaxTree) # In that case, would we copy all the attributes? That would have slightly # different semantics. 
graph = syntax_graph(ctx) - @assert graph.edge_ranges === ex.graph.edge_ranges - SyntaxTree(graph, ex.id) + @assert graph.edge_ranges === ex._graph.edge_ranges + SyntaxTree(graph, ex._id) end function ensure_attributes(ex::SyntaxTree; kws...) reparent(ensure_attributes(syntax_graph(ex); kws...), ex) end -syntax_graph(ex::SyntaxTree) = ex.graph +syntax_graph(ex::SyntaxTree) = ex._graph function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) @@ -535,18 +534,18 @@ function Base.getindex(v::SyntaxList, r::UnitRange) SyntaxList(v.graph, view(v.ids, r)) end -function Base.setindex!(v::SyntaxList, tree::SyntaxTree, i::Int) - check_same_graph(v, tree) - v.ids[i] = tree.id +function Base.setindex!(v::SyntaxList, ex::SyntaxTree, i::Int) + check_same_graph(v, ex) + v.ids[i] = ex._id end function Base.setindex!(v::SyntaxList, id::NodeId, i::Int) v.ids[i] = id end -function Base.push!(v::SyntaxList, tree::SyntaxTree) - check_same_graph(v, tree) - push!(v.ids, tree.id) +function Base.push!(v::SyntaxList, ex::SyntaxTree) + check_same_graph(v, ex) + push!(v.ids, ex._id) end function Base.append!(v::SyntaxList, exs) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 063fcac2fc3dc..ccd6a4a17a1a3 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -122,7 +122,7 @@ function wrapscope(ex, scope_type) end function softscope_test(ex) - g = ensure_attributes(ex.graph, scope_type=Symbol) + g = ensure_attributes(ex._graph, scope_type=Symbol) wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 12b18be49af07..641f5925fbf13 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -37,7 +37,7 @@ JuliaLowering.include_string(test_mod, """ # wrap expression in scope block of `scope_type` function wrapscope(ex, scope_type) 
- g = JuliaLowering.ensure_attributes(ex.graph, scope_type=Symbol) + g = JuliaLowering.ensure_attributes(ex._graph, scope_type=Symbol) ex = JuliaLowering.reparent(g, ex) makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) end From ea44fc7c4533247389fa9bb30878b70d6abbc43f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 22 Jun 2024 22:02:19 +1000 Subject: [PATCH 0765/1109] Use `id` rather than `var_id` for labels --- JuliaLowering/src/ast.jl | 5 +++-- JuliaLowering/src/eval.jl | 4 ++-- JuliaLowering/src/linear_ir.jl | 11 ++++++----- JuliaLowering/src/syntax_graph.jl | 3 +++ 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index fcd354d542328..e8b82d7844395 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -67,9 +67,10 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || k == K"globalref" makeleaf(graph, srcref, k; name_val=value, kws...) - elseif k == K"SSAValue" || k == K"label" - # FIXME? + elseif k == K"SSAValue" makeleaf(graph, srcref, k; var_id=value, kws...) + elseif k == K"label" + makeleaf(graph, srcref, k; id=value, kws...) else val = k == K"Integer" ? convert(Int, value) : k == K"Float" ? 
convert(Float64, value) : diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index e61fb0bfaf4fc..57ff9293599c5 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -224,9 +224,9 @@ function to_lowered_expr(mod, var_info, ex) elseif k == K"Value" ex.value elseif k == K"goto" - Core.GotoNode(ex[1].var_id) + Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, var_info, ex[1]), ex[2].var_id) + Core.GotoIfNot(to_lowered_expr(mod, var_info, ex[1]), ex[2].id) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 44163faf80c17..788e557b6b76b 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -150,7 +150,7 @@ end function make_label(ctx, srcref) id = ctx.next_label_id[] ctx.next_label_id[] += 1 - makeleaf(ctx, srcref, K"label", id) + makeleaf(ctx, srcref, K"label", id=id) end # flisp: make&mark-label @@ -419,12 +419,12 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) TODO(ex, "_renumber $k") elseif k == K"goto" @ast ctx ex [K"goto" - label_table[ex[1].var_id]::K"label" + label_table[ex[1].id]::K"label" ] elseif k == K"gotoifnot" @ast ctx ex [K"gotoifnot" _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex[1]) - label_table[ex[2].var_id]::K"label" + label_table[ex[2].id]::K"label" ] elseif k == K"lambda" ex @@ -457,7 +457,7 @@ function renumber_body(ctx, input_code, slot_rewrites) ex_out = ex[2] end elseif k == K"label" - label_table[ex.var_id] = length(code) + 1 + label_table[ex.id] = length(code) + 1 else ex_out = ex end @@ -515,7 +515,8 @@ function linearize_ir(ctx, ex) graph = ensure_attributes(ctx.graph, slot_rewrites=Dict{VarId,Int}, var_info=Dict{VarId,VarInfo}, - mod=Module) + mod=Module, + id=Int) # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently # required to call reparent() ... 
_ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index e2fcc6fab7586..2024d59bcf16a 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -420,6 +420,9 @@ function _value_string(ex) k == K"slot" ? "slot" : repr(get(ex, :value, nothing)) id = get(ex, :var_id, nothing) + if isnothing(id) + id = get(ex, :id, nothing) + end if !isnothing(id) idstr = replace(string(id), "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄", From 437ca5de4edfde4ea92f9cbd0011b242e43e6f1c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 24 Jun 2024 19:00:53 +1000 Subject: [PATCH 0766/1109] Test `||` and `&&` chains --- JuliaLowering/test/branching.jl | 88 +++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index 52ee6f4cb5a47..5db9d74e32eb7 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -5,7 +5,8 @@ test_mod = Module() #------------------------------------------------------------------------------- -# Tail position +@testset "Tail position" begin + @test JuliaLowering.include_string(test_mod, """ let a = true if a @@ -66,8 +67,10 @@ let a = false, b = false end """) === 3 +end + #------------------------------------------------------------------------------- -# Value, not tail position +@testset "Value required but not tail position" begin @test JuliaLowering.include_string(test_mod, """ let a = true @@ -135,8 +138,11 @@ let a = false, b = false end """) === 3 +end + #------------------------------------------------------------------------------- -# Side effects (not value or tail position) +@testset "Side effects (not value or tail position)" begin + @test JuliaLowering.include_string(test_mod, """ let a = true x = nothing @@ -209,8 +215,80 @@ let a = false, b = false end """) === 3 +end + 
#------------------------------------------------------------------------------- -# Detailed lowering +@testset "`&&` and `||` chains" begin + +@test JuliaLowering.include_string(test_mod, """ +true && "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +true && true && "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +false && "hi" +""") == false + +@test JuliaLowering.include_string(test_mod, """ +true && false && "hi" +""") == false + +@test JuliaLowering.include_string(test_mod, """ +begin + z = true && "hi" + z +end +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +begin + z = false && "hi" + z +end +""") == false + + +@test JuliaLowering.include_string(test_mod, """ +true || "hi" +""") == true + +@test JuliaLowering.include_string(test_mod, """ +true || true || "hi" +""") == true + +@test JuliaLowering.include_string(test_mod, """ +false || "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +false || true || "hi" +""") == true + +@test JuliaLowering.include_string(test_mod, """ +false || false || "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +begin + z = false || "hi" + z +end +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +begin + z = true || "hi" + z +end +""") == true + +end + +#------------------------------------------------------------------------------- +@testset "Detailed lowering tests" begin @test ir_as_text(test_mod, """ begin @@ -298,3 +376,5 @@ core.nothing (return ssa.₈)""" end + +end From e51dee511117f80788ff048e3d3252ba64cce59c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 26 Jun 2024 14:06:08 +1000 Subject: [PATCH 0767/1109] Implement `while/break/continue` Changes to desugaring and IR generation to support these constructs, including handling of the internal break_block form and tracking break labels in IR generation. 
--- JuliaLowering/src/ast.jl | 2 ++ JuliaLowering/src/desugaring.jl | 22 ++++++++++++ JuliaLowering/src/kinds.jl | 2 ++ JuliaLowering/src/linear_ir.jl | 43 +++++++++++++++++++++-- JuliaLowering/src/syntax_graph.jl | 1 + JuliaLowering/src/utils.jl | 10 ++++++ JuliaLowering/test/demo.jl | 13 +++++++ JuliaLowering/test/loops.jl | 58 +++++++++++++++++++++++++++++++ JuliaLowering/test/runtests.jl | 1 + JuliaLowering/test/utils.jl | 2 ++ 10 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 JuliaLowering/test/loops.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index e8b82d7844395..233d2a79861b1 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -71,6 +71,8 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) makeleaf(graph, srcref, k; var_id=value, kws...) elseif k == K"label" makeleaf(graph, srcref, k; id=value, kws...) + elseif k == K"symbolic_label" + makeleaf(graph, srcref, k; name_val=value, kws...) else val = k == K"Integer" ? convert(Int, value) : k == K"Float" ? convert(Float64, value) : diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 297ba91f29bf9..45e57d7920601 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -595,6 +595,11 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else @ast ctx ex [K"if" cond true::K"Bool" cs[end]] end + elseif k == K"break" + numchildren(ex) > 0 ? ex : + @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"] + elseif k == K"continue" + @ast ctx ex [K"break" "loop_cont"::K"symbolic_label"] elseif k == K"doc" @chk numchildren(ex) == 2 sig = expand_forms_2(ctx, ex[2], ex) @@ -662,6 +667,23 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ctx.mod ::K"Value" [K"inert" ex] ] + elseif k == K"vect" + @ast ctx ex [K"call" + "vect"::K"top" + expand_forms_2(ctx, children(ex))... 
+ ] + elseif k == K"while" + @chk numchildren(ex) == 2 + @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" + [K"_while" + expand_condition(ctx, ex[1]) + [K"break_block" "loop_cont"::K"symbolic_label" + [K"scope_block"(scope_type=:neutral) + expand_forms_2(ctx, ex[2]) + ] + ] + ] + ] elseif k == K"inert" ex elseif !haschildren(ex) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 647cfbecd4e3c..88a70e3fe22ff 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -49,6 +49,8 @@ function _insert_kinds() "enter" "leave" "label" + "symbolic_label" + "symbolic_goto" "goto" "gotoifnot" "trycatchelse" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 788e557b6b76b..2ed0b5448ec5a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -37,12 +37,14 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext lambda_locals::Set{VarId} return_type::Union{Nothing,NodeId} var_info::Dict{VarId,VarInfo} + break_labels::Dict{String, NodeId} mod::Module end function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, Ref(0), - is_toplevel_thunk, lambda_locals, return_type, ctx.var_info, ctx.mod) + is_toplevel_thunk, lambda_locals, return_type, ctx.var_info, + Dict{String,NodeId}(), ctx.mod) end function is_valid_body_ir_argument(ex) @@ -285,6 +287,31 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) res = compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) end res + elseif k == K"break_block" + end_label = make_label(ctx, ex) + name = ex[1].name_val + outer_label = get(ctx.break_labels, name, nothing) + ctx.break_labels[name] = end_label._id + compile(ctx, ex[2], false, false) + if isnothing(outer_label) + delete!(ctx.break_labels, name) + else + ctx.break_labels = outer_label + end + emit(ctx, end_label) + if needs_value + compile(ctx, 
nothing_(ctx, ex), needs_value, in_tail_pos) + end + elseif k == K"break" + name = ex[1].name_val + label_id = get(ctx.break_labels, name, nothing) + if isnothing(label_id) + ty = name == "loop_exit" ? "break" : "continue" + throw(LoweringError(ex, "$ty must be used inside a `while` or `for` loop")) + end + label = SyntaxTree(ctx.graph, label_id) + # TODO: try/finally handling + emit(ctx, @ast ctx ex [K"goto" label]) elseif k == K"return" compile(ctx, ex[1], true, true) nothing @@ -375,6 +402,16 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else emit(ctx, lam) end + elseif k == K"_while" + end_label = make_label(ctx, ex) + top_label = emit_label(ctx, ex) + compile_conditional(ctx, ex[1], end_label) + compile(ctx, ex[2], false, false) + emit(ctx, @ast ctx ex [K"goto" top_label]) + emit(ctx, end_label) + if needs_value + compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) + end elseif k == K"global" if needs_value throw(LoweringError(ex, "misplaced `global` declaration")) @@ -503,6 +540,7 @@ function compile_lambda(outer_ctx, ex) slot_rewrites = Dict{VarId,Int}() _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) _add_slots!(slot_rewrites, ctx.var_info, sort(collect(ex.lambda_locals))) + # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"lambda", makenode(ctx, ex[1], K"block", code), @@ -520,7 +558,8 @@ function linearize_ir(ctx, ex) # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently # required to call reparent() ... 
_ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, - Ref(0), false, Set{VarId}(), nothing, ctx.var_info, ctx.mod) + Ref(0), false, Set{VarId}(), nothing, ctx.var_info, + Dict{String,NodeId}(), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res._id, var_info=ctx.var_info) _ctx, res diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 2024d59bcf16a..969514e033f72 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -418,6 +418,7 @@ function _value_string(ex) k == K"Symbol" ? ":$(ex.name_val)" : k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : k == K"slot" ? "slot" : + k == K"symbolic_label" ? "label:$(ex.name_val)" : repr(get(ex, :value, nothing)) id = get(ex, :var_id, nothing) if isnothing(id) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 0fcb5d0a5cbaf..7ede7a3e8e8b7 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -67,3 +67,13 @@ function showprov(x; kws...) showprov(stdout, x; kws...) 
end +function print_ir(io::IO, ex) + @assert kind(ex) == K"lambda" && kind(ex[1]) == K"block" + stmts = children(ex[1]) + for (i, e) in enumerate(stmts) + lno = rpad(i, 3) + code = string(e) # rpad(string(e), 50) + println(io, lno, " ", code) + end +end + diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index ccd6a4a17a1a3..9083e19a33577 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -231,6 +231,19 @@ begin end """ +src = """ +begin + local i = 0 + while i < 10 + i = i + 1 + if isodd(i) + continue + end + println(i) + end +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl new file mode 100644 index 0000000000000..deb18fd6d1638 --- /dev/null +++ b/JuliaLowering/test/loops.jl @@ -0,0 +1,58 @@ + +@testset "loops" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 0 + while i < 5 + i = i + 1 + push!(a, i) + end + a +end +""") == [1,2,3,4,5] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 0 + while i < 5 + i = i + 1 + if i == 3 + break + end + push!(a, i) + end + a +end +""") == [1,2] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 0 + while i < 5 + i = i + 1 + if isodd(i) + continue + end + push!(a, i) + end + a +end +""") == [2,4] + +@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +break +""") + +@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +continue +""") + +# TODO: Test scope rules + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 641f5925fbf13..4b5d583e841e4 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -405,5 +405,6 @@ end """) include("branching.jl") +include("loops.jl") end diff --git a/JuliaLowering/test/utils.jl 
b/JuliaLowering/test/utils.jl index c209db5268752..cb62309b52574 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -1,3 +1,5 @@ +using Test + using JuliaLowering: SyntaxGraph, newnode!, ensure_attributes!, Kind, SourceRef, SyntaxTree, NodeId, From 24201e93f662a586435d380934cd97cd481af2bd Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 26 Jun 2024 17:14:12 +1000 Subject: [PATCH 0768/1109] Test system for testing IR New system for testing generated IR based on pretty printing the IR and comparing to a reference string. String comparison is an excellent way to do IR comparisons because * It's very simple to implement * Writing test cases is simple * It's easy to pinpoint errors in tests * Irrelevant detail can easily be omitted in the pretty printing The main downside is that, in principle, the pretty printing may have ambiguities, and changing the pretty printing causes high churn in test cases. However these seem like reasonable tradeoffs, given the benefits. 
--- JuliaLowering/test/branching.jl | 85 +---------------------------- JuliaLowering/test/branching_ir.jl | 88 ++++++++++++++++++++++++++++++ JuliaLowering/test/loops.jl | 2 + JuliaLowering/test/utils.jl | 37 +++++++++++-- 4 files changed, 124 insertions(+), 88 deletions(-) create mode 100644 JuliaLowering/test/branching_ir.jl diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index 5db9d74e32eb7..755927fdde8a3 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -290,90 +290,7 @@ end #------------------------------------------------------------------------------- @testset "Detailed lowering tests" begin -@test ir_as_text(test_mod, """ -begin - local a, b - if a - b - end -end -""") == """ -slot.₁/a -(gotoifnot ssa.₁ label.₅) -slot.₂/b -(return ssa.₃) -core.nothing -(return ssa.₅)""" - -@test ir_as_text(test_mod, """ -begin - local a, b, c - if a - b - end - c -end -""") == """ -slot.₁/a -(gotoifnot ssa.₁ label.₄) -slot.₂/b -slot.₃/c -(return ssa.₄)""" - -@test ir_as_text(test_mod, """ -begin - local a, b, c - if a - b - else - c - end -end -""") == """ -slot.₁/a -(gotoifnot ssa.₁ label.₅) -slot.₂/b -(return ssa.₃) -slot.₃/c -(return ssa.₅)""" - -@test ir_as_text(test_mod, """ -begin - local a, b, c, d - if a - b - else - c - end - d -end -""") == """ -slot.₁/a -(gotoifnot ssa.₁ label.₅) -slot.₂/b -(goto label.₆) -slot.₃/c -slot.₄/d -(return ssa.₆)""" - -# Blocks compile directly to branches -@test ir_as_text(test_mod, """ -begin - local a, b, c, d - if (a; b && c) - d - end -end -""") == """ -slot.₁/a -slot.₂/b -(gotoifnot ssa.₂ label.₈) -slot.₃/c -(gotoifnot ssa.₄ label.₈) -slot.₄/d -(return ssa.₆) -core.nothing -(return ssa.₈)""" +test_ir_cases(joinpath(@__DIR__,"branching_ir.jl")) end diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl new file mode 100644 index 0000000000000..5d78d1e423422 --- /dev/null +++ b/JuliaLowering/test/branching_ir.jl @@ -0,0 +1,88 @@ 
+###################################### +# Basic branching tail && value +begin + local a, b + if a + b + end +end +#------------------------- +1 slot.₁/a +2 (gotoifnot ssa.₁ label.₅) +3 slot.₂/b +4 (return ssa.₃) +5 core.nothing +6 (return ssa.₅) + +###################################### +# Branching, !tail && !value +begin + local a, b, c + if a + b + end + c +end +#------------------------- +1 slot.₁/a +2 (gotoifnot ssa.₁ label.₄) +3 slot.₂/b +4 slot.₃/c +5 (return ssa.₄) + +###################################### +# Branching with else +begin + local a, b, c + if a + b + else + c + end +end +#--------------------- +1 slot.₁/a +2 (gotoifnot ssa.₁ label.₅) +3 slot.₂/b +4 (return ssa.₃) +5 slot.₃/c +6 (return ssa.₅) + +###################################### +# Branching with else, !tail && !value +begin + local a, b, c, d + if a + b + else + c + end + d +end +#--------------------- +1 slot.₁/a +2 (gotoifnot ssa.₁ label.₅) +3 slot.₂/b +4 (goto label.₆) +5 slot.₃/c +6 slot.₄/d +7 (return ssa.₆) + +###################################### +# Blocks compile directly to branches +begin + local a, b, c, d + if (a; b && c) + d + end +end +#--------------------- +1 slot.₁/a +2 slot.₂/b +3 (gotoifnot ssa.₂ label.₈) +4 slot.₃/c +5 (gotoifnot ssa.₄ label.₈) +6 slot.₄/d +7 (return ssa.₆) +8 core.nothing +9 (return ssa.₈) diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index deb18fd6d1638..cfb04c46c4bb4 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -53,6 +53,8 @@ break continue """) +test_ir_cases(joinpath(@__DIR__, "loops_ir.jl")) + # TODO: Test scope rules end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index cb62309b52574..292043ce928dd 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -85,8 +85,37 @@ format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex) #------------------------------------------------------------------------------- -# Parse and lower `src`, and 
print statements from the linear IR in text format -function ir_as_text(mod, src) - ex = JuliaLowering.lower(mod, parsestmt(SyntaxTree, src)) - join(string.(children(ex[1])), "\n") +# Test tools + +function match_ir_test_case(case_str) + m = match(r"# *([^\n]*)\n((?:.|\n)*)#----*\n((?:.|\n)*)"m, strip(case_str)) + if isnothing(m) + error("Malformatted IR test case:\n$(repr(case_str))") + end + (name=strip(m[1]), input=strip(m[2]), output=strip(m[3])) +end + +function format_ir_test_case(mod, input) + ex = parsestmt(SyntaxTree, input) + x = JuliaLowering.lower(mod, ex) + output = strip(sprint(JuliaLowering.print_ir, x)) + output = replace(output, string(mod)=>"TestMod") +end + +function test_ir_cases(filename) + str = read(filename, String) + cases = [match_ir_test_case(s) for s in split(str, r"####*") if strip(s) != ""] + + mod = Module(:TestMod) + for (name,input,ref) in cases + output = format_ir_test_case(mod, input) + @testset "$name" begin + if output != ref + # Do our own error dumping, as @test will + @error "Test \"$name\" failed" output=Text(output) ref=Text(ref) + end + @test output == ref + end + end end + From 6ea0d4b8349690d173dc5a1b4ee42a1b31bbe02f Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 2 Jul 2024 11:16:09 -0500 Subject: [PATCH 0769/1109] Fix corpus tests on Julia 1.10+ (JuliaLang/JuliaSyntax.jl#447) --- JuliaSyntax/test/parse_packages.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index c02eb3a0d6ded..cec8939d32dee 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -39,9 +39,17 @@ base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") return nothing end - if endswith(f, "core.jl") - # Loose comparison due to `for f() = 1:3` syntax - return exprs_roughly_equal + if endswith(f, "core.jl") + # The test + # @test Union{Tuple{T}, Tuple{T,Int}} where {T} === 
widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T}) + # depends on a JuliaSyntax bugfix and parses differently (wrong) using + # flisp. This was added in julia#52228 and backported in julia#52045 + if v"1.10.0-rc1.39" <= VERSION + return nothing + else + # Loose comparison due to `for f() = 1:3` syntax + return exprs_roughly_equal + end end return exprs_equal_no_linenum From f17e97a1b5efad87dd1efaae1f715da6c7874c48 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 2 Jul 2024 12:38:37 -0500 Subject: [PATCH 0770/1109] Fix tests (JuliaLang/JuliaSyntax.jl#448) * Don't assume (io::IOBuffer).data is a Vector on new versions of Julia * Exclude test/subtype.jl from corpus due to new dependence on where precedence (JuliaLang/JuliaSyntax.jl#395) --- JuliaSyntax/src/parse_stream.jl | 3 ++- JuliaSyntax/test/parse_packages.jl | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 412d02b4c3be1..0c80ef9de94cf 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -314,7 +314,8 @@ function ParseStream(text::AbstractString, index::Integer=1; version=VERSION) end # IO-based cases -function ParseStream(io::IOBuffer; version=VERSION) +# TODO: switch ParseStream to use a Memory internally on newer versions of Julia +VERSION < v"1.11.0-DEV.753" && function ParseStream(io::IOBuffer; version=VERSION) ParseStream(io.data, io, position(io)+1, version) end function ParseStream(io::Base.GenericIOBuffer; version=VERSION) diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index cec8939d32dee..26bf39fa4ca6a 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -9,10 +9,10 @@ end end base_path = let - p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") + p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") if !isdir(p) # For julia 1.9 images. 
- p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "src", "base") + p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "src", "base") if !isdir(p) error("source for Julia base not found") end @@ -39,10 +39,10 @@ base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") return nothing end - if endswith(f, "core.jl") + if endswith(f, "core.jl") # The test # @test Union{Tuple{T}, Tuple{T,Int}} where {T} === widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T}) - # depends on a JuliaSyntax bugfix and parses differently (wrong) using + # depends on a JuliaSyntax bugfix and parses differently (wrong) using # flisp. This was added in julia#52228 and backported in julia#52045 if v"1.10.0-rc1.39" <= VERSION return nothing @@ -52,6 +52,11 @@ base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") end end + # subtype.jl also depends on the where precedence JuliaSyntax bugfix as of julia#53034 + if endswith(f, "subtype.jl") && v"1.11.0-DEV.1382" <= VERSION + return nothing + end + return exprs_equal_no_linenum end end From 8ab0c25d8ac3f97a466a8198b540d6c8c4d57b16 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 2 Jul 2024 12:49:12 -0500 Subject: [PATCH 0771/1109] Run CI on 1.9 and prerelease (JuliaLang/JuliaSyntax.jl#445) * Run CI on 1.9 and prerelease * Bump setup-julia to v2 --- JuliaSyntax/.github/workflows/CI.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index 985f9603f3387..cd97e6c241442 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -23,7 +23,9 @@ jobs: - '1.6' - '1.7' - '1.8' + - '1.9' - '1' + - 'pre' - 'nightly' os: - ubuntu-latest @@ -65,7 +67,7 @@ jobs: - {os: 'ubuntu-latest', version: '1.8', arch: 'x86'} steps: - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ 
matrix.arch }} @@ -92,7 +94,7 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: 1.6 arch: x64 From b66c0e51fc5f9f1ebd0691775f490e932fbfd4e6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Jul 2024 11:13:02 +1000 Subject: [PATCH 0772/1109] More systemantic treatment of bindings Introduce `Bindings` and `BindingInfo` structs, as better system and naming to replace the `var_info` dict and `next_var_id` ref. Introduce a new kind `K"BindingId"` - an integer handle for an entity (variable, constant, etc). Normally these are generated from `NameKey` (`name,scope_layer`) pairs written in the user's program. But they can also be generated programmatically by the `ssavar()` function, or by `new_mutable_var()`. This should allow proper "gensym" bindings from macros, without needing to rely on the generation of unique names via name mangling. In this system, `SSAValue` is just a particular kind of binding, relegated to a low level IR concept which has invariants which can, in principle, be checked or inferred by the variable analysis pass. 
--- JuliaLowering/src/ast.jl | 73 ++++++++++++--- JuliaLowering/src/desugaring.jl | 8 +- JuliaLowering/src/eval.jl | 18 ++-- JuliaLowering/src/kinds.jl | 2 + JuliaLowering/src/linear_ir.jl | 79 +++++++++------- JuliaLowering/src/macro_expansion.jl | 12 +-- JuliaLowering/src/scope_analysis.jl | 130 ++++++++++++--------------- JuliaLowering/src/syntax_graph.jl | 11 ++- JuliaLowering/test/demo.jl | 12 +-- JuliaLowering/test/utils.jl | 2 +- 10 files changed, 197 insertions(+), 150 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 233d2a79861b1..f3c2acd530c44 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -2,9 +2,54 @@ abstract type AbstractLoweringContext end """ -Unique symbolic identity for a variable +Unique symbolic identity for a variable, constant, label, or other entity """ -const VarId = Int +const IdTag = Int + +""" +Metadata about a binding +""" +struct BindingInfo + name::String + mod::Union{Nothing,Module} # Set when `kind === :global` + kind::Symbol # :local :global :argument :static_parameter + is_ssa::Bool # Single assignment, defined before use + is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) +end + +""" +Metadata about "entities" (variables, constants, etc) in the program. Each +entity is associated to a unique integer id, the BindingId. A binding will be +inferred for each *name* in the user's source program by symbolic analysis of +the source. + +However, bindings can also be introduced programmatically during lowering or +macro expansion: the primary key for bindings is the `BindingId` integer, not +a name. 
+""" +struct Bindings + info::Vector{BindingInfo} +end + +Bindings() = Bindings(Vector{BindingInfo}()) + +function new_binding(bindings::Bindings, info::BindingInfo) + push!(bindings.info, info) + return length(bindings.info) +end + +function lookup_binding(bindings::Bindings, id::Integer) + bindings.info[id] +end + +function lookup_binding(bindings::Bindings, ex::SyntaxTree) + # TODO: @assert kind(ex) == K"BindingId" + bindings.info[ex.var_id] +end + +function lookup_binding(ctx::AbstractLoweringContext, id) + lookup_binding(ctx.bindings, id) +end const LayerId = Int @@ -12,12 +57,6 @@ function syntax_graph(ctx::AbstractLoweringContext) ctx.graph end -function new_var_id(ctx::AbstractLoweringContext) - id = ctx.next_var_id[] - ctx.next_var_id[] += 1 - return id -end - #------------------------------------------------------------------------------- # AST creation utilities _node_id(ex::NodeId) = ex @@ -67,7 +106,7 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || k == K"globalref" makeleaf(graph, srcref, k; name_val=value, kws...) - elseif k == K"SSAValue" + elseif k == K"BindingId" makeleaf(graph, srcref, k; var_id=value, kws...) elseif k == K"label" makeleaf(graph, srcref, k; id=value, kws...) @@ -104,9 +143,21 @@ unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) -# Create a new SSA variable +# Create a new SSA binding function ssavar(ctx::AbstractLoweringContext, srcref) - makeleaf(ctx, srcref, K"SSAValue", var_id=new_var_id(ctx)) + # TODO: Store this name in only one place? Probably use the provenance chain? 
+ name = "ssa" + id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, true, false)) + # Create an identifier + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + makeleaf(ctx, nameref, K"BindingId", var_id=id) +end + +# Create a new local mutable variable +function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) + id = new_binding(ctx, BindingInfo(name, nothing, :local, false, false)) + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + makeleaf(ctx, nameref, K"BindingId", var_id=id) end # Assign `ex` to an SSA variable. diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 45e57d7920601..09f54919e15b1 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -10,7 +10,7 @@ end struct DesugaringContext{GraphType} <: AbstractLoweringContext graph::GraphType - next_var_id::Ref{VarId} + bindings::Bindings scope_layers::Vector{ScopeLayer} mod::Module end @@ -21,9 +21,9 @@ function DesugaringContext(ctx) source=SourceAttrType, value=Any, name_val=String, scope_type=Symbol, # :hard or :soft - var_id=VarId, + var_id=IdTag, lambda_info=LambdaInfo) - DesugaringContext(graph, ctx.next_var_id, ctx.scope_layers, ctx.current_layer.mod) + DesugaringContext(graph, ctx.bindings, ctx.scope_layers, ctx.current_layer.mod) end # Flatten nested && or || nodes and expand their children @@ -691,7 +691,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else if k == K"=" @chk numchildren(ex) == 2 - if kind(ex[1]) ∉ KSet"Identifier Placeholder SSAValue" + if kind(ex[1]) ∉ KSet"Identifier Placeholder BindingId" TODO(ex, "destructuring assignment") end end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 57ff9293599c5..e99da70e34dd0 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -98,9 +98,9 @@ end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function 
to_code_info(ex, mod, funcname, nargs, var_info, slot_rewrites) +function to_code_info(ex, mod, funcname, nargs, bindings, slot_rewrites) input_code = children(ex) - code = Any[to_lowered_expr(mod, var_info, ex) for ex in input_code] + code = Any[to_lowered_expr(mod, bindings, ex) for ex in input_code] debuginfo = ir_debug_info(ex) @@ -115,7 +115,7 @@ function to_code_info(ex, mod, funcname, nargs, var_info, slot_rewrites) slot_rename_inds = Dict{String,Int}() slotflags = Vector{UInt8}(undef, nslots) for (id,i) in slot_rewrites - info = var_info[id] + info = lookup_binding(bindings, id) name = info.name ni = get(slot_rename_inds, name, 0) slot_rename_inds[name] = ni + 1 @@ -176,7 +176,7 @@ function to_code_info(ex, mod, funcname, nargs, var_info, slot_rewrites) ) end -function to_lowered_expr(mod, var_info, ex) +function to_lowered_expr(mod, bindings, ex) k = kind(ex) if is_literal(k) || k == K"Bool" ex.value @@ -203,7 +203,7 @@ function to_lowered_expr(mod, var_info, ex) elseif k == K"SSAValue" Core.SSAValue(ex.var_id) elseif k == K"return" - Core.ReturnNode(to_lowered_expr(mod, var_info, ex[1])) + Core.ReturnNode(to_lowered_expr(mod, bindings, ex[1])) elseif is_quoted(k) if k == K"inert" ex[1] @@ -215,7 +215,7 @@ function to_lowered_expr(mod, var_info, ex) "top-level scope" : "none" # FIXME nargs = length(ex.lambda_info.args) - ir = to_code_info(ex[1], mod, funcname, nargs, var_info, ex.slot_rewrites) + ir = to_code_info(ex[1], mod, funcname, nargs, bindings, ex.slot_rewrites) if ex.lambda_info.is_toplevel_thunk Expr(:thunk, ir) else @@ -226,7 +226,7 @@ function to_lowered_expr(mod, var_info, ex) elseif k == K"goto" Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, var_info, ex[1]), ex[2].id) + Core.GotoIfNot(to_lowered_expr(mod, bindings, ex[1]), ex[2].id) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # @@ -243,7 +243,7 @@ function to_lowered_expr(mod, var_info, ex) if isnothing(head) 
TODO(ex, "Unhandled form for kind $k") end - Expr(head, map(e->to_lowered_expr(mod, var_info, e), children(ex))...) + Expr(head, map(e->to_lowered_expr(mod, bindings, e), children(ex))...) end end @@ -259,7 +259,7 @@ function Core.eval(mod::Module, ex::SyntaxTree) return x end linear_ir = lower(mod, ex) - expr_form = to_lowered_expr(mod, linear_ir.var_info, linear_ir) + expr_form = to_lowered_expr(mod, linear_ir.bindings, linear_ir) eval(mod, expr_form) end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 88a70e3fe22ff..393e175586d0b 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -24,6 +24,8 @@ function _insert_kinds() "loopinfo" # Identifier for a value which is only assigned once "SSAValue" + # Unique identifying integer for bindings (of variables, constants, etc) + "BindingId" # Scope expressions `(hygienic_scope ex s)` mean `ex` should be # interpreted as being in scope `s`. "hygienic_scope" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 2ed0b5448ec5a..d21a98463b51d 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -22,6 +22,10 @@ function is_valid_ir_argument(ex) #k == K"core" || k == K"slot" || k = K"static_parameter") end +function is_ssa(ctx, ex) + kind(ex) == K"BindingId" && lookup_binding(ctx, ex).is_ssa +end + """ Context for creating linear IR. @@ -31,19 +35,18 @@ linear IR. 
struct LinearIRContext{GraphType} <: AbstractLoweringContext graph::GraphType code::SyntaxList{GraphType, Vector{NodeId}} - next_var_id::Ref{Int} + bindings::Bindings next_label_id::Ref{Int} is_toplevel_thunk::Bool - lambda_locals::Set{VarId} + lambda_locals::Set{IdTag} return_type::Union{Nothing,NodeId} - var_info::Dict{VarId,VarInfo} break_labels::Dict{String, NodeId} mod::Module end function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) - LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.next_var_id, Ref(0), - is_toplevel_thunk, lambda_locals, return_type, ctx.var_info, + LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.bindings, Ref(0), + is_toplevel_thunk, lambda_locals, return_type, Dict{String,NodeId}(), ctx.mod) end @@ -75,8 +78,8 @@ function is_const_read_arg(ctx, ex) k == K"quote" || k == K"inert" || k == K"top" || k == K"core" end -function is_valid_ir_rvalue(lhs, rhs) - return kind(lhs) == K"SSAValue" || +function is_valid_ir_rvalue(ctx, lhs, rhs) + return is_ssa(ctx, lhs) || is_valid_ir_argument(rhs) || (kind(lhs) == K"Identifier" && # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref @@ -112,9 +115,7 @@ end # Emit computation of ex, assigning the result to an ssavar and returning that function emit_assign_tmp(ctx::LinearIRContext, ex) - # TODO: We could replace this with an index into the code array right away? 
- tmp = makeleaf(ctx, ex, K"SSAValue", var_id=ctx.next_var_id[]) - ctx.next_var_id[] += 1 + tmp = ssavar(ctx, ex) emit(ctx, ex, K"=", tmp, ex) return tmp end @@ -135,7 +136,7 @@ end function emit_assignment(ctx, srcref, lhs, rhs) if !isnothing(rhs) - if is_valid_ir_rvalue(lhs, rhs) + if is_valid_ir_rvalue(ctx, lhs, rhs) emit(ctx, srcref, K"=", lhs, rhs) else r = emit_assign_tmp(ctx, rhs) @@ -215,11 +216,13 @@ function compile_conditional(ctx, ex, false_label) end end -function new_mutable_var(ctx, srcref, name) - id = new_var_id(ctx) - ctx.var_info[id] = VarInfo(name, nothing, :local, false, false) +function new_mutable_var(ctx::LinearIRContext, srcref, name) + # TODO: Deduplicate this somehow with generic new_mutable_var? + id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, false, false)) + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + var = makeleaf(ctx, nameref, K"BindingId", var_id=id) push!(ctx.lambda_locals, id) - makeleaf(ctx, srcref, K"Identifier", name, var_id=id) + var end # This pass behaves like an interpreter on the given code. @@ -230,7 +233,7 @@ end # TODO: Is it ok to return `nothing` if we have no value in some sense? 
function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) - if k == K"Identifier" || is_literal(k) || k == K"SSAValue" || k == K"quote" || k == K"inert" || + if k == K"Identifier" || is_literal(k) || k == K"BindingId" || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" || k == K"Bool" # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall @@ -439,7 +442,7 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) makeleaf(ctx, ex, K"slot"; var_id=slot_id) else # TODO: look up any static parameters - info = ctx.var_info[id] + info = lookup_binding(ctx, id) if info.kind === :global makeleaf(ctx, ex, K"globalref", ex.name_val, mod=info.mod) else @@ -450,8 +453,17 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) TODO(ex, "_renumber $k") elseif is_literal(k) || is_quoted(k) || k == K"global" ex - elseif k == K"SSAValue" - makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[ex.var_id]) + elseif k == K"BindingId" + # TODO: This case should replace K"Identifier" completely. For now only + # SSA variables go through here. Instead, we should branch on ssa_rewrites. 
+ id = ex.var_id + if haskey(ssa_rewrites, id) + makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[id]) + else + slot_id = get(slot_rewrites, id, nothing) + @assert !isnothing(slot_id) + makeleaf(ctx, ex, K"slot"; var_id=slot_id) + end elseif k == K"enter" TODO(ex, "_renumber $k") elseif k == K"goto" @@ -477,15 +489,15 @@ end # flisp: renumber-lambda, compact-ir function renumber_body(ctx, input_code, slot_rewrites) # Step 1: Remove any assignments to SSA variables, record the indices of labels - ssa_rewrites = Dict{VarId,VarId}() + ssa_rewrites = Dict{IdTag,IdTag}() label_table = Dict{Int,Int}() code = SyntaxList(ctx) for ex in input_code k = kind(ex) ex_out = nothing - if k == K"=" && kind(ex[1]) == K"SSAValue" + if k == K"=" && is_ssa(ctx, ex[1]) lhs_id = ex[1].var_id - if kind(ex[2]) == K"SSAValue" + if is_ssa(ctx, ex[2]) # For SSA₁ = SSA₂, record that all uses of SSA₁ should be replaced by SSA₂ ssa_rewrites[lhs_id] = ssa_rewrites[ex[2].var_id] else @@ -519,10 +531,10 @@ function compile_body(ctx, ex) # TODO: Filter out any newvar nodes where the arg is definitely initialized end -function _add_slots!(slot_rewrites, var_info, var_ids) +function _add_slots!(slot_rewrites, bindings, ids) n = length(slot_rewrites) + 1 - for id in var_ids - info = var_info[id] + for id in ids + info = lookup_binding(bindings, id) if info.kind == :local || info.kind == :argument slot_rewrites[id] = n n += 1 @@ -537,9 +549,10 @@ function compile_lambda(outer_ctx, ex) # TODO: Add assignments for reassigned arguments to body using lambda_info.args ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, ex.lambda_locals, return_type) compile_body(ctx, ex[1]) - slot_rewrites = Dict{VarId,Int}() - _add_slots!(slot_rewrites, ctx.var_info, (arg.var_id for arg in lambda_info.args)) - _add_slots!(slot_rewrites, ctx.var_info, sort(collect(ex.lambda_locals))) + slot_rewrites = Dict{IdTag,Int}() + _add_slots!(slot_rewrites, ctx.bindings, (arg.var_id for arg in lambda_info.args)) + # 
Sorting the lambda locals is required to remove dependence on Dict iteration order. + _add_slots!(slot_rewrites, ctx.bindings, sort(collect(ex.lambda_locals))) # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"lambda", @@ -551,17 +564,17 @@ end function linearize_ir(ctx, ex) graph = ensure_attributes(ctx.graph, - slot_rewrites=Dict{VarId,Int}, - var_info=Dict{VarId,VarInfo}, + slot_rewrites=Dict{IdTag,Int}, + bindings=Bindings, mod=Module, id=Int) # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently # required to call reparent() ... - _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.next_var_id, - Ref(0), false, Set{VarId}(), nothing, ctx.var_info, + _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, + Ref(0), false, Set{IdTag}(), nothing, Dict{String,NodeId}(), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) - setattr!(graph, res._id, var_info=ctx.var_info) + setattr!(graph, res._id, bindings=ctx.bindings) _ctx, res end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index b0d004d858adb..89477584b4956 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -16,7 +16,7 @@ end struct MacroExpansionContext{GraphType} <: AbstractLoweringContext graph::GraphType - next_var_id::Ref{VarId} + bindings::Bindings scope_layers::Vector{ScopeLayer} current_layer::ScopeLayer end @@ -121,7 +121,7 @@ function eval_macro_name(ctx, ex) ctx3, ex3 = resolve_scopes!(ctx2, ex2) ctx4, ex4 = linearize_ir(ctx3, ex3) mod = ctx.current_layer.mod - expr_form = to_lowered_expr(mod, ex4.var_info, ex4) + expr_form = to_lowered_expr(mod, ex4.bindings, ex4) eval(mod, expr_form) end @@ -162,7 +162,7 @@ function expand_macro(ctx, ex) expanded = append_sourceref(ctx, expanded, ex) new_layer = ScopeLayer(length(ctx.scope_layers)+1, parentmodule(macfunc), true) push!(ctx.scope_layers, new_layer) - inner_ctx = 
MacroExpansionContext(ctx.graph, ctx.next_var_id, ctx.scope_layers, new_layer) + inner_ctx = MacroExpansionContext(ctx.graph, ctx.bindings, ctx.scope_layers, new_layer) expanded = expand_forms_1(inner_ctx, expanded) else expanded = @ast ctx ex expanded::K"Value" @@ -229,16 +229,16 @@ end function expand_forms_1(mod::Module, ex::SyntaxTree) graph = ensure_attributes(syntax_graph(ex), - var_id=VarId, + var_id=IdTag, scope_layer=LayerId, __macro_ctx__=Nothing) layers = ScopeLayer[ScopeLayer(1, mod, false)] - ctx = MacroExpansionContext(graph, Ref{VarId}(1), layers, layers[1]) + ctx = MacroExpansionContext(graph, Bindings(), layers, layers[1]) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) graph2 = delete_attributes(graph, :__macro_ctx__) # TODO: Returning the context with pass-specific mutable data is a bad way # to carry state into the next pass. - ctx2 = MacroExpansionContext(graph2, ctx.next_var_id, ctx.scope_layers, + ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, ctx.current_layer) return ctx2, reparent(ctx2, ex2) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index d2d10dc750cec..8cc1e15d90184 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -47,24 +47,31 @@ # return nothing # end +""" +Key to use when transforming names into bindings +""" +struct NameKey + name::String + layer::LayerId +end #------------------------------------------------------------------------------- function _find_scope_vars!(assignments, locals, globals, used_names, ex) k = kind(ex) if k == K"Identifier" - push!(used_names, VarKey(ex)) + push!(used_names, NameKey(ex)) elseif !haschildren(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return elseif k == K"local" || k == K"local_def" - get!(locals, VarKey(ex[1]), ex) + get!(locals, NameKey(ex[1]), ex) elseif k == K"global" - get!(globals, VarKey(ex[1]), ex) + get!(globals, NameKey(ex[1]), ex) # elseif k == 
K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) - if !(kind(v) in KSet"SSAValue globalref outerref Placeholder") - get!(assignments, VarKey(v), v) + if !(kind(v) in KSet"BindingId globalref outerref Placeholder") + get!(assignments, NameKey(v), v) end _find_scope_vars!(assignments, locals, globals, used_names, ex[2]) else @@ -80,10 +87,10 @@ end # NB: This only works propery after desugaring has already processed assignments function find_scope_vars(ex) ExT = typeof(ex) - assignments = Dict{VarKey,ExT}() - locals = Dict{VarKey,ExT}() - globals = Dict{VarKey,ExT}() - used_names = Set{VarKey}() + assignments = Dict{NameKey,ExT}() + locals = Dict{NameKey,ExT}() + globals = Dict{NameKey,ExT}() + used_names = Set{NameKey}() for e in children(ex) _find_scope_vars!(assignments, locals, globals, used_names, e) end @@ -97,15 +104,7 @@ function find_scope_vars(ex) return assignments, locals, globals, used_names end -""" -Key to use when looking up variables, composed of the name and scope layer. 
-""" -struct VarKey - name::String - layer::LayerId -end - -function Base.isless(a::VarKey, b::VarKey) +function Base.isless(a::NameKey, b::NameKey) (a.name, a.layer) < (b.name, b.layer) end @@ -116,20 +115,9 @@ end # - create additional layers, though this may be unnecessary const _lowering_internal_layer = -1 -function VarKey(ex::SyntaxTree) +function NameKey(ex::SyntaxTree) @chk kind(ex) == K"Identifier" - VarKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) -end - -""" -Metadata about a variable name - whether it's a local, etc -""" -struct VarInfo - name::String - mod::Union{Nothing,Module} - kind::Symbol # :local :global :argument :static_parameter - is_single_assign::Bool # Single assignment - is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) + NameKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) end struct ScopeInfo @@ -141,69 +129,63 @@ struct ScopeInfo is_hard::Bool # Map from variable names to IDs which appear in this scope but not in the # parent scope - var_ids::Dict{VarKey,VarId} + # TODO: Rename to `locals` or local_bindings? + var_ids::Dict{NameKey,IdTag} # Variables used by the enclosing lambda - lambda_locals::Set{VarId} + lambda_locals::Set{IdTag} end struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext graph::GraphType - next_var_id::Ref{VarId} + bindings::Bindings mod::Module scope_layers::Vector{ScopeLayer} # name=>id mappings for all discovered global vars - global_vars::Dict{VarKey,VarId} + global_vars::Dict{NameKey,IdTag} # Stack of name=>id mappings for each scope, innermost scope last. scope_stack::Vector{ScopeInfo} - # Metadata about variables. There's only one map for this, as var_id is is - # unique across the context, even for same-named vars in unrelated local - # scopes. 
- var_info::Dict{VarId,VarInfo} # Variables which were implicitly global due to being assigned to in top # level code - implicit_toplevel_globals::Set{VarKey} + implicit_toplevel_globals::Set{NameKey} end function ScopeResolutionContext(ctx) - graph = ensure_attributes(ctx.graph, lambda_locals=Set{VarId}) + graph = ensure_attributes(ctx.graph, lambda_locals=Set{IdTag}) ScopeResolutionContext(graph, - ctx.next_var_id, + ctx.bindings, ctx.mod, ctx.scope_layers, - Dict{VarKey,VarId}(), + Dict{NameKey,IdTag}(), Vector{ScopeInfo}(), - Dict{VarId,VarInfo}(), - Set{VarKey}()) + Set{NameKey}()) end -function lookup_var(ctx, varkey::VarKey, exclude_toplevel_globals=false) +function lookup_var(ctx, varkey::NameKey, exclude_toplevel_globals=false) for i in lastindex(ctx.scope_stack):-1:1 ids = ctx.scope_stack[i].var_ids id = get(ids, varkey, nothing) if !isnothing(id) && (!exclude_toplevel_globals || - i > 1 || ctx.var_info[id].kind != :global) + i > 1 || lookup_binding(ctx, id).kind != :global) return id end end return exclude_toplevel_globals ? nothing : get(ctx.global_vars, varkey, nothing) end -function var_kind(ctx, id::VarId) - ctx.var_info[id].kind +function var_kind(ctx, id::IdTag) + lookup_binding(ctx, id).kind end -function var_kind(ctx, varkey::VarKey, exclude_toplevel_globals=false) +function var_kind(ctx, varkey::NameKey, exclude_toplevel_globals=false) id = lookup_var(ctx, varkey, exclude_toplevel_globals) - isnothing(id) ? nothing : ctx.var_info[id].kind + isnothing(id) ? nothing : lookup_binding(ctx, id).kind end -# FIXME: This name is a misnomer now. It's more like "maybe_new_var" ... -function new_var(ctx, varkey::VarKey, kind::Symbol, is_ambiguous_local=false) +function init_binding(ctx, varkey::NameKey, kind::Symbol, is_ambiguous_local=false) id = kind === :global ? get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) - id = new_var_id(ctx) mod = kind === :global ? 
ctx.scope_layers[varkey.layer].mod : nothing - ctx.var_info[id] = VarInfo(varkey.name, mod, kind, false, is_ambiguous_local) + id = new_binding(ctx.bindings, BindingInfo(varkey.name, mod, kind, false, is_ambiguous_local)) end if kind === :global ctx.global_vars[varkey] = id @@ -212,7 +194,7 @@ function new_var(ctx, varkey::VarKey, kind::Symbol, is_ambiguous_local=false) end # Analyze identifier usage within a scope, adding all newly discovered -# identifiers to ctx.var_info and constructing a lookup table from identifier +# identifiers to ctx.bindings and constructing a lookup table from identifier # names to their variable IDs function analyze_scope(ctx, ex, scope_type, lambda_info) parentscope = isempty(ctx.scope_stack) ? nothing : ctx.scope_stack[end] @@ -224,17 +206,17 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # Create new lookup table for variables in this scope which differ from the # parent scope. - var_ids = Dict{VarKey,VarId}() + var_ids = Dict{NameKey,IdTag}() # Add lambda arguments if !isnothing(lambda_info) for a in lambda_info.args - varkey = VarKey(a) - var_ids[varkey] = new_var(ctx, varkey, :argument) + varkey = NameKey(a) + var_ids[varkey] = init_binding(ctx, varkey, :argument) end for a in lambda_info.static_parameters - varkey = VarKey(a) - var_ids[varkey] = new_var(ctx, varkey, :static_parameter) + varkey = NameKey(a) + var_ids[varkey] = init_binding(ctx, varkey, :static_parameter) end end @@ -244,7 +226,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) if varkey in global_keys throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global")) elseif haskey(var_ids, varkey) - vk = ctx.var_info[var_ids[varkey]].kind + vk = lookup_binding(ctx, var_ids[varkey]).kind if vk === :argument && is_outer_lambda_scope throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with an argument")) elseif vk === :static_parameter @@ -253,13 +235,13 @@ function analyze_scope(ctx, ex, scope_type, 
lambda_info) elseif var_kind(ctx, varkey) === :static_parameter throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) end - var_ids[varkey] = new_var(ctx, varkey, :local) + var_ids[varkey] = init_binding(ctx, varkey, :local) end # Add explicit globals for (varkey,e) in globals if haskey(var_ids, varkey) - vk = ctx.var_info[var_ids[varkey]].kind + vk = lookup_binding(ctx, var_ids[varkey]).kind if vk === :argument && is_outer_lambda_scope throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with an argument")) elseif vk === :static_parameter @@ -268,7 +250,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) elseif var_kind(ctx, varkey) === :static_parameter throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) end - var_ids[varkey] = new_var(ctx, varkey, :global) + var_ids[varkey] = init_binding(ctx, varkey, :global) end # Compute implicit locals and globals @@ -280,13 +262,13 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # a macro expansion for (varkey,e) in assignments vk = haskey(var_ids, varkey) ? - ctx.var_info[var_ids[varkey]].kind : + lookup_binding(ctx, var_ids[varkey]).kind : var_kind(ctx, varkey, true) if vk === nothing if ctx.scope_layers[varkey.layer].is_macro_expansion - var_ids[varkey] = new_var(ctx, varkey, :local) + var_ids[varkey] = init_binding(ctx, varkey, :local) else - new_var(ctx, varkey, :global) + init_binding(ctx, varkey, :global) push!(ctx.implicit_toplevel_globals, varkey) end end @@ -299,7 +281,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # Outside top level code, most assignments create local variables implicitly for (varkey,e) in assignments vk = haskey(var_ids, varkey) ? 
- ctx.var_info[var_ids[varkey]].kind : + lookup_binding(ctx, var_ids[varkey]).kind : var_kind(ctx, varkey, true) if vk === :static_parameter throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) @@ -317,7 +299,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # like assignments to locals do inside a function. if is_soft_scope # Soft scope (eg, for loop in REPL) => treat as a global - new_var(ctx, varkey, :global) + init_binding(ctx, varkey, :global) continue else # Ambiguous case (eg, nontrivial scopes in package top level code) @@ -326,18 +308,18 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end end - var_ids[varkey] = new_var(ctx, varkey, :local, is_ambiguous_local) + var_ids[varkey] = init_binding(ctx, varkey, :local, is_ambiguous_local) end end for varkey in used if lookup_var(ctx, varkey) === nothing # Add other newly discovered identifiers as globals - new_var(ctx, varkey, :global) + init_binding(ctx, varkey, :global) end end - lambda_locals = is_outer_lambda_scope ? Set{VarId}() : parentscope.lambda_locals + lambda_locals = is_outer_lambda_scope ? Set{IdTag}() : parentscope.lambda_locals for id in values(var_ids) vk = var_kind(ctx, id) if vk === :local @@ -351,7 +333,7 @@ end function _resolve_scopes!(ctx, ex) k = kind(ex) if k == K"Identifier" - id = lookup_var(ctx, VarKey(ex)) + id = lookup_var(ctx, NameKey(ex)) setattr!(ctx.graph, ex._id, var_id=id) elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" return diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 969514e033f72..91d5fc9f4544e 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -412,6 +412,7 @@ function _value_string(ex) str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : k == K"Placeholder" ? ex.name_val : k == K"SSAValue" ? "ssa" : + k == K"BindingId" ? "bnd" : k == K"label" ? "label" : k == K"core" ? 
"core.$(ex.name_val)" : k == K"top" ? "top.$(ex.name_val)" : @@ -430,10 +431,12 @@ function _value_string(ex) "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") str = "$(str).$idstr" end - if k == K"slot" - # TODO: Ideally shouldn't need to rewrap the id here... - srcex = SyntaxTree(ex._graph, ex.source) - str = "$(str)/$(srcex.name_val)" + if k == K"slot" || k == K"BindingId" + p = provenance(ex)[1] + while kind(p) != K"Identifier" + p = provenance(p)[1] + end + str = "$(str)/$(p.name_val)" end return str end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9083e19a33577..0aa5837f897b3 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -8,16 +8,12 @@ using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attribu using JuliaSyntaxFormatter # Extract variable kind for highlighting purposes -function var_kind(e) - id = get(e, :var_id, nothing) +function var_kind(ex) + id = get(ex, :var_id, nothing) if isnothing(id) return nothing end - info = get(ctx3.var_info, id, nothing) - if isnothing(info) - return nothing - end - return info.kind + return lookup_binding(ctx3, id).kind end function formatsrc(ex; kws...) 
@@ -263,7 +259,7 @@ ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) @info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) -ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.var_info, ex_compiled) +ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.bindings, ex_compiled) @info "CodeInfo" ex_expr eval_result = Base.eval(in_mod, ex_expr) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 292043ce928dd..405e472255e01 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -61,7 +61,7 @@ function _format_as_ast_macro(io, ex, indent) else val_str = if k == K"Identifier" || k == K"core" || k == K"top" repr(ex.name_val) - elseif k == K"SSAValue" + elseif k == K"BindingId" repr(ex.var_id) else repr(get(ex, :value, nothing)) From 0f611bfa995aa0ae454a9c7921f9ceb12b87c81f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 4 Jul 2024 18:52:43 +1000 Subject: [PATCH 0773/1109] Lowering of some simple `for` loops This is just enough to get the most basic for loops working. There's lots still to do here - cartesian iteration, proper tuple destructuring in assignment etc. 
--- JuliaLowering/src/ast.jl | 9 +-- JuliaLowering/src/desugaring.jl | 120 ++++++++++++++++++++++++++-- JuliaLowering/src/linear_ir.jl | 10 +++ JuliaLowering/src/scope_analysis.jl | 22 +++-- JuliaLowering/src/syntax_graph.jl | 4 +- JuliaLowering/test/demo.jl | 6 ++ 6 files changed, 152 insertions(+), 19 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index f3c2acd530c44..b0c278b6f9ced 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -144,9 +144,8 @@ unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) # Create a new SSA binding -function ssavar(ctx::AbstractLoweringContext, srcref) +function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") # TODO: Store this name in only one place? Probably use the provenance chain? - name = "ssa" id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, true, false)) # Create an identifier nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) @@ -155,15 +154,15 @@ end # Create a new local mutable variable function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) - id = new_binding(ctx, BindingInfo(name, nothing, :local, false, false)) + id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, false, false)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) makeleaf(ctx, nameref, K"BindingId", var_id=id) end # Assign `ex` to an SSA variable. 
# Return (variable, assignment_node) -function assign_tmp(ctx::AbstractLoweringContext, ex) - var = ssavar(ctx, ex) +function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp") + var = ssavar(ctx, ex, name) assign_var = makenode(ctx, ex, K"=", var, ex) var, assign_var end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 09f54919e15b1..7b52050508c65 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -26,6 +26,63 @@ function DesugaringContext(ctx) DesugaringContext(graph, ctx.bindings, ctx.scope_layers, ctx.current_layer.mod) end +function is_identifier_like(ex) + k = kind(ex) + k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" +end + +is_assignment(ex) = kind(ex) == K"=" + +function has_parameters(ex) + numchildren(ex) >= 1 && kind(ex[end]) == K"parameters" +end + +# Create an assignment `$lhs = $rhs` where `lhs` must be "simple". If `rhs` is +# a block, sink the assignment into the last statement of the block to keep +# more expressions at top level. `rhs` should already be expanded. +# +# flisp: sink-assignment +function simple_assignment(ctx, assign_srcref, lhs, rhs) + @assert is_identifier_like(lhs) + if kind(rhs) == K"block" + @ast ctx assign_srcref [K"block" + rhs[1:end-1]... + [K"=" lhs rhs[end]] + ] + else + @ast ctx assign_srcref [K"=" lhs rhs] + end +end + +function expand_tuple_destruct(ctx, ex) + lhs = ex[1] + @assert kind(lhs) == K"tuple" + rhs = expand_forms_2(ctx, ex[2]) + + # FIXME: This is specialized to only the form produced by lowering of `for`. 
+ @assert numchildren(lhs) == 2 && all(is_identifier_like, children(lhs)) + @ast ctx ex [K"block" + r = rhs + [K"=" lhs[1] [K"call" "getindex"::K"top" r 1::K"Integer"]] + [K"=" lhs[2] [K"call" "getindex"::K"top" r 2::K"Integer"]] + ] +end + +function expand_assignment(ctx, ex) + @chk numchildren(ex) == 2 + lhs = ex[1] + rhs = ex[2] + kl = kind(lhs) + if is_identifier_like(lhs) + simple_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs)) + elseif kl == K"tuple" + # TODO: has_parameters + expand_tuple_destruct(ctx, ex) + else + TODO(ex) + end +end + # Flatten nested && or || nodes and expand their children function expand_cond_children(ctx, ex, cond_kind=kind(ex), flat_children=SyntaxList(ctx)) for e in children(ex) @@ -148,6 +205,59 @@ function expand_call(ctx, ex) end end +function expand_for(ctx, ex) + iterspec = ex[1] + + iter_var = iterspec[1] + iter_ex = iterspec[2] + + # TODO: multiple iteration variables + @assert is_identifier_like(iter_var) + + next = new_mutable_var(ctx, iterspec, "next") + state = ssavar(ctx, iterspec, "state") + collection = ssavar(ctx, iter_ex, "collection") + + # Inner body + inner_body = ex[2] + body = @ast ctx inner_body [K"block" + [K"=" [K"tuple" iter_var state] next] + inner_body + ] + body = @ast ctx inner_body [K"break_block" + "loop_cont"::K"symbolic_label" + [K"let"(scope_type=:neutral) + [K"block" + # TODO: copied-vars + ] + body + ] + ] + + # Nearly all this machinery is lowering of the iteration specification, so + # most gets attributed to `iterspec`. 
+ loop = @ast ctx ex [K"block" + [K"="(iter_ex) collection iter_ex] + # next = top.iterate(collection) + [K"="(iterspec) next [K"call" "iterate"::K"top" collection]] + # TODO if outer require-existing-local + [K"if"(iterspec) # if next !== nothing + [K"call"(iterspec) "not_int"::K"top" [K"call" "==="::K"core" next "nothing"::K"core"]] + [K"_do_while"(ex) + [K"block" + body + [K"="(iterspec) next [K"call" "iterate"::K"top" collection state]] + ] + [K"call"(iterspec) "not_int"::K"top" [K"call" "==="::K"core" next "nothing"::K"core"]] + ] + ] + ] + + @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" + loop + ] +end + # Strip variable type declarations from within a `local` or `global`, returning # the stripped expression. Works recursively with complex left hand side # assignments containing tuple destructuring. Eg, given @@ -595,6 +705,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else @ast ctx ex [K"if" cond true::K"Bool" cs[end]] end + elseif k == K"=" + expand_assignment(ctx, ex) elseif k == K"break" numchildren(ex) > 0 ? 
ex : @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"] @@ -603,6 +715,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"doc" @chk numchildren(ex) == 2 sig = expand_forms_2(ctx, ex[2], ex) + elseif k == K"for" + expand_forms_2(ctx, expand_for(ctx, ex)) elseif k == K"function" expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) elseif k == K"macro" @@ -689,12 +803,6 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif !haschildren(ex) ex else - if k == K"=" - @chk numchildren(ex) == 2 - if kind(ex[1]) ∉ KSet"Identifier Placeholder BindingId" - TODO(ex, "destructuring assignment") - end - end mapchildren(e->expand_forms_2(ctx,e), ctx, ex) end end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index d21a98463b51d..a85767c94d350 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -415,6 +415,16 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if needs_value compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) end + elseif k == K"_do_while" + end_label = make_label(ctx, ex) + top_label = emit_label(ctx, ex) + compile(ctx, ex[1], false, false) + compile_conditional(ctx, ex[2], end_label) + emit(ctx, @ast ctx ex [K"goto" top_label]) + emit(ctx, end_label) + if needs_value + compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) + end elseif k == K"global" if needs_value throw(LoweringError(ex, "misplaced `global` declaration")) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 8cc1e15d90184..55cc10f4a717f 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -56,10 +56,12 @@ struct NameKey end #------------------------------------------------------------------------------- -function _find_scope_vars!(assignments, locals, globals, used_names, ex) +function _find_scope_vars!(assignments, locals, globals, 
used_names, used_bindings, ex) k = kind(ex) if k == K"Identifier" push!(used_names, NameKey(ex)) + elseif k == K"BindingId" + push!(used_bindings, ex.var_id) elseif !haschildren(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return @@ -73,10 +75,10 @@ function _find_scope_vars!(assignments, locals, globals, used_names, ex) if !(kind(v) in KSet"BindingId globalref outerref Placeholder") get!(assignments, NameKey(v), v) end - _find_scope_vars!(assignments, locals, globals, used_names, ex[2]) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex[2]) else for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, e) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) end end end @@ -91,8 +93,9 @@ function find_scope_vars(ex) locals = Dict{NameKey,ExT}() globals = Dict{NameKey,ExT}() used_names = Set{NameKey}() + used_bindings = Set{IdTag}() for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, e) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) end # Sort by key so that id generation is deterministic @@ -100,8 +103,9 @@ function find_scope_vars(ex) locals = sort(collect(pairs(locals)), by=first) globals = sort(collect(pairs(globals)), by=first) used_names = sort(collect(used_names)) + used_bindings = sort(collect(used_bindings)) - return assignments, locals, globals, used_names + return assignments, locals, globals, used_names, used_bindings end function Base.isless(a::NameKey, b::NameKey) @@ -202,7 +206,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) is_toplevel = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk in_toplevel_thunk = is_toplevel || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) - assignments, locals, globals, used = find_scope_vars(ex) + assignments, locals, globals, used, used_bindings = find_scope_vars(ex) # Create new lookup table for variables in this scope 
which differ from the # parent scope. @@ -326,6 +330,12 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) push!(lambda_locals, id) end end + for id in used_bindings + info = lookup_binding(ctx, id) + if !info.is_ssa && info.kind == :local + push!(lambda_locals, id) + end + end return ScopeInfo(in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_locals) end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 91d5fc9f4544e..5b0900441da68 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -411,8 +411,8 @@ function _value_string(ex) k = kind(ex) str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : k == K"Placeholder" ? ex.name_val : - k == K"SSAValue" ? "ssa" : - k == K"BindingId" ? "bnd" : + k == K"SSAValue" ? "%" : + k == K"BindingId" ? "#" : k == K"label" ? "label" : k == K"core" ? "core.$(ex.name_val)" : k == K"top" ? "top.$(ex.name_val)" : diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 0aa5837f897b3..36f271016e9ef 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -240,6 +240,12 @@ begin end """ +src = """ +for i in [3,1,2] + println("i = ", i) +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) From 9d4f7fd1f8964da06256f203cdbbe4a5fabe417b Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 6 Jul 2024 11:31:35 -0500 Subject: [PATCH 0774/1109] Hashing fixup, equality support, and serialization support (JuliaLang/JuliaSyntax.jl#452) --- JuliaSyntax/Project.toml | 4 +++- JuliaSyntax/src/green_tree.jl | 1 + JuliaSyntax/src/kinds.jl | 14 +++++++++++++- JuliaSyntax/src/source_files.jl | 6 ++++++ JuliaSyntax/src/syntax_tree.jl | 11 +++++++++++ JuliaSyntax/test/runtests.jl | 1 + JuliaSyntax/test/serialization.jl | 29 +++++++++++++++++++++++++++++ 7 files changed, 64 insertions(+), 2 deletions(-) create mode 
100644 JuliaSyntax/test/serialization.jl diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 6ffbaa4007738..e4215bd316000 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -4,13 +4,15 @@ authors = ["Claire Foster and contributors"] version = "0.4.6" [compat] +Serialization = "1.0" julia = "1.0" [deps] [extras] Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Logging"] +test = ["Test", "Serialization", "Logging"] diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 28b3f3fb37518..c4df51634e57e 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -38,6 +38,7 @@ head(node::GreenNode) = node.head Base.summary(node::GreenNode) = summary(node.head) +Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.args), h) function Base.:(==)(n1::GreenNode, n2::GreenNode) n1.head == n2.head && n1.span == n2.span && n1.args == n2.args end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 6de2f26a0cd2f..f6706dd254f50 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -922,7 +922,7 @@ const _kind_names = """ K"name" - Kind(namestr) + Kind(id) `Kind` is a type tag for specifying the type of tokens and interior nodes of a syntax tree. Abstractly, this tag is used to define our own *sum types* for @@ -999,6 +999,18 @@ function Base.show(io::IO, k::Kind) print(io, "K\"$(convert(String, k))\"") end +# Save the string representation rather than the bit pattern so that kinds +# can be serialized and deserialized across different JuliaSyntax versions. 
+function Base.write(io::IO, k::Kind) + str = convert(String, k) + write(io, UInt8(length(str))) + write(io, str) +end +function Base.read(io::IO, ::Type{Kind}) + len = read(io, UInt8) + str = String(read(io, len)) + convert(Kind, str) +end + #------------------------------------------------------------------------------- """ diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index a8051a59ba0e6..0ae8f3856d912 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -23,6 +23,12 @@ struct SourceFile line_starts::Vector{Int} end +Base.hash(s::SourceFile, h::UInt) = hash((s.code, s.byte_offset, s.filename, s.first_line, s.line_starts), h) +function Base.:(==)(a::SourceFile, b::SourceFile) + a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename && + a.first_line == b.first_line && a.line_starts == b.line_starts +end + function SourceFile(code::AbstractString; filename=nothing, first_line=1, first_index=1) line_starts = Int[1] diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 02ef17f483aec..608b9ce4328ff 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -17,6 +17,12 @@ mutable struct TreeNode{NodeData} # ? prevent others from using this with Node end end +# Exclude parent from hash and equality checks. This means that subtrees can compare equal. 
+Base.hash(node::TreeNode, h::UInt) = hash((node.children, node.data), h) +function Base.:(==)(a::TreeNode{T}, b::TreeNode{T}) where T + a.children == b.children && a.data == b.data +end + # Implement "pass-through" semantics for field access: access fields of `data` # as if they were part of `TreeNode` function Base.getproperty(node::TreeNode, name::Symbol) @@ -44,6 +50,11 @@ struct SyntaxData <: AbstractSyntaxData val::Any end +Base.hash(data::SyntaxData, h::UInt) = hash((data.source, data.raw, data.position, data.val), h) +function Base.:(==)(a::SyntaxData, b::SyntaxData) + a.source == b.source && a.raw == b.raw && a.position == b.position && a.val == b.val +end + """ SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; keep_parens=false, position::Integer=1) diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index bf2f93fb9288f..317f993deebb0 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -37,3 +37,4 @@ if VERSION >= v"1.6" include("parse_packages.jl") end +include("serialization.jl") diff --git a/JuliaSyntax/test/serialization.jl b/JuliaSyntax/test/serialization.jl new file mode 100644 index 0000000000000..5d194f0550fd7 --- /dev/null +++ b/JuliaSyntax/test/serialization.jl @@ -0,0 +1,29 @@ +using Serialization + +@testset "Equality $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + y = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + z = JuliaSyntax.parsestmt(T, "f(x) = 2 + x") + @test x == y + @test x != z + @test y != z +end + +@testset "Hashing $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt + y = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt + z = hash(JuliaSyntax.parsestmt(T, "f(x) = 2 + x"))::UInt + @test x == y # Correctness + @test x != z # Collision + @test y != z # Collision +end + +@testset "Serialization $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = 
JuliaSyntax.parsestmt(T, "f(x) = x + 2") + f = tempname() + open(f, "w") do io + serialize(io, x) + end + y = open(deserialize, f, "r") + @test x == y +end From 0ce0d7f5d009f0cd87cd4a4b0df9b1b69fce7349 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 6 Jul 2024 18:29:02 -0500 Subject: [PATCH 0775/1109] cut back on CI on 1.9 (JuliaLang/JuliaSyntax.jl#453) --- JuliaSyntax/.github/workflows/CI.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index cd97e6c241442..a08912669451b 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -39,6 +39,7 @@ jobs: # - 1.0 # - 1.6 # - 1 + # - pre # - nightly # but remove some configurations from the build matrix to reduce CI time. # See https://github.com/marketplace/actions/setup-julia-environment @@ -49,6 +50,7 @@ jobs: - {os: 'macOS-latest', version: '1.5'} - {os: 'macOS-latest', version: '1.7'} - {os: 'macOS-latest', version: '1.8'} + - {os: 'macOS-latest', version: '1.9'} # MacOS not available on x86 - {os: 'macOS-latest', arch: 'x86'} - {os: 'windows-latest', version: '1.1'} @@ -58,6 +60,7 @@ jobs: - {os: 'windows-latest', version: '1.5'} - {os: 'windows-latest', version: '1.7'} - {os: 'windows-latest', version: '1.8'} + - {os: 'windows-latest', version: '1.9'} - {os: 'ubuntu-latest', version: '1.1', arch: 'x86'} - {os: 'ubuntu-latest', version: '1.2', arch: 'x86'} - {os: 'ubuntu-latest', version: '1.3', arch: 'x86'} @@ -65,6 +68,7 @@ jobs: - {os: 'ubuntu-latest', version: '1.5', arch: 'x86'} - {os: 'ubuntu-latest', version: '1.7', arch: 'x86'} - {os: 'ubuntu-latest', version: '1.8', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.9', arch: 'x86'} steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v2 From 1068ec8d1de64a4425a604b5ea1b7edaabd80209 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 8 Jul 2024 01:24:51 +1000 Subject: [PATCH 0776/1109] Fix tests for IR 
formatting changes --- JuliaLowering/test/branching_ir.jl | 28 ++++++++++++++-------------- JuliaLowering/test/demo.jl | 8 ++++++++ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index 5d78d1e423422..b9824fc27d0a7 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -8,11 +8,11 @@ begin end #------------------------- 1 slot.₁/a -2 (gotoifnot ssa.₁ label.₅) +2 (gotoifnot %.₁ label.₅) 3 slot.₂/b -4 (return ssa.₃) +4 (return %.₃) 5 core.nothing -6 (return ssa.₅) +6 (return %.₅) ###################################### # Branching, !tail && !value @@ -25,10 +25,10 @@ begin end #------------------------- 1 slot.₁/a -2 (gotoifnot ssa.₁ label.₄) +2 (gotoifnot %.₁ label.₄) 3 slot.₂/b 4 slot.₃/c -5 (return ssa.₄) +5 (return %.₄) ###################################### # Branching with else @@ -42,11 +42,11 @@ begin end #--------------------- 1 slot.₁/a -2 (gotoifnot ssa.₁ label.₅) +2 (gotoifnot %.₁ label.₅) 3 slot.₂/b -4 (return ssa.₃) +4 (return %.₃) 5 slot.₃/c -6 (return ssa.₅) +6 (return %.₅) ###################################### # Branching with else, !tail && !value @@ -61,12 +61,12 @@ begin end #--------------------- 1 slot.₁/a -2 (gotoifnot ssa.₁ label.₅) +2 (gotoifnot %.₁ label.₅) 3 slot.₂/b 4 (goto label.₆) 5 slot.₃/c 6 slot.₄/d -7 (return ssa.₆) +7 (return %.₆) ###################################### # Blocks compile directly to branches @@ -79,10 +79,10 @@ end #--------------------- 1 slot.₁/a 2 slot.₂/b -3 (gotoifnot ssa.₂ label.₈) +3 (gotoifnot %.₂ label.₈) 4 slot.₃/c -5 (gotoifnot ssa.₄ label.₈) +5 (gotoifnot %.₄ label.₈) 6 slot.₄/d -7 (return ssa.₆) +7 (return %.₆) 8 core.nothing -9 (return ssa.₈) +9 (return %.₈) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 36f271016e9ef..863d6dac0dbaa 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -246,6 +246,14 @@ for i in [3,1,2] end """ +src 
= """ +@ccall f()::T +""" + +src = """ +x = :hi +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) From ddbc5957834da639bd50cfd409ffac776fe55a82 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 8 Jul 2024 01:23:00 +1000 Subject: [PATCH 0777/1109] Make `:x` produce a plain symbol for compatibility --- JuliaLowering/src/macro_expansion.jl | 41 ++++++++++++++++++++++++---- JuliaLowering/test/runtests.jl | 10 +++---- JuliaLowering/test/utils.jl | 8 +++++- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 89477584b4956..9b2ebbe09f220 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -75,10 +75,14 @@ struct MacroExpansionError context::Union{Nothing,MacroContext} ex::SyntaxTree msg::String + position::Symbol end -function MacroExpansionError(ex::SyntaxTree, msg::AbstractString) - MacroExpansionError(nothing, ex, msg) +""" +`position` - the source position relative to the node - may be `:begin` or `:end` or `:all` +""" +function MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all) + MacroExpansionError(nothing, ex, msg, position) end function Base.showerror(io::IO, exc::MacroExpansionError) @@ -89,9 +93,21 @@ function Base.showerror(io::IO, exc::MacroExpansionError) " in module ", ctx.scope_layer.mod) end print(io, ":\n") - # FIXME: + # TODO: Display niceties: + # * Show the full provenance tree somehow, in addition to the primary + # source location we're showing here? + # * What if the expression doesn't arise from a source file? + # * How to deal with highlighting trivia? Could provide a token kind or + # child position within the raw tree? How to abstract this?? 
src = sourceref(exc.ex) - highlight(io, src.file, first_byte(src):last_byte(src), note=exc.msg) + fb = first_byte(src) + lb = last_byte(src) + pos = exc.position + byterange = pos == :all ? (fb:lb) : + pos == :begin ? (fb:fb-1) : + pos == :end ? (lb+1:lb) : + error("Unknown position $pos") + highlight(io, src.file, byterange, note=exc.msg) end function set_scope_layer(ctx, ex, layer_id, force) @@ -206,7 +222,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) # FIXME: Move this upstream into JuliaSyntax @ast ctx ex (k == K"true")::K"Bool" elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || - (is_operator(k) && !haschildren(ex)) # <- TODO: fix upstream + (is_operator(k) && !haschildren(ex)) # <- TODO: fix upstream: make operator *tokens* into identifiers layerid = get(ex, :scope_layer, ctx.current_layer.id) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" @@ -215,7 +231,20 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) expand_forms_1(ctx, ex[1]) elseif k == K"quote" @chk numchildren(ex) == 1 - expand_forms_1(ctx, expand_quote(ctx, ex[1])) + # TODO: Upstream should set a general flag for detecting parenthesized + # expressions so we don't need to dig into `green_tree` here. Ugh! + plain_symbol = has_flags(ex, JuliaSyntax.COLON_QUOTE) && + kind(ex[1]) == K"Identifier" && + (sr = sourceref(ex); sr isa SourceRef && kind(sr.green_tree[2]) != K"parens") + if plain_symbol + # As a compromise for compatibility, we treat non-parenthesized + # colon quoted identifiers like `:x` as plain Symbol literals + # because these are ubiquitiously used in Julia programs as ad hoc + # enum-like entities rather than pieces of AST. 
+ @ast ctx ex[1] ex[1]=>K"Symbol" + else + expand_forms_1(ctx, expand_quote(ctx, ex[1])) + end elseif k == K"macrocall" expand_macro(ctx, ex) elseif k == K"module" || k == K"toplevel" || k == K"inert" diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 4b5d583e841e4..f427750f90558 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -1,10 +1,5 @@ using Test -using JuliaLowering -using JuliaSyntax -using JuliaSyntax: sourcetext -using JuliaLowering: @ast, flattened_provenance, showprov - include("utils.jl") @testset "JuliaLowering.jl" begin @@ -211,7 +206,7 @@ end # interpolations at multiple depths ex = JuliaLowering.include_string(test_mod, """ let - args = (:x,:y) + args = (:(x),:(y)) quote x = 1 y = 2 @@ -250,6 +245,9 @@ ex2 = JuliaLowering.eval(test_mod, ex) @test sourcetext(ex2[1][2]) == "x" @test sourcetext(ex2[1][3]) == "y" +@test JuliaLowering.include_string(test_mod, ":x") isa Symbol +@test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree + #------------------------------------------------------------------------------- # Macro expansion diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 405e472255e01..49e4d30964a8a 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -1,10 +1,16 @@ using Test +using JuliaLowering +using JuliaSyntax + +using JuliaSyntax: sourcetext + using JuliaLowering: SyntaxGraph, newnode!, ensure_attributes!, Kind, SourceRef, SyntaxTree, NodeId, makenode, makeleaf, setattr!, sethead!, - haschildren, numchildren, children + haschildren, numchildren, children, + @ast, flattened_provenance, showprov function _ast_test_graph() graph = SyntaxGraph() From 7846ea11646fd0b4faca1a73c224c195b70c1fec Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 8 Jul 2024 01:38:50 +1000 Subject: [PATCH 0778/1109] Move quoting tests into their own file --- JuliaLowering/test/quoting.jl | 105 +++++++++++++++++++++++++++++++++ 
JuliaLowering/test/runtests.jl | 102 -------------------------------- 2 files changed, 105 insertions(+), 102 deletions(-) create mode 100644 JuliaLowering/test/quoting.jl diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl new file mode 100644 index 0000000000000..599db250c3d14 --- /dev/null +++ b/JuliaLowering/test/quoting.jl @@ -0,0 +1,105 @@ +@testset "Syntax quoting & interpolation" begin + +test_mod = Module() + +ex = JuliaLowering.include_string(test_mod, """ +begin + x = 10 + y = :(g(z)) + quote + f(\$(x+1), \$y) + end +end +""") +@test ex ~ @ast_ [K"block" + [K"call" + "f"::K"Identifier" + 11::K"Value" + [K"call" + "g"::K"Identifier" + "z"::K"Identifier" + ] + ] +] +@test sourcetext(ex[1]) == "f(\$(x+1), \$y)" +@test sourcetext(ex[1][2]) == "\$(x+1)" +@test sourcetext.(flattened_provenance(ex[1][3])) == ["\$y", "g(z)"] +@test sprint(io->showprov(io, ex[1][3], tree=true)) == raw""" + (call g z) + ├─ (call g z) + │ └─ @ string:3 + └─ ($ y) + └─ @ string:5 + """ +@test sprint(io->showprov(io, ex[1][3])) == raw""" + begin + x = 10 + y = :(g(z)) + # └──┘ ── in source + quote + f($(x+1), $y) + # @ string:3 + + y = :(g(z)) + quote + f($(x+1), $y) + # └┘ ── interpolated here + end + end + # @ string:5""" + + +# Test expression flags are preserved during interpolation +@test JuliaSyntax.is_infix_op_call(JuliaLowering.include_string(test_mod, """ +let + x = 1 + :(\$x + \$x) +end +""")) + +# interpolations at multiple depths +ex = JuliaLowering.include_string(test_mod, """ +let + args = (:(x),:(y)) + quote + x = 1 + y = 2 + quote + f(\$\$(args...)) + end + end +end +""") +@test ex ~ @ast_ [K"block" + [K"=" + "x"::K"Identifier" + 1::K"Integer" + ] + [K"=" + "y"::K"Identifier" + 2::K"Integer" + ] + [K"quote" + [K"block" + [K"call" + "f"::K"Identifier" + [K"$" + "x"::K"Identifier" + "y"::K"Identifier" + ] + ] + ] + ] +] +@test sourcetext(ex[3][1][1][2]) == "\$\$(args...)" +@test sourcetext(ex[3][1][1][2][1]) == "x" +@test 
sourcetext(ex[3][1][1][2][2]) == "y" + +ex2 = JuliaLowering.eval(test_mod, ex) +@test sourcetext(ex2[1][2]) == "x" +@test sourcetext(ex2[1][3]) == "y" + +@test JuliaLowering.include_string(test_mod, ":x") isa Symbol +@test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index f427750f90558..74284fe0c7e77 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -146,108 +146,6 @@ end """) @test C.D.f === C.E.f -#------------------------------------------------------------------------------- -# Syntax quoting & interpolation -ex = JuliaLowering.include_string(test_mod, """ -begin - x = 10 - y = :(g(z)) - quote - f(\$(x+1), \$y) - end -end -""") -@test ex ~ @ast_ [K"block" - [K"call" - "f"::K"Identifier" - 11::K"Value" - [K"call" - "g"::K"Identifier" - "z"::K"Identifier" - ] - ] -] -@test sourcetext(ex[1]) == "f(\$(x+1), \$y)" -@test sourcetext(ex[1][2]) == "\$(x+1)" -@test sourcetext.(flattened_provenance(ex[1][3])) == ["\$y", "g(z)"] -@test sprint(io->showprov(io, ex[1][3], tree=true)) == raw""" - (call g z) - ├─ (call g z) - │ └─ @ string:3 - └─ ($ y) - └─ @ string:5 - """ -@test sprint(io->showprov(io, ex[1][3])) == raw""" - begin - x = 10 - y = :(g(z)) - # └──┘ ── in source - quote - f($(x+1), $y) - # @ string:3 - - y = :(g(z)) - quote - f($(x+1), $y) - # └┘ ── interpolated here - end - end - # @ string:5""" - - -# Test expression flags are preserved during interpolation -@test JuliaSyntax.is_infix_op_call(JuliaLowering.include_string(test_mod, """ -let - x = 1 - :(\$x + \$x) -end -""")) - -# interpolations at multiple depths -ex = JuliaLowering.include_string(test_mod, """ -let - args = (:(x),:(y)) - quote - x = 1 - y = 2 - quote - f(\$\$(args...)) - end - end -end -""") -@test ex ~ @ast_ [K"block" - [K"=" - "x"::K"Identifier" - 1::K"Integer" - ] - [K"=" - "y"::K"Identifier" - 2::K"Integer" - ] - [K"quote" - [K"block" - [K"call" - 
"f"::K"Identifier" - [K"$" - "x"::K"Identifier" - "y"::K"Identifier" - ] - ] - ] - ] -] -@test sourcetext(ex[3][1][1][2]) == "\$\$(args...)" -@test sourcetext(ex[3][1][1][2][1]) == "x" -@test sourcetext(ex[3][1][1][2][2]) == "y" - -ex2 = JuliaLowering.eval(test_mod, ex) -@test sourcetext(ex2[1][2]) == "x" -@test sourcetext(ex2[1][3]) == "y" - -@test JuliaLowering.include_string(test_mod, ":x") isa Symbol -@test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree - #------------------------------------------------------------------------------- # Macro expansion From 3e208a7b383eadf18118528de94e12787b4eeb5d Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Tue, 9 Jul 2024 08:28:25 +0200 Subject: [PATCH 0779/1109] Fix tokenization of emitting comments followed by `\r\n` (JuliaLang/JuliaSyntax.jl#455) Before this patch the `\r` character would be part of the comment and not the following NewlineWs (which would simply be a `\n`-style NewlineWs instead of a `\r\n` as one would expect). --- JuliaSyntax/src/tokenize.jl | 4 ++-- JuliaSyntax/test/tokenize.jl | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 9c19c04008d2d..26563d6f444ec 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -747,8 +747,8 @@ function lex_comment(l::Lexer) if peekchar(l) != '=' valid = true while true - pc = peekchar(l) - if pc == '\n' || pc == EOF_CHAR + pc, ppc = dpeekchar(l) + if pc == '\n' || (pc == '\r' && ppc == '\n') || pc == EOF_CHAR return emit(l, valid ? 
K"Comment" : K"ErrorInvalidUTF8") end valid &= isvalid(pc) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 26ab044a617e4..1c525a99a0f8c 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -221,6 +221,8 @@ end @test toks("#= #= =#") == ["#= #= =#"=>K"ErrorEofMultiComment"] @test toks("#=#==#=#") == ["#=#==#=#"=>K"Comment"] @test toks("#=#==#=") == ["#=#==#="=>K"ErrorEofMultiComment"] + # comment terminated by \r\n + @test toks("#\r\n") == ["#" => K"Comment", "\r\n" => K"NewlineWs"] end From 32a28134623731d34a2ccf365842c12b45bfe9bb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 11 Jul 2024 11:34:24 +0200 Subject: [PATCH 0780/1109] Fix macro expansion error propagation --- JuliaLowering/src/macro_expansion.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 9b2ebbe09f220..be40fa9bbc5b1 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -162,9 +162,9 @@ function expand_macro(ctx, ex) if exc isa MacroExpansionError # Add context to the error. # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? 
- rethrow(MacroExpansionError(mctx, ex, exc.msg)) + rethrow(MacroExpansionError(mctx, exc.ex, exc.msg, exc.position)) else - throw(MacroExpansionError(mctx, ex, "Error expanding macro")) + throw(MacroExpansionError(mctx, ex, "Error expanding macro", :all)) end end From ee484939c55315422d8ea604a8cf4ea1999afd88 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Thu, 11 Jul 2024 14:07:34 +0200 Subject: [PATCH 0781/1109] Add the VS Code settings file to .gitignore (JuliaLang/JuliaSyntax.jl#459) --- JuliaSyntax/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore index 4681ba2d4e249..8a934c09e33c3 100644 --- a/JuliaSyntax/.gitignore +++ b/JuliaSyntax/.gitignore @@ -4,3 +4,4 @@ /tools/logs.txt /docs/build *.cov +/.vscode/settings.json From 3ac335ae0908ae9cf528d7f1b468c24bc938aadc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 16 Jul 2024 11:40:44 +0200 Subject: [PATCH 0782/1109] Fix parsing of `-1::Int` - signed literals with type assertions (JuliaLang/JuliaSyntax.jl#462) This syntax is normally not something you'd use, but it's very useful for `ccall`. Previously the parser just crashed on this kind of input - a bug which is also present in the old parser. 
--- JuliaSyntax/src/parser.jl | 1 + JuliaSyntax/test/parser.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 34666b59cf58b..9e02928eb35d8 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1211,6 +1211,7 @@ function parse_unary(ps::ParseState) # -2*x ==> (call-i -2 * x) # +0xff ==> 0xff bump_glue(ps, kind(t2), EMPTY_FLAGS) + parse_factor_with_initial_ex(ps, mark) end return end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 3ac3ed36719a8..1e4baa665508e 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -212,6 +212,7 @@ tests = [ "-0b10010" => "(call-pre - 0x12)" "-0o22" => "(call-pre - 0x12)" "-0x12" => "(call-pre - 0x12)" + "-1::T" => "(::-i -1 T)" # Standalone dotted operators are parsed as (|.| op) ".+" => "(. +)" ".+\n" => "(. +)" From a871aa07daee7df2229dc36ac4acbdfaa7012f32 Mon Sep 17 00:00:00 2001 From: ShalokShalom Date: Wed, 17 Jul 2024 03:49:53 +0200 Subject: [PATCH 0783/1109] Correct typo (JuliaLang/JuliaLowering.jl#5) Correct typo --- JuliaLowering/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 7644c5bf01394..d9536dd8186d0 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -461,7 +461,7 @@ odd mixture of imperative and declarative lowered code. People look at [Racket](https://racket-lang.org/) as an example of a very complete system of hygienic macros. We should learn from them, but keeping in -mind that Racket's macro system is more inherently more complicated. Racket's +mind that Racket's macro system is inherently more complicated. Racket's current approach to hygiene is described in an [accessible talk](https://www.youtube.com/watch?v=Or_yKiI3Ha4) and in more depth in [a paper](https://www-old.cs.utah.edu/plt/publications/popl16-f.pdf). 
From 6c24d86fb15eaea6346cb4e324ad6d887bb2d3d4 Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Wed, 17 Jul 2024 09:24:58 +0200 Subject: [PATCH 0784/1109] Fix tokenization of consecutive `\r\n` line endings (JuliaLang/JuliaSyntax.jl#460) Without this patch `\r\n\r\n` would be tokenized as `\r\n\r` and `\n` instead of `\r\n` and `\r\n`. Fixes https://github.com/fredrikekre/Runic.jl/issues/15. --- JuliaSyntax/src/tokenize.jl | 5 +++-- JuliaSyntax/test/tokenize.jl | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 26563d6f444ec..0f60309fa899a 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -733,9 +733,10 @@ function lex_whitespace(l::Lexer, c) if c == '\n' k = K"NewlineWs" end - pc = peekchar(l) + pc, ppc = dpeekchar(l) # stop on non whitespace and limit to a single newline in a token - if !iswhitespace(pc) || (k == K"NewlineWs" && pc == '\n') + if !iswhitespace(pc) || + (k == K"NewlineWs" && (pc == '\n' || (pc == '\r' && ppc == '\n'))) break end c = readchar(l) diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 1c525a99a0f8c..0837b9c880c6f 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -163,6 +163,16 @@ end @test untokenize(tok(str), str)==">>" end +@testset "tokenize newlines" begin + n = "\n" + rn = "\r\n" + nl = K"NewlineWs" + for i in 0:5 + j = 5 - i + @test toks(n^i * rn^j) == vcat(fill(n => nl, i), fill(rn => nl, j)) + @test toks(rn^i * n^j) == vcat(fill(rn => nl, i), fill(n => nl, j)) + end +end @testset "test added operators" begin @test tok("1+=2", 2).kind == K"+=" From 39fdea46a5bae1b4548e14a99f36db16d5222e40 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 17 Jul 2024 21:07:53 +1000 Subject: [PATCH 0785/1109] Define `numchildren()` function (JuliaLang/JuliaSyntax.jl#464) --- JuliaSyntax/src/syntax_tree.jl | 1 + 1 file changed, 1 insertion(+) diff --git 
a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 608b9ce4328ff..12db89dd4d214 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -108,6 +108,7 @@ end haschildren(node::TreeNode) = node.children !== nothing children(node::TreeNode) = (c = node.children; return c === nothing ? () : c) +numchildren(node::TreeNode) = (isnothing(node.children) ? 0 : length(node.children)) """ From 11e28dd0a9f084ec8cafaabe12794d3ccd9dfc9c Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 17 Jul 2024 14:36:13 -0500 Subject: [PATCH 0786/1109] Delete unused down option from parse_docstring (JuliaLang/JuliaSyntax.jl#430) --- JuliaSyntax/src/parser.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9e02928eb35d8..8401002c7b103 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -526,9 +526,9 @@ end # Parse docstrings attached by a space or single newline # # flisp: parse-docstring -function parse_docstring(ps::ParseState, down=parse_eq) +function parse_docstring(ps::ParseState) mark = position(ps) - down(ps) + parse_eq(ps) if peek_behind(ps).kind == K"string" is_doc = true k = peek(ps) @@ -553,7 +553,7 @@ function parse_docstring(ps::ParseState, down=parse_eq) # """\n doc\n """ foo ==> (doc (string-s "doc\n") foo) end if is_doc - down(ps) + parse_eq(ps) emit(ps, mark, K"doc") end end From e99153be08f39c041031eede6746bdc5c6be2cdc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 18 Jul 2024 15:40:15 +1000 Subject: [PATCH 0787/1109] Allow `Kind`s to be registered by packages outside JuliaSyntax (JuliaLang/JuliaSyntax.jl#461) Extensible kinds are quite tricky. We want * To use a small number of bits for them * To have the string representation in the source, but have the compiler able to fully inline the integer representation. 
* Allow modules with different kinds to cooperate together on the same integer representation. * Not trigger invalidation when new kinds are added * Different `Kind` modules to not require cooperation This is a very hard set of constraints to satisfy. The last one is already impossible in a single flat namespace so in this design we've given up on it and require cooperation between all kind extension modules, including module authors allocating non-colliding id's for their modules, in addition to non-colliding kind names. --- JuliaSyntax/src/kinds.jl | 319 +++++++++++++++++++++-------------- JuliaSyntax/test/kinds.jl | 59 +++++++ JuliaSyntax/test/runtests.jl | 1 + 3 files changed, 250 insertions(+), 129 deletions(-) create mode 100644 JuliaSyntax/test/kinds.jl diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index f6706dd254f50..bf83771641dca 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1,7 +1,194 @@ # Definition of Kind type - mapping from token string identifiers to # enumeration values as used in @K_str -const _kind_names = -[ + +""" + K"name" + Kind(namestr) + +`Kind` is a type tag for specifying the type of tokens and interior nodes of +a syntax tree. Abstractly, this tag is used to define our own *sum types* for +syntax tree nodes. We do this explicitly outside the Julia type system because +(a) Julia doesn't have sum types and (b) we want concrete data structures which +are unityped from the Julia compiler's point of view, for efficiency. + +Naming rules: +* Kinds which correspond to exactly one textural form are represented with that + text. This includes keywords like K"for" and operators like K"*". +* Kinds which represent many textural forms have UpperCamelCase names. This + includes kinds like K"Identifier" and K"Comment". +* Kinds which exist merely as delimiters are all uppercase +""" +primitive type Kind 16 end + +# The implementation of Kind here is basically similar to @enum. 
However we use +# the K_str macro to self-name these kinds with their literal representation, +# rather than needing to invent a new name for each. + +const _kind_str_to_int = Dict{String,UInt16}() +const _kind_int_to_str = Dict{UInt16,String}() +const _kind_modules = Dict{Int,Union{Symbol,Module}}( + 0=>:JuliaSyntax, + 1=>:JuliaLowering, + 2=>:JuliaSyntaxFormatter +) +# Number of bits reserved for kind id's belonging to a single module +const _kind_nbits = 10 +const _kind_module_id_max = typemax(UInt16) >> _kind_nbits + +function Kind(x::Integer) + if x < 0 || x > typemax(UInt16) + throw(ArgumentError("Kind out of range: $x")) + end + return Base.bitcast(Kind, convert(UInt16, x)) +end + +function Base.convert(::Type{String}, k::Kind) + _kind_int_to_str[reinterpret(UInt16, k)] +end + +function Base.convert(::Type{Kind}, s::AbstractString) + i = get(_kind_str_to_int, s) do + error("unknown Kind name $(repr(s))") + end + Kind(i) +end + +Base.string(x::Kind) = convert(String, x) +Base.print(io::IO, x::Kind) = print(io, convert(String, x)) + +Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y) + +function Base.show(io::IO, k::Kind) + print(io, "K\"$(convert(String, k))\"") +end + +# Save the string representation rather than the bit pattern so that kinds +# can be serialized and deserialized across different JuliaSyntax versions. 
+function Base.write(io::IO, k::Kind) + str = convert(String, k) + write(io, UInt8(length(str))) + write(io, str) +end +function Base.read(io::IO, ::Type{Kind}) + len = read(io, UInt8) + str = String(read(io, len)) + convert(Kind, str) +end + +function Base.parentmodule(k::Kind) + mod_id = reinterpret(UInt16, k) >> _kind_nbits + _kind_modules[mod_id]::Module +end + +function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names) + if module_id > _kind_module_id_max + error("Kind module id $module_id is out of range") + elseif length(names) >= 1 << _kind_nbits + error("Too many kind names") + elseif !haskey(kind_modules, module_id) + kind_modules[module_id] = mod + else + m = kind_modules[module_id] + if m == nameof(mod) + # Ok: known kind module, but not loaded until now + kind_modules[module_id] = mod + elseif m == mod + existing_kinds = [(i = get(kind_str_to_int, n, nothing); + isnothing(i) ? nothing : Kind(i)) for n in names] + if any(isnothing, existing_kinds) || + !issorted(existing_kinds) || + any(k->parentmodule(k) != mod, existing_kinds) + error("Error registering kinds for module $mod (register_kinds() called more than once inconsistently, or conflict with existing module kinds?)") + else + # Assume we're re-registering kinds as in top level vs `__init__` + return + end + else + error("Kind module ID $module_id already claimed by module $m") + end + end + # Process names to conflate category BEGIN/END markers with the first/last + # in the category. + i = 0 + for name in names + normal_kind = false + if startswith(name, "BEGIN_") + j = i + elseif startswith(name, "END_") + j = i - 1 + else + normal_kind = true + j = i + i += 1 + end + kind_int = (module_id << _kind_nbits) | j + push!(kind_str_to_int, name=>kind_int) + if normal_kind + push!(int_to_kindstr, kind_int=>name) + end + end +end + +""" + register_kinds!(mod, module_id, names) + +Register custom `Kind`s with the given `names`, belonging to a module `mod`. 
+`names` is an array of arbitrary strings. + +In order for kinds to be represented by a small number of bits, some nontrivial +cooperation is reqired between modules using custom kinds: +* The integer `module_id` is globally unique for each `mod` which will be used + together, and not larger than $_kind_module_id_max. +* No two modules register the same `name`. The semantics of a given `kind` name + should be defined by the module which owns it. + +To allow ranges of kinds to be delimited and quickly tested for, some special +names are allowed: `BEGIN_section` and `END_section` pairs are detected, and +alias the next and previous kind id's respectively so that kinds in `section` +can be tested with `BEGIN_section <= k <= END_section`. +""" +function register_kinds!(mod, module_id, names) + _register_kinds!(_kind_modules, _kind_int_to_str, _kind_str_to_int, mod, module_id, names) +end + +#------------------------------------------------------------------------------- + +""" + K"s" + +The kind of a token or AST internal node with string "s". + +For example +* K")" is the kind of the right parenthesis token +* K"block" is the kind of a block of code (eg, statements within a begin-end). +""" +macro K_str(s) + convert(Kind, s) +end + +""" +A set of kinds which can be used with the `in` operator. For example + + k in KSet"+ - *" +""" +macro KSet_str(str) + kinds = [convert(Kind, s) for s in split(str)] + + quote + ($(kinds...),) + end +end + +""" + kind(x) + +Return the `Kind` of `x`. 
+""" +kind(k::Kind) = k + + +#------------------------------------------------------------------------------- +# Kinds used by JuliaSyntax +register_kinds!(JuliaSyntax, 0, [ "None" # Placeholder; never emitted by lexer "EndMarker" # EOF "Comment" @@ -918,133 +1105,7 @@ const _kind_names = # Container for a single statement/atom plus any trivia and errors "wrapper" "END_SYNTAX_KINDS" -] - -""" - K"name" - Kind(id) - -`Kind` is a type tag for specifying the type of tokens and interior nodes of -a syntax tree. Abstractly, this tag is used to define our own *sum types* for -syntax tree nodes. We do this explicitly outside the Julia type system because -(a) Julia doesn't have sum types and (b) we want concrete data structures which -are unityped from the Julia compiler's point of view, for efficiency. - -Naming rules: -* Kinds which correspond to exactly one textural form are represented with that - text. This includes keywords like K"for" and operators like K"*". -* Kinds which represent many textural forms have UpperCamelCase names. This - includes kinds like K"Identifier" and K"Comment". -* Kinds which exist merely as delimiters are all uppercase -""" -primitive type Kind 16 end - -# The implementation of Kind here is basically similar to @enum. However we use -# the K_str macro to self-name these kinds with their literal representation, -# rather than needing to invent a new name for each. - -let kind_int_type = :UInt16 - # Preprocess _kind_names to conflate category markers with the first/last - # in the category. 
- kindstr_to_int = Dict{String,UInt16}() - i = 1 - while i <= length(_kind_names) - kn = _kind_names[i] - kind_int = i-1 - if startswith(kn, "BEGIN_") - deleteat!(_kind_names, i) - elseif startswith(kn, "END_") - kind_int = i-2 - deleteat!(_kind_names, i) - else - i += 1 - end - push!(kindstr_to_int, kn=>kind_int) - end - - max_kind_int = length(_kind_names)-1 - - @eval begin - function Kind(x::Integer) - if x < 0 || x > $max_kind_int - throw(ArgumentError("Kind out of range: $x")) - end - return Base.bitcast(Kind, convert($kind_int_type, x)) - end - - Base.convert(::Type{String}, k::Kind) = _kind_names[1 + reinterpret($kind_int_type, k)] - - let kindstr_to_int=$kindstr_to_int - function Base.convert(::Type{Kind}, s::AbstractString) - i = get(kindstr_to_int, s) do - error("unknown Kind name $(repr(s))") - end - Kind(i) - end - end - - Base.string(x::Kind) = convert(String, x) - Base.print(io::IO, x::Kind) = print(io, convert(String, x)) - - Base.typemin(::Type{Kind}) = Kind(0) - Base.typemax(::Type{Kind}) = Kind($max_kind_int) - - Base.:<(x::Kind, y::Kind) = reinterpret($kind_int_type, x) < reinterpret($kind_int_type, y) - - Base.instances(::Type{Kind}) = (Kind(i) for i in reinterpret($kind_int_type, typemin(Kind)):reinterpret($kind_int_type, typemax(Kind))) - end -end - -function Base.show(io::IO, k::Kind) - print(io, "K\"$(convert(String, k))\"") -end - -# Save the string representation rather than the bit pattern so that kinds -# can be serialized and deserialized across different JuliaSyntax versions. -function Base.write(io::IO, k::Kind) - str = convert(String, k) - write(io, UInt8(length(str))) + write(io, str) -end -function Base.read(io::IO, ::Type{Kind}) - len = read(io, UInt8) - str = String(read(io, len)) - convert(Kind, str) -end - -#------------------------------------------------------------------------------- - -""" - K"s" - -The kind of a token or AST internal node with string "s". 
- -For example -* K")" is the kind of the right parenthesis token -* K"block" is the kind of a block of code (eg, statements within a begin-end). -""" -macro K_str(s) - convert(Kind, s) -end - -""" -A set of kinds which can be used with the `in` operator. For example - - k in KSet"+ - *" -""" -macro KSet_str(str) - kinds = [convert(Kind, s) for s in split(str)] - - quote - ($(kinds...),) - end -end - -""" - kind(x) - -Return the `Kind` of `x`. -""" -kind(k::Kind) = k +]) #------------------------------------------------------------------------------- const _nonunique_kind_names = Set([ diff --git a/JuliaSyntax/test/kinds.jl b/JuliaSyntax/test/kinds.jl new file mode 100644 index 0000000000000..f58fbd80e74f2 --- /dev/null +++ b/JuliaSyntax/test/kinds.jl @@ -0,0 +1,59 @@ +# Only test this once per session, as kind modules must be unique (ugh) +if !isdefined(@__MODULE__, :FooKinds) +@eval module FooKinds + +using JuliaSyntax + +function _init_kinds() + JuliaSyntax.register_kinds!(@__MODULE__, 42, [ + "BEGIN_FOO" + "foo_1" + "foo_2" + "BEGIN_FOOBAR" + "foobar_1" + "foobar_2" + "END_FOOBAR" + "END_FOO" + ]) +end + +_init_kinds() + +k_before_init = K"foo_1" + +function __init__() + _init_kinds() +end + +end + +@eval module BarKinds + # Intentionally empty +end + +end + +@testset "Kinds" begin + @test K"foo_1" != K"foo_2" + + @test FooKinds.k_before_init == K"foo_1" + + @test K"BEGIN_FOO" == K"foo_1" + @test K"foo_2" < K"BEGIN_FOOBAR" + @test K"BEGIN_FOOBAR" == K"foobar_1" + @test K"END_FOOBAR" == K"foobar_2" + @test K"END_FOO" == K"foobar_2" + + @test parentmodule(K"foo_1") == FooKinds + @test sprint(show, K"foo_1") == "K\"foo_1\"" + + # Too many kind modules + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 64, ["hoo?"]) + # Too many kind names per module + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, string.(1:1024)) + # Re-registering or registering new kinds is not supported + @test_throws ErrorException 
JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_2", "foo_1"]) + @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_3"]) + # Module ID already taken by FooKinds + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, ["hii?"]) +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 317f993deebb0..0fc08d08a1c29 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -12,6 +12,7 @@ include("test_utils_tests.jl") include("fuzz_test.jl") include("utils.jl") +include("kinds.jl") @testset "Tokenize" begin include("tokenize.jl") From f4c66bb57f73f42db5598ca9e260f6e638bc1139 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 19 Jul 2024 17:32:03 +1000 Subject: [PATCH 0788/1109] Move overload of Base.range() to JuliaSyntax.byte_range() (JuliaLang/JuliaSyntax.jl#463) The Base overload was a mistake - it doesn't really have the same semantics as compared to `Base.range()`. It's also not got the clearest name! --- JuliaSyntax/docs/src/api.md | 11 ++++++++++- JuliaSyntax/src/diagnostics.jl | 3 +-- JuliaSyntax/src/expr.jl | 6 +++--- JuliaSyntax/src/syntax_tree.jl | 15 +++++++++------ JuliaSyntax/tools/check_all_packages.jl | 2 +- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index 8b7723870536f..eae6e5f1967d2 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -54,9 +54,18 @@ JuliaSyntax.flags see also predicates related to `flags`. -## Syntax tree types +## Syntax trees + +Syntax tree types: ```@docs JuliaSyntax.SyntaxNode JuliaSyntax.GreenNode ``` + +Functions applicable to syntax trees include everything in the sections on +heads/kinds, and source file handling. 
+ +```@docs +JuliaSyntax.byte_range +``` diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 9a5ea96149b23..76e8d6a70329c 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -40,7 +40,6 @@ end first_byte(d::Diagnostic) = d.first_byte last_byte(d::Diagnostic) = d.last_byte is_error(d::Diagnostic) = d.level === :error -Base.range(d::Diagnostic) = first_byte(d):last_byte(d) # Make relative path into a file URL function _file_url(filename) @@ -89,7 +88,7 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) _printstyled(io, "# $prefix @ ", fgcolor=:light_black) _printstyled(io, "$locstr", fgcolor=:light_black, href=file_href) print(io, "\n") - highlight(io, source, range(diagnostic), + highlight(io, source, byte_range(diagnostic), note=diagnostic.message, notecolor=color, context_lines_before=1, context_lines_after=0) end diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 4ca0be0222ca9..c73a93c8b08ac 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -522,16 +522,16 @@ end function _to_expr(node::SyntaxNode) if !haschildren(node) offset, txtbuf = _unsafe_wrap_substring(sourcetext(node.source)) - return _leaf_to_Expr(node.source, txtbuf, head(node), range(node) .+ offset, node) + return _leaf_to_Expr(node.source, txtbuf, head(node), byte_range(node) .+ offset, node) end cs = children(node) args = Any[_to_expr(c) for c in cs] - _internal_node_to_Expr(node.source, range(node), head(node), range.(cs), head.(cs), args) + _internal_node_to_Expr(node.source, byte_range(node), head(node), byte_range.(cs), head.(cs), args) end function Base.Expr(node::SyntaxNode) ex = _to_expr(node) - loc = source_location(LineNumberNode, node.source, first(range(node))) + loc = source_location(LineNumberNode, node.source, first(byte_range(node))) only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex])) end diff --git a/JuliaSyntax/src/syntax_tree.jl 
b/JuliaSyntax/src/syntax_tree.jl index 12db89dd4d214..2e448a072e36f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -124,17 +124,20 @@ span(node::AbstractSyntaxNode) = span(node.raw) first_byte(node::AbstractSyntaxNode) = node.position last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1 +""" + byte_range(ex) + +Return the range of bytes which `ex` covers in the source text. +""" +byte_range(ex) = first_byte(ex):last_byte(ex) + """ sourcetext(node) Get the full source text of a node. """ function sourcetext(node::AbstractSyntaxNode) - view(node.source, range(node)) -end - -function Base.range(node::AbstractSyntaxNode) - (node.position-1) .+ (1:span(node)) + view(node.source, byte_range(node)) end source_line(node::AbstractSyntaxNode) = source_line(node.source, node.position) @@ -299,7 +302,7 @@ function child_position_span(node::SyntaxNode, path::Int...) end function highlight(io::IO, node::SyntaxNode; kws...) - highlight(io, node.source, range(node); kws...) + highlight(io, node.source, byte_range(node); kws...) end function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) 
diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl index 0dd993d5ccef1..32f255e0cb6ea 100644 --- a/JuliaSyntax/tools/check_all_packages.jl +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -39,7 +39,7 @@ Logging.with_logger(TerminalLogger()) do mismatch_count += 1 failing_source = sprint(context=:color=>true) do io for c in reduce_tree(parseall(SyntaxNode, text)) - JuliaSyntax.highlight(io, c.source, range(c), context_lines_inner=5) + JuliaSyntax.highlight(io, c.source, JuliaSyntax.byte_range(c), context_lines_inner=5) println(io, "\n") end end From fe18c7d048072df409827a6f1493a98e2d354b3c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 15 Jul 2024 02:22:12 +0200 Subject: [PATCH 0789/1109] Demo function to color by module for globals --- JuliaLowering/test/demo.jl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 863d6dac0dbaa..d98f6b405e199 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -3,17 +3,26 @@ using JuliaSyntax using JuliaLowering -using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding using JuliaSyntaxFormatter # Extract variable kind for highlighting purposes -function var_kind(ex) +function var_kind(ctx, ex) id = get(ex, :var_id, nothing) if isnothing(id) return nothing end - return lookup_binding(ctx3, id).kind + return lookup_binding(ctx, id).kind +end + +# Extract module of globals for highlighting +function var_mod(ctx, ex) + id = get(ex, :var_id, nothing) + if isnothing(id) + return nothing + end + return lookup_binding(ctx, id).mod end 
function formatsrc(ex; kws...) From 836c1a327c322c5edd83ab0676ce9ada01e33272 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 15 Jul 2024 02:24:19 +0200 Subject: [PATCH 0790/1109] SyntaxTree -> Expr conversion shims Allow surface syntax - in SyntaxTree format - to be converted to Expr. This depends on some experimental code in JuliaSyntax. --- JuliaLowering/src/syntax_graph.jl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 5b0900441da68..1ca8b76270cb9 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -514,6 +514,23 @@ function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStr SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) end +#------------------------------------------------------------------------------- +function JuliaSyntax.expr_leaf_val(ex::SyntaxTree) + name = get(ex, :name_val, nothing) + if !isnothing(name) + Symbol(name) + else + ex.value + end +end + +function JuliaSyntax.sourcefile(ex::SyntaxTree) + sourceref(ex).file +end + +function Base.Expr(ex::SyntaxTree) + JuliaSyntax.to_expr(ex) +end #------------------------------------------------------------------------------- # Lightweight vector of nodes ids with associated pointer to graph stored separately. 
From e873eb8d33df6a7c08bfccf6c934a8116af7a5ac Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 19 Jul 2024 23:16:42 +1000 Subject: [PATCH 0791/1109] Fixes for upstream JuliaSyntax changes --- JuliaLowering/src/JuliaLowering.jl | 4 ++-- JuliaLowering/src/kinds.jl | 4 ++-- JuliaLowering/src/syntax_graph.jl | 4 ++-- JuliaLowering/test/runtests.jl | 2 ++ JuliaLowering/test/syntax_graph.jl | 5 +++++ 5 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 JuliaLowering/test/syntax_graph.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index b472d0972ecc4..9aeed1090238b 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -16,7 +16,7 @@ using JuliaSyntax: filename, first_byte, last_byte, source_location, span, sourc using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error _include("kinds.jl") -_insert_kinds() +_register_kinds() _include("syntax_graph.jl") _include("ast.jl") @@ -31,7 +31,7 @@ _include("runtime.jl") _include("eval.jl") function __init__() - _insert_kinds() + _register_kinds() end end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 393e175586d0b..b0cb64282adeb 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -1,8 +1,8 @@ # The following kinds are used in intermediate forms by lowering but are not # part of the surface syntax -function _insert_kinds() - JuliaSyntax.insert_kinds!(JuliaLowering, 1, [ +function _register_kinds() + JuliaSyntax.register_kinds!(JuliaLowering, 1, [ "BEGIN_LOWERING_KINDS" # Compiler metadata hints "meta" diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 1ca8b76270cb9..8401b6e6e2037 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -515,7 +515,7 @@ function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStr end 
#------------------------------------------------------------------------------- -function JuliaSyntax.expr_leaf_val(ex::SyntaxTree) +function JuliaSyntax._expr_leaf_val(ex::SyntaxTree) name = get(ex, :name_val, nothing) if !isnothing(name) Symbol(name) @@ -524,7 +524,7 @@ function JuliaSyntax.expr_leaf_val(ex::SyntaxTree) end end -function JuliaSyntax.sourcefile(ex::SyntaxTree) +function JuliaSyntax._sourcefile(ex::SyntaxTree) sourceref(ex).file end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 74284fe0c7e77..bb96111abb3b0 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -4,6 +4,8 @@ include("utils.jl") @testset "JuliaLowering.jl" begin +include("syntax_graph.jl") + # Basic end-to-end / smoke tests test_mod = Module() diff --git a/JuliaLowering/test/syntax_graph.jl b/JuliaLowering/test/syntax_graph.jl new file mode 100644 index 0000000000000..282be30cc6479 --- /dev/null +++ b/JuliaLowering/test/syntax_graph.jl @@ -0,0 +1,5 @@ +@testset "SyntaxTree" begin + # Expr conversion + @test Expr(parsestmt(SyntaxTree, "begin a + b ; c end", filename="none")) == + Meta.parse("begin a + b ; c end") +end From a8fe178ff3965b3f98029a8ff2a0e27382c9c525 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 20 Jul 2024 13:43:53 -0500 Subject: [PATCH 0792/1109] Remove cache artifacts step from test CI because JuliaSyntax does not use artifacts in its test pipeline (JuliaLang/JuliaSyntax.jl#454) --- JuliaSyntax/.github/workflows/CI.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index a08912669451b..dcdb5f35311c8 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -75,16 +75,6 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os 
}}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@latest From 700098769933d5b27e9fbc8272f63b368a9f6367 Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Sun, 21 Jul 2024 03:54:17 +0200 Subject: [PATCH 0793/1109] Set version on main branch to 1.0.0-DEV. (JuliaLang/JuliaSyntax.jl#467) --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index e4215bd316000..92f233e6036f8 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "0.4.6" +version = "1.0.0-DEV" [compat] Serialization = "1.0" From 041a7ad9d04b4441d620d4e96b9b6627a411c050 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 21 Jul 2024 15:09:15 +1000 Subject: [PATCH 0794/1109] Generic `sourcefile()` function (JuliaLang/JuliaSyntax.jl#469) * Having a generic version of this is useful for other syntax tree types such as `JuliaLowering.SyntaxTree` * Function to get the `SourceFile` for a syntax object * Generic `highlight()` implementation in terms of this * Cleanup: Remove internal `interpolate_literal()` function which accidentally has still survived from early prototyping. 
--- JuliaSyntax/src/expr.jl | 16 ++++++++++++---- JuliaSyntax/src/source_files.jl | 21 +++++++++++++++++++-- JuliaSyntax/src/syntax_tree.jl | 19 +++++-------------- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index c73a93c8b08ac..a0cbf91fdecb7 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -519,19 +519,27 @@ function build_tree(::Type{Expr}, stream::ParseStream; only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex])) end +""" +Get the source file for a given syntax object +""" +function sourcefile(node::SyntaxNode) + node.source +end + function _to_expr(node::SyntaxNode) + file = sourcefile(node) if !haschildren(node) - offset, txtbuf = _unsafe_wrap_substring(sourcetext(node.source)) - return _leaf_to_Expr(node.source, txtbuf, head(node), byte_range(node) .+ offset, node) + offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) + return _leaf_to_Expr(file, txtbuf, head(node), byte_range(node) .+ offset, node) end cs = children(node) args = Any[_to_expr(c) for c in cs] - _internal_node_to_Expr(node.source, byte_range(node), head(node), byte_range.(cs), head.(cs), args) + _internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args) end function Base.Expr(node::SyntaxNode) ex = _to_expr(node) - loc = source_location(LineNumberNode, node.source, first(byte_range(node))) + loc = source_location(LineNumberNode, sourcefile(node), first_byte(node)) only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex])) end diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 0ae8f3856d912..a5b14f09f027e 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -193,9 +193,26 @@ function _print_marker_line(io, prefix_str, str, underline, singleline, color, end end +function highlight(io::IO, x; kws...) + highlight(io, sourcefile(x), byte_range(x); kws...) 
+end + """ -Print the lines of source code surrounding the given byte `range`, which is -highlighted with background `color` and markers in the text. + highlight(io::IO, source::SourceFile, range::UnitRange; + color, note, notecolor, + context_lines_before, context_lines_inner, context_lines_after, + highlight(io, x; kws...) + +Print the lines of source code `source` surrounding the given byte `range` +which is highlighted with background `color` and underlined with markers in the +text. A `note` in `notecolor` may be provided as annotation. + +In the second form, `x` is an object with `sourcefile(x)` and `byte_range(x)` +implemented. + +The context arguments `context_lines_before`, etc, refer to the number of +lines of code which will be printed as context before and after, with `inner` +referring to context lines inside a multiline region. """ function highlight(io::IO, source::SourceFile, range::UnitRange; color=(120,70,70), context_lines_before=2, diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 2e448a072e36f..d885af806ac28 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -137,21 +137,16 @@ byte_range(ex) = first_byte(ex):last_byte(ex) Get the full source text of a node. 
""" function sourcetext(node::AbstractSyntaxNode) - view(node.source, byte_range(node)) + view(sourcefile(node), byte_range(node)) end -source_line(node::AbstractSyntaxNode) = source_line(node.source, node.position) -source_location(node::AbstractSyntaxNode) = source_location(node.source, node.position) - -function interpolate_literal(node::SyntaxNode, val) - @assert kind(node) == K"$" - SyntaxNode(node.source, node.raw, node.position, node.parent, true, val) -end +source_line(node::AbstractSyntaxNode) = source_line(sourcefile(node), node.position) +source_location(node::AbstractSyntaxNode) = source_location(sourcefile(node), node.position) function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent, show_byte_offsets) - fname = node.source.filename - line, col = source_location(node.source, node.position) + fname = sourcefile(node).filename + line, col = source_location(node) posstr = "$(lpad(line, 4)):$(rpad(col,3))│" if show_byte_offsets posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" @@ -301,10 +296,6 @@ function child_position_span(node::SyntaxNode, path::Int...) n, n.position, span(n) end -function highlight(io::IO, node::SyntaxNode; kws...) - highlight(io, node.source, byte_range(node); kws...) -end - function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) _, p, span = child_position_span(node, path...) q = p + span - 1 From 13873660152774940fa7a04d5533f3175bc8e49d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 21 Jul 2024 21:34:31 +1000 Subject: [PATCH 0795/1109] Cleanup/document source code access functions + add `filename()` (JuliaLang/JuliaSyntax.jl#470) * Move all source code access functions which refer to source locations and strings into the top of source_files.jl, and add some documentation for these. 
* Add `filename()` function to determine source file name of a syntax object * Also add a minor generalization to SyntaxNode->Expr conversion code to make Expr conversion general enough to allow it to also be used for JuliaLowering.SyntaxTree. (internal/experimental interface, for now) --- JuliaSyntax/docs/src/api.md | 37 ++++++-- JuliaSyntax/src/diagnostics.jl | 13 ++- JuliaSyntax/src/expr.jl | 24 ++--- JuliaSyntax/src/parse_stream.jl | 3 +- JuliaSyntax/src/parser_api.jl | 2 + JuliaSyntax/src/source_files.jl | 159 +++++++++++++++++++++++++------- JuliaSyntax/src/syntax_tree.jl | 26 +----- JuliaSyntax/test/hooks.jl | 4 +- JuliaSyntax/test/test_utils.jl | 3 +- 9 files changed, 182 insertions(+), 89 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index eae6e5f1967d2..da35c9412922a 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -30,14 +30,36 @@ JuliaSyntax.untokenize JuliaSyntax.Token ``` -## Source file handling +## Source code handling + +This section describes the generic functions for source text, source location +computation and formatting functions. + +Contiguous syntax objects like nodes in the syntax tree should implement the +following where possible: ```@docs -JuliaSyntax.SourceFile -JuliaSyntax.highlight -JuliaSyntax.sourcetext +JuliaSyntax.sourcefile +JuliaSyntax.byte_range +``` + +This will provide implementations of the following which include range +information, line numbers, and fancy highlighting of source ranges: + +```@docs +JuliaSyntax.first_byte +JuliaSyntax.last_byte +JuliaSyntax.filename JuliaSyntax.source_line JuliaSyntax.source_location +JuliaSyntax.sourcetext +JuliaSyntax.highlight +``` + +`SourceFile`-specific functions: + +```@docs +JuliaSyntax.SourceFile JuliaSyntax.source_line_range ``` @@ -64,8 +86,5 @@ JuliaSyntax.GreenNode ``` Functions applicable to syntax trees include everything in the sections on -heads/kinds, and source file handling. 
- -```@docs -JuliaSyntax.byte_range -``` +heads/kinds as well as the accessor functions in the source code handling +section. diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/diagnostics.jl index 76e8d6a70329c..39fa473fed2f9 100644 --- a/JuliaSyntax/src/diagnostics.jl +++ b/JuliaSyntax/src/diagnostics.jl @@ -37,8 +37,7 @@ function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing) Diagnostic(first_byte, last_byte, level, message) end -first_byte(d::Diagnostic) = d.first_byte -last_byte(d::Diagnostic) = d.last_byte +byte_range(d::Diagnostic) = d.first_byte:d.last_byte is_error(d::Diagnostic) = d.level === :error # Make relative path into a file URL @@ -72,12 +71,12 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) (:normal, "Info") line, col = source_location(source, first_byte(diagnostic)) linecol = "$line:$col" - filename = source.filename + fname = filename(source) file_href = nothing - if !isnothing(filename) - locstr = "$filename:$linecol" - if !startswith(filename, "REPL[") && get(io, :color, false) - url = _file_url(filename) + if !isempty(fname) + locstr = "$fname:$linecol" + if !startswith(fname, "REPL[") && get(io, :color, false) + url = _file_url(fname) if !isnothing(url) file_href = url*"#$linecol" end diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index a0cbf91fdecb7..d600a99b89eda 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -68,6 +68,11 @@ function _strip_parens(ex) end end +# Get Julia value of leaf node as it would be represented in `Expr` form +function _expr_leaf_val(node::SyntaxNode) + node.val +end + function _leaf_to_Expr(source, txtbuf, head, srcrange, node) k = kind(head) if k == K"core_@cmd" @@ -79,7 +84,7 @@ function _leaf_to_Expr(source, txtbuf, head, srcrange, node) Expr(:error) : Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") else - val = isnothing(node) ? 
parse_julia_literal(txtbuf, head, srcrange) : node.val + val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : _expr_leaf_val(node) if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr @@ -519,14 +524,7 @@ function build_tree(::Type{Expr}, stream::ParseStream; only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex])) end -""" -Get the source file for a given syntax object -""" -function sourcefile(node::SyntaxNode) - node.source -end - -function _to_expr(node::SyntaxNode) +function _to_expr(node) file = sourcefile(node) if !haschildren(node) offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) @@ -537,9 +535,13 @@ function _to_expr(node::SyntaxNode) _internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args) end -function Base.Expr(node::SyntaxNode) +function to_expr(node) ex = _to_expr(node) - loc = source_location(LineNumberNode, sourcefile(node), first_byte(node)) + loc = source_location(LineNumberNode, node) only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex])) end +function Base.Expr(node::SyntaxNode) + to_expr(node) +end + diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 0c80ef9de94cf..dc2192f9cb152 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -514,8 +514,7 @@ struct FullToken end head(t::FullToken) = t.head -first_byte(t::FullToken) = t.first_byte -last_byte(t::FullToken) = t.last_byte +byte_range(t::FullToken) = t.first_byte:t.last_byte span(t::FullToken) = 1 + last_byte(t) - first_byte(t) function peek_full_token(stream::ParseStream, n::Integer=1; diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 51548a995bc11..fb805aa776c8e 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -26,6 +26,8 @@ function 
Base.showerror(io::IO, err::ParseError) show_diagnostics(io, err.diagnostics[1:i], err.source) end +sourcefile(err::ParseError) = err.source + """ parse!(stream::ParseStream; rule=:all) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index a5b14f09f027e..57d89917af277 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -1,3 +1,110 @@ +#------------------------------------------------------------------------------- +# Generic functions for source text, source location computation and formatting +# functions + +""" + sourcefile(x) + +Get the source file object (usually `SourceFile`) for a given syntax object +`x`. The source file along with a byte range may be used to compute +`source_line()`, `source_location()`, `filename()`, etc. +""" +function sourcefile +end + +""" + byte_range(x) + +Return the range of bytes which `x` covers in the source text. +""" +function byte_range +end + +""" + first_byte(x) + +Return the first byte of `x` in the source text. +""" +first_byte(x) = first(byte_range(x)) + +""" + first_byte(x) + +Return the last byte of `x` in the source text. +""" +last_byte(x) = last(byte_range(x)) + +""" + filename(x) + +Get file name associated with `source`, or an empty string if one didn't exist. + +For objects `x` such as syntax trees, defers to `filename(sourcefile(x))` by +default. +""" +function filename(x) + source = sourcefile(x) + isnothing(source) ? "" : filename(source) +end + +""" + source_line(x) + source_line(source::SourceFile, byte_index::Integer) + +Get the line number of the first line on which object `x` appears. In the +second form, get the line number at the given `byte_index` within `source`. 
+""" +source_line(x) = source_line(sourcefile(x), first_byte(x)) + +""" + souce_location(x) + souce_location(source::SourceFile, byte_index::Integer) + + souce_location(LineNumberNode, x) + souce_location(LineNumberNode, source, byte_index) + +Get `(line,column)` of the first byte where object `x` appears in the source. +The second form allows one to be more precise with the `byte_index`, given the +source file. + +Providing `LineNumberNode` as the first agrument will return the line and file +name in a line number node object. +""" +source_location(x) = source_location(sourcefile(x), first_byte(x)) + +""" + sourcetext(x) + +Get the full source text syntax object `x` +""" +function sourcetext(x) + view(sourcefile(x), byte_range(x)) +end + +""" + highlight(io, x; color, note, notecolor, + context_lines_before, context_lines_inner, context_lines_after) + + highlight(io::IO, source::SourceFile, range::UnitRange; kws...) + +Print the lines of source code surrounding `x` which is highlighted with +background `color` and underlined with markers in the text. A `note` in +`notecolor` may be provided as annotation. By default, `x` should be an object +with `sourcefile(x)` and `byte_range(x)` implemented. + +The context arguments `context_lines_before`, etc, refer to the number of +lines of code which will be printed as context before and after, with `inner` +referring to context lines inside a multiline region. + +The second form shares the keywords of the first but allows an explicit source +file and byte range to be supplied. +""" +function highlight(io::IO, x; kws...) + highlight(io, sourcefile(x), byte_range(x); kws...) 
+end + + +#------------------------------------------------------------------------------- """ SourceFile(code [; filename=nothing, first_line=1, first_index=1]) @@ -53,16 +160,19 @@ function _source_line_index(source::SourceFile, byte_index) end _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 -""" -Get the line number at the given byte index. -""" -source_line(source::SourceFile, byte_index) = +function source_location(::Type{LineNumberNode}, x) + source_location(LineNumberNode, sourcefile(x), first_byte(x)) +end + +source_line(source::SourceFile, byte_index::Integer) = _source_line(source, _source_line_index(source, byte_index)) -""" -Get line number and character within the line at the given byte index. -""" -function source_location(source::SourceFile, byte_index) +function filename(source::SourceFile) + f = source.filename + !isnothing(f) ? f : "" +end + +function source_location(source::SourceFile, byte_index::Integer) lineidx = _source_line_index(source, byte_index) i = source.line_starts[lineidx] column = 1 @@ -77,7 +187,7 @@ end Get byte range of the source line at byte_index, buffered by `context_lines_before` and `context_lines_after` before and after. """ -function source_line_range(source::SourceFile, byte_index; +function source_line_range(source::SourceFile, byte_index::Integer; context_lines_before=0, context_lines_after=0) lineidx = _source_line_index(source, byte_index) fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] @@ -86,14 +196,14 @@ function source_line_range(source::SourceFile, byte_index; lbyte + source.byte_offset) end -function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) - LineNumberNode(source_line(source, byte_index), - isnothing(source.filename) ? nothing : Symbol(source.filename)) +function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index::Integer) + fn = filename(source) + LineNumberNode(source_line(source, byte_index), isempty(fn) ? 
nothing : Symbol(fn)) end function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) - fn = isnothing(source.filename) ? "" : " $(source.filename)" - header = "## SourceFile$fn ##" + fn = filename(source) + header = "## SourceFile$(isempty(fn) ? "" : " ")$fn ##" print(io, header, "\n") heightlim = displaysize(io)[1] ÷ 2 if !get(io, :limit, false) || length(source.line_starts) <= heightlim @@ -193,27 +303,6 @@ function _print_marker_line(io, prefix_str, str, underline, singleline, color, end end -function highlight(io::IO, x; kws...) - highlight(io, sourcefile(x), byte_range(x); kws...) -end - -""" - highlight(io::IO, source::SourceFile, range::UnitRange; - color, note, notecolor, - context_lines_before, context_lines_inner, context_lines_after, - highlight(io, x; kws...) - -Print the lines of source code `source` surrounding the given byte `range` -which is highlighted with background `color` and underlined with markers in the -text. A `note` in `notecolor` may be provided as annotation. - -In the second form, `x` is an object with `sourcefile(x)` and `byte_range(x)` -implemented. - -The context arguments `context_lines_before`, etc, refer to the number of -lines of code which will be printed as context before and after, with `inner` -referring to context lines inside a multiline region. 
-""" function highlight(io::IO, source::SourceFile, range::UnitRange; color=(120,70,70), context_lines_before=2, context_lines_inner=1, context_lines_after=2, diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index d885af806ac28..dc9a73754ac76 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -121,31 +121,13 @@ head(node::AbstractSyntaxNode) = head(node.raw) span(node::AbstractSyntaxNode) = span(node.raw) -first_byte(node::AbstractSyntaxNode) = node.position -last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1 +byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) - 1) -""" - byte_range(ex) - -Return the range of bytes which `ex` covers in the source text. -""" -byte_range(ex) = first_byte(ex):last_byte(ex) - -""" - sourcetext(node) - -Get the full source text of a node. -""" -function sourcetext(node::AbstractSyntaxNode) - view(sourcefile(node), byte_range(node)) -end - -source_line(node::AbstractSyntaxNode) = source_line(sourcefile(node), node.position) -source_location(node::AbstractSyntaxNode) = source_location(sourcefile(node), node.position) +sourcefile(node::AbstractSyntaxNode) = node.source function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent, show_byte_offsets) - fname = sourcefile(node).filename + fname = filename(node) line, col = source_location(node) posstr = "$(lpad(line, 4)):$(rpad(col,3))│" if show_byte_offsets @@ -192,7 +174,7 @@ end function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_byte_offsets=false) println(io, "line:col│$(show_byte_offsets ? 
" byte_range │" : "") tree │ file_name") - _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "", show_byte_offsets) + _show_syntax_node(io, Ref(""), node, "", show_byte_offsets) end function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode) diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index d5944a04ccff5..3593911953984 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -35,13 +35,13 @@ end JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement) ) @test err isa JuliaSyntax.ParseError - @test err.source.filename == "f1" + @test filename(err) == "f1" @test err.source.first_line == 1 err = _unwrap_parse_error( JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement) ) @test err isa JuliaSyntax.ParseError - @test err.source.filename == "f2" + @test filename(err) == "f2" @test err.source.first_line == 2 # Errors including nontrivial offset indices diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 69915af2f5b1a..b16aef3120871 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -35,7 +35,8 @@ using .JuliaSyntax: fl_parse, highlight, tokenize, - untokenize + untokenize, + filename if VERSION < v"1.6" # Compat stuff which might not be in Base for older versions From c333179993d888b877f81f680a851fe29fad7543 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 22 Jul 2024 15:10:57 +1000 Subject: [PATCH 0796/1109] Update to allow use of JuliaSyntax `main` branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More progress toward this becoming less of a giant hack 🎉 - no more JuliaSyntax custom branch! 
--- JuliaLowering/README.md | 2 +- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/syntax_graph.jl | 40 +++++++++++------------------- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index d9536dd8186d0..3c4db69f34abd 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -29,7 +29,7 @@ This work is intended to Note this is a very early work in progress; most things probably don't work! 1. Use a recent dev version of Julia (need at least version 1.12.0-DEV.512) -2. Check out the caf/lowering-2 branch of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) +2. Check out the main branch of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) 3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) 4. Run the demo `include("test/demo.jl")` diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 9aeed1090238b..bf6546105d0d1 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -11,7 +11,7 @@ using JuliaSyntax using JuliaSyntax: highlight, Kind, @KSet_str using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags, has_flags -using JuliaSyntax: filename, first_byte, last_byte, source_location, span, sourcetext +using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 8401b6e6e2037..faafbd0c40935 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -276,21 +276,22 @@ struct SourceRef green_tree::JuliaSyntax.GreenNode end -JuliaSyntax.first_byte(src::SourceRef) = src.first_byte -JuliaSyntax.last_byte(src::SourceRef) = src.first_byte + 
span(src.green_tree) - 1 -JuliaSyntax.filename(src::SourceRef) = filename(src.file) -JuliaSyntax.source_location(::Type{LineNumberNode}, src::SourceRef) = source_location(LineNumberNode, src.file, src.first_byte) -JuliaSyntax.source_location(src::SourceRef) = source_location(src.file, src.first_byte) -JuliaSyntax.sourcetext(src::SourceRef) = src.file[first_byte(src):last_byte(src)] +JuliaSyntax.sourcefile(src::SourceRef) = src.file +JuliaSyntax.byte_range(src::SourceRef) = src.first_byte:(src.first_byte + span(src.green_tree) - 1) # TODO: Adding these methods to support LineNumberNode is kind of hacky but we # can remove these after JuliaLowering becomes self-bootstrapping for macros # and we a proper SourceRef for @ast's @HERE form. -JuliaSyntax.first_byte(src::LineNumberNode) = 0 -JuliaSyntax.last_byte(src::LineNumberNode) = 0 -JuliaSyntax.filename(src::LineNumberNode) = string(src.file) -JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode) = src +JuliaSyntax.byte_range(src::LineNumberNode) = 0:0 JuliaSyntax.source_location(src::LineNumberNode) = (src.line, 0) +JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode) = src +JuliaSyntax.source_line(src::LineNumberNode) = src.line +# The follow somewhat strange cases are for where LineNumberNode is standing in +# for SourceFile because we've only got Expr-based provenance info +JuliaSyntax.sourcefile(src::LineNumberNode) = src +JuliaSyntax.source_location(src::LineNumberNode, byte_index::Integer) = (src.line, 0) +JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode, byte_index::Integer) = src +JuliaSyntax.filename(src::LineNumberNode) = string(src.file) function JuliaSyntax.highlight(io::IO, src::LineNumberNode; note="") print(io, src, " - ", note, "\n") @@ -384,13 +385,6 @@ function is_ancestor(ex, ancestor) end end -JuliaSyntax.filename(ex::SyntaxTree) = filename(sourceref(ex)) -JuliaSyntax.source_location(::Type{LineNumberNode}, ex::SyntaxTree) = 
source_location(LineNumberNode, sourceref(ex)) -JuliaSyntax.source_location(ex::SyntaxTree) = source_location(sourceref(ex)) -JuliaSyntax.first_byte(ex::SyntaxTree) = first_byte(sourceref(ex)) -JuliaSyntax.last_byte(ex::SyntaxTree) = last_byte(sourceref(ex)) -JuliaSyntax.sourcetext(ex::SyntaxTree) = sourcetext(sourceref(ex)) - const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId,Tuple} function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) @@ -514,7 +508,9 @@ function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStr SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) end -#------------------------------------------------------------------------------- +JuliaSyntax.sourcefile(ex::SyntaxTree) = sourcefile(sourceref(ex)) +JuliaSyntax.byte_range(ex::SyntaxTree) = byte_range(sourceref(ex)) + function JuliaSyntax._expr_leaf_val(ex::SyntaxTree) name = get(ex, :name_val, nothing) if !isnothing(name) @@ -524,13 +520,7 @@ function JuliaSyntax._expr_leaf_val(ex::SyntaxTree) end end -function JuliaSyntax._sourcefile(ex::SyntaxTree) - sourceref(ex).file -end - -function Base.Expr(ex::SyntaxTree) - JuliaSyntax.to_expr(ex) -end +Base.Expr(ex::SyntaxTree) = JuliaSyntax.to_expr(ex) #------------------------------------------------------------------------------- # Lightweight vector of nodes ids with associated pointer to graph stored separately. From 2b7070a066ae7dbf74fbbfb5fa8907102d1f1ea3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 23 Jul 2024 09:23:32 +1000 Subject: [PATCH 0797/1109] Make trivial interpolations like `:($x)` work Perhaps surprisingly this is a special case because there's no enclosing expression. 
--- JuliaLowering/src/macro_expansion.jl | 3 +++ JuliaLowering/src/runtime.jl | 15 ++++++++++---- JuliaLowering/test/demo.jl | 6 +++++- JuliaLowering/test/loops.jl | 4 ++-- JuliaLowering/test/quoting.jl | 30 ++++++++++++++++++++++++++++ JuliaLowering/test/runtests.jl | 4 ++-- JuliaLowering/test/utils.jl | 2 +- 7 files changed, 54 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index be40fa9bbc5b1..e95e965180aec 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -25,6 +25,9 @@ end # Expansion of quoted expressions function collect_unquoted!(ctx, unquoted, ex, depth) if kind(ex) == K"$" && depth == 0 + # children(ex) is usually length 1, but for double interpolation it may + # be longer and the children may contain K"..." expressions. Wrapping + # in a tuple groups the arguments together correctly in those cases. push!(unquoted, @ast ctx ex [K"tuple" children(ex)...]) else inner_depth = kind(ex) == K"quote" ? depth + 1 : diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 0af8e339a9d59..4a636e23662dd 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -62,9 +62,6 @@ function _interpolate_ast(ctx::InterpolationContext, ex, depth) end function interpolate_ast(ex, values...) - if kind(ex) == K"$" - TODO(ex, "\$ in interpolate_ast") - end # Construct graph for interpolation context. We inherit this from the macro # context where possible by detecting it using __macro_ctx__. This feels # hacky though. @@ -88,7 +85,17 @@ function interpolate_ast(ex, values...) # We must copy the AST into our context to use it as the source reference # of generated expressions. 
ex1 = copy_ast(ctx, ex) - _interpolate_ast(ctx, ex1, 0) + if kind(ex1) == K"$" + @assert length(values) == 1 + vs = values[1] + if length(vs) > 1 + # :($($(xs...))) where xs is more than length 1 + throw(LoweringError(ex1, "More than one value in bare `\$` expression")) + end + _interpolated_value(ctx, ex1, only(vs)) + else + _interpolate_ast(ctx, ex1, 0) + end end # Construct new bare module including only the "default names" diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index d98f6b405e199..f63ebb8ed042d 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -260,7 +260,11 @@ src = """ """ src = """ -x = :hi +begin + a = 1 + xs = [:(a),] + x = :(:(\$(\$(xs...)))) +end """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index cfb04c46c4bb4..a5a9c0fb0d93a 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -45,11 +45,11 @@ let end """) == [2,4] -@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ break """) -@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ continue """) diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index 599db250c3d14..ac9ee12b721ff 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -57,6 +57,16 @@ let end """)) +# Test that trivial interpolation without any nesting works. 
+ex = JuliaLowering.include_string(test_mod, """ +let + x = 123 + :(\$x) +end +""") +@test kind(ex) == K"Value" +@test ex.value == 123 + # interpolations at multiple depths ex = JuliaLowering.include_string(test_mod, """ let @@ -102,4 +112,24 @@ ex2 = JuliaLowering.eval(test_mod, ex) @test JuliaLowering.include_string(test_mod, ":x") isa Symbol @test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree +# Double interpolation +ex = JuliaLowering.include_string(test_mod, """ +let + args = (:(xxx),) + :(:(\$\$(args...))) +end +""") +Base.eval(test_mod, :(xxx = 111)) +ex2 = JuliaLowering.eval(test_mod, ex) +@test kind(ex2) == K"Value" +@test ex2.value == 111 + +double_interp_ex = JuliaLowering.include_string(test_mod, """ +let + args = (:(x), :(y)) + :(:(\$\$(args...))) +end +""") +@test_throws LoweringError JuliaLowering.eval(test_mod, double_interp_ex) + end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index bb96111abb3b0..e057b2a61b781 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -292,12 +292,12 @@ end == [ ] -@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ macro mmm(a; b=2) end """) -@test_throws JuliaLowering.LoweringError JuliaLowering.include_string(test_mod, """ +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ macro A.b(ex) end """) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 49e4d30964a8a..fd8d532befe8b 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -10,7 +10,7 @@ using JuliaLowering: Kind, SourceRef, SyntaxTree, NodeId, makenode, makeleaf, setattr!, sethead!, haschildren, numchildren, children, - @ast, flattened_provenance, showprov + @ast, flattened_provenance, showprov, LoweringError function _ast_test_graph() graph = SyntaxGraph() From 8820d46c2a06d4c43b3d8c744f6f95318d10b0ed Mon Sep 17 
00:00:00 2001 From: Claire Foster Date: Tue, 23 Jul 2024 11:02:50 +1000 Subject: [PATCH 0798/1109] Add a pile of documentation for expression predicates (JuliaLang/JuliaSyntax.jl#471) --- JuliaSyntax/docs/src/api.md | 54 +++++++++++++++++-- JuliaSyntax/src/parse_stream.jl | 96 ++++++++++++++++++++++++++++++--- 2 files changed, 137 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index da35c9412922a..e68bf1cf1b56a 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -63,18 +63,62 @@ JuliaSyntax.SourceFile JuliaSyntax.source_line_range ``` -## Expression heads/kinds +## Expression predicates, kinds and flags + +Expressions are tagged with a kind - like a type, but represented as an integer +tag rather than a full Julia type for efficiency. (Very like the tag of a "sum +type".) `Kind`s are constructed with the `@K_str` macro. ```@docs -JuliaSyntax.Kind -JuliaSyntax.SyntaxHead JuliaSyntax.@K_str +JuliaSyntax.Kind +``` + +The kind of an expression `ex` in a tree should be accessed with `kind(ex)` + +```@docs JuliaSyntax.kind -JuliaSyntax.head +``` + +In addition to the `kind`, a small integer set of "flags" is included to +further distinguish details of each expresssion, accessed with the `flags` +function. The kind and flags can be wrapped into a `SyntaxHead` which is +accessed with the `head` function. + +```@docs JuliaSyntax.flags +JuliaSyntax.SyntaxHead +JuliaSyntax.head ``` -see also predicates related to `flags`. 
+Details about the flags may be extracted using various predicates: + +```@docs +JuliaSyntax.is_trivia +JuliaSyntax.is_prefix_call +JuliaSyntax.is_infix_op_call +JuliaSyntax.is_prefix_op_call +JuliaSyntax.is_postfix_op_call +JuliaSyntax.is_dotted +JuliaSyntax.is_suffixed +JuliaSyntax.is_decorated +JuliaSyntax.numeric_flags +``` + +Some of the more unusual predicates are accessed merely with `has_flags(x, +flag_bits)`, where any of the following uppercase constants may be used for +`flag_bits` after checking that the `kind` is correct. + +```@docs +JuliaSyntax.has_flags +JuliaSyntax.TRIPLE_STRING_FLAG +JuliaSyntax.RAW_STRING_FLAG +JuliaSyntax.PARENS_FLAG +JuliaSyntax.COLON_QUOTE +JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG +JuliaSyntax.MUTABLE_FLAG +JuliaSyntax.BARE_MODULE_FLAG +``` ## Syntax trees diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index dc2192f9cb152..b4ddbeafe8c01 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -24,25 +24,43 @@ const POSTFIX_OP_FLAG = RawFlags(3<<3) # The following flags are quite head-specific and may overlap -# Set when K"string" or K"cmdstring" was triple-delimited as with """ or ``` +""" +Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ``` +""" const TRIPLE_STRING_FLAG = RawFlags(1<<5) -# Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping + +""" +Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping +""" const RAW_STRING_FLAG = RawFlags(1<<6) -# Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses +""" +Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses +""" const PARENS_FLAG = RawFlags(1<<5) -# Set for K"quote" for the short form `:x` as oppsed to long form `quote x end` + +""" +Set for K"quote" for the short form `:x` as oppsed to long form `quote x end` +""" const COLON_QUOTE = RawFlags(1<<5) -# Set for K"toplevel" which is delimited 
by parentheses + +""" +Set for K"toplevel" which is delimited by parentheses +""" const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) -# Set for K"struct" when mutable +""" +Set for K"struct" when mutable +""" const MUTABLE_FLAG = RawFlags(1<<5) -# Set for K"module" when it's not bare (`module`, not `baremodule`) +""" +Set for K"module" when it's not bare (`module`, not `baremodule`) +""" const BARE_MODULE_FLAG = RawFlags(1<<5) # Flags holding the dimension of an nrow or other UInt8 not held in the source +# TODO: Given this is only used for nrow/ncat, we could actually use all the flags? const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) function set_numeric_flags(n::Integer) @@ -65,7 +83,11 @@ function remove_flags(n::RawFlags, fs...) RawFlags(n & ~(RawFlags((|)(fs...)))) end -# Return true if any of `test_flags` are set +""" + has_flags(x, test_flags) + +Return true if any of `test_flags` are set. +""" has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 #------------------------------------------------------------------------------- @@ -145,14 +167,72 @@ flags(x) = flags(head(x)) has_flags(x, test_flags) = has_flags(flags(x), test_flags) call_type_flags(x) = call_type_flags(flags(x)) +""" + is_trivia(x) + +Return true for "syntax trivia": tokens in the tree which are either largely +invisible to the parser (eg, whitespace) or implied by the structure of the AST +(eg, reserved words). +""" is_trivia(x) = has_flags(x, TRIVIA_FLAG) + +""" + is_prefix_call(x) + +Return true for normal prefix function call syntax such as the `f` call node +parsed from `f(x)`. +""" is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG + +""" + is_infix_op_call(x) + +Return true for infix operator calls such as the `+` call node parsed from +`x + y`. +""" is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG + +""" + is_prefix_op_call(x) + +Return true for prefix operator calls such as the `+` call node parsed from `+x`. 
+""" is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG + +""" + is_postfix_op_call(x) + +Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`. +""" is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG + +""" + is_dotted(x) + +Return true for dotted syntax tokens +""" is_dotted(x) = has_flags(x, DOTOP_FLAG) + +""" + is_suffixed(x) + +Return true for operators which have sufficies, such as `+₁` +""" is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) + +""" + is_decorated(x) + +Return true for operators which are decorated with a dot or suffix. +""" is_decorated(x) = is_dotted(x) || is_suffixed(x) + +""" + numeric_flags(x) + +Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"` +and `K"ncat"`, for now. +""" numeric_flags(x) = numeric_flags(flags(x)) #------------------------------------------------------------------------------- From 78f589f3ae95e5aca99f4893c203fdef6829bb3a Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Tue, 23 Jul 2024 04:56:44 +0200 Subject: [PATCH 0799/1109] rm `is_error` import (JuliaLang/JuliaSyntax.jl#420) --- JuliaSyntax/src/tokenize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 0f60309fa899a..18440e98e878e 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -5,7 +5,7 @@ export tokenize, untokenize using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str import ..JuliaSyntax: kind, - is_literal, is_error, is_contextual_keyword, is_word_operator + is_literal, is_contextual_keyword, is_word_operator #------------------------------------------------------------------------------- # Character-based predicates for tokenization From 7768876e5cd4a3455bb73eb6e67cae6823488db5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 23 Jul 2024 14:10:22 +1000 Subject: [PATCH 0800/1109] Test tricky field interpolation + improve 
error message for misplaced `$` --- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/test/quoting.jl | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 7b52050508c65..7cd9cf241600b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -757,7 +757,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) children(ex)... ]) elseif k == K"$" - throw(LoweringError(ex, "`\$` expression outside quote")) + throw(LoweringError(ex, "`\$` expression outside string or quote block")) elseif k == K"module" # TODO: check-toplevel expand_module(ctx, ex) diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index ac9ee12b721ff..0d747654a9e86 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -67,6 +67,17 @@ end @test kind(ex) == K"Value" @test ex.value == 123 +# Test that interpolation with field access works +# (the field name can be interpolated into +ex = JuliaLowering.include_string(test_mod, """ +let + field_name = :(a) + :(a.\$field_name) +end +""") +@test kind(ex[2]) == K"Identifier" +@test ex[2].name_val == "a" + # interpolations at multiple depths ex = JuliaLowering.include_string(test_mod, """ let From 8bef1ed800bb8bc70853b49bed573f1afa78e971 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 24 Jul 2024 12:53:18 +1000 Subject: [PATCH 0801/1109] Desugaring of try-catch-else-finally --- JuliaLowering/src/desugaring.jl | 80 +++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 7cd9cf241600b..c3ce65c604e0f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -258,6 +258,84 @@ function expand_for(ctx, ex) ] end +function match_try(ex) + @chk numchildren(ex) > 1 "Invalid `try` form" + try_ = ex[1] + catch_and_exc = nothing + exc_var 
= nothing + catch_ = nothing + finally_ = nothing + else_ = nothing + for e in ex[2:end] + k = kind(e) + if k == K"catch" && isnothing(catch_) + @chk numchildren(e) == 2 "Invalid `catch` form" + if !(kind(e[1]) == K"Bool" && e[1].value === false) + # TODO: Fix this strange AST wart upstream? + exc_var = e[1] + end + catch_ = e[2] + catch_and_exc = e + elseif k == K"else" && isnothing(else_) + @chk numchildren(e) == 1 + else_ = e[1] + elseif k == K"finally" && isnothing(finally_) + @chk numchildren(e) == 1 + finally_ = e[1] + else + throw(LoweringError(ex, "Invalid clause in `try` form")) + end + end + (try_, catch_and_exc, exc_var, catch_, else_, finally_) +end + +function expand_try(ctx, ex) + (try_, catch_and_exc, exc_var, catch_, else_, finally_) = match_try(ex) + + if !isnothing(finally_) + # TODO: check unmatched symbolic gotos in try. + end + + try_body = @ast ctx try_ [K"scope_block"(scope_type=:neutral) try_] + + if isnothing(catch_) + try_block = try_body + else + if !isnothing(exc_var) && !is_identifier_like(exc_var) + throw(LoweringError(exc_var, "Expected an identifier as exception variable")) + end + try_block = @ast ctx ex [K"trycatchelse" + try_body + [K"scope_block"(catch_and_exc, scope_type=:neutral) + if !isnothing(exc_var) + if !is_identifier_like(exc_var) + throw(LoweringError(exc_var, "Expected an identifier as exception variable")) + end + [K"block" + [K"="(exc_var) exc_var [K"the_exception"]] + catch_ + ] + else + catch_ + end + ] + if !isnothing(else_) + [K"scope_block"(else_, scope_type=:neutral) else_] + end + ] + end + + # Add finally block + if isnothing(finally_) + try_block + else + @ast ctx ex [K"tryfinally" + try_block + [K"scope_block"(finally_, scope_type=:neutral) finally_] + ] + end +end + # Strip variable type declarations from within a `local` or `global`, returning # the stripped expression. Works recursively with complex left hand side # assignments containing tuple destructuring. 
Eg, given @@ -750,6 +828,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_forms_2(ctx, children(ex))... ] end + elseif k == K"try" + expand_try(ctx, ex) elseif k == K"tuple" # TODO: named tuples expand_forms_2(ctx, @ast ctx ex [K"call" From d1d6cedd2264ba1d5859d3526e45ac2e29972f03 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 24 Jul 2024 12:39:05 +0200 Subject: [PATCH 0802/1109] add "some warnings detected" to ParseError printing when all problems are warnings (JuliaLang/JuliaSyntax.jl#423) * try to make warnings clearer * Apply suggestions from code review * Update test/hooks.jl --- JuliaSyntax/src/parser_api.jl | 5 ++++- JuliaSyntax/test/parser_api.jl | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index fb805aa776c8e..95f077ef7c1c5 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -16,13 +16,16 @@ function ParseError(stream::ParseStream; incomplete_tag=:none, kws...) 
end function Base.showerror(io::IO, err::ParseError) - println(io, "ParseError:") # Only show the first parse error for now - later errors are often # misleading due to the way recovery works i = findfirst(is_error, err.diagnostics) if isnothing(i) i = lastindex(err.diagnostics) + level_info = " some warnings detected:" + else + level_info = "" end + println(io, "ParseError:", level_info) show_diagnostics(io, err.diagnostics[1:i], err.source) end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 9e05dee151e5a..11570ce9242c4 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -163,7 +163,7 @@ end catch exc @test exc isa JuliaSyntax.ParseError @test sprint(showerror, exc) == """ - ParseError: + ParseError: some warnings detected: # Warning @ somefile.jl:1:2 @(a) #└─┘ ── parenthesizing macro names is unnecessary""" From 0fd012156290fba8cad5a6099eccaeae5c2d7df3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 26 Jul 2024 11:33:47 +1000 Subject: [PATCH 0803/1109] Change AST for iterations to use `iteration` kind (JuliaLang/JuliaSyntax.jl#433) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `=` node which has traditionally been used for iteration specifications in `for` loops and generators doesn't have normal assignment semantics. Let's consider for x in xs body end which has been parsed as `(for (= x xs) (block body))`. Problems: * The iteration does create a binding for `x`, but not to the expression on the right hand side of the `=`. * The user may use `in` or `∈` in the source code rather than `=`. The parser still uses a `=` node for consistency but this only emphasizes that there's something a bit weird going on. So this use of `=` is not assignment; merely assignment-like. In this change, we use `in` instead of `=` and wrap this in an `iteration` node so that all iteration (including over multiple iterators) has the same structure. 
Thus the `for` loop above parses as `(for (iteration (in x xs)) (block body))` instead. The `cartesian_iteration` head naturally becomes `iteration` instead - being less specific here with the naming seems appropriate in trying to represent the surface syntax; cartesian semantics come later in lowering and a macro may decide to do something else with the iteration spec. These changes are also used for generators. After the changes we have tree structures such as julia> parsestmt(SyntaxNode, "for i in is body end") line:col│ tree │ file_name 1:1 │[for] 1:4 │ [iteration] 1:4 │ [in] 1:5 │ i 1:10 │ is 1:12 │ [block] 1:13 │ body julia> parsestmt(SyntaxNode, "for i in is, j in js body end") line:col│ tree │ file_name 1:1 │[for] 1:4 │ [iteration] 1:4 │ [in] 1:5 │ i 1:10 │ is 1:13 │ [in] 1:14 │ j 1:19 │ js 1:21 │ [block] 1:22 │ body julia> parsestmt(SyntaxNode, "[a for i = is, j = js if z]") line:col│ tree │ file_name 1:1 │[comprehension] 1:2 │ [generator] 1:2 │ a 1:7 │ [filter] 1:7 │ [iteration] 1:7 │ [in] 1:8 │ i 1:12 │ is 1:15 │ [in] 1:16 │ j 1:20 │ js 1:26 │ z julia> parsestmt(SyntaxNode, "[a for i = is for j = js if z]") line:col│ tree │ file_name 1:1 │[comprehension] 1:2 │ [generator] 1:2 │ a 1:7 │ [iteration] 1:7 │ [in] 1:8 │ i 1:12 │ is 1:18 │ [filter] 1:18 │ [iteration] 1:18 │ [in] 1:19 │ j 1:23 │ js 1:29 │ z --- JuliaSyntax/docs/src/reference.md | 20 +++++----- JuliaSyntax/src/expr.jl | 34 ++++++++--------- JuliaSyntax/src/kinds.jl | 2 +- JuliaSyntax/src/parser.jl | 43 ++++++++++----------- JuliaSyntax/test/parser.jl | 62 +++++++++++++++---------------- 5 files changed, 78 insertions(+), 83 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 2ae2cef10b1f7..a98662ee4e7f9 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -76,7 +76,7 @@ class of tokenization errors and lets the parser deal with them. 
* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) -* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). +* Iterations are represented with the `iteration` head rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration i is) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a longer `iteration` block rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. ## More detail on tree differences @@ -90,8 +90,10 @@ mean ``` for x in xs -for y in ys - push!(xy, collection) + for y in ys + push!(xy, collection) + end +end ``` so the `xy` prefix is in the *body* of the innermost for loop. Following this, @@ -112,8 +114,8 @@ source order. However, our green tree is strictly source-ordered, so we must deviate from the Julia AST. 
We deal with this by grouping cartesian products of iterators -(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and -use the presence of multiple iterator blocks rather than the `flatten` head to +(separated by commas) within `iteration` blocks as in `for` loops, and +use the length of the `iteration` block rather than the `flatten` head to distinguish flattened iterators. The nested flattens and generators of `Expr` forms are reconstructed later. In this form the tree structure resembles the source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as @@ -121,8 +123,8 @@ source much more closely. For example, `(xy for x in xs for y in ys)` is parsed ``` (generator xy - (= x xs) - (= y ys)) + (iteration x xs) + (iteration y ys)) ``` And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as @@ -130,9 +132,7 @@ And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as ``` (generator xy - (cartesian_iterator - (= x xs) - (= y ys))) + (iteration x xs y ys)) ``` ### Whitespace trivia inside strings diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index d600a99b89eda..adda1463ae126 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -198,6 +198,17 @@ function _extract_do_lambda!(args) end end +function _append_iterspec!(args, ex) + if @isexpr(ex, :iteration) + for iter in ex.args::Vector{Any} + push!(args, Expr(:(=), iter.args...)) + end + else + push!(args, ex) + end + return args +end + # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) @@ -301,10 +312,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # Move parameters blocks to args[2] _reorder_parameters!(args, 2) elseif k == K"for" - a1 = args[1] - if @isexpr(a1, :cartesian_iterator) - args[1] = Expr(:block, a1.args...) 
- end + iters = _append_iterspec!([], args[1]) + args[1] = length(iters) == 1 ? only(iters) : Expr(:block, iters...) # Add extra line number node for the `end` of the block. This may seem # useless but it affects code coverage. push!(args[2].args, endloc) @@ -360,12 +369,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # source-ordered `generator` format. gen = args[1] for j = length(args):-1:2 - aj = args[j] - if @isexpr(aj, :cartesian_iterator) - gen = Expr(:generator, gen, aj.args...) - else - gen = Expr(:generator, gen, aj) - end + gen = Expr(:generator, gen) + _append_iterspec!(gen.args, args[j]) if j < length(args) # Additional `for`s flatten the inner generator gen = Expr(:flatten, gen) @@ -374,14 +379,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, return gen elseif k == K"filter" @assert length(args) == 2 - iterspec = args[1] - outargs = Any[args[2]] - if @isexpr(iterspec, :cartesian_iterator) - append!(outargs, iterspec.args) - else - push!(outargs, iterspec) - end - args = outargs + args = _append_iterspec!(Any[args[2]], args[1]) elseif k == K"nrow" || k == K"ncat" # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index bf83771641dca..0ef0b815a7fe6 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1099,7 +1099,7 @@ register_kinds!(JuliaSyntax, 0, [ # Comprehensions "generator" "filter" - "cartesian_iterator" + "iteration" "comprehension" "typed_comprehension" # Container for a single statement/atom plus any trivia and errors diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8401002c7b103..9a2ac7035716a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1075,7 +1075,7 @@ function parse_where_chain(ps0::ParseState, mark) # x where {T,S} ==> (where x (braces T S)) # Also various nonsensical forms permitted # x 
where {T S} ==> (where x (bracescat (row T S))) - # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) + # x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys))))) m = position(ps) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) @@ -1578,7 +1578,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # T[x y] ==> (typed_hcat T x y) # T[x ; y] ==> (typed_vcat T x y) # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) - # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) + # T[x for x in xs] ==> (typed_comprehension T (generator x (iteration (in x xs)))) #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d)) outk = ckind == K"vect" ? K"ref" : ckind == K"hcat" ? K"typed_hcat" : @@ -1798,8 +1798,8 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"while") elseif word == K"for" - # for x in xs end ==> (for (= x xs) (block)) - # for x in xs, y in ys \n a \n end ==> (for (cartesian_iterator (= x xs) (= y ys)) (block a)) + # for x in xs end ==> (for (iteration (in x xs)) (block)) + # for x in xs, y in ys \n a \n end ==> (for (iteration (in x xs) (in y ys)) (block a)) bump(ps, TRIVIA_FLAG) parse_iteration_specs(ps) parse_block(ps) @@ -2621,11 +2621,11 @@ function parse_iteration_spec(ps::ParseState) if peek_behind(ps).orig_kind == K"outer" if peek_skip_newline_in_gen(ps) in KSet"= in ∈" # Not outer keyword - # outer = rhs ==> (= outer rhs) - # outer <| x = rhs ==> (= (call-i outer <| x) rhs) + # outer = rhs ==> (iteration (in outer rhs)) + # outer <| x = rhs ==> (iteration (in (call-i outer <| x) rhs)) else - # outer i = rhs ==> (= (outer i) rhs) - # outer (x,y) = rhs ==> (= (outer (tuple-p x y)) rhs) + # outer i = rhs ==> (iteration (in (outer i) rhs)) + # outer (x,y) = rhs ==> (iteration (in (outer (tuple-p x y)) rhs)) reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG) parse_pipe_lt(ps) emit(ps, 
mark, K"outer") @@ -2641,7 +2641,7 @@ function parse_iteration_spec(ps::ParseState) end # Or try parse_pipe_lt ??? end - emit(ps, mark, K"=") + emit(ps, mark, K"in") end # Parse an iteration spec, or a comma separate list of such for for loops and @@ -2649,9 +2649,7 @@ end function parse_iteration_specs(ps::ParseState) mark = position(ps) n_iters = parse_comma_separated(ps, parse_iteration_spec) - if n_iters > 1 - emit(ps, mark, K"cartesian_iterator") - end + emit(ps, mark, K"iteration") end # flisp: parse-space-separated-exprs @@ -2701,19 +2699,19 @@ end # Parse generators # # We represent generators quite differently from `Expr`: -# * Cartesian products of iterators are grouped within cartesian_iterator +# * Iteration variables and their iterators are grouped within K"iteration" # nodes, as in the short form of `for` loops. # * The `generator` kind is used for both cartesian and flattened generators # -# (x for a in as for b in bs) ==> (parens (generator x (= a as) (= b bs))) -# (x for a in as, b in bs) ==> (parens (generator x (cartesian_iterator (= a as) (= b bs)))) -# (x for a in as, b in bs if z) ==> (parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z))) +# (x for a in as for b in bs) ==> (parens (generator x (iteration (in a as)) (iteration (in b bs)))) +# (x for a in as, b in bs) ==> (parens (generator x (iteration (in a as) (in b bs)))) +# (x for a in as, b in bs if z) ==> (parens (generator x (filter (iteration (in a as) (in b bs)) z))) # # flisp: parse-generator function parse_generator(ps::ParseState, mark) while (t = peek_token(ps); kind(t) == K"for") if !preceding_whitespace(t) - # ((x)for x in xs) ==> (parens (generator (parens x) (error) (= x xs))) + # ((x)for x in xs) ==> (parens (generator (parens x) (error) (iteration (in x xs)))) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Expected space before `for` in generator") end @@ -2721,7 +2719,7 @@ function parse_generator(ps::ParseState, mark) iter_mark = position(ps) 
parse_iteration_specs(ps) if peek(ps) == K"if" - # (x for a in as if z) ==> (parens (generator x (filter (= a as) z))) + # (x for a in as if z) ==> (parens (generator x (filter (iteration (in a as)) z))) bump(ps, TRIVIA_FLAG) parse_cond(ps) emit(ps, iter_mark, K"filter") @@ -2732,7 +2730,7 @@ end # flisp: parse-comprehension function parse_comprehension(ps::ParseState, mark, closer) - # [x for a in as] ==> (comprehension (generator x a in as)) + # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) ps = ParseState(ps, whitespace_newline=true, space_sensitive=false, end_symbol=false) @@ -2982,8 +2980,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x ==> (vect x (error-t)) parse_vect(ps, closer) elseif k == K"for" - # [x for a in as] ==> (comprehension (generator x (= a as))) - # [x \n\n for a in as] ==> (comprehension (generator x (= a as))) + # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) + # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) parse_comprehension(ps, mark, closer) else # [x y] ==> (hcat x y) @@ -3139,8 +3137,7 @@ function parse_brackets(after_parse::Function, continue elseif k == K"for" # Generator syntax - # (x for a in as) ==> (parens (generator x (= a as))) - # (x \n\n for a in as) ==> (parens (generator x (= a as))) + # (x for a in as) ==> (parens (generator x (iteration (in a as)))) parse_generator(ps, mark) else # Error - recovery done when consuming closing_kind diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1e4baa665508e..1a157e3d587c8 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -300,7 +300,7 @@ tests = [ "x where \n {T}" => "(where x (braces T))" "x where {T,S}" => "(where x (braces T S))" "x where {T S}" => "(where x (bracescat (row T S)))" - "x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))" + "x where {y for y in ys}" => "(where x (braces (generator y (iteration (in y 
ys)))))" "x where T" => "(where x T)" "x where \n T" => "(where x T)" "x where T<:S" => "(where x (<: T S))" @@ -389,7 +389,7 @@ tests = [ "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" => "(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" - "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" + "T[x for x in xs]" => "(typed_comprehension T (generator x (iteration (in x xs))))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" # Dotted forms @@ -461,8 +461,8 @@ tests = [ "while cond body end" => "(while cond (block body))" "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" # for - "for x in xs end" => "(for (= x xs) (block))" - "for x in xs, y in ys \n a \n end" => "(for (cartesian_iterator (= x xs) (= y ys)) (block a))" + "for x in xs end" => "(for (iteration (in x xs)) (block))" + "for x in xs, y in ys \n a \n end" => "(for (iteration (in x xs) (in y ys)) (block a))" # let "let x=1\n end" => "(let (block (= x 1)) (block))" "let x=1 ; end" => "(let (block (= x 1)) (block))" @@ -670,16 +670,16 @@ tests = [ "import A..." 
=> "(import (importpath A ..))" "import A; B" => "(import (importpath A))" ], - JuliaSyntax.parse_iteration_spec => [ - "i = rhs" => "(= i rhs)" - "i in rhs" => "(= i rhs)" - "i ∈ rhs" => "(= i rhs)" - "i = 1:10" => "(= i (call-i 1 : 10))" - "(i,j) in iter" => "(= (tuple-p i j) iter)" - "outer = rhs" => "(= outer rhs)" - "outer <| x = rhs" => "(= (call-i outer <| x) rhs)" - "outer i = rhs" => "(= (outer i) rhs)" - "outer (x,y) = rhs" => "(= (outer (tuple-p x y)) rhs)" + JuliaSyntax.parse_iteration_specs => [ + "i = rhs" => "(iteration (in i rhs))" + "i in rhs" => "(iteration (in i rhs))" + "i ∈ rhs" => "(iteration (in i rhs))" + "i = 1:10" => "(iteration (in i (call-i 1 : 10)))" + "(i,j) in iter" => "(iteration (in (tuple-p i j) iter))" + "outer = rhs" => "(iteration (in outer rhs))" + "outer <| x = rhs" => "(iteration (in (call-i outer <| x) rhs))" + "outer i = rhs" => "(iteration (in (outer i) rhs))" + "outer (x,y) = rhs" => "(iteration (in (outer (tuple-p x y)) rhs))" ], JuliaSyntax.parse_paren => [ # Tuple syntax with commas @@ -707,8 +707,8 @@ tests = [ "(x)" => "(parens x)" "(a...)" => "(parens (... 
a))" # Generators - "(x for a in as)" => "(parens (generator x (= a as)))" - "(x \n\n for a in as)" => "(parens (generator x (= a as)))" + "(x for a in as)" => "(parens (generator x (iteration (in a as))))" + "(x \n\n for a in as)" => "(parens (generator x (iteration (in a as))))" # Range parsing in parens "(1:\n2)" => "(parens (call-i 1 : 2))" "(1:2)" => "(parens (call-i 1 : 2))" @@ -776,19 +776,19 @@ tests = [ "[x \n, ]" => "(vect x)" "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" - "[x for a in as]" => "(comprehension (generator x (= a as)))" - "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" + "[x for a in as]" => "(comprehension (generator x (iteration (in a as))))" + "[x \n\n for a in as]" => "(comprehension (generator x (iteration (in a as))))" # parse_generator - "(x for a in as for b in bs)" => "(parens (generator x (= a as) (= b bs)))" - "(x for a in as, b in bs)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs))))" - "(x for a in as, b in bs if z)" => "(parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))" - "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))))" - "(x for a in as for b in bs if z)" => "(parens (generator x (= a as) (filter (= b bs) z)))" - "(x for a in as if z for b in bs)" => "(parens (generator x (filter (= a as) z) (= b bs)))" - "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" - "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" - "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))" - "(x for a in as if z)" => "(parens (generator x (filter (= a as) z)))" + "(x for a in as for b in bs)" => "(parens (generator x (iteration (in a as)) (iteration (in b bs))))" + "(x for a in as, b in bs)" => "(parens 
(generator x (iteration (in a as) (in b bs))))" + "(x for a in as, b in bs if z)" => "(parens (generator x (filter (iteration (in a as) (in b bs)) z)))" + "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (iteration (in a as) (in b bs)) (iteration (in c cs) (in d ds))))" + "(x for a in as for b in bs if z)" => "(parens (generator x (iteration (in a as)) (filter (iteration (in b bs)) z)))" + "(x for a in as if z for b in bs)" => "(parens (generator x (filter (iteration (in a as)) z) (iteration (in b bs))))" + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (iteration (in a as)) (filter (iteration (in b bs)) cond1) (filter (iteration (in c cs)) cond2)))" + "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (iteration (in a as)) (block cond2))))" + "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (iteration (in x xs))))" + "(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" @@ -876,8 +876,8 @@ tests = [ "\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" "\"\$(x,y)\"" => "(string (parens (error x y)))" "\"\$(x;y)\"" => "(string (parens (error x y)))" - "\"\$(x for y in z)\"" => "(string (parens (error (generator x (= y z)))))" - "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (= y z)))))" + "\"\$(x for y in z)\"" => "(string (parens (error (generator x (iteration (in y z))))))" + "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (iteration (in y z))))))" "\"\$(xs...)\"" => "(string (parens (... 
xs)))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" @@ -996,7 +996,7 @@ parsestmt_test_specs = [ ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" # unary subtype ops and newlines "a +\n\n<:" => "(call-i a + <:)" - "for\n\n<:" => "(for (= <: (error (error-t))) (block (error)) (error-t))" + "for\n\n<:" => "(for (iteration (in <: (error (error-t)))) (block (error)) (error-t))" # Empty character consumes trailing ' delimiter (ideally this could be # tested above but we don't require the input stream to be consumed in the # unit tests there. From f83bfd5033714593783dd85de4930dcfd252a457 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 30 Jul 2024 14:43:07 +1000 Subject: [PATCH 0804/1109] Use `K"function"` for short form function AST (JuliaLang/JuliaSyntax.jl#466) A pain point when writing macros is detecting all the types of things which might be lowered to functions. This is partly due to the existence of short form function definitions which in Julia's classic AST parse with `:(=)` rather than a `:function` head - to detect the meaning of `=`, one needs to traverse recursively into the left hand side of the expression. This change modifies the parsing of short form functions to use the `K"function"` kind. A new syntax flag `SHORT_FORM_FUNCTION_FLAG` is set to enable AST consumers to detect short vs long form functions. 
--- JuliaSyntax/docs/src/api.md | 1 + JuliaSyntax/src/expr.jl | 39 ++++++++---------- JuliaSyntax/src/parse_stream.jl | 64 ++++++++++++++++++++++-------- JuliaSyntax/src/parser.jl | 22 ++++++++-- JuliaSyntax/test/parse_packages.jl | 8 +++- JuliaSyntax/test/parse_stream.jl | 23 ++++++++--- JuliaSyntax/test/parser.jl | 13 ++++-- JuliaSyntax/test/test_utils.jl | 18 ++++++--- 8 files changed, 132 insertions(+), 56 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index e68bf1cf1b56a..eaa5a67e0aa1c 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -118,6 +118,7 @@ JuliaSyntax.COLON_QUOTE JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG JuliaSyntax.MUTABLE_FLAG JuliaSyntax.BARE_MODULE_FLAG +JuliaSyntax.SHORT_FORM_FUNCTION_FLAG ``` ## Syntax trees diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index adda1463ae126..04561a7e9f7fa 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -28,11 +28,6 @@ macro isexpr(ex, head, nargs) length($(esc(ex)).args) == $(esc(nargs))) end -function is_eventually_call(ex) - return ex isa Expr && (ex.head === :call || - (ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1])) -end - function _reorder_parameters!(args::Vector{Any}, params_pos) p = 0 for i = length(args):-1:1 @@ -233,16 +228,6 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if k == K"?" 
headsym = :if - elseif k == K"=" && !is_decorated(head) - a2 = args[2] - if is_eventually_call(args[1]) - if @isexpr(a2, :block) - pushfirst!(a2.args, loc) - else - # Add block for short form function locations - args[2] = Expr(:block, loc, a2) - end - end elseif k == K"macrocall" do_lambda = _extract_do_lambda!(args) _reorder_parameters!(args, 2) @@ -399,14 +384,22 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end elseif k == K"function" if length(args) > 1 - a1 = args[1] - if @isexpr(a1, :tuple) - # Convert to weird Expr forms for long-form anonymous functions. - # - # (function (tuple (... xs)) body) ==> (function (... xs) body) - if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...)) - # function (xs...) \n body end - args[1] = a11 + if has_flags(head, SHORT_FORM_FUNCTION_FLAG) + a2 = args[2] + if !@isexpr(a2, :block) + args[2] = Expr(:block, a2) + end + headsym = :(=) + else + a1 = args[1] + if @isexpr(a1, :tuple) + # Convert to weird Expr forms for long-form anonymous functions. + # + # (function (tuple (... xs)) body) ==> (function (... xs) body) + if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...)) + # function (xs...) 
\n body end + args[1] = a11 + end end end pushfirst!((args[2]::Expr).args, loc) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index b4ddbeafe8c01..d91c2d29928a0 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -49,6 +49,11 @@ Set for K"toplevel" which is delimited by parentheses """ const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) +""" +Set for K"function" in short form definitions such as `f() = 1` +""" +const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5) + """ Set for K"struct" when mutable """ @@ -143,6 +148,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) has_flags(head, COLON_QUOTE) && (str = str*"-:") elseif kind(head) == K"toplevel" has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") + elseif kind(head) == K"function" + has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") elseif kind(head) == K"struct" has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") elseif kind(head) == K"module" @@ -646,17 +653,17 @@ function peek_behind(stream::ParseStream, pos::ParseStreamPosition) end function first_child_position(stream::ParseStream, pos::ParseStreamPosition) + ranges = stream.ranges + @assert pos.range_index > 0 + parent = ranges[pos.range_index] # Find the first nontrivia range which is a child of this range but not a # child of the child c = 0 - @assert pos.range_index > 0 - parent = stream.ranges[pos.range_index] for i = pos.range_index-1:-1:1 - if stream.ranges[i].first_token < parent.first_token + if ranges[i].first_token < parent.first_token break end - if (c == 0 || stream.ranges[i].first_token < stream.ranges[c].first_token) && - !is_trivia(stream.ranges[i]) + if (c == 0 || ranges[i].first_token < ranges[c].first_token) && !is_trivia(ranges[i]) c = i end end @@ -670,19 +677,44 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition) end end - if c != 0 - if t != 0 - if stream.ranges[c].first_token > t - # Need a child 
index strictly before `t`. `c=0` works. - return ParseStreamPosition(t, 0) - else - return ParseStreamPosition(stream.ranges[c].last_token, c) - end - else - return ParseStreamPosition(stream.ranges[c].last_token, c) + if c == 0 || (t != 0 && ranges[c].first_token > t) + # Return leaf node at `t` + return ParseStreamPosition(t, 0) + else + # Return interior node at `c` + return ParseStreamPosition(ranges[c].last_token, c) + end +end + +function last_child_position(stream::ParseStream, pos::ParseStreamPosition) + ranges = stream.ranges + @assert pos.range_index > 0 + parent = ranges[pos.range_index] + # Find the last nontrivia range which is a child of this range + c = 0 + if pos.range_index > 1 + i = pos.range_index-1 + if ranges[i].first_token >= parent.first_token + # Valid child of current range + c = i end + end + + # Find last nontrivia token + t = 0 + for i = parent.last_token:-1:parent.first_token + if !is_trivia(stream.tokens[i]) + t = i + break + end + end + + if c == 0 || (t != 0 && ranges[c].last_token < t) + # Return leaf node at `t` + return ParseStreamPosition(t, 0) else - return ParseStreamPosition(t, c) + # Return interior node at `c` + return ParseStreamPosition(ranges[c].last_token, c) end end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 9a2ac7035716a..ad99e4c085d55 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -133,6 +133,9 @@ function first_child_position(ps::ParseState, pos::ParseStreamPosition) first_child_position(ps.stream, pos) end +function last_child_position(ps::ParseState, pos::ParseStreamPosition) + last_child_position(ps.stream, pos) +end #------------------------------------------------------------------------------- # Parser Utils @@ -325,6 +328,12 @@ function was_eventually_call(ps::ParseState) return true elseif b.kind == K"where" || b.kind == K"parens" || (b.kind == K"::" && has_flags(b.flags, INFIX_FLAG)) + if b.kind == K"::" + p_last = last_child_position(ps, p) + if p == 
p_last + return false + end + end p = first_child_position(ps, p) else return false @@ -618,12 +627,19 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { parse_assignment(ps, down) emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) else - # a += b ==> (+= a b) - # a .= b ==> (.= a b) + # f() = 1 ==> (function-= (call f) 1) + # f() .= 1 ==> (.= (call f) 1) + # a += b ==> (+= a b) + # a .= b ==> (.= a b) + is_short_form_func = k == K"=" && !is_dotted(t) && was_eventually_call(ps) bump(ps, TRIVIA_FLAG) bump_trivia(ps) + # Syntax Edition TODO: We'd like to call `down` here when + # is_short_form_func is true, to prevent `f() = 1 = 2` from parsing. parse_assignment(ps, down) - emit(ps, mark, k, flags(t)) + emit(ps, mark, + is_short_form_func ? K"function" : k, + is_short_form_func ? SHORT_FORM_FUNCTION_FLAG : flags(t)) end end diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index 26bf39fa4ca6a..ef255d656820b 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -20,7 +20,13 @@ base_path = let p end @testset "Parse Base at $base_path" begin - test_parse_all_in_path(base_path) + test_parse_all_in_path(base_path) do f + if endswith(f, "gmp.jl") + # Loose comparison due to `f(::g(w) = z) = a` syntax + return exprs_roughly_equal + end + return exprs_equal_no_linenum + end end base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 037e025e3fca0..f7c0bd60a586c 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -7,7 +7,7 @@ using JuliaSyntax: ParseStream, peek, peek_token, bump, bump_trivia, bump_invisible, emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG, - ParseStreamPosition, first_child_position + ParseStreamPosition, first_child_position, last_child_position # Here we manually issue parse events in the order the 
Julia parser would issue # them @@ -110,27 +110,40 @@ end st = parse_sexpr("((a b) c)") child1_pos = first_child_position(st, position(st)) @test child1_pos == ParseStreamPosition(7, 1) - child2_pos = first_child_position(st, child1_pos) - @test child2_pos == ParseStreamPosition(4, 0) + @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 0) + @test last_child_position(st, position(st)) == ParseStreamPosition(9, 0) + @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 0) st = parse_sexpr("( (a b) c)") child1_pos = first_child_position(st, position(st)) @test child1_pos == ParseStreamPosition(8, 1) - child2_pos = first_child_position(st, child1_pos) - @test child2_pos == ParseStreamPosition(5, 0) + @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 0) + @test last_child_position(st, position(st)) == ParseStreamPosition(10, 0) + @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 0) st = parse_sexpr("(a (b c))") @test first_child_position(st, position(st)) == ParseStreamPosition(3, 0) + child2_pos = last_child_position(st, position(st)) + @test child2_pos == ParseStreamPosition(9, 1) + @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 0) + @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 0) st = parse_sexpr("( a (b c))") @test first_child_position(st, position(st)) == ParseStreamPosition(4, 0) + child2_pos = last_child_position(st, position(st)) + @test child2_pos == ParseStreamPosition(10, 1) + @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 0) + @test last_child_position(st, child2_pos) == ParseStreamPosition(9, 0) st = parse_sexpr("a (b c)") @test first_child_position(st, position(st)) == ParseStreamPosition(5, 0) + @test last_child_position(st, position(st)) == ParseStreamPosition(7, 0) st = parse_sexpr("(a) (b c)") @test first_child_position(st, position(st)) == ParseStreamPosition(7, 0) + @test last_child_position(st, position(st)) == 
ParseStreamPosition(9, 0) st = parse_sexpr("(() ())") @test first_child_position(st, position(st)) == ParseStreamPosition(4, 1) + @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 1a157e3d587c8..ef6c5df5738f2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -71,6 +71,13 @@ tests = [ "a .~ b" => "(dotcall-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" "[a~b]" => "(vect (call-i a ~ b))" + "f(x) .= 1" => "(.= (call f x) 1)" + "::g() = 1" => "(= (::-pre (call g)) 1)" + "f(x) = 1" => "(function-= (call f x) 1)" + "f(x)::T = 1" => "(function-= (::-i (call f x) T) 1)" + "f(x) where S where U = 1" => "(function-= (where (where (call f x) S) U) 1)" + "(f(x)::T) where S = 1" => "(function-= (where (parens (::-i (call f x) T)) S) 1)" + "f(x) = 1 = 2" => "(function-= (call f x) (= 1 2))" # Should be a warning! ], JuliaSyntax.parse_pair => [ "a => b" => "(call-i a => b)" @@ -449,7 +456,7 @@ tests = [ ], JuliaSyntax.parse_resword => [ # In normal_context - "begin f() where T = x end" => "(block (= (where (call f) T) x))" + "begin f() where T = x end" => "(block (function-= (where (call f) T) x))" # block "begin end" => "(block)" "begin a ; b end" => "(block a b)" @@ -955,14 +962,14 @@ tests = [ "if true \n public A, B \n end" => PARSE_ERROR "public export=true foo, bar" => PARSE_ERROR # but these may be "public experimental=true foo, bar" => PARSE_ERROR # supported soon ;) - "public(x::String) = false" => "(= (call public (::-i x String)) false)" + "public(x::String) = false" => "(function-= (call public (::-i x String)) false)" "module M; export @a; end" => "(module M (block (export @a)))" "module M; public @a; end" => "(module M (block (public @a)))" "module M; export ⤈; end" => "(module M (block (export ⤈)))" "module M; public ⤈; end" => "(module M (block (public ⤈)))" "public = 4" => "(= public 4)" "public[7] = 5" => "(= (ref public 7) 5)" - 
"public() = 6" => "(= (call public) 6)" + "public() = 6" => "(function-= (call public) 6)" ]), JuliaSyntax.parse_docstring => [ """ "notdoc" ] """ => "(string \"notdoc\")" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index b16aef3120871..28a6992311f95 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -96,6 +96,11 @@ function exprs_equal_no_linenum(fl_ex, ex) remove_all_linenums!(deepcopy(ex)) == remove_all_linenums!(deepcopy(fl_ex)) end +function is_eventually_call(ex) + return ex isa Expr && (ex.head === :call || + (ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1])) +end + # Compare Expr from reference parser expression to JuliaSyntax parser, ignoring # differences due to bugs in the reference parser. function exprs_roughly_equal(fl_ex, ex) @@ -149,7 +154,7 @@ function exprs_roughly_equal(fl_ex, ex) fl_args[1] = Expr(:tuple, Expr(:parameters, kwargs...), posargs...) elseif h == :for iterspec = args[1] - if JuliaSyntax.is_eventually_call(iterspec.args[1]) && + if is_eventually_call(iterspec.args[1]) && Meta.isexpr(iterspec.args[2], :block) blk = iterspec.args[2] if length(blk.args) == 2 && blk.args[1] isa LineNumberNode @@ -158,6 +163,11 @@ function exprs_roughly_equal(fl_ex, ex) iterspec.args[2] = blk.args[2] end end + elseif (h == :(=) || h == :kw) && Meta.isexpr(fl_args[1], :(::), 1) && + Meta.isexpr(fl_args[2], :block, 2) && fl_args[2].args[1] isa LineNumberNode + # The flisp parser adds an extra block around `w` in the following case + # f(::g(z) = w) = 1 + fl_args[2] = fl_args[2].args[2] end if length(fl_args) != length(args) return false @@ -169,9 +179,7 @@ function exprs_roughly_equal(fl_ex, ex) fl_args[1] = Expr(:macrocall, map(kw_to_eq, args[1].args)...) 
end for i = 1:length(args) - flarg = fl_args[i] - arg = args[i] - if !exprs_roughly_equal(flarg, arg) + if !exprs_roughly_equal(fl_args[i], args[i]) return false end end @@ -307,7 +315,7 @@ between flisp and JuliaSyntax parsers and return the source text of those subtrees. """ function reduce_tree(text::AbstractString; kws...) - tree = parseall(SyntaxNode, text) + tree = parseall(SyntaxNode, text, ignore_warnings=true) sourcetext.(reduce_tree(tree; kws...)) end From 7496773f6578414851bdb12b00303136cff58894 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Tue, 30 Jul 2024 06:59:52 -0400 Subject: [PATCH 0805/1109] add more specific error for invalid identifier start chars (JuliaLang/JuliaSyntax.jl#421) * add more specific error for invalid identifier start chars * Tweak ErrorIdentifierStart message + add diagnostics test. --------- Co-authored-by: Claire Foster --- JuliaSyntax/src/kinds.jl | 2 ++ JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/src/tokenize.jl | 5 +++-- JuliaSyntax/test/diagnostics.jl | 1 + JuliaSyntax/test/tokenize.jl | 1 + 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 0ef0b815a7fe6..8aa8a9b2c7624 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -212,6 +212,7 @@ register_kinds!(JuliaSyntax, 0, [ "ErrorOverLongCharacter" "ErrorInvalidUTF8" "ErrorInvisibleChar" + "ErrorIdentifierStart" "ErrorUnknownCharacter" "ErrorBidiFormatting" # Generic error @@ -1175,6 +1176,7 @@ const _token_error_descriptions = Dict{Kind, String}( K"ErrorOverLongCharacter"=>"character literal contains multiple characters", K"ErrorInvalidUTF8"=>"invalid UTF-8 sequence", K"ErrorInvisibleChar"=>"invisible character", + K"ErrorIdentifierStart" => "identifier cannot begin with character", K"ErrorUnknownCharacter"=>"unknown unicode character", K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting", K"ErrorInvalidOperator" => "invalid operator", diff --git 
a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index d91c2d29928a0..8aad71df4be79 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1083,7 +1083,7 @@ function validate_tokens(stream::ParseStream) elseif is_error(k) && k != K"error" # Emit messages for non-generic token errors tokstr = String(txtbuf[tokrange]) - msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter" + msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart" "$(_token_error_descriptions[k]) $(repr(tokstr[1]))" elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting" "$(_token_error_descriptions[k]) $(repr(tokstr))" diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 18440e98e878e..de2daa0f95dcd 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -538,8 +538,9 @@ function _next_token(l::Lexer, c) return emit(l, k) else emit(l, - !isvalid(c) ? K"ErrorInvalidUTF8" : - is_invisible_char(c) ? K"ErrorInvisibleChar" : + !isvalid(c) ? K"ErrorInvalidUTF8" : + is_invisible_char(c) ? K"ErrorInvisibleChar" : + is_identifier_char(c) ? 
K"ErrorIdentifierStart" : K"ErrorUnknownCharacter") end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 1d1f9e5dae008..ff9e76a2b9f8d 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -19,6 +19,7 @@ end @test diagnostic("a$(c)b") == Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))") end + @test diagnostic("₁") == Diagnostic(1, 3, :error, "identifier cannot begin with character '₁'") @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'") @test diagnostic("\"X \u202a X\"") == Diagnostic(2, 8, :error, "unbalanced bidirectional unicode formatting \"X \\u202a X\"") diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 0837b9c880c6f..eb30370fc6885 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -1009,6 +1009,7 @@ end @testset "invalid UTF-8 characters" begin @test onlytok("\x00") == K"ErrorUnknownCharacter" + @test onlytok("₁") == K"ErrorIdentifierStart" bad_chars = [ first("\xe2") # malformed From fc5082f888782b0263a8b18b65eab715f1891b70 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 30 Jul 2024 21:04:33 +1000 Subject: [PATCH 0806/1109] Make `using :A` an error; fix `using A: (..)` warning (JuliaLang/JuliaSyntax.jl#477) --- JuliaSyntax/src/parser.jl | 10 ++++++---- JuliaSyntax/test/diagnostics.jl | 5 ++++- JuliaSyntax/test/parser.jl | 5 +++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index ad99e4c085d55..94a7b8da0fb84 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2403,6 +2403,8 @@ function parse_atsym(ps::ParseState, allow_quotes=true) # export outer ==> (export outer) # export ($f) ==> (export ($ f)) mark = position(ps) + # Syntax Edition TODO: make all the various ways to quote things inside + # import paths an error and require `var""` in the few remaining cases. 
if allow_quotes && peek(ps) == K":" && !is_closing_token(ps, peek(ps,2)) # import A.:+ ==> (import (importpath A (quote-: +))) emit_diagnostic(ps, warning="quoting with `:` is not required here") @@ -2423,10 +2425,10 @@ function parse_atsym(ps::ParseState, allow_quotes=true) warn_parens = true end end - if warn_parens + b = peek_behind(ps, pos) + if warn_parens && b.orig_kind != K".." emit_diagnostic(ps, mark, warning="parentheses are not required here") end - b = peek_behind(ps, pos) ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || (!b.is_leaf && b.kind in KSet"$ var") if !ok @@ -2497,7 +2499,7 @@ function parse_import(ps::ParseState, word, has_import_prefix) # import A: x as y ==> (import (: (importpath A) (as (importpath x) y))) # using A: x as y ==> (using (: (importpath A) (as (importpath x) y))) bump(ps, TRIVIA_FLAG) - parse_atsym(ps) + parse_atsym(ps, false) emit(ps, mark, K"as") if word == K"using" && !has_import_prefix # using A as B ==> (using (error (as (importpath A) B))) @@ -2552,7 +2554,7 @@ function parse_import_path(ps::ParseState) else # import @x ==> (import (importpath @x)) # import $A ==> (import (importpath ($ A))) - parse_atsym(ps) + parse_atsym(ps, false) end while true t = peek_token(ps) diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index ff9e76a2b9f8d..1482a604ccedc 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -125,12 +125,15 @@ end Diagnostic(9, 9, :warning, "space between dots in import path") @test diagnostic("import A.:+") == Diagnostic(10, 10, :warning, "quoting with `:` is not required here") - # No warning for import `:` symbol + # No warnings for imports of `:` and parenthesized `(..)` @test diagnostic("import A.:, :", allow_multiple=true) == [] + @test diagnostic("import A: (..)", allow_multiple=true) == [] @test diagnostic("import A.(:+)") == Diagnostic(10, 13, :warning, "parentheses are not required here") @test diagnostic("export 
(x)") == Diagnostic(8, 10, :warning, "parentheses are not required here") + @test diagnostic("import :A") == + Diagnostic(8, 9, :error, "expected identifier") @test diagnostic("export :x") == Diagnostic(8, 9, :error, "expected identifier") @test diagnostic("public = 4", version=v"1.11") == diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index ef6c5df5738f2..b7b7839df5fe4 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -676,6 +676,11 @@ tests = [ "import A.⋆.f" => "(import (importpath A ⋆ f))" "import A..." => "(import (importpath A ..))" "import A; B" => "(import (importpath A))" + # Colons not allowed first in import paths + # but are allowed in trailing components (#473) + "using :A" => "(using (importpath (error (quote-: A))))" + "using A: :b" => "(using (: (importpath A) (importpath (error (quote-: b)))))" + "using A: b.:c" => "(using (: (importpath A) (importpath b (quote-: c))))" ], JuliaSyntax.parse_iteration_specs => [ "i = rhs" => "(iteration (in i rhs))" From d8b3b4cf40d835a1137bb406bbac704432e4b846 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 31 Jul 2024 11:34:01 +1000 Subject: [PATCH 0807/1109] =?UTF-8?q?Allow=20any=20of=20`=E2=89=94=20?= =?UTF-8?q?=E2=A9=B4=20=E2=89=95`=20as=20identifiers=20(JuliaLang/JuliaSyn?= =?UTF-8?q?tax.jl#478)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These assignment-precedence operators shouldn't be special syntax and should instead be usable as normal identifiers just like `~`. Also add more test cases for the true syntactic operators. 
Fixes JuliaLang/JuliaSyntax.jl#405 --- JuliaSyntax/src/kinds.jl | 5 ++++- JuliaSyntax/src/parser.jl | 5 +++-- JuliaSyntax/test/parser.jl | 28 ++++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 8aa8a9b2c7624..800d962200162 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -298,6 +298,7 @@ register_kinds!(JuliaSyntax, 0, [ # Level 1 "BEGIN_ASSIGNMENTS" + "BEGIN_SYNTACTIC_ASSIGNMENTS" "=" "+=" "-=" # Also used for "−=" @@ -314,9 +315,10 @@ register_kinds!(JuliaSyntax, 0, [ "\\=" "&=" ":=" - "~" "\$=" "⊻=" + "END_SYNTACTIC_ASSIGNMENTS" + "~" "≔" "⩴" "≕" @@ -1227,6 +1229,7 @@ is_prec_pipe_lt(x) = kind(x) == K"<|" is_prec_pipe_gt(x) = kind(x) == K"|>" is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS" is_macro_name(x) = K"BEGIN_MACRO_NAMES" <= kind(x) <= K"END_MACRO_NAMES" +is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS" function is_number(x) kind(x) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float", K"Float32") diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 94a7b8da0fb84..2287e8aad6eee 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -279,7 +279,7 @@ function is_syntactic_operator(k) # TODO: Do we need to disallow dotted and suffixed forms here? # The lexer itself usually disallows such tokens, so it's not clear whether # we need to handle them. (Though note `.->` is a token...) - return k in KSet"&& || . ... ->" || (is_prec_assignment(k) && k != K"~") + return k in KSet"&& || . ... ->" || is_syntactic_assignment(k) end function is_syntactic_unary_op(k) @@ -617,7 +617,8 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # [a ~b] ==> (hcat a (call-pre ~ b)) return end - # ~ is the only non-syntactic assignment-precedence operator. 
+ # ~ is currently the only assignment-precedence operator which is parsed as a call. + # TODO: Make the other non-syntactic assignments such as `≔ ⩴ ≕` into calls as well? # a ~ b ==> (call-i a ~ b) # a .~ b ==> (dotcall-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b7b7839df5fe4..d69811ad485a2 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -763,12 +763,36 @@ tests = [ """var"x"1""" => "(var x (error-t))" """var"x"y""" => "(var x (error-t))" # Standalone syntactic operators are errors - "+=" => "(error +=)" "?" => "(error ?)" + "&&" => "(error &&)" + "||" => "(error ||)" + "." => "(error .)" + "..." => "(error ...)" + "+=" => "(error +=)" + "-=" => "(error -=)" + "*=" => "(error *=)" + "/=" => "(error /=)" + "//=" => "(error //=)" + "|=" => "(error |=)" + "^=" => "(error ^=)" + "÷=" => "(error ÷=)" + "%=" => "(error %=)" + "<<=" => "(error <<=)" + ">>=" => "(error >>=)" + ">>>="=> "(error >>>=)" + "\\=" => "(error \\=)" + "&=" => "(error &=)" + ":=" => "(error :=)" + "\$=" => "(error \$=)" + "⊻=" => "(error ⊻=)" ".+=" => "(error (. +=))" # Normal operators "+" => "+" + # Assignment-precedence operators which can be used as identifiers "~" => "~" + "≔" => "≔" + "⩴" => "⩴" + "≕" => "≕" # Quoted syntactic operators allowed ":+=" => "(quote-: +=)" ":.=" => "(quote-: (. 
=))" @@ -777,7 +801,7 @@ tests = [ ":end" => "(quote-: end)" ":(end)" => "(quote-: (parens (error-t)))" ":<:" => "(quote-: <:)" - # unexpect = + # unexpected = "=" => "(error =)" # parse_cat "[]" => "(vect)" From 5081f3ab5f691fccc6e461e7f0774fddacbd50e8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 2 Aug 2024 09:07:37 +1000 Subject: [PATCH 0808/1109] Fixes for AST reference documentation (JuliaLang/JuliaSyntax.jl#479) --- JuliaSyntax/docs/src/reference.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index a98662ee4e7f9..9b0c9e62b24ee 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -76,7 +76,8 @@ class of tokenization errors and lets the parser deal with them. * Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) -* Iterations are represented with the `iteration` head rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration i is) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a longer `iteration` block rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. +* Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. 
Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a nested `(iteration (in a as) (in b bs))` rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. +* Short form functions like `f(x) = x + 1` are represented with the `function` head rather than the `=` head. In this case the `SHORT_FORM_FUNCTION_FLAG` flag is set to allow the surface syntactic form to be easily distinguished from long form functions. ## More detail on tree differences @@ -123,8 +124,8 @@ source much more closely. For example, `(xy for x in xs for y in ys)` is parsed ``` (generator xy - (iteration x xs) - (iteration y ys)) + (iteration (in x xs)) + (iteration (in y ys))) ``` And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as @@ -132,7 +133,7 @@ And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as ``` (generator xy - (iteration x xs y ys)) + (iteration (in x xs) (in y ys))) ``` ### Whitespace trivia inside strings From 4ba2fa378a8e126a575373dde43f14c13a4285b6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 2 Aug 2024 14:42:58 +1000 Subject: [PATCH 0809/1109] Fix premature lowering of cmd strings (JuliaLang/JuliaSyntax.jl#480) Unadorned backtick command syntax was prematurely lowered to a macrocall in the parser with the `core_@cmd` macro name. Remove this special macro name (almost the last of the special zero-width tokens to be removed!) and rely instead on the presence of unadorned `cmdstring` to do the lowering to a `Core.@cmd` call later during Expr conversion. Also some clean up `Kind`s, grouping them more sensibly and removing the obsolete kinds `K"core_@int128_str"` `K"core_@uint128_str"` `K"core_@big_str"`. 
--- JuliaSyntax/docs/src/reference.md | 1 + JuliaSyntax/src/expr.jl | 23 ++++-- JuliaSyntax/src/kinds.jl | 109 ++++++++++++----------------- JuliaSyntax/src/literal_parsing.jl | 2 - JuliaSyntax/src/parser.jl | 8 +-- JuliaSyntax/test/expr.jl | 18 +++++ JuliaSyntax/test/parser.jl | 14 ++-- 7 files changed, 90 insertions(+), 85 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 9b0c9e62b24ee..84fe09bcf6539 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -48,6 +48,7 @@ the source text more closely. * Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) * Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) * `return` without a value has zero children, rather than lowering to `return nothing` (#220) +* Command syntax `` `foo` `` parses into a `cmdstring` tree node wrapping the string, as `(cmdstring "foo")` (#438). These are lowered to a macro call later rather than by the parser. ### Containers for string-like constructs diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 04561a7e9f7fa..0808916bf979a 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -70,9 +70,7 @@ end function _leaf_to_Expr(source, txtbuf, head, srcrange, node) k = kind(head) - if k == K"core_@cmd" - return GlobalRef(Core, Symbol("@cmd")) - elseif k == K"MacroName" && view(source, srcrange) == "." + if k == K"MacroName" && view(source, srcrange) == "." return Symbol("@__dot__") elseif is_error(k) return k == K"error" ? @@ -102,7 +100,7 @@ end # # This function concatenating adjacent string chunks together as done in the # reference parser. 
-function _string_to_Expr(k, args) +function _string_to_Expr(args) args2 = Any[] i = 1 while i <= length(args) @@ -140,7 +138,7 @@ function _string_to_Expr(k, args) # """\n a\n b""" ==> "a\nb" return only(args2) else - # This only happens when k == K"string" or when an error has occurred. + # This only happens when the kind is K"string" or when an error has occurred. return Expr(:string, args2...) end end @@ -212,13 +210,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # K"var" and K"char" nodes, but this discounts having embedded error # nodes when ignore_errors=true is set. return args[1] - elseif k == K"string" || k == K"cmdstring" - return _string_to_Expr(k, args) + elseif k == K"string" + return _string_to_Expr(args) end loc = source_location(LineNumberNode, source, first(srcrange)) endloc = source_location(LineNumberNode, source, last(srcrange)) + if k == K"cmdstring" + return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc, _string_to_Expr(args)) + end + _fixup_Expr_children!(head, loc, args) headstr = untokenize(head, include_flag_suff=false) @@ -229,6 +231,13 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if k == K"?" 
headsym = :if elseif k == K"macrocall" + if length(args) == 2 + a2 = args[2] + if @isexpr(a2, :macrocall) && kind(childheads[1]) == K"CmdMacroName" + # Fix up for custom cmd macros like `` foo`x` `` + args[2] = a2.args[3] + end + end do_lambda = _extract_do_lambda!(args) _reorder_parameters!(args, 2) insert!(args, 2, loc) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 800d962200162..30d16e3e29e89 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -189,35 +189,24 @@ kind(k::Kind) = k #------------------------------------------------------------------------------- # Kinds used by JuliaSyntax register_kinds!(JuliaSyntax, 0, [ - "None" # Placeholder; never emitted by lexer - "EndMarker" # EOF + # Whitespace "Comment" "Whitespace" "NewlineWs" # newline-containing whitespace - "Identifier" - "@" - "," - ";" - "BEGIN_ERRORS" - # Tokenization errors - "ErrorEofMultiComment" - "ErrorInvalidNumericConstant" - "ErrorHexFloatMustContainP" - "ErrorAmbiguousNumericConstant" - "ErrorAmbiguousNumericDotMultiply" - "ErrorInvalidInterpolationTerminator" - "ErrorNumericOverflow" - "ErrorInvalidEscapeSequence" - "ErrorOverLongCharacter" - "ErrorInvalidUTF8" - "ErrorInvisibleChar" - "ErrorIdentifierStart" - "ErrorUnknownCharacter" - "ErrorBidiFormatting" - # Generic error - "error" - "END_ERRORS" + # Identifiers + "BEGIN_IDENTIFIERS" + "Identifier" + # Macro names are modelled as special kinds of identifiers because the full + # macro name may not appear as characters in the source: The `@` may be + # detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd + # suffix appended. 
+ "BEGIN_MACRO_NAMES" + "MacroName" + "StringMacroName" + "CmdMacroName" + "END_MACRO_NAMES" + "END_IDENTIFIERS" "BEGIN_KEYWORDS" "baremodule" @@ -278,6 +267,12 @@ register_kinds!(JuliaSyntax, 0, [ "END_LITERAL" "BEGIN_DELIMITERS" + # Punctuation + "@" + "," + ";" + + # Paired delimiters "[" "]" "{" @@ -1028,45 +1023,6 @@ register_kinds!(JuliaSyntax, 0, [ "END_UNICODE_OPS" "END_OPS" - # The following kinds are emitted by the parser. There's two types of these: - - # 1. Implied tokens which have a position but might have zero width in the - # source text. - # - # In some cases we want to generate parse tree nodes in a standard form, - # but some of the leaf tokens are implied rather than existing in the - # source text, or the lexed tokens need to be re-kinded to represent - # special forms which only the parser can infer. These are "parser tokens". - # - # Some examples: - # - # Docstrings - the macro name is invisible - # "doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1)) - # - # String macros - the macro name does not appear in the source text, so we - # need a special kind of token to imply it. - # - # In these cases, we use some special kinds which can be emitted as zero - # width tokens to keep the parse tree more uniform. - "BEGIN_PARSER_TOKENS" - - "TOMBSTONE" # Empty placeholder for kind to be filled later - - # Macro names are modelled as a special kind of identifier because the - # @ may not be attached to the macro name in the source (or may not be - # associated with a token at all in the case of implied macro calls - # like CORE_DOC_MACRO_NAME) - "BEGIN_MACRO_NAMES" - "MacroName" - "StringMacroName" - "CmdMacroName" - "core_@cmd" - "core_@int128_str" - "core_@uint128_str" - "core_@big_str" - "END_MACRO_NAMES" - "END_PARSER_TOKENS" - # 2. Nonterminals which are exposed in the AST, but where the surface # syntax doesn't have a token corresponding to the node type. 
"BEGIN_SYNTAX_KINDS" @@ -1108,6 +1064,31 @@ register_kinds!(JuliaSyntax, 0, [ # Container for a single statement/atom plus any trivia and errors "wrapper" "END_SYNTAX_KINDS" + + # Special tokens + "TOMBSTONE" # Empty placeholder for kind to be filled later + "None" # Placeholder; never emitted by lexer + "EndMarker" # EOF + + "BEGIN_ERRORS" + # Tokenization errors + "ErrorEofMultiComment" + "ErrorInvalidNumericConstant" + "ErrorHexFloatMustContainP" + "ErrorAmbiguousNumericConstant" + "ErrorAmbiguousNumericDotMultiply" + "ErrorInvalidInterpolationTerminator" + "ErrorNumericOverflow" + "ErrorInvalidEscapeSequence" + "ErrorOverLongCharacter" + "ErrorInvalidUTF8" + "ErrorInvisibleChar" + "ErrorIdentifierStart" + "ErrorUnknownCharacter" + "ErrorBidiFormatting" + # Generic error + "error" + "END_ERRORS" ]) #------------------------------------------------------------------------------- diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index a027985ae62f7..5a744f97108bc 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -438,8 +438,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) Symbol("@$(normalize_identifier(val_str))_str") elseif k == K"CmdMacroName" Symbol("@$(normalize_identifier(val_str))_cmd") - elseif k == K"core_@cmd" - Symbol("core_@cmd") elseif is_syntax_kind(head) nothing elseif is_keyword(k) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 2287e8aad6eee..4f97b8d51a20c 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -3596,12 +3596,10 @@ function parse_atom(ps::ParseState, check_identifiers=true) elseif is_string_delim(leading_kind) parse_string(ps, false) elseif leading_kind in KSet"` ```" - # `` ==> (macrocall core_@cmd (cmdstring-r "")) - # `cmd` ==> (macrocall core_@cmd (cmdstring-r "cmd")) - # ```cmd``` ==> (macrocall core_@cmd (cmdstring-s-r "cmd")) - bump_invisible(ps, K"core_@cmd") + # `` ==> 
(cmdstring-r "") + # `cmd` ==> (cmdstring-r "cmd") + # ```cmd``` ==> (cmdstring-s-r "cmd") parse_string(ps, true) - emit(ps, mark, K"macrocall") elseif is_literal(leading_kind) # 42 ==> 42 bump(ps) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 71849da14289e..0a5dce45b4840 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -663,6 +663,24 @@ Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f) end + @testset "String and cmd macros" begin + # Custom string macros + @test parsestmt("foo\"str\"") == + Expr(:macrocall, Symbol("@foo_str"), LineNumberNode(1), "str") + # Bare @cmd + @test parsestmt("\n`str`") == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(2), "str") + # Custom cmd macros + @test parsestmt("foo`str`") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str") + @test parsestmt("foo```\n a\n b```") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "a\nb") + # Expr conversion distinguishes from explicit calls to a macro of the same name + @test parsestmt("@foo_cmd `str`") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "str")) + end + @testset "return" begin @test parsestmt("return x") == Expr(:return, :x) @test parsestmt("return") == Expr(:return, nothing) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index d69811ad485a2..18ad2eb2ac9ce 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -847,9 +847,9 @@ tests = [ # __dot__ macro "@. x" => "(macrocall @. 
x)" # cmd strings - "``" => "(macrocall core_@cmd (cmdstring-r \"\"))" - "`cmd`" => "(macrocall core_@cmd (cmdstring-r \"cmd\"))" - "```cmd```" => "(macrocall core_@cmd (cmdstring-s-r \"cmd\"))" + "``" => "(cmdstring-r \"\")" + "`cmd`" => "(cmdstring-r \"cmd\")" + "```cmd```" => "(cmdstring-s-r \"cmd\")" # literals "true" => "true" "42" => "42" @@ -922,7 +922,7 @@ tests = [ # Triple-quoted dedenting: "\"\"\"\nx\"\"\"" => raw"""(string-s "x")""" "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" - "```\n x\n y```" => raw"""(macrocall core_@cmd (cmdstring-s-r "x\n" "y"))""" + "```\n x\n y```" => raw"""(cmdstring-s-r "x\n" "y")""" # Various newlines (\n \r \r\n) and whitespace (' ' \t) "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")""" "\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")""" @@ -976,7 +976,7 @@ tests = [ "'ab'" => "(char (ErrorOverLongCharacter))" "\"\xf5\"" => "(string (ErrorInvalidUTF8))" "'\xf5'" => "(char (ErrorInvalidUTF8))" - "`\xf5`" => "(macrocall core_@cmd (cmdstring-r (ErrorInvalidUTF8)))" + "`\xf5`" => "(cmdstring-r (ErrorInvalidUTF8))" "10.0e1000'" => "(ErrorNumericOverflow)" "10.0f100'" => "(ErrorNumericOverflow)" ], @@ -1053,8 +1053,8 @@ parsestmt_test_specs = [ # detecting raw vs non-raw strings. The old parser was tightly coupled to # the lexer and the parser state was used to disambiguate these cases. 
"x in' '" => "(call-i x in (char (error)))" - "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (macrocall core_@cmd (cmdstring-r (error-t)))) \$ (error)))" - "var\"#\"`str`" => "(juxtapose (var # (error-t)) (macrocall core_@cmd (cmdstring-r \"str\")))" + "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (cmdstring-r (error-t))) \$ (error)))" + "var\"#\"`str`" => "(juxtapose (var # (error-t)) (cmdstring-r \"str\"))" "var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))" ] From 08b268c6294933e78145d08f8ee8ecf6ded74109 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 2 Aug 2024 11:22:39 -0400 Subject: [PATCH 0810/1109] Allow macrocall in function def syntax (JuliaLang/JuliaSyntax.jl#456) * Allow macrocall in function def syntax Goes with https://github.com/JuliaLang/julia/pull/55040 * Update src/parser.jl Co-authored-by: Claire Foster * Test to cover function declaration with `var""` syntax * Make `function (@f(x)) body end` an ambiguity error This case is ambiguous as it might be either one of the following; require the user to explicitly disambiguate between them ``` function (@f(x),) body end function @f(x) body end ``` For the same reasons, `function ($f) body end` is also ambiguous. Also fix parsing of `function (f(x),) end` to correctly emit a tuple. 
--------- Co-authored-by: Claire Foster --- JuliaSyntax/src/parser.jl | 34 ++++++++++++++++++++++++--------- JuliaSyntax/test/diagnostics.jl | 3 +++ JuliaSyntax/test/parser.jl | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 4f97b8d51a20c..8da46517775b6 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2128,14 +2128,15 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # * The whole function declaration, in parens bump(ps, TRIVIA_FLAG) is_empty_tuple = peek(ps, skip_newlines=true) == K")" - opts = parse_brackets(ps, K")") do _, _, _, _ + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs _parsed_call = was_eventually_call(ps) _needs_parse_call = peek(ps, 2) ∈ KSet"( ." - _is_anon_func = !_needs_parse_call && !_parsed_call + _is_anon_func = (!_needs_parse_call && !_parsed_call) || had_commas return (needs_parameters = _is_anon_func, is_anon_func = _is_anon_func, parsed_call = _parsed_call, - needs_parse_call = _needs_parse_call) + needs_parse_call = _needs_parse_call, + maybe_grouping_parens = !had_commas && !had_splat && num_semis == 0 && num_subexprs == 1) end is_anon_func = opts.is_anon_func parsed_call = opts.parsed_call @@ -2146,7 +2147,14 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (x,y) end ==> (function (tuple-p x y) (block)) # function (x=1) end ==> (function (tuple-p (= x 1)) (block)) # function (;x=1) end ==> (function (tuple-p (parameters (= x 1))) (block)) + # function (f(x),) end ==> (function (tuple-p (call f x)) (block)) + ambiguous_parens = opts.maybe_grouping_parens && + peek_behind(ps).kind in KSet"macrocall $" emit(ps, mark, K"tuple", PARENS_FLAG) + if ambiguous_parens + # Got something like `(@f(x))`. Is it anon `(@f(x),)` or named sig `@f(x)` ?? + emit(ps, mark, K"error", error="Ambiguous signature. 
Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") + end elseif is_empty_tuple # Weird case which is consistent with parse_paren but will be # rejected in lowering @@ -2175,19 +2183,23 @@ function parse_function_signature(ps::ParseState, is_function::Bool) end end end - if peek(ps, skip_newlines=true) == K"end" && !is_anon_func && !parsed_call - return false - end if needs_parse_call # Parse function argument list # function f(x,y) end ==> (function (call f x y) (block)) # function f{T}() end ==> (function (call (curly f T)) (block)) # function A.f() end ==> (function (call (. A f)) (block)) parse_call_chain(ps, mark) - if peek_behind(ps).kind != K"call" + sig_kind = peek_behind(ps).kind + if sig_kind in KSet"Identifier var $" && peek(ps, skip_newlines=true) == K"end" + # function f end ==> (function f) + # function $f end ==> (function $f) + return false + elseif sig_kind == K"macrocall" + min_supported_version(v"1.12", ps, mark, "macro call as function signature") + elseif sig_kind != K"call" # function f body end ==> (function (error f) (block body)) emit(ps, mark, K"error", - error="Invalid signature in $(is_function ? "function" : "macro") definition") + error="Invalid signature in $(is_function ? "function" : "macro") definition") end end if is_function && peek(ps) == K"::" @@ -3511,7 +3523,11 @@ function parse_atom(ps::ParseState, check_identifiers=true) # + ==> + # .+ ==> (. +) # .= ==> (. =) - bump_dotsplit(ps, emit_dot_node=true) + if is_dotted(peek_token(ps)) + bump_dotsplit(ps, emit_dot_node=true) + else + bump(ps, remap_kind=K"Identifier") + end if check_identifiers && !is_valid_identifier(leading_kind) # += ==> (error +=) # ? ==> (error ?) 
diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 1482a604ccedc..8371ede20b4d1 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -41,6 +41,9 @@ end @test diagnostic("\n+ (x, y)") == Diagnostic(3, 3, :error, "whitespace not allowed between prefix function call and argument list") + @test diagnostic("function (\$f) body end") == + Diagnostic(10, 13, :error, "Ambiguous signature. Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") + @test diagnostic("A.@B.x", only_first=true) == Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component") @test diagnostic("@M.(x)") == diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 18ad2eb2ac9ce..caf40f9e53081 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -571,6 +571,11 @@ tests = [ "function (x,y) end" => "(function (tuple-p x y) (block))" "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" + "function (f(x),) end" => "(function (tuple-p (call f x)) (block))" + "function (@f(x);) end" => "(function (tuple-p (macrocall-p @f x) (parameters)) (block))" + "function (@f(x)...) end" => "(function (tuple-p (... (macrocall-p @f x))) (block))" + "function (@f(x)) end" => "(function (error (tuple-p (macrocall-p @f x))) (block))" + "function (\$f) end" => "(function (error (tuple-p (\$ f))) (block))" "function ()(x) end" => "(function (call (tuple-p) x) (block))" "function (A).f() end" => "(function (call (. 
(parens A) f)) (block))" "function (:)() end" => "(function (call (parens :)) (block))" @@ -589,6 +594,7 @@ tests = [ "function f end" => "(function f)" "function f \n\n end" => "(function f)" "function \$f end" => "(function (\$ f))" + "function var\".\" end" => "(function (var .))" "macro f end" => "(macro f)" # Function argument list "function f(x,y) end" => "(function (call f x y) (block))" @@ -611,6 +617,11 @@ tests = [ # body "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" + # Macrocall as sig + ((v=v"1.12",), "function @callmemacro(a::Int) \n 1 \n end") => "(function (macrocall-p @callmemacro (::-i a Int)) (block 1))" + ((v=v"1.12",), "function @callmemacro(a::T, b::T) where T <: Int64\n3\nend") => "(function (where (macrocall-p @callmemacro (::-i a T) (::-i b T)) (<: T Int64)) (block 3))" + ((v=v"1.12",), "function @callmemacro(a::Int, b::Int, c::Int)::Float64\n4\nend") => "(function (::-i (macrocall-p @callmemacro (::-i a Int) (::-i b Int) (::-i c Int)) Float64) (block 4))" + ((v=v"1.12",), "function @f()() end") => "(function (call (macrocall-p @f)) (block))" # Errors "function" => "(function (error (error)) (block (error)) (error-t))" ], @@ -1000,6 +1011,9 @@ tests = [ "public[7] = 5" => "(= (ref public 7) 5)" "public() = 6" => "(function-= (call public) 6)" ]), + JuliaSyntax.parse_stmts => [ + ((v = v"1.12",), "@callmemacro(b::Float64) = 2") => "(= (macrocall-p @callmemacro (::-i b Float64)) 2)" + ], JuliaSyntax.parse_docstring => [ """ "notdoc" ] """ => "(string \"notdoc\")" """ "notdoc" \n] """ => "(string \"notdoc\")" From 14c37c5b0ad0bbb3ee5c239bf4ab81eca4eb72ab Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 6 Aug 2024 11:16:18 +1000 Subject: [PATCH 0811/1109] Minor README updates --- JuliaLowering/README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 3c4db69f34abd..4071810602d34 
100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -53,7 +53,8 @@ Lowering has five symbolic simplification passes: ## Syntax trees Want something something better than `JuliaSyntax.SyntaxNode`! `SyntaxTree` and -`SyntaxGraph` provide this. (These will probably end up in `JuliaSyntax`.) +`SyntaxGraph` provide this. Some future version of these should end up in +`JuliaSyntax`. We want to allow arbitrary attributes to be attached to tree nodes by analysis passes. This separates the analysis pass implementation from the data @@ -77,6 +78,14 @@ are similar. Analogy 3: Graph algorithms which represent graphs as a compact array of node ids and edges with integer indices, rather than using a linked data structure. +### References + +Sander Mertens, the author of the Flecs ECS has a blog post series discussing +ECS data structures and the many things that may be done with them. We may want +to use some of these tricks to make `SyntaxTree` faster, eventually. See, for +example, +[Building Games in ECS with Entity Relationships](https://ajmmertens.medium.com/building-games-in-ecs-with-entity-relationships-657275ba2c6c) + ## Provenance tracking Expression provenance is tracked through lowering by attaching provenance From 38f66c36ae8e056cd153a3c51f42ffa6e0dc34d4 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 6 Aug 2024 12:07:07 +1000 Subject: [PATCH 0812/1109] Fix after JuliaLang/JuliaSyntax.jl#433 --- JuliaLowering/src/desugaring.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index c3ce65c604e0f..30aea4c54b26a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -206,8 +206,12 @@ function expand_call(ctx, ex) end function expand_for(ctx, ex) - iterspec = ex[1] + iterspecs = ex[1] + @chk kind(iterspecs) == K"iteration" + @chk numchildren(iterspecs) == 1 + + iterspec = iterspecs[1] iter_var = iterspec[1] iter_ex = iterspec[2] 
From f670ef4f0b8d4f617eedf77d72949562e8ebf049 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 11:58:07 +1000 Subject: [PATCH 0813/1109] Fix bug converting cmd string macros to Expr (JuliaLang/JuliaSyntax.jl#482) --- JuliaSyntax/src/expr.jl | 4 ++-- JuliaSyntax/test/expr.jl | 2 ++ JuliaSyntax/test/parser.jl | 7 ++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 0808916bf979a..b779a49093d23 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -231,10 +231,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if k == K"?" headsym = :if elseif k == K"macrocall" - if length(args) == 2 + if length(args) >= 2 a2 = args[2] if @isexpr(a2, :macrocall) && kind(childheads[1]) == K"CmdMacroName" - # Fix up for custom cmd macros like `` foo`x` `` + # Fix up for custom cmd macros like foo`x` args[2] = a2.args[3] end end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 0a5dce45b4840..272a5b9a10e8d 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -673,6 +673,8 @@ # Custom cmd macros @test parsestmt("foo`str`") == Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str") + @test parsestmt("foo`str`flag") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str", "flag") @test parsestmt("foo```\n a\n b```") == Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "a\nb") # Expr conversion distinguishes from explicit calls to a macro of the same name diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index caf40f9e53081..70842a4d27d48 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -452,7 +452,12 @@ tests = [ "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" - # + # Cmd macro sufficies + "x`s`y" => """(macrocall 
@x_cmd (cmdstring-r "s") "y")""" + "x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")""" + "x`s`in" => """(macrocall @x_cmd (cmdstring-r "s") "in")""" + "x`s`2" => """(macrocall @x_cmd (cmdstring-r "s") 2)""" + "x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)""" ], JuliaSyntax.parse_resword => [ # In normal_context From 5368d64602271ce834b8aa73b96e90bd264abba1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 16:16:48 +1000 Subject: [PATCH 0814/1109] Rename `haschildren()` to `is_leaf()` (JuliaLang/JuliaSyntax.jl#483) Unfortunately, `haschildren(x)` was a terrible name because it's not testing the same thing as `numchildren(x) == 0`! In our ASTs * Leaves of the tree correspond to tokens in the source text * Internal nodes are containers for a range of tokens or other internal nodes. Occasionally we can have internal nodes which have no tokens and thus have `numchildren(node) == 0`. These are, however, still "internal nodes" and we have `haschildren(node) === true` for these which makes no sense! 
--- JuliaSyntax/src/expr.jl | 2 +- JuliaSyntax/src/green_tree.jl | 20 ++++++++++---------- JuliaSyntax/src/hooks.jl | 2 +- JuliaSyntax/src/syntax_tree.jl | 14 +++++++------- JuliaSyntax/test/green_node.jl | 2 +- JuliaSyntax/test/test_utils.jl | 8 ++++---- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index b779a49093d23..6d1e8ed6e576d 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -526,7 +526,7 @@ end function _to_expr(node) file = sourcefile(node) - if !haschildren(node) + if is_leaf(node) offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) return _leaf_to_Expr(file, txtbuf, head(node), byte_range(node) .+ offset, node) end diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index c4df51634e57e..8a4b46345c7c2 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -23,16 +23,16 @@ As implementation choices, we choose that: struct GreenNode{Head} head::Head span::UInt32 - args::Union{Tuple{},Vector{GreenNode{Head}}} + args::Union{Nothing,Vector{GreenNode{Head}}} end -function GreenNode(head::Head, span::Integer, args) where {Head} +function GreenNode(head::Head, span::Integer, args=nothing) where {Head} GreenNode{Head}(head, span, args) end # Accessors / predicates -haschildren(node::GreenNode) = !(node.args isa Tuple{}) -children(node::GreenNode) = node.args +is_leaf(node::GreenNode) = isnothing(node.args) +children(node::GreenNode) = isnothing(node.args) ? 
() : node.args span(node::GreenNode) = node.span head(node::GreenNode) = node.head @@ -49,19 +49,19 @@ function _show_green_node(io, node, indent, pos, str, show_trivia) return end posstr = "$(lpad(pos, 6)):$(rpad(pos+span(node)-1, 6)) │" - is_leaf = !haschildren(node) - if is_leaf + leaf = is_leaf(node) + if leaf line = string(posstr, indent, summary(node)) else line = string(posstr, indent, '[', summary(node), ']') end - if !is_trivia(node) && is_leaf + if !is_trivia(node) && leaf line = rpad(line, 40) * "✔" end if is_error(node) line = rpad(line, 41) * "✘" end - if is_leaf && !isnothing(str) + if leaf && !isnothing(str) line = string(rpad(line, 43), ' ', repr(str[pos:prevind(str, pos + span(node))])) end line = line*"\n" @@ -70,7 +70,7 @@ function _show_green_node(io, node, indent, pos, str, show_trivia) else print(io, line) end - if !is_leaf + if !leaf new_indent = indent*" " p = pos for x in children(node) @@ -91,7 +91,7 @@ end function build_tree(::Type{GreenNode}, stream::ParseStream; kws...) build_tree(GreenNode{SyntaxHead}, stream; kws...) do h, srcrange, cs span = length(srcrange) - isnothing(cs) ? GreenNode(h, span, ()) : + isnothing(cs) ? 
GreenNode(h, span) : GreenNode(h, span, collect(GreenNode{SyntaxHead}, cs)) end end diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index c87d32b0a03c7..97189321bbb31 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -10,7 +10,7 @@ function _first_error(t::SyntaxNode) if is_error(t) return 0,t end - if haschildren(t) + if !is_leaf(t) for (i,c) in enumerate(children(t)) if is_error(c) return i,c diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index dc9a73754ac76..af2d3e3191e39 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -76,7 +76,7 @@ end function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, raw::GreenNode{SyntaxHead}, position::Int, keep_parens::Bool) - if !haschildren(raw) + if is_leaf(raw) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. valrange = position:position + span(raw) - 1 @@ -106,7 +106,7 @@ function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, end end -haschildren(node::TreeNode) = node.children !== nothing +is_leaf(node::TreeNode) = node.children === nothing children(node::TreeNode) = (c = node.children; return c === nothing ? () : c) numchildren(node::TreeNode) = (isnothing(node.children) ? 0 : length(node.children)) @@ -134,7 +134,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" end val = node.val - nodestr = haschildren(node) ? "[$(untokenize(head(node)))]" : + nodestr = !is_leaf(node) ? "[$(untokenize(head(node)))]" : isa(val, Symbol) ? 
string(val) : repr(val) treestr = string(indent, nodestr) # Add filename if it's changed from the previous node @@ -144,7 +144,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, current_filename[] = fname end println(io, posstr, treestr) - if haschildren(node) + if !is_leaf(node) new_indent = indent*" " for n in children(node) _show_syntax_node(io, current_filename, n, new_indent, show_byte_offsets) @@ -153,7 +153,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, end function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode) - if !haschildren(node) + if is_leaf(node) if is_error(node) print(io, "(", untokenize(head(node)), ")") else @@ -186,7 +186,7 @@ function Base.show(io::IO, node::AbstractSyntaxNode) end function Base.push!(node::SN, child::SN) where SN<:AbstractSyntaxNode - if !haschildren(node) + if is_leaf(node) error("Cannot add children") end args = children(node) @@ -196,7 +196,7 @@ end function Base.copy(node::TreeNode) # copy the container but not the data (ie, deep copy the tree, shallow copy the data). copy(::Expr) is similar # copy "un-parents" the top-level `node` that you're copying - newnode = typeof(node)(nothing, haschildren(node) ? typeof(node)[] : nothing, copy(node.data)) + newnode = typeof(node)(nothing, is_leaf(node) ? 
nothing : typeof(node)[], copy(node.data)) for child in children(node) newchild = copy(child) newchild.parent = newnode diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl index 8dc79455bbf25..020c212df8274 100644 --- a/JuliaSyntax/test/green_node.jl +++ b/JuliaSyntax/test/green_node.jl @@ -2,7 +2,7 @@ t = parsestmt(GreenNode, "aa + b") @test span(t) == 6 - @test haschildren(t) + @test !is_leaf(t) @test head(t) == SyntaxHead(K"call", 0x0008) @test span.(children(t)) == [2,1,1,1,1] @test head.(children(t)) == [ diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 28a6992311f95..9d666fbd72066 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -28,7 +28,7 @@ using .JuliaSyntax: SyntaxHead, is_trivia, sourcetext, - haschildren, + is_leaf, children, child, fl_parseall, @@ -276,14 +276,14 @@ function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum if equals_flisp_parse(exprs_equal, tree) return false end - if !haschildren(tree) + if is_leaf(tree) push!(failing_subtrees, tree) return true end had_failing_subtrees = false - if haschildren(tree) + if !is_leaf(tree) for child in children(tree) - if is_trivia(child) || !haschildren(child) + if is_trivia(child) || is_leaf(child) continue end had_failing_subtrees |= _reduce_tree(failing_subtrees, child; exprs_equal=exprs_equal) From 50937bf0da049ffd447cbe20d254d3dc0f99418b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 8 Aug 2024 08:50:31 +1000 Subject: [PATCH 0815/1109] Fixes for JuliaSyntax.haschildren -> is_leaf --- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/ast.jl | 4 ++-- JuliaLowering/src/desugaring.jl | 6 +++--- JuliaLowering/src/macro_expansion.jl | 8 ++++---- JuliaLowering/src/scope_analysis.jl | 4 ++-- JuliaLowering/src/syntax_graph.jl | 24 ++++++++++-------------- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/utils.jl | 8 ++++---- 8 files changed, 27 insertions(+), 31 
deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index bf6546105d0d1..70874709e5851 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -10,7 +10,7 @@ using Core: eval using JuliaSyntax using JuliaSyntax: highlight, Kind, @KSet_str -using JuliaSyntax: haschildren, children, child, numchildren, head, kind, flags, has_flags +using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index b0c278b6f9ced..1da3355255261 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -326,7 +326,7 @@ function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}, all=false) end function mapchildren(f, ctx, ex; extra_attrs...) - if !haschildren(ex) + if is_leaf(ex) return ex end orig_children = children(ex) @@ -367,7 +367,7 @@ function copy_ast(ctx, ex) srcref = s isa NodeId ? copy_ast(ctx, SyntaxTree(ex._graph, s)) : s isa Tuple ? map(i->copy_ast(ctx, SyntaxTree(ex._graph, i)), s) : s - if haschildren(ex) + if !is_leaf(ex) cs = SyntaxList(ctx) for e in children(ex) push!(cs, copy_ast(ctx, e)) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 30aea4c54b26a..0722f930c17cb 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -121,7 +121,7 @@ end function expand_let(ctx, ex) scope_type = get(ex, :scope_type, :hard) blk = ex[2] - if numchildren(ex[1]) == 0 # TODO: Want to use !haschildren(ex[1]) but this doesn't work... 
+ if numchildren(ex[1]) == 0 return @ast ctx ex [K"scope_block"(scope_type=scope_type) blk] end for binding in Iterators.reverse(children(ex[1])) @@ -818,7 +818,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else expand_forms_2(ctx, expand_decls(ctx, ex)) # FIXME end - elseif is_operator(k) && !haschildren(ex) + elseif is_operator(k) && is_leaf(ex) makeleaf(ctx, ex, K"Identifier", ex.name_val) elseif k == K"char" || k == K"var" @chk numchildren(ex) == 1 @@ -884,7 +884,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"inert" ex - elseif !haschildren(ex) + elseif is_leaf(ex) ex else mapchildren(e->expand_forms_2(ctx,e), ctx, ex) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index e95e965180aec..3e741e376cb8b 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -122,7 +122,7 @@ function set_scope_layer(ctx, ex, layer_id, force) elseif k == K"." makenode(ctx, ex, ex, set_scope_layer(ctx, ex[1], layer_id, force), ex[2], scope_layer=scope_layer) - elseif haschildren(ex) + elseif !is_leaf(ex) mapchildren(e->set_scope_layer(ctx, e, layer_id, force), ctx, ex; scope_layer=scope_layer) else @@ -192,7 +192,7 @@ end # Add a secondary source of provenance to each expression in the tree `ex`. function append_sourceref(ctx, ex, secondary_prov) srcref = (ex, secondary_prov) - if haschildren(ex) + if !is_leaf(ex) if kind(ex) == K"macrocall" makenode(ctx, srcref, ex, children(ex)...) 
else @@ -225,7 +225,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) # FIXME: Move this upstream into JuliaSyntax @ast ctx ex (k == K"true")::K"Bool" elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || - (is_operator(k) && !haschildren(ex)) # <- TODO: fix upstream: make operator *tokens* into identifiers + (is_operator(k) && is_leaf(ex)) # <- TODO: fix upstream: make operator *tokens* into identifiers layerid = get(ex, :scope_layer, ctx.current_layer.id) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" @@ -252,7 +252,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) expand_macro(ctx, ex) elseif k == K"module" || k == K"toplevel" || k == K"inert" ex - elseif !haschildren(ex) + elseif is_leaf(ex) ex else mapchildren(e->expand_forms_1(ctx,e), ctx, ex) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 55cc10f4a717f..bd7b42a988d50 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -62,7 +62,7 @@ function _find_scope_vars!(assignments, locals, globals, used_names, used_bindin push!(used_names, NameKey(ex)) elseif k == K"BindingId" push!(used_bindings, ex.var_id) - elseif !haschildren(ex) || is_quoted(k) || + elseif is_leaf(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return elseif k == K"local" || k == K"local_def" @@ -345,7 +345,7 @@ function _resolve_scopes!(ctx, ex) if k == K"Identifier" id = lookup_var(ctx, NameKey(ex)) setattr!(ctx.graph, ex._id, var_id=id) - elseif !haschildren(ex) || is_quoted(ex) || k == K"toplevel" + elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" return # TODO # elseif k == K"global" diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index faafbd0c40935..3dc04c05cc741 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ 
-81,8 +81,8 @@ function setchildren!(graph::SyntaxGraph, id, children) append!(graph.edges, children) end -function JuliaSyntax.haschildren(graph::SyntaxGraph, id) - first(graph.edge_ranges[id]) > 0 +function JuliaSyntax.is_leaf(graph::SyntaxGraph, id) + first(graph.edge_ranges[id]) == 0 end function JuliaSyntax.numchildren(graph::SyntaxGraph, id) @@ -97,7 +97,7 @@ function JuliaSyntax.children(graph::SyntaxGraph, id, r::UnitRange) @view graph.edges[graph.edge_ranges[id][r]] end -function JuliaSyntax.child(graph::SyntaxGraph, id::NodeId, i::Integer) +function child(graph::SyntaxGraph, id::NodeId, i::Integer) graph.edges[graph.edge_ranges[id][i]] end @@ -157,7 +157,7 @@ function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) end end setattr!(graph, id, source=SourceRef(node.source, node.position, node.raw)) - if haschildren(node) + if !is_leaf(node) cs = map(children(node)) do n _convert_nodes(graph, n) end @@ -213,7 +213,7 @@ function Base.get(ex::SyntaxTree, name::Symbol, default) end function Base.getindex(ex::SyntaxTree, i::Integer) - child(ex, i) + SyntaxTree(ex._graph, child(ex._graph, ex._id, i)) end function Base.getindex(ex::SyntaxTree, r::UnitRange) @@ -239,8 +239,8 @@ end # JuliaSyntax tree API -function JuliaSyntax.haschildren(ex::SyntaxTree) - haschildren(ex._graph, ex._id) +function JuliaSyntax.is_leaf(ex::SyntaxTree) + is_leaf(ex._graph, ex._id) end function JuliaSyntax.numchildren(ex::SyntaxTree) @@ -251,10 +251,6 @@ function JuliaSyntax.children(ex::SyntaxTree) SyntaxList(ex._graph, children(ex._graph, ex._id)) end -function JuliaSyntax.child(ex::SyntaxTree, i::Integer) - SyntaxTree(ex._graph, child(ex._graph, ex._id, i)) -end - function JuliaSyntax.head(ex::SyntaxTree) JuliaSyntax.SyntaxHead(kind(ex), flags(ex)) end @@ -437,7 +433,7 @@ end function _show_syntax_tree(io, ex, indent) val = get(ex, :value, nothing) - nodestr = haschildren(ex) ? "[$(untokenize(head(ex)))]" : _value_string(ex) + nodestr = !is_leaf(ex) ? 
"[$(untokenize(head(ex)))]" : _value_string(ex) treestr = string(indent, nodestr) @@ -447,7 +443,7 @@ function _show_syntax_tree(io, ex, indent) treestr = string(rpad(treestr, 40), "│ $attrstr") println(io, treestr) - if haschildren(ex) + if !is_leaf(ex) new_indent = indent*" " for n in children(ex) _show_syntax_tree(io, n, new_indent) @@ -462,7 +458,7 @@ function Base.show(io::IO, ::MIME"text/plain", ex::SyntaxTree) end function _show_syntax_tree_sexpr(io, ex) - if !haschildren(ex) + if is_leaf(ex) if is_error(ex) print(io, "(", untokenize(head(ex)), ")") else diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index f63ebb8ed042d..632559d4064cb 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -3,7 +3,7 @@ using JuliaSyntax using JuliaLowering -using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, haschildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, is_leaf, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding using JuliaSyntaxFormatter diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index fd8d532befe8b..650eb94046e8e 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -9,7 +9,7 @@ using JuliaLowering: SyntaxGraph, newnode!, ensure_attributes!, Kind, SourceRef, SyntaxTree, NodeId, makenode, makeleaf, setattr!, sethead!, - haschildren, numchildren, children, + is_leaf, numchildren, children, @ast, flattened_provenance, showprov, LoweringError function _ast_test_graph() @@ -38,10 +38,10 @@ macro ast_(tree) end function ~(ex1, ex2) - if kind(ex1) != kind(ex2) || haschildren(ex1) != haschildren(ex2) + if kind(ex1) != kind(ex2) || is_leaf(ex1) != is_leaf(ex2) return false end - if haschildren(ex1) + if is_leaf(ex1) if numchildren(ex1) != 
numchildren(ex2) return false end @@ -57,7 +57,7 @@ end function _format_as_ast_macro(io, ex, indent) k = kind(ex) kind_str = repr(k) - if haschildren(ex) + if !is_leaf(ex) println(io, indent, "[", kind_str) ind2 = indent*" " for c in children(ex) From efd08b49172fa8054ffd8af4807e36e57708ebf7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 15:21:40 +1000 Subject: [PATCH 0816/1109] Support weird `a."b"` syntax in expanding `K"."` --- JuliaLowering/src/desugaring.jl | 33 +++++++++++---- JuliaLowering/test/desugaring.jl | 73 ++++++++++++++++++++++++++++++++ JuliaLowering/test/runtests.jl | 1 + JuliaLowering/test/utils.jl | 9 +++- 4 files changed, 106 insertions(+), 10 deletions(-) create mode 100644 JuliaLowering/test/desugaring.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 0722f930c17cb..67e74610de3be 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -205,6 +205,29 @@ function expand_call(ctx, ex) end end +function expand_dot(ctx, ex) + @chk numchildren(ex) == 2 # TODO: bare `.+` syntax + rhs = ex[2] + kr = kind(rhs) + expand_forms_2(ctx, + @ast ctx ex [K"call" + "getproperty"::K"top" + ex[1] + if kr == K"Identifier" + rhs=>K"Symbol" + else + if !(kind(rhs) == K"string" || is_leaf(rhs)) + throw(LoweringError(rhs, "Unrecognized field access syntax")) + end + # Required to support the possibly dubious syntax `a."b"`. See + # https://github.com/JuliaLang/julia/issues/26873 + # Syntax edition TODO: reconsider this; possibly restrict to only K"String"? + rhs + end + ] + ) +end + function expand_for(ctx, ex) iterspecs = ex[1] @@ -758,15 +781,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) if k == K"call" expand_call(ctx, ex) elseif k == K"." 
- @chk numchildren(ex) == 2 - @chk kind(ex[2]) == K"Identifier" - expand_forms_2(ctx, - @ast ctx ex [K"call" - "getproperty"::K"top" - ex[1] - ex[2]=>K"Symbol" - ] - ) + expand_dot(ctx, ex) elseif k == K"?" @chk numchildren(ex) == 3 expand_forms_2(ctx, @ast ctx ex [K"if" children(ex)...]) diff --git a/JuliaLowering/test/desugaring.jl b/JuliaLowering/test/desugaring.jl new file mode 100644 index 0000000000000..3aad0e152e594 --- /dev/null +++ b/JuliaLowering/test/desugaring.jl @@ -0,0 +1,73 @@ +@testset "Desugaring" begin + +test_mod = Module(:TestMod) + +@test desugar(test_mod, """ +a.b +""") ~ @ast_ [K"call" + "getproperty"::K"top" + "a"::K"Identifier" + "b"::K"Symbol" +] + +@test desugar(test_mod, """ +a."b" +""") ~ @ast_ [K"call" + "getproperty"::K"top" + "a"::K"Identifier" + "b"::K"String" +] + +# @test desugar(test_mod, """ +# let +# y = 0 +# x = 1 +# let x = x + 1 +# y = x +# end +# (x, y) +# end +# """) ~ @ast_ [K"block" +# [K"block" +# [K"=" +# "y"::K"Identifier" +# 0::K"Integer" +# ] +# [K"=" +# "x"::K"Identifier" +# 1::K"Integer" +# ] +# [K"block" +# [K"=" +# 1::K"BindingId" +# [K"call" +# "+"::K"Identifier" +# "x"::K"Identifier" +# 1::K"Integer" +# ] +# ] +# [K"block" +# [K"local_def" +# "x"::K"Identifier" +# ] +# [K"=" +# "x"::K"Identifier" +# 1::K"BindingId" +# ] +# [K"block" +# [K"=" +# "y"::K"Identifier" +# "x"::K"Identifier" +# ] +# ] +# ] +# ] +# [K"call" +# "tuple"::K"core" +# "x"::K"Identifier" +# "y"::K"Identifier" +# ] +# ] +# ] + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index e057b2a61b781..4f2901b5e488c 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -302,6 +302,7 @@ macro A.b(ex) end """) +include("desugaring.jl") include("branching.jl") include("loops.jl") diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 650eb94046e8e..ed4b32f5c1c28 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -10,7 +10,8 @@ using 
JuliaLowering: Kind, SourceRef, SyntaxTree, NodeId, makenode, makeleaf, setattr!, sethead!, is_leaf, numchildren, children, - @ast, flattened_provenance, showprov, LoweringError + @ast, flattened_provenance, showprov, LoweringError, + syntax_graph, Bindings, ScopeLayer function _ast_test_graph() graph = SyntaxGraph() @@ -93,6 +94,12 @@ format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex) # Test tools +function desugar(mod::Module, src::String) + ex = parsestmt(SyntaxTree, src, filename="foo.jl") + ctx = JuliaLowering.DesugaringContext(syntax_graph(ex), Bindings(), ScopeLayer[], mod) + JuliaLowering.expand_forms_2(ctx, ex) +end + function match_ir_test_case(case_str) m = match(r"# *([^\n]*)\n((?:.|\n)*)#----*\n((?:.|\n)*)"m, strip(case_str)) if isnothing(m) From 88da7653c67b4ed5c52a72644d4873bf65c66b97 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 8 Aug 2024 11:08:30 +1000 Subject: [PATCH 0817/1109] Tool to bump JuliaSyntax version in Base (JuliaLang/JuliaSyntax.jl#485) It turns out that this is basically a local version of BumpStdlibs.jl... 
--- JuliaSyntax/tools/bump_in_Base.jl | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 JuliaSyntax/tools/bump_in_Base.jl diff --git a/JuliaSyntax/tools/bump_in_Base.jl b/JuliaSyntax/tools/bump_in_Base.jl new file mode 100644 index 0000000000000..c9ca7473efb45 --- /dev/null +++ b/JuliaSyntax/tools/bump_in_Base.jl @@ -0,0 +1,75 @@ +function find_checksum_files(checksum_dir) + filter(readdir(checksum_dir, join=true)) do path + occursin(r"^JuliaSyntax-", basename(path)) + end +end + +function bump_in_Base(julia_dir, juliasyntax_dir, juliasyntax_branch_or_commit) + julia_git_dir = joinpath(julia_dir, ".git") + JuliaSyntax_git_dir = joinpath(juliasyntax_dir, ".git") + if !isdir(julia_git_dir) + @error "Julia .git directory not found" julia_git_dir + return 1 + end + if !isdir(JuliaSyntax_git_dir) + @error "JuliaSyntax .git directory not found" JuliaSyntax_git_dir + return 1 + end + + @info "Vendoring JuliaSyntax into Base" julia_dir juliasyntax_branch_or_commit + + remote_containing_branches = filter(b->occursin(r"^origin/(main|release-.*)$", b), + strip.(split( + read(`git --git-dir=$JuliaSyntax_git_dir branch -r --contains $juliasyntax_branch_or_commit`, String), + '\n', keepempty=false))) + if isempty(remote_containing_branches) + @warn "No remote main or release branches contain the given commit. This is ok for testing, but is otherwise an error." 
juliasyntax_branch_or_commit + else + @info "Given commit is accessible on remote branch" remote_containing_branches + end + + commit_sha = strip(String(read(`git --git-dir=$JuliaSyntax_git_dir show -s --pretty=tformat:%H $juliasyntax_branch_or_commit`))) + + cd(julia_dir) do + status = read(`git status --porcelain --untracked-files=no`, String) + if status != "" + @error "Julia git directory contains uncommitted changes" status=Text(status) + return 1 + end + + verfile_path = joinpath("deps", "JuliaSyntax.version") + @info "Updating JuliaSyntax.version" verfile_path + write(verfile_path, replace(read(verfile_path, String), r"JULIASYNTAX_SHA1.*"=>"JULIASYNTAX_SHA1 = "*commit_sha)) + run(`git add $verfile_path`) + + @info "Updating JuliaSyntax checksums" + deps_dir = "deps" + checksum_dir = joinpath(deps_dir, "checksums") + old_checksum_paths = find_checksum_files(checksum_dir) + if !isempty(old_checksum_paths) + run(`git rm -rf $old_checksum_paths`) + end + run(`make -C $deps_dir`) + run(`git add $(find_checksum_files(checksum_dir))`) + + # Force rebuild of Base to include the newly vendored JuliaSyntax next time Julia is built. + # (TODO: fix the Makefile instead?) + touch("base/Base.jl") + + @info "JuliaSyntax version updated. 
You can now test or commit the following changes" + run(`git diff --cached`) + end + + return 0 +end + +if !isinteractive() + if length(ARGS) != 2 + println("Usage: bump_in_Base.jl \$julia_dir \$juliasyntax_branch_or_commit") + exit(1) + else + juliasyntax_dir = dirname(@__DIR__) + exit(bump_in_Base(ARGS[1], juliasyntax_dir, ARGS[2])) + end +end + From a647d05cb80898e8e105ef968c5936d5b145e7f7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 8 Aug 2024 14:13:33 +1000 Subject: [PATCH 0818/1109] Show leaf kinds in `show()` for `SyntaxTree` --- JuliaLowering/src/syntax_graph.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 3dc04c05cc741..c3b3fb063a872 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -431,30 +431,33 @@ function _value_string(ex) return str end -function _show_syntax_tree(io, ex, indent) +function _show_syntax_tree(io, ex, indent, show_kinds) val = get(ex, :value, nothing) nodestr = !is_leaf(ex) ? 
"[$(untokenize(head(ex)))]" : _value_string(ex) - treestr = string(indent, nodestr) + treestr = rpad(string(indent, nodestr), 40) + if show_kinds && is_leaf(ex) + treestr = treestr*" :: "*string(kind(ex)) + end std_attrs = Set([:name_val,:value,:kind,:syntax_flags,:source,:var_id]) attrstr = join([attrsummary(n, getproperty(ex, n)) for n in attrnames(ex) if n ∉ std_attrs], ",") - treestr = string(rpad(treestr, 40), "│ $attrstr") + treestr = string(rpad(treestr, 60), " │ $attrstr") println(io, treestr) if !is_leaf(ex) new_indent = indent*" " for n in children(ex) - _show_syntax_tree(io, n, new_indent) + _show_syntax_tree(io, n, new_indent, show_kinds) end end end -function Base.show(io::IO, ::MIME"text/plain", ex::SyntaxTree) +function Base.show(io::IO, ::MIME"text/plain", ex::SyntaxTree, show_kinds=true) anames = join(string.(attrnames(syntax_graph(ex))), ",") println(io, "SyntaxTree with attributes $anames") - _show_syntax_tree(io, ex, "") + _show_syntax_tree(io, ex, "", show_kinds) end function _show_syntax_tree_sexpr(io, ex) From 14962199b559dca34724c402bec3245368e9dfc4 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 9 Aug 2024 11:51:21 +1000 Subject: [PATCH 0819/1109] Implement `char_range()` to respect string indices (JuliaLang/JuliaSyntax.jl#486) Also widen `byte_range()` signatures to accept other integer types --- JuliaSyntax/docs/src/api.md | 1 + JuliaSyntax/src/JuliaSyntax.jl | 2 +- JuliaSyntax/src/source_files.jl | 19 ++++++++++++++++--- JuliaSyntax/test/syntax_tree.jl | 9 +++++++++ JuliaSyntax/test/test_utils.jl | 4 +++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index eaa5a67e0aa1c..da9b81e1e5c3f 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -52,6 +52,7 @@ JuliaSyntax.last_byte JuliaSyntax.filename JuliaSyntax.source_line JuliaSyntax.source_location +JuliaSyntax.char_range JuliaSyntax.sourcetext JuliaSyntax.highlight ``` diff --git 
a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 3f1ad27a0434a..4e2144ee73afa 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -9,7 +9,7 @@ export parsestmt, parseall, parseatom # Tokenization export tokenize, Token, untokenize # Source file handling. See also -# highlight() sourcetext() source_line() source_location() +# highlight() sourcetext() source_line() source_location() char_range() export SourceFile # Expression heads/kinds. See also # flags() and related predicates. diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 57d89917af277..9d039d928ce67 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -15,11 +15,24 @@ end """ byte_range(x) -Return the range of bytes which `x` covers in the source text. +Return the range of bytes which `x` covers in the source text. See also +[`char_range`](@ref). """ function byte_range end +""" + char_range(x) + +Compute the range in *character indices* over the source text for syntax object +`x`. If you want to index the source string you need this, rather than +[`byte_range`](@ref). 
+""" +function char_range(x) + br = byte_range(x) + first(br):thisind(sourcefile(x), last(br)) +end + """ first_byte(x) @@ -232,11 +245,11 @@ function Base.view(source::SourceFile, rng::AbstractUnitRange) SubString(source.code, i, j) end -function Base.getindex(source::SourceFile, i::Int) +function Base.getindex(source::SourceFile, i::Integer) source.code[i - source.byte_offset] end -function Base.thisind(source::SourceFile, i::Int) +function Base.thisind(source::SourceFile, i::Integer) thisind(source.code, i - source.byte_offset) + source.byte_offset end diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 78ebb1843151b..71d155ccdb505 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -55,6 +55,15 @@ @test t.position == 13 @test child(t,1).position == 19 @test child(t,1).val == :b + + # Unicode character ranges + src = "ab + αβ" + t = parsestmt(SyntaxNode, src) + @test char_range(t[1]) == 1:2 + @test char_range(t[2]) == 4:4 + @test char_range(t[3]) == 6:8 + # conversely, β takes two bytes so char_range(t[3]) != byte_range(t[3]) + @test byte_range(t[3]) == 6:9 end @testset "SyntaxNode pretty printing" begin diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 9d666fbd72066..32a1df945060b 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -36,7 +36,9 @@ using .JuliaSyntax: highlight, tokenize, untokenize, - filename + filename, + byte_range, + char_range if VERSION < v"1.6" # Compat stuff which might not be in Base for older versions From ebcd80ccc257144d9b345044f85c9a90878dcd7d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 9 Aug 2024 15:13:19 +1000 Subject: [PATCH 0820/1109] Replace K"true" and K"false" with K"Bool" (JuliaLang/JuliaSyntax.jl#488) Use a single `K"Bool"` Kind for booleans. This is both more convenient and more consistent with other literal kinds such as K"Integer" which group all integers under a single kind. 
Replace the use of the invisible `K"false"` token in catch blocks with a new kind `K"Placeholder"` - this removes the last of the invisible tokens, other than error tokens! K"Placeholder" will also be really useful for JuliaLowering as a kind for all-underscore identifiers. --- JuliaSyntax/src/expr.jl | 4 ++++ JuliaSyntax/src/kinds.jl | 9 ++++++--- JuliaSyntax/src/literal_parsing.jl | 8 +++----- JuliaSyntax/src/parser.jl | 12 ++++++------ JuliaSyntax/src/syntax_tree.jl | 19 +++++++++++++++---- JuliaSyntax/src/tokenize.jl | 10 ++++++---- JuliaSyntax/test/parser.jl | 10 +++++----- JuliaSyntax/test/tokenize.jl | 12 +++++------- 8 files changed, 50 insertions(+), 34 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 6d1e8ed6e576d..fd61243c18eaa 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -326,6 +326,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args = Any[args[1], a2a...] end end + elseif k == K"catch" + if kind(childheads[1]) == K"Placeholder" + args[1] = false + end elseif k == K"try" # Try children in source order: # try_block catch_var catch_block else_block finally_block diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 30d16e3e29e89..27d90c2265544 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -197,6 +197,7 @@ register_kinds!(JuliaSyntax, 0, [ # Identifiers "BEGIN_IDENTIFIERS" "Identifier" + "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering # Macro names are modelled as special kinds of identifiers because the full # macro name may not appear as characters in the source: The `@` may be # detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd @@ -253,6 +254,7 @@ register_kinds!(JuliaSyntax, 0, [ "END_KEYWORDS" "BEGIN_LITERAL" + "Bool" "Integer" "BinInt" "HexInt" @@ -262,8 +264,6 @@ register_kinds!(JuliaSyntax, 0, [ "String" "Char" "CmdString" - "true" - "false" 
"END_LITERAL" "BEGIN_DELIMITERS" @@ -1067,7 +1067,7 @@ register_kinds!(JuliaSyntax, 0, [ # Special tokens "TOMBSTONE" # Empty placeholder for kind to be filled later - "None" # Placeholder; never emitted by lexer + "None" # Never emitted by lexer/parser "EndMarker" # EOF "BEGIN_ERRORS" @@ -1097,6 +1097,7 @@ const _nonunique_kind_names = Set([ K"Whitespace" K"NewlineWs" K"Identifier" + K"Placeholder" K"ErrorEofMultiComment" K"ErrorInvalidNumericConstant" @@ -1169,6 +1170,7 @@ const _token_error_descriptions = Dict{Kind, String}( #------------------------------------------------------------------------------- # Predicates +is_identifier(k::Kind) = K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS" is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS" is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**" is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS" @@ -1177,6 +1179,7 @@ is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL" is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS" is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") +is_identifier(k) = is_identifier(kind(k)) is_contextual_keyword(k) = is_contextual_keyword(kind(k)) is_error(k) = is_error(kind(k)) is_keyword(k) = is_keyword(kind(k)) diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/literal_parsing.jl index 5a744f97108bc..f2b99b862210e 100644 --- a/JuliaSyntax/src/literal_parsing.jl +++ b/JuliaSyntax/src/literal_parsing.jl @@ -406,10 +406,8 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) last(srcrange)+1, Diagnostic[]) end return had_error ? 
ErrorVal() : String(take!(io)) - elseif k == K"true" - return true - elseif k == K"false" - return false + elseif k == K"Bool" + return txtbuf[first(srcrange)] == u8"t" end # TODO: Avoid allocating temporary String here @@ -418,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) parse_int_literal(val_str) elseif k in KSet"BinInt OctInt HexInt" parse_uint_literal(val_str, k) - elseif k == K"Identifier" + elseif k == K"Identifier" || k == K"Placeholder" if has_flags(head, RAW_STRING_FLAG) io = IOBuffer() unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 8da46517775b6..496979645d60e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2254,7 +2254,7 @@ function parse_try(ps) if peek(ps) == K"else" # catch-else syntax: https://github.com/JuliaLang/julia/pull/42211 # - #v1.8: try catch ; else end ==> (try (block) (catch false (block)) (else (block))) + #v1.8: try catch ; else end ==> (try (block) (catch □ (block)) (else (block))) else_mark = position(ps) bump(ps, TRIVIA_FLAG) parse_block(ps) @@ -2262,7 +2262,7 @@ function parse_try(ps) #v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y))) emit(ps, else_mark, K"error", error="Expected `catch` before `else`") end - #v1.7: try catch ; else end ==> (try (block) (catch false (block)) (else (error (block)))) + #v1.7: try catch ; else end ==> (try (block) (catch □ (block)) (else (error (block)))) min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`") emit(ps, else_mark, K"else") end @@ -2302,10 +2302,10 @@ function parse_catch(ps::ParseState) bump(ps, TRIVIA_FLAG) k = peek(ps) if k in KSet"NewlineWs ;" || is_closing_token(ps, k) - # try x catch end ==> (try (block x) (catch false (block))) - # try x catch ; y end ==> (try (block x) (catch false (block y))) - # try x catch \n y end ==> (try (block x) (catch false 
(block y))) - bump_invisible(ps, K"false") + # try x catch end ==> (try (block x) (catch □ (block))) + # try x catch ; y end ==> (try (block x) (catch □ (block y))) + # try x catch \n y end ==> (try (block x) (catch □ (block y))) + bump_invisible(ps, K"Placeholder") else # try x catch e y end ==> (try (block x) (catch e (block y))) # try x catch $e y end ==> (try (block x) (catch ($ e) (block y))) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index af2d3e3191e39..38d853c046cbe 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -125,6 +125,19 @@ byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) sourcefile(node::AbstractSyntaxNode) = node.source +function leaf_string(ex) + if !is_leaf(ex) + throw(ArgumentError("_value_string should be used for leaf nodes only")) + end + k = kind(ex) + value = ex.val + # TODO: Dispatch on kind extension module (??) + return k == K"Placeholder" ? "□"*string(value) : + is_identifier(k) ? string(value) : + value isa Symbol ? string(value) : # see parse_julia_literal for other cases which go here + repr(value) +end + function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent, show_byte_offsets) fname = filename(node) @@ -134,8 +147,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" end val = node.val - nodestr = !is_leaf(node) ? "[$(untokenize(head(node)))]" : - isa(val, Symbol) ? string(val) : repr(val) + nodestr = is_leaf(node) ? leaf_string(node) : "[$(untokenize(head(node)))]" treestr = string(indent, nodestr) # Add filename if it's changed from the previous node if fname != current_filename[] @@ -157,8 +169,7 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode) if is_error(node) print(io, "(", untokenize(head(node)), ")") else - val = node.val - print(io, val isa Symbol ? 
string(val) : repr(val)) + print(io, leaf_string(node)) end else print(io, "(", untokenize(head(node))) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index de2daa0f95dcd..af78bee42c3ff 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -1319,8 +1319,10 @@ function lex_identifier(l::Lexer, c) if n > MAX_KW_LENGTH emit(l, K"Identifier") + elseif h == _true_hash || h == _false_hash + emit(l, K"Bool") else - emit(l, get(kw_hash, h, K"Identifier")) + emit(l, get(_kw_hash, h, K"Identifier")) end end @@ -1374,8 +1376,6 @@ K"while", K"in", K"isa", K"where", -K"true", -K"false", K"abstract", K"as", @@ -1387,6 +1387,8 @@ K"type", K"var", ] -const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) +const _true_hash = simple_hash("true") +const _false_hash = simple_hash("false") +const _kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws) end # module diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 70842a4d27d48..b4fac48263566 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -635,18 +635,18 @@ tests = [ "(try (block x) (catch e (block y)) (finally (block z)))" ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") => "(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))" - "try x catch end" => "(try (block x) (catch false (block)))" - "try x catch ; y end" => "(try (block x) (catch false (block y)))" - "try x catch \n y end" => "(try (block x) (catch false (block y)))" + "try x catch end" => "(try (block x) (catch □ (block)))" + "try x catch ; y end" => "(try (block x) (catch □ (block y)))" + "try x catch \n y end" => "(try (block x) (catch □ (block y)))" "try x catch e y end" => "(try (block x) (catch e (block y)))" "try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))" "try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))" "try x catch e+3 y end" => "(try (block x) 
(catch (error (call-i e + 3)) (block y)))" "try x finally y end" => "(try (block x) (finally (block y)))" # v1.8 only - ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch false (block)) (else (block)))" + ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (block)))" ((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))" - ((v=v"1.7",), "try catch ; else end") => "(try (block) (catch false (block)) (else (error (block))))" + ((v=v"1.7",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (error (block))))" # finally before catch :-( "try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))" "try x end" => "(try (block x) (error-t))" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index eb30370fc6885..8913a20ca0a99 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -198,9 +198,10 @@ end end @testset "tokenizing true/false literals" begin - @test tok("somtext true", 3).kind == K"true" - @test tok("somtext false", 3).kind == K"false" + @test tok("somtext true", 3).kind == K"Bool" + @test tok("somtext false", 3).kind == K"Bool" @test tok("somtext tr", 3).kind == K"Identifier" + @test tok("somtext truething", 3).kind == K"Identifier" @test tok("somtext falsething", 3).kind == K"Identifier" end @@ -962,9 +963,6 @@ const all_kws = Set([ "primitive", "type", "var", - # Literals - "true", - "false", # Word-like operators "in", "isa", @@ -974,14 +972,14 @@ const all_kws = Set([ function check_kw_hashes(iter) for cs in iter str = String([cs...]) - if Tokenize.simple_hash(str) in keys(Tokenize.kw_hash) + if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash) @test str in all_kws end end end @testset "simple_hash" begin - @test length(all_kws) == length(Tokenize.kw_hash) + @test length(all_kws) == length(Tokenize._kw_hash) @testset "Length $len keywords" for len in 1:5 
check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...)) From 4bc9469e74d7c40c7d76454960629e207ec3f207 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 9 Aug 2024 15:38:06 +1000 Subject: [PATCH 0821/1109] Show leaf Kinds when printing syntax trees (JuliaLang/JuliaSyntax.jl#489) The leaf kind is fairly critical information - this change ensures it's shown by default when printing syntax trees. Conversely, the byte offset and line/column is generally not very useful, so hide this by default. --- JuliaSyntax/src/syntax_tree.jl | 47 ++++++++++++++---------- JuliaSyntax/test/syntax_tree.jl | 64 ++++++++++++++++++++------------- 2 files changed, 67 insertions(+), 44 deletions(-) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 38d853c046cbe..02225795afc55 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -139,61 +139,70 @@ function leaf_string(ex) end function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, - indent, show_byte_offsets) - fname = filename(node) + indent, show_location, show_kind) line, col = source_location(node) - posstr = "$(lpad(line, 4)):$(rpad(col,3))│" - if show_byte_offsets - posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" + if show_location + fname = filename(node) + # Add filename if it's changed from the previous node + if fname != current_filename[] + println(io, indent, " -file- │ ", repr(fname)) + current_filename[] = fname + end + posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" + else + posstr = "" end val = node.val nodestr = is_leaf(node) ? 
leaf_string(node) : "[$(untokenize(head(node)))]" treestr = string(indent, nodestr) - # Add filename if it's changed from the previous node - if fname != current_filename[] - #println(io, "# ", fname) - treestr = string(rpad(treestr, 40), "│$fname") - current_filename[] = fname + if show_kind && is_leaf(node) + treestr = rpad(treestr, 40)*" :: "*string(kind(node)) end println(io, posstr, treestr) if !is_leaf(node) new_indent = indent*" " for n in children(node) - _show_syntax_node(io, current_filename, n, new_indent, show_byte_offsets) + _show_syntax_node(io, current_filename, n, new_indent, show_location, show_kind) end end end -function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode) +function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind) if is_leaf(node) if is_error(node) print(io, "(", untokenize(head(node)), ")") else print(io, leaf_string(node)) + if show_kind + print(io, "::", kind(node)) + end end else print(io, "(", untokenize(head(node))) first = true for n in children(node) print(io, ' ') - _show_syntax_node_sexpr(io, n) + _show_syntax_node_sexpr(io, n, show_kind) first = false end print(io, ')') end end -function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_byte_offsets=false) - println(io, "line:col│$(show_byte_offsets ? 
" byte_range │" : "") tree │ file_name") - _show_syntax_node(io, Ref(""), node, "", show_byte_offsets) +function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_location=false, show_kind=true) + println(io, "SyntaxNode:") + if show_location + println(io, "line:col│ byte_range │ tree") + end + _show_syntax_node(io, Ref(""), node, "", show_location, show_kind) end -function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode) - _show_syntax_node_sexpr(io, node) +function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode; show_kind=false) + _show_syntax_node_sexpr(io, node, show_kind) end function Base.show(io::IO, node::AbstractSyntaxNode) - _show_syntax_node_sexpr(io, node) + _show_syntax_node_sexpr(io, node, false) end function Base.push!(node::SN, child::SN) where SN<:AbstractSyntaxNode diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 71d155ccdb505..97124384de51c 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -22,10 +22,9 @@ # as `lastindex(t, 2)` isn't well defined @test sprint(show, t) == "(call-i (call-i a * b) + c)" - str = sprint(show, MIME("text/plain"), t) - # These tests are deliberately quite relaxed to avoid being too specific about display style - @test occursin("line:col", str) - @test occursin("call-i", str) + @test sprint(io->show(io, MIME("text/x.sexpression"), t, show_kind=true)) == + "(call-i (call-i a::Identifier *::* b::Identifier) +::+ c::Identifier)" + @test sprint(highlight, child(t, 1, 3)) == "a*b + c\n# ╙" @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" @@ -69,30 +68,45 @@ end @testset "SyntaxNode pretty printing" begin t = parsestmt(SyntaxNode, "f(a*b,\n c)", filename="foo.jl") @test sprint(show, MIME("text/plain"), t) == """ - line:col│ tree │ file_name - 1:1 │[call] │foo.jl - 1:1 │ f - 1:3 │ [call-i] - 1:3 │ a - 1:4 │ * - 1:5 │ b - 2:3 │ c + SyntaxNode: + [call] + f :: Identifier + 
[call-i] + a :: Identifier + * :: * + b :: Identifier + c :: Identifier """ - @test sprint(io->show(io, MIME("text/plain"), t, show_byte_offsets=true)) == """ - line:col│ byte_range │ tree │ file_name - 1:1 │ 1:11 │[call] │foo.jl - 1:1 │ 1:1 │ f + + @test sprint(io->show(io, MIME("text/plain"), t, show_location=true)) == """ + SyntaxNode: + line:col│ byte_range │ tree + -file- │ "foo.jl" + 1:1 │ 1:11 │[call] + 1:1 │ 1:1 │ f :: Identifier 1:3 │ 3:5 │ [call-i] - 1:3 │ 3:3 │ a - 1:4 │ 4:4 │ * - 1:5 │ 5:5 │ b - 2:3 │ 10:10 │ c + 1:3 │ 3:3 │ a :: Identifier + 1:4 │ 4:4 │ * :: * + 1:5 │ 5:5 │ b :: Identifier + 2:3 │ 10:10 │ c :: Identifier """ - t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) - @test sprint(show, MIME("text/plain"), t) == """ - line:col│ tree │ file_name - 1:1 │[block] - 1:7 │ b + @test sprint(io->show(io, MIME("text/plain"), t, show_kind=false)) == """ + SyntaxNode: + [call] + f + [call-i] + a + * + b + c + """ + + t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13, first_line=100) + @test sprint(io->show(io, MIME("text/plain"), t, show_location=true)) == """ + SyntaxNode: + line:col│ byte_range │ tree + 100:1 │ 13:23 │[block] + 100:7 │ 19:19 │ b :: Identifier """ end From d8f56e1fb72753a607e7ee3287f5cec7bda1e152 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 9 Aug 2024 16:50:14 +1000 Subject: [PATCH 0822/1109] Fixes for JuliaSyntax changes to `K"Bool"` --- JuliaLowering/src/desugaring.jl | 24 ++++++++---------------- JuliaLowering/src/kinds.jl | 5 ----- JuliaLowering/src/macro_expansion.jl | 3 --- 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 67e74610de3be..ac68759cf200d 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -288,8 +288,6 @@ end function match_try(ex) @chk numchildren(ex) > 1 "Invalid `try` form" try_ = ex[1] - catch_and_exc = nothing - exc_var = nothing catch_ = nothing finally_ = nothing 
else_ = nothing @@ -297,12 +295,7 @@ function match_try(ex) k = kind(e) if k == K"catch" && isnothing(catch_) @chk numchildren(e) == 2 "Invalid `catch` form" - if !(kind(e[1]) == K"Bool" && e[1].value === false) - # TODO: Fix this strange AST wart upstream? - exc_var = e[1] - end - catch_ = e[2] - catch_and_exc = e + catch_ = e elseif k == K"else" && isnothing(else_) @chk numchildren(e) == 1 else_ = e[1] @@ -313,11 +306,11 @@ function match_try(ex) throw(LoweringError(ex, "Invalid clause in `try` form")) end end - (try_, catch_and_exc, exc_var, catch_, else_, finally_) + (try_, catch_, else_, finally_) end function expand_try(ctx, ex) - (try_, catch_and_exc, exc_var, catch_, else_, finally_) = match_try(ex) + (try_, catch_, else_, finally_) = match_try(ex) if !isnothing(finally_) # TODO: check unmatched symbolic gotos in try. @@ -328,16 +321,15 @@ function expand_try(ctx, ex) if isnothing(catch_) try_block = try_body else - if !isnothing(exc_var) && !is_identifier_like(exc_var) + exc_var = catch_[1] + catch_block = catch_[2] + if !is_identifier_like(exc_var) throw(LoweringError(exc_var, "Expected an identifier as exception variable")) end try_block = @ast ctx ex [K"trycatchelse" try_body - [K"scope_block"(catch_and_exc, scope_type=:neutral) - if !isnothing(exc_var) - if !is_identifier_like(exc_var) - throw(LoweringError(exc_var, "Expected an identifier as exception variable")) - end + [K"scope_block"(catch_, scope_type=:neutral) + if kind(exc_var) != K"Placeholder" [K"block" [K"="(exc_var) exc_var [K"the_exception"]] catch_ diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index b0cb64282adeb..3fdfbb962d4a6 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -10,11 +10,6 @@ function _register_kinds() # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" - # TODO: Emit "true" and "false" tokens as K"Bool" in parser to - # harmonize with K"Int" etc? 
- "Bool" - # An identifier composed entirely of underscores - "Placeholder" # A (quoted) `Symbol` "Symbol" # TODO: Use `meta` for inbounds and loopinfo etc? diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 3e741e376cb8b..216540eeef44b 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -221,9 +221,6 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" && all(==('_'), ex.name_val) @ast ctx ex ex=>K"Placeholder" - elseif k == K"true" || k == K"false" - # FIXME: Move this upstream into JuliaSyntax - @ast ctx ex (k == K"true")::K"Bool" elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || (is_operator(k) && is_leaf(ex)) # <- TODO: fix upstream: make operator *tokens* into identifiers layerid = get(ex, :scope_layer, ctx.current_layer.id) From c12b9e6b00c9c095617c91f851728f8a7f7b8074 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 15:25:00 +1000 Subject: [PATCH 0823/1109] Clean up and document syntax tree child access API Here I commit to a more consistent but simpler child access API for syntax trees, as informed by the JuliaLowering work so far: * `is_leaf(node)` is given a precise definition (previously `!haschildren()` - but that had issues - see JuliaLang/JuliaSyntax.jl#483) * `children(node)` returns the child list, or `nothing` if there are no children. The `nothing` might be seen as inconvenient, but mapping across the children of a leaf node is probably an error and one should probably branch on `is_leaf` first. * `numchildren(node)` is documented * `node[i]`, `node[i:j]` are documented to index into the child list We distinguish `GreenNode` and its implementation of `span` from `SyntaxNode` and its implementation of `byte_range` and `sourcetext` - these seem to just have very different APIs, at least as of now. 
I've deleted the questionable overloads of multidimensional `getindex` and the `child` function in favor of single dimensional getindex. I don't know whether anyone ever ended up using these. But I didn't and they didn't seem useful+consistent enough to keep the complexity. I've kept setindex! for now, to set a child of a `SyntaxNode`. Though I'm not sure this is a good idea to support by default. --- JuliaSyntax/docs/src/api.md | 39 +++++++++++- JuliaSyntax/src/green_tree.jl | 50 ++++++++++++--- JuliaSyntax/src/hooks.jl | 7 +-- JuliaSyntax/src/syntax_tree.jl | 107 ++++++++++---------------------- JuliaSyntax/test/green_node.jl | 9 +++ JuliaSyntax/test/runtests.jl | 6 -- JuliaSyntax/test/syntax_tree.jl | 44 ++++++------- JuliaSyntax/test/test_utils.jl | 5 +- 8 files changed, 149 insertions(+), 118 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index da9b81e1e5c3f..9fa1a37fb50ab 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -124,13 +124,46 @@ JuliaSyntax.SHORT_FORM_FUNCTION_FLAG ## Syntax trees -Syntax tree types: +Access to the children of a tree node is provided by the functions + +```@docs +JuliaSyntax.is_leaf +JuliaSyntax.numchildren +JuliaSyntax.children +``` + +For convenient access to the children, we also provide `node[i]`, `node[i:j]` +and `node[begin:end]` by implementing `Base.getindex()`, `Base.firstindex()` and +`Base.lastindex()`. We choose to return a view from `node[i:j]` to make it +non-allocating. + +Tree traversal is supported by using these functions along with the predicates +such as [`kind`](@ref) listed above. + +### Trees referencing the source ```@docs JuliaSyntax.SyntaxNode -JuliaSyntax.GreenNode ``` -Functions applicable to syntax trees include everything in the sections on +Functions applicable to `SyntaxNode` include everything in the sections on heads/kinds as well as the accessor functions in the source code handling section. 
+ +### Relocatable syntax trees + +[`GreenNode`](@ref) is a special low level syntax tree: it's "relocatable" in +the sense that it doesn't carry an absolute position in the source code or even +a reference to the source text. This allows it to be reused for incremental +parsing, but does make it a pain to work with directly! + +```@docs +JuliaSyntax.GreenNode +``` + +Green nodes only have a relative position so implement `span()` instead of +`byte_range()`: + +```@docs +JuliaSyntax.span +``` diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 8a4b46345c7c2..be55e4f685f8b 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -23,24 +23,58 @@ As implementation choices, we choose that: struct GreenNode{Head} head::Head span::UInt32 - args::Union{Nothing,Vector{GreenNode{Head}}} + children::Union{Nothing,Vector{GreenNode{Head}}} end -function GreenNode(head::Head, span::Integer, args=nothing) where {Head} - GreenNode{Head}(head, span, args) +function GreenNode(head::Head, span::Integer, children=nothing) where {Head} + GreenNode{Head}(head, span, children) end # Accessors / predicates -is_leaf(node::GreenNode) = isnothing(node.args) -children(node::GreenNode) = isnothing(node.args) ? () : node.args -span(node::GreenNode) = node.span +is_leaf(node::GreenNode) = isnothing(node.children) +children(node::GreenNode) = node.children +numchildren(node::GreenNode) = isnothing(node.children) ? 0 : length(node.children) head(node::GreenNode) = node.head +""" + span(node) + +Get the number of bytes this node covers in the source text. +""" +span(node::GreenNode) = node.span + +Base.getindex(node::GreenNode, i::Int) = children(node)[i] +Base.getindex(node::GreenNode, rng::UnitRange) = view(children(node), rng) +Base.firstindex(node::GreenNode) = 1 +Base.lastindex(node::GreenNode) = length(children(node)) + +""" +Get absolute position and span of the child of `node` at the given tree `path`. 
+""" +function child_position_span(node::GreenNode, path::Int...) + n = node + p = 1 + for index in path + cs = children(n) + for i = 1:index-1 + p += span(cs[i]) + end + n = cs[index] + end + return n, p, n.span +end + +function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) + _, p, span = child_position_span(node, path...) + q = p + span - 1 + highlight(io, source, p:q; kws...) +end + Base.summary(node::GreenNode) = summary(node.head) -Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.args), h) +Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.children), h) function Base.:(==)(n1::GreenNode, n2::GreenNode) - n1.head == n2.head && n1.span == n2.span && n1.args == n2.args + n1.head == n2.head && n1.span == n2.span && n1.children == n2.children end # Pretty printing diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 97189321bbb31..228f14b05d693 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -45,11 +45,8 @@ function _incomplete_tag(n::SyntaxNode, codelen) return :none end end - if kind(c) == K"error" && begin - cs = children(c) - length(cs) > 0 - end - for cc in cs + if kind(c) == K"error" && numchildren(c) > 0 + for cc in children(c) if kind(cc) == K"error" return :other end diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 02225795afc55..aa3d40091afd7 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -106,10 +106,36 @@ function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, end end +""" + is_leaf(node) + +Determine whether the node is a leaf of the tree. In our trees a "leaf" +corresponds to a single token in the source text. +""" is_leaf(node::TreeNode) = node.children === nothing -children(node::TreeNode) = (c = node.children; return c === nothing ? () : c) + +""" + children(node) + +Return an iterable list of children for the node. 
For leaves, return `nothing`. +""" +children(node::TreeNode) = node.children + +""" + numchildren(node) + +Return `length(children(node))` but possibly computed in a more efficient way. +""" numchildren(node::TreeNode) = (isnothing(node.children) ? 0 : length(node.children)) +Base.getindex(node::AbstractSyntaxNode, i::Int) = children(node)[i] +Base.getindex(node::AbstractSyntaxNode, rng::UnitRange) = view(children(node), rng) +Base.firstindex(node::AbstractSyntaxNode) = 1 +Base.lastindex(node::AbstractSyntaxNode) = length(children(node)) + +function Base.setindex!(node::SN, x::SN, i::Int) where {SN<:AbstractSyntaxNode} + children(node)[i] = x +end """ head(x) @@ -217,10 +243,12 @@ function Base.copy(node::TreeNode) # copy the container but not the data (ie, deep copy the tree, shallow copy the data). copy(::Expr) is similar # copy "un-parents" the top-level `node` that you're copying newnode = typeof(node)(nothing, is_leaf(node) ? nothing : typeof(node)[], copy(node.data)) - for child in children(node) - newchild = copy(child) - newchild.parent = newnode - push!(newnode, newchild) + if !is_leaf(node) + for child in children(node) + newchild = copy(child) + newchild.parent = newnode + push!(newnode, newchild) + end end return newnode end @@ -235,71 +263,4 @@ function build_tree(::Type{SyntaxNode}, stream::ParseStream; SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens) end -#------------------------------------------------------------------------------- -# Tree utilities - -""" - child(node, i1, i2, ...) - -Get child at a tree path. If indexing accessed children, it would be -`node[i1][i2][...]` -""" -function child(node, path::Integer...) - n = node - for index in path - n = children(n)[index] - end - return n -end - -function setchild!(node::SyntaxNode, path, x) - n1 = child(node, path[1:end-1]...) - n1.children[path[end]] = x -end - -# We can overload multidimensional Base.getindex / Base.setindex! for node -# types. 
-# -# The justification for this is to view a tree as a multidimensional ragged -# array, where descending depthwise into the tree corresponds to dimensions of -# the array. -# -# However... this analogy is only good for complete trees at a given depth (= -# dimension). But the syntax is oh-so-handy! -function Base.getindex(node::Union{SyntaxNode,GreenNode}, path::Int...) - child(node, path...) -end -function Base.lastindex(node::Union{SyntaxNode,GreenNode}) - length(children(node)) -end - -function Base.setindex!(node::SyntaxNode, x::SyntaxNode, path::Int...) - setchild!(node, path, x) -end - -""" -Get absolute position and span of the child of `node` at the given tree `path`. -""" -function child_position_span(node::GreenNode, path::Int...) - n = node - p = 1 - for index in path - cs = children(n) - for i = 1:index-1 - p += span(cs[i]) - end - n = cs[index] - end - return n, p, n.span -end - -function child_position_span(node::SyntaxNode, path::Int...) - n = child(node, path...) - n, n.position, span(n) -end - -function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) - _, p, span = child_position_span(node, path...) - q = p + span - 1 - highlight(io, source, p:q; kws...) 
-end +@deprecate haschildren(x) !is_leaf(x) false diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl index 020c212df8274..c3c4da40960ee 100644 --- a/JuliaSyntax/test/green_node.jl +++ b/JuliaSyntax/test/green_node.jl @@ -13,6 +13,15 @@ SyntaxHead(K"Identifier", 0x0000) ] + @test numchildren(t) == 5 + @test !is_leaf(t) + @test is_leaf(t[1]) + + @test t[1] === children(t)[1] + @test t[2:4] == [t[2],t[3],t[4]] + @test firstindex(t) == 1 + @test lastindex(t) == 5 + t2 = parsestmt(GreenNode, "aa + b") @test t == t2 @test t !== t2 diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 0fc08d08a1c29..3fd6227801482 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,12 +1,6 @@ using JuliaSyntax using Test -using JuliaSyntax: SourceFile - -using JuliaSyntax: GreenNode, SyntaxNode, - flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, - children, child, setchild!, SyntaxHead - include("test_utils.jl") include("test_utils_tests.jl") include("fuzz_test.jl") diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 97124384de51c..f647f1aec7dab 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -3,33 +3,35 @@ tt = "a*b + c" t = parsestmt(SyntaxNode, tt) - @test sourcetext(child(t, 1)) == "a*b" - @test sourcetext(child(t, 1, 1)) == "a" - @test sourcetext(child(t, 1, 2)) == "*" - @test sourcetext(child(t, 1, 3)) == "b" - @test sourcetext(child(t, 2)) == "+" - @test sourcetext(child(t, 3)) == "c" + @test sourcetext(t[1]) == "a*b" + @test sourcetext(t[1][1]) == "a" + @test sourcetext(t[1][2]) == "*" + @test sourcetext(t[1][3]) == "b" + @test sourcetext(t[2]) == "+" + @test sourcetext(t[3]) == "c" - @test JuliaSyntax.first_byte(child(t, 2)) == findfirst(==('+'), tt) - @test JuliaSyntax.source_line(child(t, 3)) == 1 - @test source_location(child(t, 3)) == (1, 7) + @test JuliaSyntax.first_byte(t[2]) == findfirst(==('+'), tt) + @test 
JuliaSyntax.source_line(t[3]) == 1 + @test source_location(t[3]) == (1, 7) # Child indexing - @test t[1] === child(t, 1) - @test t[1, 1] === child(t, 1, 1) - @test t[end] === child(t, 3) - # Unfortunately, can't make t[1, end] work - # as `lastindex(t, 2)` isn't well defined + @test t[end] === t[3] + @test sourcetext.(t[2:3]) == ["+", "c"] + @test sourcetext.(t[2:end]) == ["+", "c"] + @test firstindex(t) == 1 + @test lastindex(t) == 3 + @test !is_leaf(t) + @test is_leaf(t[3]) @test sprint(show, t) == "(call-i (call-i a * b) + c)" @test sprint(io->show(io, MIME("text/x.sexpression"), t, show_kind=true)) == "(call-i (call-i a::Identifier *::* b::Identifier) +::+ c::Identifier)" - @test sprint(highlight, child(t, 1, 3)) == "a*b + c\n# ╙" + @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" # Pass-through field access - node = child(t, 1, 1) + node = t[1][1] @test node.val === :a # The specific error text has evolved over Julia versions. 
Check that it involves `SyntaxData` and immutability e = try node.val = :q catch e e end @@ -40,20 +42,20 @@ ct = copy(t) ct.data = nothing @test ct.data === nothing && t.data !== nothing - @test child(ct, 1).parent === ct - @test child(ct, 1) !== child(t, 1) + @test ct[1].parent === ct + @test ct[1] !== t[1] node = parsestmt(SyntaxNode, "f()") push!(node, parsestmt(SyntaxNode, "x")) @test length(children(node)) == 2 node[2] = parsestmt(SyntaxNode, "y") - @test sourcetext(child(node, 2)) == "y" + @test sourcetext(node[2]) == "y" # SyntaxNode with offsets t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) @test t.position == 13 - @test child(t,1).position == 19 - @test child(t,1).val == :b + @test t[1].position == 19 + @test t[1].val == :b # Unicode character ranges src = "ab + αβ" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 32a1df945060b..7553bf1c09604 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -1,6 +1,6 @@ using Test -# We need a relative include here as JuliaSyntax my come from Base. +# We need a relative include here as JuliaSyntax may come from Base. 
using .JuliaSyntax: # Parsing ParseStream, @@ -23,14 +23,15 @@ using .JuliaSyntax: # Node inspection kind, flags, + EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, head, span, SyntaxHead, is_trivia, sourcetext, is_leaf, + numchildren, children, - child, fl_parseall, fl_parse, highlight, From d250d84ec276b1b9f336f646a0075bdbe87740d5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 17:20:23 +1000 Subject: [PATCH 0824/1109] Mark public symbols with `public` keyword --- JuliaSyntax/src/JuliaSyntax.jl | 84 ++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 4e2144ee73afa..4718824b8d61c 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -1,23 +1,81 @@ module JuliaSyntax -# Conservative list of exports - only export the most common/useful things -# here. +macro _public(syms) + if VERSION >= v"1.11" + names = syms isa Symbol ? [syms] : syms.args + esc(Expr(:public, names...)) + else + nothing + end +end + +# Public API, in the order of docs/src/api.md + +# Parsing. +export parsestmt, + parseall, + parseatom + +@_public parse!, + ParseStream, + build_tree -# Parsing. See also -# parse!(), ParseStream -export parsestmt, parseall, parseatom # Tokenization -export tokenize, Token, untokenize -# Source file handling. See also -# highlight() sourcetext() source_line() source_location() char_range() +@_public tokenize, + Token, + untokenize + +# Source file handling +@_public sourcefile, + byte_range, + char_range, + first_byte, + last_byte, + filename, + source_line, + source_location, + sourcetext, + highlight + export SourceFile -# Expression heads/kinds. See also -# flags() and related predicates. -export @K_str, kind, head -# Syntax tree types. 
See also -# GreenNode +@_public source_line_range + +# Expression predicates, kinds and flags +export @K_str, kind +@_public Kind + +@_public flags, + SyntaxHead, + head, + is_trivia, + is_prefix_call, + is_infix_op_call, + is_prefix_op_call, + is_postfix_op_call, + is_dotted, + is_suffixed, + is_decorated, + numeric_flags, + has_flags, + TRIPLE_STRING_FLAG, + RAW_STRING_FLAG, + PARENS_FLAG, + COLON_QUOTE, + TOPLEVEL_SEMICOLONS_FLAG, + MUTABLE_FLAG, + BARE_MODULE_FLAG, + SHORT_FORM_FUNCTION_FLAG + +# Syntax trees +@_public is_leaf, + numchildren, + children + export SyntaxNode +@_public GreenNode, + span + # Helper utilities include("utils.jl") From 6ba25019767dd5ac0e469a07d44f5aecd426fa4e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 8 Aug 2024 08:47:26 +1000 Subject: [PATCH 0825/1109] fixup! Mark public symbols with `public` keyword --- JuliaSyntax/src/JuliaSyntax.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 4718824b8d61c..9afff8725f980 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -21,7 +21,7 @@ export parsestmt, build_tree # Tokenization -@_public tokenize, +export tokenize, Token, untokenize From 8aabad797a9a87d16b78dcbb8235f8cf390cee76 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 9 Aug 2024 22:06:19 +0200 Subject: [PATCH 0826/1109] add some small type inference improvements to allow for some basic static compiling (JuliaLang/JuliaSyntax.jl#491) --- JuliaSyntax/src/parser.jl | 6 +++--- JuliaSyntax/src/utils.jl | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 496979645d60e..debd5028863c0 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -484,7 +484,7 @@ function parse_block(ps::ParseState, down=parse_eq, mark=position(ps)) end # Parse a block, but leave emitting the block up to the caller. 
-function parse_block_inner(ps::ParseState, down) +function parse_block_inner(ps::ParseState, down::F) where {F <: Function} parse_Nary(ps, down, KSet"NewlineWs ;", KSet"end else elseif catch finally") end @@ -1602,7 +1602,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind == K"vcat" ? K"typed_vcat" : ckind == K"comprehension" ? K"typed_comprehension" : ckind == K"ncat" ? K"typed_ncat" : - internal_error("unrecognized kind in parse_cat ", ckind) + internal_error("unrecognized kind in parse_cat ", string(ckind)) emit(ps, mark, outk, cflags) check_ncat_compat(ps, mark, ckind) end @@ -2020,7 +2020,7 @@ function parse_resword(ps::ParseState) elseif word == K"do" bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") else - internal_error("unhandled reserved word ", word) + internal_error("unhandled reserved word ", string(word)) end end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index cee9689e27984..3f95c48572f53 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -19,7 +19,7 @@ _unsafe_wrap_substring(s) = (s.offset, unsafe_wrap(Vector{UInt8}, s.string)) #-------------------------------------------------- # # Internal error, used as assertion failure for cases we expect can't happen. -@noinline function internal_error(strs...) +@noinline function internal_error(strs::Vararg{String, N}) where {N} error("Internal error: ", strs...) end @@ -27,7 +27,7 @@ end macro check(ex, msgs...) msg = isempty(msgs) ? 
ex : msgs[1] if isa(msg, AbstractString) - msg = msg + msg = String(msg) elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) msg = :(string($(esc(msg)))) else @@ -133,4 +133,3 @@ function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing, href=nothi first = false end end - From d94a32d22c02facd0ad8df3a398758428fe3faa7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 10 Aug 2024 08:22:35 +1000 Subject: [PATCH 0827/1109] Make resolve_scopes pass non-mutating --- JuliaLowering/src/ast.jl | 4 +- JuliaLowering/src/desugaring.jl | 10 ++-- JuliaLowering/src/eval.jl | 11 +++-- JuliaLowering/src/linear_ir.jl | 59 +++++++++++----------- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 57 +++++++++++---------- JuliaLowering/src/syntax_graph.jl | 6 +++ JuliaLowering/test/demo.jl | 74 ++++++++++++++++++++-------- 8 files changed, 138 insertions(+), 85 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 1da3355255261..7b457d9eb9af3 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -62,14 +62,14 @@ end _node_id(ex::NodeId) = ex _node_id(ex::SyntaxTree) = ex._id -_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_same_graph(graph, ex); ex._id) +_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_compatible_graph(graph, ex); ex._id) _node_ids(graph::SyntaxGraph) = () _node_ids(graph::SyntaxGraph, ::Nothing, cs...) = _node_ids(graph, cs...) _node_ids(graph::SyntaxGraph, c, cs...) = (_node_id(graph, c), _node_ids(graph, cs...)...) _node_ids(graph::SyntaxGraph, cs::SyntaxList, cs1...) = (_node_ids(graph, cs...)..., _node_ids(graph, cs1...)...) 
function _node_ids(graph::SyntaxGraph, cs::SyntaxList) - check_same_graph(graph, cs) + check_compatible_graph(graph, cs) cs.ids end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index ac68759cf200d..3e76dd863a5e1 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -127,8 +127,7 @@ function expand_let(ctx, ex) for binding in Iterators.reverse(children(ex[1])) kb = kind(binding) if is_sym_decl(kb) - blk = @ast ctx ex [ - K"scope_block"(scope_type=scope_type) + blk = @ast ctx ex [K"scope_block"(scope_type=scope_type) [K"local" binding] blk ] @@ -140,6 +139,7 @@ function expand_let(ctx, ex) K"block" tmp=rhs [K"scope_block"(ex, scope_type=scope_type) + # TODO: Use single child for scope_block? [K"local_def"(lhs) lhs] # TODO: Use K"local" with attr? [K"="(rhs) decl_var(lhs) @@ -464,7 +464,7 @@ function expand_function_def(ctx, ex, docs) if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) end - return @ast ctx ex [K"method" name] + return @ast ctx ex [K"method" name=>K"Symbol"] elseif kind(name) == K"call" callex = name body = ex[2] @@ -551,9 +551,9 @@ function expand_function_def(ctx, ex, docs) end @ast ctx ex [ K"block" - func = [K"method" function_name] + func = [K"method" function_name=>K"Symbol"] [K"method" - function_name + function_name=>K"Symbol" preamble [K"lambda"(body, lambda_info=LambdaInfo(arg_names, static_parameters, ret_var, false)) body diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index e99da70e34dd0..992ca67a3e9fe 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,7 +1,7 @@ function lower(mod::Module, ex) ctx1, ex1 = expand_forms_1(mod, ex) ctx2, ex2 = expand_forms_2(ctx1, ex1) - ctx3, ex3 = resolve_scopes!(ctx2, ex2) + ctx3, ex3 = resolve_scopes(ctx2, ex2) ctx4, ex4 = linearize_ir(ctx3, ex3) ex4 end @@ -178,7 +178,7 @@ end function to_lowered_expr(mod, bindings, ex) k = kind(ex) - if is_literal(k) || k == K"Bool" + 
if is_literal(k) ex.value elseif k == K"core" GlobalRef(Core, Symbol(ex.name_val)) @@ -227,6 +227,12 @@ function to_lowered_expr(mod, bindings, ex) Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" Core.GotoIfNot(to_lowered_expr(mod, bindings, ex[1]), ex[2].id) + elseif k == K"method" + name = ex[1] + @chk kind(name) == K"Symbol" + namesym = Symbol(name.name_val) + cs = map(e->to_lowered_expr(mod, bindings, e), ex[2:end]) + Expr(:method, namesym, cs...) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # @@ -236,7 +242,6 @@ function to_lowered_expr(mod, bindings, ex) # foreigncall new_opaque_closure lambda head = k == K"call" ? :call : k == K"=" ? :(=) : - k == K"method" ? :method : k == K"global" ? :global : k == K"const" ? :const : nothing diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index a85767c94d350..e2479843aca6b 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -50,18 +50,27 @@ function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) Dict{String,NodeId}(), ctx.mod) end +# FIXME: BindingId subsumes many things so need to assess what that means for these predicates. 
+# BindingId can be +# - local variable (previously K"Identifier") +# - implicit global variables in current module (previously K"Identifier") +# - globalref - from macros +# +# BindingId could also subsume +# - top,core + function is_valid_body_ir_argument(ex) is_valid_ir_argument(ex) && return true return false # FIXME k = kind(ex) - return k == K"Identifier" && # Arguments are always defined slots + return k == K"BindingId" && # Arguments are always defined slots TODO("vinfo-table stuff") end function is_simple_arg(ex) k = kind(ex) - return is_simple_atom(ex) || k == K"Identifier" || k == K"quote" || k == K"inert" || + return is_simple_atom(ex) || k == K"BindingId" || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"globalref" || k == K"outerref" end @@ -81,7 +90,7 @@ end function is_valid_ir_rvalue(ctx, lhs, rhs) return is_ssa(ctx, lhs) || is_valid_ir_argument(rhs) || - (kind(lhs) == K"Identifier" && + (kind(lhs) == K"BindingId" && # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref kind(rhs) in KSet"new the_exception call foreigncall") end @@ -233,9 +242,8 @@ end # TODO: Is it ok to return `nothing` if we have no value in some sense? 
function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) - if k == K"Identifier" || is_literal(k) || k == K"BindingId" || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" || - k == K"Bool" + if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || + k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall if needs_value && k == K"Placeholder" # TODO: ensure outterref, globalref work here @@ -246,7 +254,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif needs_value ex else - if k == K"Identifier" + if k == K"BindingId" && !is_ssa(ctx, ex) emit(ctx, ex) # keep identifiers for undefined-var checking end nothing @@ -445,35 +453,30 @@ end # flisp: renumber-stuff function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) k = kind(ex) - if k == K"Identifier" + if k == K"BindingId" id = ex.var_id - slot_id = get(slot_rewrites, id, nothing) - if !isnothing(slot_id) - makeleaf(ctx, ex, K"slot"; var_id=slot_id) + if haskey(ssa_rewrites, id) + makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[id]) else - # TODO: look up any static parameters - info = lookup_binding(ctx, id) - if info.kind === :global - makeleaf(ctx, ex, K"globalref", ex.name_val, mod=info.mod) + slot_id = get(slot_rewrites, id, nothing) + if !isnothing(slot_id) + makeleaf(ctx, ex, K"slot"; var_id=slot_id) else - TODO(ex, "Identifier which is not a slot or global?") + # TODO: look up any static parameters + # TODO: Should we defer rewriting globals to globalref until + # CodeInfo generation? 
+ info = lookup_binding(ctx, id) + if info.kind === :global + makeleaf(ctx, ex, K"globalref", info.name, mod=info.mod) + else + TODO(ex, "Bindings of kind $(info.kind)") + end end end elseif k == K"outerref" || k == K"meta" TODO(ex, "_renumber $k") - elseif is_literal(k) || is_quoted(k) || k == K"global" + elseif is_literal(k) || is_quoted(k) ex - elseif k == K"BindingId" - # TODO: This case should replace K"Identifier" completely. For now only - # SSA variables go through here. Instead, we should branch on ssa_rewrites. - id = ex.var_id - if haskey(ssa_rewrites, id) - makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[id]) - else - slot_id = get(slot_rewrites, id, nothing) - @assert !isnothing(slot_id) - makeleaf(ctx, ex, K"slot"; var_id=slot_id) - end elseif k == K"enter" TODO(ex, "_renumber $k") elseif k == K"goto" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 216540eeef44b..82b2552063267 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -137,7 +137,7 @@ function eval_macro_name(ctx, ex) # Instead, we repeat the latter parts of `lower()` here. 
ex1 = expand_forms_1(ctx, ex) ctx2, ex2 = expand_forms_2(ctx, ex1) - ctx3, ex3 = resolve_scopes!(ctx2, ex2) + ctx3, ex3 = resolve_scopes(ctx2, ex2) ctx4, ex4 = linearize_ir(ctx3, ex3) mod = ctx.current_layer.mod expr_form = to_lowered_expr(mod, ex4.bindings, ex4) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index bd7b42a988d50..ccf6ee909bca3 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -198,7 +198,7 @@ function init_binding(ctx, varkey::NameKey, kind::Symbol, is_ambiguous_local=fal end # Analyze identifier usage within a scope, adding all newly discovered -# identifiers to ctx.bindings and constructing a lookup table from identifier +# identifiers to ctx.bindings and returning a lookup table from identifier # names to their variable IDs function analyze_scope(ctx, ex, scope_type, lambda_info) parentscope = isempty(ctx.scope_stack) ? nothing : ctx.scope_stack[end] @@ -340,16 +340,18 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) return ScopeInfo(in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_locals) end -function _resolve_scopes!(ctx, ex) +function _resolve_scopes(ctx, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" id = lookup_var(ctx, NameKey(ex)) - setattr!(ctx.graph, ex._id, var_id=id) + @ast ctx ex id::K"BindingId" elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" - return + ex # TODO # elseif k == K"global" + # ex # elseif k == K"local" + # nothing_(ctx, ex) # elseif require_existing_local # elseif locals # return Dict of locals # elseif islocal @@ -357,44 +359,47 @@ function _resolve_scopes!(ctx, ex) lambda_info = ex.lambda_info scope = analyze_scope(ctx, ex, nothing, lambda_info) push!(ctx.scope_stack, scope) - # Resolve args and static parameters so that variable IDs get pushed - # back into the original tree (not required for downstream processing) - for a in lambda_info.args - _resolve_scopes!(ctx, a) - end - for a in 
lambda_info.static_parameters - _resolve_scopes!(ctx, a) - end - for e in children(ex) - _resolve_scopes!(ctx, e) - end + arg_bindings = _resolve_scopes(ctx, lambda_info.args) + sparm_bindings = _resolve_scopes(ctx, lambda_info.static_parameters) + body = _resolve_scopes(ctx, only(children(ex))) pop!(ctx.scope_stack) - setattr!(ctx.graph, ex._id, lambda_locals=scope.lambda_locals) + # TODO: add a lambda locals field to lambda_info or make a new struct + # containing the additional info ?? + new_info = LambdaInfo(arg_bindings, sparm_bindings, + lambda_info.ret_var, lambda_info.is_toplevel_thunk) + makenode(ctx, ex, K"lambda", body; lambda_info=new_info, lambda_locals=scope.lambda_locals) elseif k == K"scope_block" scope = analyze_scope(ctx, ex, ex.scope_type, nothing) push!(ctx.scope_stack, scope) + body = SyntaxList(ctx) for e in children(ex) - _resolve_scopes!(ctx, e) + push!(body, _resolve_scopes(ctx, e)) end + body pop!(ctx.scope_stack) + @ast ctx ex [K"block" body...] else - for e in children(ex) - _resolve_scopes!(ctx, e) - end + mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + end +end + +function _resolve_scopes(ctx, exs::AbstractVector) + out = SyntaxList(ctx) + for e in exs + push!(out, _resolve_scopes(ctx, e)) end - ex + out end -function resolve_scopes!(ctx::ScopeResolutionContext, ex) +function resolve_scopes(ctx::ScopeResolutionContext, ex) thunk = makenode(ctx, ex, K"lambda", ex; lambda_info=LambdaInfo(SyntaxList(ctx), SyntaxList(ctx), nothing, true)) - _resolve_scopes!(ctx, thunk) - return thunk + return _resolve_scopes(ctx, thunk) end -function resolve_scopes!(ctx::DesugaringContext, ex) +function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) - res = resolve_scopes!(ctx2, reparent(ctx2, ex)) + res = resolve_scopes(ctx2, reparent(ctx2, ex)) ctx2, res end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index c3b3fb063a872..3fc69af928f75 100644 --- a/JuliaLowering/src/syntax_graph.jl 
+++ b/JuliaLowering/src/syntax_graph.jl @@ -179,6 +179,12 @@ function check_same_graph(x, y) end end +function check_compatible_graph(x, y) + if !is_compatible_graph(x, y) + error("Incompatible syntax graphs") + end +end + function is_compatible_graph(x, y) syntax_graph(x).edges === syntax_graph(y).edges end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 632559d4064cb..0396d127718f2 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -29,13 +29,14 @@ function formatsrc(ex; kws...) Text(JuliaSyntaxFormatter.formatsrc(ex; kws...)) end -function annotate_scopes(mod, ex) - ex = ensure_attributes(ex, var_id=Int) - ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex) - ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) - ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) - ex -end +# Currently broken - need to push info back onto src +# function annotate_scopes(mod, ex) +# ex = ensure_attributes(ex, var_id=Int) +# ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex) +# ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) +# ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) +# ex +# end #------------------------------------------------------------------------------- # Demos of the prototype @@ -158,13 +159,6 @@ src = """ M.@recursive 3 """ -# src = """ -# begin -# M.@set_a_global 1000 -# M.a_global -# end -# """ - # src = """ # M.@set_global_in_parent "bent hygiene!" 
# """ @@ -251,19 +245,59 @@ end src = """ for i in [3,1,2] - println("i = ", i) + println("i = ", i, ", j = ", j) end """ +# src = """ +# @ccall f()::T +# """ +# +# src = """ +# begin +# a = 1 +# xs = [:(a),] +# x = :(:(\$(\$(xs...)))) +# end +# """ + +# src = """ +# try +# a +# catch exc +# b +# end +# """ + src = """ -@ccall f()::T +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + i = 100 + end + a +end """ src = """ begin - a = 1 - xs = [:(a),] - x = :(:(\$(\$(xs...)))) + function f(x) + y = x + 1 + "hi", x, y + end + + f(1) +end +""" + +src = """ +begin + local a, b, c + if a + b + end + c end """ @@ -280,7 +314,7 @@ ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) @info "Desugared" ex_desugar formatsrc(ex_desugar, color_by=:scope_layer) -ctx3, ex_scoped = JuliaLowering.resolve_scopes!(ctx2, ex_desugar) +ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) @info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=:var_id) ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) From afc9363d9cf55d9ee200bb998a1e12022e2cffbf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 11 Aug 2024 12:49:49 +1000 Subject: [PATCH 0828/1109] Simplify var_id formatting --- JuliaLowering/src/syntax_graph.jl | 2 +- JuliaLowering/test/branching_ir.jl | 62 +++++++++++++++--------------- JuliaLowering/test/loops_ir.jl | 50 ++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 32 deletions(-) create mode 100644 JuliaLowering/test/loops_ir.jl diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 3fc69af928f75..c8278373a2fb9 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -425,7 +425,7 @@ function _value_string(ex) idstr = replace(string(id), "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄", "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") - str = "$(str).$idstr" + str = 
"$(str)$idstr" end if k == K"slot" || k == K"BindingId" p = provenance(ex)[1] diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index b9824fc27d0a7..a4f6a54a0115e 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -7,12 +7,12 @@ begin end end #------------------------- -1 slot.₁/a -2 (gotoifnot %.₁ label.₅) -3 slot.₂/b -4 (return %.₃) +1 slot₁/a +2 (gotoifnot %₁ label₅) +3 slot₂/b +4 (return %₃) 5 core.nothing -6 (return %.₅) +6 (return %₅) ###################################### # Branching, !tail && !value @@ -24,11 +24,11 @@ begin c end #------------------------- -1 slot.₁/a -2 (gotoifnot %.₁ label.₄) -3 slot.₂/b -4 slot.₃/c -5 (return %.₄) +1 slot₁/a +2 (gotoifnot %₁ label₄) +3 slot₂/b +4 slot₃/c +5 (return %₄) ###################################### # Branching with else @@ -41,12 +41,12 @@ begin end end #--------------------- -1 slot.₁/a -2 (gotoifnot %.₁ label.₅) -3 slot.₂/b -4 (return %.₃) -5 slot.₃/c -6 (return %.₅) +1 slot₁/a +2 (gotoifnot %₁ label₅) +3 slot₂/b +4 (return %₃) +5 slot₃/c +6 (return %₅) ###################################### # Branching with else, !tail && !value @@ -60,13 +60,13 @@ begin d end #--------------------- -1 slot.₁/a -2 (gotoifnot %.₁ label.₅) -3 slot.₂/b -4 (goto label.₆) -5 slot.₃/c -6 slot.₄/d -7 (return %.₆) +1 slot₁/a +2 (gotoifnot %₁ label₅) +3 slot₂/b +4 (goto label₆) +5 slot₃/c +6 slot₄/d +7 (return %₆) ###################################### # Blocks compile directly to branches @@ -77,12 +77,12 @@ begin end end #--------------------- -1 slot.₁/a -2 slot.₂/b -3 (gotoifnot %.₂ label.₈) -4 slot.₃/c -5 (gotoifnot %.₄ label.₈) -6 slot.₄/d -7 (return %.₆) +1 slot₁/a +2 slot₂/b +3 (gotoifnot %₂ label₈) +4 slot₃/c +5 (gotoifnot %₄ label₈) +6 slot₄/d +7 (return %₆) 8 core.nothing -9 (return %.₈) +9 (return %₈) diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl new file mode 100644 index 0000000000000..d1149086401af --- /dev/null 
+++ b/JuliaLowering/test/loops_ir.jl @@ -0,0 +1,50 @@ +######################################## +# Basic while loop +while f(a) + body1 + body2 +end +#---------- +1 TestMod.f +2 TestMod.a +3 (call %₁ %₂) +4 (gotoifnot %₃ label₈) +5 TestMod.body1 +6 TestMod.body2 +7 (goto label₁) +8 core.nothing +9 (return %₈) +######################################## +# While loop with short circuit condition +while a && b + body +end +#---------- +1 TestMod.a +2 (gotoifnot %₁ label₇) +3 TestMod.b +4 (gotoifnot %₃ label₇) +5 TestMod.body +6 (goto label₁) +7 core.nothing +8 (return %₇) +######################################## +# While loop with with break and continue +while cond + body1 + break + body2 + continue + body3 +end +#--------- +1 TestMod.cond +2 (gotoifnot %₁ label₉) +3 TestMod.body1 +4 (goto label₉) +5 TestMod.body2 +6 (goto label₈) +7 TestMod.body3 +8 (goto label₁) +9 core.nothing +10 (return %₉) From 474bd7df8c6c7173be98b6604c1185652678d04d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 11 Aug 2024 12:50:00 +1000 Subject: [PATCH 0829/1109] Multidimensional for loop lowering + `outer` var checks --- JuliaLowering/src/ast.jl | 101 +++++++------ JuliaLowering/src/desugaring.jl | 138 +++++++++++++----- JuliaLowering/src/kinds.jl | 7 +- JuliaLowering/src/linear_ir.jl | 6 + JuliaLowering/src/scope_analysis.jl | 28 +++- JuliaLowering/test/loops.jl | 216 +++++++++++++++++++++++++++- 6 files changed, 398 insertions(+), 98 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 7b457d9eb9af3..bb7746ccecd63 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -57,6 +57,53 @@ function syntax_graph(ctx::AbstractLoweringContext) ctx.graph end +#------------------------------------------------------------------------------- +# @chk: Basic AST structure checking tool +# +# Check a condition involving an expression, throwing a LoweringError if it +# doesn't evaluate to true. 
Does some very simple pattern matching to attempt +# to extract the expression variable from the left hand side. +# +# Forms: +# @chk pred(ex) +# @chk pred(ex) msg +# @chk pred(ex) (msg_display_ex, msg) +macro chk(cond, msg=nothing) + if Meta.isexpr(msg, :tuple) + ex = msg.args[1] + msg = msg.args[2] + else + ex = cond + while true + if ex isa Symbol + break + elseif ex.head == :call + ex = ex.args[2] + elseif ex.head == :ref + ex = ex.args[1] + elseif ex.head == :. + ex = ex.args[1] + elseif ex.head in (:(==), :(in), :<, :>) + ex = ex.args[1] + else + error("Can't analyze $cond") + end + end + end + quote + ex = $(esc(ex)) + @assert ex isa SyntaxTree + ok = try + $(esc(cond)) + catch + false + end + if !ok + throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg)))) + end + end +end + #------------------------------------------------------------------------------- # AST creation utilities _node_id(ex::NodeId) = ex @@ -400,6 +447,13 @@ function is_quoted(ex) meta inbounds inline noinline loopinfo" end +function is_assertion(ex, type) + kind(ex) == K"assert" || return false + @chk numchildren(ex) >= 1 + @chk kind(ex[1]) == K"Symbol" + return ex[1].name_val == type +end + function is_sym_decl(x) k = kind(x) k == K"Identifier" || k == K"::" @@ -453,50 +507,3 @@ function assigned_name(ex) end end -#------------------------------------------------------------------------------- -# @chk: Basic AST structure checking tool -# -# Check a condition involving an expression, throwing a LoweringError if it -# doesn't evaluate to true. Does some very simple pattern matching to attempt -# to extract the expression variable from the left hand side. 
-# -# Forms: -# @chk pred(ex) -# @chk pred(ex) msg -# @chk pred(ex) (msg_display_ex, msg) -macro chk(cond, msg=nothing) - if Meta.isexpr(msg, :tuple) - ex = msg.args[1] - msg = msg.args[2] - else - ex = cond - while true - if ex isa Symbol - break - elseif ex.head == :call - ex = ex.args[2] - elseif ex.head == :ref - ex = ex.args[1] - elseif ex.head == :. - ex = ex.args[1] - elseif ex.head in (:(==), :(in), :<, :>) - ex = ex.args[1] - else - error("Can't analyze $cond") - end - end - end - quote - ex = $(esc(ex)) - @assert ex isa SyntaxTree - ok = try - $(esc(cond)) - catch - false - end - if !ok - throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg)))) - end - end -end - diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 3e76dd863a5e1..445ced2596182 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -228,57 +228,118 @@ function expand_dot(ctx, ex) ) end +function foreach_lhs_var(f::Function, ex) + k = kind(ex) + if k == K"Identifier" + f(ex) + elseif k == K"Placeholder" + # Ignored + else + TODO(ex, "LHS vars") + end +end + function expand_for(ctx, ex) iterspecs = ex[1] @chk kind(iterspecs) == K"iteration" - @chk numchildren(iterspecs) == 1 - iterspec = iterspecs[1] - iter_var = iterspec[1] - iter_ex = iterspec[2] + # Loop variables not declared `outer` are reassigned for each iteration of + # the innermost loop in case the user assigns them to something else. + # (Maybe we should filter these to remove vars not assigned in the loop? + # But that would ideally happen after the variable analysis pass, not + # during desugaring.) 
+ copied_vars = SyntaxList(ctx) + for iterspec in iterspecs[1:end-1] + @chk kind(iterspec) == K"in" + lhs = iterspec[1] + if kind(lhs) != K"outer" + foreach_lhs_var(lhs) do var + @chk kind(var) == K"Identifier" + push!(copied_vars, @ast ctx var [K"=" var var]) + end + end + end + + loop = ex[2] + for i in numchildren(iterspecs):-1:1 + iterspec = iterspecs[i] + lhs = iterspec[1] - # TODO: multiple iteration variables - @assert is_identifier_like(iter_var) + outer = kind(lhs) == K"outer" + lhs_local_defs = SyntaxList(ctx) + lhs_outer_defs = SyntaxList(ctx) + if outer + lhs = lhs[1] + end + foreach_lhs_var(lhs) do var + if outer + push!(lhs_outer_defs, @ast ctx var var) + else + push!(lhs_local_defs, @ast ctx var [K"local" var]) + end + end - next = new_mutable_var(ctx, iterspec, "next") - state = ssavar(ctx, iterspec, "state") - collection = ssavar(ctx, iter_ex, "collection") + iter_ex = iterspec[2] + next = new_mutable_var(ctx, iterspec, "next") + state = ssavar(ctx, iterspec, "state") + collection = ssavar(ctx, iter_ex, "collection") - # Inner body - inner_body = ex[2] - body = @ast ctx inner_body [K"block" - [K"=" [K"tuple" iter_var state] next] - inner_body - ] - body = @ast ctx inner_body [K"break_block" - "loop_cont"::K"symbolic_label" - [K"let"(scope_type=:neutral) - [K"block" - # TODO: copied-vars - ] - body + # Assign iteration vars and next state + body = @ast ctx iterspec [K"block" + lhs_local_defs... + [K"=" [K"tuple" lhs state] next] + loop ] - ] - # Nearly all this machinery is lowering of the iteration specification, so - # most gets attributed to `iterspec`. 
- loop = @ast ctx ex [K"block" - [K"="(iter_ex) collection iter_ex] - # next = top.iterate(collection) - [K"="(iterspec) next [K"call" "iterate"::K"top" collection]] - # TODO if outer require-existing-local - [K"if"(iterspec) # if next !== nothing - [K"call"(iterspec) "not_int"::K"top" [K"call" "==="::K"core" next "nothing"::K"core"]] - [K"_do_while"(ex) - [K"block" - body - [K"="(iterspec) next [K"call" "iterate"::K"top" collection state]] + body = if i == numchildren(iterspecs) + # Innermost loop gets the continue label and copied vars + @ast ctx ex [K"break_block" + "loop_cont"::K"symbolic_label" + [K"let"(scope_type=:neutral) + [K"block" + copied_vars... + ] + body + ] + ] + else + # Outer loops get a scope block to contain the iteration vars + @ast ctx ex [K"scope_block"(scope_type=:neutral) + body + ] + end + + loop = @ast ctx ex [K"block" + if outer + [K"assert" + "require_existing_locals"::K"Symbol" + lhs_outer_defs... + ] + end + [K"="(iter_ex) collection iter_ex] + # First call to iterate is unrolled + # next = top.iterate(collection) + [K"="(iterspec) next [K"call" "iterate"::K"top" collection]] + [K"if"(iterspec) # if next !== nothing + [K"call"(iterspec) + "not_int"::K"top" + [K"call" "==="::K"core" next "nothing"::K"core"] + ] + [K"_do_while"(ex) + [K"block" + body + # Advance iterator + [K"="(iterspec) next [K"call" "iterate"::K"top" collection state]] + ] + [K"call"(iterspec) + "not_int"::K"top" + [K"call" "==="::K"core" next "nothing"::K"core"] + ] ] - [K"call"(iterspec) "not_int"::K"top" [K"call" "==="::K"core" next "nothing"::K"core"]] ] ] - ] + end @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" loop @@ -344,7 +405,6 @@ function expand_try(ctx, ex) ] end - # Add finally block if isnothing(finally_) try_block else diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 3fdfbb962d4a6..b91fb8ba4196e 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -7,6 +7,9 @@ function _register_kinds() # 
Compiler metadata hints "meta" "extension" + # Semantic assertions used by lowering. The content of an assertion + # is not considered to be quoted, so use K"Symbol" inside where necessary. + "assert" # A literal Julia value of any kind, as might be inserted by the AST # during macro expansion "Value" @@ -21,9 +24,6 @@ function _register_kinds() "SSAValue" # Unique identifying integer for bindings (of variables, constants, etc) "BindingId" - # Scope expressions `(hygienic_scope ex s)` mean `ex` should be - # interpreted as being in scope `s`. - "hygienic_scope" # Various heads harvested from flisp lowering. # (TODO: May or may not need all these - assess later) "break_block" @@ -35,7 +35,6 @@ function _register_kinds() "top" "core" "toplevel_butfirst" - "thunk" "lambda" "moved_local" "the_exception" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index e2479843aca6b..9387617699edf 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -259,6 +259,12 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end + elseif k == K"assert" + # Elide these - they're no longer required. 
+ if needs_value + throw(LoweringError(ex, "misplaced semantic assertion")) + end + nothing elseif k == K"call" # TODO k ∈ splatnew foreigncall cfunction new_opaque_closure cglobal args = compile_args(ctx, children(ex)) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index ccf6ee909bca3..161f0e730e4d3 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -56,7 +56,7 @@ struct NameKey end #------------------------------------------------------------------------------- -function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex) +function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, ex) k = kind(ex) if k == K"Identifier" push!(used_names, NameKey(ex)) @@ -69,16 +69,20 @@ function _find_scope_vars!(assignments, locals, globals, used_names, used_bindin get!(locals, NameKey(ex[1]), ex) elseif k == K"global" get!(globals, NameKey(ex[1]), ex) + elseif is_assertion(ex, "require_existing_locals") + for v in ex[2:end] + get!(required_locals, NameKey(v), v) + end # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) if !(kind(v) in KSet"BindingId globalref outerref Placeholder") get!(assignments, NameKey(v), v) end - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex[2]) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, ex[2]) else for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, e) end end end @@ -94,8 +98,9 @@ function find_scope_vars(ex) globals = Dict{NameKey,ExT}() used_names = Set{NameKey}() used_bindings = Set{IdTag}() + required_locals = Dict{NameKey,ExT}() for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) + 
_find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, e) end # Sort by key so that id generation is deterministic @@ -104,8 +109,9 @@ function find_scope_vars(ex) globals = sort(collect(pairs(globals)), by=first) used_names = sort(collect(used_names)) used_bindings = sort(collect(used_bindings)) + required_locals = sort(collect(pairs(required_locals)), by=first) - return assignments, locals, globals, used_names, used_bindings + return assignments, locals, globals, used_names, used_bindings, required_locals end function Base.isless(a::NameKey, b::NameKey) @@ -206,7 +212,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) is_toplevel = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk in_toplevel_thunk = is_toplevel || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) - assignments, locals, globals, used, used_bindings = find_scope_vars(ex) + assignments, locals, globals, used, used_bindings, required_locals = find_scope_vars(ex) # Create new lookup table for variables in this scope which differ from the # parent scope. @@ -323,6 +329,16 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end + # Check that any required locals are present + for (varkey,e) in required_locals + vk = haskey(var_ids, varkey) ? + lookup_binding(ctx, var_ids[varkey]).kind : + var_kind(ctx, varkey, true) + if vk !== :local + throw(LoweringError(e, "`outer` annotations must match with a local variable in an outer scope but no such variable was found")) + end + end + lambda_locals = is_outer_lambda_scope ? 
Set{IdTag}() : parentscope.lambda_locals for id in values(var_ids) vk = var_kind(ctx, id) diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index a5a9c0fb0d93a..b63d1a1a7a840 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -1,5 +1,5 @@ -@testset "loops" begin +@testset "while loops" begin test_mod = Module() @@ -55,6 +55,218 @@ continue test_ir_cases(joinpath(@__DIR__, "loops_ir.jl")) -# TODO: Test scope rules +# TODO: Test soft scope rules + +end + + +@testset "for loops" begin + +test_mod = Module() + +# iteration +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:3 + push!(a, i) + end + a +end +""") == [1,2,3] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:0 + push!(a, i) + end + a +end +""") == [] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for _ = 1:3 + push!(a, 1) + end + a +end +""") == [1, 1, 1] + +# break +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:6 + if i == 3 + break + end + push!(a, i) + end + a +end +""") == [1, 2] + +# continue +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:6 + if isodd(i) + continue + end + push!(a, i) + end + a +end +""") == [2, 4, 6] + +# Loop variable scope +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:3 + push!(a, i) + i = 100 + end + a +end +""") == [1,2,3] + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + for i = 1:3 + end + i +end +""") == 100 + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + for outer i = 1:2 + nothing + end + i +end +""") == 2 + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +let + for outer i = 1:2 + nothing + end + i +end +""") + +end + + +@testset "multidimensional for loops" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + end + a +end 
+""") == [(1,3), (1,4), (2,3), (2,4)] + +@testset "break/continue" begin +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + break + end + a +end +""") == [(1,3)] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:4, j = 3:4 + if isodd(i) + continue + end + push!(a, (i,j)) + end + a +end +""") == [(2,3), (2,4), (4,3), (4,4)] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 1:4 + if isodd(j) + continue + end + push!(a, (i,j)) + end + a +end +""") == [(1,2), (1,4), (2,2), (2,4)] +end + + +@testset "Loop variable scope" begin + +# Test that `i` is copied in the inner loop +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + i = 100 + end + a +end +""") == [(1,3), (1,4), (2,3), (2,4)] + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + j = 200 + for i = 1:2, j = 3:4 + nothing + end + (i,j) +end +""") == (100,200) + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + j = 200 + for outer i = 1:2, j = 3:4 + nothing + end + (i,j) +end +""") == (2,200) + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + j = 200 + for i = 1:2, outer j = 3:4 + nothing + end + (i,j) +end +""") == (100,4) + +end end From 56378b62922d7da984a578c93c062a0e817cabd6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 11 Aug 2024 15:01:29 +1000 Subject: [PATCH 0830/1109] Add demo `@ccall` implementation This was a demo for JuliaCon but it's actually pretty close to functional so we shouldn't loose it. 
--- JuliaLowering/test/ccall_demo.jl | 129 +++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 JuliaLowering/test/ccall_demo.jl diff --git a/JuliaLowering/test/ccall_demo.jl b/JuliaLowering/test/ccall_demo.jl new file mode 100644 index 0000000000000..62c859a7e4d56 --- /dev/null +++ b/JuliaLowering/test/ccall_demo.jl @@ -0,0 +1,129 @@ +module CCall + +using JuliaSyntax, JuliaLowering +using JuliaLowering: is_identifier_like, numchildren, children, MacroExpansionError, @ast, SyntaxTree + +# Hacky utils +# macro K_str(str) +# convert(JuliaSyntax.Kind, str[1].value) +# end +# +# # Needed because we can't lower kwarg calls yet ehehe :-/ +# function mac_ex_error(ex, msg, pos) +# kwargs = Core.apply_type(Core.NamedTuple, (:position,))((pos,)) +# Core.kwcall(kwargs, MacroExpansionError, ex, msg) +# end + +macro ast_str(str) + ex = parsestmt(JuliaLowering.SyntaxTree, str, filename=string(__source__.file)) + ctx1, ex1 = JuliaLowering.expand_forms_1(__module__, ex) + @assert kind(ex1) == K"call" && ex1[1].value === JuliaLowering.interpolate_ast + cs = map(e->esc(Expr(e)), ex1[3:end]) + :(JuliaLowering.interpolate_ast($(ex1[2][1]), $(cs...))) +end + +function ccall_macro_parse(ex) + if kind(ex) != K"::" + throw(MacroExpansionError(ex, "Expected a return type annotation like `::T`", position=:end)) + end + + rettype = ex[2] + call = ex[1] + if kind(call) != K"call" + throw(MacroExpansionError(call, "Expected function call syntax `f()`")) + end + + # get the function symbols + func = let f = call[1], kf = kind(f) + if kf == K"." 
+ @ast ex ex [K"tuple" f[2]=>K"Symbol" f[1]] + elseif kf == K"$" + f + elseif kf == K"Identifier" + @ast ex ex f=>K"Symbol" + else + throw(MacroExpansionError(f, + "Function name must be a symbol like `foo`, a library and function name like `libc.printf` or an interpolated function pointer like `\$ptr`")) + end + end + + varargs = nothing + + # collect args and types + args = SyntaxTree[] + types = SyntaxTree[] + + function pusharg!(arg) + if kind(arg) != K"::" + throw(MacroExpansionError(arg, "argument needs a type annotation like `::T`")) + end + push!(args, arg[1]) + push!(types, arg[2]) + end + + varargs = nothing + num_varargs = 0 + for e in call[2:end] + if kind(e) == K"parameters" + num_varargs == 0 || throw(MacroExpansionError(e, "Multiple parameter blocks not allowed")) + num_varargs = numchildren(e) + num_varargs > 0 || throw(MacroExpansionError(e, "C ABI prohibits vararg without one required argument")) + varargs = children(e) + else + pusharg!(e) + end + end + if !isnothing(varargs) + for e in varargs + pusharg!(e) + end + end + + return func, rettype, types, args, num_varargs +end + +function ccall_macro_lower(ex, convention, func, rettype, types, args, num_varargs) + statements = SyntaxTree[] + if kind(func) == K"$" + check = ast"""quote + func = $(func[1]) + if !isa(func, Ptr{Cvoid}) + name = :($(func[1])) + throw(ArgumentError("interpolated function `$name` was not a `Ptr{Cvoid}`, but $(typeof(func))")) + end + end""" + func = check[1][1] + push!(statements, check) + end + + roots = SyntaxTree[] + cargs = SyntaxTree[] + for (i, (type, arg)) in enumerate(zip(types, args)) + argi = @ast ex arg "arg$i"::K"Identifier" + # TODO: Is there any safe way to use SSAValue here? 
+ push!(statements, ast":(local $argi = Base.cconvert($type, $arg))") + push!(roots, argi) + push!(cargs, ast":(Base.unsafe_convert($type, $argi))") + end + push!(statements, + @ast ex ex [K"foreigncall" + func + rettype + ast":(Core.svec($(types...)))" + num_varargs::K"Integer" + convention::K"Symbol" + cargs... + roots... + ]) + + @ast ex ex [K"block" + statements... + ] +end + +function var"@ccall"(ctx::JuliaLowering.MacroContext, ex) + ccall_macro_lower(ex, "ccall", ccall_macro_parse(ex)...) +end + +end + From 83feda4460f108187baa917c17a0927cf2dedaae Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 11 Aug 2024 15:16:36 +1000 Subject: [PATCH 0831/1109] Docs for identifier->binding transform done by scope analysis --- JuliaLowering/README.md | 3 ++- JuliaLowering/src/scope_analysis.jl | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 4071810602d34..445624324e443 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -44,7 +44,8 @@ Lowering has five symbolic simplification passes: number of syntactic forms. 3. Scope analysis - analyzing identifier names used in the code to discover local variables, closure captures, and associate global variables to the - appropriate module. + appropriate module. Transform all names (kind `K"Identifier"`) into binding + IDs (kind `K"BindingId"`) which can be looked up in a table of bindings. 4. Closure conversion - convert closures to types and deal with captured variables efficiently where possible. 5. 
Flattening to linear IR - convert code in hierarchical tree form to a diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 161f0e730e4d3..a2587782ae1a9 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -1,15 +1,4 @@ -# Lowering pass 3: analyze scopes (passes 2/3 in flisp code) -# -# This pass analyzes the names (variables/constants etc) used in scopes -# -# This pass records information about variables used by closure conversion. -# finds which variables are assigned or captured, and records variable -# type declarations. -# -# This info is recorded by setting the second argument of `lambda` expressions -# in-place to -# (var-info-lst captured-var-infos ssavalues static_params) -# where var-info-lst is a list of var-info records +# Lowering pass 3: analyze scopes (passes 2+3 in flisp code) #------------------------------------------------------------------------------- # AST traversal functions - useful for performing non-recursive AST traversals @@ -413,6 +402,20 @@ function resolve_scopes(ctx::ScopeResolutionContext, ex) return _resolve_scopes(ctx, thunk) end +""" +This pass analyzes scopes and the names (locals/globals etc) used within them. + +Names of kind `K"Identifier"` are transformed into binding identifiers of +kind `K"BindingId"`. The associated `Bindings` table in the context records +metadata about each binding. + +This pass also records the set of binding IDs are locals within the enclosing +lambda form. + +TODO: This pass should also record information about variables used by closure +conversion, find which variables are assigned or captured, and record variable +type declarations. 
+""" function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) res = resolve_scopes(ctx2, reparent(ctx2, ex)) From 1bd5fe775a2f4028294224b1229fa1de2cf643b2 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 14 Aug 2024 07:19:11 +1000 Subject: [PATCH 0832/1109] Consistent attachment of syntax trivia to top level statements (JuliaLang/JuliaSyntax.jl#495) Fix JuliaLang/JuliaSyntax.jl#494 --- JuliaSyntax/src/parser.jl | 1 + JuliaSyntax/test/expr.jl | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index debd5028863c0..1f63b39625662 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -462,6 +462,7 @@ function parse_toplevel(ps::ParseState) bump_trivia(ps) break else + bump_trivia(ps) parse_stmts(ps) end end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 272a5b9a10e8d..cf9b881d07f98 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -65,6 +65,21 @@ :body, ), ) + + @test parseall("a\n\nx") == + Expr(:toplevel, + LineNumberNode(1), + :a, + LineNumberNode(3), + :x + ) + @test parseall("a\n\nx;y") == + Expr(:toplevel, + LineNumberNode(1), + :a, + LineNumberNode(3), + Expr(:toplevel, :x, :y) + ) end @testset "Function definition lines" begin From 1106cc4881967b2a1644b812afb68fc792c7e95e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 15 Aug 2024 21:31:05 +1000 Subject: [PATCH 0833/1109] WIP: Linear IR for try-catch-else-end --- JuliaLowering/src/desugaring.jl | 6 +- JuliaLowering/src/eval.jl | 8 ++ JuliaLowering/src/kinds.jl | 1 + JuliaLowering/src/linear_ir.jl | 164 ++++++++++++++++++++++++++++---- 4 files changed, 160 insertions(+), 19 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 445ced2596182..a8783155fb228 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -393,10 +393,10 @@ function expand_try(ctx, ex) if 
kind(exc_var) != K"Placeholder" [K"block" [K"="(exc_var) exc_var [K"the_exception"]] - catch_ + catch_block ] else - catch_ + catch_block end ] if !isnothing(else_) @@ -900,7 +900,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] end elseif k == K"try" - expand_try(ctx, ex) + expand_forms_2(ctx, expand_try(ctx, ex)) elseif k == K"tuple" # TODO: named tuples expand_forms_2(ctx, @ast ctx ex [K"call" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 992ca67a3e9fe..43f0b7400bba6 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -227,6 +227,11 @@ function to_lowered_expr(mod, bindings, ex) Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" Core.GotoIfNot(to_lowered_expr(mod, bindings, ex[1]), ex[2].id) + elseif k == K"enter" + catch_idx = ex[1].id + numchildren(ex) == 1 ? + Core.EnterNode(catch_idx) : + Core.EnterNode(catch_idx, to_lowered_expr(ex[2])) elseif k == K"method" name = ex[1] @chk kind(name) == K"Symbol" @@ -244,6 +249,9 @@ function to_lowered_expr(mod, bindings, ex) k == K"=" ? :(=) : k == K"global" ? :global : k == K"const" ? :const : + k == K"leave" ? :leave : + k == K"the_exception" ? :the_exception : + k == K"pop_exception" ? :pop_exception : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index b91fb8ba4196e..62ac97cb4f3a7 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -43,6 +43,7 @@ function _register_kinds() "globalref" "outerref" "enter" + "pop_exception" "leave" "label" "symbolic_label" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 9387617699edf..18019ee340933 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -30,7 +30,7 @@ end Context for creating linear IR. One of these is created per lambda expression to flatten the body down to -linear IR. +a sequence of statements (linear IR). 
""" struct LinearIRContext{GraphType} <: AbstractLoweringContext graph::GraphType @@ -41,13 +41,15 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext lambda_locals::Set{IdTag} return_type::Union{Nothing,NodeId} break_labels::Dict{String, NodeId} + handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} + catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} mod::Module end function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.bindings, Ref(0), is_toplevel_thunk, lambda_locals, return_type, - Dict{String,NodeId}(), ctx.mod) + Dict{String,NodeId}(), SyntaxList(ctx), SyntaxList(ctx), ctx.mod) end # FIXME: BindingId subsumes many things so need to assess what that means for these predicates. @@ -133,6 +135,7 @@ function emit_return(ctx, srcref, ex) if isnothing(ex) return end + # TODO: Mark implicit returns as having no location?? # TODO: return type handling # TODO: exception stack handling # returning lambda directly is needed for @generated @@ -234,6 +237,140 @@ function new_mutable_var(ctx::LinearIRContext, srcref, name) var end +# Exception handlers are lowered using the following special forms +# +# (= tok (enter catch_label dynscope)) +# push exception handler with catch block at `catch_label` and dynamic +# scope `dynscope`, yielding a token which is used by leave/pop_exception. +# `dynscope` is only used for the special tryfinally form without +# associated source level syntax (see the `@with` macro) +# +# (leave tok) +# pop exception handler associated to `tok`. Each `enter` must be matched +# with a `leave` on every non-exceptional program path, including implicit +# returns generated in tail position. Multiple tokens can be supplied to +# pop multiple handlers using `(leave tok1 tok2 ...)`. 
+# +# (pop_exception tok) - pop exception stack back to state of associated enter +# +function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) + @chk numchildren(ex) <= 3 + try_block = ex[1] + catch_block = ex[2] + else_block = numchildren(ex) == 2 ? nothing : ex[3] + finally_block = nothing # fixme + + catch_label = make_label(ctx, catch_block) + end_label = !in_tail_pos || !isnothing(finally_block) ? make_label(ctx, ex) : nothing + result_var = needs_value && !in_tail_pos ? new_mutable_var(ctx, ex, "result_var") : nothing + + # Exception handler block prefix + handler_token = ssavar(ctx, ex, "handler_token") + emit(ctx, @ast ctx ex [K"=" + handler_token + [K"enter" catch_label] # TODO: dynscope + ]) + push!(ctx.handler_token_stack, handler_token) + # Try block code. + try_val = compile(ctx, try_block, needs_value, false) + + # Exception handler block postfix (1) + # if in_tail_pos + # if isnothing(else_block) + # if !isnothing(try_val) + # emit_return(ctx, try_val) + # end + # else + # if !isnothing(try_val) + # emit(ctx, try_val) + # end + # emit(ctx, @ast ctx ex [K"leave" handler_token]) + # end + # else + # if needs_value && !isnothing(try_val) + # emit_assignment(ctx, result_var, try_val) + # end + # emit(ctx, @ast ctx ex [K"leave" handler_token]) + # if isnothing(else_block) + # emit(ctx, @ast ctx ex [K"goto" end_label]) + # end + # end + # pop!(ctx.handler_token_stack, handler_token) + # + # # Else block + # if !isnothing(else_block) + # else_val = compile(ctx, else_block, needs_value, in_tail_pos) + # if !in_tail_pos + # if needs_value && !isnothing(else_val) + # emit_assignment(ctx, result_var, else_val) + # end + # emit(ctx, @ast ctx ex [K"goto" end_label]) + # end + # + # # More confusing form which should be equiv: + # # if !isnothing(result_var) && !isnothing(else_val) + # # emit_assignment(ctx, result_var, else_val) + # # end + # # if !isnothing(end_label) + # # emit(ctx, @ast ctx ex [K"goto" end_label]) + # # end + # end + + # 
Exception handler block postfix + if isnothing(else_block) + if in_tail_pos + if !isnothing(try_val) + emit_return(ctx, try_val, try_val) + end + else + if needs_value && !isnothing(try_val) + emit_assignment(ctx, ex, result_var, try_val) + end + emit(ctx, @ast ctx ex [K"leave" handler_token]) + end + pop!(ctx.handler_token_stack) + else + if !isnothing(try_val) && (in_tail_pos || needs_value) + emit(ctx, try_val) # TODO: Only for any side effects ? + end + emit(ctx, @ast ctx ex [K"leave" handler_token]) + pop!(ctx.handler_token_stack) + # Else block code + else_val = compile(ctx, else_block, needs_value, in_tail_pos) + if !in_tail_pos + if needs_value && !isnothing(else_val) + emit_assignment(ctx, ex, result_var, else_val) + end + end + end + if !in_tail_pos + emit(ctx, @ast ctx ex [K"goto" end_label]) + end + + # Emit either catch or finally block. A combined try/catch/finally block + # was split into separate trycatchelse and tryfinally blocks earlier. + + emit(ctx, catch_label) + if !isnothing(finally_block) + TODO(finally_block, "finally") + else + push!(ctx.catch_token_stack, handler_token) + # Exceptional control flow enters here + catch_val = compile(ctx, catch_block, needs_value, in_tail_pos) + if !isnothing(result_var) && !isnothing(catch_val) + emit_assignment(ctx, ex, result_var, catch_val) + end + if !in_tail_pos + emit(ctx, @ast ctx ex [K"pop_exception" handler_token]) + emit(ctx, end_label) + else + # <- pop_exception done in emit_return + end + pop!(ctx.catch_token_stack) + end + result_var +end + # This pass behaves like an interpreter on the given code. # To perform stateful operations, it calls `emit` to record that something # needs to be done. 
In value position, it returns an expression computing @@ -243,8 +380,9 @@ end function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" - # TODO: other kinds: copyast the_exception $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall + k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || + k == K"Placeholder" || k == K"the_exception" + # TODO: other kinds: copyast $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall if needs_value && k == K"Placeholder" # TODO: ensure outterref, globalref work here throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) @@ -261,6 +399,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"assert" # Elide these - they're no longer required. + # TODO: Elide in scope_analysis instead? 
if needs_value throw(LoweringError(ex, "misplaced semantic assertion")) end @@ -380,6 +519,8 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end val end + elseif k == K"trycatchelse" # || k == K"tryfinally" + compile_try(ctx, ex, needs_value, in_tail_pos) elseif k == K"method" # TODO # throw(LoweringError(ex, @@ -483,17 +624,8 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) TODO(ex, "_renumber $k") elseif is_literal(k) || is_quoted(k) ex - elseif k == K"enter" - TODO(ex, "_renumber $k") - elseif k == K"goto" - @ast ctx ex [K"goto" - label_table[ex[1].id]::K"label" - ] - elseif k == K"gotoifnot" - @ast ctx ex [K"gotoifnot" - _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex[1]) - label_table[ex[2].id]::K"label" - ] + elseif k == K"label" + @ast ctx ex label_table[ex.id]::K"label" elseif k == K"lambda" ex else @@ -591,7 +723,7 @@ function linearize_ir(ctx, ex) # required to call reparent() ... _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, Ref(0), false, Set{IdTag}(), nothing, - Dict{String,NodeId}(), ctx.mod) + Dict{String,NodeId}(), SyntaxList(graph), SyntaxList(graph), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res._id, bindings=ctx.bindings) _ctx, res From 1f43785899ca353fdd91a64471c4fb76e8288c1a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 17 Aug 2024 14:14:54 -0500 Subject: [PATCH 0834/1109] Terminate :statement parsing at newlines (JuliaLang/JuliaSyntax.jl#493) Addresses a portion of JuliaLang/JuliaSyntax.jl#316 --- JuliaSyntax/src/hooks.jl | 5 ++++- JuliaSyntax/test/hooks.jl | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 228f14b05d693..afb8ba8bf01d3 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -171,7 +171,10 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end parse!(stream; rule=options) if options 
=== :statement - bump_trivia(stream) + bump_trivia(stream; skip_newlines=false) + if peek(stream) == K"NewlineWs" + bump(stream) + end end if any_error(stream) diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 3593911953984..d0111aaca1c9c 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -11,7 +11,7 @@ function _unwrap_parse_error(core_hook_result) end @testset "Hooks for Core integration" begin - @testset "whitespace parsing" begin + @testset "whitespace and comment parsing" begin @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) @@ -20,6 +20,16 @@ end @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :statement) == Core.svec(:x,4) @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :atom) == Core.svec(:x,2) + + # https://github.com/JuliaLang/JuliaSyntax.jl/issues/316#issuecomment-1870294857 + stmtstr = + """ + plus(a, b) = a + b + + # Issue #81 + f() = nothing + """ + @test JuliaSyntax.core_parser_hook(stmtstr, "somefile", 1, 0, :statement)[2] == 19 end @testset "filename and lineno" begin From b37bbc34520140e80bcafc4647ce1b3668ba0fa5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 18 Aug 2024 20:49:27 +1000 Subject: [PATCH 0835/1109] Disallow trailing junk in generators (JuliaLang/JuliaSyntax.jl#497) Disallow generator syntax with trailing non-delimited expressions (y for x in xs a) (y for x in xs a, b) Allow parameter syntax such as f(y for x in xs; a) because the flisp parser allowed this it's used in Base in at least one location. Disallow the almost-equivalent block syntax for now: (y for x in xs; a) because the flisp parser disallowed it and users can always get the same thing with some extra parentheses. 
--- JuliaSyntax/src/parser.jl | 20 ++++++++++++-------- JuliaSyntax/test/parser.jl | 10 ++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 1f63b39625662..e1356e998f286 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -2706,7 +2706,7 @@ end function parse_call_arglist(ps::ParseState, closer) ps = ParseState(ps, for_generator=true) - parse_brackets(ps, closer) do _, _, _, _ + parse_brackets(ps, closer, false) do _, _, _, _ return (needs_parameters=true,) end end @@ -3123,7 +3123,7 @@ end # # flisp: parts of parse-paren- and parse-arglist function parse_brackets(after_parse::Function, - ps::ParseState, closing_kind) + ps::ParseState, closing_kind, generator_is_last=true) ps = ParseState(ps, range_colon_enabled=true, space_sensitive=false, where_enabled=true, @@ -3159,18 +3159,22 @@ function parse_brackets(after_parse::Function, if num_subexprs == 1 had_splat = peek_behind(ps).kind == K"..." end - t = peek_token(ps, skip_newlines=true) - k = kind(t) + k = peek(ps, skip_newlines=true) + if k == K"for" + # Generator syntax + # (x for a in as) ==> (parens (generator x (iteration (in a as)))) + parse_generator(ps, mark) + if generator_is_last + break + end + k = peek(ps, skip_newlines=true) + end if k == K"," had_commas = true bump(ps, TRIVIA_FLAG) elseif k == K";" || k == closing_kind # Handled above continue - elseif k == K"for" - # Generator syntax - # (x for a in as) ==> (parens (generator x (iteration (in a as)))) - parse_generator(ps, mark) else # Error - recovery done when consuming closing_kind break diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index b4fac48263566..f32e7ba629448 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -371,6 +371,7 @@ tests = [ "@x(a, b)" => "(macrocall-p @x a b)" "A.@x(y)" => "(macrocall-p (. A @x) y)" "A.@x(y).z" => "(. (macrocall-p (. 
A @x) y) z)" + "f(y for x = xs; a)" => "(call f (generator y (iteration (in x xs))) (parameters a))" # do "f() do\nend" => "(call f (do (tuple) (block)))" "f() do ; body end" => "(call f (do (tuple) (block body)))" @@ -435,6 +436,7 @@ tests = [ "A.@S{a}" => "(macrocall (. A @S) (braces a))" "@S{a}.b" => "(. (macrocall @S (braces a)) b)" "S{a,b}" => "(curly S a b)" + "T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))" # String macros "x\"str\"" => """(macrocall @x_str (string-r "str"))""" "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" @@ -729,6 +731,9 @@ tests = [ "(a=1;)" => "(block-p (= a 1))" "(a;b;;c)" => "(block-p a b c)" "(a=1; b=2)" => "(block-p (= a 1) (= b 2))" + # Following is an error for flisp compatibility. But it could be + # allowed as valid block syntax in the future? + "(y for x = xs; a)" => "(parens (generator y (iteration (in x xs))) (error-t ✘ a))" # Parentheses used for grouping "(a * b)" => "(parens (call-i a * b))" "(a=1)" => "(parens (= a 1))" @@ -1075,6 +1080,11 @@ parsestmt_test_specs = [ "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (cmdstring-r (error-t))) \$ (error)))" "var\"#\"`str`" => "(juxtapose (var # (error-t)) (cmdstring-r \"str\"))" "var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))" + + # trailing junk in generators (issue #407) + "(x for x = xs a)" => "(parens (generator x (iteration (in x xs))) (error-t a))" + "(x for x = xs a, b)" => "(parens (generator x (iteration (in x xs))) (error-t a ✘ b))" + "f(x for x = xs a)" => "(call f (generator x (iteration (in x xs))) (error-t a))" ] @testset "Parser does not crash on broken code" begin From e7458257fd5a9aa4f33086663f1c7f6d852d3fc3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 20 Aug 2024 07:10:49 +1000 Subject: [PATCH 0836/1109] Documenting the lowering of try-catch --- JuliaLowering/README.md | 47 ++++++++++++++++++++++ JuliaLowering/src/linear_ir.jl | 72 
++++++++++------------------------ 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 445624324e443..da6703cf7ec81 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -253,6 +253,53 @@ TODO: Write more here... * [Towards the Essence of Hygiene](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) - a paper by Michael Adams * [Bindings as sets of scopes](https://www-old.cs.utah.edu/plt/scope-sets/) - a description of Racket's scope set mechanism by Matthew Flatt +## Lowering of exception handlers + +Exception handling involves a careful interplay between lowering and the Julia +runtime. The forms `enter`, `leave` and `pop_exception` dynamically modify the +exception-related state on the `Task`; lowering and the runtime work together +to maintain correct invariants for this state. + +Lowering of exception handling must ensure that + +* Each `enter` is matched with a `leave` on every possible non-exceptional + program path (including implicit returns generated in tail position). +* Each `catch` block which is entered and handles the exception - by exiting + via a non-exceptional program path - is matched with a `pop_exception` +* Each `finally` block runs, regardless of the way it's entered - either by + normal program flow, an exception, early `return` or a jump out of an inner + context via `break`/`continue`/`goto` etc. + +The following special forms are emitted into the IR: + +* `(= tok (enter catch_label dynscope))` - + push exception handler with catch block at `catch_label` and dynamic + scope `dynscope`, yielding a token which is used by `leave` and + `pop_exception`. `dynscope` is only used in the special `tryfinally` form + without associated source level syntax (see the `@with` macro) +* `(leave tok)` - + pop exception handler back to the state of the `tok` from the associated + `enter`. 
Multiple tokens can be supplied to pop multiple handlers using + `(leave tok1 tok2 ...)`. +* `(pop_exception tok)` - pop exception stack back to state of associated enter + +When an `enter` is encountered, the runtime pushes a new handler onto the +`Task`'s exception handler stack which will jump to `catch_label` when an +exception occurs. + +There are two ways that the exception-related task state can be restored + +1. By encountering a `leave` which will restore the handler state with `tok`. +2. By throwing an exception. In this case the runtime will pop one handler + automatically and jump to the catch label with the new exception pushed + onto the exception stack. On this path the exception stack state must be + restored back to the associated `enter` by encountering `pop_exception`. + +Note that the handler and exception stack represent two distinct types of +exception-related state restoration which need to happen. Note also that the +"handler state restoration" actually includes several pieces of runtime state +including GC flags - see `jl_eh_restore_state` in the runtime for that. + ## Julia's existing lowering implementation ### How does macro expansion work? diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 18019ee340933..207c5e81c1427 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -237,22 +237,35 @@ function new_mutable_var(ctx::LinearIRContext, srcref, name) var end -# Exception handlers are lowered using the following special forms +# Lowering of exception handling must ensure that +# +# * Each `enter` is matched with a `leave` on every possible non-exceptional +# program path (including implicit returns generated in tail position). +# * Each catch block which is entered and handles the exception - by exiting +# via a non-exceptional program path - leaves the block with `pop_exception`. 
+# * Each `finally` block runs, regardless of any early `return` or jumps +# via `break`/`continue`/`goto` etc. +# +# These invariants are upheld by tracking the nesting using +# `handler_token_stack` and `catch_token_stack` and using these when emitting +# any control flow (return / goto) which leaves the associated block. +# +# The following special forms are emitted into the IR: # # (= tok (enter catch_label dynscope)) # push exception handler with catch block at `catch_label` and dynamic -# scope `dynscope`, yielding a token which is used by leave/pop_exception. -# `dynscope` is only used for the special tryfinally form without -# associated source level syntax (see the `@with` macro) +# scope `dynscope`, yielding a token which is used by `leave` and +# `pop_exception`. `dynscope` is only used in the special `tryfinally` form +# without associated source level syntax (see the `@with` macro) # # (leave tok) -# pop exception handler associated to `tok`. Each `enter` must be matched -# with a `leave` on every non-exceptional program path, including implicit -# returns generated in tail position. Multiple tokens can be supplied to -# pop multiple handlers using `(leave tok1 tok2 ...)`. +# pop exception handler back to the state of the `tok` from the associated +# `enter`. Multiple tokens can be supplied to pop multiple handlers using +# `(leave tok1 tok2 ...)`. # # (pop_exception tok) - pop exception stack back to state of associated enter # +# See the devdocs for further discussion. function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) @chk numchildren(ex) <= 3 try_block = ex[1] @@ -273,49 +286,6 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) push!(ctx.handler_token_stack, handler_token) # Try block code. 
try_val = compile(ctx, try_block, needs_value, false) - - # Exception handler block postfix (1) - # if in_tail_pos - # if isnothing(else_block) - # if !isnothing(try_val) - # emit_return(ctx, try_val) - # end - # else - # if !isnothing(try_val) - # emit(ctx, try_val) - # end - # emit(ctx, @ast ctx ex [K"leave" handler_token]) - # end - # else - # if needs_value && !isnothing(try_val) - # emit_assignment(ctx, result_var, try_val) - # end - # emit(ctx, @ast ctx ex [K"leave" handler_token]) - # if isnothing(else_block) - # emit(ctx, @ast ctx ex [K"goto" end_label]) - # end - # end - # pop!(ctx.handler_token_stack, handler_token) - # - # # Else block - # if !isnothing(else_block) - # else_val = compile(ctx, else_block, needs_value, in_tail_pos) - # if !in_tail_pos - # if needs_value && !isnothing(else_val) - # emit_assignment(ctx, result_var, else_val) - # end - # emit(ctx, @ast ctx ex [K"goto" end_label]) - # end - # - # # More confusing form which should be equiv: - # # if !isnothing(result_var) && !isnothing(else_val) - # # emit_assignment(ctx, result_var, else_val) - # # end - # # if !isnothing(end_label) - # # emit(ctx, @ast ctx ex [K"goto" end_label]) - # # end - # end - # Exception handler block postfix if isnothing(else_block) if in_tail_pos From d5d5e2971c83e428bf418bff2f8a4ad56d549053 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 20 Aug 2024 14:50:54 +1000 Subject: [PATCH 0837/1109] Fix `is_number()` predicate to include `Bool` and clean up (JuliaLang/JuliaSyntax.jl#498) --- JuliaSyntax/src/kinds.jl | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 27d90c2265544..c5b43e9eb350a 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -254,13 +254,15 @@ register_kinds!(JuliaSyntax, 0, [ "END_KEYWORDS" "BEGIN_LITERAL" - "Bool" - "Integer" - "BinInt" - "HexInt" - "OctInt" - "Float" - "Float32" + "BEGIN_NUMBERS" + "Bool" + 
"Integer" + "BinInt" + "HexInt" + "OctInt" + "Float" + "Float32" + "END_NUMBERS" "String" "Char" "CmdString" @@ -1114,6 +1116,7 @@ const _nonunique_kind_names = Set([ K"ErrorBidiFormatting" K"ErrorInvalidOperator" + K"Bool" K"Integer" K"BinInt" K"HexInt" @@ -1176,17 +1179,18 @@ is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalid is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS" is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" <= k <= K"END_BLOCK_CONTINUATION_KEYWORDS" is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL" +is_number(k::Kind) = K"BEGIN_NUMBERS" <= k <= K"END_NUMBERS" is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS" is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") -is_identifier(k) = is_identifier(kind(k)) -is_contextual_keyword(k) = is_contextual_keyword(kind(k)) -is_error(k) = is_error(kind(k)) -is_keyword(k) = is_keyword(kind(k)) -is_literal(k) = is_literal(kind(k)) -is_operator(k) = is_operator(kind(k)) -is_word_operator(k) = is_word_operator(kind(k)) - +is_identifier(x) = is_identifier(kind(x)) +is_contextual_keyword(x) = is_contextual_keyword(kind(x)) +is_error(x) = is_error(kind(x)) +is_keyword(x) = is_keyword(kind(x)) +is_literal(x) = is_literal(kind(x)) +is_number(x) = is_number(kind(x)) +is_operator(x) = is_operator(kind(x)) +is_word_operator(x) = is_word_operator(kind(x)) # Predicates for operator precedence # FIXME: Review how precedence depends on dottedness, eg @@ -1215,10 +1219,6 @@ is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS" is_macro_name(x) = K"BEGIN_MACRO_NAMES" <= kind(x) <= K"END_MACRO_NAMES" is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS" -function is_number(x) - kind(x) in (K"Integer", K"BinInt", K"HexInt", K"OctInt", K"Float", K"Float32") -end - function is_string_delim(x) kind(x) in (K"\"", K"\"\"\"") end From 
80b415ef183cd23c3b0379ffe7a4133746604052 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 20 Aug 2024 15:25:10 +1000 Subject: [PATCH 0838/1109] Some fixes to make is_simple_atom() more accurate --- JuliaLowering/src/linear_ir.jl | 36 ++++++++++++++---------------- JuliaLowering/test/branching_ir.jl | 6 ++--- JuliaLowering/test/loops_ir.jl | 9 +++----- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 207c5e81c1427..2c8785eea3110 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -1,20 +1,18 @@ #------------------------------------------------------------------------------- # Lowering pass 5: Flatten to linear IR -function is_simple_atom(ex) +function is_simple_atom(ctx, ex) k = kind(ex) - # FIXME -# (or (number? x) (string? x) (char? x) -# (and (pair? x) (memq (car x) '(ssavalue null true false thismodule))) -# (eq? (typeof x) 'julia_value))) - is_number(k) || k == K"String" || k == K"Char" + # TODO flisp thismodule head? + is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || + (k == K"core" && ex.name_val == "nothing") end # N.B.: This assumes that resolve-scopes has run, so outerref is equivalent to # a global in the current scope. 
-function is_valid_ir_argument(ex) +function is_valid_ir_argument(ctx, ex) k = kind(ex) - return is_simple_atom(ex) + return is_simple_atom(ctx, ex) # FIXME || #(k == K"outerref" && nothrow_julia_global(ex[1])) || #(k == K"globalref" && nothrow_julia_global(ex)) || @@ -61,8 +59,8 @@ end # BindingId could also subsume # - top,core -function is_valid_body_ir_argument(ex) - is_valid_ir_argument(ex) && return true +function is_valid_body_ir_argument(ctx, ex) + is_valid_ir_argument(ctx, ex) && return true return false # FIXME k = kind(ex) @@ -70,9 +68,9 @@ function is_valid_body_ir_argument(ex) TODO("vinfo-table stuff") end -function is_simple_arg(ex) +function is_simple_arg(ctx, ex) k = kind(ex) - return is_simple_atom(ex) || k == K"BindingId" || k == K"quote" || k == K"inert" || + return is_simple_atom(ctx, ex) || k == K"BindingId" || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"globalref" || k == K"outerref" end @@ -84,14 +82,14 @@ end function is_const_read_arg(ctx, ex) k = kind(ex) - return is_simple_atom(ex) || + return is_simple_atom(ctx, ex) || is_single_assign_var(ctx, ex) || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" end function is_valid_ir_rvalue(ctx, lhs, rhs) return is_ssa(ctx, lhs) || - is_valid_ir_argument(rhs) || + is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref kind(rhs) in KSet"new the_exception call foreigncall") @@ -102,11 +100,11 @@ function compile_args(ctx, args) # First check if all the arguments as simple (and therefore side-effect free). # Otherwise, we need to use ssa values for all arguments to ensure proper # left-to-right evaluation semantics. 
- all_simple = all(is_simple_arg, args) + all_simple = all(a->is_simple_arg(ctx, a), args) args_out = SyntaxList(ctx) for arg in args arg_val = compile(ctx, arg, true, false) - if (all_simple || is_const_read_arg(ctx, arg_val)) && is_valid_body_ir_argument(arg_val) + if (all_simple || is_const_read_arg(ctx, arg_val)) && is_valid_body_ir_argument(ctx, arg_val) push!(args_out, arg_val) else push!(args_out, emit_assign_tmp(ctx, arg_val)) @@ -139,7 +137,7 @@ function emit_return(ctx, srcref, ex) # TODO: return type handling # TODO: exception stack handling # returning lambda directly is needed for @generated - if !(is_valid_ir_argument(ex) || head(ex) == K"lambda") + if !(is_valid_ir_argument(ctx, ex) || head(ex) == K"lambda") ex = emit_assign_tmp(ctx, ex) end # TODO: if !isnothing(ctx.return_type) ... @@ -184,7 +182,7 @@ end function compile_condition_term(ctx, ex) cond = compile(ctx, ex, true, false) - if !is_valid_body_ir_argument(cond) + if !is_valid_body_ir_argument(ctx, cond) cond = emit_assign_tmp(ctx, cond) end return cond @@ -507,7 +505,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) @chk numchildren(ex) == 3 fname = ex[1] sig = compile(ctx, ex[2], true, false) - if !is_valid_ir_argument(sig) + if !is_valid_ir_argument(ctx, sig) sig = emit_assign_tmp(ctx, sig) end lam = ex[3] diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index a4f6a54a0115e..3d32bc88a03a6 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -11,8 +11,7 @@ end 2 (gotoifnot %₁ label₅) 3 slot₂/b 4 (return %₃) -5 core.nothing -6 (return %₅) +5 (return core.nothing) ###################################### # Branching, !tail && !value @@ -84,5 +83,4 @@ end 5 (gotoifnot %₄ label₈) 6 slot₄/d 7 (return %₆) -8 core.nothing -9 (return %₈) +8 (return core.nothing) diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl index d1149086401af..859b33da6e356 100644 --- 
a/JuliaLowering/test/loops_ir.jl +++ b/JuliaLowering/test/loops_ir.jl @@ -12,8 +12,7 @@ end 5 TestMod.body1 6 TestMod.body2 7 (goto label₁) -8 core.nothing -9 (return %₈) +8 (return core.nothing) ######################################## # While loop with short circuit condition while a && b @@ -26,8 +25,7 @@ end 4 (gotoifnot %₃ label₇) 5 TestMod.body 6 (goto label₁) -7 core.nothing -8 (return %₇) +7 (return core.nothing) ######################################## # While loop with with break and continue while cond @@ -46,5 +44,4 @@ end 6 (goto label₈) 7 TestMod.body3 8 (goto label₁) -9 core.nothing -10 (return %₉) +9 (return core.nothing) From c6bb969349d39da93434bb3d5598a99164a88fdd Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 21 Aug 2024 21:08:24 +1000 Subject: [PATCH 0839/1109] IR generation for try/catch/else and function return types --- JuliaLowering/src/desugaring.jl | 3 +- JuliaLowering/src/linear_ir.jl | 166 ++++++++++++++++---- JuliaLowering/src/utils.jl | 12 +- JuliaLowering/test/demo.jl | 51 +++++- JuliaLowering/test/exceptions.jl | 239 +++++++++++++++++++++++++++++ JuliaLowering/test/functions.jl | 47 ++++++ JuliaLowering/test/functions_ir.jl | 38 +++++ JuliaLowering/test/runtests.jl | 22 +-- JuliaLowering/test/utils.jl | 19 ++- 9 files changed, 539 insertions(+), 58 deletions(-) create mode 100644 JuliaLowering/test/exceptions.jl create mode 100644 JuliaLowering/test/functions.jl create mode 100644 JuliaLowering/test/functions_ir.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a8783155fb228..1b578d9c48561 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -601,9 +601,10 @@ function expand_function_def(ctx, ex, docs) QuoteNode(source_location(LineNumberNode, callex))::K"Value" ] if !isnothing(return_type) + ret_var = ssavar(ctx, return_type, "return_type") body = @ast ctx body [ K"block" - ret_var=return_type + [K"=" ret_var return_type] body ] else diff --git 
a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 2c8785eea3110..97290fbf4b515 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -37,17 +37,20 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext next_label_id::Ref{Int} is_toplevel_thunk::Bool lambda_locals::Set{IdTag} - return_type::Union{Nothing,NodeId} + return_type::Union{Nothing, SyntaxTree{GraphType}} break_labels::Dict{String, NodeId} handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} + finally_handler # ::Union{Nothing, SyntaxTree{GraphType}} mod::Module end function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) - LinearIRContext(ctx.graph, SyntaxList(ctx.graph), ctx.bindings, Ref(0), - is_toplevel_thunk, lambda_locals, return_type, - Dict{String,NodeId}(), SyntaxList(ctx), SyntaxList(ctx), ctx.mod) + graph = syntax_graph(ctx) + rett = isnothing(return_type) ? nothing : reparent(graph, return_type) + LinearIRContext(graph, SyntaxList(ctx), ctx.bindings, Ref(0), + is_toplevel_thunk, lambda_locals, rett, + Dict{String,NodeId}(), SyntaxList(ctx), SyntaxList(ctx), nothing, ctx.mod) end # FIXME: BindingId subsumes many things so need to assess what that means for these predicates. @@ -123,25 +126,117 @@ function emit(ctx::LinearIRContext, srcref, k, args...) end # Emit computation of ex, assigning the result to an ssavar and returning that -function emit_assign_tmp(ctx::LinearIRContext, ex) - tmp = ssavar(ctx, ex) - emit(ctx, ex, K"=", tmp, ex) +function emit_assign_tmp(ctx::LinearIRContext, ex, name="tmp") + tmp = ssavar(ctx, ex, name) + emit(ctx, @ast ctx ex [K"=" tmp ex]) return tmp end +function compile_pop_exception(ctx::LinearIRContext, srcref, src_tokens, dest_tokens) + # It's valid to leave the context of src_tokens for the context of + # dest_tokens when src_tokens is the same or nested within dest_tokens. 
+ # It's enough to check the token on the top of the dest stack. + n = length(dest_tokens) + jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) + jump_ok || throw(LoweringError(srcref, "Attempt to jump into catch block")) + if n < length(src_tokens) + @ast ctx srcref [K"pop_exception" src_tokens[n+1]] + else + nothing + end +end + +function emit_pop_exception(ctx::LinearIRContext, srcref, dest_tokens) + pexc = compile_pop_exception(ctx, srcref, ctx.catch_token_stack, dest_tokens) + if !isnothing(pexc) + emit(ctx, pexc) + end +end + +function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) + if isnothing(type) + return ex + end + + # Require that the caller make `type` "simple", for now (can generalize + # later if necessary) + kt = kind(type) + @assert (kt == K"Identifier" || kt == K"BindingId" || is_literal(kt)) + # Use a slot to permit union-splitting this in inference + tmp = new_mutable_var(ctx, srcref, "tmp") + + @ast ctx srcref [K"block" + # [K"local_def" tmp] + # [K"=" type_ssa renumber_assigned_ssavalues(type)] + [K"=" tmp ex] + [K"if" + [K"call" "isa"::K"core" tmp type] + "nothing"::K"core" + [K"=" + tmp + if do_typeassert + [K"call" + "typeassert"::K"core" + [K"call" "convert"::K"top" type tmp] + type + ] + else + [K"call" "convert"::K"top" type tmp] + end + ] + ] + tmp + ] +end + +function actually_return(ctx, ex) + # TODO: Handle the implicit return coverage hack for #53354 ? + rett = ctx.return_type + if !isnothing(rett) + ex = compile(ctx, convert_for_type_decl(ctx, rett, ex, rett, true), true, false) + end + simple_ret_val = isempty(ctx.catch_token_stack) ? 
+ # returning lambda directly is needed for @generated + (is_valid_ir_argument(ctx, ex) || kind(ex) == K"lambda") : + is_simple_atom(ctx, ex) + if !simple_ret_val + ex = emit_assign_tmp(ctx, ex, "return_tmp") + end + emit_pop_exception(ctx, ex, ()) + emit(ctx, @ast ctx ex [K"return" ex]) + return nothing +end + function emit_return(ctx, srcref, ex) if isnothing(ex) return + elseif isempty(ctx.handler_token_stack) + actually_return(ctx, ex) + return + end + # FIXME: What's this !is_ssa(ctx, ex) here about? + x = if is_simple_atom(ctx, ex) && !(is_ssa(ctx, ex) && !isnothing(ctx.finally_handler)) + ex + elseif !isnothing(ctx.finally_handler) + tmp = new_mutable_var(ctx, ex) + emit(ctx, @ast ctx ex [K"=" tmp ex]) + tmp + else + emit_assign_tmp(ctx, ex) end - # TODO: Mark implicit returns as having no location?? - # TODO: return type handling - # TODO: exception stack handling - # returning lambda directly is needed for @generated - if !(is_valid_ir_argument(ctx, ex) || head(ex) == K"lambda") - ex = emit_assign_tmp(ctx, ex) + if !isnothing(ctx.finally_handler) + TODO(ex, "Finally blocks") + else + emit(ctx, @ast ctx ex [K"leave" ctx.handler_token_stack...]) + actually_return(ctx, x) end - # TODO: if !isnothing(ctx.return_type) ... - emit(ctx, srcref, K"return", ex) + # Should we return `x` here? The flisp code does, but that doesn't seem + # useful as any returned value cannot be used? 
+ return nothing +end + +function emit_return(ctx, ex) + emit_return(ctx, ex, ex) end function emit_assignment(ctx, srcref, lhs, rhs) @@ -288,7 +383,7 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if isnothing(else_block) if in_tail_pos if !isnothing(try_val) - emit_return(ctx, try_val, try_val) + emit_return(ctx, try_val) end else if needs_value && !isnothing(try_val) @@ -356,7 +451,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) end if in_tail_pos - emit_return(ctx, ex, ex) + emit_return(ctx, ex) elseif needs_value ex else @@ -405,12 +500,22 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"block" || k == K"scope_block" nc = numchildren(ex) - res = nothing - for i in 1:nc - islast = i == nc - res = compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) + if nc == 0 + if in_tail_pos + emit_return(ctx, nothing_(ctx, ex)) + elseif needs_value + nothing_(ctx, ex) + else + nothing + end + else + res = nothing + for i in 1:nc + islast = i == nc + res = compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) + end + res end - res elseif k == K"break_block" end_label = make_label(ctx, ex) name = ex[1].name_val @@ -495,7 +600,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) # "Global method definition needs to be placed at the top level, or use `eval`")) if numchildren(ex) == 1 if in_tail_pos - emit_return(ctx, ex, ex) + emit_return(ctx, ex) elseif needs_value ex else @@ -522,7 +627,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif k == K"lambda" lam = compile_lambda(ctx, ex) if in_tail_pos - emit_return(ctx, ex, lam) + emit_return(ctx, lam) elseif needs_value lam else @@ -663,20 +768,19 @@ function _add_slots!(slot_rewrites, bindings, ids) end function compile_lambda(outer_ctx, ex) - 
lambda_info = ex.lambda_info - return_type = nothing # FIXME - # TODO: Add assignments for reassigned arguments to body using lambda_info.args - ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, ex.lambda_locals, return_type) + info = ex.lambda_info + # TODO: Add assignments for reassigned arguments to body using info.args + ctx = LinearIRContext(outer_ctx, info.is_toplevel_thunk, ex.lambda_locals, info.ret_var) compile_body(ctx, ex[1]) slot_rewrites = Dict{IdTag,Int}() - _add_slots!(slot_rewrites, ctx.bindings, (arg.var_id for arg in lambda_info.args)) + _add_slots!(slot_rewrites, ctx.bindings, (arg.var_id for arg in info.args)) # Sorting the lambda locals is required to remove dependence on Dict iteration order. _add_slots!(slot_rewrites, ctx.bindings, sort(collect(ex.lambda_locals))) # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"lambda", makenode(ctx, ex[1], K"block", code), - lambda_info=lambda_info, + lambda_info=info, slot_rewrites=slot_rewrites ) end @@ -691,7 +795,7 @@ function linearize_ir(ctx, ex) # required to call reparent() ... _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, Ref(0), false, Set{IdTag}(), nothing, - Dict{String,NodeId}(), SyntaxList(graph), SyntaxList(graph), ctx.mod) + Dict{String,NodeId}(), SyntaxList(graph), SyntaxList(graph), nothing, ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res._id, bindings=ctx.bindings) _ctx, res diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 7ede7a3e8e8b7..5b64e9e167c40 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -67,13 +67,19 @@ function showprov(x; kws...) showprov(stdout, x; kws...) 
end -function print_ir(io::IO, ex) +function print_ir(io::IO, ex, indent="") @assert kind(ex) == K"lambda" && kind(ex[1]) == K"block" stmts = children(ex[1]) for (i, e) in enumerate(stmts) lno = rpad(i, 3) - code = string(e) # rpad(string(e), 50) - println(io, lno, " ", code) + if kind(e) == K"method" && numchildren(e) == 3 + println(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) + @assert kind(e[3]) == K"lambda" + print_ir(io, e[3], " ") + else + code = string(e) + println(io, indent, lno, " ", code) + end end end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 0396d127718f2..bbeecb51261cf 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -292,12 +292,53 @@ end """ src = """ -begin - local a, b, c - if a - b +let + x = try + error("hi") + 1 + catch exc + current_exceptions() + else + 3 end - c + x +end +""" + +src = """ +function f(y) + x = + try + try + error("hi") + 1 + catch exc + if y + return 2 + end + 3 + else + 4 + end + catch + 5 + end + x +end +""" + +src = """ +function f(x)::Int + if x + 42.0 + end + 0xff +end +""" + +src = """ +try +catch end """ diff --git a/JuliaLowering/test/exceptions.jl b/JuliaLowering/test/exceptions.jl new file mode 100644 index 0000000000000..ca10f7047b6b1 --- /dev/null +++ b/JuliaLowering/test/exceptions.jl @@ -0,0 +1,239 @@ +@testset "try/catch" begin + +test_mod = Module() + +@test isempty(current_exceptions()) + +@testset "tail position" begin + + @test JuliaLowering.include_string(test_mod, """ + try + 1 + catch + 2 + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + try + error("hi") + 1 + catch + 2 + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + try + error("hi") + catch exc + exc + end + """) == ErrorException("hi") + + + @test JuliaLowering.include_string(test_mod, """ + try + 1 + catch + 2 + else + 3 + end + """) == 3 + + @test JuliaLowering.include_string(test_mod, """ + try + error("hi") + 1 + catch 
+ 2 + else + 3 + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + begin + function f() + try + return 1 + catch + end + return 2 + end + f() + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + begin + function f() + try + return 1 + catch + end + end + f() + end + """) == 1 + +end + +@testset "value position" begin + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + 1 + catch + 2 + end + x + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + error("hi") + 1 + catch + 2 + end + x + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + error("hi") + catch exc + exc + end + x + end + """) == ErrorException("hi") + + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + 1 + catch + 2 + else + 3 + end + x + end + """) == 3 + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + error("hi") + 1 + catch + 2 + else + 3 + end + x + end + """) == 2 + +end + +@testset "not value/tail position" begin + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + x = 1 + catch + x = 2 + end + x + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + error("hi") + x = 1 + catch + x = 2 + end + x + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + x = error("hi") + catch exc + x = exc + end + x + end + """) == ErrorException("hi") + + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + x = 1 + catch + x = 2 + else + x = 3 + end + x + end + """) == 3 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + error("hi") + x = 1 + catch + x = 2 + else + x = 3 + end + x + end + """) == 2 + +end + +@testset "exception stack" begin + + @test JuliaLowering.include_string(test_mod, """ + try + try + error("hi") + catch + error("ho") + end + catch + a = [] + for x in current_exceptions() + push!(a, 
x.exception) + end + a + end + """) == [ErrorException("hi"), ErrorException("ho")] + +end + +@test isempty(current_exceptions()) + +test_ir_cases(joinpath(@__DIR__, "exceptions_ir.jl")) + +end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl new file mode 100644 index 0000000000000..ec35e14021cd6 --- /dev/null +++ b/JuliaLowering/test/functions.jl @@ -0,0 +1,47 @@ +@testset "Functions" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +begin + function f(x) + y = x + 1 + "hi", x, y + end + + f(1) +end +""") == ("hi", 1, 2) + +@test JuliaLowering.include_string(test_mod, """ +begin + function f(x)::Int + if x == 1 + return 42.0 + end + 0xff + end + (f(1), f(2)) +end +""") === (42, 255) + +Base.include_string(test_mod, +""" + struct X end + + # Erroneous `convert` to test type assert in function return values + Base.convert(::Type{X}, y) = y +""") + +@test_throws TypeError JuliaLowering.include_string(test_mod, """ +begin + function g()::X + return nothing + end + g() +end +""") + +test_ir_cases(joinpath(@__DIR__, "functions_ir.jl")) + +end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl new file mode 100644 index 0000000000000..2a0beb766f186 --- /dev/null +++ b/JuliaLowering/test/functions_ir.jl @@ -0,0 +1,38 @@ +######################################## +# Return types +function f(x)::Int + if x + 42.0 + end + 0xff +end +#---------- +1 (method :f) +2 core.svec +3 core.svec +4 core.Typeof +5 TestMod.f +6 (call %₄ %₅) +7 core.Any +8 (call %₃ %₆ %₇) +9 core.svec +10 (call %₉) +11 (call %₂ %₈ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) +12 --- method :f %₁₁ + 1 TestMod.Int + 2 slot₂/x + 3 (gotoifnot %₂ label₄) + 4 (= slot₃/tmp 0xff) + 5 core.isa + 6 slot₃/tmp + 7 (call %₅ %₆ %₁) + 8 (gotoifnot %₇ label₁₀) + 9 (goto label₁₅) + 10 core.typeassert + 11 top.convert + 12 slot₃/tmp + 13 (call %₁₁ %₁ %₁₂) + 14 (= slot₃/tmp (call %₁₀ %₁₃ %₁)) + 15 slot₃/tmp + 16 (return %₁₅) +13 (return 
%₁) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 4f2901b5e488c..b5c4946f023e9 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -51,6 +51,13 @@ JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) @test test_mod.z == 2 +#------------------------------------------------------------------------------- +# Blocks +@test JuliaLowering.include_string(test_mod, """ +begin +end +""") == nothing + #------------------------------------------------------------------------------- # Placeholders @test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 @@ -83,20 +90,6 @@ end (2,3,4), (1,2,3,4,5)) -#------------------------------------------------------------------------------- -# Functions -@test JuliaLowering.include_string(test_mod, """ -begin - function f(x) - y = x + 1 - "hi", x, y - end - - f(1) -end -""") == ("hi", 1, 2) - - #------------------------------------------------------------------------------- # module A = JuliaLowering.include_string(test_mod, """ @@ -302,6 +295,7 @@ macro A.b(ex) end """) +include("functions.jl") include("desugaring.jl") include("branching.jl") include("loops.jl") diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index ed4b32f5c1c28..7b0f8b0187726 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -108,11 +108,22 @@ function match_ir_test_case(case_str) (name=strip(m[1]), input=strip(m[2]), output=strip(m[3])) end -function format_ir_test_case(mod, input) +function format_ir_for_test(mod, input) ex = parsestmt(SyntaxTree, input) x = JuliaLowering.lower(mod, ex) - output = strip(sprint(JuliaLowering.print_ir, x)) - output = replace(output, string(mod)=>"TestMod") + ir = strip(sprint(JuliaLowering.print_ir, x)) + return replace(ir, string(mod)=>"TestMod") +end + +function format_ir_test_case(mod, input, description="-- Add description 
here --") + ir = format_ir_for_test(mod, input) + """ + ######################################## + # $description + $(strip(input)) + #---------- + $ir + """ end function test_ir_cases(filename) @@ -121,7 +132,7 @@ function test_ir_cases(filename) mod = Module(:TestMod) for (name,input,ref) in cases - output = format_ir_test_case(mod, input) + output = format_ir_for_test(mod, input) @testset "$name" begin if output != ref # Do our own error dumping, as @test will From 87197cb3eb6200c0cbb4e4b45a579df4c72dbb96 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 23 Aug 2024 19:00:26 +1000 Subject: [PATCH 0840/1109] Support try/finally in IR generation --- JuliaLowering/src/linear_ir.jl | 228 +++++++++++++++---- JuliaLowering/src/syntax_graph.jl | 3 + JuliaLowering/test/demo.jl | 16 +- JuliaLowering/test/exceptions.jl | 101 +++++++++ JuliaLowering/test/exceptions_ir.jl | 338 ++++++++++++++++++++++++++++ 5 files changed, 635 insertions(+), 51 deletions(-) create mode 100644 JuliaLowering/test/exceptions_ir.jl diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 97290fbf4b515..ec954a3eb9c39 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -24,6 +24,30 @@ function is_ssa(ctx, ex) kind(ex) == K"BindingId" && lookup_binding(ctx, ex).is_ssa end +# Target to jump to, including info on try handler nesting and catch block +# nesting +struct JumpTarget{GraphType} + label::SyntaxTree{GraphType} + handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} + catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} +end + +function JumpTarget(label::SyntaxTree{GraphType}, ctx) where {GraphType} + JumpTarget{GraphType}(label, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack)) +end + +struct FinallyHandler{GraphType} + tagvar::SyntaxTree{GraphType} + target::JumpTarget{GraphType} + exit_actions::Vector{Tuple{Symbol,Union{Nothing,SyntaxTree{GraphType}}}} +end + +function 
FinallyHandler(tagvar::SyntaxTree{GraphType}, target::JumpTarget) where {GraphType} + FinallyHandler{GraphType}(tagvar, target, + Vector{Tuple{Symbol, Union{Nothing,SyntaxTree{GraphType}}}}()) +end + + """ Context for creating linear IR. @@ -38,19 +62,21 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext is_toplevel_thunk::Bool lambda_locals::Set{IdTag} return_type::Union{Nothing, SyntaxTree{GraphType}} - break_labels::Dict{String, NodeId} + break_targets::Dict{String, JumpTarget{GraphType}} handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} - finally_handler # ::Union{Nothing, SyntaxTree{GraphType}} + finally_handlers::Vector{FinallyHandler{GraphType}} mod::Module end function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) graph = syntax_graph(ctx) rett = isnothing(return_type) ? nothing : reparent(graph, return_type) + GraphType = typeof(graph) LinearIRContext(graph, SyntaxList(ctx), ctx.bindings, Ref(0), is_toplevel_thunk, lambda_locals, rett, - Dict{String,NodeId}(), SyntaxList(ctx), SyntaxList(ctx), nothing, ctx.mod) + Dict{String,JumpTarget{GraphType}}(), SyntaxList(ctx), SyntaxList(ctx), + Vector{FinallyHandler{GraphType}}(), ctx.mod) end # FIXME: BindingId subsumes many things so need to assess what that means for these predicates. 
@@ -153,6 +179,18 @@ function emit_pop_exception(ctx::LinearIRContext, srcref, dest_tokens) end end +function emit_leave_handler(ctx::LinearIRContext, srcref, dest_tokens) + src_tokens = ctx.handler_token_stack + n = length(dest_tokens) + jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) + jump_ok || throw(LoweringError(srcref, "Attempt to jump into try block")) + if n < length(src_tokens) + emit(ctx, @ast ctx srcref [K"leave" src_tokens[n+1:end]]) + else + nothing + end +end + function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) if isnothing(type) return ex @@ -189,7 +227,30 @@ function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) ] end -function actually_return(ctx, ex) +function emit_jump(ctx, srcref, target::JumpTarget) + emit_pop_exception(ctx, srcref, target.catch_token_stack) + emit_leave_handler(ctx, srcref, target.handler_token_stack) + emit(ctx, @ast ctx srcref [K"goto" target.label]) +end + +# Enter the current finally block, either through the landing pad (on_exit == +# :rethrow) or via a jump (on_exit ∈ (:return, :break)). +# +# An integer tag is created to identify the current code path and select the +# on_exit action to be taken at finally handler exit. +function enter_finally_block(ctx, srcref, on_exit, value) + @assert on_exit ∈ (:rethrow, :break, :return) + handler = last(ctx.finally_handlers) + push!(handler.exit_actions, (on_exit, value)) + tag = length(handler.exit_actions) + emit(ctx, @ast ctx srcref [K"=" handler.tagvar tag::K"Integer"]) + if on_exit != :rethrow + emit_jump(ctx, srcref, handler.target) + end +end + +# Helper function for emit_return +function _actually_return(ctx, ex) # TODO: Handle the implicit return coverage hack for #53354 ? 
rett = ctx.return_type if !isnothing(rett) @@ -211,24 +272,27 @@ function emit_return(ctx, srcref, ex) if isnothing(ex) return elseif isempty(ctx.handler_token_stack) - actually_return(ctx, ex) + _actually_return(ctx, ex) return end # FIXME: What's this !is_ssa(ctx, ex) here about? - x = if is_simple_atom(ctx, ex) && !(is_ssa(ctx, ex) && !isnothing(ctx.finally_handler)) + x = if is_simple_atom(ctx, ex) && !(is_ssa(ctx, ex) && !isempty(ctx.finally_handlers)) ex - elseif !isnothing(ctx.finally_handler) - tmp = new_mutable_var(ctx, ex) - emit(ctx, @ast ctx ex [K"=" tmp ex]) + elseif !isempty(ctx.finally_handlers) + # TODO: Why does flisp lowering create a mutable variable here even + # though we don't mutate it? + # tmp = ssavar(ctx, srcref, "returnval_via_finally") # <- can we use this? + tmp = new_mutable_var(ctx, srcref, "returnval_via_finally") + emit(ctx, @ast ctx srcref [K"=" tmp ex]) tmp else - emit_assign_tmp(ctx, ex) + emit_assign_tmp(ctx, ex, "returnval_via_finally") end - if !isnothing(ctx.finally_handler) - TODO(ex, "Finally blocks") + if !isempty(ctx.finally_handlers) + enter_finally_block(ctx, srcref, :return, x) else - emit(ctx, @ast ctx ex [K"leave" ctx.handler_token_stack...]) - actually_return(ctx, x) + emit(ctx, @ast ctx srcref [K"leave" ctx.handler_token_stack...]) + _actually_return(ctx, x) end # Should we return `x` here? The flisp code does, but that doesn't seem # useful as any returned value cannot be used? @@ -239,6 +303,23 @@ function emit_return(ctx, ex) emit_return(ctx, ex, ex) end +function emit_break(ctx, ex) + name = ex[1].name_val + target = get(ctx.break_targets, name, nothing) + if isnothing(target) + ty = name == "loop_exit" ? 
"break" : "continue" + throw(LoweringError(ex, "$ty must be used inside a `while` or `for` loop")) + end + if !isempty(ctx.finally_handlers) + handler = last(ctx.finally_handlers) + if length(target.handler_token_stack) < length(handler.target.handler_token_stack) + enter_finally_block(ctx, ex, :break, ex) + return + end + end + emit_jump(ctx, ex, target) +end + function emit_assignment(ctx, srcref, lhs, rhs) if !isnothing(rhs) if is_valid_ir_rvalue(ctx, lhs, rhs) @@ -250,7 +331,7 @@ function emit_assignment(ctx, srcref, lhs, rhs) else # in unreachable code (such as after return); still emit the assignment # so that the structure of those uses is preserved - emit(ctx, rhs, K"=", lhs, nothing_(ctx, srcref)) + emit(ctx, @ast ctx srcref [K"=" lhs "nothing"::K"core"]) nothing end end @@ -362,13 +443,20 @@ end function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) @chk numchildren(ex) <= 3 try_block = ex[1] - catch_block = ex[2] - else_block = numchildren(ex) == 2 ? nothing : ex[3] - finally_block = nothing # fixme + if kind(ex) == K"trycatchelse" + catch_block = ex[2] + else_block = numchildren(ex) == 2 ? nothing : ex[3] + finally_block = nothing + catch_label = make_label(ctx, catch_block) + else + catch_block = nothing + else_block = nothing + finally_block = ex[2] + catch_label = make_label(ctx, finally_block) + end - catch_label = make_label(ctx, catch_block) end_label = !in_tail_pos || !isnothing(finally_block) ? make_label(ctx, ex) : nothing - result_var = needs_value && !in_tail_pos ? new_mutable_var(ctx, ex, "result_var") : nothing + try_result = needs_value && !in_tail_pos ? 
new_mutable_var(ctx, ex, "try_result") : nothing # Exception handler block prefix handler_token = ssavar(ctx, ex, "handler_token") @@ -376,7 +464,16 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) handler_token [K"enter" catch_label] # TODO: dynscope ]) + if !isnothing(finally_block) + # TODO: Trivial finally block optimization from JuliaLang/julia#52593 (or + # support a special form for @with)? + finally_handler = FinallyHandler(new_mutable_var(ctx, finally_block, "finally_tag"), + JumpTarget(end_label, ctx)) + push!(ctx.finally_handlers, finally_handler) + emit(ctx, @ast ctx finally_block [K"=" finally_handler.tagvar -1::K"Integer"]) + end push!(ctx.handler_token_stack, handler_token) + # Try block code. try_val = compile(ctx, try_block, needs_value, false) # Exception handler block postfix @@ -387,7 +484,7 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end else if needs_value && !isnothing(try_val) - emit_assignment(ctx, ex, result_var, try_val) + emit_assignment(ctx, ex, try_result, try_val) end emit(ctx, @ast ctx ex [K"leave" handler_token]) end @@ -402,7 +499,7 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else_val = compile(ctx, else_block, needs_value, in_tail_pos) if !in_tail_pos if needs_value && !isnothing(else_val) - emit_assignment(ctx, ex, result_var, else_val) + emit_assignment(ctx, ex, try_result, else_val) end end end @@ -410,36 +507,68 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) emit(ctx, @ast ctx ex [K"goto" end_label]) end + # Catch pad # Emit either catch or finally block. A combined try/catch/finally block # was split into separate trycatchelse and tryfinally blocks earlier. - - emit(ctx, catch_label) + emit(ctx, catch_label) # <- Exceptional control flow enters here if !isnothing(finally_block) - TODO(finally_block, "finally") + # Attribute the postfix and prefix to the finally block as a whole. 
+ srcref = finally_block + enter_finally_block(ctx, srcref, :rethrow, nothing) + emit(ctx, end_label) # <- Non-exceptional control flow enters here + pop!(ctx.finally_handlers) + compile(ctx, finally_block, false, false) + # Finally block postfix: Emit a branch for every code path which enters + # the block to dynamically decide which return/break/rethrow exit action to take + for (tag, (on_exit, value)) in Iterators.reverse(enumerate(finally_handler.exit_actions)) + next_action_label = !in_tail_pos || tag != 1 || on_exit != :return ? + make_label(ctx, srcref) : nothing + if !isnothing(next_action_label) + next_action_label = make_label(ctx, srcref) + tmp = ssavar(ctx, srcref, "do_finally_action") + emit(ctx, @ast ctx srcref [K"=" tmp + [K"call" + "==="::K"core" + finally_handler.tagvar + tag::K"Integer" + ] + ]) + emit(ctx, @ast ctx srcref [K"gotoifnot" tmp next_action_label]) + end + if on_exit === :return + emit_return(ctx, value) + elseif on_exit === :break + emit_break(ctx, value) + elseif on_exit === :rethrow + emit(ctx, @ast ctx srcref [K"call" "rethrow"::K"top"]) + else + @assert false + end + if !isnothing(next_action_label) + emit(ctx, next_action_label) + end + end else push!(ctx.catch_token_stack, handler_token) - # Exceptional control flow enters here catch_val = compile(ctx, catch_block, needs_value, in_tail_pos) - if !isnothing(result_var) && !isnothing(catch_val) - emit_assignment(ctx, ex, result_var, catch_val) + if !isnothing(try_result) && !isnothing(catch_val) + emit_assignment(ctx, ex, try_result, catch_val) end if !in_tail_pos emit(ctx, @ast ctx ex [K"pop_exception" handler_token]) emit(ctx, end_label) else - # <- pop_exception done in emit_return + # (pop_exception done in emit_return) end pop!(ctx.catch_token_stack) end - result_var + try_result end # This pass behaves like an interpreter on the given code. # To perform stateful operations, it calls `emit` to record that something # needs to be done. 
In value position, it returns an expression computing # the needed value. -# -# TODO: Is it ok to return `nothing` if we have no value in some sense? function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || @@ -519,28 +648,29 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif k == K"break_block" end_label = make_label(ctx, ex) name = ex[1].name_val - outer_label = get(ctx.break_labels, name, nothing) - ctx.break_labels[name] = end_label._id + outer_target = get(ctx.break_targets, name, nothing) + ctx.break_targets[name] = JumpTarget(end_label, ctx) compile(ctx, ex[2], false, false) - if isnothing(outer_label) - delete!(ctx.break_labels, name) + if isnothing(outer_target) + delete!(ctx.break_targets, name) else - ctx.break_labels = outer_label + ctx.break_targets = outer_target end emit(ctx, end_label) if needs_value compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) end elseif k == K"break" - name = ex[1].name_val - label_id = get(ctx.break_labels, name, nothing) - if isnothing(label_id) - ty = name == "loop_exit" ? 
"break" : "continue" - throw(LoweringError(ex, "$ty must be used inside a `while` or `for` loop")) - end - label = SyntaxTree(ctx.graph, label_id) - # TODO: try/finally handling - emit(ctx, @ast ctx ex [K"goto" label]) + emit_break(ctx, ex) + #elseif k == K"symbolic_goto" + # target = get(ctx.symbolic_jump_targets, ex.name_val, nothing) + # if isnothing(target) + # push!(ctx.symbolic_jump_origins, IRInsertionPoint(ctx)) + # else + # emit_jump( + #elseif k == K"symbolic_label" + # label = emit_label(ctx, ex) + # push!(ctx.symbolic_jump_targets, JumpTarget(label, ctx)) elseif k == K"return" compile(ctx, ex[1], true, true) nothing @@ -592,7 +722,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end val end - elseif k == K"trycatchelse" # || k == K"tryfinally" + elseif k == K"trycatchelse" || k == K"tryfinally" compile_try(ctx, ex, needs_value, in_tail_pos) elseif k == K"method" # TODO @@ -795,7 +925,9 @@ function linearize_ir(ctx, ex) # required to call reparent() ... 
_ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, Ref(0), false, Set{IdTag}(), nothing, - Dict{String,NodeId}(), SyntaxList(graph), SyntaxList(graph), nothing, ctx.mod) + Dict{String,JumpTarget{typeof(graph)}}(), + SyntaxList(graph), SyntaxList(graph), + Vector{FinallyHandler{typeof(graph)}}(), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res._id, bindings=ctx.bindings) _ctx, res diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index c8278373a2fb9..4b756ea2d5d52 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -588,3 +588,6 @@ function Base.pop!(v::SyntaxList) SyntaxTree(v.graph, pop!(v.ids)) end +function Base.copy(v::SyntaxList) + SyntaxList(v.graph, copy(v.ids)) +end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index bbeecb51261cf..e3f4fbe059fce 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -337,8 +337,18 @@ end """ src = """ -try -catch +let x = 10 + global a = [] + try + try + return 100 + finally + push!(a, 1) + end + finally + push!(a, 2) + end + x end """ @@ -359,7 +369,7 @@ ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) @info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=:var_id) ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) -@info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) +@info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.bindings, ex_compiled) @info "CodeInfo" ex_expr diff --git a/JuliaLowering/test/exceptions.jl b/JuliaLowering/test/exceptions.jl index ca10f7047b6b1..773580b882fcc 100644 --- a/JuliaLowering/test/exceptions.jl +++ b/JuliaLowering/test/exceptions.jl @@ -78,6 +78,32 @@ test_mod = Module() end """) == 1 + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + while true + 
try + error("hi") + catch + x = 2 + break + end + end + x + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + while true + try + x = 2 + break + catch + end + end + x + end + """) == 2 end @testset "value position" begin @@ -237,3 +263,78 @@ end test_ir_cases(joinpath(@__DIR__, "exceptions_ir.jl")) end + +#------------------------------------------------------------------------------- +@testset "try/finally" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +let x = -1 + try + x = 1 + finally + x = 2 + end + x +end +""") == 2 + +@test JuliaLowering.include_string(test_mod, """ +let x = -1 + try + try + error("hi") + x = 1 + finally + x = 2 + end + catch + end + x +end +""") == 2 + +JuliaLowering.include_string(test_mod, """ +begin + function nested_finally(a, x, b, c) + try + try + if x + return b + end + c + finally + push!(a, 1) + end + finally + push!(a, 2) + end + end +end +""") +@test (a = []; res = test_mod.nested_finally(a, true, 100, 200); (a, res)) == ([1,2], 100) +@test (a = []; res = test_mod.nested_finally(a, false, 100, 200); (a, res)) == ([1,2], 200) + +@test JuliaLowering.include_string(test_mod, """ +try + 1 +catch + 2 +finally + 3 +end +""") == 1 + +@test JuliaLowering.include_string(test_mod, """ +try + error("hi") + 1 +catch + 2 +finally + 3 +end +""") == 2 + +end diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl new file mode 100644 index 0000000000000..121e9033f1db4 --- /dev/null +++ b/JuliaLowering/test/exceptions_ir.jl @@ -0,0 +1,338 @@ +######################################## +# Return from inside try/catch +try + f + return x +catch + g + return y +end +#---------- +1 (enter label₆) +2 TestMod.f +3 TestMod.x +4 (leave %₁) +5 (return %₃) +6 TestMod.g +7 TestMod.y +8 (pop_exception %₁) +9 (return %₇) + +######################################## +# Return from inside try/catch with simple return vals +try + f + return 10 +catch + g + return 20 
+end +#---------- +1 (enter label₅) +2 TestMod.f +3 (leave %₁) +4 (return 10) +5 TestMod.g +6 (pop_exception %₁) +7 (return 20) + +######################################## +# Return from multiple try + try/catch +try + try + return 10 + catch + return 20 + end +catch +end +#---------- +1 (enter label₁₄) +2 (enter label₇) +3 (leave %₁ %₂) +4 (return 10) +5 (leave %₂) +6 (goto label₁₁) +7 (leave %₁) +8 (pop_exception %₂) +9 (return 20) +10 (pop_exception %₂) +11 slot₁/try_result +12 (leave %₁) +13 (return %₁₁) +14 (pop_exception %₁) +15 (return core.nothing) + +######################################## +# Return from multiple catch + try/catch +try +catch + try + return 10 + catch + return 20 + end +end +#---------- +1 (enter label₄) +2 (leave %₁) +3 (return core.nothing) +4 (enter label₈) +5 (leave %₄) +6 (pop_exception %₁) +7 (return 10) +8 (pop_exception %₁) +9 (return 20) + +######################################## +# try/catch/else, tail position +try + a +catch + b +else + c +end +#---------- +1 (enter label₆) +2 TestMod.a +3 (leave %₁) +4 TestMod.c +5 (return %₄) +6 TestMod.b +7 (pop_exception %₁) +8 (return %₆) + +######################################## +# try/catch/else, value position +begin + z = try + a + catch + b + else + c + end +end +#---------- +1 (enter label₇) +2 TestMod.a +3 (leave %₁) +4 TestMod.c +5 (= slot₁/try_result %₄) +6 (goto label₁₀) +7 TestMod.b +8 (= slot₁/try_result %₇) +9 (pop_exception %₁) +10 slot₁/try_result +11 (= TestMod.z %₁₀) +12 (return %₁₀) + +######################################## +# try/catch/else, not value/tail +begin + try + a + catch + b + else + c + end + z +end +#---------- +1 (enter label₆) +2 TestMod.a +3 (leave %₁) +4 TestMod.c +5 (goto label₈) +6 TestMod.b +7 (pop_exception %₁) +8 TestMod.z +9 (return %₈) + +######################################## +# basic try/finally, tail position +try + a +finally + b +end +#---------- +1 (enter label₇) +2 (= slot₁/finally_tag -1) +3 (= slot₂/returnval_via_finally TestMod.a) 
+4 (= slot₁/finally_tag 1) +5 (leave %₁) +6 (goto label₈) +7 (= slot₁/finally_tag 2) +8 TestMod.b +9 (call core.=== slot₁/finally_tag 2) +10 (gotoifnot %₉ label₁₂) +11 (call top.rethrow) +12 slot₂/returnval_via_finally +13 (return %₁₂) + +######################################## +# basic try/finally, value position +begin + z = try + a + finally + b + end +end +#---------- +1 (enter label₇) +2 (= slot₂/finally_tag -1) +3 TestMod.a +4 (= slot₁/try_result %₃) +5 (leave %₁) +6 (goto label₈) +7 (= slot₂/finally_tag 1) +8 TestMod.b +9 (call core.=== slot₂/finally_tag 1) +10 (gotoifnot %₉ label₁₂) +11 (call top.rethrow) +12 slot₁/try_result +13 (= TestMod.z %₁₂) +14 (return %₁₂) + +######################################## +# basic try/finally, not value/tail +begin + try + a + finally + b + end + z +end +#---------- +1 (enter label₆) +2 (= slot₁/finally_tag -1) +3 TestMod.a +4 (leave %₁) +5 (goto label₇) +6 (= slot₁/finally_tag 1) +7 TestMod.b +8 (call core.=== slot₁/finally_tag 1) +9 (gotoifnot %₈ label₁₁) +10 (call top.rethrow) +11 TestMod.z +12 (return %₁₁) + +######################################## +# try/finally + break +while true + try + a + break + finally + b + end +end +#---------- +1 (gotoifnot true label₁₅) +2 (enter label₉) +3 (= slot₁/finally_tag -1) +4 TestMod.a +5 (leave %₂) +6 (goto label₁₅) +7 (leave %₂) +8 (goto label₁₀) +9 (= slot₁/finally_tag 1) +10 TestMod.b +11 (call core.=== slot₁/finally_tag 1) +12 (gotoifnot %₁₁ label₁₄) +13 (call top.rethrow) +14 (goto label₁) +15 (return core.nothing) + +######################################## +# try/catch/finally +try + a +catch + b +finally + c +end +#---------- +1 (enter label₁₅) +2 (= slot₁/finally_tag -1) +3 (enter label₈) +4 TestMod.a +5 (= slot₂/try_result %₄) +6 (leave %₃) +7 (goto label₁₁) +8 TestMod.b +9 (= slot₂/try_result %₈) +10 (pop_exception %₃) +11 (= slot₃/returnval_via_finally slot₂/try_result) +12 (= slot₁/finally_tag 1) +13 (leave %₁) +14 (goto label₁₆) +15 (= slot₁/finally_tag 2) +16 
TestMod.c +17 (call core.=== slot₁/finally_tag 2) +18 (gotoifnot %₁₇ label₂₀) +19 (call top.rethrow) +20 slot₃/returnval_via_finally +21 (return %₂₀) + +######################################## +# Nested finally blocks +try + try + if x + return a + end + b + finally + c + end +finally + d +end +#---------- +1 (enter label₃₀) +2 (= slot₁/finally_tag -1) +3 (enter label₁₅) +4 (= slot₃/finally_tag -1) +5 TestMod.x +6 (gotoifnot %₅ label₁₁) +7 (= slot₄/returnval_via_finally TestMod.a) +8 (= slot₃/finally_tag 1) +9 (leave %₃) +10 (goto label₁₆) +11 TestMod.b +12 (= slot₂/try_result %₁₁) +13 (leave %₃) +14 (goto label₁₆) +15 (= slot₃/finally_tag 2) +16 TestMod.c +17 (call core.=== slot₃/finally_tag 2) +18 (gotoifnot %₁₇ label₂₀) +19 (call top.rethrow) +20 (call core.=== slot₃/finally_tag 1) +21 (gotoifnot %₂₀ label₂₆) +22 (= slot₅/returnval_via_finally slot₄/returnval_via_finally) +23 (= slot₁/finally_tag 1) +24 (leave %₁) +25 (goto label₃₁) +26 (= slot₆/returnval_via_finally slot₂/try_result) +27 (= slot₁/finally_tag 2) +28 (leave %₁) +29 (goto label₃₁) +30 (= slot₁/finally_tag 3) +31 TestMod.d +32 (call core.=== slot₁/finally_tag 3) +33 (gotoifnot %₃₂ label₃₅) +34 (call top.rethrow) +35 (call core.=== slot₁/finally_tag 2) +36 (gotoifnot %₃₅ label₃₉) +37 slot₆/returnval_via_finally +38 (return %₃₇) +39 slot₅/returnval_via_finally +40 (return %₃₉) From 096d137400b8d725da14e90cdef8b471e1bac529 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 26 Aug 2024 14:02:11 +1000 Subject: [PATCH 0841/1109] Improve scope assertions and make `module` illegal in local scope See also https://github.com/JuliaLang/julia/issues/54092 --- JuliaLowering/src/desugaring.jl | 31 +++-- JuliaLowering/src/linear_ir.jl | 7 -- JuliaLowering/src/scope_analysis.jl | 55 +++++---- JuliaLowering/test/demo.jl | 18 +++ JuliaLowering/test/macros.jl | 170 ++++++++++++++++++++++++++ JuliaLowering/test/modules.jl | 48 ++++++++ JuliaLowering/test/runtests.jl | 179 +--------------------------- 7 files 
changed, 288 insertions(+), 220 deletions(-) create mode 100644 JuliaLowering/test/macros.jl create mode 100644 JuliaLowering/test/modules.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 1b578d9c48561..8a577d9725219 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -808,15 +808,20 @@ function expand_module(ctx::DesugaringContext, ex::SyntaxTree) body = ex[2] @chk kind(body) == K"block" - @ast ctx ex [ - K"call" - eval_module ::K"Value" - ctx.mod ::K"Value" - modname ::K"String" - [K"inert"(body) - [K"toplevel" - std_defs - children(body)... + @ast ctx ex [K"block" + [K"assert" + "global_toplevel_only"::K"Symbol" + [K"inert" ex] + ] + [K"call" + eval_module ::K"Value" + ctx.mod ::K"Value" + modname ::K"String" + [K"inert"(body) + [K"toplevel" + std_defs + children(body)... + ] ] ] ] @@ -870,7 +875,13 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"function" expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) elseif k == K"macro" - expand_forms_2(ctx, expand_macro_def(ctx, ex)) + @ast ctx ex [K"block" + [K"assert" + "global_toplevel_only"::K"Symbol" + [K"inert" ex] + ] + expand_forms_2(ctx, expand_macro_def(ctx, ex)) + ] elseif k == K"if" || k == K"elseif" @chk numchildren(ex) >= 2 @ast ctx ex [k diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index ec954a3eb9c39..e5899fd286e44 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -589,13 +589,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end - elseif k == K"assert" - # Elide these - they're no longer required. - # TODO: Elide in scope_analysis instead? 
- if needs_value - throw(LoweringError(ex, "misplaced semantic assertion")) - end - nothing elseif k == K"call" # TODO k ∈ splatnew foreigncall cfunction new_opaque_closure cglobal args = compile_args(ctx, children(ex)) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index a2587782ae1a9..d1d2c792ce066 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -45,7 +45,7 @@ struct NameKey end #------------------------------------------------------------------------------- -function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, ex) +function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex) k = kind(ex) if k == K"Identifier" push!(used_names, NameKey(ex)) @@ -58,20 +58,16 @@ function _find_scope_vars!(assignments, locals, globals, used_names, used_bindin get!(locals, NameKey(ex[1]), ex) elseif k == K"global" get!(globals, NameKey(ex[1]), ex) - elseif is_assertion(ex, "require_existing_locals") - for v in ex[2:end] - get!(required_locals, NameKey(v), v) - end # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) if !(kind(v) in KSet"BindingId globalref outerref Placeholder") get!(assignments, NameKey(v), v) end - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, ex[2]) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex[2]) else for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, e) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) end end end @@ -87,9 +83,8 @@ function find_scope_vars(ex) globals = Dict{NameKey,ExT}() used_names = Set{NameKey}() used_bindings = Set{IdTag}() - required_locals = Dict{NameKey,ExT}() for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, required_locals, e) 
+ _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) end # Sort by key so that id generation is deterministic @@ -98,9 +93,8 @@ function find_scope_vars(ex) globals = sort(collect(pairs(globals)), by=first) used_names = sort(collect(used_names)) used_bindings = sort(collect(used_bindings)) - required_locals = sort(collect(pairs(required_locals)), by=first) - return assignments, locals, globals, used_names, used_bindings, required_locals + return assignments, locals, globals, used_names, used_bindings end function Base.isless(a::NameKey, b::NameKey) @@ -120,6 +114,8 @@ function NameKey(ex::SyntaxTree) end struct ScopeInfo + # True if scope is the global top level scope + is_toplevel_global_scope::Bool # True if scope is part of top level code, or a non-lambda scope nested # inside top level code. Thus requiring special scope resolution rules. in_toplevel_thunk::Bool @@ -198,10 +194,11 @@ end function analyze_scope(ctx, ex, scope_type, lambda_info) parentscope = isempty(ctx.scope_stack) ? nothing : ctx.scope_stack[end] is_outer_lambda_scope = kind(ex) == K"lambda" - is_toplevel = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk - in_toplevel_thunk = is_toplevel || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) + is_toplevel_global_scope = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk + in_toplevel_thunk = is_toplevel_global_scope || + (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) - assignments, locals, globals, used, used_bindings, required_locals = find_scope_vars(ex) + assignments, locals, globals, used, used_bindings = find_scope_vars(ex) # Create new lookup table for variables in this scope which differ from the # parent scope. 
@@ -253,7 +250,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end # Compute implicit locals and globals - if is_toplevel + if is_toplevel_global_scope is_hard_scope = false is_soft_scope = false @@ -318,16 +315,6 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end - # Check that any required locals are present - for (varkey,e) in required_locals - vk = haskey(var_ids, varkey) ? - lookup_binding(ctx, var_ids[varkey]).kind : - var_kind(ctx, varkey, true) - if vk !== :local - throw(LoweringError(e, "`outer` annotations must match with a local variable in an outer scope but no such variable was found")) - end - end - lambda_locals = is_outer_lambda_scope ? Set{IdTag}() : parentscope.lambda_locals for id in values(var_ids) vk = var_kind(ctx, id) @@ -342,7 +329,8 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end - return ScopeInfo(in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_locals) + return ScopeInfo(is_toplevel_global_scope, in_toplevel_thunk, is_soft_scope, + is_hard_scope, var_ids, lambda_locals) end function _resolve_scopes(ctx, ex::SyntaxTree) @@ -383,6 +371,21 @@ function _resolve_scopes(ctx, ex::SyntaxTree) body pop!(ctx.scope_stack) @ast ctx ex [K"block" body...] + elseif k == K"assert" + if is_assertion(ex, "require_existing_locals") + for v in ex[2:end] + vk = var_kind(ctx, NameKey(v)) + if vk !== :local + throw(LoweringError(v, "`outer` annotations must match with a local variable in an outer scope but no such variable was found")) + end + end + elseif is_assertion(ex, "global_toplevel_only") + if !ctx.scope_stack[end].is_toplevel_global_scope + e = ex[2][1] + throw(LoweringError(e, "$(kind(e)) is only allowed in global scope")) + end + end + @ast ctx ex [K"unnecessary"] # TODO: Is there a better way to delete this? 
else mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index e3f4fbe059fce..c208c186ccd79 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -352,6 +352,24 @@ let x = 10 end """ +src = """ +let + for outer i = 1:2 + body + end +end +""" + +src = """ +begin + yy = 200 + module A + import ..yy + x = yy + end +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl new file mode 100644 index 0000000000000..a95521e2347fe --- /dev/null +++ b/JuliaLowering/test/macros.jl @@ -0,0 +1,170 @@ +@testset "macros" begin + +test_mod = Module() + +JuliaLowering.include_string(test_mod, """ +module M + using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope + using JuliaSyntax + + # Introspection + macro __MODULE__() + __context__.scope_layer.mod + end + + macro __FILE__() + JuliaLowering.filename(__context__.macroname) + end + + macro __LINE__() + JuliaLowering.source_location(__context__.macroname)[1] + end + + someglobal = "global in module M" + + # Macro with local variables + macro foo(ex) + :(begin + x = "`x` from @foo" + (x, someglobal, \$ex) + end) + end + + # Set `a_global` in M + macro set_a_global(val) + :(begin + global a_global = \$val + end) + end + + macro set_other_global(ex, val) + :(begin + global \$ex = \$val + end) + end + + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + \$e1 = \$ex + nothing + end + end + + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end + + # # Recursive macro call + # # TODO: Need branching! 
+ # macro recursive(N) + # Nval = N.value #::Int + # if Nval < 1 + # return N + # end + # quote + # x = \$N + # (@recursive \$(Nval-1), x) + # end + # end +end +""") + +@test JuliaLowering.include_string(test_mod, """ +let + x = "`x` from outer scope" + M.@foo x +end +""") == ("`x` from @foo", "global in module M", "`x` from outer scope") +@test !isdefined(test_mod.M, :x) + +@test JuliaLowering.include_string(test_mod, """ +#line1 +(M.@__MODULE__(), M.@__FILE__(), M.@__LINE__()) +""", "foo.jl") == (test_mod, "foo.jl", 2) + +@test !isdefined(test_mod.M, :a_global) +@test JuliaLowering.include_string(test_mod, """ +begin + M.@set_a_global 42 + M.a_global +end +""") == 42 + +JuliaLowering.include_string(test_mod, """ +M.@set_global_in_parent "bent hygiene!" +""") +@test test_mod.sym_introduced_from_M == "bent hygiene!" + +JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 100") +@test !isdefined(test_mod.M, :global_in_test_mod) +@test test_mod.global_in_test_mod == 100 + +Base.eval(test_mod.M, :( +# Recursive macro call +function var"@recursive"(mctx, N) + @chk kind(N) == K"Integer" + Nval = N.value::Int + if Nval < 1 + return N + end + @ast mctx (@HERE) [K"block" + [K"="(@HERE) + "x"::K"Identifier"(@HERE) + N + ] + [K"tuple"(@HERE) + "x"::K"Identifier"(@HERE) + [K"macrocall"(@HERE) + "@recursive"::K"Identifier" + (Nval-1)::K"Integer" + ] + ] + ] +end +)) + +@test JuliaLowering.include_string(test_mod, """ +M.@recursive 3 +""") == (3, (2, (1, 0))) + +@test let + ex = parsestmt(SyntaxTree, "M.@outer()", filename="foo.jl") + expanded = JuliaLowering.macroexpand(test_mod, ex) + sourcetext.(flattened_provenance(expanded[2])) +end == [ + "M.@outer()" + "@inner" + "2" +] + + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +macro mmm(a; b=2) +end +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +macro A.b(ex) +end +""") + +# Macros not allowed in local scope +@test_throws LoweringError 
JuliaLowering.include_string(test_mod, """ +let + macro foo(ex) + end +end +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +function f() + macro foo() + end +end +""") + +end diff --git a/JuliaLowering/test/modules.jl b/JuliaLowering/test/modules.jl new file mode 100644 index 0000000000000..e9e71558cf1ce --- /dev/null +++ b/JuliaLowering/test/modules.jl @@ -0,0 +1,48 @@ +@testset "JuliaLowering.jl" begin + +test_mod = Module() + +A = JuliaLowering.include_string(test_mod, """ +module A + function g() + return "hi" + end +end +""", "module_test") +@test A isa Module +@test A.g() == "hi" +@test A.include isa Function +@test A.Base === Base +@test A.eval(:(x = -1)) == -1 && A.x == -1 + +B = JuliaLowering.include_string(test_mod, """ +baremodule B +end +""", "baremodule_test") +@test B.Core === Core +@test !isdefined(B, :include) +@test !isdefined(B, :Base) + +# modules allowed in nested code in global scope +@test typeof(JuliaLowering.include_string(test_mod, """ +begin + module C + end +end +""")) == Module + +# Modules not allowed in local scope +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +let + module C + end +end +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +function f() + module C + end +end +""") + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index b5c4946f023e9..675f1e91b4fbb 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -90,29 +90,6 @@ end (2,3,4), (1,2,3,4,5)) -#------------------------------------------------------------------------------- -# module -A = JuliaLowering.include_string(test_mod, """ -module A - function g() - return "hi" - end -end -""", "module_test") -@test A isa Module -@test A.g() == "hi" -@test A.include isa Function -@test A.Base === Base -@test A.eval(:(x = -1)) == -1 && A.x == -1 - -B = JuliaLowering.include_string(test_mod, """ -baremodule B -end -""", "baremodule_test") 
-@test B.Core === Core -@test !isdefined(B, :include) -@test !isdefined(B, :Base) - #------------------------------------------------------------------------------- # using / import JuliaLowering.include_string(test_mod, """ @@ -141,161 +118,9 @@ end """) @test C.D.f === C.E.f -#------------------------------------------------------------------------------- -# Macro expansion - -JuliaLowering.include_string(test_mod, """ -module M - using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope - using JuliaSyntax - - # Introspection - macro __MODULE__() - __context__.scope_layer.mod - end - - macro __FILE__() - JuliaLowering.filename(__context__.macroname) - end - - macro __LINE__() - JuliaLowering.source_location(__context__.macroname)[1] - end - - someglobal = "global in module M" - - # Macro with local variables - macro foo(ex) - :(begin - x = "`x` from @foo" - (x, someglobal, \$ex) - end) - end - - # Set `a_global` in M - macro set_a_global(val) - :(begin - global a_global = \$val - end) - end - - macro set_other_global(ex, val) - :(begin - global \$ex = \$val - end) - end - - macro set_global_in_parent(ex) - e1 = adopt_scope(:(sym_introduced_from_M), __context__) - quote - \$e1 = \$ex - nothing - end - end - - macro inner() - :(2) - end - - macro outer() - :((1, @inner)) - end - - # # Recursive macro call - # # TODO: Need branching! 
- # macro recursive(N) - # Nval = N.value #::Int - # if Nval < 1 - # return N - # end - # quote - # x = \$N - # (@recursive \$(Nval-1), x) - # end - # end -end -""") - -@test JuliaLowering.include_string(test_mod, """ -let - x = "`x` from outer scope" - M.@foo x -end -""") == ("`x` from @foo", "global in module M", "`x` from outer scope") -@test !isdefined(test_mod.M, :x) - -@test JuliaLowering.include_string(test_mod, """ -#line1 -(M.@__MODULE__(), M.@__FILE__(), M.@__LINE__()) -""", "foo.jl") == (test_mod, "foo.jl", 2) - -@test !isdefined(test_mod.M, :a_global) -@test JuliaLowering.include_string(test_mod, """ -begin - M.@set_a_global 42 - M.a_global -end -""") == 42 - -JuliaLowering.include_string(test_mod, """ -M.@set_global_in_parent "bent hygiene!" -""") -@test test_mod.sym_introduced_from_M == "bent hygiene!" - -JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 100") -@test !isdefined(test_mod.M, :global_in_test_mod) -@test test_mod.global_in_test_mod == 100 - -Base.eval(test_mod.M, :( -# Recursive macro call -function var"@recursive"(mctx, N) - @chk kind(N) == K"Integer" - Nval = N.value::Int - if Nval < 1 - return N - end - @ast mctx (@HERE) [K"block" - [K"="(@HERE) - "x"::K"Identifier"(@HERE) - N - ] - [K"tuple"(@HERE) - "x"::K"Identifier"(@HERE) - [K"macrocall"(@HERE) - "@recursive"::K"Identifier" - (Nval-1)::K"Integer" - ] - ] - ] -end -)) - -@test JuliaLowering.include_string(test_mod, """ -M.@recursive 3 -""") == (3, (2, (1, 0))) - -@test let - ex = parsestmt(SyntaxTree, "M.@outer()", filename="foo.jl") - expanded = JuliaLowering.macroexpand(test_mod, ex) - sourcetext.(flattened_provenance(expanded[2])) -end == [ - "M.@outer()" - "@inner" - "2" -] - - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -macro mmm(a; b=2) -end -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -macro A.b(ex) -end -""") - include("functions.jl") +include("macros.jl") +include("modules.jl") 
include("desugaring.jl") include("branching.jl") include("loops.jl") From 081b6561fdf7e29e47ce66ebd9626f4b972737d7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 26 Aug 2024 17:06:31 +1000 Subject: [PATCH 0842/1109] Docs for mechanics of finally lowering --- JuliaLowering/README.md | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index da6703cf7ec81..afd0216f85f8d 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -300,6 +300,50 @@ exception-related state restoration which need to happen. Note also that the "handler state restoration" actually includes several pieces of runtime state including GC flags - see `jl_eh_restore_state` in the runtime for that. +### Lowering finally code paths + +When lowering `finally` blocks we want to emit the user's finally code once but +multiple code paths may traverse the finally block. For example, consider the +code + +```julia +function foo(x) + while true + try + if x == 1 + return f(x) + elseif x == 2 + g(x) + continue + else + break + end + finally + h() + end + end +end +``` + +In this situation there's four distinct code paths through the finally block: +1. `return f(x)` needs to call `val = f(x)`, leave the `try` block, run `h()` then + return `val`. +2. `continue` needs to call `h()` then jump to the start of the while loop +3. `break` needs to call `h()` then jump to the exit of the while loop +4. If an exception occurs in `f(x)` or `g(x)`, we need to call `h()` before + falling back into the while loop. + +To deal with these we create a `finally_tag` variable to dynamically track +which action to take after the finally block exits. Before jumping to the block +we set this variable to a unique integer tag identifying the incoming code +path. At the exit of the user's code (`h()` in this case) we perform the jump +appropriate to the `break`, `continue` or `return` as necessary based on the tag. 
+ +(TODO - these are the only four cases which can occur, but, for example, +multiple `return`s create multiple tags rather than assigning to a single +variable. Collapsing these into a single case might be worth considering? But +also might be worse for type inference in some cases?) + ## Julia's existing lowering implementation ### How does macro expansion work? From 13cf8d94594e4d287c788c5255feeed84b9c0637 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 27 Aug 2024 13:37:36 +1000 Subject: [PATCH 0843/1109] Lowering of symbolic goto/label --- JuliaLowering/src/linear_ir.jl | 102 ++++++++++++++++++++++------ JuliaLowering/src/syntax_graph.jl | 1 + JuliaLowering/test/branching.jl | 54 ++++++++++++++- JuliaLowering/test/branching_ir.jl | 103 +++++++++++++++++++++++++++++ JuliaLowering/test/demo.jl | 43 ++++++++++-- JuliaLowering/test/utils.jl | 3 +- 6 files changed, 275 insertions(+), 31 deletions(-) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index e5899fd286e44..8cd7b59191c80 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -36,6 +36,17 @@ function JumpTarget(label::SyntaxTree{GraphType}, ctx) where {GraphType} JumpTarget{GraphType}(label, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack)) end +struct JumpOrigin{GraphType} + goto::SyntaxTree{GraphType} + index::Int + handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} + catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} +end + +function JumpOrigin(goto::SyntaxTree{GraphType}, index, ctx) where {GraphType} + JumpOrigin{GraphType}(goto, index, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack)) +end + struct FinallyHandler{GraphType} tagvar::SyntaxTree{GraphType} target::JumpTarget{GraphType} @@ -66,6 +77,8 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} 
finally_handlers::Vector{FinallyHandler{GraphType}} + symbolic_jump_targets::Dict{String,JumpTarget{GraphType}} + symbolic_jump_origins::Vector{JumpOrigin{GraphType}} mod::Module end @@ -76,7 +89,8 @@ function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) LinearIRContext(graph, SyntaxList(ctx), ctx.bindings, Ref(0), is_toplevel_thunk, lambda_locals, rett, Dict{String,JumpTarget{GraphType}}(), SyntaxList(ctx), SyntaxList(ctx), - Vector{FinallyHandler{GraphType}}(), ctx.mod) + Vector{FinallyHandler{GraphType}}(), Dict{String,JumpTarget{GraphType}}(), + Vector{JumpOrigin{GraphType}}(), ctx.mod) end # FIXME: BindingId subsumes many things so need to assess what that means for these predicates. @@ -158,7 +172,7 @@ function emit_assign_tmp(ctx::LinearIRContext, ex, name="tmp") return tmp end -function compile_pop_exception(ctx::LinearIRContext, srcref, src_tokens, dest_tokens) +function compile_pop_exception(ctx, srcref, src_tokens, dest_tokens) # It's valid to leave the context of src_tokens for the context of # dest_tokens when src_tokens is the same or nested within dest_tokens. # It's enough to check the token on the top of the dest stack. 
@@ -172,6 +186,17 @@ function compile_pop_exception(ctx::LinearIRContext, srcref, src_tokens, dest_to end end +function compile_leave_handler(ctx, srcref, src_tokens, dest_tokens) + n = length(dest_tokens) + jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) + jump_ok || throw(LoweringError(srcref, "Attempt to jump into try block")) + if n < length(src_tokens) + @ast ctx srcref [K"leave" src_tokens[n+1:end]] + else + nothing + end +end + function emit_pop_exception(ctx::LinearIRContext, srcref, dest_tokens) pexc = compile_pop_exception(ctx, srcref, ctx.catch_token_stack, dest_tokens) if !isnothing(pexc) @@ -180,14 +205,9 @@ function emit_pop_exception(ctx::LinearIRContext, srcref, dest_tokens) end function emit_leave_handler(ctx::LinearIRContext, srcref, dest_tokens) - src_tokens = ctx.handler_token_stack - n = length(dest_tokens) - jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) - jump_ok || throw(LoweringError(srcref, "Attempt to jump into try block")) - if n < length(src_tokens) - emit(ctx, @ast ctx srcref [K"leave" src_tokens[n+1:end]]) - else - nothing + ex = compile_leave_handler(ctx, srcref, ctx.handler_token_stack, dest_tokens) + if !isnothing(ex) + emit(ctx, ex) end end @@ -655,15 +675,24 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"break" emit_break(ctx, ex) - #elseif k == K"symbolic_goto" - # target = get(ctx.symbolic_jump_targets, ex.name_val, nothing) - # if isnothing(target) - # push!(ctx.symbolic_jump_origins, IRInsertionPoint(ctx)) - # else - # emit_jump( - #elseif k == K"symbolic_label" - # label = emit_label(ctx, ex) - # push!(ctx.symbolic_jump_targets, JumpTarget(label, ctx)) + elseif k == K"symbolic_label" + label = emit_label(ctx, ex) + name = ex.name_val + if haskey(ctx.symbolic_jump_targets, name) + throw(LoweringError(ex, "Label `$name` defined multiple times")) + end + push!(ctx.symbolic_jump_targets, 
name=>JumpTarget(label, ctx)) + if in_tail_pos + emit_return(ctx, ex, nothing_(ctx, ex)) + elseif needs_value + throw(LoweringError(ex, "misplaced label in value position")) + end + elseif k == K"symbolic_goto" + push!(ctx.symbolic_jump_origins, JumpOrigin(ex, length(ctx.code)+1, ctx)) + emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? pop_exception + emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? leave + emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? goto + nothing elseif k == K"return" compile(ctx, ex[1], true, true) nothing @@ -854,6 +883,8 @@ function renumber_body(ctx, input_code, slot_rewrites) end elseif k == K"label" label_table[ex.id] = length(code) + 1 + elseif k == K"TOMBSTONE" + # remove statement else ex_out = ex end @@ -874,7 +905,33 @@ end # flisp: compile-body function compile_body(ctx, ex) compile(ctx, ex, true, true) - # TODO: Fix any gotos + + # Fix up any symbolic gotos. (We can't do this earlier because the goto + # might precede the label definition in unstructured control flow.) 
+ for origin in ctx.symbolic_jump_origins + name = origin.goto.name_val + target = get(ctx.symbolic_jump_targets, name, nothing) + if isnothing(target) + throw(LoweringError(origin.goto, "label `$name` referenced but not defined")) + end + i = origin.index + pop_ex = compile_pop_exception(ctx, origin.goto, origin.catch_token_stack, + target.catch_token_stack) + if !isnothing(pop_ex) + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = pop_ex + i += 1 + end + leave_ex = compile_leave_handler(ctx, origin.goto, origin.handler_token_stack, + target.handler_token_stack) + if !isnothing(leave_ex) + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = leave_ex + i += 1 + end + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = @ast ctx origin.goto [K"goto" target.label] + end # TODO: Filter out any newvar nodes where the arg is definitely initialized end @@ -916,11 +973,14 @@ function linearize_ir(ctx, ex) id=Int) # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently # required to call reparent() ... + GraphType = typeof(graph) _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, Ref(0), false, Set{IdTag}(), nothing, Dict{String,JumpTarget{typeof(graph)}}(), SyntaxList(graph), SyntaxList(graph), - Vector{FinallyHandler{typeof(graph)}}(), ctx.mod) + Vector{FinallyHandler{GraphType}}(), + Dict{String, JumpTarget{GraphType}}(), + Vector{JumpOrigin{GraphType}}(), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) setattr!(graph, res._id, bindings=ctx.bindings) _ctx, res diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 4b756ea2d5d52..b19f5ef6958ac 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -416,6 +416,7 @@ function _value_string(ex) k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : k == K"slot" ? "slot" : k == K"symbolic_label" ? "label:$(ex.name_val)" : + k == K"symbolic_goto" ? 
"goto:$(ex.name_val)" : repr(get(ex, :value, nothing)) id = get(ex, :var_id, nothing) if isnothing(id) diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index 755927fdde8a3..e5a60c6e91f69 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -4,6 +4,23 @@ test_mod = Module() +Base.eval(test_mod, quote + using JuliaLowering: JuliaLowering, @ast, @chk + using JuliaSyntax +end) + +Base.eval(test_mod, quote + function var"@label"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == JuliaSyntax.K"Identifier" + @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" + end + + function var"@goto"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == JuliaSyntax.K"Identifier" + @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" + end +end) + #------------------------------------------------------------------------------- @testset "Tail position" begin @@ -287,11 +304,42 @@ end end -#------------------------------------------------------------------------------- -@testset "Detailed lowering tests" begin +@testset "symbolic goto/label" begin + JuliaLowering.include_string(test_mod, """ + let + a = [] + i = 1 + @label foo + push!(a, i) + i = i + 1 + if i <= 2 + @goto foo + end + a + end + """) == [1,2] -test_ir_cases(joinpath(@__DIR__,"branching_ir.jl")) + @test_throws LoweringError JuliaLowering.include_string(test_mod, """ + begin + @goto foo + end + """) + + @test_throws LoweringError JuliaLowering.include_string(test_mod, """ + begin + @label foo + @label foo + end + """) + @test_throws LoweringError JuliaLowering.include_string(test_mod, """ + x = @label foo + """) +end + +#------------------------------------------------------------------------------- +@testset "Branching IR" begin + test_ir_cases(joinpath(@__DIR__,"branching_ir.jl"), test_mod) end end diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index 3d32bc88a03a6..d0c6492a4e18f 100644 --- 
a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -84,3 +84,106 @@ end 6 slot₄/d 7 (return %₆) 8 (return core.nothing) + +######################################## +# symbolic goto forward jump +begin + a + @goto foo + b + @label foo +end +#---------- +1 TestMod.a +2 (goto label₄) +3 TestMod.b +4 (return core.nothing) + +######################################## +# symbolic goto backward jump +begin + a + @label foo + b + @goto foo +end +#---------- +1 TestMod.a +2 TestMod.b +3 (goto label₂) + +###################################### +# Jumping out of try and catch blocks using @goto +begin + try + a + @goto lab + b + catch + c + @goto lab + d + end + @label lab +end +#---------- +1 (enter label₈) +2 TestMod.a +3 (leave %₁) +4 (goto label₁₃) +5 TestMod.b +6 (leave %₁) +7 (goto label₁₃) +8 TestMod.c +9 (pop_exception %₁) +10 (goto label₁₃) +11 TestMod.d +12 (pop_exception %₁) +13 (return core.nothing) + +######################################## +# Jumping out of nested try/catch and catch/try +begin + try + try + a + catch + b + @goto lab + c + end + catch + try + d + @goto lab + e + catch + end + end + @label lab +end +#---------- +1 (enter label₁₄) +2 (enter label₆) +3 TestMod.a +4 (leave %₂) +5 (goto label₁₂) +6 TestMod.b +7 (pop_exception %₂) +8 (leave %₁) +9 (goto label₂₄) +10 TestMod.c +11 (pop_exception %₂) +12 (leave %₁) +13 (goto label₂₄) +14 (enter label₂₂) +15 TestMod.d +16 (pop_exception %₁) +17 (leave %₁₄) +18 (goto label₂₄) +19 TestMod.e +20 (leave %₁₄) +21 (goto label₂₃) +22 (pop_exception %₁₄) +23 (pop_exception %₁) +24 (return core.nothing) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index c208c186ccd79..9806f478d8676 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -111,6 +111,16 @@ Base.eval(M, quote @chk kind(ex) == JuliaSyntax.K"quote" @ast __context__ ex [JuliaSyntax.K"inert" ex] end + + function var"@label"(__context__::JuliaLowering.MacroContext, ex) + @chk 
kind(ex) == JuliaSyntax.K"Identifier" + @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" + end + + function var"@goto"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == JuliaSyntax.K"Identifier" + @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" + end end) JuliaLowering.include_string(M, """ @@ -361,15 +371,38 @@ end """ src = """ -begin - yy = 200 - module A - import ..yy - x = yy +let + i = "hi" + j = 1 + M.@label foo + try + println("i = ", i) + i = i + 1 + if i <= 2 + M.@goto foo + end + catch exc + println("Caught exception ", exc) + j = j + 1 + if j <= 2 + println("Trying again ", exc) + M.@goto foo + end end end """ +src = """ +let + M.@goto foo + M.@label foo +end +""" + +src = """ +x = M.@label foo +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 7b0f8b0187726..1a0dfab7d11e4 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -126,11 +126,10 @@ function format_ir_test_case(mod, input, description="-- Add description here -- """ end -function test_ir_cases(filename) +function test_ir_cases(filename::AbstractString, mod=Module(:TestMod)) str = read(filename, String) cases = [match_ir_test_case(s) for s in split(str, r"####*") if strip(s) != ""] - mod = Module(:TestMod) for (name,input,ref) in cases output = format_ir_for_test(mod, input) @testset "$name" begin From a3f89ecbe46948962cbbb08f82325256475de13b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 29 Aug 2024 14:44:32 +1000 Subject: [PATCH 0844/1109] Some cleanup --- JuliaLowering/src/ast.jl | 13 +++++++++---- JuliaLowering/src/desugaring.jl | 10 ++++------ JuliaLowering/src/linear_ir.jl | 10 ++++------ JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/test/exceptions.jl | 4 ++-- JuliaLowering/test/functions.jl | 8 ++++---- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git 
a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index bb7746ccecd63..60f50aa18de13 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -43,7 +43,7 @@ function lookup_binding(bindings::Bindings, id::Integer) end function lookup_binding(bindings::Bindings, ex::SyntaxTree) - # TODO: @assert kind(ex) == K"BindingId" + @assert kind(ex) == K"BindingId" bindings.info[ex.var_id] end @@ -199,11 +199,17 @@ function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") makeleaf(ctx, nameref, K"BindingId", var_id=id) end +function add_lambda_local!(ctx::AbstractLoweringContext, id) + # empty - early passes don't need to record lambda locals +end + # Create a new local mutable variable function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, false, false)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) - makeleaf(ctx, nameref, K"BindingId", var_id=id) + var = makeleaf(ctx, nameref, K"BindingId", var_id=id) + add_lambda_local!(ctx, id) + var end # Assign `ex` to an SSA variable. 
@@ -471,8 +477,7 @@ end function is_function_def(ex) k = kind(ex) - return k == K"function" || k == K"->" || - (k == K"=" && numchildren(ex) == 2 && is_eventually_call(ex[1])) + return k == K"function" || k == K"->" end function is_valid_name(ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 8a577d9725219..054adb7c96f9c 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -448,16 +448,14 @@ function expand_decls(ctx, ex) stmts = SyntaxList(ctx) for binding in children(ex) kb = kind(binding) - if is_function_def(binding) - push!(stmts, makenode(ctx, binding, declkind, assigned_name(binding))) - push!(stmts, binding) - elseif is_prec_assignment(kb) + if is_prec_assignment(kb) + @chk numchildren(binding) == 2 lhs = strip_decls!(ctx, stmts, declkind, binding[1]) - push!(stmts, makenode(ctx, binding, kb, lhs, binding[2])) + push!(stmts, @ast ctx binding [kb lhs binding[2]]) elseif is_sym_decl(binding) strip_decls!(ctx, stmts, declkind, binding) else - throw(LoweringError("invalid syntax in variable declaration")) + throw(LoweringError(ex, "invalid syntax in variable declaration")) end end makenode(ctx, ex, K"block", stmts) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 8cd7b59191c80..0a74eda31b717 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -422,13 +422,8 @@ function compile_conditional(ctx, ex, false_label) end end -function new_mutable_var(ctx::LinearIRContext, srcref, name) - # TODO: Deduplicate this somehow with generic new_mutable_var? 
- id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, false, false)) - nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) - var = makeleaf(ctx, nameref, K"BindingId", var_id=id) +function add_lambda_local!(ctx::LinearIRContext, id) push!(ctx.lambda_locals, id) - var end # Lowering of exception handling must ensure that @@ -704,6 +699,9 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else nothing end + elseif k == K"TOMBSTONE" + @chk !needs_value (ex,"TOMBSTONE encountered in value position") + nothing elseif k == K"if" || k == K"elseif" @chk numchildren(ex) <= 3 has_else = numchildren(ex) > 2 diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index d1d2c792ce066..cbd58411e2ecf 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -385,7 +385,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) throw(LoweringError(e, "$(kind(e)) is only allowed in global scope")) end end - @ast ctx ex [K"unnecessary"] # TODO: Is there a better way to delete this? 
+ makeleaf(ctx, ex, K"TOMBSTONE") else mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) end diff --git a/JuliaLowering/test/exceptions.jl b/JuliaLowering/test/exceptions.jl index 773580b882fcc..d7d3bffc5a066 100644 --- a/JuliaLowering/test/exceptions.jl +++ b/JuliaLowering/test/exceptions.jl @@ -68,13 +68,13 @@ test_mod = Module() @test JuliaLowering.include_string(test_mod, """ begin - function f() + function g() try return 1 catch end end - f() + g() end """) == 1 diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index ec35e14021cd6..7d0e210119be1 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -15,13 +15,13 @@ end @test JuliaLowering.include_string(test_mod, """ begin - function f(x)::Int + function g(x)::Int if x == 1 return 42.0 end 0xff end - (f(1), f(2)) + (g(1), g(2)) end """) === (42, 255) @@ -35,10 +35,10 @@ Base.include_string(test_mod, @test_throws TypeError JuliaLowering.include_string(test_mod, """ begin - function g()::X + function h()::X return nothing end - g() + h() end """) From 4c0dad88a5eded8cfd3b4184bdcd37e2f6f446fe Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 29 Aug 2024 17:11:11 +1000 Subject: [PATCH 0845/1109] Minor cleanup / docs in linear IR generation --- JuliaLowering/src/linear_ir.jl | 73 +++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 0a74eda31b717..cbf89ecd9a3aa 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -816,6 +816,38 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end end +# flisp: compile-body +function compile_body(ctx, ex) + compile(ctx, ex, true, true) + + # Fix up any symbolic gotos. (We can't do this earlier because the goto + # might precede the label definition in unstructured control flow.) 
+ for origin in ctx.symbolic_jump_origins + name = origin.goto.name_val + target = get(ctx.symbolic_jump_targets, name, nothing) + if isnothing(target) + throw(LoweringError(origin.goto, "label `$name` referenced but not defined")) + end + i = origin.index + pop_ex = compile_pop_exception(ctx, origin.goto, origin.catch_token_stack, + target.catch_token_stack) + if !isnothing(pop_ex) + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = pop_ex + i += 1 + end + leave_ex = compile_leave_handler(ctx, origin.goto, origin.handler_token_stack, + target.handler_token_stack) + if !isnothing(leave_ex) + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = leave_ex + i += 1 + end + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = @ast ctx origin.goto [K"goto" target.label] + end + # TODO: Filter out any newvar nodes where the arg is definitely initialized +end #------------------------------------------------------------------------------- @@ -900,39 +932,6 @@ function renumber_body(ctx, input_code, slot_rewrites) code end -# flisp: compile-body -function compile_body(ctx, ex) - compile(ctx, ex, true, true) - - # Fix up any symbolic gotos. (We can't do this earlier because the goto - # might precede the label definition in unstructured control flow.) 
- for origin in ctx.symbolic_jump_origins - name = origin.goto.name_val - target = get(ctx.symbolic_jump_targets, name, nothing) - if isnothing(target) - throw(LoweringError(origin.goto, "label `$name` referenced but not defined")) - end - i = origin.index - pop_ex = compile_pop_exception(ctx, origin.goto, origin.catch_token_stack, - target.catch_token_stack) - if !isnothing(pop_ex) - @assert kind(ctx.code[i]) == K"TOMBSTONE" - ctx.code[i] = pop_ex - i += 1 - end - leave_ex = compile_leave_handler(ctx, origin.goto, origin.handler_token_stack, - target.handler_token_stack) - if !isnothing(leave_ex) - @assert kind(ctx.code[i]) == K"TOMBSTONE" - ctx.code[i] = leave_ex - i += 1 - end - @assert kind(ctx.code[i]) == K"TOMBSTONE" - ctx.code[i] = @ast ctx origin.goto [K"goto" target.label] - end - # TODO: Filter out any newvar nodes where the arg is definitely initialized -end - function _add_slots!(slot_rewrites, bindings, ids) n = length(slot_rewrites) + 1 for id in ids @@ -963,6 +962,14 @@ function compile_lambda(outer_ctx, ex) ) end +""" +This pass converts nested ASTs in the body of a lambda into a list of +statements (ie, Julia's linear/untyped IR). + +Most of the compliexty of this pass is in lowering structured control flow (if, +loops, etc) to gotos and exception handling to enter/leave. We also convert +`K"BindingId"` into K"slot", `K"globalref"` or `K"SSAValue` as appropriate. +""" function linearize_ir(ctx, ex) graph = ensure_attributes(ctx.graph, slot_rewrites=Dict{IdTag,Int}, From 339d96362d0fe6e51402e8b196d7d4744079e508 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 29 Aug 2024 22:44:48 +1000 Subject: [PATCH 0846/1109] Make local variable declarations like `local x::T = y` work These are lowered in the closure conversion pass, so add scaffolding for the closure conversion and variable analysis passes. 
--- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/ast.jl | 134 +++++++++++++--------- JuliaLowering/src/closure_conversion.jl | 144 ++++++++++++++++++++++++ JuliaLowering/src/eval.jl | 13 ++- JuliaLowering/src/linear_ir.jl | 36 ------ JuliaLowering/src/macro_expansion.jl | 5 +- JuliaLowering/src/scope_analysis.jl | 31 +++-- JuliaLowering/test/decls_ir.jl | 21 ++++ JuliaLowering/test/demo.jl | 14 ++- JuliaLowering/test/runtests.jl | 41 +++++++ 10 files changed, 331 insertions(+), 109 deletions(-) create mode 100644 JuliaLowering/src/closure_conversion.jl create mode 100644 JuliaLowering/test/decls_ir.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 70874709e5851..af69aeb703119 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -25,6 +25,7 @@ _include("utils.jl") _include("macro_expansion.jl") _include("desugaring.jl") _include("scope_analysis.jl") +_include("closure_conversion.jl") _include("linear_ir.jl") _include("runtime.jl") diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 60f50aa18de13..34fcaf8b94c2c 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -1,3 +1,50 @@ +#------------------------------------------------------------------------------- +# @chk: Basic AST structure checking tool +# +# Check a condition involving an expression, throwing a LoweringError if it +# doesn't evaluate to true. Does some very simple pattern matching to attempt +# to extract the expression variable from the left hand side. +# +# Forms: +# @chk pred(ex) +# @chk pred(ex) msg +# @chk pred(ex) (msg_display_ex, msg) +macro chk(cond, msg=nothing) + if Meta.isexpr(msg, :tuple) + ex = msg.args[1] + msg = msg.args[2] + else + ex = cond + while true + if ex isa Symbol + break + elseif ex.head == :call + ex = ex.args[2] + elseif ex.head == :ref + ex = ex.args[1] + elseif ex.head == :. 
+ ex = ex.args[1] + elseif ex.head in (:(==), :(in), :<, :>) + ex = ex.args[1] + else + error("Can't analyze $cond") + end + end + end + quote + ex = $(esc(ex)) + @assert ex isa SyntaxTree + ok = try + $(esc(cond)) + catch + false + end + if !ok + throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg)))) + end + end +end + #------------------------------------------------------------------------------- abstract type AbstractLoweringContext end @@ -13,6 +60,7 @@ struct BindingInfo name::String mod::Union{Nothing,Module} # Set when `kind === :global` kind::Symbol # :local :global :argument :static_parameter + type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 is_ssa::Bool # Single assignment, defined before use is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) end @@ -38,17 +86,38 @@ function new_binding(bindings::Bindings, info::BindingInfo) return length(bindings.info) end -function lookup_binding(bindings::Bindings, id::Integer) - bindings.info[id] +function _binding_id(id::Integer) + id +end + +function _binding_id(ex::SyntaxTree) + @chk kind(ex) == K"BindingId" + ex.var_id +end + +function update_binding(bindings::Bindings, x; type=nothing) + id = _binding_id(x) + b = lookup_binding(bindings, id) + bindings.info[id] = BindingInfo( + b.name, + b.mod, + b.kind, + isnothing(type) ? b.type : type, + b.is_ssa, + b.is_ambiguous_local, + ) +end + +function lookup_binding(bindings::Bindings, x) + bindings.info[_binding_id(x)] end -function lookup_binding(bindings::Bindings, ex::SyntaxTree) - @assert kind(ex) == K"BindingId" - bindings.info[ex.var_id] +function lookup_binding(ctx::AbstractLoweringContext, x) + lookup_binding(ctx.bindings, x) end -function lookup_binding(ctx::AbstractLoweringContext, id) - lookup_binding(ctx.bindings, id) +function update_binding(ctx::AbstractLoweringContext, x; kws...) + update_binding(ctx.bindings, x; kws...) 
end const LayerId = Int @@ -57,53 +126,6 @@ function syntax_graph(ctx::AbstractLoweringContext) ctx.graph end -#------------------------------------------------------------------------------- -# @chk: Basic AST structure checking tool -# -# Check a condition involving an expression, throwing a LoweringError if it -# doesn't evaluate to true. Does some very simple pattern matching to attempt -# to extract the expression variable from the left hand side. -# -# Forms: -# @chk pred(ex) -# @chk pred(ex) msg -# @chk pred(ex) (msg_display_ex, msg) -macro chk(cond, msg=nothing) - if Meta.isexpr(msg, :tuple) - ex = msg.args[1] - msg = msg.args[2] - else - ex = cond - while true - if ex isa Symbol - break - elseif ex.head == :call - ex = ex.args[2] - elseif ex.head == :ref - ex = ex.args[1] - elseif ex.head == :. - ex = ex.args[1] - elseif ex.head in (:(==), :(in), :<, :>) - ex = ex.args[1] - else - error("Can't analyze $cond") - end - end - end - quote - ex = $(esc(ex)) - @assert ex isa SyntaxTree - ok = try - $(esc(cond)) - catch - false - end - if !ok - throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg)))) - end - end -end - #------------------------------------------------------------------------------- # AST creation utilities _node_id(ex::NodeId) = ex @@ -193,7 +215,7 @@ top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) # Create a new SSA binding function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") # TODO: Store this name in only one place? Probably use the provenance chain? 
- id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, true, false)) + id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, nothing, true, false)) # Create an identifier nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) makeleaf(ctx, nameref, K"BindingId", var_id=id) @@ -205,7 +227,7 @@ end # Create a new local mutable variable function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) - id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, false, false)) + id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, nothing, false, false)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) add_lambda_local!(ctx, id) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl new file mode 100644 index 0000000000000..b676b40b75f18 --- /dev/null +++ b/JuliaLowering/src/closure_conversion.jl @@ -0,0 +1,144 @@ +struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext + graph::GraphType + bindings::Bindings + mod::Module + lambda_locals::Set{IdTag} +end + +function add_lambda_local!(ctx::ClosureConversionCtx, id) + push!(ctx.lambda_locals, id) +end + +function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) + # Require that the caller make `type` "simple", for now (can generalize + # later if necessary) + kt = kind(type) + @assert (kt == K"Identifier" || kt == K"BindingId" || is_literal(kt)) + # Use a slot to permit union-splitting this in inference + tmp = new_mutable_var(ctx, srcref, "tmp") + + @ast ctx srcref [K"block" + # [K"local_def" tmp] + # [K"=" type_ssa renumber_assigned_ssavalues(type)] + [K"=" tmp ex] + [K"if" + [K"call" "isa"::K"core" tmp type] + "nothing"::K"core" + [K"=" + tmp + if do_typeassert + [K"call" + "typeassert"::K"core" + [K"call" "convert"::K"top" type tmp] + type + ] + else + [K"call" "convert"::K"top" type tmp] + end + ] + ] + tmp + ] +end + +# Convert 
assignment to a closed variable to a `setfield!` call and generate +# `convert` calls for variables with declared types. +# +# When doing this, the original value needs to be preserved, to ensure the +# expression `a=b` always returns exactly `b`. +function convert_assignment(ctx, ex) + var = ex[1] + rhs0 = _convert_closures(ctx, ex[2]) + if kind(var) == K"Placeholder" + return @ast ctx ex [K"=" var rhs0] + end + @chk kind(var) == K"BindingId" + binfo = lookup_binding(ctx, var) + if binfo.kind == :global + # TODO: convert-global-assignment + if !isnothing(binfo.type) + TODO(ex, "Typed global assignment??") + end + @ast ctx ex [K"=" var rhs0] + else + closed = false # TODO + captured = false # TODO + @assert binfo.kind == :local + if isnothing(binfo.type) && !closed && !captured + @ast ctx ex [K"=" var rhs0] + else + @assert binfo.kind == :local + # Typed local + tmp_rhs0 = is_simple_atom(ctx, rhs0) || kind(rhs0) == K"the_exception" ? + nothing : ssavar(ctx, rhs0) + rhs1 = isnothing(tmp_rhs0) ? rhs0 : tmp_rhs0 + rhs = isnothing(binfo.type) ? rhs1 : + convert_for_type_decl(ctx, ex, rhs1, _convert_closures(ctx, binfo.type), true) + assgn = if closed + @assert false # TODO + elseif captured + @assert false # TODO + else + @ast ctx ex [K"=" var rhs] + end + if isnothing(tmp_rhs0) + @ast ctx ex [K"block" + assgn + rhs0 + ] + else + @ast ctx ex [K"block" + [K"=" tmp_rhs0 rhs0] + tmp_rhs0 + ] + end + end + end +end + +function _convert_closures(ctx::ClosureConversionCtx, ex) + k = kind(ex) + if k == K"BindingId" + # TODO: Captures etc + ex + elseif is_leaf(ex) || k == K"inert" + ex + elseif k == K"=" + convert_assignment(ctx, ex) + elseif k == K"decl" + binfo = lookup_binding(ctx, ex[1]) + if binfo.kind == :local + makeleaf(ctx, ex, K"TOMBSTONE") + else + # Remaining `decl` expressions are type assertions if the argument is global + # (TODO: Maybe we should remove the useless ones in + # analyze_variables() pass, or convert to `::`??) 
+ TODO(ex, "global variables with type assertions") + end + elseif k == K"lambda" + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_locals) + mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) + else + mapchildren(e->_convert_closures(ctx, e), ctx, ex) + end +end + + +""" +Closure conversion and lowering of bindings + +This pass does a few things things: +* Deal with typed variables (K"decl") and their assignments +* Deal with global assignments +* Convert closures into types + +Invariants: +* This pass must not introduce new K"Identifier" - only K"BindingId". +* Any new binding IDs must be added to the enclosing lambda locals +""" +function convert_closures(ctx::ScopeResolutionContext, ex) + @assert kind(ex) == K"lambda" + ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_locals) + ex1 = _convert_closures(ctx, ex) + ctx, ex1 +end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 43f0b7400bba6..b905037dd8055 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,9 +1,10 @@ -function lower(mod::Module, ex) - ctx1, ex1 = expand_forms_1(mod, ex) - ctx2, ex2 = expand_forms_2(ctx1, ex1) - ctx3, ex3 = resolve_scopes(ctx2, ex2) - ctx4, ex4 = linearize_ir(ctx3, ex3) - ex4 +function lower(mod::Module, ex0) + ctx1, ex1 = expand_forms_1( mod, ex0) + ctx2, ex2 = expand_forms_2( ctx1, ex1) + ctx3, ex3 = resolve_scopes( ctx2, ex2) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir( ctx4, ex4) + ex5 end function macroexpand(mod::Module, ex) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index cbf89ecd9a3aa..677331053a366 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -211,42 +211,6 @@ function emit_leave_handler(ctx::LinearIRContext, srcref, dest_tokens) end end -function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) - if isnothing(type) - return ex - end - - # Require that the caller make 
`type` "simple", for now (can generalize - # later if necessary) - kt = kind(type) - @assert (kt == K"Identifier" || kt == K"BindingId" || is_literal(kt)) - # Use a slot to permit union-splitting this in inference - tmp = new_mutable_var(ctx, srcref, "tmp") - - @ast ctx srcref [K"block" - # [K"local_def" tmp] - # [K"=" type_ssa renumber_assigned_ssavalues(type)] - [K"=" tmp ex] - [K"if" - [K"call" "isa"::K"core" tmp type] - "nothing"::K"core" - [K"=" - tmp - if do_typeassert - [K"call" - "typeassert"::K"core" - [K"call" "convert"::K"top" type tmp] - type - ] - else - [K"call" "convert"::K"top" type tmp] - end - ] - ] - tmp - ] -end - function emit_jump(ctx, srcref, target::JumpTarget) emit_pop_exception(ctx, srcref, target.catch_token_stack) emit_leave_handler(ctx, srcref, target.handler_token_stack) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 82b2552063267..e06bb01b851a7 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -138,9 +138,10 @@ function eval_macro_name(ctx, ex) ex1 = expand_forms_1(ctx, ex) ctx2, ex2 = expand_forms_2(ctx, ex1) ctx3, ex3 = resolve_scopes(ctx2, ex2) - ctx4, ex4 = linearize_ir(ctx3, ex3) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir(ctx4, ex4) mod = ctx.current_layer.mod - expr_form = to_lowered_expr(mod, ex4.bindings, ex4) + expr_form = to_lowered_expr(mod, ex5.bindings, ex5) eval(mod, expr_form) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index cbd58411e2ecf..a70deda4c52f9 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -180,7 +180,7 @@ function init_binding(ctx, varkey::NameKey, kind::Symbol, is_ambiguous_local=fal id = kind === :global ? get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) mod = kind === :global ? 
ctx.scope_layers[varkey.layer].mod : nothing - id = new_binding(ctx.bindings, BindingInfo(varkey.name, mod, kind, false, is_ambiguous_local)) + id = new_binding(ctx.bindings, BindingInfo(varkey.name, mod, kind, nothing, false, is_ambiguous_local)) end if kind === :global ctx.global_vars[varkey] = id @@ -340,12 +340,10 @@ function _resolve_scopes(ctx, ex::SyntaxTree) @ast ctx ex id::K"BindingId" elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" ex - # TODO # elseif k == K"global" # ex - # elseif k == K"local" - # nothing_(ctx, ex) - # elseif require_existing_local + elseif k == K"local" + makeleaf(ctx, ex, K"TOMBSTONE") # elseif locals # return Dict of locals # elseif islocal elseif k == K"lambda" @@ -421,7 +419,26 @@ type declarations. """ function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) - res = resolve_scopes(ctx2, reparent(ctx2, ex)) - ctx2, res + ex2 = resolve_scopes(ctx2, reparent(ctx2, ex)) + _analyze_variables(ctx2, ex2) + ctx2, ex2 +end + +function _analyze_variables(ctx::ScopeResolutionContext, ex) + k = kind(ex) + if is_leaf(ex) + nothing + elseif k == K"decl" + _analyze_variables(ctx, ex[2]) + if kind(ex[1]) != K"Placeholder" + binfo = lookup_binding(ctx, ex[1]) + if !isnothing(binfo.type) + throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) + end + update_binding(ctx, ex[1]; type=ex[2]) + end + else + foreach(e->_analyze_variables(ctx, e), children(ex)) + end end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl new file mode 100644 index 0000000000000..9a5c36a8b6557 --- /dev/null +++ b/JuliaLowering/test/decls_ir.jl @@ -0,0 +1,21 @@ +######################################## +# Local declaration with type +local x::T = 1 +#---------- +1 (= slot₂/tmp 1) +2 core.isa +3 slot₂/tmp +4 TestMod.T +5 (call %₂ %₃ %₄) +6 (gotoifnot %₅ label₈) +7 (goto label₁₅) +8 core.typeassert +9 top.convert +10 TestMod.T +11 slot₂/tmp +12 (call %₉ %₁₀ %₁₁) +13 TestMod.T +14 (= 
slot₂/tmp (call %₈ %₁₂ %₁₃)) +15 slot₂/tmp +16 (= slot₁/x %₁₅) +17 (return 1) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9806f478d8676..168e53644a240 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -403,6 +403,13 @@ src = """ x = M.@label foo """ +src = """ +begin + local x::T = 1 + local x::S = 1 +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @@ -419,10 +426,13 @@ ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) @info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=:var_id) -ctx4, ex_compiled = JuliaLowering.linearize_ir(ctx3, ex_scoped) +ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) +@info "Closure converted" ex_converted formatsrc(ex_converted, color_by=:var_id) + +ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) @info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) -ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx4.bindings, ex_compiled) +ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx5.bindings, ex_compiled) @info "CodeInfo" ex_expr eval_result = Base.eval(in_mod, ex_expr) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 675f1e91b4fbb..d4344bdfc5b2a 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -71,6 +71,47 @@ end @test exc.msg == "all-underscore identifiers are write-only and their values cannot be used in expressions" @test JuliaLowering.is_ancestor(exc.ex, assign_underscore[1]) +#------------------------------------------------------------------------------- +# Declarations + +@test JuliaLowering.include_string(test_mod, """ +begin + local x::Int = 1.0 + x +end +""") === 1 + +# In value position, yeild the right hand side, not `x` +@test 
JuliaLowering.include_string(test_mod, """ +local x::Int = 1.0 +""") === 1.0 + +# TODO unadorned declarations +# @test JuliaLowering.include_string(test_mod, """ +# let +# x::Int = 1.0 +# end +# """) === 1 + +@test JuliaLowering.include_string(test_mod, """ +let + local x::Int = 1 + x1 = x + x = 20.0 + x2 = x + (x1,x2) +end +""") === (1, 20) + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +begin + local x::T = 1 + local x::S = 1 +end +""") + +test_ir_cases(joinpath(@__DIR__, "decls_ir.jl")) + #------------------------------------------------------------------------------- # Function calls # Splatting From a5fd4ad51f4b8a4114f1a31b66a9f75254adb28b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 31 Aug 2024 10:47:07 +1000 Subject: [PATCH 0847/1109] Remove the special form `K"the_exception"` I don't think the the_exception form does much special - The optimizer treats this mostly like a call (though with removable and nothrow effects) - Codegen just produces a call to the runtime function we're `ccall`ing here - the_exception doesn't help escape analysis because there's other ways for the exception to escape (eg, `current_exceptions()`) So it seems simpler to treat this as a runtime call rather than a special form to reduce the special cases in lowering. In the future we can layer more semantics on that runtime call if we want. 
--- JuliaLowering/src/closure_conversion.jl | 3 +-- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/src/eval.jl | 3 +-- JuliaLowering/src/kinds.jl | 1 - JuliaLowering/src/linear_ir.jl | 4 ++-- JuliaLowering/src/runtime.jl | 6 ++++++ JuliaLowering/test/exceptions_ir.jl | 17 +++++++++++++++++ 7 files changed, 28 insertions(+), 8 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index b676b40b75f18..c57df84558eac 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -69,8 +69,7 @@ function convert_assignment(ctx, ex) else @assert binfo.kind == :local # Typed local - tmp_rhs0 = is_simple_atom(ctx, rhs0) || kind(rhs0) == K"the_exception" ? - nothing : ssavar(ctx, rhs0) + tmp_rhs0 = is_simple_atom(ctx, rhs0) ? nothing : ssavar(ctx, rhs0) rhs1 = isnothing(tmp_rhs0) ? rhs0 : tmp_rhs0 rhs = isnothing(binfo.type) ? rhs1 : convert_for_type_decl(ctx, ex, rhs1, _convert_closures(ctx, binfo.type), true) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 054adb7c96f9c..bc685bc8547d3 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -392,7 +392,7 @@ function expand_try(ctx, ex) [K"scope_block"(catch_, scope_type=:neutral) if kind(exc_var) != K"Placeholder" [K"block" - [K"="(exc_var) exc_var [K"the_exception"]] + [K"="(exc_var) exc_var [K"call" current_exception::K"Value"]] catch_block ] else diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index b905037dd8055..2209fadfa760e 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -243,7 +243,7 @@ function to_lowered_expr(mod, bindings, ex) # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # # call invoke static_parameter `=` method struct_type abstract_type - # primitive_type global const new splatnew isdefined the_exception + # primitive_type global const new splatnew isdefined # enter leave 
pop_exception inbounds boundscheck loopinfo copyast meta # foreigncall new_opaque_closure lambda head = k == K"call" ? :call : @@ -251,7 +251,6 @@ function to_lowered_expr(mod, bindings, ex) k == K"global" ? :global : k == K"const" ? :const : k == K"leave" ? :leave : - k == K"the_exception" ? :the_exception : k == K"pop_exception" ? :pop_exception : nothing if isnothing(head) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 62ac97cb4f3a7..426ef21001154 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -37,7 +37,6 @@ function _register_kinds() "toplevel_butfirst" "lambda" "moved_local" - "the_exception" "foreigncall" "new" "globalref" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 677331053a366..593577103453f 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -135,7 +135,7 @@ function is_valid_ir_rvalue(ctx, lhs, rhs) is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref - kind(rhs) in KSet"new the_exception call foreigncall") + kind(rhs) in KSet"new call foreigncall") end # evaluate the arguments of a call, creating temporary locations as needed @@ -552,7 +552,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || - k == K"Placeholder" || k == K"the_exception" + k == K"Placeholder" # TODO: other kinds: copyast $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall if needs_value && k == K"Placeholder" # TODO: ensure outterref, globalref work here diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 4a636e23662dd..74dc216c164c6 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -159,6 
+159,12 @@ function module_import(into_mod::Module, is_using::Bool, nothing end +# Return the current exception. In JuliaLowering we use this rather than the +# special form `K"the_exception"` to reduces the number of special forms. +Base.@assume_effects :removable :nothrow function current_exception() + @ccall jl_current_exception(current_task()::Any)::Any +end + function bind_docs!(f::Function, docstr, method_metadata) mod = parentmodule(f) bind = Base.Docs.Binding(mod, nameof(f)) diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl index 121e9033f1db4..034710a9598ae 100644 --- a/JuliaLowering/test/exceptions_ir.jl +++ b/JuliaLowering/test/exceptions_ir.jl @@ -336,3 +336,20 @@ end 38 (return %₃₇) 39 slot₅/returnval_via_finally 40 (return %₃₉) + +######################################## +# Access to the exception object +try + a +catch exc + b +end +#---------- +1 (enter label₅) +2 TestMod.a +3 (leave %₁) +4 (return %₂) +5 (= slot₁/exc (call JuliaLowering.current_exception)) +6 TestMod.b +7 (pop_exception %₁) +8 (return %₆) From 41425bc5fdafb613aaec30c24e48665ffb4cd0eb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 31 Aug 2024 11:36:57 +1000 Subject: [PATCH 0848/1109] Handle `const` forms --- JuliaLowering/src/ast.jl | 25 ++++++++++---- JuliaLowering/src/desugaring.jl | 28 ++++++++++----- JuliaLowering/src/linear_ir.jl | 2 ++ JuliaLowering/src/scope_analysis.jl | 51 +++++++++++++++------------ JuliaLowering/test/decls.jl | 53 +++++++++++++++++++++++++++++ JuliaLowering/test/decls_ir.jl | 9 +++++ JuliaLowering/test/demo.jl | 4 +++ JuliaLowering/test/runtests.jl | 42 +---------------------- 8 files changed, 136 insertions(+), 78 deletions(-) create mode 100644 JuliaLowering/test/decls.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 34fcaf8b94c2c..11f7cf57214f9 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -58,13 +58,23 @@ Metadata about a binding """ struct BindingInfo 
name::String - mod::Union{Nothing,Module} # Set when `kind === :global` kind::Symbol # :local :global :argument :static_parameter + mod::Union{Nothing,Module} # Set when `kind === :global` type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 + is_const::Bool # Single assignment, defined before use is_ssa::Bool # Single assignment, defined before use is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) end +function BindingInfo(name::AbstractString, kind::Symbol; + mod::Union{Nothing,Module} = nothing, + type::Union{Nothing,SyntaxTree} = nothing, + is_const::Bool = false, + is_ssa::Bool = false, + is_ambiguous_local::Bool = false) + BindingInfo(name, kind, mod, type, is_const, is_ssa, is_ambiguous_local) +end + """ Metadata about "entities" (variables, constants, etc) in the program. Each entity is associated to a unique integer id, the BindingId. A binding will be @@ -95,14 +105,15 @@ function _binding_id(ex::SyntaxTree) ex.var_id end -function update_binding(bindings::Bindings, x; type=nothing) +function update_binding!(bindings::Bindings, x; type=nothing, is_const=nothing) id = _binding_id(x) b = lookup_binding(bindings, id) bindings.info[id] = BindingInfo( b.name, - b.mod, b.kind, + b.mod, isnothing(type) ? b.type : type, + isnothing(is_const) ? b.is_const : is_const, b.is_ssa, b.is_ambiguous_local, ) @@ -116,8 +127,8 @@ function lookup_binding(ctx::AbstractLoweringContext, x) lookup_binding(ctx.bindings, x) end -function update_binding(ctx::AbstractLoweringContext, x; kws...) - update_binding(ctx.bindings, x; kws...) +function update_binding!(ctx::AbstractLoweringContext, x; kws...) + update_binding!(ctx.bindings, x; kws...) end const LayerId = Int @@ -215,7 +226,7 @@ top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) # Create a new SSA binding function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") # TODO: Store this name in only one place? Probably use the provenance chain? 
- id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, nothing, true, false)) + id = new_binding(ctx.bindings, BindingInfo(name, :local; is_ssa=true)) # Create an identifier nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) makeleaf(ctx, nameref, K"BindingId", var_id=id) @@ -227,7 +238,7 @@ end # Create a new local mutable variable function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) - id = new_binding(ctx.bindings, BindingInfo(name, nothing, :local, nothing, false, false)) + id = new_binding(ctx.bindings, BindingInfo(name, :local)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) add_lambda_local!(ctx, id) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index bc685bc8547d3..2a50929991959 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -421,22 +421,24 @@ end # (x::T, (y::U, z)) # strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z) # and return (x, (y, z)) -function strip_decls!(ctx, stmts, declkind, ex) +function strip_decls!(ctx, stmts, declkind, declkind2, ex) k = kind(ex) if k == K"Identifier" push!(stmts, makenode(ctx, ex, declkind, ex)) + if !isnothing(declkind2) + push!(stmts, makenode(ctx, ex, declkind2, ex)) + end ex elseif k == K"::" @chk numchildren(ex) == 2 name = ex[1] @chk kind(name) == K"Identifier" - push!(stmts, makenode(ctx, ex, declkind, name)) push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) - name + strip_decls!(ctx, stmts, declkind, declkind2, ex[1]) elseif k == K"tuple" || k == K"parameters" cs = SyntaxList(ctx) for e in children(ex) - push!(cs, strip_decls!(ctx, stmts, declkind, e)) + push!(cs, strip_decls!(ctx, stmts, declkind, declkind2, e)) end makenode(ctx, ex, k, cs) end @@ -445,15 +447,25 @@ end # local x, (y=2), z => local x; local y; y = 2; local z function expand_decls(ctx, ex) declkind = kind(ex) + if numchildren(ex) == 1 && kind(ex[1]) 
∈ KSet"const global local" + declkind2 = kind(ex[1]) + bindings = children(ex[1]) + else + declkind2 = nothing + bindings = children(ex) + end stmts = SyntaxList(ctx) - for binding in children(ex) + for binding in bindings kb = kind(binding) if is_prec_assignment(kb) @chk numchildren(binding) == 2 - lhs = strip_decls!(ctx, stmts, declkind, binding[1]) + lhs = strip_decls!(ctx, stmts, declkind, declkind2, binding[1]) push!(stmts, @ast ctx binding [kb lhs binding[2]]) elseif is_sym_decl(binding) - strip_decls!(ctx, stmts, declkind, binding) + if declkind == K"const" || declkind2 == K"const" + throw(LoweringError(ex, "expected assignment after `const`")) + end + strip_decls!(ctx, stmts, declkind, declkind2, binding) else throw(LoweringError(ex, "invalid syntax in variable declaration")) end @@ -888,7 +900,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"let" expand_forms_2(ctx, expand_let(ctx, ex)) - elseif k == K"local" || k == K"global" + elseif k == K"local" || k == K"global" || k == K"const" if numchildren(ex) == 1 && kind(ex[1]) == K"Identifier" # Don't recurse when already simplified - `local x`, etc ex diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 593577103453f..a8cf7a24e39be 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -775,6 +775,8 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) nothing elseif k == K"local_def" || k == K"local" nothing + elseif k == K"const" + emit(ctx, ex) else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index a70deda4c52f9..37d1434d21c0e 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -180,7 +180,9 @@ function init_binding(ctx, varkey::NameKey, kind::Symbol, is_ambiguous_local=fal id = kind === :global ? 
get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) mod = kind === :global ? ctx.scope_layers[varkey.layer].mod : nothing - id = new_binding(ctx.bindings, BindingInfo(varkey.name, mod, kind, nothing, false, is_ambiguous_local)) + id = new_binding(ctx.bindings, + BindingInfo(varkey.name, kind; + mod=mod, is_ambiguous_local=is_ambiguous_local)) end if kind === :global ctx.global_vars[varkey] = id @@ -333,6 +335,29 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) is_hard_scope, var_ids, lambda_locals) end +# Do some things which are better done after converting to BindingId. +function maybe_update_bindings!(ctx, ex) + k = kind(ex) + if k == K"decl" + @chk numchildren(ex) == 2 + id = ex[1] + if kind(id) != K"Placeholder" + binfo = lookup_binding(ctx, id) + if !isnothing(binfo.type) + throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) + end + update_binding!(ctx, id; type=ex[2]) + end + elseif k == K"const" + id = ex[1] + if lookup_binding(ctx, id).kind == :local + throw(LoweringError(ex, "unsupported `const` declaration on local variable")) + end + update_binding!(ctx, id; is_const=true) + end + nothing +end + function _resolve_scopes(ctx, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" @@ -385,7 +410,9 @@ function _resolve_scopes(ctx, ex::SyntaxTree) end makeleaf(ctx, ex, K"TOMBSTONE") else - mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + ex_mapped = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + maybe_update_bindings!(ctx, ex_mapped) + ex_mapped end end @@ -420,25 +447,5 @@ type declarations. 
function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) ex2 = resolve_scopes(ctx2, reparent(ctx2, ex)) - _analyze_variables(ctx2, ex2) ctx2, ex2 end - -function _analyze_variables(ctx::ScopeResolutionContext, ex) - k = kind(ex) - if is_leaf(ex) - nothing - elseif k == K"decl" - _analyze_variables(ctx, ex[2]) - if kind(ex[1]) != K"Placeholder" - binfo = lookup_binding(ctx, ex[1]) - if !isnothing(binfo.type) - throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) - end - update_binding(ctx, ex[1]; type=ex[2]) - end - else - foreach(e->_analyze_variables(ctx, e), children(ex)) - end -end - diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl new file mode 100644 index 0000000000000..9cd83562a65de --- /dev/null +++ b/JuliaLowering/test/decls.jl @@ -0,0 +1,53 @@ +@testset "Declarations" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +begin + local x::Int = 1.0 + x +end +""") === 1 + +# In value position, yeild the right hand side, not `x` +@test JuliaLowering.include_string(test_mod, """ +local x::Int = 1.0 +""") === 1.0 + +# TODO unadorned declarations +# @test JuliaLowering.include_string(test_mod, """ +# let +# x::Int = 1.0 +# end +# """) === 1 + +@test JuliaLowering.include_string(test_mod, """ +let + local x::Int = 1 + x1 = x + x = 20.0 + x2 = x + (x1,x2) +end +""") === (1, 20) + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +begin + local x::T = 1 + local x::S = 1 +end +""") + +# Const not supported on locals +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +const local x = 1 +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +let + const x = 1 +end +""") + +test_ir_cases(joinpath(@__DIR__, "decls_ir.jl")) + +end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 9a5c36a8b6557..8184fd332cf41 100644 --- a/JuliaLowering/test/decls_ir.jl +++ 
b/JuliaLowering/test/decls_ir.jl @@ -19,3 +19,12 @@ local x::T = 1 15 slot₂/tmp 16 (= slot₁/x %₁₅) 17 (return 1) + +######################################## +# const +const xx = 10 +#---------- +1 (const TestMod.xx) +2 10 +3 (= TestMod.xx %₂) +4 (return %₂) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 168e53644a240..9702e24ea64ca 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -410,6 +410,10 @@ begin end """ +src = """ +const local x = 1 +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index d4344bdfc5b2a..10918ae4cd0de 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -71,47 +71,6 @@ end @test exc.msg == "all-underscore identifiers are write-only and their values cannot be used in expressions" @test JuliaLowering.is_ancestor(exc.ex, assign_underscore[1]) -#------------------------------------------------------------------------------- -# Declarations - -@test JuliaLowering.include_string(test_mod, """ -begin - local x::Int = 1.0 - x -end -""") === 1 - -# In value position, yeild the right hand side, not `x` -@test JuliaLowering.include_string(test_mod, """ -local x::Int = 1.0 -""") === 1.0 - -# TODO unadorned declarations -# @test JuliaLowering.include_string(test_mod, """ -# let -# x::Int = 1.0 -# end -# """) === 1 - -@test JuliaLowering.include_string(test_mod, """ -let - local x::Int = 1 - x1 = x - x = 20.0 - x2 = x - (x1,x2) -end -""") === (1, 20) - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -begin - local x::T = 1 - local x::S = 1 -end -""") - -test_ir_cases(joinpath(@__DIR__, "decls_ir.jl")) - #------------------------------------------------------------------------------- # Function calls # Splatting @@ -160,6 +119,7 @@ end @test C.D.f === C.E.f include("functions.jl") +include("decls.jl") 
include("macros.jl") include("modules.jl") include("desugaring.jl") From 5dfea041fbdf586a71d9eb6ac3b9ce4f0d26a996 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 31 Aug 2024 15:54:09 +1000 Subject: [PATCH 0849/1109] Lowering of typed global assignments --- JuliaLowering/src/closure_conversion.jl | 59 +++++++++++++++++---- JuliaLowering/src/scope_analysis.jl | 4 ++ JuliaLowering/test/decls.jl | 23 ++++++++ JuliaLowering/test/decls_ir.jl | 70 +++++++++++++++++++++++-- JuliaLowering/test/demo.jl | 4 +- 5 files changed, 146 insertions(+), 14 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index c57df84558eac..55ba168a57650 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -41,6 +41,41 @@ function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) ] end +function convert_global_assignment(ctx, ex, var, rhs0) + binfo = lookup_binding(ctx, var) + @assert binfo.kind == :global + stmts = SyntaxList(ctx) + rhs1 = if is_simple_atom(ctx, rhs0) + rhs0 + else + tmp = ssavar(ctx, rhs0) + push!(stmts, @ast ctx rhs0 [K"=" tmp rhs0]) + tmp + end + rhs = if binfo.is_const && isnothing(binfo.type) + # const global assignments without a type declaration don't need us to + # deal with the binding type at all. + rhs1 + else + type_var = ssavar(ctx, ex, "binding_type") + push!(stmts, @ast ctx ex [K"=" + type_var + [K"call" + "get_binding_type"::K"core" + binfo.mod::K"Value" + binfo.name::K"Symbol" + ] + ]) + do_typeassert = false # Global assignment type checking is done by the runtime + convert_for_type_decl(ctx, ex, rhs1, type_var, do_typeassert) + end + push!(stmts, @ast ctx ex [K"=" var rhs]) + @ast ctx ex [K"block" + stmts... + rhs1 + ] +end + # Convert assignment to a closed variable to a `setfield!` call and generate # `convert` calls for variables with declared types. 
# @@ -55,11 +90,7 @@ function convert_assignment(ctx, ex) @chk kind(var) == K"BindingId" binfo = lookup_binding(ctx, var) if binfo.kind == :global - # TODO: convert-global-assignment - if !isnothing(binfo.type) - TODO(ex, "Typed global assignment??") - end - @ast ctx ex [K"=" var rhs0] + convert_global_assignment(ctx, ex, var, rhs0) else closed = false # TODO captured = false # TODO @@ -105,14 +136,21 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) elseif k == K"=" convert_assignment(ctx, ex) elseif k == K"decl" + if kind(ex[1]) != K"BindingId" + # TODO: This case might be better dealt with in an earlier pass, + # emitting `K"::"`?? + TODO(ex, "assertions for decls with non-bindings") + end binfo = lookup_binding(ctx, ex[1]) if binfo.kind == :local makeleaf(ctx, ex, K"TOMBSTONE") else - # Remaining `decl` expressions are type assertions if the argument is global - # (TODO: Maybe we should remove the useless ones in - # analyze_variables() pass, or convert to `::`??) - TODO(ex, "global variables with type assertions") + @ast ctx ex [K"call" + "set_binding_type!"::K"core" + binfo.mod::K"Value" + binfo.name::K"Symbol" + _convert_closures(ctx, ex[2]) + ] end elseif k == K"lambda" ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_locals) @@ -128,8 +166,9 @@ Closure conversion and lowering of bindings This pass does a few things things: * Deal with typed variables (K"decl") and their assignments -* Deal with global assignments +* Deal with const and non-const global assignments * Convert closures into types +* Lower variables captured by closures into boxes, etc, as necessary Invariants: * This pass must not introduce new K"Identifier" - only K"BindingId". 
diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 37d1434d21c0e..ac5fbb438b111 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -346,6 +346,10 @@ function maybe_update_bindings!(ctx, ex) if !isnothing(binfo.type) throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) end + if binfo.kind == :global && !ctx.scope_stack[end].in_toplevel_thunk + throw(LoweringError(ex, "type declarations for global variables must be at top level, not inside a function")) + # set_binding_type! + end update_binding!(ctx, id; type=ex[2]) end elseif k == K"const" diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index 9cd83562a65de..20809b3fdef6b 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -31,6 +31,22 @@ let end """) === (1, 20) +# Global decls with types +@test JuliaLowering.include_string(test_mod, """ +global a_typed_global::Int = 10.0 +""") === 10.0 +@test Core.get_binding_type(test_mod, :a_typed_global) === Int +@test test_mod.a_typed_global === 10 + +# Also allowed in nontrivial scopes in a top level thunk +@test JuliaLowering.include_string(test_mod, """ +let + global a_typed_global_2::Int = 10.0 +end +""") === 10.0 +@test Core.get_binding_type(test_mod, :a_typed_global_2) === Int +@test test_mod.a_typed_global_2 === 10 + @test_throws LoweringError JuliaLowering.include_string(test_mod, """ begin local x::T = 1 @@ -48,6 +64,13 @@ let end """) +# global type decls only allowed at top level +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +function f() + global x::Int = 1 +end +""") + test_ir_cases(joinpath(@__DIR__, "decls_ir.jl")) end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 8184fd332cf41..70b88924c55e3 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -25,6 +25,70 @@ local x::T = 1 const xx = 10 #---------- 1 (const 
TestMod.xx) -2 10 -3 (= TestMod.xx %₂) -4 (return %₂) +2 (= TestMod.xx 10) +3 (return 10) + +######################################## +# Typed const +const xx::T = 10 +#---------- +1 core.set_binding_type! +2 TestMod.T +3 (call %₁ TestMod :xx %₂) +4 (const TestMod.xx) +5 core.get_binding_type +6 (call %₅ TestMod :xx) +7 (= slot₁/tmp 10) +8 core.isa +9 slot₁/tmp +10 (call %₈ %₉ %₆) +11 (gotoifnot %₁₀ label₁₃) +12 (goto label₁₆) +13 top.convert +14 slot₁/tmp +15 (= slot₁/tmp (call %₁₃ %₆ %₁₄)) +16 slot₁/tmp +17 (= TestMod.xx %₁₆) +18 (return 10) + +######################################## +# Global assignment +xx = 10 +#---------- +1 core.get_binding_type +2 (call %₁ TestMod :xx) +3 (= slot₁/tmp 10) +4 core.isa +5 slot₁/tmp +6 (call %₄ %₅ %₂) +7 (gotoifnot %₆ label₉) +8 (goto label₁₂) +9 top.convert +10 slot₁/tmp +11 (= slot₁/tmp (call %₉ %₂ %₁₀)) +12 slot₁/tmp +13 (= TestMod.xx %₁₂) +14 (return 10) + +######################################## +# Typed global assignment +global xx::T = 10 +#---------- +1 core.set_binding_type! 
+2 TestMod.T +3 (call %₁ TestMod :xx %₂) +4 (global TestMod.xx) +5 core.get_binding_type +6 (call %₅ TestMod :xx) +7 (= slot₁/tmp 10) +8 core.isa +9 slot₁/tmp +10 (call %₈ %₉ %₆) +11 (gotoifnot %₁₀ label₁₃) +12 (goto label₁₆) +13 top.convert +14 slot₁/tmp +15 (= slot₁/tmp (call %₁₃ %₆ %₁₄)) +16 slot₁/tmp +17 (= TestMod.xx %₁₆) +18 (return 10) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9702e24ea64ca..f46ad3135264a 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -411,7 +411,9 @@ end """ src = """ -const local x = 1 +function f() + global some_sym::Int = 1 +end """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") From eec4b633e97774ce31de4fef0795652f657a83f5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 1 Sep 2024 13:09:48 +1000 Subject: [PATCH 0850/1109] Factor out high level test sets into individual files This makes it easier to run subsets of tests by just including the appropriate file --- JuliaLowering/test/functions.jl | 20 +++++ JuliaLowering/test/import.jl | 31 ++++++++ JuliaLowering/test/misc.jl | 23 ++++++ JuliaLowering/test/runtests.jl | 133 +++----------------------------- JuliaLowering/test/scopes.jl | 46 +++++++++++ 5 files changed, 132 insertions(+), 121 deletions(-) create mode 100644 JuliaLowering/test/import.jl create mode 100644 JuliaLowering/test/misc.jl create mode 100644 JuliaLowering/test/scopes.jl diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 7d0e210119be1..633d2799575c5 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -2,6 +2,26 @@ test_mod = Module() +# Function calls +# Splatting +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + y = 2 + zs = (3,4) + w = 5 + (tuple(zs...), + tuple(zs..., w), + tuple(y, zs...), + tuple(x, y, zs..., w)) +end +""") == ((3,4), + (3,4,5), + (2,3,4), + (1,2,3,4,5)) + +#------------------------------------------------------------------------------- +# 
Function definitions @test JuliaLowering.include_string(test_mod, """ begin function f(x) diff --git a/JuliaLowering/test/import.jl b/JuliaLowering/test/import.jl new file mode 100644 index 0000000000000..5098c3aa308d6 --- /dev/null +++ b/JuliaLowering/test/import.jl @@ -0,0 +1,31 @@ +@testset "using / import" begin + +test_mod = Module() + +JuliaLowering.include_string(test_mod, """ + using JuliaSyntax + using JuliaLowering: SyntaxTree + using JuliaLowering: SyntaxTree as st + import JuliaLowering: SyntaxTree as st1, SyntaxTree as st2 +""") +@test test_mod.SyntaxTree === JuliaLowering.SyntaxTree +@test test_mod.st === JuliaLowering.SyntaxTree +@test test_mod.st1 === JuliaLowering.SyntaxTree +@test test_mod.st2 === JuliaLowering.SyntaxTree +@test test_mod.parsestmt === JuliaSyntax.parsestmt + +C = JuliaLowering.include_string(test_mod, """ +module C + module D + function f() + "hi" + end + end + module E + using ...C.D: f + end +end +""") +@test C.D.f === C.E.f + +end diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl new file mode 100644 index 0000000000000..66e2597a2e82a --- /dev/null +++ b/JuliaLowering/test/misc.jl @@ -0,0 +1,23 @@ +@testset "Miscellanous" begin + +test_mod = Module() + +# Blocks +@test JuliaLowering.include_string(test_mod, """ +begin +end +""") == nothing + +# Placeholders +@test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 + +assign_underscore = parsestmt(SyntaxTree, "_ + 1", filename="foo.jl") +exc = try + JuliaLowering.eval(test_mod, assign_underscore) +catch exc + exc +end +@test exc.msg == "all-underscore identifiers are write-only and their values cannot be used in expressions" +@test JuliaLowering.is_ancestor(exc.ex, assign_underscore[1]) + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 10918ae4cd0de..a21a3e15d33aa 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -4,126 +4,17 @@ include("utils.jl") @testset "JuliaLowering.jl" 
begin -include("syntax_graph.jl") - -# Basic end-to-end / smoke tests - -test_mod = Module() - -#------------------------------------------------------------------------------- -# Scopes -@test JuliaLowering.include_string(test_mod, -""" -let - y = 0 - x = 1 - let x = x + 1 - y = x - end - (x, y) -end -""") == (1, 2) - -JuliaLowering.include_string(test_mod, """ - x = 101 - y = 202 -""") -@test test_mod.x == 101 -@test test_mod.y == 202 -@test JuliaLowering.include_string(test_mod, "x + y") == 303 - -# wrap expression in scope block of `scope_type` -function wrapscope(ex, scope_type) - g = JuliaLowering.ensure_attributes(ex._graph, scope_type=Symbol) - ex = JuliaLowering.reparent(g, ex) - makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) -end - -assign_z_2 = parsestmt(SyntaxTree, "begin z = 2 end", filename="foo.jl") -JuliaLowering.eval(test_mod, :(z=1)) -@test test_mod.z == 1 -# neutral (eg, for loops) and hard (eg, let) scopes create a new binding for z -JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :neutral)) -@test test_mod.z == 1 -JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) -@test test_mod.z == 1 -# but wrapping neutral scope in soft scope uses the existing binding in test_mod -JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) -@test test_mod.z == 2 - -#------------------------------------------------------------------------------- -# Blocks -@test JuliaLowering.include_string(test_mod, """ -begin -end -""") == nothing - -#------------------------------------------------------------------------------- -# Placeholders -@test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 - -assign_underscore = parsestmt(SyntaxTree, "_ + 1", filename="foo.jl") -exc = try - JuliaLowering.eval(test_mod, assign_underscore) -catch exc - exc -end -@test exc.msg == "all-underscore identifiers are write-only and their values cannot be used in expressions" -@test JuliaLowering.is_ancestor(exc.ex, 
assign_underscore[1]) - -#------------------------------------------------------------------------------- -# Function calls -# Splatting -@test JuliaLowering.include_string(test_mod, """ -let - x = 1 - y = 2 - zs = (3,4) - w = 5 - (tuple(zs...), - tuple(zs..., w), - tuple(y, zs...), - tuple(x, y, zs..., w)) -end -""") == ((3,4), - (3,4,5), - (2,3,4), - (1,2,3,4,5)) - -#------------------------------------------------------------------------------- -# using / import -JuliaLowering.include_string(test_mod, """ - using JuliaSyntax - using JuliaLowering: SyntaxTree - using JuliaLowering: SyntaxTree as st - import JuliaLowering: SyntaxTree as st1, SyntaxTree as st2 -""") -@test test_mod.SyntaxTree === JuliaLowering.SyntaxTree -@test test_mod.st === JuliaLowering.SyntaxTree -@test test_mod.st1 === JuliaLowering.SyntaxTree -@test test_mod.st2 === JuliaLowering.SyntaxTree -@test test_mod.parsestmt === JuliaSyntax.parsestmt - -C = JuliaLowering.include_string(test_mod, """ -module C - module D - function f() - "hi" - end - end - module E - using ...C.D: f - end -end -""") -@test C.D.f === C.E.f - -include("functions.jl") -include("decls.jl") -include("macros.jl") -include("modules.jl") -include("desugaring.jl") -include("branching.jl") -include("loops.jl") + include("syntax_graph.jl") + + include("misc.jl") + include("import.jl") + include("scopes.jl") + include("functions.jl") + include("decls.jl") + include("macros.jl") + include("modules.jl") + include("desugaring.jl") + include("branching.jl") + include("loops.jl") end diff --git a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl new file mode 100644 index 0000000000000..1ec037326ad0a --- /dev/null +++ b/JuliaLowering/test/scopes.jl @@ -0,0 +1,46 @@ +@testset "Scopes" begin + +test_mod = Module() + +#------------------------------------------------------------------------------- +# Scopes +@test JuliaLowering.include_string(test_mod, +""" +let + y = 0 + x = 1 + let x = x + 1 + y = x + end + (x, y) +end +""") 
== (1, 2) + +JuliaLowering.include_string(test_mod, """ + x = 101 + y = 202 +""") +@test test_mod.x == 101 +@test test_mod.y == 202 +@test JuliaLowering.include_string(test_mod, "x + y") == 303 + +# wrap expression in scope block of `scope_type` +function wrapscope(ex, scope_type) + g = JuliaLowering.ensure_attributes(ex._graph, scope_type=Symbol) + ex = JuliaLowering.reparent(g, ex) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) +end + +assign_z_2 = parsestmt(SyntaxTree, "begin z = 2 end", filename="foo.jl") +JuliaLowering.eval(test_mod, :(z=1)) +@test test_mod.z == 1 +# neutral (eg, for loops) and hard (eg, let) scopes create a new binding for z +JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :neutral)) +@test test_mod.z == 1 +JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) +@test test_mod.z == 1 +# but wrapping neutral scope in soft scope uses the existing binding in test_mod +JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) +@test test_mod.z == 2 + +end From 8d6c97d0e680a42449e5f66cf2ffe85aed7996c1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 1 Sep 2024 16:00:14 +1000 Subject: [PATCH 0851/1109] Utilities for updating IR test cases Also fix a couple of test cases which weren't being run correctly including updating exceptions_ir.jl to avoid use of globals --- JuliaLowering/test/branching.jl | 2 +- JuliaLowering/test/branching_ir.jl | 39 +++++++++++----- JuliaLowering/test/decls_ir.jl | 11 ++--- JuliaLowering/test/exceptions_ir.jl | 53 +++++++++++----------- JuliaLowering/test/functions_ir.jl | 3 +- JuliaLowering/test/loops_ir.jl | 9 ++-- JuliaLowering/test/runtests.jl | 16 ++++--- JuliaLowering/test/utils.jl | 70 +++++++++++++++++++++++------ 8 files changed, 135 insertions(+), 68 deletions(-) diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index e5a60c6e91f69..2e8c3b60e6bf5 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ 
-339,7 +339,7 @@ end #------------------------------------------------------------------------------- @testset "Branching IR" begin - test_ir_cases(joinpath(@__DIR__,"branching_ir.jl"), test_mod) + test_ir_cases(joinpath(@__DIR__,"branching_ir.jl")) end end diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index d0c6492a4e18f..a817e02aed77b 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -1,4 +1,18 @@ -###################################### +using JuliaLowering: JuliaLowering, @ast, @chk +using JuliaSyntax + +function var"@label"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == JuliaSyntax.K"Identifier" + @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" +end + +function var"@goto"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == JuliaSyntax.K"Identifier" + @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" +end + +#******************************************************************************* +######################################## # Basic branching tail && value begin local a, b @@ -6,14 +20,14 @@ begin b end end -#------------------------- +#--------------------- 1 slot₁/a 2 (gotoifnot %₁ label₅) 3 slot₂/b 4 (return %₃) 5 (return core.nothing) -###################################### +######################################## # Branching, !tail && !value begin local a, b, c @@ -22,14 +36,14 @@ begin end c end -#------------------------- +#--------------------- 1 slot₁/a 2 (gotoifnot %₁ label₄) 3 slot₂/b 4 slot₃/c 5 (return %₄) -###################################### +######################################## # Branching with else begin local a, b, c @@ -47,7 +61,7 @@ end 5 slot₃/c 6 (return %₅) -###################################### +######################################## # Branching with else, !tail && !value begin local a, b, c, d @@ -67,7 +81,7 @@ end 6 slot₄/d 7 (return %₆) -###################################### 
+######################################## # Blocks compile directly to branches begin local a, b, c, d @@ -93,7 +107,7 @@ begin b @label foo end -#---------- +#--------------------- 1 TestMod.a 2 (goto label₄) 3 TestMod.b @@ -107,12 +121,12 @@ begin b @goto foo end -#---------- +#--------------------- 1 TestMod.a 2 TestMod.b 3 (goto label₂) -###################################### +######################################## # Jumping out of try and catch blocks using @goto begin try @@ -126,7 +140,7 @@ begin end @label lab end -#---------- +#--------------------- 1 (enter label₈) 2 TestMod.a 3 (leave %₁) @@ -162,7 +176,7 @@ begin end @label lab end -#---------- +#--------------------- 1 (enter label₁₄) 2 (enter label₆) 3 TestMod.a @@ -187,3 +201,4 @@ end 22 (pop_exception %₁₄) 23 (pop_exception %₁) 24 (return core.nothing) + diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 70b88924c55e3..71163d07a98ab 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -1,7 +1,7 @@ ######################################## # Local declaration with type local x::T = 1 -#---------- +#--------------------- 1 (= slot₂/tmp 1) 2 core.isa 3 slot₂/tmp @@ -23,7 +23,7 @@ local x::T = 1 ######################################## # const const xx = 10 -#---------- +#--------------------- 1 (const TestMod.xx) 2 (= TestMod.xx 10) 3 (return 10) @@ -31,7 +31,7 @@ const xx = 10 ######################################## # Typed const const xx::T = 10 -#---------- +#--------------------- 1 core.set_binding_type! 2 TestMod.T 3 (call %₁ TestMod :xx %₂) @@ -54,7 +54,7 @@ const xx::T = 10 ######################################## # Global assignment xx = 10 -#---------- +#--------------------- 1 core.get_binding_type 2 (call %₁ TestMod :xx) 3 (= slot₁/tmp 10) @@ -73,7 +73,7 @@ xx = 10 ######################################## # Typed global assignment global xx::T = 10 -#---------- +#--------------------- 1 core.set_binding_type! 
2 TestMod.T 3 (call %₁ TestMod :xx %₂) @@ -92,3 +92,4 @@ global xx::T = 10 16 slot₁/tmp 17 (= TestMod.xx %₁₆) 18 (return 10) + diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl index 034710a9598ae..49f4a116dc482 100644 --- a/JuliaLowering/test/exceptions_ir.jl +++ b/JuliaLowering/test/exceptions_ir.jl @@ -7,7 +7,7 @@ catch g return y end -#---------- +#--------------------- 1 (enter label₆) 2 TestMod.f 3 TestMod.x @@ -27,7 +27,7 @@ catch g return 20 end -#---------- +#--------------------- 1 (enter label₅) 2 TestMod.f 3 (leave %₁) @@ -46,7 +46,7 @@ try end catch end -#---------- +#--------------------- 1 (enter label₁₄) 2 (enter label₇) 3 (leave %₁ %₂) @@ -73,7 +73,7 @@ catch return 20 end end -#---------- +#--------------------- 1 (enter label₄) 2 (leave %₁) 3 (return core.nothing) @@ -93,7 +93,7 @@ catch else c end -#---------- +#--------------------- 1 (enter label₆) 2 TestMod.a 3 (leave %₁) @@ -105,7 +105,7 @@ end ######################################## # try/catch/else, value position -begin +let z = try a catch @@ -114,18 +114,18 @@ begin c end end -#---------- +#--------------------- 1 (enter label₇) 2 TestMod.a 3 (leave %₁) 4 TestMod.c -5 (= slot₁/try_result %₄) +5 (= slot₂/try_result %₄) 6 (goto label₁₀) 7 TestMod.b -8 (= slot₁/try_result %₇) +8 (= slot₂/try_result %₇) 9 (pop_exception %₁) -10 slot₁/try_result -11 (= TestMod.z %₁₀) +10 slot₂/try_result +11 (= slot₁/z %₁₀) 12 (return %₁₀) ######################################## @@ -140,7 +140,7 @@ begin end z end -#---------- +#--------------------- 1 (enter label₆) 2 TestMod.a 3 (leave %₁) @@ -158,7 +158,7 @@ try finally b end -#---------- +#--------------------- 1 (enter label₇) 2 (= slot₁/finally_tag -1) 3 (= slot₂/returnval_via_finally TestMod.a) @@ -175,27 +175,27 @@ end ######################################## # basic try/finally, value position -begin +let z = try a finally b end end -#---------- +#--------------------- 1 (enter label₇) -2 (= slot₂/finally_tag 
-1) +2 (= slot₃/finally_tag -1) 3 TestMod.a -4 (= slot₁/try_result %₃) +4 (= slot₂/try_result %₃) 5 (leave %₁) 6 (goto label₈) -7 (= slot₂/finally_tag 1) +7 (= slot₃/finally_tag 1) 8 TestMod.b -9 (call core.=== slot₂/finally_tag 1) +9 (call core.=== slot₃/finally_tag 1) 10 (gotoifnot %₉ label₁₂) 11 (call top.rethrow) -12 slot₁/try_result -13 (= TestMod.z %₁₂) +12 slot₂/try_result +13 (= slot₁/z %₁₂) 14 (return %₁₂) ######################################## @@ -208,7 +208,7 @@ begin end z end -#---------- +#--------------------- 1 (enter label₆) 2 (= slot₁/finally_tag -1) 3 TestMod.a @@ -232,7 +232,7 @@ while true b end end -#---------- +#--------------------- 1 (gotoifnot true label₁₅) 2 (enter label₉) 3 (= slot₁/finally_tag -1) @@ -258,7 +258,7 @@ catch finally c end -#---------- +#--------------------- 1 (enter label₁₅) 2 (= slot₁/finally_tag -1) 3 (enter label₈) @@ -295,7 +295,7 @@ try finally d end -#---------- +#--------------------- 1 (enter label₃₀) 2 (= slot₁/finally_tag -1) 3 (enter label₁₅) @@ -344,7 +344,7 @@ try catch exc b end -#---------- +#--------------------- 1 (enter label₅) 2 TestMod.a 3 (leave %₁) @@ -353,3 +353,4 @@ end 6 TestMod.b 7 (pop_exception %₁) 8 (return %₆) + diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 2a0beb766f186..5fb9189538401 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -6,7 +6,7 @@ function f(x)::Int end 0xff end -#---------- +#--------------------- 1 (method :f) 2 core.svec 3 core.svec @@ -36,3 +36,4 @@ end 15 slot₃/tmp 16 (return %₁₅) 13 (return %₁) + diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl index 859b33da6e356..51eb1ea689439 100644 --- a/JuliaLowering/test/loops_ir.jl +++ b/JuliaLowering/test/loops_ir.jl @@ -4,7 +4,7 @@ while f(a) body1 body2 end -#---------- +#--------------------- 1 TestMod.f 2 TestMod.a 3 (call %₁ %₂) @@ -13,12 +13,13 @@ end 6 TestMod.body2 7 (goto label₁) 8 (return core.nothing) 
+ ######################################## # While loop with short circuit condition while a && b body end -#---------- +#--------------------- 1 TestMod.a 2 (gotoifnot %₁ label₇) 3 TestMod.b @@ -26,6 +27,7 @@ end 5 TestMod.body 6 (goto label₁) 7 (return core.nothing) + ######################################## # While loop with with break and continue while cond @@ -35,7 +37,7 @@ while cond continue body3 end -#--------- +#--------------------- 1 TestMod.cond 2 (gotoifnot %₁ label₉) 3 TestMod.body1 @@ -45,3 +47,4 @@ end 7 TestMod.body3 8 (goto label₁) 9 (return core.nothing) + diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index a21a3e15d33aa..6390e24d6e741 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -6,15 +6,17 @@ include("utils.jl") include("syntax_graph.jl") - include("misc.jl") - include("import.jl") - include("scopes.jl") - include("functions.jl") + include("branching.jl") include("decls.jl") - include("macros.jl") - include("modules.jl") include("desugaring.jl") - include("branching.jl") + include("exceptions.jl") + include("functions.jl") + include("import.jl") include("loops.jl") + include("macros.jl") + include("misc.jl") + include("modules.jl") + include("quoting.jl") + include("scopes.jl") end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 1a0dfab7d11e4..9061a3bdf262a 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -105,7 +105,21 @@ function match_ir_test_case(case_str) if isnothing(m) error("Malformatted IR test case:\n$(repr(case_str))") end - (name=strip(m[1]), input=strip(m[2]), output=strip(m[3])) + (description=strip(m[1]), input=strip(m[2]), output=strip(m[3])) +end + +function read_ir_test_cases(filename) + str = read(filename, String) + parts = split(str, r"#\*+") + if length(parts) == 2 + preamble_str = strip(parts[1]) + cases_str = parts[2] + else + preamble_str = "" + cases_str = only(parts) + end + (preamble_str, 
+ [match_ir_test_case(s) for s in split(cases_str, r"####*") if strip(s) != ""]) end function format_ir_for_test(mod, input) @@ -115,29 +129,59 @@ function format_ir_for_test(mod, input) return replace(ir, string(mod)=>"TestMod") end +function test_ir_cases(filename::AbstractString) + preamble, cases = read_ir_test_cases(filename) + test_mod = Module(:TestMod) + Base.include_string(test_mod, preamble) + for (description,input,ref) in cases + output = format_ir_for_test(test_mod, input) + @testset "$description" begin + if output != ref + # Do our own error dumping, as @test will + @error "Test \"$description\" failed" output=Text(output) ref=Text(ref) + end + @test output == ref + end + end +end + function format_ir_test_case(mod, input, description="-- Add description here --") ir = format_ir_for_test(mod, input) """ ######################################## # $description $(strip(input)) - #---------- + #--------------------- $ir """ end -function test_ir_cases(filename::AbstractString, mod=Module(:TestMod)) - str = read(filename, String) - cases = [match_ir_test_case(s) for s in split(str, r"####*") if strip(s) != ""] - - for (name,input,ref) in cases - output = format_ir_for_test(mod, input) - @testset "$name" begin - if output != ref - # Do our own error dumping, as @test will - @error "Test \"$name\" failed" output=Text(output) ref=Text(ref) +""" +Update all IR test cases in `filename` when the IR format has changed. 
+""" +function refresh_ir_test_cases(filename) + preamble, cases = read_ir_test_cases(filename) + test_mod = Module(:TestMod) + Base.include_string(test_mod, preamble) + open(filename, "w") do io + if !isempty(preamble) + println(io, preamble, "\n") + println(io, "#*******************************************************************************") + end + for (description,input,ref) in cases + ir = format_ir_for_test(test_mod, input) + if ir != ref + @info "Refreshing test case $(repr(description))" end - @test output == ref + println(io, + """ + ######################################## + # $description + $(strip(input)) + #--------------------- + $ir + """ + ) end end end From c8adbc611c81fc483c77fb99b694c418e1c36af3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 1 Sep 2024 16:46:22 +1000 Subject: [PATCH 0852/1109] More accurate predicates for Julia IR Make all of `is_valid_ir_argument`, `is_valid_body_ir_argument`, `is_single_assign_var`, `is_const_read_arg` more accurate portings of the flisp equivalents. Some values in the IR must be written to temporaries for the resulting code to be correct. It's not clear which invariats we're upholding here because none of these seem to be documented, but it seems important to have these be as equivalent as possible for now. Some changes are still required to these after the variable analysis pass is more accurate. Also avoid using `isdefined()` when looking up globals in Julia modules during lowering - this does import resolution and we can't allow this side effect when generating IR. Instead use the new functions `is_defined_and_owned_global` `is_defined_nothrow_global` which dip a bit into Julia internals to look up bindings and determine binding owner without having any side effects. 
--- JuliaLowering/src/linear_ir.jl | 78 +++++++++++-------- JuliaLowering/src/runtime.jl | 30 +++++++ JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/test/decls_ir.jl | 116 ++++++++++++---------------- JuliaLowering/test/demo.jl | 63 ++++++++------- JuliaLowering/test/functions_ir.jl | 45 +++++------ JuliaLowering/test/utils.jl | 5 +- 7 files changed, 184 insertions(+), 155 deletions(-) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index a8cf7a24e39be..2df29b24fc504 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -3,21 +3,38 @@ function is_simple_atom(ctx, ex) k = kind(ex) - # TODO flisp thismodule head? + # TODO thismodule is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || (k == K"core" && ex.name_val == "nothing") end -# N.B.: This assumes that resolve-scopes has run, so outerref is equivalent to -# a global in the current scope. +# This assumes that resolve-scopes has run, so outerref is equivalent to a +# global in the current scope. function is_valid_ir_argument(ctx, ex) k = kind(ex) - return is_simple_atom(ctx, ex) - # FIXME || - #(k == K"outerref" && nothrow_julia_global(ex[1])) || - #(k == K"globalref" && nothrow_julia_global(ex)) || - #(k == K"quote" || k = K"inert" || k == K"top" || - #k == K"core" || k == K"slot" || k = K"static_parameter") + if is_simple_atom(ctx, ex) || k == K"inert" || k == K"top" || k == K"core" + true + elseif k == K"BindingId" + binfo = lookup_binding(ctx, ex) + bk = binfo.kind + # TODO: Can we allow bk == :local || bk == :argument || bk == :static_parameter ??? + # Why does flisp seem to allow (slot) and (static_parameter), but these + # aren't yet converted to by existing lowering?? + if bk == :global + # Globals are nothrow when they are defined - we assume a previously + # defined global can never be set to undefined. 
(TODO: This could be + # broken when precompiling a module `B` in the presence of a badly + # behaved module `A`, which inconsistently defines globals during + # `A.__init__()`??) + # + # TODO (k == K"outerref" && nothrow_julia_global(ex[1])) + is_defined_nothrow_global(binfo.mod, Symbol(binfo.name)) + else + false + end + else + false + end end function is_ssa(ctx, ex) @@ -93,22 +110,17 @@ function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) Vector{JumpOrigin{GraphType}}(), ctx.mod) end -# FIXME: BindingId subsumes many things so need to assess what that means for these predicates. -# BindingId can be -# - local variable (previously K"Identifier") -# - implicit global variables in current module (previously K"Identifier") -# - globalref - from macros -# -# BindingId could also subsume -# - top,core - function is_valid_body_ir_argument(ctx, ex) - is_valid_ir_argument(ctx, ex) && return true - return false - # FIXME - k = kind(ex) - return k == K"BindingId" && # Arguments are always defined slots - TODO("vinfo-table stuff") + if is_valid_ir_argument(ctx, ex) + true + elseif kind(ex) == K"BindingId" + binfo = lookup_binding(ctx, ex) + # Arguments are always defined + # TODO: use equiv of vinfo:never-undef when we have it + binfo.kind == :argument + else + false + end end function is_simple_arg(ctx, ex) @@ -118,16 +130,20 @@ function is_simple_arg(ctx, ex) end function is_single_assign_var(ctx::LinearIRContext, ex) - return false # FIXME - id = ex.var_id - # return id in ctx.lambda_args || + kind(ex) == K"BindingId" || return false + binfo = lookup_binding(ctx, ex) + # Arguments are always single-assign + # TODO: Use equiv of vinfo:sa when we have it + return binfo.kind == :argument end function is_const_read_arg(ctx, ex) k = kind(ex) - return is_simple_atom(ctx, ex) || - is_single_assign_var(ctx, ex) || - k == K"quote" || k == K"inert" || k == K"top" || k == K"core" + # Even if we have side effects, we know that singly-assigned + # locals 
cannot be affected by them so we can inline them anyway. + # TODO from flisp: "We could also allow const globals here" + return k == K"inert" || k == K"top" || k == K"core" || + is_simple_atom(ctx, ex) || is_single_assign_var(ctx, ex) end function is_valid_ir_rvalue(ctx, lhs, rhs) @@ -140,7 +156,7 @@ end # evaluate the arguments of a call, creating temporary locations as needed function compile_args(ctx, args) - # First check if all the arguments as simple (and therefore side-effect free). + # First check if all the arguments are simple (and therefore side-effect free). # Otherwise, we need to use ssa values for all arguments to ensure proper # left-to-right evaluation semantics. all_simple = all(a->is_simple_arg(ctx, a), args) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 74dc216c164c6..dab1c4346e21c 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -181,3 +181,33 @@ function bind_docs!(f::Function, docstr, method_metadata) Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig) end +#------------------------------------------------------------------------------- +# The following functions are used by lowering to inspect Julia's state. + +# Get the binding for `name` if one is already resolved in module `mod`. Note +# that we cannot use `isdefined(::Module, ::Symbol)` here, because that causes +# binding resolution which is a massive side effect we must avoid in lowering. +function _get_module_binding(mod, name) + b = @ccall jl_get_module_binding(mod::Module, name::Symbol, 0::Cint)::Ptr{Core.Binding} + b == C_NULL ? nothing : unsafe_pointer_to_objref(b) +end + +# Return true if a `name` is defined in and *by* the module `mod`. 
+# Has no side effects, unlike isdefined() +# +# (This should do what fl_defined_julia_global does for flisp lowering) +function is_defined_and_owned_global(mod, name) + b = _get_module_binding(mod, name) + !isnothing(b) && isdefined(b, :owner) && b.owner === b +end + +# Return true if `name` is defined in `mod`, the sense that accessing it is nothrow. +# Has no side effects, unlike isdefined() +# +# (This should do what fl_nothrow_julia_global does for flisp lowering) +function is_defined_nothrow_global(mod, name) + b = _get_module_binding(mod, name) + !isnothing(b) && isdefined(b, :owner) || return false + isdefined(b.owner, :value) +end + diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index ac5fbb438b111..c76bf13cc00fd 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -292,7 +292,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # In a top level thunk but *inside* a nontrivial scope layer = ctx.scope_layers[varkey.layer] if !layer.is_macro_expansion && (varkey in ctx.implicit_toplevel_globals || - isdefined(layer.mod, Symbol(varkey.name))) + is_defined_and_owned_global(layer.mod, Symbol(varkey.name))) # Special scope rules to make assignments to globals work # like assignments to locals do inside a function. 
if is_soft_scope diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 71163d07a98ab..74bb4d733e2bc 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -3,22 +3,19 @@ local x::T = 1 #--------------------- 1 (= slot₂/tmp 1) -2 core.isa -3 slot₂/tmp -4 TestMod.T -5 (call %₂ %₃ %₄) -6 (gotoifnot %₅ label₈) -7 (goto label₁₅) -8 core.typeassert -9 top.convert +2 slot₂/tmp +3 TestMod.T +4 (call core.isa %₂ %₃) +5 (gotoifnot %₄ label₇) +6 (goto label₁₂) +7 TestMod.T +8 slot₂/tmp +9 (call top.convert %₇ %₈) 10 TestMod.T -11 slot₂/tmp -12 (call %₉ %₁₀ %₁₁) -13 TestMod.T -14 (= slot₂/tmp (call %₈ %₁₂ %₁₃)) -15 slot₂/tmp -16 (= slot₁/x %₁₅) -17 (return 1) +11 (= slot₂/tmp (call core.typeassert %₉ %₁₀)) +12 slot₂/tmp +13 (= slot₁/x %₁₂) +14 (return 1) ######################################## # const @@ -32,64 +29,53 @@ const xx = 10 # Typed const const xx::T = 10 #--------------------- -1 core.set_binding_type! -2 TestMod.T -3 (call %₁ TestMod :xx %₂) -4 (const TestMod.xx) -5 core.get_binding_type -6 (call %₅ TestMod :xx) -7 (= slot₁/tmp 10) -8 core.isa -9 slot₁/tmp -10 (call %₈ %₉ %₆) -11 (gotoifnot %₁₀ label₁₃) -12 (goto label₁₆) -13 top.convert -14 slot₁/tmp -15 (= slot₁/tmp (call %₁₃ %₆ %₁₄)) -16 slot₁/tmp -17 (= TestMod.xx %₁₆) -18 (return 10) +1 TestMod.T +2 (call core.set_binding_type! 
TestMod :xx %₁) +3 (const TestMod.xx) +4 (call core.get_binding_type TestMod :xx) +5 (= slot₁/tmp 10) +6 slot₁/tmp +7 (call core.isa %₆ %₄) +8 (gotoifnot %₇ label₁₀) +9 (goto label₁₂) +10 slot₁/tmp +11 (= slot₁/tmp (call top.convert %₄ %₁₀)) +12 slot₁/tmp +13 (= TestMod.xx %₁₂) +14 (return 10) ######################################## # Global assignment xx = 10 #--------------------- -1 core.get_binding_type -2 (call %₁ TestMod :xx) -3 (= slot₁/tmp 10) -4 core.isa -5 slot₁/tmp -6 (call %₄ %₅ %₂) -7 (gotoifnot %₆ label₉) -8 (goto label₁₂) -9 top.convert -10 slot₁/tmp -11 (= slot₁/tmp (call %₉ %₂ %₁₀)) -12 slot₁/tmp -13 (= TestMod.xx %₁₂) -14 (return 10) +1 (call core.get_binding_type TestMod :xx) +2 (= slot₁/tmp 10) +3 slot₁/tmp +4 (call core.isa %₃ %₁) +5 (gotoifnot %₄ label₇) +6 (goto label₉) +7 slot₁/tmp +8 (= slot₁/tmp (call top.convert %₁ %₇)) +9 slot₁/tmp +10 (= TestMod.xx %₉) +11 (return 10) ######################################## # Typed global assignment global xx::T = 10 #--------------------- -1 core.set_binding_type! -2 TestMod.T -3 (call %₁ TestMod :xx %₂) -4 (global TestMod.xx) -5 core.get_binding_type -6 (call %₅ TestMod :xx) -7 (= slot₁/tmp 10) -8 core.isa -9 slot₁/tmp -10 (call %₈ %₉ %₆) -11 (gotoifnot %₁₀ label₁₃) -12 (goto label₁₆) -13 top.convert -14 slot₁/tmp -15 (= slot₁/tmp (call %₁₃ %₆ %₁₄)) -16 slot₁/tmp -17 (= TestMod.xx %₁₆) -18 (return 10) +1 TestMod.T +2 (call core.set_binding_type! 
TestMod :xx %₁) +3 (global TestMod.xx) +4 (call core.get_binding_type TestMod :xx) +5 (= slot₁/tmp 10) +6 slot₁/tmp +7 (call core.isa %₆ %₄) +8 (gotoifnot %₇ label₁₀) +9 (goto label₁₂) +10 slot₁/tmp +11 (= slot₁/tmp (call top.convert %₄ %₁₀)) +12 slot₁/tmp +13 (= TestMod.xx %₁₂) +14 (return 10) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index f46ad3135264a..f9f509f11e35b 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -104,34 +104,34 @@ begin end """ -JuliaLowering.include(Main, "demo_include.jl") - -Base.eval(M, quote - function var"@inert"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"quote" - @ast __context__ ex [JuliaSyntax.K"inert" ex] - end - - function var"@label"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" - @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" - end - - function var"@goto"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" - @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" - end -end) - -JuliaLowering.include_string(M, """ -xx = "xx in M" -macro test_inert_quote() - println(xx) - @inert quote - (\$xx, xx) - end -end -""") +# JuliaLowering.include(Main, "demo_include.jl") +# +# Base.eval(M, quote +# function var"@inert"(__context__::JuliaLowering.MacroContext, ex) +# @chk kind(ex) == JuliaSyntax.K"quote" +# @ast __context__ ex [JuliaSyntax.K"inert" ex] +# end +# +# function var"@label"(__context__::JuliaLowering.MacroContext, ex) +# @chk kind(ex) == JuliaSyntax.K"Identifier" +# @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" +# end +# +# function var"@goto"(__context__::JuliaLowering.MacroContext, ex) +# @chk kind(ex) == JuliaSyntax.K"Identifier" +# @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" +# end +# end) +# +# JuliaLowering.include_string(M, """ +# xx = "xx in M" +# macro test_inert_quote() +# println(xx) +# @inert quote +# (\$xx, xx) +# end +# end +# 
""") function wrapscope(ex, scope_type) makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) @@ -411,8 +411,11 @@ end """ src = """ -function f() - global some_sym::Int = 1 +begin + local a, b + if a + b + end end """ diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 5fb9189538401..3886defd9eda8 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -8,32 +8,23 @@ function f(x)::Int end #--------------------- 1 (method :f) -2 core.svec -3 core.svec -4 core.Typeof -5 TestMod.f -6 (call %₄ %₅) -7 core.Any -8 (call %₃ %₆ %₇) -9 core.svec -10 (call %₉) -11 (call %₂ %₈ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) -12 --- method :f %₁₁ +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method :f %₆ 1 TestMod.Int - 2 slot₂/x - 3 (gotoifnot %₂ label₄) - 4 (= slot₃/tmp 0xff) - 5 core.isa - 6 slot₃/tmp - 7 (call %₅ %₆ %₁) - 8 (gotoifnot %₇ label₁₀) - 9 (goto label₁₅) - 10 core.typeassert - 11 top.convert - 12 slot₃/tmp - 13 (call %₁₁ %₁ %₁₂) - 14 (= slot₃/tmp (call %₁₀ %₁₃ %₁)) - 15 slot₃/tmp - 16 (return %₁₅) -13 (return %₁) + 2 (gotoifnot slot₂/x label₃) + 3 (= slot₃/tmp 0xff) + 4 slot₃/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₁) + 8 slot₃/tmp + 9 (call top.convert %₁ %₈) + 10 (= slot₃/tmp (call core.typeassert %₉ %₁)) + 11 slot₃/tmp + 12 (return %₁₁) +8 (return %₁) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 9061a3bdf262a..0269161dd5b07 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -171,7 +171,7 @@ function refresh_ir_test_cases(filename) for (description,input,ref) in cases ir = format_ir_for_test(test_mod, input) if ir != ref - @info "Refreshing test case $(repr(description))" + @info "Refreshing test case $(repr(description)) in $filename" end println(io, """ @@ -186,3 +186,6 @@ function 
refresh_ir_test_cases(filename) end end +function refresh_all_ir_test_cases(test_dir=".") + foreach(refresh_ir_test_cases, filter(fn->endswith(fn, "ir.jl"), readdir(test_dir, join=true))) +end From 4dc5383653e9f41caa0c48053a71b0997221e97b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 2 Sep 2024 13:45:56 +1000 Subject: [PATCH 0853/1109] Write test update at end of test tool in case of crashes This should preven uncommitted tests from accidentally being deleted if the dev code crashes. --- JuliaLowering/test/utils.jl | 39 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 0269161dd5b07..b3d21b98c2f72 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -163,27 +163,28 @@ function refresh_ir_test_cases(filename) preamble, cases = read_ir_test_cases(filename) test_mod = Module(:TestMod) Base.include_string(test_mod, preamble) - open(filename, "w") do io - if !isempty(preamble) - println(io, preamble, "\n") - println(io, "#*******************************************************************************") - end - for (description,input,ref) in cases - ir = format_ir_for_test(test_mod, input) - if ir != ref - @info "Refreshing test case $(repr(description)) in $filename" - end - println(io, - """ - ######################################## - # $description - $(strip(input)) - #--------------------- - $ir - """ - ) + io = IOBuffer() + if !isempty(preamble) + println(io, preamble, "\n") + println(io, "#*******************************************************************************") + end + for (description,input,ref) in cases + ir = format_ir_for_test(test_mod, input) + if ir != ref + @info "Refreshing test case $(repr(description)) in $filename" end + println(io, + """ + ######################################## + # $description + $(strip(input)) + #--------------------- + $ir + """ + ) end + # Write only at the end to ensure we 
don't write rubbish if we crash! + write(filename, take!(io)) end function refresh_all_ir_test_cases(test_dir=".") From 8ae72984445a37b512cb0bb5d3ca37fe1a08690c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 10 Sep 2024 17:19:29 +1000 Subject: [PATCH 0854/1109] Desugaring of general assignment syntax Expand general assignment syntax, including * UnionAll definitions * Chained assignments * Setting of structure fields * Destructuring * Typed variable declarations Still TODO * Eliminiating tuples in case sides match * Assignments to array elements --- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/ast.jl | 23 +- JuliaLowering/src/closure_conversion.jl | 7 + JuliaLowering/src/desugaring.jl | 378 ++++++++++++++++++++++-- JuliaLowering/src/kinds.jl | 3 +- JuliaLowering/src/scope_analysis.jl | 7 + JuliaLowering/src/syntax_graph.jl | 5 + JuliaLowering/test/assignments.jl | 97 ++++++ JuliaLowering/test/assignments_ir.jl | 112 +++++++ JuliaLowering/test/decls.jl | 14 +- JuliaLowering/test/decls_ir.jl | 9 + JuliaLowering/test/demo.jl | 51 ++++ JuliaLowering/test/destructuring.jl | 115 +++++++ JuliaLowering/test/destructuring_ir.jl | 134 +++++++++ JuliaLowering/test/loops_ir.jl | 24 ++ JuliaLowering/test/utils.jl | 1 + 16 files changed, 944 insertions(+), 38 deletions(-) create mode 100644 JuliaLowering/test/assignments.jl create mode 100644 JuliaLowering/test/assignments_ir.jl create mode 100644 JuliaLowering/test/destructuring.jl create mode 100644 JuliaLowering/test/destructuring_ir.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index af69aeb703119..0b64588edaa88 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -13,7 +13,7 @@ using JuliaSyntax: highlight, Kind, @KSet_str using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext -using 
JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_infix_op_call, is_postfix_op_call, is_error +using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error _include("kinds.jl") _register_kinds() diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 11f7cf57214f9..8279ebec9389a 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -253,6 +253,11 @@ function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp") var, assign_var end +function emit_assign_tmp(stmts::SyntaxList, ctx, ex, name="tmp") + var = ssavar(ctx, ex, name) + push!(stmts, makenode(ctx, ex, K"=", var, ex)) + var +end #------------------------------------------------------------------------------- # @ast macro @@ -513,6 +518,14 @@ function is_function_def(ex) return k == K"function" || k == K"->" end +function has_parameters(ex) + numchildren(ex) >= 1 && kind(ex[end]) == K"parameters" +end + +function any_assignment(exs) + any(kind(e) == K"=" for e in exs) +end + function is_valid_name(ex) n = identifier_name(ex).name_val n !== "ccall" && n !== "cglobal" @@ -535,13 +548,3 @@ function remove_empty_parameters(args) args[1:i] end -# given a complex assignment LHS, return the symbol that will ultimately be assigned to -function assigned_name(ex) - k = kind(ex) - if (k == K"call" || k == K"curly" || k == K"where") || (k == K"::" && is_eventually_call(ex)) - assigned_name(ex[1]) - else - ex - end -end - diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 55ba168a57650..f3d3512cbcef1 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -119,6 +119,7 @@ function convert_assignment(ctx, ex) else @ast ctx ex [K"block" [K"=" tmp_rhs0 rhs0] + assgn tmp_rhs0 ] end @@ -152,6 +153,12 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) _convert_closures(ctx, ex[2]) ] end + 
elseif k == K"::" + _convert_closures(ctx, + @ast ctx ex [K"call" + "typeassert"::K"core" + children(ex)... + ]) elseif k == K"lambda" ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_locals) mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 2a50929991959..2a50fa1d1fd2a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -31,10 +31,19 @@ function is_identifier_like(ex) k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" end -is_assignment(ex) = kind(ex) == K"=" +# Identify some expressions that are safe to repeat +function is_effect_free(ex) + k = kind(ex) + is_literal(k) || is_identifier_like(ex) || k == K"Symbol" || + k == K"inert" || k == K"top" || k == K"core" || + (k == K"." && numchildren(ex) == 2 && is_identifier_like(ex[1])) # `a.b` with simple `a` + # TODO: metas +end -function has_parameters(ex) - numchildren(ex) >= 1 && kind(ex[end]) == K"parameters" +# Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the +# tuple. Includes support for slurping/splatting. +function tuple_to_assignments(ctx, srcref, lhss, rhs) + TODO(srcref, "tuple-eliminating destructuring") end # Create an assignment `$lhs = $rhs` where `lhs` must be "simple". If `rhs` is @@ -42,44 +51,371 @@ end # more expressions at top level. `rhs` should already be expanded. # # flisp: sink-assignment -function simple_assignment(ctx, assign_srcref, lhs, rhs) +function sink_assignment(ctx, srcref, lhs, rhs) @assert is_identifier_like(lhs) if kind(rhs) == K"block" - @ast ctx assign_srcref [K"block" + @ast ctx srcref [K"block" rhs[1:end-1]... 
[K"=" lhs rhs[end]] ] else - @ast ctx assign_srcref [K"=" lhs rhs] + @ast ctx srcref [K"=" lhs rhs] + end +end + +function _tuple_sides_match(lhs, rhs) + N = max(length(lhs), length(rhs)) + for i = 1:N+1 + if i > length(lhs) + # (x, y) = (a, b) # match + # (x,) = (a, b) # no match + return i > length(rhs) + elseif kind(lhs[i]) == K"..." + # (x, ys..., z) = (a, b) # match + # (x, ys...) = (a,) # match + return true + elseif i > length(rhs) + # (x, y) = (a,) # no match + # (x, y, zs...) = (a,) # no match + return false + elseif kind(rhs[i]) == K"..." + # (x, y) = (as...,) # match + # (x, y, z) = (a, bs...) # match + # (x, y) = (as..., b) # no match + return i == length(rhs) + end + end +end + +function _in_assignment_lhs(lhss, x_rhs) + for e in lhss + x = kind(e) == K"..." ? e[1] : e + if kind(x_rhs) == K"Identifier" && kind(x) == K"Identifier" + if x_rhs.name_val == x.name_val + return true + end + elseif kind(x_rhs) == K"BindingId" && kind(x) == K"BindingId" + if x_rhs.var_id == x.var_id + return true + end + end + end + return false +end + +# Lower `(lhss...) = rhs` in contexts where `rhs` must be a tuple at runtime +# by assuming that `getfield(rhs, i)` works and is efficient. +function lower_tuple_assignment(ctx, assignment_srcref, lhss, rhs) + stmts = SyntaxList(ctx) + tmp = emit_assign_tmp(stmts, ctx, rhs, "rhs_tmp") + for (i, lh) in enumerate(lhss) + push!(stmts, @ast ctx assignment_srcref [K"=" + lh + [K"call" "getfield"::K"core" tmp i::K"Integer"] + ]) + end + makenode(ctx, assignment_srcref, K"block", stmts) +end + +# Implement destructuring with `lhs` a tuple expression (possibly with +# slurping) and `rhs` a general expression. +# +# Destructuring in this context is done via the iteration interface, though +# calls `Base.indexed_iterate()` to allow for a fast path in cases where the +# right hand side is directly indexable. 
+function _destructure(ctx, assignment_srcref, stmts, lhs, rhs) + n_lhs = numchildren(lhs) + if n_lhs > 0 + iterstate = new_mutable_var(ctx, rhs, "iterstate") + end + + end_stmts = SyntaxList(ctx) + + i = 0 + for lh in children(lhs) + i += 1 + if kind(lh) == K"..." + lh1 = if is_identifier_like(lh[1]) + lh[1] + else + lhs_tmp = ssavar(ctx, lh[1], "lhs_tmp") + push!(end_stmts, expand_forms_2(ctx, @ast ctx lh[1] [K"=" lh[1] lhs_tmp])) + lhs_tmp + end + if i == n_lhs + # Slurping as last lhs, eg, for `zs` in + # (x, y, zs...) = rhs + if kind(lh1) != K"Placeholder" + push!(stmts, expand_forms_2(ctx, + @ast ctx assignment_srcref [K"=" + lh1 + [K"call" + "rest"::K"top" + rhs + if i > 1 + iterstate + end + ] + ] + )) + end + else + # Slurping before last lhs. Eg, for `xs` in + # (xs..., y, z) = rhs + # For this we call + # (xs, tail) = Base.split_rest(...) + # then continue iteration with `tail` as new rhs. + tail = ssavar(ctx, lh, "tail") + push!(stmts, + expand_forms_2(ctx, + lower_tuple_assignment(ctx, + assignment_srcref, + (lh1, tail), + @ast ctx assignment_srcref [K"call" + "split_rest"::K"top" + rhs + (n_lhs - i)::K"Integer" + if i > 1 + iterstate + end + ] + ) + ) + ) + rhs = tail + n_lhs = n_lhs - i + i = 0 + end + else + # Normal case, eg, for `y` in + # (x, y, z) = rhs + lh1 = if is_identifier_like(lh) + lh + # elseif is_eventually_call(lh) (TODO??) + else + lhs_tmp = ssavar(ctx, lh, "lhs_tmp") + push!(end_stmts, expand_forms_2(ctx, @ast ctx lh [K"=" lh lhs_tmp])) + lhs_tmp + end + push!(stmts, + expand_forms_2(ctx, + lower_tuple_assignment(ctx, + assignment_srcref, + i == n_lhs ? 
(lh1,) : (lh1, iterstate), + @ast ctx assignment_srcref [K"call" + "indexed_iterate"::K"top" + rhs + i::K"Integer" + if i > 1 + iterstate + end + ] + ) + ) + ) + end end + # Actual assignments must happen after the whole iterator is desctructured + # (https://github.com/JuliaLang/julia/issues/40574) + append!(stmts, end_stmts) + stmts end +# Expands all cases of general tuple destructuring function expand_tuple_destruct(ctx, ex) lhs = ex[1] @assert kind(lhs) == K"tuple" - rhs = expand_forms_2(ctx, ex[2]) + rhs = ex[2] - # FIXME: This is specialized to only the form produced by lowering of `for`. - @assert numchildren(lhs) == 2 && all(is_identifier_like, children(lhs)) - @ast ctx ex [K"block" - r = rhs - [K"=" lhs[1] [K"call" "getindex"::K"top" r 1::K"Integer"]] - [K"=" lhs[2] [K"call" "getindex"::K"top" r 2::K"Integer"]] - ] + num_slurp = 0 + for lh in children(lhs) + num_slurp += (kind(lh) == K"...") + if num_slurp > 1 + throw(LoweringError(lh, "multiple `...` in destructuring assignment are ambiguous")) + end + end + + if kind(rhs) == K"tuple" && !any_assignment(children(rhs)) && + !has_parameters(rhs) && _tuple_sides_match(children(lhs), children(rhs)) + return expand_forms_2(ctx, tuple_to_assignments(ctx, ex)) + end + + stmts = SyntaxList(ctx) + rhs1 = if is_ssa(ctx, rhs) || (is_identifier_like(rhs) && + !_in_assignment_lhs(children(lhs), rhs)) + rhs + else + emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) + end + _destructure(ctx, ex, stmts, lhs, rhs1) + push!(stmts, @ast ctx rhs1 [K"unnecessary" rhs1]) + makenode(ctx, ex, K"block", stmts) +end + +function _arg_to_temp(ctx, stmts, ex, eq_is_kw=false) + k = kind(ex) + if is_effect_free(ex) + ex + elseif k == K"..." 
+ @ast ctx ex [k _arg_to_temp(ctx, stmts, ex[1])] + elseif k == K"=" && eq_is_kw + @ast ctx ex [K"=" ex[1] _arg_to_temp(ex[2])] + else + emit_assign_tmp(stmts, ctx, ex) + end +end + +# Make the *arguments* of an expression safe for multiple evaluation, for +# example +# +# a[f(x)] => (temp=f(x); a[temp]) +# +# Any assignments are added to `stmts` and a result expression returned which +# may be used in further desugaring. +function remove_argument_side_effects(ctx, stmts, ex) + if is_literal(ex) || is_identifier_like(ex) + ex + else + k = kind(ex) + if k == K"let" + emit_assign_tmp(stmts, ctx, ex) + else + args = SyntaxList(ctx) + eq_is_kw = ((k == K"call" || k == K"dotcall") && is_prefix_call(ex)) || k == K"ref" + for (i,e) in enumerate(children(ex)) + push!(args, _arg_to_temp(ctx, stmts, e, eq_is_kw && i > 1)) + end + # TODO: Copy attributes? + @ast ctx ex [k args...] + end + end end +# Expand general assignment syntax, including +# * UnionAll definitions +# * Chained assignments +# * Setting of structure fields +# * Assignments to array elements +# * Destructuring +# * Typed variable declarations function expand_assignment(ctx, ex) @chk numchildren(ex) == 2 lhs = ex[1] rhs = ex[2] kl = kind(lhs) - if is_identifier_like(lhs) - simple_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs)) + if kl == K"curly" + # Expand UnionAll definitions + if numchildren(lhs) <= 1 + throw(LoweringError(lhs, "empty type parameter list in type alias")) + end + name = lhs[1] + unionall_def = @ast ctx ex [K"=" + name + [K"where" ex[2] lhs[2:end]...] 
+ ] + @ast ctx ex [K"block" + [K"const_if_global" name] + expand_forms_2(ctx, unionall_def) + ] + elseif kind(rhs) == K"=" + # Expand chains of assignments + # a = b = c ==> b=c; a=c + stmts = SyntaxList(ctx) + push!(stmts, lhs) + while kind(rhs) == K"=" + push!(stmts, rhs[1]) + rhs = rhs[2] + end + if is_identifier_like(rhs) + tmp_rhs = nothing + rr = rhs + else + tmp_rhs = ssavar(ctx, rhs, "rhs") + rr = tmp_rhs + end + for i in 1:length(stmts) + stmts[i] = @ast ctx ex [K"=" stmts[i] rr] + end + if !isnothing(tmp_rhs) + pushfirst!(stmts, @ast ctx ex [K"=" tmp_rhs rhs]) + end + expand_forms_2(ctx, + @ast ctx ex [K"block" + stmts... + [K"unnecessary" rr] + ] + ) + elseif is_identifier_like(lhs) + sink_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs)) + elseif kl == K"." + # a.b = rhs ==> setproperty!(a, :b, rhs) + @chk numchildren(lhs) == 2 + a = lhs[1] + b = lhs[2] + stmts = SyntaxList(ctx) + if !is_identifier_like(a) + a = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, a), "a_tmp") + end + if kind(b) == K"Identifier" + b = @ast ctx b b=>K"Symbol" + else + b = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, b), "b_tmp") + end + if !is_identifier_like(rhs) && !is_literal(rhs) + rhs = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs), "rhs_tmp") + end + @ast ctx ex [K"block" + stmts... 
+ [K"call" "setproperty!"::K"top" a b rhs] + [K"unnecessary" rhs] + ] elseif kl == K"tuple" # TODO: has_parameters - expand_tuple_destruct(ctx, ex) + if has_parameters(lhs) + TODO(lhs, "Destructuring with named fields") + else + expand_tuple_destruct(ctx, ex) + end + elseif kl == K"ref" + # a[i1, i2] = rhs + TODO(lhs) + elseif kl == K"::" && numchildren(lhs) == 2 + x = lhs[1] + T = lhs[2] + res = if is_identifier_like(x) + # Identifer in lhs[1] is a variable type declaration, eg + # x::T = rhs + @ast ctx ex [K"block" + [K"decl" lhs[1] lhs[2]] + [K"=" lhs[1] rhs] + ] + else + # Otherwise just a type assertion, eg + # a[i]::T = rhs ==> (a[i]::T; a[i] = rhs) + # a[f(x)]::T = rhs ==> (tmp = f(x); a[tmp]::T; a[tmp] = rhs) + stmts = SyntaxList(ctx) + l1 = remove_argument_side_effects(ctx, stmts, lhs[1]) + # TODO: What about (f(z),y)::T = rhs? That's broken syntax and + # needs to be detected somewhere but won't be detected here. Maybe + # it shows that remove_argument_side_effects() is not the ideal + # solution here? + @ast ctx ex [K"block" + stmts... + [K"::" l1 lhs[2]] + [K"=" l1 rhs] + ] + end + expand_forms_2(ctx, res) + elseif kl == K"dotcall" + throw(LoweringError(lhs, "invalid dot call syntax on left hand side of assignment")) + elseif kl == K"typed_hcat" + throw(LoweringError(lhs, "invalid spacing in left side of indexed assignment")) + elseif kl == K"typed_vcat" || kl == K"typed_ncat" + throw(LoweringError(lhs, "unexpected `;` in left side of indexed assignment")) + elseif kl == K"vect" || kl == K"hcat" || kl == K"vcat" || kl == K"ncat" + throw(LoweringError(lhs, "use `(a, b) = ...` to assign multiple values")) else - TODO(ex) + throw(LoweringError(lhs, "invalid assignment location")) end end @@ -288,7 +624,7 @@ function expand_for(ctx, ex) # Assign iteration vars and next state body = @ast ctx iterspec [K"block" lhs_local_defs... 
- [K"=" [K"tuple" lhs state] next] + lower_tuple_assignment(ctx, iterspec, (lhs, state), next) loop ] @@ -444,7 +780,9 @@ function strip_decls!(ctx, stmts, declkind, declkind2, ex) end end -# local x, (y=2), z => local x; local y; y = 2; local z +# local x, (y=2), z ==> local x; local y; y = 2; local z +# const x = 1 ==> const x; x = 1 +# global x::T = 1 ==> (block (global x) (decl x T) (x = 1)) function expand_decls(ctx, ex) declkind = kind(ex) if numchildren(ex) == 1 && kind(ex[1]) ∈ KSet"const global local" diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 426ef21001154..2a4e6b1bc3191 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -34,8 +34,9 @@ function _register_kinds() "with_static_parameters" "top" "core" - "toplevel_butfirst" "lambda" + "toplevel_butfirst" + "const_if_global" "moved_local" "foreigncall" "new" diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index c76bf13cc00fd..fa19282ed745e 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -413,6 +413,13 @@ function _resolve_scopes(ctx, ex::SyntaxTree) end end makeleaf(ctx, ex, K"TOMBSTONE") + elseif k == K"const_if_global" + id = _resolve_scopes(ctx, ex[1]) + if lookup_binding(ctx, id).kind == :global + @ast ctx ex [K"const" ex[1]] + else + makeleaf(ctx, ex, K"TOMBSTONE") + end else ex_mapped = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) maybe_update_bindings!(ctx, ex_mapped) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index b19f5ef6958ac..c4388ecaaad38 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -568,6 +568,11 @@ function Base.push!(v::SyntaxList, ex::SyntaxTree) push!(v.ids, ex._id) end +function Base.pushfirst!(v::SyntaxList, ex::SyntaxTree) + check_same_graph(v, ex) + pushfirst!(v.ids, ex._id) +end + function Base.append!(v::SyntaxList, exs) for e in exs push!(v, e) diff --git 
a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl new file mode 100644 index 0000000000000..11477a27272e2 --- /dev/null +++ b/JuliaLowering/test/assignments.jl @@ -0,0 +1,97 @@ +@testset "assignments" begin + +test_ir_cases(joinpath(@__DIR__, "assignments_ir.jl")) + +test_mod = Module() + +Base.include_string(test_mod, +""" +mutable struct X + a + b +end +""") + +# TODO: Desugaring of assignment done, but needs `where` lowering +@test_broken JuliaLowering.include_string(test_mod, """ +MyVector{T} = Array{1,T} +""") == 42 + +# Chained assignment +@test JuliaLowering.include_string(test_mod, """ +let + a = b = 42 +end +""") == 42 + +@test JuliaLowering.include_string(test_mod, """ +let + x = [] + a = b = (push!(x, 1); 42) + (a,b,x) +end +""") == (42,42,[1]) + +# setproperty! +@test JuliaLowering.include_string(test_mod, """ +let + x = X(1,2) + x.a = 10 + (x.a, x.b) +end +""") == (10,2) + +# Lowering of ref +@test_broken JuliaLowering.include_string(test_mod, """ +let + as = [0,0,0,0] + as[begin] = 1 + as[2] = 2 + as[end] = 4 + as +end +""") == [1, 2, 0, 4] + +# Declarations +@test JuliaLowering.include_string(test_mod, """ +let + x::Int = 1 + x = 10.0 + x +end +""") === 10 + +#------------------------------------------------------------------------------- +# Invalid assignment left hand sides with specific error messages +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +a.(b) = c +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +T[x y] = z +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +T[x; y] = z +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +T[x ;;; y] = z +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +[x, y] = z +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +[x y] = z +""") +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +[x; y] = z +""") 
+@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +[x ;;; y] = z +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +1 = x +""") + +end diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl new file mode 100644 index 0000000000000..fdd786c9a30b8 --- /dev/null +++ b/JuliaLowering/test/assignments_ir.jl @@ -0,0 +1,112 @@ +######################################## +# chain of assignments +let + a = b = c = 1 +end +#--------------------- +1 1 +2 (= slot₁/a %₁) +3 (= slot₂/b %₁) +4 (= slot₃/c %₁) +5 (return %₁) + +######################################## +# chain of assignments with nontrivial rhs +let + a = b = c = f() +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 (= slot₁/a %₂) +4 (= slot₂/b %₂) +5 (= slot₃/c %₂) +6 (return %₂) + +######################################## +# short form function def, not chain of assignments +let + a = b() = c = d +end +#--------------------- +1 (method :b) +2 TestMod.b +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) +7 --- method :b %₆ + 1 TestMod.d + 2 (= slot₂/c %₁) + 3 (return %₁) +8 (= slot₁/a %₁) +9 (return %₁) + +######################################## +# a.b = ... => setproperty! assignment +let + a.b = c +end +#--------------------- +1 TestMod.a +2 TestMod.c +3 (call top.setproperty! %₁ :b %₂) +4 TestMod.c +5 (return %₄) + +######################################## +# a.b.c = f() => setproperty! assignment, complex case +let + a.b.c = f() +end +#--------------------- +1 TestMod.a +2 (call top.getproperty %₁ :b) +3 TestMod.f +4 (call %₃) +5 (call top.setproperty! 
%₂ :c %₄) +6 (return %₄) + +######################################## +# declarations of typed locals +let + x::T = f() + x +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 (= slot₂/tmp %₂) +4 slot₂/tmp +5 TestMod.T +6 (call core.isa %₄ %₅) +7 (gotoifnot %₆ label₉) +8 (goto label₁₄) +9 TestMod.T +10 slot₂/tmp +11 (call top.convert %₉ %₁₀) +12 TestMod.T +13 (= slot₂/tmp (call core.typeassert %₁₁ %₁₂)) +14 slot₂/tmp +15 (= slot₁/x %₁₄) +16 slot₁/x +17 (return %₁₆) + +######################################## +# "complex lhs" of `::T` => type-assert, not decl +let + a.b::T = f() + x +end +#--------------------- +1 TestMod.a +2 (call top.getproperty %₁ :b) +3 TestMod.T +4 (call core.typeassert %₂ %₃) +5 TestMod.f +6 (call %₅) +7 TestMod.a +8 (call top.setproperty! %₇ :b %₆) +9 TestMod.x +10 (return %₉) + diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index 20809b3fdef6b..7fa63e0117b1f 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -14,12 +14,14 @@ end local x::Int = 1.0 """) === 1.0 -# TODO unadorned declarations -# @test JuliaLowering.include_string(test_mod, """ -# let -# x::Int = 1.0 -# end -# """) === 1 +# Unadorned declarations +@test JuliaLowering.include_string(test_mod, """ +let + a = 0.0 + x::Int = a + x +end +""") === 0 @test JuliaLowering.include_string(test_mod, """ let diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 74bb4d733e2bc..eb4c10a015aa9 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -79,3 +79,12 @@ global xx::T = 10 13 (= TestMod.xx %₁₂) 14 (return 10) +######################################## +# Type assert (TODO: move this?) 
+x::T +#--------------------- +1 TestMod.x +2 TestMod.T +3 (call core.typeassert %₁ %₂) +4 (return %₃) + diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index f9f509f11e35b..5cdc0d90b041b 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -419,6 +419,57 @@ begin end """ +src = """ +let + A{S} = B{S} +end +""" + +src = """ +let + a = b = c = sin(1) + (a,b,c) +end +""" + +src = """ +a.b = c +""" + +src = """ +a[i j] = c +""" + +src = """ +let + as = [1,2,3,4] + (x,ys...,z) = as + (x,ys,z) +end +""" + +src = """ +let + x = (1,2) + (y,x) = x + (x,y) +end +""" + +src = """ +let + a = b = c = sin(1) + (a,b,c) +end +""" + +src = """ +begin + as = [(1,2), (3,4)] + ((x,y), (z,w)) = as +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl new file mode 100644 index 0000000000000..7bbb51d9d20a5 --- /dev/null +++ b/JuliaLowering/test/destructuring.jl @@ -0,0 +1,115 @@ +@testset "Destructuring" begin + +test_ir_cases(joinpath(@__DIR__, "loops_ir.jl")) + +test_mod = Module() + +@testset "Destructuring via iteration" begin + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3] + (x,y) = as + (x,y) +end +""") == (1,2) + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3] + (x,ys...) 
= as + (x,ys) +end +""") == (1, [2,3]) + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3,4] + (x,ys...,z) = as + (x,ys,z) +end +""") == (1, [2, 3], 4) + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3,4] + (xs...,y) = as + (xs,y) +end +""") == ([1, 2, 3], 4) + +# Case where indexed_iterate is just iteration +@test JuliaLowering.include_string(test_mod, """ +let + (x,ys...,z) = "aβcδe" + (x,ys,z) +end +""") == ('a', "βcδ", 'e') + + +# Use in value position yeilds rhs +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2] + zs = begin + (x,y) = as + end + (x,y, as === zs) +end +""") == (1, 2, true) + +# lhs variable name in rhs +@test JuliaLowering.include_string(test_mod, """ +let + x = (1,2) + (x,y) = x + (x,y) +end +""") == (1, 2) + +@test JuliaLowering.include_string(test_mod, """ +let + x = (1,2) + (x...,y) = x + (x,y) +end +""") == ((1,), 2) + +@test JuliaLowering.include_string(test_mod, """ +let + zs = [(1,2), (3,(4,5))] + ((a,b), (c,(d,e))) = zs + (a,b,c,d,e) +end +""") == (1,2,3,4,5) + +@test JuliaLowering.include_string(test_mod, """ +let + zs = [[1,2,3], 4] + ((a,bs...), c) = zs + (a, bs, c) +end +""") == (1, [2,3], 4) + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(xs..., ys...) 
= x +""") + +end + + +@testset "Tuples on both sides" begin + +# lhs variable name in rhs +@test_broken JuliaLowering.include_string(test_mod, """ +let + x = 1 + y = 2 + (x,y) = (y,x) + (x,y) +end +""") == (2, 1) + +end + +end diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl new file mode 100644 index 0000000000000..d680be62055fe --- /dev/null +++ b/JuliaLowering/test/destructuring_ir.jl @@ -0,0 +1,134 @@ +######################################## +# Simple destructuring +let + (x,y) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.indexed_iterate %₅ 2 %₆) +8 (= slot₃/y (call core.getfield %₇ 1)) +9 TestMod.as +10 (return %₉) + +######################################## +# Trivial slurping +let + (xs...,) = as +end +#--------------------- +1 TestMod.as +2 (= slot₁/xs (call top.rest %₁)) +3 TestMod.as +4 (return %₃) + +######################################## +# Slurping last arg +let + (x, ys...) 
= as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (= slot₃/ys (call top.rest %₅ %₆)) +8 TestMod.as +9 (return %₈) + +######################################## +# Slurping, first arg +let + (xs..., y, z) = as +end +#--------------------- +1 TestMod.as +2 (call top.split_rest %₁ 2) +3 (= slot₂/xs (call core.getfield %₂ 1)) +4 (call core.getfield %₂ 2) +5 (call top.indexed_iterate %₄ 1) +6 (= slot₃/y (call core.getfield %₅ 1)) +7 (= slot₁/iterstate (call core.getfield %₅ 2)) +8 slot₁/iterstate +9 (call top.indexed_iterate %₄ 2 %₈) +10 (= slot₄/z (call core.getfield %₉ 1)) +11 TestMod.as +12 (return %₁₁) + +######################################## +# Slurping, middle arg +let + (x, ys..., z) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.split_rest %₅ 1 %₆) +8 (= slot₃/ys (call core.getfield %₇ 1)) +9 (call core.getfield %₇ 2) +10 (call top.indexed_iterate %₉ 1) +11 (= slot₄/z (call core.getfield %₁₀ 1)) +12 TestMod.as +13 (return %₁₂) + +######################################## +# Recursive destructuring +let + ((x,y), (z,w)) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (call core.getfield %₂ 1) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.indexed_iterate %₅ 2 %₆) +8 (call core.getfield %₇ 1) +9 (call top.indexed_iterate %₃ 1) +10 (= slot₅/x (call core.getfield %₉ 1)) +11 (= slot₂/iterstate (call core.getfield %₉ 2)) +12 slot₂/iterstate +13 (call top.indexed_iterate %₃ 2 %₁₂) +14 (= slot₆/y (call core.getfield %₁₃ 1)) +15 (call top.indexed_iterate %₈ 1) +16 (= slot₇/z (call core.getfield %₁₅ 1)) +17 (= slot₃/iterstate (call core.getfield %₁₅ 2)) +18 
slot₃/iterstate +19 (call top.indexed_iterate %₈ 2 %₁₈) +20 (= slot₄/w (call core.getfield %₁₉ 1)) +21 TestMod.as +22 (return %₂₁) + +######################################## +# Recursive destructuring with slurping +let + ((x,ys...), z) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (call core.getfield %₂ 1) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.indexed_iterate %₅ 2 %₆) +8 (= slot₅/z (call core.getfield %₇ 1)) +9 (call top.indexed_iterate %₃ 1) +10 (= slot₃/x (call core.getfield %₉ 1)) +11 (= slot₂/iterstate (call core.getfield %₉ 2)) +12 slot₂/iterstate +13 (= slot₄/ys (call top.rest %₃ %₁₂)) +14 TestMod.as +15 (return %₁₄) + diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl index 51eb1ea689439..d3c244ae1f492 100644 --- a/JuliaLowering/test/loops_ir.jl +++ b/JuliaLowering/test/loops_ir.jl @@ -48,3 +48,27 @@ end 8 (goto label₁) 9 (return core.nothing) +######################################## +# Basic for loop +for x in xs + body +end +#--------------------- +1 TestMod.xs +2 (= slot₁/next (call top.iterate %₁)) +3 slot₁/next +4 (call core.=== %₃ core.nothing) +5 (call top.not_int %₄) +6 (gotoifnot %₅ label₁₇) +7 slot₁/next +8 (= slot₂/x (call core.getfield %₇ 1)) +9 (call core.getfield %₇ 2) +10 TestMod.body +11 (= slot₁/next (call top.iterate %₁ %₉)) +12 slot₁/next +13 (call core.=== %₁₂ core.nothing) +14 (call top.not_int %₁₃) +15 (gotoifnot %₁₄ label₁₇) +16 (goto label₇) +17 (return core.nothing) + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index b3d21b98c2f72..f309708e49e25 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -185,6 +185,7 @@ function refresh_ir_test_cases(filename) end # Write only at the end to ensure we don't write rubbish if we crash! 
write(filename, take!(io)) + nothing end function refresh_all_ir_test_cases(test_dir=".") From 76d7351cade9e6cb5b29ef1ec5689f33aefbcec6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 11 Sep 2024 14:03:54 +1000 Subject: [PATCH 0855/1109] "matching tuple" assignment destructuring - partly elide tuples Deal with cases like `(x,y) = (a,b)`. Still need to deal with slurps and splats. --- JuliaLowering/src/ast.jl | 2 +- JuliaLowering/src/desugaring.jl | 72 ++++++++-- JuliaLowering/test/demo.jl | 73 ++++++---- JuliaLowering/test/demo_include.jl | 183 +++++++++++-------------- JuliaLowering/test/destructuring_ir.jl | 47 +++++++ 5 files changed, 229 insertions(+), 148 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 8279ebec9389a..7dfb920549779 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -487,7 +487,7 @@ end # Predicates and accessors working on expression trees function is_quoted(ex) - kind(ex) in KSet"quote top core globalref outerref break inert + kind(ex) in KSet"Symbol quote top core globalref outerref break inert meta inbounds inline noinline loopinfo" end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 2a50fa1d1fd2a..a0265ef07c0c9 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -31,19 +31,71 @@ function is_identifier_like(ex) k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" end +# Return true when `x` and `y` are "the same identifier", but also works with +# bindings (and hence ssa vars). See also `is_identifier_like()` +function is_same_identifier_like(x, y) + return (kind(x) == K"Identifier" && kind(y) == K"Identifier" && NameKey(x) == NameKey(y)) || + (kind(x) == K"BindingId" && kind(y) == K"BindingId" && x.var_id == y.var_id) +end + # Identify some expressions that are safe to repeat +# +# TODO: Can we use this in more places? 
function is_effect_free(ex) k = kind(ex) - is_literal(k) || is_identifier_like(ex) || k == K"Symbol" || - k == K"inert" || k == K"top" || k == K"core" || - (k == K"." && numchildren(ex) == 2 && is_identifier_like(ex[1])) # `a.b` with simple `a` # TODO: metas + is_literal(k) || is_identifier_like(ex) || k == K"Symbol" || + k == K"inert" || k == K"top" || k == K"core" + # flisp also includes `a.b` with simple `a`, but this seems like a bug + # because this calls the user-defined getproperty? end # Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the # tuple. Includes support for slurping/splatting. -function tuple_to_assignments(ctx, srcref, lhss, rhs) - TODO(srcref, "tuple-eliminating destructuring") +function tuple_to_assignments(ctx, ex) + lhs = ex[1] + rhs = ex[2] + stmts = SyntaxList(ctx) + end_stmts = SyntaxList(ctx) + elements = SyntaxList(ctx) + assigned = SyntaxList(ctx) + + for i in 1:numchildren(lhs) + lh = lhs[i] + if kind(lh) == K"..." + # can be null iff lh is a vararg + rh = i <= numchildren(rhs) ? rhs[i] : nothing + TODO(lh, "... in tuple_to_assignments") + continue + end + rh = rhs[i] # In other cases `rhs[i]` must exist + if kind(rh) == K"..." + TODO(rh, "... in tuple_to_assignments") + else + if is_identifier_like(lh) && is_effect_free(rh) && + !any(is_same_identifier_like(lh, rhs[j]) for j in i+1:lastindex(rhs)) + !any(is_same_identifier_like(rh, a) for a in assigned) + # Overwrite `lh` directly if that won't cause conflicts with + # other symbols + push!(stmts, @ast ctx ex [K"=" lh rh]) + push!(assigned, lh) + push!(elements, rh) + else + # In other cases we need a temporary and we'll overwrite `lh` at the end. + tmp = ssavar(ctx, rh) + push!(stmts, @ast ctx ex [K"=" tmp rh]) + # `push!(assigned, lh)` is not required when we assign `lh` later. + push!(end_stmts, @ast ctx ex [K"=" lh tmp]) + push!(elements, tmp) + end + end + end + + @ast ctx ex [K"block" + stmts... + end_stmts... 
+ [K"unnecessary" [K"tuple" elements...]] + ] end # Create an assignment `$lhs = $rhs` where `lhs` must be "simple". If `rhs` is @@ -90,14 +142,8 @@ end function _in_assignment_lhs(lhss, x_rhs) for e in lhss x = kind(e) == K"..." ? e[1] : e - if kind(x_rhs) == K"Identifier" && kind(x) == K"Identifier" - if x_rhs.name_val == x.name_val - return true - end - elseif kind(x_rhs) == K"BindingId" && kind(x) == K"BindingId" - if x_rhs.var_id == x.var_id - return true - end + if is_same_identifier_like(x, x_rhs) + return true end end return false diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 5cdc0d90b041b..cec1e88ffb644 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -38,6 +38,44 @@ end # ex # end +#------------------------------------------------------------------------------- +# Module containing macros used in the demo. +baremodule M + using Base + + using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode + using JuliaSyntax + + macro K_str(str) + convert(JuliaSyntax.Kind, str) + end + + function var"@inert"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == K"quote" + @ast __context__ ex [K"inert" ex] + end + + function var"@label"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex ex=>K"symbolic_label" + end + + function var"@goto"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex ex=>K"symbolic_goto" + end + + function var"@islocal"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex [K"extension" + "islocal"::K"Symbol" + ex + ] + end + + JuliaLowering.include(M, "demo_include.jl") +end + #------------------------------------------------------------------------------- # Demos of the prototype @@ -104,35 +142,6 @@ begin end """ -# JuliaLowering.include(Main, "demo_include.jl") -# -# Base.eval(M, quote -# function 
var"@inert"(__context__::JuliaLowering.MacroContext, ex) -# @chk kind(ex) == JuliaSyntax.K"quote" -# @ast __context__ ex [JuliaSyntax.K"inert" ex] -# end -# -# function var"@label"(__context__::JuliaLowering.MacroContext, ex) -# @chk kind(ex) == JuliaSyntax.K"Identifier" -# @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" -# end -# -# function var"@goto"(__context__::JuliaLowering.MacroContext, ex) -# @chk kind(ex) == JuliaSyntax.K"Identifier" -# @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" -# end -# end) -# -# JuliaLowering.include_string(M, """ -# xx = "xx in M" -# macro test_inert_quote() -# println(xx) -# @inert quote -# (\$xx, xx) -# end -# end -# """) - function wrapscope(ex, scope_type) makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) end @@ -470,6 +479,12 @@ begin end """ +src = """ +let +(x, y) = (y,x) +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl index 47fcbb93d2d94..2f720060ca0b6 100644 --- a/JuliaLowering/test/demo_include.jl +++ b/JuliaLowering/test/demo_include.jl @@ -1,128 +1,101 @@ -module M - using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode - using JuliaSyntax - - # Introspection - macro __MODULE__() - __context__.scope_layer.mod - end +# Introspection +macro __MODULE__() + __context__.scope_layer.mod +end - macro __FILE__() - JuliaLowering.filename(__context__.macroname) - end +macro __FILE__() + JuliaLowering.filename(__context__.macroname) +end - macro __LINE__() - JuliaLowering.source_location(__context__.macroname)[1] - end +macro __LINE__() + JuliaLowering.source_location(__context__.macroname)[1] +end - # Macro with local variables - JuliaLowering.include(M, "demo_include_2.jl") +# Macro with local variables +JuliaLowering.include(M, "demo_include_2.jl") - someglobal = "global in module M" +someglobal = "global in 
module M" - # Macro with local variables - macro foo(ex) - quote - x = "`x` from @foo" - (x, someglobal, A.@bar $ex) - #(x, someglobal, $ex, A.@bar($ex), A.@bar(x)) - end +# Macro with local variables +macro foo(ex) + quote + x = "`x` from @foo" + (x, someglobal, A.@bar $ex) + #(x, someglobal, $ex, A.@bar($ex), A.@bar(x)) end +end - macro set_a_global(val) - quote - global a_global = $val - end +macro set_a_global(val) + quote + global a_global = $val end +end - macro set_global_in_parent(ex) - e1 = adopt_scope(:(sym_introduced_from_M), __context__) - quote - $e1 = $ex - end +macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + $e1 = $ex end +end - macro baz(ex) - quote - let $ex = 10 - $ex - end +macro baz(ex) + quote + let $ex = 10 + $ex end end +end - macro make_module() - :(module X - blah = 10 - end) - end +macro make_module() + :(module X + blah = 10 + end) +end - macro return_a_value() - 42 - end +macro return_a_value() + 42 +end - macro nested_return_a_value() - :( - @return_a_value - ) - end +macro nested_return_a_value() + :( + @return_a_value + ) +end - macro inner() - :(2) - end +macro inner() + :(2) +end - macro outer() - :((1, @inner)) - end +macro outer() + :((1, @inner)) +end - macro K_str(str) - convert(JuliaSyntax.Kind, str[1].value) - end +macro K_str(str) + convert(JuliaSyntax.Kind, str[1].value) +end - # Recursive macro call - macro recursive(N) - Nval = if kind(N) == K"Integer" || kind(N) == K"Value" - N.value - end - if !(Nval isa Integer) - throw(MacroExpansionError(N, "argument must be an integer")) - end - if Nval < 1 - return N - end - quote - x = $N - (@recursive($(Nval-1)), x) - end +# Recursive macro call +macro recursive(N) + Nval = if kind(N) == K"Integer" || kind(N) == K"Value" + N.value + end + if !(Nval isa Integer) + throw(MacroExpansionError(N, "argument must be an integer")) + end + if Nval < 1 + return N end + quote + x = $N + (@recursive($(Nval-1)), x) + end +end - # function 
var"@recursive"(__context__::JuliaLowering.MacroContext, N) - # @chk kind(N) == K"Integer" - # Nval = N.value::Int - # if Nval < 1 - # return N - # end - # @ast __context__ (@HERE) [K"block" - # [K"="(@HERE) - # "x"::K"Identifier"(@HERE) - # N - # ] - # [K"tuple"(@HERE) - # "x"::K"Identifier"(@HERE) - # [K"macrocall"(@HERE) - # "@recursive"::K"Identifier" - # (Nval-1)::K"Integer" - # ] - # ] - # ] - # end - - # macro inert(ex) - # if kind(ex) != K"quote" - # throw(MacroExpansionError(ex, "expected quote")) - # end - # makenode(__context__, ex, - # makenode(__context__, ex, K"inert", ex)) - # @chk kind(ex) == JuliaSyntax.K"quote" - # @ast __context__ ex [JuliaSyntax.K"inert" ex] - # end +xx = "xx in M" + +macro test_inert_quote() + println(xx) + @inert quote + ($xx, xx) + end end diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index d680be62055fe..b10ccab46a215 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -132,3 +132,50 @@ end 14 TestMod.as 15 (return %₁₄) +######################################## +# Destructuring with simple tuple elimination +let + (x, y) = (a, b) +end +#--------------------- +1 TestMod.a +2 (= slot₁/x %₁) +3 TestMod.b +4 (= slot₂/y %₃) +5 TestMod.a +6 TestMod.b +7 (call core.tuple %₅ %₆) +8 (return %₇) + +######################################## +# Destructuring with simple tuple elimination and non effect-free rhs +let + (x, y) = (f(), b) +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.b +4 (= slot₂/y %₃) +5 (= slot₁/x %₂) +6 TestMod.b +7 (call core.tuple %₂ %₆) +8 (return %₇) + +######################################## +# Destructuring with tuple elimination where variables are repeated +let + (x, y, z) = (y, a, x) +end +#--------------------- +1 slot₂/y +2 TestMod.a +3 (= slot₂/y %₂) +4 slot₁/x +5 (= slot₃/z %₄) +6 (= slot₁/x %₁) +7 TestMod.a +8 slot₁/x +9 (call core.tuple %₁ %₇ %₈) +10 (return %₉) + From 
4cd7474590da8af6f591dfdb3cef91dd471de3e7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 11 Sep 2024 14:21:18 +1000 Subject: [PATCH 0856/1109] Implement equivalent for `Expr(:islocal)` --- JuliaLowering/src/ast.jl | 6 ++-- JuliaLowering/src/scope_analysis.jl | 21 ++++++++--- JuliaLowering/test/demo.jl | 6 ++++ JuliaLowering/test/scopes.jl | 2 ++ JuliaLowering/test/scopes_ir.jl | 54 +++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 JuliaLowering/test/scopes_ir.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 7dfb920549779..296b652b1985a 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -491,11 +491,11 @@ function is_quoted(ex) meta inbounds inline noinline loopinfo" end -function is_assertion(ex, type) - kind(ex) == K"assert" || return false +function extension_type(ex) + @assert kind(ex) == K"extension" || kind(ex) == K"assert" @chk numchildren(ex) >= 1 @chk kind(ex[1]) == K"Symbol" - return ex[1].name_val == type + ex[1].name_val end function is_sym_decl(x) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index fa19282ed745e..38618600060b0 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -373,8 +373,6 @@ function _resolve_scopes(ctx, ex::SyntaxTree) # ex elseif k == K"local" makeleaf(ctx, ex, K"TOMBSTONE") - # elseif locals # return Dict of locals - # elseif islocal elseif k == K"lambda" lambda_info = ex.lambda_info scope = analyze_scope(ctx, ex, nothing, lambda_info) @@ -398,19 +396,34 @@ function _resolve_scopes(ctx, ex::SyntaxTree) body pop!(ctx.scope_stack) @ast ctx ex [K"block" body...] 
+ elseif k == K"extension" + etype = extension_type(ex) + if etype == "islocal" + id = lookup_var(ctx, NameKey(ex[2])) + islocal = !isnothing(id) && var_kind(ctx, id) != :global + @ast ctx ex islocal::K"Bool" + elseif etype == "locals" + # return Dict of locals + TODO(ex, "@locals") + else + throw(LoweringError(ex, "Unknown syntax extension")) + end elseif k == K"assert" - if is_assertion(ex, "require_existing_locals") + etype = extension_type(ex) + if etype == "require_existing_locals" for v in ex[2:end] vk = var_kind(ctx, NameKey(v)) if vk !== :local throw(LoweringError(v, "`outer` annotations must match with a local variable in an outer scope but no such variable was found")) end end - elseif is_assertion(ex, "global_toplevel_only") + elseif etype == "global_toplevel_only" if !ctx.scope_stack[end].is_toplevel_global_scope e = ex[2][1] throw(LoweringError(e, "$(kind(e)) is only allowed in global scope")) end + else + throw(LoweringError(ex, "Unknown syntax assertion")) end makeleaf(ctx, ex, K"TOMBSTONE") elseif k == K"const_if_global" diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index cec1e88ffb644..e8c9272e4348b 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -485,6 +485,12 @@ let end """ +src = """ +let x = 1 + M.@islocal x +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl index 1ec037326ad0a..9b811f073bd71 100644 --- a/JuliaLowering/test/scopes.jl +++ b/JuliaLowering/test/scopes.jl @@ -43,4 +43,6 @@ JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) @test test_mod.z == 2 +test_ir_cases(joinpath(@__DIR__, "scopes_ir.jl")) + end diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl new file mode 100644 index 0000000000000..0746abc8af56c --- /dev/null 
+++ b/JuliaLowering/test/scopes_ir.jl @@ -0,0 +1,54 @@ +using JuliaLowering +using JuliaLowering: kind, @chk, @ast, @K_str + +function var"@islocal"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex [K"extension" + "islocal"::K"Symbol" + ex + ] +end + +#******************************************************************************* +######################################## +# @islocal +let x = 1 + @islocal(a), @islocal(x) +end +#--------------------- +1 1 +2 (= slot₁/x %₁) +3 (call core.tuple false true) +4 (return %₃) + +######################################## +# @islocal +let y = 2 + function f(x) + @islocal(a), @islocal(x), @islocal(y) + end +end +#--------------------- +1 2 +2 (= slot₁/y %₁) +3 (method :f) +4 TestMod.f +5 (call core.Typeof %₄) +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 (call core.svec %₆ %₇ :($(QuoteNode(:(#= line 2 =#))))) +9 --- method :f %₈ + 1 (call core.tuple false true true) + 2 (return %₁) +10 (return %₃) + +######################################## +# @islocal +begin + global x + @islocal(x) +end +#--------------------- +1 (global TestMod.x) +2 (return false) + From a8054ad78ec6c6b627c23ef50f5f5dfb1fa9673a Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Wed, 11 Sep 2024 18:10:35 +0200 Subject: [PATCH 0857/1109] Fix serialization of kinds with multibyte chars (JuliaLang/JuliaSyntax.jl#501) This patch fixes serialization of `Kind`s to use `sizeof` (number of bytes) instead of `length` (number of characters) when computing number of bytes in the stringified `Kind`. --- JuliaSyntax/src/kinds.jl | 2 +- JuliaSyntax/test/serialization.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index c5b43e9eb350a..2386fe2630529 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -66,7 +66,7 @@ end # can be serialized and deserialized across different JuliaSyntax versions. 
function Base.write(io::IO, k::Kind) str = convert(String, k) - write(io, UInt8(length(str))) + write(io, str) + write(io, UInt8(sizeof(str))) + write(io, str) end function Base.read(io::IO, ::Type{Kind}) len = read(io, UInt8) diff --git a/JuliaSyntax/test/serialization.jl b/JuliaSyntax/test/serialization.jl index 5d194f0550fd7..abdc5fa61e72f 100644 --- a/JuliaSyntax/test/serialization.jl +++ b/JuliaSyntax/test/serialization.jl @@ -19,7 +19,7 @@ end end @testset "Serialization $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] - x = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + x = JuliaSyntax.parsestmt(T, "f(x) = x ⋅ 2") f = tempname() open(f, "w") do io serialize(io, x) From 8b4f72dfa3d364fec96cf1791b69f1566f251806 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 13 Sep 2024 11:59:38 +1000 Subject: [PATCH 0858/1109] Lowering to support returning locals as in `Base.@locals` --- JuliaLowering/src/ast.jl | 9 +++-- JuliaLowering/src/desugaring.jl | 6 ++-- JuliaLowering/src/eval.jl | 1 + JuliaLowering/src/kinds.jl | 1 + JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 55 +++++++++++++++++++++++------ JuliaLowering/src/syntax_graph.jl | 15 ++++---- JuliaLowering/test/demo.jl | 11 ++++++ JuliaLowering/test/scopes_ir.jl | 50 ++++++++++++++++++++++++-- 9 files changed, 124 insertions(+), 26 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 296b652b1985a..6c667e25173d8 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -63,6 +63,7 @@ struct BindingInfo type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 is_const::Bool # Single assignment, defined before use is_ssa::Bool # Single assignment, defined before use + is_internal::Bool # True for internal bindings generated by the compiler is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) end @@ -71,8 +72,9 @@ function BindingInfo(name::AbstractString, kind::Symbol; 
type::Union{Nothing,SyntaxTree} = nothing, is_const::Bool = false, is_ssa::Bool = false, + is_internal::Bool = false, is_ambiguous_local::Bool = false) - BindingInfo(name, kind, mod, type, is_const, is_ssa, is_ambiguous_local) + BindingInfo(name, kind, mod, type, is_const, is_ssa, is_internal, is_ambiguous_local) end """ @@ -115,6 +117,7 @@ function update_binding!(bindings::Bindings, x; type=nothing, is_const=nothing) isnothing(type) ? b.type : type, isnothing(is_const) ? b.is_const : is_const, b.is_ssa, + b.is_internal, b.is_ambiguous_local, ) end @@ -226,7 +229,7 @@ top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) # Create a new SSA binding function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") # TODO: Store this name in only one place? Probably use the provenance chain? - id = new_binding(ctx.bindings, BindingInfo(name, :local; is_ssa=true)) + id = new_binding(ctx.bindings, BindingInfo(name, :local; is_ssa=true, is_internal=true)) # Create an identifier nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) makeleaf(ctx, nameref, K"BindingId", var_id=id) @@ -238,7 +241,7 @@ end # Create a new local mutable variable function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) - id = new_binding(ctx.bindings, BindingInfo(name, :local)) + id = new_binding(ctx.bindings, BindingInfo(name, :local; is_internal=true)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) add_lambda_local!(ctx, id) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a0265ef07c0c9..4ba9d6e396745 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -936,7 +936,7 @@ function expand_function_def(ctx, ex, docs) if kind(name) == K"::" if numchildren(name) == 1 farg = @ast ctx name [K"::" - "#self#"::K"Identifier" + new_mutable_var(ctx, name, "#self#") name[1] ] else @@ -949,7 +949,7 @@ function expand_function_def(ctx, ex, docs) 
throw(LoweringError(name, "Invalid function name")) end farg = @ast ctx name [K"::" - "#self#"::K"Identifier" + new_mutable_var(ctx, name, "#self#") [K"call" "Typeof"::K"core" name @@ -971,7 +971,7 @@ function expand_function_def(ctx, ex, docs) push!(arg_names, aname) atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) @assert !info.is_nospecialize # TODO - @assert !isnothing(info.name) && kind(info.name) == K"Identifier" # TODO + @assert !isnothing(info.name) && is_identifier_like(info.name) # TODO if info.is_slurp if i != length(args) throw(LoweringError(arg, "`...` may only be used for the last function argument")) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 2209fadfa760e..6e21c67135be5 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -252,6 +252,7 @@ function to_lowered_expr(mod, bindings, ex) k == K"const" ? :const : k == K"leave" ? :leave : k == K"pop_exception" ? :pop_exception : + k == K"isdefined" ? :isdefined : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 2a4e6b1bc3191..199ba95a30873 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -37,6 +37,7 @@ function _register_kinds() "lambda" "toplevel_butfirst" "const_if_global" + "isdefined" "moved_local" "foreigncall" "new" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 2df29b24fc504..e656fc84e19a6 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -791,7 +791,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) nothing elseif k == K"local_def" || k == K"local" nothing - elseif k == K"const" + elseif k == K"const" || k == K"isdefined" emit(ctx, ex) else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 38618600060b0..bd846c40edc14 100644 --- 
a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -207,16 +207,20 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) var_ids = Dict{NameKey,IdTag}() # Add lambda arguments - if !isnothing(lambda_info) - for a in lambda_info.args - varkey = NameKey(a) - var_ids[varkey] = init_binding(ctx, varkey, :argument) - end - for a in lambda_info.static_parameters - varkey = NameKey(a) - var_ids[varkey] = init_binding(ctx, varkey, :static_parameter) + function add_lambda_args(args, var_kind) + for a in args + if kind(a) == K"Identifier" + varkey = NameKey(a) + var_ids[varkey] = init_binding(ctx, varkey, var_kind) + elseif kind(a) != K"BindingId" + throw(LoweringError(a, "Unexpected lambda arg kind")) + end end end + if !isnothing(lambda_info) + add_lambda_args(lambda_info.args, :argument) + add_lambda_args(lambda_info.static_parameters, :static_parameter) + end global_keys = Set(first(g) for g in globals) # Add explicit locals @@ -403,8 +407,39 @@ function _resolve_scopes(ctx, ex::SyntaxTree) islocal = !isnothing(id) && var_kind(ctx, id) != :global @ast ctx ex islocal::K"Bool" elseif etype == "locals" - # return Dict of locals - TODO(ex, "@locals") + stmts = SyntaxList(ctx) + locals_dict = ssavar(ctx, ex, "locals_dict") + push!(stmts, @ast ctx ex [K"=" + locals_dict + [K"call" + [K"call" + "apply_type"::K"core" + "Dict"::K"top" + "Symbol"::K"core" + "Any"::K"core" + ] + ] + ]) + for scope in ctx.scope_stack + for id in values(scope.var_ids) + binfo = lookup_binding(ctx, id) + if binfo.kind == :global || binfo.is_internal + continue + end + binding = @ast ctx (@ast ctx ex binfo.name::K"Identifier") id::K"BindingId" + push!(stmts, @ast ctx ex [K"if" + [K"isdefined" binding] + [K"call" + "setindex!"::K"top" + locals_dict + binding + binfo.name::K"Symbol" + ] + ]) + end + end + push!(stmts, locals_dict) + makenode(ctx, ex, K"block", stmts) else throw(LoweringError(ex, "Unknown syntax extension")) end diff --git 
a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index c4388ecaaad38..fdef199e84146 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -430,10 +430,13 @@ function _value_string(ex) end if k == K"slot" || k == K"BindingId" p = provenance(ex)[1] - while kind(p) != K"Identifier" + while p isa SyntaxTree + if kind(p) == K"Identifier" + str = "$(str)/$(p.name_val)" + break + end p = provenance(p)[1] end - str = "$(str)/$(p.name_val)" end return str end @@ -555,7 +558,7 @@ function Base.getindex(v::SyntaxList, r::UnitRange) end function Base.setindex!(v::SyntaxList, ex::SyntaxTree, i::Int) - check_same_graph(v, ex) + check_compatible_graph(v, ex) v.ids[i] = ex._id end @@ -564,12 +567,12 @@ function Base.setindex!(v::SyntaxList, id::NodeId, i::Int) end function Base.push!(v::SyntaxList, ex::SyntaxTree) - check_same_graph(v, ex) + check_compatible_graph(v, ex) push!(v.ids, ex._id) end function Base.pushfirst!(v::SyntaxList, ex::SyntaxTree) - check_same_graph(v, ex) + check_compatible_graph(v, ex) pushfirst!(v.ids, ex._id) end @@ -581,7 +584,7 @@ function Base.append!(v::SyntaxList, exs) end function Base.append!(v::SyntaxList, exs::SyntaxList) - check_same_graph(v, exs) + check_compatible_graph(v, exs) append!(v.ids, exs.ids) v end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index e8c9272e4348b..e6abe485e1070 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -73,6 +73,10 @@ baremodule M ] end + function var"@locals"(__context__::JuliaLowering.MacroContext) + @ast __context__ __context__.macroname [K"extension" "locals"::K"Symbol"] + end + JuliaLowering.include(M, "demo_include.jl") end @@ -491,6 +495,13 @@ let x = 1 end """ +src = """ +let x = 1 + local y + M.@locals +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/scopes_ir.jl 
b/JuliaLowering/test/scopes_ir.jl index 0746abc8af56c..ae3dd3fbcc77c 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -9,9 +9,13 @@ function var"@islocal"(__context__::JuliaLowering.MacroContext, ex) ] end +function var"@locals"(__context__::JuliaLowering.MacroContext) + @ast __context__ __context__.macroname [K"extension" "locals"::K"Symbol"] +end + #******************************************************************************* ######################################## -# @islocal +# @islocal with locals and undefined vars let x = 1 @islocal(a), @islocal(x) end @@ -22,7 +26,7 @@ end 4 (return %₃) ######################################## -# @islocal +# @islocal with function arguments let y = 2 function f(x) @islocal(a), @islocal(x), @islocal(y) @@ -43,7 +47,7 @@ end 10 (return %₃) ######################################## -# @islocal +# @islocal with global begin global x @islocal(x) @@ -52,3 +56,43 @@ end 1 (global TestMod.x) 2 (return false) +######################################## +# @locals with local and global +begin + global x + local y + @locals +end +#--------------------- +1 (global TestMod.x) +2 (call core.apply_type top.Dict core.Symbol core.Any) +3 (call %₂) +4 (isdefined slot₁/y) +5 (isdefined slot₁/y) +6 (gotoifnot %₅ label₉) +7 slot₁/y +8 (call top.setindex! %₃ %₇ :y) +9 (return %₃) + +######################################## +# @locals with function args (TODO: static parameters) +function f(z) + @locals +end +#--------------------- +1 (method :f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method :f %₆ + 1 (call core.apply_type top.Dict core.Symbol core.Any) + 2 (call %₁) + 3 (isdefined slot₂/z) + 4 (isdefined slot₂/z) + 5 (gotoifnot %₄ label₇) + 6 (call top.setindex! 
%₂ slot₂/z :z) + 7 (return %₂) +8 (return %₁) + From 2621e58feba6e794e5b7a081079b5e40ac43c709 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 16 Sep 2024 11:16:34 +1000 Subject: [PATCH 0859/1109] Add `is_always_defined` flag to BindingInfo At some we can hopefully replace `local_def` with `local`, combined with a future `struct BindingFlags`. --- JuliaLowering/src/ast.jl | 16 +++++++++++----- JuliaLowering/src/closure_conversion.jl | 8 ++++++-- JuliaLowering/src/kinds.jl | 2 +- JuliaLowering/src/linear_ir.jl | 2 -- JuliaLowering/src/scope_analysis.jl | 4 ++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 6c667e25173d8..3a67188752f4a 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -61,8 +61,9 @@ struct BindingInfo kind::Symbol # :local :global :argument :static_parameter mod::Union{Nothing,Module} # Set when `kind === :global` type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 - is_const::Bool # Single assignment, defined before use + is_const::Bool # Constant, cannot be reassigned is_ssa::Bool # Single assignment, defined before use + is_always_defined::Bool # A local that we know has an assignment that dominates all usages (is never undef) is_internal::Bool # True for internal bindings generated by the compiler is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) end @@ -72,9 +73,11 @@ function BindingInfo(name::AbstractString, kind::Symbol; type::Union{Nothing,SyntaxTree} = nothing, is_const::Bool = false, is_ssa::Bool = false, + is_always_defined::Bool = is_ssa, is_internal::Bool = false, is_ambiguous_local::Bool = false) - BindingInfo(name, kind, mod, type, is_const, is_ssa, is_internal, is_ambiguous_local) + BindingInfo(name, kind, mod, type, is_const, is_ssa, is_always_defined, + is_internal, is_ambiguous_local) end """ @@ -107,7 +110,8 @@ function _binding_id(ex::SyntaxTree) ex.var_id end 
-function update_binding!(bindings::Bindings, x; type=nothing, is_const=nothing) +function update_binding!(bindings::Bindings, x; + type=nothing, is_const=nothing, is_always_defined=nothing) id = _binding_id(x) b = lookup_binding(bindings, id) bindings.info[id] = BindingInfo( @@ -117,6 +121,7 @@ function update_binding!(bindings::Bindings, x; type=nothing, is_const=nothing) isnothing(type) ? b.type : type, isnothing(is_const) ? b.is_const : is_const, b.is_ssa, + isnothing(is_always_defined) ? b.is_always_defined : is_always_defined, b.is_internal, b.is_ambiguous_local, ) @@ -240,8 +245,9 @@ function add_lambda_local!(ctx::AbstractLoweringContext, id) end # Create a new local mutable variable -function new_mutable_var(ctx::AbstractLoweringContext, srcref, name) - id = new_binding(ctx.bindings, BindingInfo(name, :local; is_internal=true)) +function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; is_always_defined=false) + id = new_binding(ctx.bindings, BindingInfo(name, :local; is_internal=true, + is_always_defined=is_always_defined)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) add_lambda_local!(ctx, id) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index f3d3512cbcef1..b486b0cb72e69 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -9,16 +9,20 @@ function add_lambda_local!(ctx::ClosureConversionCtx, id) push!(ctx.lambda_locals, id) end +# Convert `ex` to `type` by calling `convert(type, ex)` when necessary. +# +# Used for converting the right hand side of an assignment to a typed local or +# global and for converting the return value of a function call to the declared +# return type. 
function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) # Require that the caller make `type` "simple", for now (can generalize # later if necessary) kt = kind(type) @assert (kt == K"Identifier" || kt == K"BindingId" || is_literal(kt)) # Use a slot to permit union-splitting this in inference - tmp = new_mutable_var(ctx, srcref, "tmp") + tmp = new_mutable_var(ctx, srcref, "tmp", is_always_defined=true) @ast ctx srcref [K"block" - # [K"local_def" tmp] # [K"=" type_ssa renumber_assigned_ssavalues(type)] [K"=" tmp ex] [K"if" diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 199ba95a30873..5227ec7c49faf 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -28,7 +28,7 @@ function _register_kinds() # (TODO: May or may not need all these - assess later) "break_block" "scope_block" - "local_def" + "local_def" # TODO: Replace with K"local" plus BindingFlags attribute? "_while" "_do_while" "with_static_parameters" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index e656fc84e19a6..4989ab3e7e1f7 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -789,8 +789,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing - elseif k == K"local_def" || k == K"local" - nothing elseif k == K"const" || k == K"isdefined" emit(ctx, ex) else diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index bd846c40edc14..1f6113cd827bc 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -377,6 +377,10 @@ function _resolve_scopes(ctx, ex::SyntaxTree) # ex elseif k == K"local" makeleaf(ctx, ex, K"TOMBSTONE") + elseif k == K"local_def" + id = lookup_var(ctx, NameKey(ex[1])) + update_binding!(ctx, id; is_always_defined=true) + makeleaf(ctx, ex, K"TOMBSTONE") elseif k == K"lambda" lambda_info = ex.lambda_info scope = analyze_scope(ctx, ex, nothing, lambda_info) From 
25ec13426563fac7cffd71501548cb990c9d3abc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 23 Sep 2024 15:02:15 +1000 Subject: [PATCH 0860/1109] Property destructuring --- JuliaLowering/src/desugaring.jl | 69 +++++++++++++++++++------- JuliaLowering/test/demo.jl | 15 ++++++ JuliaLowering/test/destructuring.jl | 18 ++++++- JuliaLowering/test/destructuring_ir.jl | 60 ++++++++++++++++++++++ JuliaLowering/test/utils.jl | 9 +++- 5 files changed, 151 insertions(+), 20 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 4ba9d6e396745..b3bfe0bc8650b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -139,16 +139,6 @@ function _tuple_sides_match(lhs, rhs) end end -function _in_assignment_lhs(lhss, x_rhs) - for e in lhss - x = kind(e) == K"..." ? e[1] : e - if is_same_identifier_like(x, x_rhs) - return true - end - end - return false -end - # Lower `(lhss...) = rhs` in contexts where `rhs` must be a tuple at runtime # by assuming that `getfield(rhs, i)` works and is efficient. function lower_tuple_assignment(ctx, assignment_srcref, lhss, rhs) @@ -267,7 +257,43 @@ function _destructure(ctx, assignment_srcref, stmts, lhs, rhs) stmts end -# Expands all cases of general tuple destructuring +# Expands cases of property destructuring +function expand_property_destruct(ctx, ex) + @assert numchildren(ex) == 2 + lhs = ex[1] + @assert kind(lhs) == K"tuple" + if numchildren(lhs) != 1 + throw(LoweringError(ex, "Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs`")) + end + params = lhs[1] + @assert kind(params) == K"parameters" + rhs = ex[2] + stmts = SyntaxList(ctx) + rhs1 = if is_ssa(ctx, rhs) || (is_identifier_like(rhs) && + !any(is_same_identifier_like(l, rhs) for l in children(params))) + rhs + else + emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) + end + for prop in children(params) + propname = kind(prop) == K"Identifier" ? 
prop : + kind(prop) == K"::" && kind(prop[1]) == K"Identifier" ? prop[1] : + throw(LoweringError(prop, "invalid assignment location")) + push!(stmts, expand_forms_2(ctx, @ast ctx rhs1 [K"=" + prop + [K"call" + "getproperty"::K"top" + rhs1 + propname=>K"Symbol" + ] + ])) + end + push!(stmts, @ast ctx rhs1 [K"unnecessary" rhs1]) + makenode(ctx, ex, K"block", stmts) +end + +# Expands all cases of general tuple destructuring, eg +# (x,y) = (a,b) function expand_tuple_destruct(ctx, ex) lhs = ex[1] @assert kind(lhs) == K"tuple" @@ -281,14 +307,23 @@ function expand_tuple_destruct(ctx, ex) end end - if kind(rhs) == K"tuple" && !any_assignment(children(rhs)) && - !has_parameters(rhs) && _tuple_sides_match(children(lhs), children(rhs)) - return expand_forms_2(ctx, tuple_to_assignments(ctx, ex)) + if kind(rhs) == K"tuple" + num_splat = sum(kind(rh) == K"..." for rh in children(rhs)) + if num_splat == 0 && (numchildren(lhs) - num_slurp) > numchildren(rhs) + throw(LoweringError(ex, "More variables on left hand side than right hand in tuple assignment")) + end + + if !any_assignment(children(rhs)) && !has_parameters(rhs) && + _tuple_sides_match(children(lhs), children(rhs)) + return expand_forms_2(ctx, tuple_to_assignments(ctx, ex)) + end end stmts = SyntaxList(ctx) - rhs1 = if is_ssa(ctx, rhs) || (is_identifier_like(rhs) && - !_in_assignment_lhs(children(lhs), rhs)) + rhs1 = if is_ssa(ctx, rhs) || + (is_identifier_like(rhs) && + !any(is_same_identifier_like(kind(l) == K"..." ? 
l[1] : l, rhs) + for l in children(lhs))) rhs else emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) @@ -418,7 +453,7 @@ function expand_assignment(ctx, ex) elseif kl == K"tuple" # TODO: has_parameters if has_parameters(lhs) - TODO(lhs, "Destructuring with named fields") + expand_property_destruct(ctx, ex) else expand_tuple_destruct(ctx, ex) end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index e6abe485e1070..364aaf6df6f33 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -502,6 +502,21 @@ let x = 1 end """ +src = """ +let + (a, bs...,) = (1,2,3) + bs +end +""" + +src = """ +let + rhs = 1 + 2*Base.im + (; im, re) = rhs + (re, im) +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl index 7bbb51d9d20a5..10e737e0e2da1 100644 --- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -101,7 +101,7 @@ end @testset "Tuples on both sides" begin # lhs variable name in rhs -@test_broken JuliaLowering.include_string(test_mod, """ +@test JuliaLowering.include_string(test_mod, """ let x = 1 y = 2 @@ -112,4 +112,20 @@ end end +@testset "Property destructuring" begin + +# TODO: Move named tuple inside test case once we can lower it +Base.eval(test_mod, :(some_named_tuple = (a=1,b=2))) +@test JuliaLowering.include_string(test_mod, """ +let + (; a, b) = some_named_tuple + (a, b) +end +""") == (1, 2) + +@test_throws LoweringError JuliaLowering.include_string(test_mod, "(x ; a, b) = rhs") +@test_throws LoweringError JuliaLowering.include_string(test_mod, "(; a=1, b) = rhs") + +end + end diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index b10ccab46a215..5f952f57bc73e 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -179,3 +179,63 @@ end 9 (call 
core.tuple %₁ %₇ %₈) 10 (return %₉) +######################################## +# Property destructuring +let + (; x, y) = rhs +end +#--------------------- +1 TestMod.rhs +2 (= slot₁/x (call top.getproperty %₁ :x)) +3 TestMod.rhs +4 (= slot₂/y (call top.getproperty %₃ :y)) +5 TestMod.rhs +6 (return %₅) + +######################################## +# Property destructuring with colliding symbolic lhs/rhs +let + local x + (; x, y) = x +end +#--------------------- +1 slot₁/x +2 (= slot₁/x (call top.getproperty %₁ :x)) +3 (= slot₂/y (call top.getproperty %₁ :y)) +4 (return %₁) + +######################################## +# Property destructuring with nontrivial rhs +let + (; x) = f() +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 (= slot₁/x (call top.getproperty %₂ :x)) +4 (return %₂) + +######################################## +# Property destructuring with type decl +let + (; x::T) = rhs +end +#--------------------- +1 TestMod.rhs +2 (call top.getproperty %₁ :x) +3 (= slot₂/tmp %₂) +4 slot₂/tmp +5 TestMod.T +6 (call core.isa %₄ %₅) +7 (gotoifnot %₆ label₉) +8 (goto label₁₄) +9 TestMod.T +10 slot₂/tmp +11 (call top.convert %₉ %₁₀) +12 TestMod.T +13 (= slot₂/tmp (call core.typeassert %₁₁ %₁₂)) +14 slot₂/tmp +15 (= slot₁/x %₁₄) +16 TestMod.rhs +17 (return %₁₆) + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index f309708e49e25..86aed913a2865 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -101,11 +101,16 @@ function desugar(mod::Module, src::String) end function match_ir_test_case(case_str) - m = match(r"# *([^\n]*)\n((?:.|\n)*)#----*\n((?:.|\n)*)"m, strip(case_str)) + m = match(r"# *([^\n]*)\n((?:.|\n)*)"m, strip(case_str)) if isnothing(m) error("Malformatted IR test case:\n$(repr(case_str))") end - (description=strip(m[1]), input=strip(m[2]), output=strip(m[3])) + description = strip(m[1]) + inout = split(m[2], r"#----*") + input, output = length(inout) == 2 ? inout : + length(inout) == 1 ? 
(inout[1], "") : + error("Too many sections in IR test case") + (; description=strip(description), input=strip(input), output=strip(output)) end function read_ir_test_cases(filename) From 98fc93c70626356820cb93c43c0e6c9e2ec5cde6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 23 Sep 2024 22:08:48 +1000 Subject: [PATCH 0861/1109] Lowering of setindex! --- JuliaLowering/README.md | 5 ++ JuliaLowering/src/desugaring.jl | 110 ++++++++++++++++++++++++++- JuliaLowering/test/assignments.jl | 14 +++- JuliaLowering/test/assignments_ir.jl | 102 +++++++++++++++++++++++++ 4 files changed, 229 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index afd0216f85f8d..5c7807f0c0048 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -557,6 +557,11 @@ In general, we'd be replacing current *declarative* lowering targets like the setup of its arguments would need to go in a thunk. We've currently got an odd mixture of imperative and declarative lowered code. +## Bugs in Julia's lowering + +List of bugs which should be fixed upstream in flisp implementation +* `f()[begin]` has the side effect `f()` twice. +* `a[(begin=1; a=2)]` gives a weird error ## Notes on Racket's hygiene diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index b3bfe0bc8650b..b4e0729fa8125 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -38,6 +38,15 @@ function is_same_identifier_like(x, y) (kind(x) == K"BindingId" && kind(y) == K"BindingId" && x.var_id == y.var_id) end +function is_same_identifier_like(x, name::AbstractString) + return kind(x) == K"Identifier" && x.name_val == name +end + +function contains_identifier(ex, idents...) + return any(is_same_identifier_like(ex, id) for id in idents) || + (!is_leaf(ex) && any(contains_identifier(e, idents...) for e in children(ex))) +end + # Identify some expressions that are safe to repeat # # TODO: Can we use this in more places? 
@@ -372,6 +381,105 @@ function remove_argument_side_effects(ctx, stmts, ex) end end +# Replace any `begin` or `end` symbols with an expression indexing the array +# `arr` in the `n`th index. `splats` are a list of the splatted arguments that +# precede index `n` `is_last` is true when this is this +# last index +function replace_beginend(ctx, ex, arr, n, splats, is_last) + k = kind(ex) + if k == K"Identifier" && ex.name_val in ("begin", "end") + indexfunc = @ast ctx ex (ex.name_val == "begin" ? "firstindex" : "lastindex")::K"top" + if length(splats) == 0 + if is_last && n == 1 + @ast ctx ex [K"call" indexfunc arr] + else + @ast ctx ex [K"call" indexfunc arr n::K"Integer"] + end + else + splat_lengths = SyntaxList(ctx) + for splat in splats + push!(splat_lengths, @ast ctx ex [K"call" "length"::K"top" splat]) + end + @ast ctx ex [K"call" + indexfunc + arr + [K"call" + "+"::K"top" + (n - length(splats))::K"Integer" + splat_lengths... + ] + ] + end + elseif is_leaf(ex) || is_quoted(ex) + ex + elseif k == K"ref" || k == K"." + # inside ref and `.` only replace within the first argument + @ast ctx ex [k + replace_beginend(ctx, ex[1], arr, n, splats, is_last) + ex[2:end]... + ] + # elseif k == K"kw" - keyword args - what does this mean here? + # # note from flisp + # # TODO: this probably should not be allowed since keyword args aren't + # # positional, but in this context we have just used their positions anyway + else + mapchildren(e->replace_beginend(ctx, e, arr, n, splats, is_last), ctx, ex) + end +end + +# Go through indices and replace the `begin` or `end` symbol +# `arr` - array being indexed +# `idxs` - list of indices +# returns `idxs_out`; any statements that need to execute first are appended to +# `stmts`. +function process_indices(ctx, stmts, arr, idxs, expand_stmts) + has_splats = any(kind(i) == K"..." for i in idxs) + idxs_out = SyntaxList(ctx) + splats = SyntaxList(ctx) + for (n, idx0) in enumerate(idxs) + is_splat = kind(idx0) == K"..." 
+ val = replace_beginend(ctx, is_splat ? idx0[1] : idx0, + arr, n, splats, n == length(idxs)) + # TODO: kwarg? + idx = !has_splats || is_simple_atom(ctx, val) ? + val : emit_assign_tmp(stmts, ctx, expand_stmts ? expand_forms_2(ctx, val) : val) + if is_splat + push!(splats, idx) + end + push!(idxs_out, is_splat ? @ast(ctx, idx0, [K"..." idx]) : idx) + end + return idxs_out +end + +function expand_setindex(ctx, ex) + @assert kind(ex) == K"=" && numchildren(ex) == 2 + lhs = ex[1] + @assert kind(lhs) == K"ref" + @chk numchildren(lhs) >= 2 + arr = lhs[1] + idxs = lhs[2:end] + rhs = ex[2] + + stmts = SyntaxList(ctx) + if !is_leaf(arr) && any(contains_identifier(e, "begin", "end") for e in idxs) + arr = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, arr)) + end + new_idxs = process_indices(ctx, stmts, arr, idxs, true) + if !is_ssa(ctx, rhs) && !is_quoted(rhs) + rhs = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) + end + @ast ctx ex [K"block" + stmts... + expand_forms_2(ctx, @ast ctx ex [K"call" + "setindex!"::K"top" + arr + rhs + new_idxs... 
+ ]) + [K"unnecessary" rhs] + ] +end + # Expand general assignment syntax, including # * UnionAll definitions # * Chained assignments @@ -459,7 +567,7 @@ function expand_assignment(ctx, ex) end elseif kl == K"ref" # a[i1, i2] = rhs - TODO(lhs) + expand_setindex(ctx, ex) elseif kl == K"::" && numchildren(lhs) == 2 x = lhs[1] T = lhs[2] diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 11477a27272e2..26b4507b7ab90 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -42,7 +42,7 @@ end """) == (10,2) # Lowering of ref -@test_broken JuliaLowering.include_string(test_mod, """ +@test JuliaLowering.include_string(test_mod, """ let as = [0,0,0,0] as[begin] = 1 @@ -52,6 +52,18 @@ let end """) == [1, 2, 0, 4] +@test JuliaLowering.include_string(test_mod, """ +let + as = zeros(Int, 2,3) + as[begin, end] = 1 + as[end, begin] = 2 + js = (2,) + as[js..., end] = 3 + as +end +""") == [0 0 1; + 2 0 3] + # Declarations @test JuliaLowering.include_string(test_mod, """ let diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index fdd786c9a30b8..fb3b2a666ddf0 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -110,3 +110,105 @@ end 9 TestMod.x 10 (return %₉) +######################################## +# simple setindex! +a[i] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.i +4 (call top.setindex! %₂ %₁ %₃) +5 (return %₁) + +######################################## +# simple setindex! with begin +a[begin] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.a +4 (call top.firstindex %₃) +5 (call top.setindex! %₂ %₁ %₄) +6 (return %₁) + +######################################## +# simple setindex! with end +a[end] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.a +4 (call top.lastindex %₃) +5 (call top.setindex! 
%₂ %₁ %₄) +6 (return %₁) + +######################################## +# multidimensional setindex! with begin +a[i, begin] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.i +4 TestMod.a +5 (call top.firstindex %₄ 2) +6 (call top.setindex! %₂ %₁ %₃ %₅) +7 (return %₁) + +######################################## +# multidimensional setindex! with end +a[i, end] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.i +4 TestMod.a +5 (call top.lastindex %₄ 2) +6 (call top.setindex! %₂ %₁ %₃ %₅) +7 (return %₁) + +######################################## +# multidimensional setindex! with begin/end and splats +a[is..., end, js..., begin] = x +#--------------------- +1 TestMod.is +2 TestMod.a +3 (call top.length %₁) +4 (call top.+ 1 %₃) +5 (call top.lastindex %₂ %₄) +6 TestMod.js +7 TestMod.a +8 (call top.length %₁) +9 (call top.length %₆) +10 (call top.+ 2 %₈ %₉) +11 (call top.firstindex %₇ %₁₀) +12 TestMod.x +13 TestMod.a +14 (call core.tuple %₁₃ %₁₂) +15 (call core.tuple %₅) +16 (call core.tuple %₁₁) +17 (call core._apply_iterate top.iterate top.setindex! %₁₄ %₁ %₁₅ %₆ %₁₆) +18 (return %₁₂) + +######################################## +# setindex! with nontrivial array expression and begin/end +f()[end] = x +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.x +4 (call top.lastindex %₂) +5 (call top.setindex! %₂ %₃ %₄) +6 (return %₃) + +######################################## +# nested refs (fixme!) +b[a[begin]] = x +#--------------------- +1 TestMod.x +2 TestMod.b +3 TestMod.a +4 TestMod.begin +5 (call top.getindex %₃ %₄) +6 (call top.setindex! 
%₂ %₁ %₅) +7 (return %₁) + From 46e3f6186d4b0872e68e90cbb7674e6d2e669bca Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 24 Sep 2024 06:50:57 +1000 Subject: [PATCH 0862/1109] Destructuring with tuple eliminiation and `...` --- JuliaLowering/src/desugaring.jl | 64 ++++++++++++++++++++++---- JuliaLowering/src/syntax_graph.jl | 2 + JuliaLowering/test/destructuring.jl | 17 +++++++ JuliaLowering/test/destructuring_ir.jl | 16 +++++++ 4 files changed, 90 insertions(+), 9 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index b4e0729fa8125..8a0e58171ff80 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -61,6 +61,12 @@ end # Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the # tuple. Includes support for slurping/splatting. +# +# If lhss and rhss are the list of terms on each side, this function assumes +# the following have been checked: +# * There's only one `...` on the left hand side +# * Neither side has any key=val terms +# * _tuple_sides_match returns true function tuple_to_assignments(ctx, ex) lhs = ex[1] rhs = ex[2] @@ -69,21 +75,61 @@ function tuple_to_assignments(ctx, ex) elements = SyntaxList(ctx) assigned = SyntaxList(ctx) - for i in 1:numchildren(lhs) - lh = lhs[i] + il = 0 + ir = 0 + while il < numchildren(lhs) + il += 1 + ir += 1 + lh = lhs[il] if kind(lh) == K"..." - # can be null iff lh is a vararg - rh = i <= numchildren(rhs) ? rhs[i] : nothing - TODO(lh, "... in tuple_to_assignments") + TODO(lhs, "... in tuple lhs") + n_lhs = numchildren(lhs) + n_rhs = numchildren(rhs) + if il == n_lhs + # Simple case: exactly one `...` at end of lhs. Examples: + # (x, ys...) = (a,b,c) + # (ys...) = () + rhs_tmp = emit_assign_tmp(stmts, ctx, + @ast(ctx, rhs, [K"tuple" rhs[ir:end]...]), + "rhs_tmp" + ) + push!(stmts, @ast ctx ex [K"=" lh[1] rhs_tmp]) + push!(elements, @ast ctx rhs_tmp [K"..." 
rhs_tmp]) + break + else + # Exactly one lhs `...` occurs in the middle somewhere, with a + # general rhs which has one `...` term or at least as many + # non-`...` terms. + # Examples: + # (x, ys..., z) = (a, b, c, d) + # (x, ys..., z) = (a, bs...) + # (xs..., y) = (a, bs...) + # in this case we pairwise-match arguments from the end + # backward, with rhs splats falling back to the general case. + jl = n_lhs + 1 + jr = n_rhs + 1 + while jl > il && jr > ir + if kind(lhs[jl-1]) == K"..." || kind(rhs[jr-1]) == K"..." + break + end + jl -= 1 + jr -= 1 + end + rhs[jr] + end continue end - rh = rhs[i] # In other cases `rhs[i]` must exist + rh = rhs[ir] # In other cases `rhs[ir]` must exist if kind(rh) == K"..." - TODO(rh, "... in tuple_to_assignments") + @assert ir == numchildren(rhs) # _tuple_sides_match ensures this + rh_tmp = emit_assign_tmp(stmts, ctx, rh[1]) + push!(end_stmts, @ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh_tmp]) + push!(elements, @ast ctx rh [K"..." rh_tmp]) + break else if is_identifier_like(lh) && is_effect_free(rh) && - !any(is_same_identifier_like(lh, rhs[j]) for j in i+1:lastindex(rhs)) - !any(is_same_identifier_like(rh, a) for a in assigned) + !any(contains_identifier(rhs[j], lh) for j in ir+1:lastindex(rhs)) + !any(contains_identifier(a, rh) for a in assigned) # Overwrite `lh` directly if that won't cause conflicts with # other symbols push!(stmts, @ast ctx ex [K"=" lh rh]) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index fdef199e84146..ffc26b3c6cd68 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -3,6 +3,8 @@ const NodeId = Int """ Directed graph with arbitrary attributes on nodes. Used here for representing one or several syntax trees. + +TODO: Global attributes! 
""" struct SyntaxGraph{Attrs} edge_ranges::Vector{UnitRange{Int}} diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl index 10e737e0e2da1..3f99293c78b34 100644 --- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -110,8 +110,25 @@ let end """) == (2, 1) +# dotted rhs in last place +@test JuliaLowering.include_string(test_mod, """ +let + rh = (2, 3) + (x,y,z) = (1,rh...) + (x,y,z) +end +""") == (1, 2, 3) +# in value position +@test JuliaLowering.include_string(test_mod, """ +let + rh = (2, 3) + (x,y) = (1,rh...) +end +""") == (1, 2, 3) + end + @testset "Property destructuring" begin # TODO: Move named tuple inside test case once we can lower it diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index 5f952f57bc73e..d9b50c057e1e9 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -179,6 +179,22 @@ end 9 (call core.tuple %₁ %₇ %₈) 10 (return %₉) +######################################## +# Destructuring with tuple elimination and trailing rhs ... +let + (x, y) = (a, rhs...) +end +#--------------------- +1 TestMod.a +2 (= slot₁/x %₁) +3 TestMod.rhs +4 (call top.indexed_iterate %₃ 1) +5 (= slot₂/y (call core.getfield %₄ 1)) +6 TestMod.a +7 (call core.tuple %₆) +8 (call core._apply_iterate top.iterate core.tuple %₇ %₃) +9 (return %₈) + ######################################## # Property destructuring let From ae934c9c28d659bfcab573b5fbf9e777a207fbcd Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 24 Sep 2024 08:20:00 +1000 Subject: [PATCH 0863/1109] `watch_ir_tests()` tool + fix setindex! 
lowering with zero args --- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/test/assignments_ir.jl | 11 +++++++++++ JuliaLowering/test/utils.jl | 13 +++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 8a0e58171ff80..e40f9d582566a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -501,7 +501,7 @@ function expand_setindex(ctx, ex) @assert kind(ex) == K"=" && numchildren(ex) == 2 lhs = ex[1] @assert kind(lhs) == K"ref" - @chk numchildren(lhs) >= 2 + @chk numchildren(lhs) >= 1 arr = lhs[1] idxs = lhs[2:end] rhs = ex[2] diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index fb3b2a666ddf0..605bb942c3f91 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -212,3 +212,14 @@ b[a[begin]] = x 6 (call top.setindex! %₂ %₁ %₅) 7 (return %₁) +######################################## +# empty ref and setindex! +let + a[] = rhs +end +#--------------------- +1 TestMod.rhs +2 TestMod.a +3 (call top.setindex! 
%₂ %₁) +4 (return %₁) + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 86aed913a2865..e19606143ed5d 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -2,6 +2,7 @@ using Test using JuliaLowering using JuliaSyntax +import FileWatching using JuliaSyntax: sourcetext @@ -196,3 +197,15 @@ end function refresh_all_ir_test_cases(test_dir=".") foreach(refresh_ir_test_cases, filter(fn->endswith(fn, "ir.jl"), readdir(test_dir, join=true))) end + +function watch_ir_tests(dir, delay=0.5) + dir = abspath(dir) + while true + (name, event) = FileWatching.watch_folder(dir) + if endswith(name, "_ir.jl") && (event.changed || event.renamed) + FileWatching.unwatch_folder(dir) + sleep(delay) + refresh_ir_test_cases(joinpath(dir, name)) + end + end +end From 7c577d95ac1ae1aafeccb25276294e8ab6a917f6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 24 Sep 2024 14:42:53 +1000 Subject: [PATCH 0864/1109] Fixes to support `function A.b() ...` syntax --- JuliaLowering/Project.toml | 3 +- JuliaLowering/README.md | 8 +++++ JuliaLowering/src/ast.jl | 17 +++++++--- JuliaLowering/src/desugaring.jl | 48 ++++++++++++++++++---------- JuliaLowering/src/eval.jl | 9 +++--- JuliaLowering/test/assignments_ir.jl | 15 ++++----- JuliaLowering/test/demo.jl | 5 +-- JuliaLowering/test/functions.jl | 10 ++++++ JuliaLowering/test/functions_ir.jl | 28 ++++++++++++---- JuliaLowering/test/macros.jl | 2 +- JuliaLowering/test/macros_ir.jl | 36 +++++++++++++++++++++ JuliaLowering/test/scopes_ir.jl | 26 +++++++-------- JuliaLowering/test/utils.jl | 6 +++- 13 files changed, 152 insertions(+), 61 deletions(-) create mode 100644 JuliaLowering/test/macros_ir.jl diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml index d7b465c86fc43..f781c0888d4a0 100644 --- a/JuliaLowering/Project.toml +++ b/JuliaLowering/Project.toml @@ -11,6 +11,7 @@ julia = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +FileWatching = 
"7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [targets] -test = ["Test"] +test = ["Test", "FileWatching"] diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 5c7807f0c0048..76e3db421fffa 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -344,6 +344,13 @@ multiple `return`s create multiple tags rather than assigning to a single variable. Collapsing these into a single case might be worth considering? But also might be worse for type inference in some cases?) +## Untyped IR + +Julia's untyped IR as held in the `CodeInfo` data structure is an array of +statements of type `Expr` with a small number of allowed forms. The IR obeys +certain invariants which are checked by the downstream code in +base/compiler/validation.jl. + ## Julia's existing lowering implementation ### How does macro expansion work? @@ -562,6 +569,7 @@ odd mixture of imperative and declarative lowered code. List of bugs which should be fixed upstream in flisp implementation * `f()[begin]` has the side effect `f()` twice. * `a[(begin=1; a=2)]` gives a weird error +* `function A.ccall() ; end` allows `ccall` as a name but it's not allowed without the `A.` ## Notes on Racket's hygiene diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 3a67188752f4a..9c106c994e7ad 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -535,13 +535,22 @@ function any_assignment(exs) any(kind(e) == K"=" for e in exs) end +# Check valid identifier/function names function is_valid_name(ex) - n = identifier_name(ex).name_val - n !== "ccall" && n !== "cglobal" + k = kind(ex) + if k == K"Identifier" + name = ex.name_val + elseif k == K"var" + name = ex[1].name_val + elseif k == K"." + return is_valid_name(ex[2]) + end + return name != "ccall" && name != "cglobal" end -function identifier_name(ex) - kind(ex) == K"var" ? ex[1] : ex +function is_valid_modref(ex) + return kind(ex) == K"." 
&& kind(ex[2]) == K"Identifier" && + (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) end function decl_var(ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index e40f9d582566a..fcfe97710fd06 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1103,7 +1103,8 @@ function expand_function_def(ctx, ex, docs) return_type = name[2] name = name[1] end - if numchildren(ex) == 1 && is_identifier(name) # TODO: Or name as globalref + if numchildren(ex) == 1 && is_identifier_like(name) + # Function declaration with no methods if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) end @@ -1120,8 +1121,10 @@ function expand_function_def(ctx, ex, docs) static_parameters = SyntaxList(ctx) # Add self argument where necessary - args = name[2:end] - name = name[1] + args = callex[2:end] + name = callex[1] + function_name = nothing + func_var = ssavar(ctx, name, "func_var") if kind(name) == K"::" if numchildren(name) == 1 farg = @ast ctx name [K"::" @@ -1132,19 +1135,25 @@ function expand_function_def(ctx, ex, docs) TODO("Fixme type") farg = name end - function_name = nothing_(ctx, ex) else if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) end + if is_identifier_like(name) + function_name = @ast ctx name name=>K"Symbol" + func_var_assignment = @ast ctx name [K"=" func_var [K"method" function_name]] + end farg = @ast ctx name [K"::" new_mutable_var(ctx, name, "#self#") [K"call" "Typeof"::K"core" - name + func_var ] ] - function_name = name + end + if isnothing(function_name) + function_name = nothing_(ctx, name) + func_var_assignment = @ast ctx name [K"=" func_var name] end args = pushfirst!(collect(args), farg) @@ -1195,9 +1204,9 @@ function expand_function_def(ctx, ex, docs) end @ast ctx ex [ K"block" - func = [K"method" function_name=>K"Symbol"] + func_var_assignment [K"method" - function_name=>K"Symbol" + function_name preamble [K"lambda"(body, 
lambda_info=LambdaInfo(arg_names, static_parameters, ret_var, false)) body @@ -1206,12 +1215,12 @@ function expand_function_def(ctx, ex, docs) if !isnothing(docs) [K"call"(docs) bind_docs!::K"Value" - func + func_var docs[1] method_metadata ] end - [K"unnecessary" func] + [K"unnecessary" func_var] ] elseif kind(name) == K"tuple" TODO(name, "Anon function lowering") @@ -1220,11 +1229,17 @@ function expand_function_def(ctx, ex, docs) end end -function _make_macro_name(ctx, name) - @chk kind(name) == K"Identifier" (name, "invalid macro name") - ex = mapleaf(ctx, name, K"Identifier") - ex.name_val = "@$(name.name_val)" - ex +function _make_macro_name(ctx, ex) + if kind(ex) == K"Identifier" + name = mapleaf(ctx, ex, K"Identifier") + name.name_val = "@$(ex.name_val)" + name + elseif is_valid_modref(ex) + @chk numchildren(ex) == 2 + @ast ctx ex [K"." ex[1] _make_macro_name(ctx, ex[2])] + else + throw(LoweringError(ex, "invalid macro name")) + end end # flisp: expand-macro-def @@ -1246,7 +1261,8 @@ function expand_macro_def(ctx, ex) [K"call"(sig) _make_macro_name(ctx, name) [K"::" - adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), name) + adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), + kind(name) == K"." ? name[1] : name) MacroContext::K"Value" ] # flisp: We don't mark these @nospecialize because all arguments to diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 6e21c67135be5..be089ad2e1b31 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -234,11 +234,10 @@ function to_lowered_expr(mod, bindings, ex) Core.EnterNode(catch_idx) : Core.EnterNode(catch_idx, to_lowered_expr(ex[2])) elseif k == K"method" - name = ex[1] - @chk kind(name) == K"Symbol" - namesym = Symbol(name.name_val) - cs = map(e->to_lowered_expr(mod, bindings, e), ex[2:end]) - Expr(:method, namesym, cs...) 
+ cs = map(e->to_lowered_expr(mod, bindings, e), children(ex)) + # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations + c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] + Expr(:method, c1, cs[2:end]...) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 605bb942c3f91..e0d93bf282e7f 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -30,17 +30,16 @@ let end #--------------------- 1 (method :b) -2 TestMod.b -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) -7 --- method :b %₆ +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 2 =#))))) +6 --- method :b %₅ 1 TestMod.d 2 (= slot₂/c %₁) 3 (return %₁) -8 (= slot₁/a %₁) -9 (return %₁) +7 (= slot₁/a %₁) +8 (return %₁) ######################################## # a.b = ... => setproperty! 
assignment diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 364aaf6df6f33..3cd1d281527af 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -510,10 +510,7 @@ end """ src = """ -let - rhs = 1 + 2*Base.im - (; im, re) = rhs - (re, im) +function A.ccall(ex) end """ diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 633d2799575c5..b478f43e722b9 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -45,6 +45,16 @@ begin end """) === (42, 255) +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +function ccall() +end +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +function A.ccall() +end +""") + Base.include_string(test_mod, """ struct X end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 3886defd9eda8..554598e910f2c 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -8,12 +8,11 @@ function f(x)::Int end #--------------------- 1 (method :f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method :f %₆ +2 (call core.Typeof %₁) +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method :f %₅ 1 TestMod.Int 2 (gotoifnot slot₂/x label₃) 3 (= slot₃/tmp 0xff) @@ -26,5 +25,20 @@ end 10 (= slot₃/tmp (call core.typeassert %₉ %₁)) 11 slot₃/tmp 12 (return %₁₁) -8 (return %₁) +7 (return %₁) + +######################################## +# Function with module ref in name +function A.f() +end +#--------------------- +1 TestMod.A +2 (call top.getproperty %₁ :f) +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ + 1 (return core.nothing) +8 (return %₂) diff --git 
a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index a95521e2347fe..2ecd6666fb7cd 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -149,7 +149,7 @@ end """) @test_throws LoweringError JuliaLowering.include_string(test_mod, """ -macro A.b(ex) +macro mmm[](ex) end """) diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl new file mode 100644 index 0000000000000..b4618268cf9d6 --- /dev/null +++ b/JuliaLowering/test/macros_ir.jl @@ -0,0 +1,36 @@ +######################################## +# Simple macro +macro add_one(ex) + quote + $ex + 1 + end +end +#--------------------- +1 (method :@add_one) +2 (call core.Typeof %₁) +3 (call core.svec %₂ JuliaLowering.MacroContext core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method :@add_one %₅ + 1 (call core.tuple slot₃/ex) + 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) + 3 (return %₂) +7 (return %₁) + +######################################## +# Macro using `__context__` +macro foo(ex) + ctx = __context__ +end +#--------------------- +1 (method :@foo) +2 (call core.Typeof %₁) +3 (call core.svec %₂ JuliaLowering.MacroContext core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method :@foo %₅ + 1 slot₂/__context__ + 2 (= slot₄/ctx %₁) + 3 (return %₁) +7 (return %₁) + diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index ae3dd3fbcc77c..469396153b23c 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -36,15 +36,14 @@ end 1 2 2 (= slot₁/y %₁) 3 (method :f) -4 TestMod.f -5 (call core.Typeof %₄) -6 (call core.svec %₅ core.Any) -7 (call core.svec) -8 (call core.svec %₆ %₇ :($(QuoteNode(:(#= line 2 =#))))) -9 --- method :f %₈ +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 2 =#))))) +8 --- method :f %₇ 
1 (call core.tuple false true true) 2 (return %₁) -10 (return %₃) +9 (return %₃) ######################################## # @islocal with global @@ -81,12 +80,11 @@ function f(z) end #--------------------- 1 (method :f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method :f %₆ +2 (call core.Typeof %₁) +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method :f %₅ 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (isdefined slot₂/z) @@ -94,5 +92,5 @@ end 5 (gotoifnot %₄ label₇) 6 (call top.setindex! %₂ slot₂/z :z) 7 (return %₂) -8 (return %₁) +7 (return %₁) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index e19606143ed5d..a80ec851bb332 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -205,7 +205,11 @@ function watch_ir_tests(dir, delay=0.5) if endswith(name, "_ir.jl") && (event.changed || event.renamed) FileWatching.unwatch_folder(dir) sleep(delay) - refresh_ir_test_cases(joinpath(dir, name)) + try + refresh_ir_test_cases(joinpath(dir, name)) + catch + @error "Error refreshing test case" exception=current_exceptions() + end end end end From c3fdd5fcf722017cfe6a45bd276ed0f0c4730bb2 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 26 Sep 2024 08:18:34 +1000 Subject: [PATCH 0865/1109] Named tuple desugaring Named tuple destructuring is needed to implement `kw_call`. Also change expansion of `K"."` in `expand_forms_1()` to lower the second element to K"Symbol" early on. 
We should probably do this upstream in JuliaSyntax :) --- JuliaLowering/src/ast.jl | 18 +++- JuliaLowering/src/desugaring.jl | 147 ++++++++++++++++++++++----- JuliaLowering/src/macro_expansion.jl | 7 ++ JuliaLowering/src/syntax_graph.jl | 10 ++ JuliaLowering/test/assignments.jl | 2 - JuliaLowering/test/branching.jl | 5 - JuliaLowering/test/decls.jl | 2 - JuliaLowering/test/demo.jl | 3 +- JuliaLowering/test/destructuring.jl | 2 - JuliaLowering/test/exceptions.jl | 2 - JuliaLowering/test/functions.jl | 2 - JuliaLowering/test/ir_tests.jl | 10 ++ JuliaLowering/test/loops.jl | 2 - JuliaLowering/test/misc.jl | 25 +++++ JuliaLowering/test/misc_ir.jl | 90 ++++++++++++++++ JuliaLowering/test/runtests.jl | 2 + JuliaLowering/test/scopes.jl | 2 - 17 files changed, 281 insertions(+), 50 deletions(-) create mode 100644 JuliaLowering/test/ir_tests.jl create mode 100644 JuliaLowering/test/misc_ir.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 9c106c994e7ad..da59cc1281eb8 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -527,10 +527,14 @@ function is_function_def(ex) return k == K"function" || k == K"->" end -function has_parameters(ex) +function has_parameters(ex::SyntaxTree) numchildren(ex) >= 1 && kind(ex[end]) == K"parameters" end +function has_parameters(args::AbstractVector) + length(args) >= 1 && kind(args[end]) == K"parameters" +end + function any_assignment(exs) any(kind(e) == K"=" for e in exs) end @@ -542,14 +546,16 @@ function is_valid_name(ex) name = ex.name_val elseif k == K"var" name = ex[1].name_val - elseif k == K"." - return is_valid_name(ex[2]) + elseif k == K"." && kind(ex[2]) == K"Symbol" + name = ex[2].name_val + else + return false end return name != "ccall" && name != "cglobal" end function is_valid_modref(ex) - return kind(ex) == K"." && kind(ex[2]) == K"Identifier" && + return kind(ex) == K"." 
&& kind(ex[2]) == K"Symbol" && (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) end @@ -566,3 +572,7 @@ function remove_empty_parameters(args) args[1:i] end +function to_symbol(ctx, ex) + @ast ctx ex ex=>K"Symbol" +end + diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index fcfe97710fd06..0b0fe01d31d36 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -458,8 +458,8 @@ function replace_beginend(ctx, ex, arr, n, splats, is_last) end elseif is_leaf(ex) || is_quoted(ex) ex - elseif k == K"ref" || k == K"." - # inside ref and `.` only replace within the first argument + elseif k == K"ref" + # inside ref, only replace within the first argument @ast ctx ex [k replace_beginend(ctx, ex[1], arr, n, splats, is_last) ex[2:end]... @@ -588,12 +588,11 @@ function expand_assignment(ctx, ex) a = lhs[1] b = lhs[2] stmts = SyntaxList(ctx) + # TODO: Do we need these first two temporaries? if !is_identifier_like(a) a = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, a), "a_tmp") end - if kind(b) == K"Identifier" - b = @ast ctx b b=>K"Symbol" - else + if kind(b) != K"Symbol" b = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, b), "b_tmp") end if !is_identifier_like(rhs) && !is_literal(rhs) @@ -605,7 +604,6 @@ function expand_assignment(ctx, ex) [K"unnecessary" rhs] ] elseif kl == K"tuple" - # TODO: has_parameters if has_parameters(lhs) expand_property_destruct(ctx, ex) else @@ -730,6 +728,101 @@ function expand_let(ctx, ex) return blk end +function _named_tuple_expr(ctx, srcref, names, values) + if isempty(names) + @ast ctx srcref [K"call" "NamedTuple"::K"core"] + else + @ast ctx srcref [K"call" + [K"curly" "NamedTuple"::K"core" [K"tuple" names...]] + # NOTE: don't use `tuple` head, so an assignment expression as a value + # doesn't turn this into another named tuple. + [K"call" "tuple"::K"core" values...] 
+ ] + end +end + +function _merge_named_tuple(ctx, srcref, old, new) + if isnothing(old) + new + else + @ast ctx srcref [K"call" "merge"::K"top" old new] + end +end + +function expand_named_tuple(ctx, ex, kws) + name_strs = Set{String}() + names = SyntaxList(ctx) + values = SyntaxList(ctx) + current_nt = nothing + for (i,kw) in enumerate(kws) + k = kind(kw) + appended_nt = nothing + name = nothing + if kind(k) == K"Identifier" + # x ==> x = x + name = to_symbol(ctx, kw) + value = kw + elseif k == K"=" + # x = a + if kind(kw[1]) != K"Identifier" && kind(kw[1]) != K"Placeholder" + throw(LoweringError(kw[1], "invalid named tuple field name")) + end + if kind(kw[2]) == K"..." + throw(LoweringError(kw[2], "`...` cannot be used in a value for a named tuple field")) + end + name = to_symbol(ctx, kw[1]) + value = kw[2] + elseif k == K"." + # a.x ==> x=a.x + if kind(kw[2]) != K"Symbol" + throw(LoweringError(kw, "invalid named tuple element")) + end + name = to_symbol(ctx, kw[2]) + value = kw + elseif k == K"call" && is_infix_op_call(kw) && numchildren(kw) == 3 && kw[2].name_val == "=>" + # a=>b ==> $a=b + appended_nt = _named_tuple_expr(ctx, kw, (kw[1],), (kw[3],)) + nothing, nothing + elseif k == K"..." + # args... 
==> splat pairs + appended_nt = kw[1] + if isnothing(current_nt) && isempty(names) + # Must call merge to create NT from an initial splat + current_nt = _named_tuple_expr(ctx, ex, (), ()) + end + nothing, nothing + else + throw(LoweringError(kw, "Invalid named tuple element")) + end + if !isnothing(name) + if kind(name) == K"Symbol" + name_str = name.name_val + if name_str in name_strs + throw(LoweringError(name, "Field name repeated in named tuple")) + end + push!(name_strs, name_str) + end + push!(names, name) + push!(values, value) + end + if !isnothing(appended_nt) + if !isempty(names) + current_nt = _merge_named_tuple(ctx, ex, current_nt, + _named_tuple_expr(ctx, ex, names, values)) + empty!(names) + empty!(values) + end + current_nt = _merge_named_tuple(ctx, ex, current_nt, appended_nt) + end + end + if !isempty(names) || isnothing(current_nt) + current_nt = _merge_named_tuple(ctx, ex, current_nt, + _named_tuple_expr(ctx, ex, names, values)) + end + @assert !isnothing(current_nt) + current_nt +end + # Wrap unsplatted arguments in `tuple`: # `[a, b, xs..., c]` -> `[(a, b), xs, (c,)]` function _wrap_unsplatted_args(ctx, call_ex, args) @@ -779,22 +872,17 @@ end function expand_dot(ctx, ex) @chk numchildren(ex) == 2 # TODO: bare `.+` syntax rhs = ex[2] - kr = kind(rhs) + # Required to support the possibly dubious syntax `a."b"`. See + # https://github.com/JuliaLang/julia/issues/26873 + # Syntax edition TODO: reconsider this; possibly restrict to only K"String"? + if !(kind(rhs) == K"string" || is_leaf(rhs)) + throw(LoweringError(rhs, "Unrecognized field access syntax")) + end expand_forms_2(ctx, @ast ctx ex [K"call" "getproperty"::K"top" ex[1] - if kr == K"Identifier" - rhs=>K"Symbol" - else - if !(kind(rhs) == K"string" || is_leaf(rhs)) - throw(LoweringError(rhs, "Unrecognized field access syntax")) - end - # Required to support the possibly dubious syntax `a."b"`. 
See - # https://github.com/JuliaLang/julia/issues/26873 - # Syntax edition TODO: reconsider this; possibly restrict to only K"String"? - rhs - end + rhs ] ) end @@ -1230,8 +1318,9 @@ function expand_function_def(ctx, ex, docs) end function _make_macro_name(ctx, ex) - if kind(ex) == K"Identifier" - name = mapleaf(ctx, ex, K"Identifier") + k = kind(ex) + if k == K"Identifier" || k == K"Symbol" + name = mapleaf(ctx, ex, k) name.name_val = "@$(ex.name_val)" name elseif is_valid_modref(ex) @@ -1513,11 +1602,19 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"try" expand_forms_2(ctx, expand_try(ctx, ex)) elseif k == K"tuple" - # TODO: named tuples - expand_forms_2(ctx, @ast ctx ex [K"call" - "tuple"::K"core" - children(ex)... - ]) + if has_parameters(ex) + if numchildren(ex) > 1 + throw(LoweringError(ex[end], "unexpected semicolon in tuple - use `,` to separate tuple elements")) + end + expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex[1]))) + elseif any_assignment(children(ex)) + expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex))) + else + expand_forms_2(ctx, @ast ctx ex [K"call" + "tuple"::K"core" + children(ex)... + ]) + end elseif k == K"$" throw(LoweringError(ex, "`\$` expression outside string or quote block")) elseif k == K"module" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index e06bb01b851a7..740c82a99d103 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -250,6 +250,13 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) expand_macro(ctx, ex) elseif k == K"module" || k == K"toplevel" || k == K"inert" ex + elseif k == K"." && numchildren(ex) == 2 + e2 = expand_forms_1(ctx, ex[2]) + if kind(e2) == K"Identifier" || kind(e2) == K"Placeholder" + # FIXME: Do the K"Symbol" transformation in the parser?? + e2 = @ast ctx e2 e2=>K"Symbol" + end + @ast ctx ex [K"." 
expand_forms_1(ctx, ex[1]) e2] elseif is_leaf(ex) ex else diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index ffc26b3c6cd68..2f37e80194bb4 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -599,6 +599,16 @@ function Base.pop!(v::SyntaxList) SyntaxTree(v.graph, pop!(v.ids)) end +function Base.resize!(v::SyntaxList, n) + resize!(v.ids, n) + v +end + +function Base.empty!(v::SyntaxList) + empty!(v.ids) + v +end + function Base.copy(v::SyntaxList) SyntaxList(v.graph, copy(v.ids)) end diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 26b4507b7ab90..826700698e6f7 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -1,7 +1,5 @@ @testset "assignments" begin -test_ir_cases(joinpath(@__DIR__, "assignments_ir.jl")) - test_mod = Module() Base.include_string(test_mod, diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index 2e8c3b60e6bf5..f040b5fc8737d 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -337,9 +337,4 @@ end """) end -#------------------------------------------------------------------------------- -@testset "Branching IR" begin - test_ir_cases(joinpath(@__DIR__,"branching_ir.jl")) -end - end diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index 7fa63e0117b1f..dd70ef4facdec 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -73,6 +73,4 @@ function f() end """) -test_ir_cases(joinpath(@__DIR__, "decls_ir.jl")) - end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 3cd1d281527af..39236e51cb455 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -510,8 +510,7 @@ end """ src = """ -function A.ccall(ex) -end +(; a=1, a=2) """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") diff --git a/JuliaLowering/test/destructuring.jl 
b/JuliaLowering/test/destructuring.jl index 3f99293c78b34..d5f5d34278f12 100644 --- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -1,7 +1,5 @@ @testset "Destructuring" begin -test_ir_cases(joinpath(@__DIR__, "loops_ir.jl")) - test_mod = Module() @testset "Destructuring via iteration" begin diff --git a/JuliaLowering/test/exceptions.jl b/JuliaLowering/test/exceptions.jl index d7d3bffc5a066..e270ae38944f7 100644 --- a/JuliaLowering/test/exceptions.jl +++ b/JuliaLowering/test/exceptions.jl @@ -260,8 +260,6 @@ end @test isempty(current_exceptions()) -test_ir_cases(joinpath(@__DIR__, "exceptions_ir.jl")) - end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index b478f43e722b9..ace116339b5e7 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -72,6 +72,4 @@ begin end """) -test_ir_cases(joinpath(@__DIR__, "functions_ir.jl")) - end diff --git a/JuliaLowering/test/ir_tests.jl b/JuliaLowering/test/ir_tests.jl new file mode 100644 index 0000000000000..3035a6f3a7bc4 --- /dev/null +++ b/JuliaLowering/test/ir_tests.jl @@ -0,0 +1,10 @@ +@testset "IR tests" begin + testdir = @__DIR__ + for filename in readdir(testdir) + if endswith(filename, "_ir.jl") + @testset "$filename" begin + test_ir_cases(joinpath(testdir, filename)) + end + end + end +end diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index b63d1a1a7a840..4e1dd8d450acf 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -53,8 +53,6 @@ break continue """) -test_ir_cases(joinpath(@__DIR__, "loops_ir.jl")) - # TODO: Test soft scope rules end diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 66e2597a2e82a..809dabbae3e02 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -20,4 +20,29 @@ end @test exc.msg == "all-underscore identifiers are 
write-only and their values cannot be used in expressions" @test JuliaLowering.is_ancestor(exc.ex, assign_underscore[1]) +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(a=1; b=2, c=3) +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(; a=xs...) +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(; a[]=1) +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(; a."b") +""") + +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(; a=1, f()) +""") + +# repeated field name +@test_throws LoweringError JuliaLowering.include_string(test_mod, """ +(; a=1, bs..., c=3, a=2) +""") + end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl new file mode 100644 index 0000000000000..0fc74e5feb0cd --- /dev/null +++ b/JuliaLowering/test/misc_ir.jl @@ -0,0 +1,90 @@ +######################################## +# Named tuple +(a=1, b=2) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 (return %₄) + +######################################## +# Named tuple with parameters +(; a=1, b=2) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 (return %₄) + +######################################## +# Empty named tuple +(;) +#--------------------- +1 (call core.NamedTuple) +2 (return %₁) + +######################################## +# Named tuple with implicit field names +(;x, a.b.c, y._) +#--------------------- +1 (call core.tuple :x :c :_) +2 (call core.apply_type core.NamedTuple %₁) +3 TestMod.x +4 TestMod.a +5 (call top.getproperty %₄ :b) +6 (call top.getproperty %₅ :c) +7 TestMod.y +8 (call top.getproperty %₇ :_) +9 (call core.tuple %₃ %₆ %₈) +10 (call %₂ %₉) +11 (return %₁₀) + +######################################## +# Named tuple with splats +(; a=1, b=2, 
bs..., c=3, ds...) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 TestMod.bs +6 (call top.merge %₄ %₅) +7 (call core.tuple :c) +8 (call core.apply_type core.NamedTuple %₇) +9 (call core.tuple 3) +10 (call %₈ %₉) +11 (call top.merge %₆ %₁₀) +12 TestMod.ds +13 (call top.merge %₁₁ %₁₂) +14 (return %₁₃) + +######################################## +# Named tuple with only splats +(; as..., bs...) +#--------------------- +1 (call core.NamedTuple) +2 TestMod.as +3 (call top.merge %₁ %₂) +4 TestMod.bs +5 (call top.merge %₃ %₄) +6 (return %₅) + +######################################## +# Named tuple with dynamic names +(; a=1, b=2, c=>d) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 TestMod.c +6 (call core.tuple %₅) +7 (call core.apply_type core.NamedTuple %₆) +8 TestMod.d +9 (call core.tuple %₈) +10 (call %₇ %₉) +11 (call top.merge %₄ %₁₀) +12 (return %₁₁) + diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 6390e24d6e741..308a81a517a35 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -6,6 +6,8 @@ include("utils.jl") include("syntax_graph.jl") + include("ir_tests.jl") + include("branching.jl") include("decls.jl") include("desugaring.jl") diff --git a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl index 9b811f073bd71..1ec037326ad0a 100644 --- a/JuliaLowering/test/scopes.jl +++ b/JuliaLowering/test/scopes.jl @@ -43,6 +43,4 @@ JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) @test test_mod.z == 2 -test_ir_cases(joinpath(@__DIR__, "scopes_ir.jl")) - end From 1577ebfdc083b67d94a2b27303e8901d61835d56 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 26 Sep 2024 09:31:18 +1000 Subject: [PATCH 0866/1109] 
=?UTF-8?q?Test=20error=20messages=20in=20IR=20t?= =?UTF-8?q?ests=20=F0=9F=98=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- JuliaLowering/src/utils.jl | 8 +++-- JuliaLowering/test/misc.jl | 34 --------------------- JuliaLowering/test/misc_ir.jl | 56 +++++++++++++++++++++++++++++++++++ JuliaLowering/test/utils.jl | 39 ++++++++++++------------ 4 files changed, 80 insertions(+), 57 deletions(-) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 5b64e9e167c40..58c39f243f0ce 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -10,13 +10,15 @@ struct LoweringError <: Exception msg::String end -function Base.showerror(io::IO, exc::LoweringError) +function Base.showerror(io::IO, exc::LoweringError; show_detail=true) print(io, "LoweringError:\n") src = sourceref(exc.ex) highlight(io, src; note=exc.msg) - print(io, "\n\nDetailed provenance:\n") - showprov(io, exc.ex, tree=true) + if show_detail + print(io, "\n\nDetailed provenance:\n") + showprov(io, exc.ex, tree=true) + end end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 809dabbae3e02..8a21d89defbc4 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -11,38 +11,4 @@ end # Placeholders @test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 -assign_underscore = parsestmt(SyntaxTree, "_ + 1", filename="foo.jl") -exc = try - JuliaLowering.eval(test_mod, assign_underscore) -catch exc - exc -end -@test exc.msg == "all-underscore identifiers are write-only and their values cannot be used in expressions" -@test JuliaLowering.is_ancestor(exc.ex, assign_underscore[1]) - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(a=1; b=2, c=3) -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(; a=xs...) 
-""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(; a[]=1) -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(; a."b") -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(; a=1, f()) -""") - -# repeated field name -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(; a=1, bs..., c=3, a=2) -""") - end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 0fc74e5feb0cd..5b8de6fa30086 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -1,3 +1,11 @@ +######################################## +# Error: Placeholder value used +_ + 1 +#--------------------- +LoweringError: +_ + 1 +╙ ── all-underscore identifiers are write-only and their values cannot be used in expressions + ######################################## # Named tuple (a=1, b=2) @@ -88,3 +96,51 @@ 11 (call top.merge %₄ %₁₀) 12 (return %₁₁) +######################################## +# Error: Named tuple with repeated fields +(; a=1, bs..., c=3, a=2) +#--------------------- +LoweringError: +(; a=1, bs..., c=3, a=2) +# ╙ ── Field name repeated in named tuple + +######################################## +# Error: Named tuple frankentuple +(a=1; b=2, c=3) +#--------------------- +LoweringError: +(a=1; b=2, c=3) +# └────────┘ ── unexpected semicolon in tuple - use `,` to separate tuple elements + +######################################## +# Error: Named tuple field dots in rhs +(; a=xs...) +#--------------------- +LoweringError: +(; a=xs...) 
+# └───┘ ── `...` cannot be used in a value for a named tuple field + +######################################## +# Error: Named tuple field invalid lhs +(; a[]=1) +#--------------------- +LoweringError: +(; a[]=1) +# └─┘ ── invalid named tuple field name + +######################################## +# Error: Named tuple element with weird dot syntax +(; a."b") +#--------------------- +LoweringError: +(; a."b") +# └───┘ ── invalid named tuple element + +######################################## +# Error: Named tuple element without valid name +(; a=1, f()) +#--------------------- +LoweringError: +(; a=1, f()) +# └─┘ ── Invalid named tuple element + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index a80ec851bb332..6d1d51d88ecb2 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -111,7 +111,9 @@ function match_ir_test_case(case_str) input, output = length(inout) == 2 ? inout : length(inout) == 1 ? (inout[1], "") : error("Too many sections in IR test case") - (; description=strip(description), input=strip(input), output=strip(output)) + expect_error = startswith(description, "Error") + (; expect_error=expect_error, description=strip(description), + input=strip(input), output=strip(output)) end function read_ir_test_cases(filename) @@ -128,19 +130,27 @@ function read_ir_test_cases(filename) [match_ir_test_case(s) for s in split(cases_str, r"####*") if strip(s) != ""]) end -function format_ir_for_test(mod, input) +function format_ir_for_test(mod, input, expect_error=false) ex = parsestmt(SyntaxTree, input) - x = JuliaLowering.lower(mod, ex) - ir = strip(sprint(JuliaLowering.print_ir, x)) - return replace(ir, string(mod)=>"TestMod") + try + x = JuliaLowering.lower(mod, ex) + ir = strip(sprint(JuliaLowering.print_ir, x)) + return replace(ir, string(mod)=>"TestMod") + catch exc + if expect_error && (exc isa LoweringError) + return sprint(io->Base.showerror(io, exc, show_detail=false)) + else + rethrow() + end + end end 
function test_ir_cases(filename::AbstractString) preamble, cases = read_ir_test_cases(filename) test_mod = Module(:TestMod) Base.include_string(test_mod, preamble) - for (description,input,ref) in cases - output = format_ir_for_test(test_mod, input) + for (expect_error, description, input, ref) in cases + output = format_ir_for_test(test_mod, input, expect_error) @testset "$description" begin if output != ref # Do our own error dumping, as @test will @@ -151,17 +161,6 @@ function test_ir_cases(filename::AbstractString) end end -function format_ir_test_case(mod, input, description="-- Add description here --") - ir = format_ir_for_test(mod, input) - """ - ######################################## - # $description - $(strip(input)) - #--------------------- - $ir - """ -end - """ Update all IR test cases in `filename` when the IR format has changed. """ @@ -174,8 +173,8 @@ function refresh_ir_test_cases(filename) println(io, preamble, "\n") println(io, "#*******************************************************************************") end - for (description,input,ref) in cases - ir = format_ir_for_test(test_mod, input) + for (expect_error, description,input,ref) in cases + ir = format_ir_for_test(test_mod, input, expect_error) if ir != ref @info "Refreshing test case $(repr(description)) in $filename" end From d3ab81140a7721f1e0d3dec06358413555529edd Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 26 Sep 2024 12:14:24 +1000 Subject: [PATCH 0867/1109] Test all lowering error messages! 
--- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/test/assignments.jl | 32 ------------ JuliaLowering/test/assignments_ir.jl | 72 ++++++++++++++++++++++++++ JuliaLowering/test/branching.jl | 37 ++++--------- JuliaLowering/test/branching_ir.jl | 34 ++++++++++++ JuliaLowering/test/decls.jl | 24 --------- JuliaLowering/test/decls_ir.jl | 49 +++++++++++++++--- JuliaLowering/test/destructuring.jl | 7 --- JuliaLowering/test/destructuring_ir.jl | 24 +++++++++ JuliaLowering/test/functions.jl | 10 ---- JuliaLowering/test/functions_ir.jl | 20 +++++++ JuliaLowering/test/loops.jl | 17 ------ JuliaLowering/test/loops_ir.jl | 32 ++++++++++++ JuliaLowering/test/macros.jl | 25 --------- JuliaLowering/test/macros_ir.jl | 50 ++++++++++++++++++ JuliaLowering/test/misc_ir.jl | 39 ++++++++++++++ JuliaLowering/test/modules.jl | 14 ----- JuliaLowering/test/quoting_ir.jl | 45 ++++++++++++++++ JuliaLowering/test/utils.jl | 3 ++ 19 files changed, 374 insertions(+), 162 deletions(-) create mode 100644 JuliaLowering/test/quoting_ir.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 0b0fe01d31d36..84870fabfba7b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -318,7 +318,7 @@ function expand_property_destruct(ctx, ex) lhs = ex[1] @assert kind(lhs) == K"tuple" if numchildren(lhs) != 1 - throw(LoweringError(ex, "Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs`")) + throw(LoweringError(lhs, "Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs`")) end params = lhs[1] @assert kind(params) == K"parameters" diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 826700698e6f7..07b482228c715 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -71,37 +71,5 @@ let end """) === 10 -#------------------------------------------------------------------------------- -# Invalid 
assignment left hand sides with specific error messages -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -a.(b) = c -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -T[x y] = z -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -T[x; y] = z -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -T[x ;;; y] = z -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -[x, y] = z -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -[x y] = z -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -[x; y] = z -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -[x ;;; y] = z -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -1 = x -""") end diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index e0d93bf282e7f..76f23e7bba97c 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -222,3 +222,75 @@ end 3 (call top.setindex! 
%₂ %₁) 4 (return %₁) +######################################## +# Error: Invalid lhs in `=` +a.(b) = rhs +#--------------------- +LoweringError: +a.(b) = rhs +└───┘ ── invalid dot call syntax on left hand side of assignment + +######################################## +# Error: Invalid lhs in `=` +T[x y] = rhs +#--------------------- +LoweringError: +T[x y] = rhs +└────┘ ── invalid spacing in left side of indexed assignment + +######################################## +# Error: Invalid lhs in `=` +T[x; y] = rhs +#--------------------- +LoweringError: +T[x; y] = rhs +└─────┘ ── unexpected `;` in left side of indexed assignment + +######################################## +# Error: Invalid lhs in `=` +T[x ;;; y] = rhs +#--------------------- +LoweringError: +T[x ;;; y] = rhs +└────────┘ ── unexpected `;` in left side of indexed assignment + +######################################## +# Error: Invalid lhs in `=` +[x, y] = rhs +#--------------------- +LoweringError: +[x, y] = rhs +└────┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +[x y] = rhs +#--------------------- +LoweringError: +[x y] = rhs +└───┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +[x; y] = rhs +#--------------------- +LoweringError: +[x; y] = rhs +└────┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +[x ;;; y] = rhs +#--------------------- +LoweringError: +[x ;;; y] = rhs +└───────┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +1 = rhs +#--------------------- +LoweringError: +1 = rhs +╙ ── invalid assignment location + diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index f040b5fc8737d..b3ebcd3b39a5a 100644 --- a/JuliaLowering/test/branching.jl +++ 
b/JuliaLowering/test/branching.jl @@ -305,36 +305,21 @@ end end @testset "symbolic goto/label" begin - JuliaLowering.include_string(test_mod, """ - let - a = [] - i = 1 - @label foo - push!(a, i) - i = i + 1 - if i <= 2 - @goto foo - end - a - end - """) == [1,2] - @test_throws LoweringError JuliaLowering.include_string(test_mod, """ - begin +JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 1 + @label foo + push!(a, i) + i = i + 1 + if i <= 2 @goto foo end - """) - - @test_throws LoweringError JuliaLowering.include_string(test_mod, """ - begin - @label foo - @label foo - end - """) + a +end +""") == [1,2] - @test_throws LoweringError JuliaLowering.include_string(test_mod, """ - x = @label foo - """) end end diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index a817e02aed77b..4d92070a0f328 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -202,3 +202,37 @@ end 23 (pop_exception %₁) 24 (return core.nothing) +######################################## +# Error: no symbolic label +begin + @goto foo +end +#--------------------- +LoweringError: +begin + @goto foo +# └─┘ ── label `foo` referenced but not defined +end + +######################################## +# Error: duplicate symbolic label +begin + @label foo + @label foo +end +#--------------------- +LoweringError: +begin + @label foo + @label foo +# └─┘ ── Label `foo` defined multiple times +end + +######################################## +# Error: using value of symbolic label +x = @label foo +#--------------------- +LoweringError: +x = @label foo +# └─┘ ── misplaced label in value position + diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index dd70ef4facdec..08484dfaec3d5 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -49,28 +49,4 @@ end @test Core.get_binding_type(test_mod, :a_typed_global_2) === Int @test test_mod.a_typed_global_2 === 10 -@test_throws LoweringError 
JuliaLowering.include_string(test_mod, """ -begin - local x::T = 1 - local x::S = 1 -end -""") - -# Const not supported on locals -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -const local x = 1 -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -let - const x = 1 -end -""") - -# global type decls only allowed at top level -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -function f() - global x::Int = 1 -end -""") - end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index eb4c10a015aa9..9648b3a764572 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -80,11 +80,48 @@ global xx::T = 10 14 (return 10) ######################################## -# Type assert (TODO: move this?) -x::T +# Error: x declared twice +begin + local x::T = 1 + local x::S = 1 +end #--------------------- -1 TestMod.x -2 TestMod.T -3 (call core.typeassert %₁ %₂) -4 (return %₃) +LoweringError: +begin + local x::T = 1 + local x::S = 1 +# └──┘ ── multiple type declarations found for `x` +end + +######################################## +# Error: Const not supported on locals +const local x = 1 +#--------------------- +LoweringError: +const local x = 1 +# ╙ ── unsupported `const` declaration on local variable + +######################################## +# Error: Const not supported on locals +let + const x = 1 +end +#--------------------- +LoweringError: +let + const x = 1 +# ╙ ── unsupported `const` declaration on local variable +end + +######################################## +# Error: global type decls only allowed at top level +function f() + global x::Int = 1 +end +#--------------------- +LoweringError: +function f() + global x::Int = 1 +# └────┘ ── type declarations for global variables must be at top level, not inside a function +end diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl index d5f5d34278f12..7e5aac5438f41 100644 
--- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -89,10 +89,6 @@ let end """) == (1, [2,3], 4) -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -(xs..., ys...) = x -""") - end @@ -138,9 +134,6 @@ let end """) == (1, 2) -@test_throws LoweringError JuliaLowering.include_string(test_mod, "(x ; a, b) = rhs") -@test_throws LoweringError JuliaLowering.include_string(test_mod, "(; a=1, b) = rhs") - end end diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index d9b50c057e1e9..d7d110f619701 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -81,6 +81,14 @@ end 12 TestMod.as 13 (return %₁₂) +######################################## +# Error: Slurping multiple args +(xs..., ys...) = x +#--------------------- +LoweringError: +(xs..., ys...) = x +# └────┘ ── multiple `...` in destructuring assignment are ambiguous + ######################################## # Recursive destructuring let @@ -255,3 +263,19 @@ end 16 TestMod.rhs 17 (return %₁₆) +######################################## +# Error: Property destructuring with frankentuple +(x ; a, b) = rhs +#--------------------- +LoweringError: +(x ; a, b) = rhs +└────────┘ ── Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs` + +######################################## +# Error: Property destructuring with values for properties +(; a=1, b) = rhs +#--------------------- +LoweringError: +(; a=1, b) = rhs +# └─┘ ── invalid assignment location + diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index ace116339b5e7..b620b3fb36aa6 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -45,16 +45,6 @@ begin end """) === (42, 255) -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -function ccall() -end -""") - -@test_throws LoweringError 
JuliaLowering.include_string(test_mod, """ -function A.ccall() -end -""") - Base.include_string(test_mod, """ struct X end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 554598e910f2c..daa48475e5ec9 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -42,3 +42,23 @@ end 1 (return core.nothing) 8 (return %₂) +######################################## +# Error: Invalid function name +function ccall() +end +#--------------------- +LoweringError: +function ccall() +# └───┘ ── Invalid function name +end + +######################################## +# Error: Invalid function name +function A.ccall() +end +#--------------------- +LoweringError: +function A.ccall() +# └─────┘ ── Invalid function name +end + diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index 4e1dd8d450acf..efa9a9b270b80 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -45,14 +45,6 @@ let end """) == [2,4] -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -break -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -continue -""") - # TODO: Test soft scope rules end @@ -152,15 +144,6 @@ let end """) == 2 -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -let - for outer i = 1:2 - nothing - end - i -end -""") - end diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl index d3c244ae1f492..6abaa48f90913 100644 --- a/JuliaLowering/test/loops_ir.jl +++ b/JuliaLowering/test/loops_ir.jl @@ -72,3 +72,35 @@ end 16 (goto label₇) 17 (return core.nothing) +######################################## +# Error: break outside for/while +break +#--------------------- +LoweringError: +break +└───┘ ── break must be used inside a `while` or `for` loop + +######################################## +# Error: continue outside for/while +continue +#--------------------- +LoweringError: +continue +└──────┘ ── 
continue must be used inside a `while` or `for` loop + +######################################## +# Error: `outer` without outer local variable +let + for outer i = 1:2 + nothing + end + i +end +#--------------------- +LoweringError: +let + for outer i = 1:2 +# ╙ ── `outer` annotations must match with a local variable in an outer scope but no such variable was found + nothing + end + diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 2ecd6666fb7cd..2338e0e440a8c 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -142,29 +142,4 @@ end == [ "2" ] - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -macro mmm(a; b=2) -end -""") - -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -macro mmm[](ex) -end -""") - -# Macros not allowed in local scope -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -let - macro foo(ex) - end -end -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -function f() - macro foo() - end -end -""") - end diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index b4618268cf9d6..56e4efc749494 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -34,3 +34,53 @@ end 3 (return %₁) 7 (return %₁) +######################################## +# Error: Macro with kw args +macro mmm(a; b=2) +end +#--------------------- +LoweringError: +macro mmm(a; b=2) +# └───┘ ── macros cannot accept keyword arguments +end + +######################################## +# Error: Bad macro name +macro mmm[](ex) +end +#--------------------- +LoweringError: +macro mmm[](ex) +# └───┘ ── invalid macro name +end + +######################################## +# Error: Macros not allowed in local scope +let + macro foo(ex) + end +end +#--------------------- +LoweringError: +let +# ┌──────────── + macro foo(ex) + end +#─────┘ ── macro is only allowed in global scope +end + 
+######################################## +# Error: Macros not allowed in local scope +function f() + macro foo() + end +end +#--------------------- +LoweringError: +function f() +# ┌────────── + macro foo() + end +#─────┘ ── macro is only allowed in global scope +end + diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 5b8de6fa30086..949fb11c1a259 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -144,3 +144,42 @@ LoweringError: (; a=1, f()) # └─┘ ── Invalid named tuple element +######################################## +# Error: Modules not allowed in local scope +let + module C + end +end +#--------------------- +LoweringError: +let +# ┌─────── + module C + end +#─────┘ ── module is only allowed in global scope +end + +######################################## +# Error: Modules not allowed in local scope +function f() + module C + end +end +#--------------------- +LoweringError: +function f() +# ┌─────── + module C + end +#─────┘ ── module is only allowed in global scope +end + +######################################## +# Basic type assert +x::T +#--------------------- +1 TestMod.x +2 TestMod.T +3 (call core.typeassert %₁ %₂) +4 (return %₃) + diff --git a/JuliaLowering/test/modules.jl b/JuliaLowering/test/modules.jl index e9e71558cf1ce..66595ee6345c4 100644 --- a/JuliaLowering/test/modules.jl +++ b/JuliaLowering/test/modules.jl @@ -31,18 +31,4 @@ begin end """)) == Module -# Modules not allowed in local scope -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -let - module C - end -end -""") -@test_throws LoweringError JuliaLowering.include_string(test_mod, """ -function f() - module C - end -end -""") - end diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl new file mode 100644 index 0000000000000..7fa05c5127e52 --- /dev/null +++ b/JuliaLowering/test/quoting_ir.jl @@ -0,0 +1,45 @@ +######################################## +# Simple interpolation +quote 
+ $x + 1 +end +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ x) + 1))) %₂) +4 (return %₃) + +######################################## +# Trivial interpolation +:($x) +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast (inert ($ x)) %₂) +4 (return %₃) + +######################################## +# Double escape +quote + quote + $$x + 1 + end +end +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast (inert (block (quote (block (call-i ($ ($ x)) + 1))))) %₂) +4 (return %₃) + +######################################## +# Error: Double escape +quote + $$x + 1 +end +#--------------------- +LoweringError: +quote + $$x + 1 +# └┘ ── `$` expression outside string or quote block +end + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 6d1d51d88ecb2..84428e648fd94 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -134,6 +134,9 @@ function format_ir_for_test(mod, input, expect_error=false) ex = parsestmt(SyntaxTree, input) try x = JuliaLowering.lower(mod, ex) + if expect_error + error("Expected a lowering error in test case") + end ir = strip(sprint(JuliaLowering.print_ir, x)) return replace(ir, string(mod)=>"TestMod") catch exc From be16e8784dbe1c75579855466c9c8a91a7ed1472 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 29 Sep 2024 06:03:38 +1000 Subject: [PATCH 0868/1109] Calls to functions with keywords --- JuliaLowering/src/desugaring.jl | 109 +++++++++++++++++++++++----- JuliaLowering/test/demo.jl | 12 +++ JuliaLowering/test/destructuring.jl | 5 +- JuliaLowering/test/functions.jl | 29 ++++++++ JuliaLowering/test/functions_ir.jl | 38 ++++++++++ JuliaLowering/test/misc_ir.jl | 2 +- 6 files changed, 173 insertions(+), 22 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 84870fabfba7b..a2cd54a0cf8c2 
100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -749,7 +749,9 @@ function _merge_named_tuple(ctx, srcref, old, new) end end -function expand_named_tuple(ctx, ex, kws) +function expand_named_tuple(ctx, ex, kws; + field_name="named tuple field", + element_name="named tuple element") name_strs = Set{String}() names = SyntaxList(ctx) values = SyntaxList(ctx) @@ -765,17 +767,17 @@ function expand_named_tuple(ctx, ex, kws) elseif k == K"=" # x = a if kind(kw[1]) != K"Identifier" && kind(kw[1]) != K"Placeholder" - throw(LoweringError(kw[1], "invalid named tuple field name")) + throw(LoweringError(kw[1], "invalid $field_name name")) end if kind(kw[2]) == K"..." - throw(LoweringError(kw[2], "`...` cannot be used in a value for a named tuple field")) + throw(LoweringError(kw[2], "`...` cannot be used in a value for a $field_name")) end name = to_symbol(ctx, kw[1]) value = kw[2] elseif k == K"." # a.x ==> x=a.x if kind(kw[2]) != K"Symbol" - throw(LoweringError(kw, "invalid named tuple element")) + throw(LoweringError(kw, "invalid $element_name")) end name = to_symbol(ctx, kw[2]) value = kw @@ -792,13 +794,13 @@ function expand_named_tuple(ctx, ex, kws) end nothing, nothing else - throw(LoweringError(kw, "Invalid named tuple element")) + throw(LoweringError(kw, "Invalid $element_name")) end if !isnothing(name) if kind(name) == K"Symbol" name_str = name.name_val if name_str in name_strs - throw(LoweringError(name, "Field name repeated in named tuple")) + throw(LoweringError(name, "Repeated $field_name name")) end push!(name_strs, name_str) end @@ -823,6 +825,25 @@ function expand_named_tuple(ctx, ex, kws) current_nt end +function expand_kw_call(ctx, srcref, farg, args, kws) + @ast ctx srcref [K"block" + func = farg + kw_container = expand_named_tuple(ctx, srcref, kws; + field_name="keyword argument", + element_name="keyword argument") + if all(kind(kw) == K"..." 
for kw in kws) + # In this case need to check kws nonempty at runtime + [K"if" + [K"call" "isempty"::K"top" kw_container] + [K"call" func args...] + [K"call" "kwcall"::K"core" kw_container func args...] + ] + else + [K"call" "kwcall"::K"core" kw_container func args...] + end + ] +end + # Wrap unsplatted arguments in `tuple`: # `[a, b, xs..., c]` -> `[(a, b), xs, (c,)]` function _wrap_unsplatted_args(ctx, call_ex, args) @@ -846,26 +867,67 @@ function _wrap_unsplatted_args(ctx, call_ex, args) wrapped end +function remove_kw_args!(ctx, args::SyntaxList) + kws = nothing + j = 0 + num_parameter_blocks = 0 + for i in 1:length(args) + arg = args[i] + k = kind(arg) + if k == K"=" + if isnothing(kws) + kws = SyntaxList(ctx) + end + push!(kws, arg) + elseif k == K"parameters" + num_parameter_blocks += 1 + if num_parameter_blocks > 1 + throw(LoweringError(arg, "Cannot have more than one group of keyword arguments separated with `;`")) + end + if numchildren(arg) == 0 + continue # ignore empty parameters (issue #18845) + end + if isnothing(kws) + kws = SyntaxList(ctx) + end + append!(kws, children(arg)) + else + j += 1 + if j < i + args[j] = args[i] + end + end + end + resize!(args, j) + return kws +end + function expand_call(ctx, ex) - cs = children(ex) - if is_infix_op_call(ex) - @chk numchildren(ex) == 3 - cs = [cs[2], cs[1], cs[3]] - elseif is_postfix_op_call(ex) - @chk numchildren(ex) == 2 - cs = [cs[2], cs[1]] + args = SyntaxList(ctx) + if is_infix_op_call(ex) || is_postfix_op_call(ex) + @chk numchildren(ex) >= 2 "Postfix/infix operators must have at least two positional arguments" + farg = ex[2] + push!(args, ex[1]) + append!(args, ex[3:end]) + else + @chk numchildren(ex) > 0 "Call expressions must have a function name" + farg = ex[1] + append!(args, ex[2:end]) + end + kws = remove_kw_args!(ctx, args) + if !isnothing(kws) + return expand_forms_2(ctx, expand_kw_call(ctx, ex, farg, args, kws)) end - # TODO: keywords - if any(kind(c) == K"..." 
for c in cs) + if any(kind(arg) == K"..." for arg in args) # Splatting, eg, `f(a, xs..., b)` @ast ctx ex [K"call" "_apply_iterate"::K"core" "iterate"::K"top" - expand_forms_2(ctx, cs[1]) - expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, cs[2:end]))... + expand_forms_2(ctx, farg) + expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... ] else - @ast ctx ex [K"call" expand_forms_2(ctx, cs)...] + @ast ctx ex [K"call" expand_forms_2(ctx, farg) expand_forms_2(ctx, args)...] end end @@ -1630,6 +1692,17 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) TODO(ex, "ref expansion") end expand_forms_2(ctx, @ast ctx ex [K"call" "getindex"::K"top" ex[1] ex[2]]) + elseif k == K"curly" + if has_parameters(ex) + throw(LoweringError(ex[end], "unexpected semicolon in type parameter list")) + end + for c in children(ex) + if kind(c) == K"=" + throw(LoweringError(c, "misplace assignment in type parameter list")) + end + end + # TODO: implicit where parameters like T{A<:B} + expand_forms_2(ctx, @ast ctx ex [K"call" "apply_type"::K"core" children(ex)...]) elseif k == K"toplevel" # The toplevel form can't be lowered here - it needs to just be quoted # and passed through to a call to eval. diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 39236e51cb455..bf433bf5a2da3 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -513,6 +513,18 @@ src = """ (; a=1, a=2) """ +function f(args...; kws...) 
+ @info "" args kws +end + +src = """ +begin + kws = (c=3, d=4) + xs = 1:3 + f(xs...; kws..., a=1, b=2) +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl index 7e5aac5438f41..8e4ef25474993 100644 --- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -125,11 +125,10 @@ end @testset "Property destructuring" begin -# TODO: Move named tuple inside test case once we can lower it -Base.eval(test_mod, :(some_named_tuple = (a=1,b=2))) @test JuliaLowering.include_string(test_mod, """ let - (; a, b) = some_named_tuple + ab = (a=1, b=2) + (; a, b) = ab (a, b) end """) == (1, 2) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index b620b3fb36aa6..005d22a7773f3 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -62,4 +62,33 @@ begin end """) +#------------------------------------------------------------------------------- +# Keyword calls +Base.eval(test_mod, :( +begin + function f(; kws...) + values(kws) + end + + function f() + "non-kw version of f" + end +end +)) + + +@test JuliaLowering.include_string(test_mod, """ +let + kws = (c=3,d=4) + f(; kws..., a=1, d=0, e=5) +end +""") == (c=3, d=0, a=1, e=5) + +@test JuliaLowering.include_string(test_mod, """ +let + kws = (;) + f(; kws..., kws...) 
+end +""") == "non-kw version of f" + end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index daa48475e5ec9..ea717d50d778e 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -62,3 +62,41 @@ function A.ccall() # └─────┘ ── Invalid function name end +######################################## +# Keyword calls +f(x; a=1, b=2) +#--------------------- +1 TestMod.f +2 (call core.tuple :a :b) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1 2) +5 (call %₃ %₄) +6 TestMod.x +7 (call core.kwcall %₅ %₁ %₆) +8 (return %₇) + +######################################## +# Keyword call with only splats for kws +f(; ks1..., ks2...) +#--------------------- +1 TestMod.f +2 (call core.NamedTuple) +3 TestMod.ks1 +4 (call top.merge %₂ %₃) +5 TestMod.ks2 +6 (call top.merge %₄ %₅) +7 (call top.isempty %₆) +8 (gotoifnot %₇ label₁₁) +9 (call %₁) +10 (return %₉) +11 (call core.kwcall %₆ %₁) +12 (return %₁₁) + +######################################## +# Error: Call with repeated keywords +f(x; a=1, a=2) +#--------------------- +LoweringError: +f(x; a=1, a=2) +# ╙ ── Repeated keyword argument name + diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 949fb11c1a259..0acd015948fba 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -102,7 +102,7 @@ _ + 1 #--------------------- LoweringError: (; a=1, bs..., c=3, a=2) -# ╙ ── Field name repeated in named tuple +# ╙ ── Repeated named tuple field name ######################################## # Error: Named tuple frankentuple From 0b69398a83d2fcb8f191008e2669b21a3d29e28c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 29 Sep 2024 13:06:48 +1000 Subject: [PATCH 0869/1109] lowering of integer powers to literal_pow --- JuliaLowering/src/desugaring.jl | 16 ++++++++++++++++ JuliaLowering/test/functions.jl | 4 ++++ JuliaLowering/test/functions_ir.jl | 20 ++++++++++++++++++++ 3 files changed, 40 
insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a2cd54a0cf8c2..ba0fbeb9a4bf1 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -926,6 +926,22 @@ function expand_call(ctx, ex) expand_forms_2(ctx, farg) expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... ] + elseif length(args) == 2 && kind(farg) == K"Identifier" && farg.name_val == "^" && + kind(args[2]) == K"Integer" + expand_forms_2(ctx, + @ast ctx ex [K"call" + "literal_pow"::K"top" + farg + args[1] + [K"call" + [K"call" + "apply_type"::K"core" + "Val"::K"top" + args[2] + ] + ] + ] + ) else @ast ctx ex [K"call" expand_forms_2(ctx, farg) expand_forms_2(ctx, args)...] end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 005d22a7773f3..79fec52b9618f 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -91,4 +91,8 @@ let end """) == "non-kw version of f" +# literal_pow +@test JuliaLowering.include_string(test_mod, """ +2^4 +""") == 16 end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index ea717d50d778e..35a5e39444d7e 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -100,3 +100,23 @@ LoweringError: f(x; a=1, a=2) # ╙ ── Repeated keyword argument name +######################################## +# literal_pow lowering +x^42 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call core.apply_type top.Val 42) +4 (call %₃) +5 (call top.literal_pow %₁ %₂ %₄) +6 (return %₅) + +######################################## +# not-literal_pow lowering :) +x^42.0 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call %₁ %₂ 42.0) +4 (return %₃) + From accbd00f6d3dca5309595852825a86de30827697 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 30 Sep 2024 13:37:59 +1000 Subject: [PATCH 0870/1109] Fixes for functions with unused arguments and argument slurping * A vector of `Slot`s 
is now created and passed into the `CodeInfo` creation pass so that code doesn't need access to the `Bindings` anymore. This is a better separation of data structures between passes. * Use K"Placeholder" for unused slots. * Fix small bug which made argument slurping broken. --- JuliaLowering/src/ast.jl | 4 +- JuliaLowering/src/desugaring.jl | 6 +- JuliaLowering/src/eval.jl | 28 ++++---- JuliaLowering/src/linear_ir.jl | 49 +++++++------ JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 5 +- JuliaLowering/test/functions.jl | 75 +++++++++++--------- JuliaLowering/test/functions_ir.jl | 100 ++++++++++++++++++++++++++- 8 files changed, 191 insertions(+), 78 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index da59cc1281eb8..92456e8e0f018 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -192,7 +192,8 @@ end function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) - if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || k == K"globalref" + if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || + k == K"globalref" || k == K"Placeholder" makeleaf(graph, srcref, k; name_val=value, kws...) elseif k == K"BindingId" makeleaf(graph, srcref, k; var_id=value, kws...) 
@@ -227,7 +228,6 @@ core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) Any_type(ctx, ex) = core_ref(ctx, ex, "Any") svec_type(ctx, ex) = core_ref(ctx, ex, "svec") nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") -unused(ctx, ex) = core_ref(ctx, ex, "UNUSED") top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index ba0fbeb9a4bf1..876d9b50199cc 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1330,17 +1330,15 @@ function expand_function_def(ctx, ex, docs) arg_types = SyntaxList(ctx) for (i,arg) in enumerate(args) info = analyze_function_arg(arg) - aname = (isnothing(info.name) || kind(info.name) == K"Placeholder") ? - unused(ctx, arg) : info.name + aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" push!(arg_names, aname) atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) @assert !info.is_nospecialize # TODO - @assert !isnothing(info.name) && is_identifier_like(info.name) # TODO if info.is_slurp if i != length(args) throw(LoweringError(arg, "`...` may only be used for the last function argument")) end - atype = @ast ctx arg [K"curly" "Vararg"::K"core" arg] + atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] end push!(arg_types, atype) end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index be089ad2e1b31..520f5cc7b8d1d 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -99,9 +99,9 @@ end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function to_code_info(ex, mod, funcname, nargs, bindings, slot_rewrites) +function to_code_info(ex, mod, funcname, nargs, slots) input_code = children(ex) - code = Any[to_lowered_expr(mod, bindings, ex) for ex in input_code] + code = Any[to_lowered_expr(mod, ex) for ex in input_code] debuginfo = ir_debug_info(ex) @@ -111,13 +111,11 @@ function to_code_info(ex, mod, funcname, 
nargs, bindings, slot_rewrites) # - call site @assume_effects ssaflags = zeros(UInt32, length(code)) - nslots = length(slot_rewrites) - slotnames = Vector{Symbol}(undef, nslots) + slotnames = Vector{Symbol}(undef, length(slots)) slot_rename_inds = Dict{String,Int}() - slotflags = Vector{UInt8}(undef, nslots) - for (id,i) in slot_rewrites - info = lookup_binding(bindings, id) - name = info.name + slotflags = Vector{UInt8}(undef, length(slots)) + for (i, slot) in enumerate(slots) + name = slot.name ni = get(slot_rename_inds, name, 0) slot_rename_inds[name] = ni + 1 if ni > 0 @@ -177,7 +175,7 @@ function to_code_info(ex, mod, funcname, nargs, bindings, slot_rewrites) ) end -function to_lowered_expr(mod, bindings, ex) +function to_lowered_expr(mod, ex) k = kind(ex) if is_literal(k) ex.value @@ -204,7 +202,7 @@ function to_lowered_expr(mod, bindings, ex) elseif k == K"SSAValue" Core.SSAValue(ex.var_id) elseif k == K"return" - Core.ReturnNode(to_lowered_expr(mod, bindings, ex[1])) + Core.ReturnNode(to_lowered_expr(mod, ex[1])) elseif is_quoted(k) if k == K"inert" ex[1] @@ -216,7 +214,7 @@ function to_lowered_expr(mod, bindings, ex) "top-level scope" : "none" # FIXME nargs = length(ex.lambda_info.args) - ir = to_code_info(ex[1], mod, funcname, nargs, bindings, ex.slot_rewrites) + ir = to_code_info(ex[1], mod, funcname, nargs, ex.slots) if ex.lambda_info.is_toplevel_thunk Expr(:thunk, ir) else @@ -227,14 +225,14 @@ function to_lowered_expr(mod, bindings, ex) elseif k == K"goto" Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, bindings, ex[1]), ex[2].id) + Core.GotoIfNot(to_lowered_expr(mod, ex[1]), ex[2].id) elseif k == K"enter" catch_idx = ex[1].id numchildren(ex) == 1 ? 
Core.EnterNode(catch_idx) : Core.EnterNode(catch_idx, to_lowered_expr(ex[2])) elseif k == K"method" - cs = map(e->to_lowered_expr(mod, bindings, e), children(ex)) + cs = map(e->to_lowered_expr(mod, e), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] Expr(:method, c1, cs[2:end]...) @@ -256,7 +254,7 @@ function to_lowered_expr(mod, bindings, ex) if isnothing(head) TODO(ex, "Unhandled form for kind $k") end - Expr(head, map(e->to_lowered_expr(mod, bindings, e), children(ex))...) + Expr(head, map(e->to_lowered_expr(mod, e), children(ex))...) end end @@ -272,7 +270,7 @@ function Core.eval(mod::Module, ex::SyntaxTree) return x end linear_ir = lower(mod, ex) - expr_form = to_lowered_expr(mod, linear_ir.bindings, linear_ir) + expr_form = to_lowered_expr(mod, linear_ir) eval(mod, expr_form) end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 4989ab3e7e1f7..416b513828bdd 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -912,33 +912,44 @@ function renumber_body(ctx, input_code, slot_rewrites) code end -function _add_slots!(slot_rewrites, bindings, ids) - n = length(slot_rewrites) + 1 - for id in ids - info = lookup_binding(bindings, id) - if info.kind == :local || info.kind == :argument - slot_rewrites[id] = n - n += 1 - end - end - slot_rewrites +struct Slot + name::String + # <- todo: flags here etc end function compile_lambda(outer_ctx, ex) - info = ex.lambda_info - # TODO: Add assignments for reassigned arguments to body using info.args - ctx = LinearIRContext(outer_ctx, info.is_toplevel_thunk, ex.lambda_locals, info.ret_var) + lambda_info = ex.lambda_info + # TODO: Add assignments for reassigned arguments to body using lambda_info.args + ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, ex.lambda_locals, lambda_info.ret_var) compile_body(ctx, ex[1]) + slots = Vector{Slot}() slot_rewrites = Dict{IdTag,Int}() - 
_add_slots!(slot_rewrites, ctx.bindings, (arg.var_id for arg in info.args)) + for arg in lambda_info.args + if kind(arg) == K"Placeholder" + # Unused functions arguments like: `_` or `::T` + push!(slots, Slot(arg.name_val)) + else + @assert kind(arg) == K"BindingId" + id = arg.var_id + info = lookup_binding(ctx.bindings, id) + @assert info.kind == :local || info.kind == :argument + push!(slots, Slot(info.name)) + slot_rewrites[id] = length(slots) + end + end # Sorting the lambda locals is required to remove dependence on Dict iteration order. - _add_slots!(slot_rewrites, ctx.bindings, sort(collect(ex.lambda_locals))) + for id in sort(collect(ex.lambda_locals)) + info = lookup_binding(ctx.bindings, id) + @assert info.kind == :local || info.kind == :argument + push!(slots, Slot(info.name)) + slot_rewrites[id] = length(slots) + end # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"lambda", makenode(ctx, ex[1], K"block", code), - lambda_info=info, - slot_rewrites=slot_rewrites + lambda_info=lambda_info, + slots=slots ) end @@ -952,8 +963,7 @@ loops, etc) to gotos and exception handling to enter/leave. We also convert """ function linearize_ir(ctx, ex) graph = ensure_attributes(ctx.graph, - slot_rewrites=Dict{IdTag,Int}, - bindings=Bindings, + slots=Vector{Slot}, mod=Module, id=Int) # TODO: Cleanup needed - `_ctx` is just a dummy context here. 
But currently @@ -967,7 +977,6 @@ function linearize_ir(ctx, ex) Dict{String, JumpTarget{GraphType}}(), Vector{JumpOrigin{GraphType}}(), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) - setattr!(graph, res._id, bindings=ctx.bindings) _ctx, res end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 740c82a99d103..5d14d900889e6 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -141,7 +141,7 @@ function eval_macro_name(ctx, ex) ctx4, ex4 = convert_closures(ctx3, ex3) ctx5, ex5 = linearize_ir(ctx4, ex4) mod = ctx.current_layer.mod - expr_form = to_lowered_expr(mod, ex5.bindings, ex5) + expr_form = to_lowered_expr(mod, ex5) eval(mod, expr_form) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 1f6113cd827bc..8c172c901aad8 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -209,10 +209,11 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # Add lambda arguments function add_lambda_args(args, var_kind) for a in args - if kind(a) == K"Identifier" + ka = kind(a) + if ka == K"Identifier" varkey = NameKey(a) var_ids[varkey] = init_binding(ctx, varkey, var_kind) - elseif kind(a) != K"BindingId" + elseif ka != K"BindingId" && ka != K"Placeholder" throw(LoweringError(a, "Unexpected lambda arg kind")) end end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 79fec52b9618f..16fe6e22025d4 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -20,6 +20,39 @@ end (2,3,4), (1,2,3,4,5)) +# Keyword calls +Base.eval(test_mod, :( +begin + function f(; kws...) 
+ values(kws) + end + + function f() + "non-kw version of f" + end +end +)) + + +@test JuliaLowering.include_string(test_mod, """ +let + kws = (c=3,d=4) + f(; kws..., a=1, d=0, e=5) +end +""") == (c=3, d=0, a=1, e=5) + +@test JuliaLowering.include_string(test_mod, """ +let + kws = (;) + f(; kws..., kws...) +end +""") == "non-kw version of f" + +# literal_pow +@test JuliaLowering.include_string(test_mod, """ +2^4 +""") == 16 + #------------------------------------------------------------------------------- # Function definitions @test JuliaLowering.include_string(test_mod, """ @@ -33,6 +66,15 @@ begin end """) == ("hi", 1, 2) +@test JuliaLowering.include_string(test_mod, """ +begin + function unused_arg(x, _, y) + x + y + end + unused_arg(1,2,3) +end +""") == 4 + @test JuliaLowering.include_string(test_mod, """ begin function g(x)::Int @@ -62,37 +104,4 @@ begin end """) -#------------------------------------------------------------------------------- -# Keyword calls -Base.eval(test_mod, :( -begin - function f(; kws...) - values(kws) - end - - function f() - "non-kw version of f" - end -end -)) - - -@test JuliaLowering.include_string(test_mod, """ -let - kws = (c=3,d=4) - f(; kws..., a=1, d=0, e=5) -end -""") == (c=3, d=0, a=1, e=5) - -@test JuliaLowering.include_string(test_mod, """ -let - kws = (;) - f(; kws..., kws...) 
-end -""") == "non-kw version of f" - -# literal_pow -@test JuliaLowering.include_string(test_mod, """ -2^4 -""") == 16 end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 35a5e39444d7e..52ed60ba540da 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1,3 +1,101 @@ +######################################## +# Functions with placeholder arg +function f(x, _, y) + x + y +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂ core.Any core.Any core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method :f %₅ + 1 TestMod.+ + 2 (call %₁ slot₂/x slot₄/y) + 3 (return %₂) +7 (return %₁) + +######################################## +# Functions with argument types only, no name +function f(::T, x) + x +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 TestMod.T +4 (call core.svec %₂ %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method :f %₆ + 1 slot₃/x + 2 (return %₁) +8 (return %₁) + +######################################## +# Functions argument types +function f(x, y::T) + body +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 TestMod.T +4 (call core.svec %₂ core.Any %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method :f %₆ + 1 TestMod.body + 2 (return %₁) +8 (return %₁) + +######################################## +# Functions with slurp of Any +function f(x, ys...) + body +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.apply_type core.Vararg core.Any) +4 (call core.svec %₂ core.Any %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method :f %₆ + 1 TestMod.body + 2 (return %₁) +8 (return %₁) + +######################################## +# Functions with slurp of T +function f(x, ys::T...) 
+ body +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 TestMod.T +4 (call core.apply_type core.Vararg %₃) +5 (call core.svec %₂ core.Any %₄) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method :f %₇ + 1 TestMod.body + 2 (return %₁) +9 (return %₁) + +######################################## +# Error: Function with slurp not in last position arg +function f(xs..., y) + body +end +#--------------------- +LoweringError: +function f(xs..., y) +# └───┘ ── `...` may only be used for the last function argument + body +end + ######################################## # Return types function f(x)::Int @@ -112,7 +210,7 @@ x^42 6 (return %₅) ######################################## -# not-literal_pow lowering :) +# almost but not quite literal_pow lowering :) x^42.0 #--------------------- 1 TestMod.^ From fe93b7d580981aae3bf4b33c7372d9b61d2a0762 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 5 Oct 2024 16:31:03 +1000 Subject: [PATCH 0871/1109] `where` desugaring --- JuliaLowering/README.md | 87 +++++++++------------- JuliaLowering/src/desugaring.jl | 95 ++++++++++++++++++++++- JuliaLowering/test/typedefs_ir.jl | 120 ++++++++++++++++++++++++++++++ 3 files changed, 247 insertions(+), 55 deletions(-) create mode 100644 JuliaLowering/test/typedefs_ir.jl diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 76e3db421fffa..4da9079e86428 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -351,6 +351,41 @@ statements of type `Expr` with a small number of allowed forms. The IR obeys certain invariants which are checked by the downstream code in base/compiler/validation.jl. 
+## Scope resolution + +Scopes are documented in the Juila documentation on +[Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/) + +The scope resolution pass disambiguates variables which have the same name in +different scopes and fills in the list of local variables within each lambda. + +During scope resolution, we maintain a stack of `ScopeInfo` data structures. + +When a new `lambda` or `scope_block` is discovered, we create a new `ScopeInfo` by +1. Find all identifiers bound or used within a scope. New *bindings* may be + introduced by one of the `local`, `global` keywords, implicitly by + assignment, as function arguments to a `lambda`, or as type arguments in a + method ("static parameters"). Identifiers are *used* when they are + referenced. +2. Infer which bindings are newly introduced local or global variables (and + thus require a distinct identity from names already in the stack) +3. Assign a `BindingId` (unique integer) to each new binding + +We then push this `ScopeInfo` onto the stack and traverse the expressions +within the scope translating each `K"Identifier"` into the associated +`K"BindingId"`. While we're doing this we also resolve some special forms like +`islocal` by making use of the scope stack. + +The detailed rules for whether assignment introduces a new variable depend on +the `scope_block`'s `scope_type` attribute when we are processing top-level +code. +* `scope_type == :hard` (as for bindings inside a `let` block) means an + assignment always introduces a new binding +* `scope_type == :neutral` - inherit soft or hard scope from the parent scope. +* `scope_type == :soft` - assignments are to globals if the variable + exists in global module scope. Soft scope doesn't have surface syntax and is + introduced for top-level code by REPL-like environments. + ## Julia's existing lowering implementation ### How does macro expansion work? @@ -414,58 +449,6 @@ Things which are expanded: the child of the `do`. 
This seems like a mess!! -### Scope resolution - -Scopes are documented in the Juila documentation on [Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/) - -This pass disambiguates variables which have the same name in different scopes -and fills in the list of local variables within each lambda. - -#### Which data is needed to define a scope? - -As scope is a collection of variable names by category: -* `argument` - arguments to a lambda -* `local` - variables declared local (at top level) or implicitly local (in lambdas) or desugared to local-def -* `global` - variables declared global (in lambdas) or implicitly global (at top level) -* `static-parameter` - lambda type arguments from `where` clauses - -#### How does scope resolution work? - -We traverse the AST starting at the root paying attention to certian nodes: -* Nodes representing identifiers (Identifier, operators, var) - - If a variable exists in the table, it's *replaced* with the value in the table. - - If it doesn't exist, it becomes an `outerref` -* Variable scoping constructs: `local`, `local-def` - - collected by scope-block - - removed during traversal -* Scope metadata `softscope`, `hardscope` - just removed -* New scopes - - `lambda` creates a new scope containing itself and its arguments, - otherwise copying the parent scope. It resolves the body with that new scope. - - `scope-block` is really complicated - see below -* Scope queries `islocal`, `locals` - - `islocal` - statically expand to true/false based on whether var name is a local var - - `locals` - return list of locals - see `@locals` - - `require-existing-local` - somewhat like `islocal`, but allows globals - too (whaa?! naming) and produces a lowering error immediately if variable - is not known. Should be called `require-in-scope` ?? -* `break-block`, `symbolicgoto`, `symboliclabel` need special handling because - one of their arguments is a non-quoted symbol. 
-* Add static parameters for generated functions `with-static-parameters` -* `method` - special handling for static params - -`scope-block` is the complicated bit. It's processed by -* Searching the expressions within the block for any `local`, `local-def`, - `global` and assigned vars. Searching doesn't recurse into `lambda`, - `scope-block`, `module` and `toplevel` -* Building lists of implicit locals or globals (depending on whether we're in a - top level thunk) -* Figuring out which local variables need to be renamed. This is any local variable - with a name which has already occurred in processing one of the previous scope blocks -* Check any conflicting local/global decls and soft/hard scope -* Build new scope with table of renames -* Resolve the body with the new scope, applying the renames - ### Intermediate forms used in lowering * `local-def` - flisp code explains this as diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 876d9b50199cc..c54d2211e8c9f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -688,12 +688,15 @@ function expand_condition(ctx, ex) end function expand_let(ctx, ex) - scope_type = get(ex, :scope_type, :hard) + @chk numchildren(ex) == 2 + bindings = ex[1] + @chk kind(bindings) == K"block" blk = ex[2] - if numchildren(ex[1]) == 0 + scope_type = get(ex, :scope_type, :hard) + if numchildren(bindings) == 0 return @ast ctx ex [K"scope_block"(scope_type=scope_type) blk] end - for binding in Iterators.reverse(children(ex[1])) + for binding in Iterators.reverse(children(bindings)) kb = kind(binding) if is_sym_decl(kb) blk = @ast ctx ex [K"scope_block"(scope_type=scope_type) @@ -1438,6 +1441,90 @@ function expand_macro_def(ctx, ex) ] end +# Analyze type signatures such as `A <: B where C` +# +# Return (name, params, supertype) +function analyze_type_sig(ctx, ex) + k = kind(ex) + if k == K"Identifier" + return (ex, (), nothing_(ctx, ex)) + elseif k == K"curly" && numchildren(ex) 
>= 1 && kind(ex[1]) == K"Identifier" + return (ex[1], ex[2:end], nothing_(ctx, ex)) + elseif k == K"<:" && numchildren(ex) == 2 + if kind(ex[1]) == K"Identifier" + return (ex[1], (), ex[2]) + elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier" + return (ex[1][1], ex[1][2:end], ex[2]) + end + end + throw(LoweringError(ex, "invalid type signature")) +end + +# Match `x<:T<:y` etc, returning `(name, lower_bound, upper_bound)` +# A bound is `nothing` if not specified +function analyze_typevar(ctx, ex) + k = kind(ex) + if k == K"Identifier" + (ex, nothing, nothing) + elseif k == K"comparison" && numchildren(ex) == 5 + kind(ex[3]) == K"Identifier" || throw(LoweringError(ex[3], "expected type name")) + if !((kind(ex[2]) == K"Identifier" && ex[2].name_val == "<:") && + (kind(ex[4]) == K"Identifier" && ex[4].name_val == "<:")) + throw(LoweringError(ex, "invalid type bounds")) + end + # a <: b <: c + (ex[3], ex[1], ex[5]) + elseif k == K"<:" && numchildren(ex) == 2 + kind(ex[1]) == K"Identifier" || throw(LoweringError(ex[1], "expected type name")) + (ex[1], nothing, ex[2]) + elseif k == K">:" && numchildren(ex) == 2 + kind(ex[2]) == K"Identifier" || throw(LoweringError(ex[2], "expected type name")) + (ex[2], ex[1], nothing) + else + throw(LoweringError(ex, "expected type name or type bounds")) + end +end + +function bounds_to_TypeVar(ctx, srcref, bounds) + name, lb, ub = bounds + # Generate call to one of + # TypeVar(name) + # TypeVar(name, ub) + # TypeVar(name, lb, ub) + @ast ctx srcref [K"call" + "TypeVar"::K"core" + name=>K"Symbol" + lb + if isnothing(ub) && !isnothing(lb) + "Any"::K"core" + else + ub + end + ] +end + +function expand_where(ctx, srcref, lhs, rhs) + bounds = analyze_typevar(ctx, rhs) + v = bounds[1] + @ast ctx srcref [K"let" + [K"block" [K"=" v bounds_to_TypeVar(ctx, srcref, bounds)]] + [K"call" "UnionAll"::K"core" v lhs] + ] +end + +function expand_wheres(ctx, ex) + body = ex[1] + rhs = ex[2] + if kind(rhs) == 
K"braces" + for r in reverse(children(rhs)) + body = expand_where(ctx, ex, body, r) + end + else + body = expand_where(ctx, ex, body, rhs) + end + body +end + function _append_importpath(ctx, path_spec, path) prev_was_dot = true for component in children(path) @@ -1661,6 +1748,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else expand_forms_2(ctx, expand_decls(ctx, ex)) # FIXME end + elseif k == K"where" + expand_forms_2(ctx, expand_wheres(ctx, ex)) elseif is_operator(k) && is_leaf(ex) makeleaf(ctx, ex, K"Identifier", ex.name_val) elseif k == K"char" || k == K"var" diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl new file mode 100644 index 0000000000000..91b02cd2c8139 --- /dev/null +++ b/JuliaLowering/test/typedefs_ir.jl @@ -0,0 +1,120 @@ +######################################## +# where expression without type bounds +A where X +#--------------------- +1 (call core.TypeVar :X) +2 (= slot₁/X %₁) +3 slot₁/X +4 TestMod.A +5 (call core.UnionAll %₃ %₄) +6 (return %₅) + +######################################## +# where expression with upper bound +A where X <: UB +#--------------------- +1 TestMod.UB +2 (call core.TypeVar :X %₁) +3 (= slot₁/X %₂) +4 slot₁/X +5 TestMod.A +6 (call core.UnionAll %₄ %₅) +7 (return %₆) + +######################################## +# where expression with lower bound +A where X >: LB +#--------------------- +1 TestMod.X +2 (call core.TypeVar :LB %₁ core.Any) +3 (= slot₁/LB %₂) +4 slot₁/LB +5 TestMod.A +6 (call core.UnionAll %₄ %₅) +7 (return %₆) + +######################################## +# where expression with both bounds +A where LB <: X <: UB +#--------------------- +1 TestMod.LB +2 TestMod.UB +3 (call core.TypeVar :X %₁ %₂) +4 (= slot₁/X %₃) +5 slot₁/X +6 TestMod.A +7 (call core.UnionAll %₅ %₆) +8 (return %₇) + +######################################## +# where expression with braces +A where {X, Y<:X} +#--------------------- +1 (call core.TypeVar :X) +2 (= slot₁/X 
%₁) +3 slot₁/X +4 slot₁/X +5 (call core.TypeVar :Y %₄) +6 (= slot₂/Y %₅) +7 slot₂/Y +8 TestMod.A +9 (call core.UnionAll %₇ %₈) +10 (call core.UnionAll %₃ %₉) +11 (return %₁₀) + +######################################## +# Equivalent nested where expression without braces +A where Y<:X where X +#--------------------- +1 (call core.TypeVar :X) +2 (= slot₁/X %₁) +3 slot₁/X +4 slot₁/X +5 (call core.TypeVar :Y %₄) +6 (= slot₂/Y %₅) +7 slot₂/Y +8 TestMod.A +9 (call core.UnionAll %₇ %₈) +10 (call core.UnionAll %₃ %₉) +11 (return %₁₀) + +######################################## +# Error: bad type bounds +A where f() +#--------------------- +LoweringError: +A where f() +# └─┘ ── expected type name or type bounds + +######################################## +# Error: bad type bounds +A where X < Y < Z +#--------------------- +LoweringError: +A where X < Y < Z +# └───────┘ ── invalid type bounds + +######################################## +# Error: bad type bounds +A where X <: f() <: Z +#--------------------- +LoweringError: +A where X <: f() <: Z +# └─┘ ── expected type name + +######################################## +# Error: bad type bounds +A where f() <: Y +#--------------------- +LoweringError: +A where f() <: Y +# └─┘ ── expected type name + +######################################## +# Error: bad type bounds +A where Y >: f() +#--------------------- +LoweringError: +A where Y >: f() +# └─┘ ── expected type name + From f5df56e5616af0558159048982f9ab7187b63d01 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 5 Oct 2024 17:28:19 +1000 Subject: [PATCH 0872/1109] Desugaring of abstract type definitions Also fix a bug in linearization of `K"isdefined"` --- JuliaLowering/src/desugaring.jl | 87 +++++++++++++++++++++++------- JuliaLowering/src/linear_ir.jl | 14 +++-- JuliaLowering/test/demo.jl | 6 ++- JuliaLowering/test/runtests.jl | 1 + JuliaLowering/test/scopes_ir.jl | 16 +++--- JuliaLowering/test/typedefs.jl | 30 +++++++++++ JuliaLowering/test/typedefs_ir.jl | 90 
+++++++++++++++++++++++++++++++ 7 files changed, 211 insertions(+), 33 deletions(-) create mode 100644 JuliaLowering/test/typedefs.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index c54d2211e8c9f..d1e8ae6bbaf4a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1441,25 +1441,6 @@ function expand_macro_def(ctx, ex) ] end -# Analyze type signatures such as `A <: B where C` -# -# Return (name, params, supertype) -function analyze_type_sig(ctx, ex) - k = kind(ex) - if k == K"Identifier" - return (ex, (), nothing_(ctx, ex)) - elseif k == K"curly" && numchildren(ex) >= 1 && kind(ex[1]) == K"Identifier" - return (ex[1], ex[2:end], nothing_(ctx, ex)) - elseif k == K"<:" && numchildren(ex) == 2 - if kind(ex[1]) == K"Identifier" - return (ex[1], (), ex[2]) - elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier" - return (ex[1][1], ex[1][2:end], ex[2]) - end - end - throw(LoweringError(ex, "invalid type signature")) -end - # Match `x<:T<:y` etc, returning `(name, lower_bound, upper_bound)` # A bound is `nothing` if not specified function analyze_typevar(ctx, ex) @@ -1503,6 +1484,72 @@ function bounds_to_TypeVar(ctx, srcref, bounds) ] end +# Analyze type signatures such as `A <: B where C` +# +# Return (name, params, supertype) +function analyze_type_sig(ctx, ex) + k = kind(ex) + if k == K"Identifier" + return (ex, (), @ast ctx ex "Any"::K"core") + elseif k == K"curly" && numchildren(ex) >= 1 && kind(ex[1]) == K"Identifier" + # name{params} + return (ex[1], ex[2:end], @ast ctx ex "Any"::K"core") + elseif k == K"<:" && numchildren(ex) == 2 + if kind(ex[1]) == K"Identifier" + return (ex[1], (), ex[2]) + elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier" + return (ex[1][1], ex[1][2:end], ex[2]) + end + end + throw(LoweringError(ex, "invalid type signature")) +end + +function expand_abstract_type(ctx, ex) + name, params, 
supertype = analyze_type_sig(ctx, ex[1]) + typevar_names = SyntaxList(ctx) + typevar_stmts = SyntaxList(ctx) + for param in params + bounds = analyze_typevar(ctx, param) + n = bounds[1] + push!(typevar_names, n) + push!(typevar_stmts, @ast ctx param [K"local" n]) + push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) + end + newtype_var = ssavar(ctx, ex, "new_type") + @ast ctx ex [K"block" + [K"scope_block"(scope_type=:neutral) + [K"block" + [K"local_def" name] + typevar_stmts... + [K"=" + newtype_var + [K"call" + "_abstracttype"::K"core" + ctx.mod::K"Value" + name=>K"Symbol" + [K"call" "svec"::K"core" typevar_names...] + ] + ] + [K"=" name newtype_var] + [K"call" "_setsuper!"::K"core" newtype_var supertype] + [K"call" "_typebody!"::K"core" newtype_var] + ] + ] + # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] FIXME + [K"global" name] + [K"const" name] + [K"if" + [K"&&" + [K"isdefined" name] + [K"call" "_equiv_typedef"::K"core" name newtype_var] + ] + nothing_(ctx, ex) + [K"=" name newtype_var] + ] + nothing_(ctx, ex) + ] +end + function expand_where(ctx, srcref, lhs, rhs) bounds = analyze_typevar(ctx, rhs) v = bounds[1] @@ -1790,6 +1837,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_import(ctx, ex) elseif k == K"export" || k == K"public" TODO(ex) + elseif k == K"abstract" + expand_forms_2(ctx, expand_abstract_type(ctx, ex)) elseif k == K"ref" if numchildren(ex) > 2 TODO(ex, "ref expansion") diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 416b513828bdd..892fac212e9ec 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -783,14 +783,20 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if needs_value compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) end - elseif k == K"global" + elseif k == K"global" || k == K"const" if needs_value - throw(LoweringError(ex, "misplaced `global` declaration")) 
+ throw(LoweringError(ex, "misplaced declaration")) end emit(ctx, ex) nothing - elseif k == K"const" || k == K"isdefined" - emit(ctx, ex) + elseif k == K"isdefined" + if in_tail_pos + emit_return(ctx, ex) + elseif needs_value + ex + else + emit(ctx, ex) + end else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index bf433bf5a2da3..e064b5f227116 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -525,6 +525,10 @@ begin end """ +src = """ +abstract type Abstract1 end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @@ -547,7 +551,7 @@ ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) @info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) -ex_expr = JuliaLowering.to_lowered_expr(in_mod, ctx5.bindings, ex_compiled) +ex_expr = JuliaLowering.to_lowered_expr(in_mod, ex_compiled) @info "CodeInfo" ex_expr eval_result = Base.eval(in_mod, ex_expr) diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 308a81a517a35..138f49ac376c9 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -20,5 +20,6 @@ include("utils.jl") include("modules.jl") include("quoting.jl") include("scopes.jl") + include("typedefs.jl") end diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 469396153b23c..b2c5abaf51a8f 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -67,11 +67,10 @@ end 2 (call core.apply_type top.Dict core.Symbol core.Any) 3 (call %₂) 4 (isdefined slot₁/y) -5 (isdefined slot₁/y) -6 (gotoifnot %₅ label₉) -7 slot₁/y -8 (call top.setindex! %₃ %₇ :y) -9 (return %₃) +5 (gotoifnot %₄ label₈) +6 slot₁/y +7 (call top.setindex! 
%₃ %₆ :y) +8 (return %₃) ######################################## # @locals with function args (TODO: static parameters) @@ -88,9 +87,8 @@ end 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (isdefined slot₂/z) - 4 (isdefined slot₂/z) - 5 (gotoifnot %₄ label₇) - 6 (call top.setindex! %₂ slot₂/z :z) - 7 (return %₂) + 4 (gotoifnot %₃ label₆) + 5 (call top.setindex! %₂ slot₂/z :z) + 6 (return %₂) 7 (return %₁) diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl new file mode 100644 index 0000000000000..4c88fe771950c --- /dev/null +++ b/JuliaLowering/test/typedefs.jl @@ -0,0 +1,30 @@ +@testset "Type definitions" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +abstract type A end +""") === nothing +@test supertype(test_mod.A) === Any + +@test JuliaLowering.include_string(test_mod, """ +abstract type B <: A end +""") === nothing +@test supertype(test_mod.B) === test_mod.A + +@test JuliaLowering.include_string(test_mod, """ +abstract type C{X} end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +abstract type D{X<:A} end +""") === nothing +@test test_mod.D{test_mod.B} isa Type +@test_throws Exception test_mod.D{Int} + +@test JuliaLowering.include_string(test_mod, """ +abstract type E <: C{E} end +""") === nothing +@test test_mod.E isa Type + +end diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 91b02cd2c8139..e458eaddec365 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -118,3 +118,93 @@ LoweringError: A where Y >: f() # └─┘ ── expected type name +######################################## +# Simple abstract type definition +abstract type A end +#--------------------- +1 (call core.svec) +2 (call core._abstracttype TestMod :A %₁) +3 (= slot₁/A %₂) +4 (call core._setsuper! %₂ core.Any) +5 (call core._typebody! 
%₂) +6 (global TestMod.A) +7 (const TestMod.A) +8 (isdefined TestMod.A) +9 (gotoifnot %₈ label₁₄) +10 TestMod.A +11 (call core._equiv_typedef %₁₀ %₂) +12 (gotoifnot %₁₁ label₁₄) +13 (goto label₁₅) +14 (= TestMod.A %₂) +15 (return core.nothing) + +######################################## +# Abstract type definition with supertype +abstract type A <: B end +#--------------------- +1 (call core.svec) +2 (call core._abstracttype TestMod :A %₁) +3 (= slot₁/A %₂) +4 TestMod.B +5 (call core._setsuper! %₂ %₄) +6 (call core._typebody! %₂) +7 (global TestMod.A) +8 (const TestMod.A) +9 (isdefined TestMod.A) +10 (gotoifnot %₉ label₁₅) +11 TestMod.A +12 (call core._equiv_typedef %₁₁ %₂) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₆) +15 (= TestMod.A %₂) +16 (return core.nothing) + +######################################## +# Abstract type definition with multiple typevars +abstract type A{X, Y <: X} end +#--------------------- +1 (= slot₂/X (call core.TypeVar :X)) +2 slot₂/X +3 (= slot₃/Y (call core.TypeVar :Y %₂)) +4 slot₂/X +5 slot₃/Y +6 (call core.svec %₄ %₅) +7 (call core._abstracttype TestMod :A %₆) +8 (= slot₁/A %₇) +9 (call core._setsuper! %₇ core.Any) +10 (call core._typebody! 
%₇) +11 (global TestMod.A) +12 (const TestMod.A) +13 (isdefined TestMod.A) +14 (gotoifnot %₁₃ label₁₉) +15 TestMod.A +16 (call core._equiv_typedef %₁₅ %₇) +17 (gotoifnot %₁₆ label₁₉) +18 (goto label₂₀) +19 (= TestMod.A %₇) +20 (return core.nothing) + +######################################## +# Error: Abstract type definition with bad signature +abstract type A() end +#--------------------- +LoweringError: +abstract type A() end +# └─┘ ── invalid type signature + +######################################## +# Error: Abstract type definition with bad signature +abstract type A(){T} end +#--------------------- +LoweringError: +abstract type A(){T} end +# └────┘ ── invalid type signature + +######################################## +# Error: Abstract type definition with bad signature +abstract type A() <: B end +#--------------------- +LoweringError: +abstract type A() <: B end +# └───────┘ ── invalid type signature + From 5b45b6769b8012d6fd654589552c8896909f1303 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 5 Oct 2024 17:43:49 +1000 Subject: [PATCH 0873/1109] Add toplevel-only checking for `import` and `abstract type` --- JuliaLowering/src/desugaring.jl | 35 ++++++++++----------- JuliaLowering/src/scope_analysis.jl | 5 +++ JuliaLowering/test/import_ir.jl | 47 +++++++++++++++++++++++++++++ JuliaLowering/test/typedefs_ir.jl | 12 ++++++++ 4 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 JuliaLowering/test/import_ir.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index d1e8ae6bbaf4a..3e60e40d48983 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1535,7 +1535,7 @@ function expand_abstract_type(ctx, ex) [K"call" "_typebody!"::K"core" newtype_var] ] ] - # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] FIXME + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] [K"global" name] [K"const" name] [K"if" @@ -1634,15 +1634,17 @@ function expand_import(ctx, ex) 
push!(path_spec, isnothing(as_name) ? nothing_(ctx, ex) : @ast(ctx, as_name, as_name.name_val::K"String")) end - @ast ctx ex [ - K"call" - module_import ::K"Value" - ctx.mod ::K"Value" - is_using ::K"Value" - from_path + @ast ctx ex [K"block" + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] [K"call" - "svec"::K"core" - path_spec... + module_import ::K"Value" + ctx.mod ::K"Value" + is_using ::K"Value" + from_path + [K"call" + "svec"::K"core" + path_spec... + ] ] ] end @@ -1830,10 +1832,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"$" throw(LoweringError(ex, "`\$` expression outside string or quote block")) elseif k == K"module" - # TODO: check-toplevel expand_module(ctx, ex) elseif k == K"import" || k == K"using" - # TODO: check-toplevel expand_import(ctx, ex) elseif k == K"export" || k == K"public" TODO(ex) @@ -1858,12 +1858,13 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"toplevel" # The toplevel form can't be lowered here - it needs to just be quoted # and passed through to a call to eval. 
- # TODO: check-toplevel - @ast ctx ex [ - K"call" - eval ::K"Value" - ctx.mod ::K"Value" - [K"inert" ex] + @ast ctx ex [K"block" + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + [K"call" + eval ::K"Value" + ctx.mod ::K"Value" + [K"inert" ex] + ] ] elseif k == K"vect" @ast ctx ex [K"call" diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 8c172c901aad8..618b8134e8205 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -462,6 +462,11 @@ function _resolve_scopes(ctx, ex::SyntaxTree) e = ex[2][1] throw(LoweringError(e, "$(kind(e)) is only allowed in global scope")) end + elseif etype == "toplevel_only" + if !ctx.scope_stack[end].in_toplevel_thunk + e = ex[2][1] + throw(LoweringError(e, "this syntax is only allowed in top level code")) + end else throw(LoweringError(ex, "Unknown syntax assertion")) end diff --git a/JuliaLowering/test/import_ir.jl b/JuliaLowering/test/import_ir.jl new file mode 100644 index 0000000000000..30bfc7f751162 --- /dev/null +++ b/JuliaLowering/test/import_ir.jl @@ -0,0 +1,47 @@ +######################################## +# Basic import +import A: b +#--------------------- +1 (call core.svec "A") +2 (call core.svec 1 "b" core.nothing) +3 (call JuliaLowering.module_import TestMod false %₁ %₂) +4 (return %₃) + +######################################## +# Import with paths and `as` +import A.B.C: b, c.d as e +#--------------------- +1 (call core.svec "A" "B" "C") +2 (call core.svec 1 "b" core.nothing 2 "c" "d" "e") +3 (call JuliaLowering.module_import TestMod false %₁ %₂) +4 (return %₃) + +######################################## +# Using +using A +#--------------------- +1 (call core.svec 1 "A" core.nothing) +2 (call JuliaLowering.module_import TestMod true core.nothing %₁) +3 (return %₂) + +######################################## +# Using with paths and `as` +using A.B.C: b, c.d as e +#--------------------- +1 (call core.svec "A" "B" "C") +2 (call 
core.svec 1 "b" core.nothing 2 "c" "d" "e") +3 (call JuliaLowering.module_import TestMod true %₁ %₂) +4 (return %₃) + +######################################## +# Error: Import not at top level +function f() + import A: b +end +#--------------------- +LoweringError: +function f() + import A: b +# └─────────┘ ── this syntax is only allowed in top level code +end + diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index e458eaddec365..4a3343a4a3dd6 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -208,3 +208,15 @@ LoweringError: abstract type A() <: B end # └───────┘ ── invalid type signature +######################################## +# Error: Abstract type definition in function scope +function f() + abstract type A end +end +#--------------------- +LoweringError: +function f() + abstract type A end +# └─────────────────┘ ── this syntax is only allowed in top level code +end + From 4ea8106600c4ff0d6f1df636b6597e21b77cb753 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 5 Oct 2024 18:20:24 +1000 Subject: [PATCH 0874/1109] Desugaring of primitive types --- JuliaLowering/src/desugaring.jl | 20 +++++++-- JuliaLowering/test/typedefs.jl | 9 +++++ JuliaLowering/test/typedefs_ir.jl | 67 +++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 3e60e40d48983..44f5667f0984f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1504,7 +1504,16 @@ function analyze_type_sig(ctx, ex) throw(LoweringError(ex, "invalid type signature")) end -function expand_abstract_type(ctx, ex) +function expand_abstract_or_primitive_type(ctx, ex) + is_abstract = kind(ex) == K"abstract" + if is_abstract + @chk numchildren(ex) == 1 + elseif kind(ex) == K"primitive" + @chk numchildren(ex) == 2 + nbits = ex[2] + else + @assert false + end name, params, supertype = 
analyze_type_sig(ctx, ex[1]) typevar_names = SyntaxList(ctx) typevar_stmts = SyntaxList(ctx) @@ -1524,10 +1533,13 @@ function expand_abstract_type(ctx, ex) [K"=" newtype_var [K"call" - "_abstracttype"::K"core" + (is_abstract ? "_abstracttype" : "_primitivetype")::K"core" ctx.mod::K"Value" name=>K"Symbol" [K"call" "svec"::K"core" typevar_names...] + if !is_abstract + nbits + end ] ] [K"=" name newtype_var] @@ -1837,8 +1849,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_import(ctx, ex) elseif k == K"export" || k == K"public" TODO(ex) - elseif k == K"abstract" - expand_forms_2(ctx, expand_abstract_type(ctx, ex)) + elseif k == K"abstract" || k == K"primitive" + expand_forms_2(ctx, expand_abstract_or_primitive_type(ctx, ex)) elseif k == K"ref" if numchildren(ex) > 2 TODO(ex, "ref expansion") diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index 4c88fe771950c..17884b93d9173 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -6,6 +6,7 @@ test_mod = Module() abstract type A end """) === nothing @test supertype(test_mod.A) === Any +@test isabstracttype(test_mod.A) @test JuliaLowering.include_string(test_mod, """ abstract type B <: A end @@ -27,4 +28,12 @@ abstract type E <: C{E} end """) === nothing @test test_mod.E isa Type +@test JuliaLowering.include_string(test_mod, """ +primitive type P <: A 16 end +""") === nothing +@test isconcretetype(test_mod.P) +@test supertype(test_mod.P) === test_mod.A +@test reinterpret(test_mod.P, 0x0001) isa test_mod.P +@test reinterpret(UInt16, reinterpret(test_mod.P, 0x1337)) === 0x1337 + end diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 4a3343a4a3dd6..d3e8fb0ae273c 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -220,3 +220,70 @@ function f() # └─────────────────┘ ── this syntax is only allowed in top level code end 
+######################################## +# Simple primitive type definition +primitive type P 8 end +#--------------------- +1 (call core.svec) +2 (call core._primitivetype TestMod :P %₁ 8) +3 (= slot₁/P %₂) +4 (call core._setsuper! %₂ core.Any) +5 (call core._typebody! %₂) +6 (global TestMod.P) +7 (const TestMod.P) +8 (isdefined TestMod.P) +9 (gotoifnot %₈ label₁₄) +10 TestMod.P +11 (call core._equiv_typedef %₁₀ %₂) +12 (gotoifnot %₁₁ label₁₄) +13 (goto label₁₅) +14 (= TestMod.P %₂) +15 (return core.nothing) + +######################################## +# Complex primitive type definition +primitive type P{X,Y} <: Z 32 end +#--------------------- +1 (= slot₂/X (call core.TypeVar :X)) +2 (= slot₃/Y (call core.TypeVar :Y)) +3 slot₂/X +4 slot₃/Y +5 (call core.svec %₃ %₄) +6 (call core._primitivetype TestMod :P %₅ 32) +7 (= slot₁/P %₆) +8 TestMod.Z +9 (call core._setsuper! %₆ %₈) +10 (call core._typebody! %₆) +11 (global TestMod.P) +12 (const TestMod.P) +13 (isdefined TestMod.P) +14 (gotoifnot %₁₃ label₁₉) +15 TestMod.P +16 (call core._equiv_typedef %₁₅ %₆) +17 (gotoifnot %₁₆ label₁₉) +18 (goto label₂₀) +19 (= TestMod.P %₆) +20 (return core.nothing) + +######################################## +# Primitive type definition with computed size (should this be allowed??) +primitive type P P_nbits() end +#--------------------- +1 (call core.svec) +2 TestMod.P_nbits +3 (call %₂) +4 (call core._primitivetype TestMod :P %₁ %₃) +5 (= slot₁/P %₄) +6 (call core._setsuper! %₄ core.Any) +7 (call core._typebody! 
%₄) +8 (global TestMod.P) +9 (const TestMod.P) +10 (isdefined TestMod.P) +11 (gotoifnot %₁₀ label₁₆) +12 TestMod.P +13 (call core._equiv_typedef %₁₂ %₄) +14 (gotoifnot %₁₃ label₁₆) +15 (goto label₁₇) +16 (= TestMod.P %₄) +17 (return core.nothing) + From b05d4ccdc6161bda11093c50fd8740e75326b8be Mon Sep 17 00:00:00 2001 From: spaette <111918424+spaette@users.noreply.github.com> Date: Sun, 6 Oct 2024 02:48:30 +0200 Subject: [PATCH 0875/1109] Fix typos in comments and docs (JuliaLang/JuliaSyntax.jl#508) Co-authored-by: spaette --- JuliaSyntax/docs/src/api.md | 2 +- JuliaSyntax/docs/src/design.md | 4 ++-- JuliaSyntax/docs/src/reference.md | 2 +- JuliaSyntax/src/expr.jl | 2 +- JuliaSyntax/src/green_tree.jl | 2 +- JuliaSyntax/src/kinds.jl | 2 +- JuliaSyntax/src/parse_stream.jl | 8 ++++---- JuliaSyntax/src/parser.jl | 10 +++++----- JuliaSyntax/src/parser_api.jl | 4 ++-- JuliaSyntax/src/source_files.jl | 12 ++++++------ JuliaSyntax/test/parse_packages.jl | 2 +- JuliaSyntax/test/parser.jl | 6 +++--- JuliaSyntax/test/tokenize.jl | 2 +- 13 files changed, 29 insertions(+), 29 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index 9fa1a37fb50ab..d79c86a343104 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -82,7 +82,7 @@ JuliaSyntax.kind ``` In addition to the `kind`, a small integer set of "flags" is included to -further distinguish details of each expresssion, accessed with the `flags` +further distinguish details of each expression, accessed with the `flags` function. The kind and flags can be wrapped into a `SyntaxHead` which is accessed with the `head` function. diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md index 0f7e2a4f178b4..fb2a06c293835 100644 --- a/JuliaSyntax/docs/src/design.md +++ b/JuliaSyntax/docs/src/design.md @@ -317,7 +317,7 @@ parsing `key=val` pairs inside parentheses. 
### Other oddities -* Operators with suffices don't seem to always be parsed consistently as the +* Operators with suffixes don't seem to always be parsed consistently as the same operator without a suffix. Unclear whether this is by design or mistake. For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` @@ -425,7 +425,7 @@ First, there's no support for precise source locations and the existing data structures (bare flisp lists) can't easily be extended to add these. Fixing this would require changes to nearly all of the code. -Second, it's written in flisp: an aestheically pleasing, minimal but obscure +Second, it's written in flisp: an aesthetically pleasing, minimal but obscure implementation of Scheme. Learning Scheme is actually a good way to appreciate some of Julia's design inspiration, but it's quite a barrier for developers of Julia language tooling. (Flisp has no user-level documentation but non-schemers diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 84fe09bcf6539..100110cbfac60 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -73,7 +73,7 @@ class of tokenization errors and lets the parser deal with them. 
### Improvements to awkward AST forms -* Frakentuples with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133) +* `FrankenTuple`s with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133) * Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index fd61243c18eaa..265fd02200d50 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -440,7 +440,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, a1 = args[1] if @isexpr(a1, :block) a1a = (args[1]::Expr).args - # Ugly logic to strip the Expr(:block) in certian cases for compatibility + # Ugly logic to strip the Expr(:block) in certain cases for compatibility if length(a1a) == 1 a = a1a[1] if a isa Symbol || @isexpr(a, :(=)) || @isexpr(a, :(::)) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index be55e4f685f8b..27da7ec4bfb0f 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -12,7 +12,7 @@ As implementation choices, we choose that: * Nodes are immutable and don't know their parents or absolute position, so can be cached and reused -* Nodes are homogenously typed at the language level so they can be stored +* Nodes are homogeneously typed at the language level so they can be stored concretely, with the `head` 
defining the node type. Normally this would include a "syntax kind" enumeration, but it can also include flags and record information the parser knew about the layout of the child nodes. diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 2386fe2630529..29b8120b2c81c 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -135,7 +135,7 @@ Register custom `Kind`s with the given `names`, belonging to a module `mod`. `names` is an array of arbitrary strings. In order for kinds to be represented by a small number of bits, some nontrivial -cooperation is reqired between modules using custom kinds: +cooperation is required between modules using custom kinds: * The integer `module_id` is globally unique for each `mod` which will be used together, and not larger than $_kind_module_id_max. * No two modules register the same `name`. The semantics of a given `kind` name diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 8aad71df4be79..b1594ea2777da 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------- -# Flags hold auxilary information about tokens/nonterminals which the Kind +# Flags hold auxiliary information about tokens/nonterminals which the Kind # doesn't capture in a nice way. # # TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias? 
@@ -40,7 +40,7 @@ Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses const PARENS_FLAG = RawFlags(1<<5) """ -Set for K"quote" for the short form `:x` as oppsed to long form `quote x end` +Set for K"quote" for the short form `:x` as opposed to long form `quote x end` """ const COLON_QUOTE = RawFlags(1<<5) @@ -223,7 +223,7 @@ is_dotted(x) = has_flags(x, DOTOP_FLAG) """ is_suffixed(x) -Return true for operators which have sufficies, such as `+₁` +Return true for operators which have suffixes, such as `+₁` """ is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) @@ -822,7 +822,7 @@ end Bump an invisible zero-width token into the output This is useful when surrounding syntax implies the presence of a token. For -example, `2x` means `2*x` via the juxtoposition rules. +example, `2x` means `2*x` via the juxtaposition rules. """ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; error=nothing) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index e1356e998f286..7502eb7a25d6e 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -723,7 +723,7 @@ function parse_cond(ps::ParseState) # FIXME: This is a very specific case. Error recovery should be handled more # generally elsewhere. if is_block_continuation_keyword(ps, kind(t)) - # a "continuaton keyword" is likely to belong to the surrounding code, so + # a "continuation keyword" is likely to belong to the surrounding code, so # we abort early # if true; x ? true elseif true end ==> (if true (block (if x true (error-t) (error-t))) (elseif true (block))) @@ -1472,7 +1472,7 @@ function parse_unary_prefix(ps::ParseState) end end -# Parses a chain of sufficies at function call precedence, leftmost binding +# Parses a chain of suffixes at function call precedence, leftmost binding # tightest. 
This handles # * Bracketed calls like a() b[] c{} # * Field access like a.b.c @@ -1722,7 +1722,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # x`str` ==> (macrocall @x_cmd (cmdstring-r "str")) # x"" ==> (macrocall @x_str (string-r "")) # x`` ==> (macrocall @x_cmd (cmdstring-r "")) - # Triple quoted procesing for custom strings + # Triple quoted processing for custom strings # r"""\nx""" ==> (macrocall @r_str (string-s-r "x")) # r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y")) # r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y")) @@ -1735,7 +1735,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) t = peek_token(ps) k = kind(t) if !preceding_whitespace(t) && is_string_macro_suffix(k) - # Macro sufficies can include keywords and numbers + # Macro suffixes can include keywords and numbers # x"s"y ==> (macrocall @x_str (string-r "s") "y") # x"s"end ==> (macrocall @x_str (string-r "s") "end") # x"s"in ==> (macrocall @x_str (string-r "s") "in") @@ -3484,7 +3484,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) end emit(ps, mark, K"char") elseif leading_kind == K"Char" - # FIXME: This is a tokenization error and should be preceeded with + # FIXME: This is a tokenization error and should be preceded with # K"'". However this workaround is better than emitting a bare Char. 
bump(ps, remap_kind=K"Identifier") elseif leading_kind == K":" diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 95f077ef7c1c5..83a9ff3af8090 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -161,7 +161,7 @@ Token type resulting from calling `tokenize(text)` Use * `kind(tok)` to get the token kind -* `untokenize(tok, text)` to retreive the text +* `untokenize(tok, text)` to retrieve the text * Predicates like `is_error(tok)` to query token categories and flags """ struct Token @@ -177,7 +177,7 @@ head(t::Token) = t.head tokenize(text) Returns the tokenized UTF-8 encoded `text` as a vector of `Token`s. The -text for the token can be retreived by using `untokenize()`. The full text can be +text for the token can be retrieved by using `untokenize()`. The full text can be reconstructed with, for example, `join(untokenize.(tokenize(text), text))`. This interface works on UTF-8 encoded string or buffer data only. diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 9d039d928ce67..9c5ccf24a9293 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -70,17 +70,17 @@ second form, get the line number at the given `byte_index` within `source`. source_line(x) = source_line(sourcefile(x), first_byte(x)) """ - souce_location(x) - souce_location(source::SourceFile, byte_index::Integer) + source_location(x) + source_location(source::SourceFile, byte_index::Integer) - souce_location(LineNumberNode, x) - souce_location(LineNumberNode, source, byte_index) + source_location(LineNumberNode, x) + source_location(LineNumberNode, source, byte_index) Get `(line,column)` of the first byte where object `x` appears in the source. The second form allows one to be more precise with the `byte_index`, given the source file. 
-Providing `LineNumberNode` as the first agrument will return the line and file +Providing `LineNumberNode` as the first argument will return the line and file name in a line number node object. """ source_location(x) = source_location(sourcefile(x), first_byte(x)) @@ -373,7 +373,7 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; # The diagnostic range is compact and we show the whole thing _printstyled(io, source[p:q]; bgcolor=color) else - # Or large and we trucate the code to show only the region around the + # Or large and we truncate the code to show only the region around the # start and end of the error. _printstyled(io, source[p:y]; bgcolor=color) print(io, "⋮\n") diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl index ef255d656820b..b5b08f488a918 100644 --- a/JuliaSyntax/test/parse_packages.jl +++ b/JuliaSyntax/test/parse_packages.jl @@ -38,7 +38,7 @@ base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") return nothing end - # syntax.jl has some intentially weird syntax which we parse + # syntax.jl has some intentionally weird syntax which we parse # differently than the flisp parser, and some cases which we've # decided are syntax errors. 
if endswith(f, "syntax.jl") diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f32e7ba629448..37c82bc8e693a 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -444,17 +444,17 @@ tests = [ "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" "in\"str\"" => """(macrocall @in_str (string-r "str"))""" "outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" - # Triple quoted procesing for custom strings + # Triple quoted processing for custom strings "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))""" "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))""" "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))""" - # Macro sufficies can include keywords and numbers + # Macro suffixes can include keywords and numbers "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" - # Cmd macro sufficies + # Cmd macro suffixes "x`s`y" => """(macrocall @x_cmd (cmdstring-r "s") "y")""" "x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")""" "x`s`in" => """(macrocall @x_cmd (cmdstring-r "s") "in")""" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 8913a20ca0a99..38ff3568cc97f 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -720,7 +720,7 @@ end @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] - # Dotted operators and other dotted sufficies + # Dotted operators and other dotted suffixes @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", ".+"=>K"+", "1"=>K"Integer"] @test toks("1234.0+1") == ["1234.0"=>K"Float", "+"=>K"+", "1"=>K"Integer"] @test toks("1234.0 .+1") == ["1234.0"=>K"Float", " 
"=>K"Whitespace", ".+"=>K"+", "1"=>K"Integer"] From 166857b92cc03fcc20c4ed0ff9d6ee62494abd38 Mon Sep 17 00:00:00 2001 From: spaette <111918424+spaette@users.noreply.github.com> Date: Sun, 6 Oct 2024 02:49:04 +0200 Subject: [PATCH 0876/1109] Fix http->https in readme (JuliaLang/JuliaSyntax.jl#510) --- JuliaSyntax/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md index 2865b154a299d..ae9b2b9760b36 100644 --- a/JuliaSyntax/README.md +++ b/JuliaSyntax/README.md @@ -1,7 +1,7 @@ # JuliaSyntax [![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions) -[![codecov.io](http://codecov.io/github/JuliaLang/JuliaSyntax.jl/coverage.svg?branch=main)](http://codecov.io/github/JuliaLang/JuliaSyntax.jl?branch=main) +[![codecov.io](https://codecov.io/github/JuliaLang/JuliaSyntax.jl/coverage.svg?branch=main)](https://codecov.io/github/JuliaLang/JuliaSyntax.jl?branch=main) A Julia compiler frontend, written in Julia. From 70627ba3e3f16f79150eeeee438bed29a50f73b8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 6 Oct 2024 10:49:35 +1000 Subject: [PATCH 0877/1109] Parse docstrings within structs as `K"doc"` (JuliaLang/JuliaSyntax.jl#511) Julia's ecosystem (including Base.Docs and flisp lowering) assumes that strings within `struct` definitions are per-field docstrings, but the flisp parser doesn't handle these - they are only recognized when the struct itself has a docstring and are processed by the `@doc` macro recursing into the struct's internals. For example, the following doesn't result in any docs attached to `A`. ```julia struct A "x_docs" x "y_docs" y end ``` This change adds `K"doc"` node parsing to the insides of a struct, making the semantics clearer in the parser tree and making it possible to address this problems in the future within JuliaLowering. Also ensure that the `Expr` form is unaffected by this change. 
--- JuliaSyntax/src/expr.jl | 13 +++++++++++++ JuliaSyntax/src/parser.jl | 8 ++++---- JuliaSyntax/test/expr.jl | 3 +++ JuliaSyntax/test/parser.jl | 1 + 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 265fd02200d50..b436e744e0a39 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -466,6 +466,19 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, headsym = :call pushfirst!(args, :*) elseif k == K"struct" + @assert args[2].head == :block + orig_fields = args[2].args + fields = Expr(:block) + for field in orig_fields + if @isexpr(field, :macrocall) && field.args[1] == GlobalRef(Core, Symbol("@doc")) + # @doc macro calls don't occur within structs, in Expr form. + push!(fields.args, field.args[3]) + push!(fields.args, field.args[4]) + else + push!(fields.args, field) + end + end + args[2] = fields pushfirst!(args, has_flags(head, MUTABLE_FLAG)) elseif k == K"importpath" headsym = :. diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 7502eb7a25d6e..0cd65f7aa33e1 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -536,9 +536,9 @@ end # Parse docstrings attached by a space or single newline # # flisp: parse-docstring -function parse_docstring(ps::ParseState) +function parse_docstring(ps::ParseState, down=parse_eq) mark = position(ps) - parse_eq(ps) + down(ps) if peek_behind(ps).kind == K"string" is_doc = true k = peek(ps) @@ -563,7 +563,7 @@ function parse_docstring(ps::ParseState) # """\n doc\n """ foo ==> (doc (string-s "doc\n") foo) end if is_doc - parse_eq(ps) + down(ps) emit(ps, mark, K"doc") end end @@ -1947,7 +1947,7 @@ function parse_resword(ps::ParseState) @check peek(ps) == K"struct" bump(ps, TRIVIA_FLAG) parse_subtype_spec(ps) - parse_block(ps, parse_struct_field) + parse_block(ps, ps1->parse_docstring(ps1, parse_struct_field)) bump_closing_token(ps, K"end") emit(ps, mark, K"struct", is_mut ? 
MUTABLE_FLAG : EMPTY_FLAGS) elseif word == K"primitive" diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index cf9b881d07f98..9361937f31460 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -727,6 +727,9 @@ Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) @test parsestmt("struct A const a end", version=v"1.8") == Expr(:struct, false, :A, Expr(:block, LineNumberNode(1), Expr(:const, :a))) + + @test parsestmt("struct A \n \"doc\" \n a end") == + Expr(:struct, false, :A, Expr(:block, LineNumberNode(2), "doc", :a)) end @testset "export" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 37c82bc8e693a..e6115ad474c40 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -502,6 +502,7 @@ tests = [ # struct "struct A <: B \n a::X \n end" => "(struct (<: A B) (block (::-i a X)))" "struct A \n a \n b \n end" => "(struct A (block a b))" + "struct A \n \"doca\" \n a \n \"docb\" \n b \n end" => "(struct A (block (doc (string \"doca\") a) (doc (string \"docb\") b)))" "mutable struct A end" => "(struct-mut A (block))" ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))" From 7c3278523a4a6b06ba60d50ccc26fab854bf97fe Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sun, 6 Oct 2024 06:21:01 -0500 Subject: [PATCH 0878/1109] Don't assume that `SubString` has `pointer` and copy instead (JuliaLang/JuliaSyntax.jl#506) * Don't assume that `SubString` has `pointer` and copy instead * Still assume `Substring{String}` has `pointer` * Test with `Test.GenericString` --- JuliaSyntax/src/parse_stream.jl | 2 +- JuliaSyntax/test/parse_stream.jl | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index b1594ea2777da..5b04b42c71b9a 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ 
b/JuliaSyntax/src/parse_stream.jl @@ -391,7 +391,7 @@ function ParseStream(text::String, index::Integer=1; version=VERSION) ParseStream(unsafe_wrap(Vector{UInt8}, text), text, index, version) end -function ParseStream(text::SubString, index::Integer=1; version=VERSION) +function ParseStream(text::SubString{String}, index::Integer=1; version=VERSION) # See also IOBuffer(SubString("x")) ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), sizeof(text)), text, index, version) diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index f7c0bd60a586c..f5148f2746623 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -7,7 +7,8 @@ using JuliaSyntax: ParseStream, peek, peek_token, bump, bump_trivia, bump_invisible, emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG, - ParseStreamPosition, first_child_position, last_child_position + ParseStreamPosition, first_child_position, last_child_position, + parsestmt # Here we manually issue parse events in the order the Julia parser would issue # them @@ -147,3 +148,13 @@ end @test first_child_position(st, position(st)) == ParseStreamPosition(4, 1) @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2) end + +@testset "SubString{GenericString} (issue #505)" begin + x = Test.GenericString("1 2") + @test x == "1 2" + y = split(x)[1] + @test y == "1" + @test y isa SubString{GenericString} + @test ParseStream(y) isa ParseStream + @test parsestmt(Expr, y) == parsestmt(Expr, "1") +end From e92ead513a2deb050a5c848b5f3bcd31b664b97b Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Sun, 6 Oct 2024 13:22:02 +0200 Subject: [PATCH 0879/1109] Remove `Base.convert` methods between `AbstractString` and `Kind` (JuliaLang/JuliaSyntax.jl#500) * Remove the method `convert(::Type{String}, ::Kind)` This patch removes the method `convert(::Type{String}, ::Kind)` used for converting kinds to strings and replaces it with the already existing method of `Base.string`. 
There are two reason for this: i) the method causes invalidations when loading the package and ii) `convert` is called implicitly in e.g. constructors and should therefore typically only be defined between similar enough types. * Remove the method `Base.convert(::Type{Kind}, ::String)` This patch removes the method `Base.convert(::Type{Kind}, ::AbstractString)` and replaces it with a `Kind(::AbstractString)` constructor. The reason for this is that `convert` is called implicitly in e.g. constructors and should therefore typically only be defined between similar enough types. --- JuliaSyntax/src/kinds.jl | 22 +++++++++------------- JuliaSyntax/test/tokenize.jl | 2 +- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 29b8120b2c81c..554dc08da5c8a 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -42,36 +42,32 @@ function Kind(x::Integer) return Base.bitcast(Kind, convert(UInt16, x)) end -function Base.convert(::Type{String}, k::Kind) - _kind_int_to_str[reinterpret(UInt16, k)] -end - -function Base.convert(::Type{Kind}, s::AbstractString) +function Kind(s::AbstractString) i = get(_kind_str_to_int, s) do error("unknown Kind name $(repr(s))") end Kind(i) end -Base.string(x::Kind) = convert(String, x) -Base.print(io::IO, x::Kind) = print(io, convert(String, x)) +Base.string(x::Kind) = _kind_int_to_str[reinterpret(UInt16, x)] +Base.print(io::IO, x::Kind) = print(io, string(x)) Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y) function Base.show(io::IO, k::Kind) - print(io, "K\"$(convert(String, k))\"") + print(io, "K\"", k, "\"") end # Save the string representation rather than the bit pattern so that kinds # can be serialized and deserialized across different JuliaSyntax versions. 
function Base.write(io::IO, k::Kind) - str = convert(String, k) + str = string(k) write(io, UInt8(sizeof(str))) + write(io, str) end function Base.read(io::IO, ::Type{Kind}) len = read(io, UInt8) str = String(read(io, len)) - convert(Kind, str) + Kind(str) end function Base.parentmodule(k::Kind) @@ -162,7 +158,7 @@ For example * K"block" is the kind of a block of code (eg, statements within a begin-end). """ macro K_str(s) - convert(Kind, s) + Kind(s) end """ @@ -171,7 +167,7 @@ A set of kinds which can be used with the `in` operator. For example k in KSet"+ - *" """ macro KSet_str(str) - kinds = [convert(Kind, s) for s in split(str)] + kinds = [Kind(s) for s in split(str)] quote ($(kinds...),) @@ -1146,7 +1142,7 @@ function untokenize(k::Kind; unique=true) if unique && k in _nonunique_kind_names return nothing else - return convert(String, k) + return string(k) end end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 38ff3568cc97f..e2d069daa1a7c 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -334,7 +334,7 @@ end "type", "var"] - @test kind(tok(kw)) == convert(Kind, kw) + @test kind(tok(kw)) == Kind(kw) end end From b17c08f5f63fdf0df62a08b7824c5b19396348de Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 6 Oct 2024 11:00:44 +1000 Subject: [PATCH 0880/1109] Rearrange JuliaLowering kinds into several categories Also introduce `K"code_info"` to distinguish the `CodeInfo`-like form with indexed statements from the more symbolic cross references that are used internally by lowering within `K"lambda"` prior to statement+SSA renumbering. 
--- JuliaLowering/src/eval.jl | 2 +- JuliaLowering/src/kinds.jl | 78 +++++++++++++++++++++++----------- JuliaLowering/src/linear_ir.jl | 4 +- JuliaLowering/src/utils.jl | 2 +- 4 files changed, 58 insertions(+), 28 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 520f5cc7b8d1d..c8378a9f82d76 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -209,7 +209,7 @@ function to_lowered_expr(mod, ex) else TODO(ex, "Convert SyntaxTree to Expr") end - elseif k == K"lambda" + elseif k == K"code_info" funcname = ex.lambda_info.is_toplevel_thunk ? "top-level scope" : "none" # FIXME diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 5227ec7c49faf..c423972f8850d 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -3,25 +3,45 @@ # part of the surface syntax function _register_kinds() JuliaSyntax.register_kinds!(JuliaLowering, 1, [ - "BEGIN_LOWERING_KINDS" - # Compiler metadata hints - "meta" - "extension" - # Semantic assertions used by lowering. The content of an assertion - # is not considered to be quoted, so use K"Symbol" inside where necessary. - "assert" - # A literal Julia value of any kind, as might be inserted by the AST - # during macro expansion + # "Syntax extensions" - expression kinds emitted by macros or macro + # expansion, and known to lowering. These are part of the AST API but + # without having surface syntax. + "BEGIN_EXTENSION_KINDS" + # atomic fields or accesses (see `@atomic`) + "atomic" + # A literal Julia value of any kind, as might be inserted into the + # AST during macro expansion "Value" # A (quoted) `Symbol` "Symbol" + # Compiler metadata hints + "meta" # TODO: Use `meta` for inbounds and loopinfo etc? "inbounds" "inline" "noinline" "loopinfo" - # Identifier for a value which is only assigned once - "SSAValue" + # Call into foreign code. 
Emitted by `@ccall` + "foreigncall" + # Test whether a variable is defined + "isdefined" + # named labels for `@label` and `@goto` + "symbolic_label" + # Goto named label + "symbolic_goto" + # Internal initializer for structures, called from inner constructor + "new" + # Catch-all for additional syntax extensions without the need to + # extend `Kind`. Known extensions include: + # locals, islocal + "extension" + "END_EXTENSION_KINDS" + + # The following kinds are internal to lowering + "BEGIN_LOWERING_KINDS" + # Semantic assertions used by lowering. The content of an assertion + # is not considered to be quoted, so use K"Symbol" etc inside where necessary. + "assert" # Unique identifying integer for bindings (of variables, constants, etc) "BindingId" # Various heads harvested from flisp lowering. @@ -37,26 +57,36 @@ function _register_kinds() "lambda" "toplevel_butfirst" "const_if_global" - "isdefined" "moved_local" - "foreigncall" - "new" - "globalref" "outerref" - "enter" - "pop_exception" - "leave" "label" - "symbolic_label" - "symbolic_goto" - "goto" - "gotoifnot" "trycatchelse" "tryfinally" - "method" - "slot" "unnecessary" "decl" "END_LOWERING_KINDS" + + # The following kinds are emitted by lowering and used in Julia's untyped IR + "BEGIN_IR_KINDS" + # Identifier for a value which is only assigned once + "SSAValue" + # Local variable in a `CodeInfo` code object (including lambda arguments) + "slot" + # Reference to a global variable within a module + "globalref" + # Unconditional goto + "goto" + # Conditional goto + "gotoifnot" + # Exception handling + "enter" + "leave" + "pop_exception" + # Lowering targets for method definitions arising from `function` etc + "method" + # Result of lowering a `K"lambda"` after bindings have been + # converted to slot/globalref/SSAValue. 
+ "code_info" + "END_IR_KINDS" ]) end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 892fac212e9ec..388dde1c16b7c 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -867,7 +867,7 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) ex elseif k == K"label" @ast ctx ex label_table[ex.id]::K"label" - elseif k == K"lambda" + elseif k == K"code_info" ex else mapchildren(ctx, ex) do e @@ -952,7 +952,7 @@ function compile_lambda(outer_ctx, ex) end # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) - makenode(ctx, ex, K"lambda", + makenode(ctx, ex, K"code_info", makenode(ctx, ex[1], K"block", code), lambda_info=lambda_info, slots=slots diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 58c39f243f0ce..d9c13ec0465e2 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -76,7 +76,7 @@ function print_ir(io::IO, ex, indent="") lno = rpad(i, 3) if kind(e) == K"method" && numchildren(e) == 3 println(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) - @assert kind(e[3]) == K"lambda" + @assert kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" print_ir(io, e[3], " ") else code = string(e) From 9de72c964f78e1dea843c24e667b70eb541bcdf8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 7 Oct 2024 19:18:51 +1000 Subject: [PATCH 0881/1109] Desugaring of `struct` type definitions Still todo: * inner constructors * outer constructors * doc binding Also included here is `K"alias_binding"` - a more general replacement for the `outerref` used in flisp lowering. `alias_binding` allows one to allocate a binding early during desugaring and make this binding an alias for a given name. Bindings don't participate in scope resolution, so this allows us to bypass the usual scoping rules. 
For example, to refer to a global struct_name from an outer scope, but within an inner scope where the identifier struct_name is bound to a local variable. (We could also replace outerref by generating a new scope_layer and perhaps that would be simpler?) --- JuliaLowering/src/ast.jl | 5 + JuliaLowering/src/desugaring.jl | 215 +++++++++++++++++++++++++--- JuliaLowering/src/kinds.jl | 2 + JuliaLowering/src/scope_analysis.jl | 34 ++++- JuliaLowering/src/utils.jl | 2 +- JuliaLowering/test/typedefs.jl | 14 ++ JuliaLowering/test/typedefs_ir.jl | 108 ++++++++++++++ 7 files changed, 353 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 92456e8e0f018..ba663f94a11f9 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -254,6 +254,11 @@ function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; is_always_d var end +function alias_binding(ctx::AbstractLoweringContext, srcref) + id = new_binding(ctx.bindings, BindingInfo("alias", :alias; is_internal=true)) + makeleaf(ctx, srcref, K"BindingId", var_id=id) +end + # Assign `ex` to an SSA variable. 
# Return (variable, assignment_node) function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp") diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 44f5667f0984f..5c602fe587270 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1486,35 +1486,31 @@ end # Analyze type signatures such as `A <: B where C` # -# Return (name, params, supertype) +# Return (name, typevar_names, typevar_stmts, supertype) function analyze_type_sig(ctx, ex) k = kind(ex) if k == K"Identifier" - return (ex, (), @ast ctx ex "Any"::K"core") + name = ex + params = () + supertype = @ast ctx ex "Any"::K"core" elseif k == K"curly" && numchildren(ex) >= 1 && kind(ex[1]) == K"Identifier" # name{params} - return (ex[1], ex[2:end], @ast ctx ex "Any"::K"core") + name = ex[1] + params = ex[2:end] + supertype = @ast ctx ex "Any"::K"core" elseif k == K"<:" && numchildren(ex) == 2 if kind(ex[1]) == K"Identifier" - return (ex[1], (), ex[2]) + name = ex[1] + params = () + supertype = ex[2] elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier" - return (ex[1][1], ex[1][2:end], ex[2]) + name = ex[1][1] + params = ex[1][2:end] + supertype = ex[2] end end - throw(LoweringError(ex, "invalid type signature")) -end + @isdefined(name) || throw(LoweringError(ex, "invalid type signature")) -function expand_abstract_or_primitive_type(ctx, ex) - is_abstract = kind(ex) == K"abstract" - if is_abstract - @chk numchildren(ex) == 1 - elseif kind(ex) == K"primitive" - @chk numchildren(ex) == 2 - nbits = ex[2] - else - @assert false - end - name, params, supertype = analyze_type_sig(ctx, ex[1]) typevar_names = SyntaxList(ctx) typevar_stmts = SyntaxList(ctx) for param in params @@ -1524,9 +1520,22 @@ function expand_abstract_or_primitive_type(ctx, ex) push!(typevar_stmts, @ast ctx param [K"local" n]) push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) end + return (name, typevar_names, 
typevar_stmts, supertype) +end + +function expand_abstract_or_primitive_type(ctx, ex) + is_abstract = kind(ex) == K"abstract" + if is_abstract + @chk numchildren(ex) == 1 + else + @assert kind(ex) == K"primitive" + @chk numchildren(ex) == 2 + nbits = ex[2] + end + name, typevar_names, typevar_stmts, supertype = analyze_type_sig(ctx, ex[1]) newtype_var = ssavar(ctx, ex, "new_type") @ast ctx ex [K"block" - [K"scope_block"(scope_type=:neutral) + [K"scope_block"(scope_type=:hard) [K"block" [K"local_def" name] typevar_stmts... @@ -1562,6 +1571,172 @@ function expand_abstract_or_primitive_type(ctx, ex) ] end +function _match_struct_field(x0) + type=nothing + docs=nothing + atomic=false + _const=false + x = x0 + while true + k = kind(x) + if k == K"Identifier" + return (name=x, type=type, atomic=atomic, _const=_const, docs=docs) + elseif k == K"::" && numchildren(x) == 2 + isnothing(type) || throw(LoweringError(x0, "multiple types in struct field")) + type = x[2] + x = x[1] + elseif k == K"atomic" + atomic = true + x = x[1] + elseif k == K"const" + _const = true + x = x[1] + elseif k == K"doc" + docs = x[1] + x = x[2] + else + return nothing + end + end +end + +function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, defs, exs) + for e in exs + if kind(e) == K"block" + _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, + defs, children(e)) + elseif kind(e) == K"=" + throw(LoweringError(e, "assignment syntax in structure fields is reserved")) + else + m = _match_struct_field(e) + if !isnothing(m) + # Struct field + n = length(field_names) + push!(field_names, @ast ctx m.name m.name=>K"Symbol") + push!(field_types, isnothing(m.type) ? 
@ast(ctx, e, "Any"::K"core") : m.type) + if m.atomic + push!(field_attrs, @ast ctx e n::K"Integer") + push!(field_attrs, @ast ctx e "atomic"::K"Symbol") + end + if m._const + push!(field_attrs, @ast ctx e n::K"Integer") + push!(field_attrs, @ast ctx e "const"::K"Symbol") + end + if !isnothing(m.docs) + push!(field_docs, @ast ctx e n::K"Integer") + push!(field_docs, @ast ctx e m.docs) + end + else + # Inner constructors + push!(defs, e) + end + end + end +end + +function _constructor_min_initalized(ex::SyntaxTree) + if kind(ex) == K"call" && ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") || + (kind(ex[1]) == K"curly" && kind(ex[1][1]) == K"Identifier" && ex[1][1].name_val == "new")) + numchildren(ex) - 1 + elseif !is_leaf(ex) + _constructor_min_initalized(children(ex)) + else + typemax(Int) + end +end + +function _constructor_min_initalized(exs::AbstractVector) + minimum((_constructor_min_initalized(e) for e in exs), init=typemax(Int)) +end + +function expand_struct_def(ctx, ex, docs) + @chk numchildren(ex) == 2 + type_sig = ex[1] + type_body = ex[2] + if kind(type_body) != K"block" + throw(LoweringError(type_body, "expected block for `struct` fields")) + end + struct_name, typevar_names, typevar_stmts, supertype = analyze_type_sig(ctx, type_sig) + field_names = SyntaxList(ctx) + field_types = SyntaxList(ctx) + field_attrs = SyntaxList(ctx) + field_docs = SyntaxList(ctx) + defs = SyntaxList(ctx) + _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, + defs, children(type_body)) + is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG) + min_initialized = min(_constructor_min_initalized(defs), length(field_names)) + newtype_var = ssavar(ctx, ex, "struct_type") + outer_struct_var = alias_binding(ctx, struct_name) + if !isempty(typevar_names) + # Generate expression like `prev_struct.body.body.parameters` + prev_typevars = outer_struct_var + for _ in 1:length(typevar_names) + prev_typevars = @ast ctx type_sig [K"." 
prev_typevars "body"::K"Symbol"] + end + prev_typevars = @ast ctx type_sig [K"." prev_typevars "parameters"::K"Symbol"] + end + if isempty(defs) + end + @ast ctx ex [K"block" + [K"global" struct_name] + [K"const" struct_name] + [K"alias_binding" outer_struct_var struct_name] + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] + [K"scope_block"(scope_type=:hard) + [K"block" + [K"local_def" struct_name] + typevar_stmts... + [K"=" + newtype_var + [K"call" + "_structtype"::K"core" + ctx.mod::K"Value" + struct_name=>K"Symbol" + [K"call"(type_sig) "svec"::K"core" typevar_names...] + [K"call"(type_body) "svec"::K"core" field_names...] + [K"call"(type_body) "svec"::K"core" field_attrs...] + is_mutable::K"Bool" + min_initialized::K"Integer" + ] + ] + [K"=" struct_name newtype_var] + [K"call"(supertype) "_setsuper!"::K"core" newtype_var supertype] + [K"if" + [K"isdefined" outer_struct_var] + [K"if" + [K"call" "_equiv_typedef"::K"core" outer_struct_var newtype_var] + [K"block" + # If this is compatible with an old definition, use + # the existing type object and throw away + # NB away the new type + [K"=" struct_name outer_struct_var] + if !isempty(typevar_names) + # And resassign the typevars - these may be + # referenced in the definition of the field + # types below + [K"=" + [K"tuple" typevar_names...] + prev_typevars + ] + end + ] + # Otherwise do an assignment to trigger an error + [K"=" outer_struct_var struct_name] + ] + [K"=" outer_struct_var struct_name] + ] + [K"call"(type_body) + "_typebody!"::K"core" + struct_name + [K"call" "svec"::K"core" field_types...] 
+ ] + ] + ] + nothing_(ctx, ex) + ] +end + function expand_where(ctx, srcref, lhs, rhs) bounds = analyze_typevar(ctx, rhs) v = bounds[1] @@ -1851,6 +2026,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) TODO(ex) elseif k == K"abstract" || k == K"primitive" expand_forms_2(ctx, expand_abstract_or_primitive_type(ctx, ex)) + elseif k == K"struct" + expand_forms_2(ctx, expand_struct_def(ctx, ex, docs)) elseif k == K"ref" if numchildren(ex) > 2 TODO(ex, "ref expansion") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index c423972f8850d..6d0ab18c05b82 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -42,6 +42,8 @@ function _register_kinds() # Semantic assertions used by lowering. The content of an assertion # is not considered to be quoted, so use K"Symbol" etc inside where necessary. "assert" + # Alias a name to a given binding + "alias_binding" # Unique identifying integer for bindings (of variables, constants, etc) "BindingId" # Various heads harvested from flisp lowering. 
diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 618b8134e8205..8b00b895d4969 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -45,12 +45,14 @@ struct NameKey end #------------------------------------------------------------------------------- -function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex) +function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, ex) k = kind(ex) if k == K"Identifier" push!(used_names, NameKey(ex)) elseif k == K"BindingId" push!(used_bindings, ex.var_id) + elseif k == K"alias_binding" + push!(alias_bindings, NameKey(ex[2])=>ex[1].var_id) elseif is_leaf(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return @@ -64,10 +66,10 @@ function _find_scope_vars!(assignments, locals, globals, used_names, used_bindin if !(kind(v) in KSet"BindingId globalref outerref Placeholder") get!(assignments, NameKey(v), v) end - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, ex[2]) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, ex[2]) else for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, e) end end end @@ -83,8 +85,9 @@ function find_scope_vars(ex) globals = Dict{NameKey,ExT}() used_names = Set{NameKey}() used_bindings = Set{IdTag}() + alias_bindings = Vector{Pair{NameKey,IdTag}}() for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, e) + _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, e) end # Sort by key so that id generation is deterministic @@ -94,7 +97,7 @@ function find_scope_vars(ex) used_names = sort(collect(used_names)) used_bindings = sort(collect(used_bindings)) - 
return assignments, locals, globals, used_names, used_bindings + return assignments, locals, globals, used_names, used_bindings, alias_bindings end function Base.isless(a::NameKey, b::NameKey) @@ -137,6 +140,8 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext scope_layers::Vector{ScopeLayer} # name=>id mappings for all discovered global vars global_vars::Dict{NameKey,IdTag} + # Map for rewriting binding aliases + alias_map::Dict{IdTag,IdTag} # Stack of name=>id mappings for each scope, innermost scope last. scope_stack::Vector{ScopeInfo} # Variables which were implicitly global due to being assigned to in top @@ -151,6 +156,7 @@ function ScopeResolutionContext(ctx) ctx.mod, ctx.scope_layers, Dict{NameKey,IdTag}(), + Dict{IdTag,IdTag}(), Vector{ScopeInfo}(), Set{NameKey}()) end @@ -200,7 +206,7 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) in_toplevel_thunk = is_toplevel_global_scope || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) - assignments, locals, globals, used, used_bindings = find_scope_vars(ex) + assignments, locals, globals, used, used_bindings, alias_bindings = find_scope_vars(ex) # Create new lookup table for variables in this scope which differ from the # parent scope. 
@@ -336,6 +342,13 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end + for (varkey, id) in alias_bindings + @assert !haskey(ctx.alias_map, id) + ctx.alias_map[id] = get(var_ids, varkey) do + lookup_var(ctx, varkey) + end + end + return ScopeInfo(is_toplevel_global_scope, in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_locals) end @@ -372,11 +385,18 @@ function _resolve_scopes(ctx, ex::SyntaxTree) if k == K"Identifier" id = lookup_var(ctx, NameKey(ex)) @ast ctx ex id::K"BindingId" + elseif k == K"BindingId" + mapped_id = get(ctx.alias_map, ex.var_id, nothing) + if isnothing(mapped_id) + ex + else + @ast ctx ex mapped_id::K"BindingId" + end elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" ex # elseif k == K"global" # ex - elseif k == K"local" + elseif k == K"local" || k == K"alias_binding" makeleaf(ctx, ex, K"TOMBSTONE") elseif k == K"local_def" id = lookup_var(ctx, NameKey(ex[1])) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index d9c13ec0465e2..7a3c742e115ff 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -70,7 +70,7 @@ function showprov(x; kws...) 
end function print_ir(io::IO, ex, indent="") - @assert kind(ex) == K"lambda" && kind(ex[1]) == K"block" + @assert (kind(ex) == K"lambda" || kind(ex) == K"code_info") && kind(ex[1]) == K"block" stmts = children(ex[1]) for (i, e) in enumerate(stmts) lno = rpad(i, 3) diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index 17884b93d9173..865d26e78a141 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -36,4 +36,18 @@ primitive type P <: A 16 end @test reinterpret(test_mod.P, 0x0001) isa test_mod.P @test reinterpret(UInt16, reinterpret(test_mod.P, 0x1337)) === 0x1337 +@test JuliaLowering.include_string(test_mod, """ +struct S1{X,Y} <: A + x::X + y::Y + z +end +""") === nothing +@test !isconcretetype(test_mod.S1) +@test fieldnames(test_mod.S1) == (:x, :y, :z) +@test fieldtypes(test_mod.S1) == (Any, Any, Any) +@test isconcretetype(test_mod.S1{Int,String}) +@test fieldtypes(test_mod.S1{Int,String}) == (Int, String, Any) +@test supertype(test_mod.S1) == test_mod.A + end diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index d3e8fb0ae273c..521759d2be9e3 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -1,3 +1,13 @@ +using JuliaLowering: JuliaLowering, @ast, @chk +using JuliaSyntax + +function var"@atomic"(__context__::JuliaLowering.MacroContext, ex) + @chk kind(ex) == JuliaSyntax.K"Identifier" || kind(ex) == JuliaSyntax.K"::" (ex, + "Expected identifier or declaration") + @ast __context__ ex [K"atomic" ex] +end + +#******************************************************************************* ######################################## # where expression without type bounds A where X @@ -287,3 +297,101 @@ primitive type P P_nbits() end 16 (= TestMod.P %₄) 17 (return core.nothing) +######################################## +# Basic struct +struct X + a + b::T + c +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (call 
core.svec) +4 (call core.svec :a :b :c) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 3) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (isdefined TestMod.X) +10 (gotoifnot %₉ label₂₀) +11 TestMod.X +12 (call core._equiv_typedef %₁₁ %₆) +13 (gotoifnot %₁₂ label₁₇) +14 TestMod.X +15 (= slot₁/X %₁₄) +16 (goto label₁₉) +17 slot₁/X +18 (= TestMod.X %₁₇) +19 (goto label₂₂) +20 slot₁/X +21 (= TestMod.X %₂₀) +22 slot₁/X +23 TestMod.T +24 (call core.svec core.Any %₂₃ core.Any) +25 (call core._typebody! %₂₂ %₂₄) +26 (return core.nothing) + +######################################## +# Struct with supertype and type params +struct X{U, S <: V <: T} <: Z +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (= slot₂/U (call core.TypeVar :U)) +4 TestMod.S +5 TestMod.T +6 (= slot₃/V (call core.TypeVar :V %₄ %₅)) +7 slot₂/U +8 slot₃/V +9 (call core.svec %₇ %₈) +10 (call core.svec) +11 (call core.svec) +12 (call core._structtype TestMod :X %₉ %₁₀ %₁₁ false 0) +13 (= slot₄/X %₁₂) +14 TestMod.Z +15 (call core._setsuper! %₁₂ %₁₄) +16 (isdefined TestMod.X) +17 (gotoifnot %₁₆ label₃₇) +18 TestMod.X +19 (call core._equiv_typedef %₁₈ %₁₂) +20 (gotoifnot %₁₉ label₃₄) +21 TestMod.X +22 (= slot₄/X %₂₁) +23 TestMod.X +24 (call top.getproperty %₂₃ :body) +25 (call top.getproperty %₂₄ :body) +26 (call top.getproperty %₂₅ :parameters) +27 (call top.indexed_iterate %₂₆ 1) +28 (= slot₂/U (call core.getfield %₂₇ 1)) +29 (= slot₁/iterstate (call core.getfield %₂₇ 2)) +30 slot₁/iterstate +31 (call top.indexed_iterate %₂₆ 2 %₃₀) +32 (= slot₃/V (call core.getfield %₃₁ 1)) +33 (goto label₃₆) +34 slot₄/X +35 (= TestMod.X %₃₄) +36 (goto label₃₉) +37 slot₄/X +38 (= TestMod.X %₃₇) +39 slot₄/X +40 (call core.svec) +41 (call core._typebody! 
%₃₉ %₄₀) +42 (return core.nothing) + +######################################## +# Error: Struct not at top level +function f() + struct X + end +end +#--------------------- +LoweringError: +function f() +# ┌─────── + struct X + end +#─────┘ ── this syntax is only allowed in top level code +end + From 14e46c890ccebf01954e783a27091d08ea023a91 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 7 Oct 2024 20:49:54 +1000 Subject: [PATCH 0882/1109] Documentation binding for structs + test const/atomic struct fields --- JuliaLowering/src/desugaring.jl | 21 +++++++- JuliaLowering/src/runtime.jl | 29 +++++++++-- JuliaLowering/test/typedefs_ir.jl | 80 +++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 5c602fe587270..4f88a4e3aec57 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1611,8 +1611,8 @@ function _collect_struct_fields(ctx, field_names, field_types, field_attrs, fiel m = _match_struct_field(e) if !isnothing(m) # Struct field - n = length(field_names) push!(field_names, @ast ctx m.name m.name=>K"Symbol") + n = length(field_names) push!(field_types, isnothing(m.type) ? @ast(ctx, e, "Any"::K"core") : m.type) if m.atomic push!(field_attrs, @ast ctx e n::K"Integer") @@ -1733,6 +1733,25 @@ function expand_struct_def(ctx, ex, docs) ] ] ] + # Inner constructors + # TODO + # [K"scope_block"(scope_type=:hard) + # [K"block" + # [K"global" struct_name] + # ] + # ] + if !isnothing(docs) || !isempty(field_docs) + [K"call"(isnothing(docs) ? ex : docs) + bind_docs!::K"Value" + struct_name + isnothing(docs) ? nothing_(ctx, ex) : docs[1] + QuoteNode(source_location(LineNumberNode, ex))::K"Value" + [K"=" + "field_docs"::K"Identifier" + [K"call" "svec"::K"core" field_docs...] 
+ ] + ] + end nothing_(ctx, ex) ] end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index dab1c4346e21c..2fd3018d98e4d 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -170,17 +170,38 @@ function bind_docs!(f::Function, docstr, method_metadata) bind = Base.Docs.Binding(mod, nameof(f)) full_sig = method_metadata[1] arg_sig = Tuple{full_sig[2:end]...} - linenum = method_metadata[3] + lineno = method_metadata[3] metadata = Dict{Symbol, Any}( - :linenumber => linenum.line, + :linenumber => lineno.line, :module => mod, ) - if !isnothing(linenum.file) - push!(metadata, :path => string(linenum.file)) + if !isnothing(lineno.file) + push!(metadata, :path => string(lineno.file)) end Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig) end +function bind_docs!(type::Type, docstr, lineno; field_docs=Core.svec()) + mod = parentmodule(type) + bind = Base.Docs.Binding(mod, nameof(type)) + metadata = Dict{Symbol, Any}( + :linenumber => lineno, + :module => mod, + ) + if !isnothing(lineno.file) + push!(metadata, :path => string(lineno.file)) + end + if !isempty(field_docs) + fd = Dict{Symbol, Any}() + fns = fieldnames(type) + for i = 1:2:length(field_docs) + fd[fns[field_docs[i]]] = field_docs[i+1] + end + metadata[:fields] = fd + end + Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), Union{}) +end + #------------------------------------------------------------------------------- # The following functions are used by lowering to inspect Julia's state. diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 521759d2be9e3..d254af4de4ed0 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -380,6 +380,86 @@ end 41 (call core._typebody! 
%₃₉ %₄₀) 42 (return core.nothing) +######################################## +# Struct with const and atomic fields +struct X + const a + @atomic b + const @atomic c +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (call core.svec) +4 (call core.svec :a :b :c) +5 (call core.svec 1 :const 2 :atomic 3 :atomic 3 :const) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 3) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (isdefined TestMod.X) +10 (gotoifnot %₉ label₂₀) +11 TestMod.X +12 (call core._equiv_typedef %₁₁ %₆) +13 (gotoifnot %₁₂ label₁₇) +14 TestMod.X +15 (= slot₁/X %₁₄) +16 (goto label₁₉) +17 slot₁/X +18 (= TestMod.X %₁₇) +19 (goto label₂₂) +20 slot₁/X +21 (= TestMod.X %₂₀) +22 slot₁/X +23 (call core.svec core.Any core.Any core.Any) +24 (call core._typebody! %₂₂ %₂₃) +25 (return core.nothing) + +######################################## +# Documented struct +""" +X docs +""" +struct X + "field a docs" + a + "field b docs" + b +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (call core.svec) +4 (call core.svec :a :b) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 2) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (isdefined TestMod.X) +10 (gotoifnot %₉ label₂₀) +11 TestMod.X +12 (call core._equiv_typedef %₁₁ %₆) +13 (gotoifnot %₁₂ label₁₇) +14 TestMod.X +15 (= slot₁/X %₁₄) +16 (goto label₁₉) +17 slot₁/X +18 (= TestMod.X %₁₇) +19 (goto label₂₂) +20 slot₁/X +21 (= TestMod.X %₂₀) +22 slot₁/X +23 (call core.svec core.Any core.Any) +24 (call core._typebody! %₂₂ %₂₃) +25 JuliaLowering.bind_docs! 
+26 (call core.tuple :field_docs) +27 (call core.apply_type core.NamedTuple %₂₆) +28 (call core.svec 1 "field a docs" 2 "field b docs") +29 (call core.tuple %₂₈) +30 (call %₂₇ %₂₉) +31 TestMod.X +32 (call core.kwcall %₃₀ %₂₅ %₃₁ "X docs\n" :($(QuoteNode(:(#= line 4 =#))))) +33 (return core.nothing) + ######################################## # Error: Struct not at top level function f() From 460390eea173c8547ae9dc7d4d1da1213473469a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 9 Oct 2024 16:21:39 +1000 Subject: [PATCH 0883/1109] Generalize ast macro to allow ASTs nested in arbitrary expressions --- JuliaLowering/src/ast.jl | 59 +++++++++++++++++---------------- JuliaLowering/src/desugaring.jl | 15 ++++----- JuliaLowering/test/utils.jl | 5 +-- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index ba663f94a11f9..6fe3cd6e1a783 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -283,7 +283,7 @@ function _match_srcref(ex) end end -function _match_kind_ex(defs, srcref, ex) +function _match_kind(f::Function, srcref, ex) kws = [] if Meta.isexpr(ex, :call) kind = esc(ex.args[1]) @@ -296,22 +296,26 @@ function _match_kind_ex(defs, srcref, ex) pushfirst!(kws, esc(pop!(args))) end if length(args) == 1 - srcref = Symbol("srcref_$(length(defs))") - push!(defs, :($srcref = $(_match_srcref(args[1])))) + srcref_tmp = gensym("srcref") + return quote + $srcref_tmp = $(_match_srcref(args[1])) + $(f(kind, srcref_tmp, kws)) + end elseif length(args) > 1 error("Unexpected: extra srcref argument in `$ex`?") end else kind = esc(ex) end - kind, srcref, kws + f(kind, srcref, kws) end -function _expand_ast_tree(defs, ctx, srcref, tree) +function _expand_ast_tree(ctx, srcref, tree) if Meta.isexpr(tree, :(::)) # Leaf node - kind, srcref, kws = _match_kind_ex(defs, srcref, tree.args[2]) - :(makeleaf($ctx, $srcref, $kind, $(esc(tree.args[1])), $(kws...))) + _match_kind(srcref, tree.args[2]) do kind, 
srcref, kws + :(makeleaf($ctx, $srcref, $kind, $(esc(tree.args[1])), $(kws...))) + end elseif Meta.isexpr(tree, :call) && tree.args[1] === :(=>) # Leaf node with copied attributes kind = esc(tree.args[3]) @@ -327,20 +331,22 @@ function _expand_ast_tree(defs, ctx, srcref, tree) push!(flatargs, a) end end - kind, srcref, kws = _match_kind_ex(defs, srcref, flatargs[1]) - children = map(a->_expand_ast_tree(defs, ctx, srcref, a), flatargs[2:end]) - :(makenode($ctx, $srcref, $kind, $(children...), $(kws...))) - elseif Meta.isexpr(tree, :(=)) - lhs = esc(tree.args[1]) - rhs = _expand_ast_tree(defs, ctx, srcref, tree.args[2]) - ssadef = Symbol("ssadef$(length(defs))") - push!(defs, :(($lhs, $ssadef) = assign_tmp($ctx, $rhs))) - ssadef - elseif Meta.isexpr(tree, :if) - Expr(:if, esc(tree.args[1]), - map(a->_expand_ast_tree(defs, ctx, srcref, a), tree.args[2:end])...) - elseif Meta.isexpr(tree, (:block, :tuple)) - Expr(tree.head, map(a->_expand_ast_tree(defs, ctx, srcref, a), tree.args)...) + _match_kind(srcref, flatargs[1]) do kind, srcref, kws + children = map(a->_expand_ast_tree(ctx, srcref, a), flatargs[2:end]) + :(makenode($ctx, $srcref, $kind, $(children...), $(kws...))) + end + elseif Meta.isexpr(tree, :(:=)) + lhs = tree.args[1] + rhs = _expand_ast_tree(ctx, srcref, tree.args[2]) + ssadef = gensym("ssadef") + quote + ($(esc(lhs)), $ssadef) = assign_tmp($ctx, $rhs, $(string(lhs))) + $ssadef + end + elseif Meta.isexpr(tree, :macrocall) + esc(tree) + elseif tree isa Expr + Expr(tree.head, map(a->_expand_ast_tree(ctx, srcref, a), tree.args)...) else esc(tree) end @@ -359,7 +365,7 @@ The `tree` contains syntax of the following forms: * `value :: kind` - construct a leaf node * `ex => kind` - convert a leaf node to the given `kind`, copying attributes from it and also using `ex` as the source reference. -* `var=ex` - Set `var=ssavar(...)` and return an assignment node `\$var=ex`. +* `var := ex` - Set `var=ssavar(...)` and return an assignment node `\$var=ex`. 
`var` may be used outside `@ast` * `cond ? ex1 : ex2` - Conditional; `ex1` and `ex2` will be recursively expanded. `if ... end` and `if ... else ... end` also work with this. @@ -399,13 +405,10 @@ to indicate that the "primary" location of the source is the location where ``` """ macro ast(ctx, srcref, tree) - defs = [] - push!(defs, :(ctx = $(esc(ctx)))) - push!(defs, :(srcref = $(_match_srcref(srcref)))) - ex = _expand_ast_tree(defs, :ctx, :srcref, tree) quote - $(defs...) - $ex + ctx = $(esc(ctx)) + srcref = $(_match_srcref(srcref)) + $(_expand_ast_tree(:ctx, :srcref, tree)) end end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 4f88a4e3aec57..35c318e549b9b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -516,7 +516,7 @@ function expand_setindex(ctx, ex) end @ast ctx ex [K"block" stmts... - expand_forms_2(ctx, @ast ctx ex [K"call" + expand_forms_2(ctx, [K"call" "setindex!"::K"top" arr rhs @@ -707,9 +707,8 @@ function expand_let(ctx, ex) lhs = binding[1] rhs = binding[2] if is_sym_decl(lhs) - blk = @ast ctx binding [ - K"block" - tmp=rhs + blk = @ast ctx binding [K"block" + tmp := rhs [K"scope_block"(ex, scope_type=scope_type) # TODO: Use single child for scope_block? [K"local_def"(lhs) lhs] # TODO: Use K"local" with attr? @@ -830,10 +829,10 @@ end function expand_kw_call(ctx, srcref, farg, args, kws) @ast ctx srcref [K"block" - func = farg - kw_container = expand_named_tuple(ctx, srcref, kws; - field_name="keyword argument", - element_name="keyword argument") + func := farg + kw_container := expand_named_tuple(ctx, srcref, kws; + field_name="keyword argument", + element_name="keyword argument") if all(kind(kw) == K"..." 
for kw in kws) # In this case need to check kws nonempty at runtime [K"if" diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 84428e648fd94..4747b6994fab0 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -29,13 +29,10 @@ function _source_node(graph, src) end macro ast_(tree) - defs = [] - ex = JuliaLowering._expand_ast_tree(defs, :graph, :srcref, tree) quote graph = _ast_test_graph() srcref = _source_node(graph, $(QuoteNode(__source__))) - $(defs...) - $ex + $(JuliaLowering._expand_ast_tree(:graph, :srcref, tree)) end end From 94e41f7909ff9a144da0019d397ab74c88e74e84 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 12 Oct 2024 22:03:15 +1000 Subject: [PATCH 0884/1109] Static parameters in function definitions --- JuliaLowering/src/desugaring.jl | 125 +++++++++++++++++----------- JuliaLowering/src/eval.jl | 2 + JuliaLowering/src/kinds.jl | 2 + JuliaLowering/src/linear_ir.jl | 30 ++++--- JuliaLowering/src/scope_analysis.jl | 15 +++- JuliaLowering/src/syntax_graph.jl | 1 + JuliaLowering/test/demo.jl | 25 +++++- JuliaLowering/test/functions.jl | 25 ++++-- JuliaLowering/test/functions_ir.jl | 99 ++++++++++++++++++++++ 9 files changed, 252 insertions(+), 72 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 35c318e549b9b..585cc2cd993f4 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1259,29 +1259,57 @@ function analyze_function_arg(full_ex) is_nospecialize=is_nospecialize) end +function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) + if kind(ex) == K"where" && numchildren(ex) == 2 + params = kind(ex[2]) == K"braces" ? 
ex[2][1:end] : ex[2:2] + for param in params + bounds = analyze_typevar(ctx, param) + n = bounds[1] + push!(typevar_names, n) + push!(typevar_stmts, @ast ctx param [K"local" n]) + push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) + end + _split_wheres!(ctx, typevar_names, typevar_stmts, ex[1]) + else + ex + end +end + function expand_function_def(ctx, ex, docs) @chk numchildren(ex) in (1,2) name = ex[1] + if numchildren(ex) == 1 && is_identifier_like(name) + # Function declaration with no methods + if !is_valid_name(name) + throw(LoweringError(name, "Invalid function name")) + end + return @ast ctx ex [K"method" name=>K"Symbol"] + end + + typevar_names = SyntaxList(ctx) + typevar_stmts = SyntaxList(ctx) if kind(name) == K"where" - TODO("where handling") + # `where` vars end up in two places + # 1. Argument types - the `T` in `x::T` becomes a `TypeVar` parameter in + # the method sig, eg, `function f(x::T) where T ...`. These define the + # static parameters of the method. + # 2. 
In the method body - either explicitly or implicitly via the method + # return type or default arguments - where `T` turns up as the *name* of + # a special slot of kind ":static_parameter" + name = _split_wheres!(ctx, typevar_names, typevar_stmts, name) end + return_type = nothing if kind(name) == K"::" @chk numchildren(name) == 2 return_type = name[2] name = name[1] end - if numchildren(ex) == 1 && is_identifier_like(name) - # Function declaration with no methods - if !is_valid_name(name) - throw(LoweringError(name, "Invalid function name")) - end - return @ast ctx ex [K"method" name=>K"Symbol"] - elseif kind(name) == K"call" + + if kind(name) == K"call" callex = name body = ex[2] # TODO - # static params # nospecialize # argument destructuring # dotop names @@ -1325,9 +1353,6 @@ function expand_function_def(ctx, ex, docs) end args = pushfirst!(collect(args), farg) - # preamble is arbitrary code which computes - # svec(types, sparms, location) - arg_names = SyntaxList(ctx) arg_types = SyntaxList(ctx) for (i,arg) in enumerate(args) @@ -1345,19 +1370,6 @@ function expand_function_def(ctx, ex, docs) push!(arg_types, atype) end - preamble = @ast ctx callex [ - K"call" - "svec" ::K"core" - [K"call" - "svec" ::K"core" - arg_types... - ] - [K"call" - "svec" ::K"core" - # FIXME sparams - ] - QuoteNode(source_location(LineNumberNode, callex))::K"Value" - ] if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") body = @ast ctx body [ @@ -1368,25 +1380,41 @@ function expand_function_def(ctx, ex, docs) else ret_var = nothing end - @ast ctx ex [ - K"block" - func_var_assignment - [K"method" - function_name - preamble - [K"lambda"(body, lambda_info=LambdaInfo(arg_names, static_parameters, ret_var, false)) - body + + @ast ctx ex [K"scope_block"(scope_type=:hard) + [K"block" + func_var_assignment + typevar_stmts... 
+ # metadata contains svec(types, sparms, location) + method_metadata := [K"call"(callex) + "svec" ::K"core" + [K"call" + "svec" ::K"core" + arg_types... + ] + [K"call" + "svec" ::K"core" + typevar_names... + ] + QuoteNode(source_location(LineNumberNode, callex))::K"Value" ] - ] - if !isnothing(docs) - [K"call"(docs) - bind_docs!::K"Value" - func_var - docs[1] + [K"method" + function_name method_metadata + [K"lambda"(body, lambda_info=LambdaInfo(arg_names, typevar_names, ret_var, false)) + body + ] ] - end - [K"unnecessary" func_var] + if !isnothing(docs) + [K"call"(docs) + bind_docs!::K"Value" + func_var + docs[1] + method_metadata + ] + end + [K"unnecessary" func_var] + ] ] elseif kind(name) == K"tuple" TODO(name, "Anon function lowering") @@ -1485,7 +1513,12 @@ end # Analyze type signatures such as `A <: B where C` # -# Return (name, typevar_names, typevar_stmts, supertype) +# Return (name, typevar_names, typevar_stmts, supertype) where +# - `name` is the name of the type +# - `typevar_names` are the names of the types's type parameters +# - `typevar_stmts` are a list of statements to define a `TypeVar` for each +# name in `typevar_names`, to be emitted prior to uses of `typevar_names` +# - `supertype` is the super type of the type function analyze_type_sig(ctx, ex) k = kind(ex) if k == K"Identifier" @@ -1610,7 +1643,7 @@ function _collect_struct_fields(ctx, field_names, field_types, field_attrs, fiel m = _match_struct_field(e) if !isnothing(m) # Struct field - push!(field_names, @ast ctx m.name m.name=>K"Symbol") + push!(field_names, m.name) n = length(field_names) push!(field_types, isnothing(m.type) ? @ast(ctx, e, "Any"::K"core") : m.type) if m.atomic @@ -1693,7 +1726,7 @@ function expand_struct_def(ctx, ex, docs) ctx.mod::K"Value" struct_name=>K"Symbol" [K"call"(type_sig) "svec"::K"core" typevar_names...] - [K"call"(type_body) "svec"::K"core" field_names...] + [K"call"(type_body) "svec"::K"core" [n=>K"Symbol" for n in field_names]...] 
[K"call"(type_body) "svec"::K"core" field_attrs...] is_mutable::K"Bool" min_initialized::K"Integer" @@ -1711,7 +1744,7 @@ function expand_struct_def(ctx, ex, docs) # NB away the new type [K"=" struct_name outer_struct_var] if !isempty(typevar_names) - # And resassign the typevars - these may be + # And resassign the typevar_names - these may be # referenced in the definition of the field # types below [K"=" @@ -2004,8 +2037,6 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) end elseif k == K"where" expand_forms_2(ctx, expand_wheres(ctx, ex)) - elseif is_operator(k) && is_leaf(ex) - makeleaf(ctx, ex, K"Identifier", ex.name_val) elseif k == K"char" || k == K"var" @chk numchildren(ex) == 1 ex[1] diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index c8378a9f82d76..cb55439b3ceed 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -199,6 +199,8 @@ function to_lowered_expr(mod, ex) QuoteNode(Symbol(ex.name_val)) elseif k == K"slot" Core.SlotNumber(ex.var_id) + elseif k == K"static_parameter" + Expr(:static_parameter, ex.var_id) elseif k == K"SSAValue" Core.SSAValue(ex.var_id) elseif k == K"return" diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 6d0ab18c05b82..a3618f2c1cbdd 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -74,6 +74,8 @@ function _register_kinds() "SSAValue" # Local variable in a `CodeInfo` code object (including lambda arguments) "slot" + # Static parameter to a `CodeInfo` code object ("type parameters" to methods) + "static_parameter" # Reference to a global variable within a module "globalref" # Unconditional goto diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 388dde1c16b7c..e84ce0b605d57 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -846,19 +846,18 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) if haskey(ssa_rewrites, id) 
makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[id]) else - slot_id = get(slot_rewrites, id, nothing) - if !isnothing(slot_id) - makeleaf(ctx, ex, K"slot"; var_id=slot_id) + new_id = get(slot_rewrites, id, nothing) + binfo = lookup_binding(ctx, id) + if !isnothing(new_id) + sk = binfo.kind == :local || binfo.kind == :argument ? K"slot" : + binfo.kind == :static_parameter ? K"static_parameter" : + throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)")) + makeleaf(ctx, ex, sk; var_id=new_id) else - # TODO: look up any static parameters - # TODO: Should we defer rewriting globals to globalref until - # CodeInfo generation? - info = lookup_binding(ctx, id) - if info.kind === :global - makeleaf(ctx, ex, K"globalref", info.name, mod=info.mod) - else - TODO(ex, "Bindings of kind $(info.kind)") + if binfo.kind !== :global + throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)")) end + makeleaf(ctx, ex, K"globalref", binfo.name, mod=binfo.mod) end end elseif k == K"outerref" || k == K"meta" @@ -946,10 +945,17 @@ function compile_lambda(outer_ctx, ex) # Sorting the lambda locals is required to remove dependence on Dict iteration order. 
for id in sort(collect(ex.lambda_locals)) info = lookup_binding(ctx.bindings, id) - @assert info.kind == :local || info.kind == :argument + @assert info.kind == :local push!(slots, Slot(info.name)) slot_rewrites[id] = length(slots) end + for (i,arg) in enumerate(lambda_info.static_parameters) + @assert kind(arg) == K"BindingId" + id = arg.var_id + info = lookup_binding(ctx.bindings, id) + @assert info.kind == :static_parameter + slot_rewrites[id] = i + end # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"code_info", diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 8b00b895d4969..1d514169f2736 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -212,12 +212,19 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) # parent scope. var_ids = Dict{NameKey,IdTag}() - # Add lambda arguments + # Add lambda arguments and static parameters function add_lambda_args(args, var_kind) - for a in args - ka = kind(a) + for arg in args + ka = kind(arg) if ka == K"Identifier" - varkey = NameKey(a) + varkey = NameKey(arg) + if haskey(var_ids, varkey) + vk = lookup_binding(ctx, var_ids[varkey]).kind + msg = vk == :argument && var_kind == vk ? "function argument name not unique" : + vk == :static_parameter && var_kind == vk ? "function static parameter name not unique" : + "static parameter name not distinct from function argument" + throw(LoweringError(arg, msg)) + end var_ids[varkey] = init_binding(ctx, varkey, var_kind) elseif ka != K"BindingId" && ka != K"Placeholder" throw(LoweringError(a, "Unexpected lambda arg kind")) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 2f37e80194bb4..10786ad8fae77 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -417,6 +417,7 @@ function _value_string(ex) k == K"Symbol" ? ":$(ex.name_val)" : k == K"globalref" ? 
"$(ex.mod).$(ex.name_val)" : k == K"slot" ? "slot" : + k == K"static_parameter" ? "static_parameter" : k == K"symbolic_label" ? "label:$(ex.name_val)" : k == K"symbolic_goto" ? "goto:$(ex.name_val)" : repr(get(ex, :value, nothing)) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index e064b5f227116..9c3a496898670 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -77,7 +77,7 @@ baremodule M @ast __context__ __context__.macroname [K"extension" "locals"::K"Symbol"] end - JuliaLowering.include(M, "demo_include.jl") + # JuliaLowering.include(M, "demo_include.jl") end #------------------------------------------------------------------------------- @@ -526,7 +526,25 @@ end """ src = """ -abstract type Abstract1 end +"some docs" +function f() + println("hi") +end +""" + +src = """ +struct X{U,V} + x::U + y::V +end +""" + +src = """ +function f(::T, ::U, ::S) where T where {U,S} + println(T) + println(U) + println(S) +end """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") @@ -534,7 +552,8 @@ ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @info "Input code" formatsrc(ex) -in_mod = Main +module MMM end +in_mod = MMM ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) @info "Macro expanded" ex_macroexpand formatsrc(ex_macroexpand, color_by=:scope_layer) #@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.flattened_provenance(e)[1:end-1]) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 16fe6e22025d4..c6da226834478 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -23,12 +23,12 @@ end # Keyword calls Base.eval(test_mod, :( begin - function f(; kws...) + function kwtest(; kws...) 
values(kws) end - function f() - "non-kw version of f" + function kwtest() + "non-kw version of kwtest" end end )) @@ -37,16 +37,16 @@ end @test JuliaLowering.include_string(test_mod, """ let kws = (c=3,d=4) - f(; kws..., a=1, d=0, e=5) + kwtest(; kws..., a=1, d=0, e=5) end """) == (c=3, d=0, a=1, e=5) @test JuliaLowering.include_string(test_mod, """ let kws = (;) - f(; kws..., kws...) + kwtest(; kws..., kws...) end -""") == "non-kw version of f" +""") == "non-kw version of kwtest" # literal_pow @test JuliaLowering.include_string(test_mod, """ @@ -87,6 +87,19 @@ begin end """) === (42, 255) +# static parameters +@test JuliaLowering.include_string(test_mod, """ +begin + function h(x, y) + "fallback" + end + function h(::Vector{T}, ::S) where {T, S <: T} + T, S + end + (h(1, 2), h(Number[0xff], 1.0), h(Int[1], 1), h(Int[1], 1.0)) +end +""") === ("fallback", (Number, Float64), (Int, Int), "fallback") + Base.include_string(test_mod, """ struct X end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 52ed60ba540da..24e4f815df693 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -96,6 +96,87 @@ function f(xs..., y) body end +######################################## +# Basic static parameters +function f(::T, ::U, ::V) where T where {U,V} + (T,U,V) +end +#--------------------- +1 (method :f) +2 (= slot₂/U (call core.TypeVar :U)) +3 (= slot₃/V (call core.TypeVar :V)) +4 (= slot₁/T (call core.TypeVar :T)) +5 (call core.Typeof %₁) +6 slot₁/T +7 slot₂/U +8 slot₃/V +9 (call core.svec %₅ %₆ %₇ %₈) +10 slot₂/U +11 slot₃/V +12 slot₁/T +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 (call core.svec %₉ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) +15 --- method :f %₁₄ + 1 static_parameter₃ + 2 static_parameter₁ + 3 static_parameter₂ + 4 (call core.tuple %₁ %₂ %₃) + 5 (return %₄) +16 (return %₁) + +######################################## +# Static parameter with bounds and used with apply_type in argument +function 
f(::S{T}) where X <: T <: Y + T +end +#--------------------- +1 (method :f) +2 TestMod.X +3 TestMod.Y +4 (= slot₁/T (call core.TypeVar :T %₂ %₃)) +5 (call core.Typeof %₁) +6 TestMod.S +7 slot₁/T +8 (call core.apply_type %₆ %₇) +9 (call core.svec %₅ %₈) +10 slot₁/T +11 (call core.svec %₁₀) +12 (call core.svec %₉ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method :f %₁₂ + 1 static_parameter₁ + 2 (return %₁) +14 (return %₁) + +######################################## +# Error: Duplicate function argument names +function f(x, x) +end +#--------------------- +LoweringError: +function f(x, x) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Static parameter name not unique +function f() where T where T +end +#--------------------- +LoweringError: +function f() where T where T +# ╙ ── function static parameter name not unique +end + +######################################## +# Error: static parameter colliding with argument names +function f(x::x) where x +end +#--------------------- +LoweringError: +function f(x::x) where x +# ╙ ── static parameter name not distinct from function argument +end + ######################################## # Return types function f(x)::Int @@ -218,3 +299,21 @@ x^42.0 3 (call %₁ %₂ 42.0) 4 (return %₃) +######################################## +# Binding docs to functions +""" +some docs +""" +function f() +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 4 =#))))) +6 --- method :f %₅ + 1 (return core.nothing) +7 (call JuliaLowering.bind_docs! 
%₁ "some docs\n" %₅) +8 (return %₁) + From 1fe1fb155aee2b397a4440fa6a4446a80c2402ae Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 13 Oct 2024 07:42:18 +1000 Subject: [PATCH 0885/1109] Move args & sparams into lambda form and remove `LambdaInfo` This form where `K"lambda"` has four children [args, static_parameters, body, ret_var] feels more natural as it keeps AST pieces within the AST rather than as auxiliary attributes. These pieces do still need special treatment in scope resolution, but lambdas are already special there. --- JuliaLowering/src/desugaring.jl | 20 +++++-------- JuliaLowering/src/eval.jl | 10 +++---- JuliaLowering/src/linear_ir.jl | 23 ++++++++------- JuliaLowering/src/scope_analysis.jl | 45 +++++++++++++++++------------ 4 files changed, 52 insertions(+), 46 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 585cc2cd993f4..63a585fa5b5bb 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1,13 +1,5 @@ # Lowering Pass 2 - syntax desugaring -struct LambdaInfo - # TODO: Make SyntaxList concretely typed? 
- args::SyntaxList - static_parameters::SyntaxList - ret_var::Union{Nothing,SyntaxTree} - is_toplevel_thunk::Bool -end - struct DesugaringContext{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings @@ -22,7 +14,7 @@ function DesugaringContext(ctx) value=Any, name_val=String, scope_type=Symbol, # :hard or :soft var_id=IdTag, - lambda_info=LambdaInfo) + is_toplevel_thunk=Bool) DesugaringContext(graph, ctx.bindings, ctx.scope_layers, ctx.current_layer.mod) end @@ -1324,11 +1316,10 @@ function expand_function_def(ctx, ex, docs) if kind(name) == K"::" if numchildren(name) == 1 farg = @ast ctx name [K"::" - new_mutable_var(ctx, name, "#self#") + "#self#"::K"Placeholder" name[1] ] else - TODO("Fixme type") farg = name end else @@ -1340,7 +1331,7 @@ function expand_function_def(ctx, ex, docs) func_var_assignment = @ast ctx name [K"=" func_var [K"method" function_name]] end farg = @ast ctx name [K"::" - new_mutable_var(ctx, name, "#self#") + "#self#"::K"Placeholder" [K"call" "Typeof"::K"core" func_var @@ -1401,8 +1392,11 @@ function expand_function_def(ctx, ex, docs) [K"method" function_name method_metadata - [K"lambda"(body, lambda_info=LambdaInfo(arg_names, typevar_names, ret_var, false)) + [K"lambda"(body, is_toplevel_thunk=false) + [K"block" arg_names...] + [K"block" typevar_names...] 
body + ret_var # might be `nothing` and hence removed ] ] if !isnothing(docs) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index cb55439b3ceed..0111d3e1db384 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -99,7 +99,7 @@ end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function to_code_info(ex, mod, funcname, nargs, slots) +function to_code_info(ex, mod, funcname, slots) input_code = children(ex) code = Any[to_lowered_expr(mod, ex) for ex in input_code] @@ -111,6 +111,7 @@ function to_code_info(ex, mod, funcname, nargs, slots) # - call site @assume_effects ssaflags = zeros(UInt32, length(code)) + nargs = sum((s.kind==:argument for s in slots), init=0) slotnames = Vector{Symbol}(undef, length(slots)) slot_rename_inds = Dict{String,Int}() slotflags = Vector{UInt8}(undef, length(slots)) @@ -212,12 +213,11 @@ function to_lowered_expr(mod, ex) TODO(ex, "Convert SyntaxTree to Expr") end elseif k == K"code_info" - funcname = ex.lambda_info.is_toplevel_thunk ? + funcname = ex.is_toplevel_thunk ? 
"top-level scope" : "none" # FIXME - nargs = length(ex.lambda_info.args) - ir = to_code_info(ex[1], mod, funcname, nargs, ex.slots) - if ex.lambda_info.is_toplevel_thunk + ir = to_code_info(ex[1], mod, funcname, ex.slots) + if ex.is_toplevel_thunk Expr(:thunk, ir) else ir diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index e84ce0b605d57..3595f876193b3 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -919,26 +919,29 @@ end struct Slot name::String + kind::Symbol # <- todo: flags here etc end function compile_lambda(outer_ctx, ex) - lambda_info = ex.lambda_info - # TODO: Add assignments for reassigned arguments to body using lambda_info.args - ctx = LinearIRContext(outer_ctx, lambda_info.is_toplevel_thunk, ex.lambda_locals, lambda_info.ret_var) - compile_body(ctx, ex[1]) + lambda_args = ex[1] + static_parameters = ex[2] + ret_var = numchildren(ex) == 4 ? ex[4] : nothing + # TODO: Add assignments for reassigned arguments to body using lambda_args + ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, ex.lambda_locals, ret_var) + compile_body(ctx, ex[3]) slots = Vector{Slot}() slot_rewrites = Dict{IdTag,Int}() - for arg in lambda_info.args + for arg in children(lambda_args) if kind(arg) == K"Placeholder" # Unused functions arguments like: `_` or `::T` - push!(slots, Slot(arg.name_val)) + push!(slots, Slot(arg.name_val, :argument)) else @assert kind(arg) == K"BindingId" id = arg.var_id info = lookup_binding(ctx.bindings, id) @assert info.kind == :local || info.kind == :argument - push!(slots, Slot(info.name)) + push!(slots, Slot(info.name, :argument)) slot_rewrites[id] = length(slots) end end @@ -946,10 +949,10 @@ function compile_lambda(outer_ctx, ex) for id in sort(collect(ex.lambda_locals)) info = lookup_binding(ctx.bindings, id) @assert info.kind == :local - push!(slots, Slot(info.name)) + push!(slots, Slot(info.name, :local)) slot_rewrites[id] = length(slots) end - for (i,arg) in 
enumerate(lambda_info.static_parameters) + for (i,arg) in enumerate(children(static_parameters)) @assert kind(arg) == K"BindingId" id = arg.var_id info = lookup_binding(ctx.bindings, id) @@ -960,7 +963,7 @@ function compile_lambda(outer_ctx, ex) code = renumber_body(ctx, ctx.code, slot_rewrites) makenode(ctx, ex, K"code_info", makenode(ctx, ex[1], K"block", code), - lambda_info=lambda_info, + is_toplevel_thunk=ex.is_toplevel_thunk, slots=slots ) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 1d514169f2736..e8be823968575 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -199,10 +199,10 @@ end # Analyze identifier usage within a scope, adding all newly discovered # identifiers to ctx.bindings and returning a lookup table from identifier # names to their variable IDs -function analyze_scope(ctx, ex, scope_type, lambda_info) +function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, + lambda_args=nothing, lambda_static_parameters=nothing) parentscope = isempty(ctx.scope_stack) ? 
nothing : ctx.scope_stack[end] is_outer_lambda_scope = kind(ex) == K"lambda" - is_toplevel_global_scope = !isnothing(lambda_info) && lambda_info.is_toplevel_thunk in_toplevel_thunk = is_toplevel_global_scope || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) @@ -231,9 +231,9 @@ function analyze_scope(ctx, ex, scope_type, lambda_info) end end end - if !isnothing(lambda_info) - add_lambda_args(lambda_info.args, :argument) - add_lambda_args(lambda_info.static_parameters, :static_parameter) + if !isnothing(lambda_args) + add_lambda_args(lambda_args, :argument) + add_lambda_args(lambda_static_parameters, :static_parameter) end global_keys = Set(first(g) for g in globals) @@ -410,20 +410,26 @@ function _resolve_scopes(ctx, ex::SyntaxTree) update_binding!(ctx, id; is_always_defined=true) makeleaf(ctx, ex, K"TOMBSTONE") elseif k == K"lambda" - lambda_info = ex.lambda_info - scope = analyze_scope(ctx, ex, nothing, lambda_info) + is_toplevel_thunk = ex.is_toplevel_thunk + scope = analyze_scope(ctx, ex, nothing, is_toplevel_thunk, + children(ex[1]), children(ex[2])) + push!(ctx.scope_stack, scope) - arg_bindings = _resolve_scopes(ctx, lambda_info.args) - sparm_bindings = _resolve_scopes(ctx, lambda_info.static_parameters) - body = _resolve_scopes(ctx, only(children(ex))) + arg_bindings = _resolve_scopes(ctx, ex[1]) + sparm_bindings = _resolve_scopes(ctx, ex[2]) + body = _resolve_scopes(ctx, ex[3]) + ret_var = numchildren(ex) == 4 ? _resolve_scopes(ctx, ex[4]) : nothing pop!(ctx.scope_stack) - # TODO: add a lambda locals field to lambda_info or make a new struct - # containing the additional info ?? 
- new_info = LambdaInfo(arg_bindings, sparm_bindings, - lambda_info.ret_var, lambda_info.is_toplevel_thunk) - makenode(ctx, ex, K"lambda", body; lambda_info=new_info, lambda_locals=scope.lambda_locals) + + @ast ctx ex [K"lambda"(lambda_locals=scope.lambda_locals, + is_toplevel_thunk=is_toplevel_thunk) + arg_bindings + sparm_bindings + body + ret_var + ] elseif k == K"scope_block" - scope = analyze_scope(ctx, ex, ex.scope_type, nothing) + scope = analyze_scope(ctx, ex, ex.scope_type) push!(ctx.scope_stack, scope) body = SyntaxList(ctx) for e in children(ex) @@ -521,8 +527,11 @@ function _resolve_scopes(ctx, exs::AbstractVector) end function resolve_scopes(ctx::ScopeResolutionContext, ex) - thunk = makenode(ctx, ex, K"lambda", ex; - lambda_info=LambdaInfo(SyntaxList(ctx), SyntaxList(ctx), nothing, true)) + thunk = @ast ctx ex [K"lambda"(is_toplevel_thunk=true) + [K"block"] + [K"block"] + ex + ] return _resolve_scopes(ctx, thunk) end From 1a94577b296451dc7f0e3dcd2f584ea1261416c2 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 13 Oct 2024 14:37:14 +1000 Subject: [PATCH 0886/1109] Cleanup to function #self# name and type Avoid creating `::` expressions - just add these directly to the function argument name and type lists instead. --- JuliaLowering/src/desugaring.jl | 58 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 63a585fa5b5bb..f7bc7713e018d 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1311,16 +1311,34 @@ function expand_function_def(ctx, ex, docs) # Add self argument where necessary args = callex[2:end] name = callex[1] + + arg_names = SyntaxList(ctx) + arg_types = SyntaxList(ctx) + for (i,arg) in enumerate(args) + info = analyze_function_arg(arg) + aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" + push!(arg_names, aname) + atype = !isnothing(info.type) ? 
info.type : Any_type(ctx, arg) + @assert !info.is_nospecialize # TODO + if info.is_slurp + if i != length(args) + throw(LoweringError(arg, "`...` may only be used for the last function argument")) + end + atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] + end + push!(arg_types, atype) + end + function_name = nothing func_var = ssavar(ctx, name, "func_var") if kind(name) == K"::" if numchildren(name) == 1 - farg = @ast ctx name [K"::" - "#self#"::K"Placeholder" - name[1] - ] + farg_name = @ast ctx name "#self#"::K"Placeholder" + farg_type = name[1] else - farg = name + @chk numchildren(name) == 2 + farg_name = name[1] + farg_type = name[2] end else if !is_valid_name(name) @@ -1330,36 +1348,18 @@ function expand_function_def(ctx, ex, docs) function_name = @ast ctx name name=>K"Symbol" func_var_assignment = @ast ctx name [K"=" func_var [K"method" function_name]] end - farg = @ast ctx name [K"::" - "#self#"::K"Placeholder" - [K"call" - "Typeof"::K"core" - func_var - ] + farg_name = @ast ctx callex "#self#"::K"Placeholder" + farg_type = @ast ctx callex [K"call" + "Typeof"::K"core" + func_var ] end + pushfirst!(arg_names, farg_name) + pushfirst!(arg_types, farg_type) if isnothing(function_name) function_name = nothing_(ctx, name) func_var_assignment = @ast ctx name [K"=" func_var name] end - args = pushfirst!(collect(args), farg) - - arg_names = SyntaxList(ctx) - arg_types = SyntaxList(ctx) - for (i,arg) in enumerate(args) - info = analyze_function_arg(arg) - aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" - push!(arg_names, aname) - atype = !isnothing(info.type) ? 
info.type : Any_type(ctx, arg) - @assert !info.is_nospecialize # TODO - if info.is_slurp - if i != length(args) - throw(LoweringError(arg, "`...` may only be used for the last function argument")) - end - atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] - end - push!(arg_types, atype) - end if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") From 149a13d089d45959ddff06812f031e90e040eccc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 13 Oct 2024 17:52:27 +1000 Subject: [PATCH 0887/1109] Desugaring of callable objects --- JuliaLowering/src/desugaring.jl | 13 ++++++------ JuliaLowering/test/functions_ir.jl | 32 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index f7bc7713e018d..f932d48181d03 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1340,13 +1340,18 @@ function expand_function_def(ctx, ex, docs) farg_name = name[1] farg_type = name[2] end + function_name = nothing_(ctx, name) + function_obj = farg_type else if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) end if is_identifier_like(name) function_name = @ast ctx name name=>K"Symbol" - func_var_assignment = @ast ctx name [K"=" func_var [K"method" function_name]] + function_obj = @ast ctx name [K"method" function_name] + else + function_name = nothing_(ctx, name) + function_obj = name end farg_name = @ast ctx callex "#self#"::K"Placeholder" farg_type = @ast ctx callex [K"call" @@ -1356,10 +1361,6 @@ function expand_function_def(ctx, ex, docs) end pushfirst!(arg_names, farg_name) pushfirst!(arg_types, farg_type) - if isnothing(function_name) - function_name = nothing_(ctx, name) - func_var_assignment = @ast ctx name [K"=" func_var name] - end if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") @@ -1374,7 +1375,7 @@ function expand_function_def(ctx, ex, docs) @ast ctx ex 
[K"scope_block"(scope_type=:hard) [K"block" - func_var_assignment + [K"=" func_var function_obj] typevar_stmts... # metadata contains svec(types, sparms, location) method_metadata := [K"call"(callex) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 24e4f815df693..59a56eb410dc3 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -206,6 +206,38 @@ end 12 (return %₁₁) 7 (return %₁) +######################################## +# Callable type +function (::T)(x) + x +end +#--------------------- +1 TestMod.T +2 TestMod.T +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 slot₂/x + 2 (return %₁) +7 (return %₁) + +######################################## +# Callable type with instance +function (y::T)(x) + (y, x) +end +#--------------------- +1 TestMod.T +2 TestMod.T +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call core.tuple slot₁/y slot₂/x) + 2 (return %₁) +7 (return %₁) + ######################################## # Function with module ref in name function A.f() From bea1526f5f5a6a8e78d76315e4ff5612b10278b0 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 14 Oct 2024 13:28:00 +1000 Subject: [PATCH 0888/1109] Default inner constructors --- JuliaLowering/src/ast.jl | 27 +++++- JuliaLowering/src/desugaring.jl | 135 +++++++++++++++++++++++++---- JuliaLowering/src/eval.jl | 1 + JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/syntax_graph.jl | 23 +++++ JuliaLowering/test/demo.jl | 27 +++++- JuliaLowering/test/functions_ir.jl | 22 +++-- JuliaLowering/test/typedefs.jl | 66 ++++++++++++++ JuliaLowering/test/typedefs_ir.jl | 129 ++++++++++++++++++++++++--- 9 files changed, 385 insertions(+), 47 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 
6fe3cd6e1a783..7d9c9a89338dd 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -244,9 +244,11 @@ function add_lambda_local!(ctx::AbstractLoweringContext, id) # empty - early passes don't need to record lambda locals end -# Create a new local mutable variable -function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; is_always_defined=false) - id = new_binding(ctx.bindings, BindingInfo(name, :local; is_internal=true, +# Create a new local mutable variable or lambda argument +# (TODO: rename this?) +function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; is_always_defined=false, kind=:local) + @assert kind == :local || kind == :argument + id = new_binding(ctx.bindings, BindingInfo(name, kind; is_internal=true, is_always_defined=is_always_defined)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) @@ -584,3 +586,22 @@ function to_symbol(ctx, ex) @ast ctx ex ex=>K"Symbol" end +function new_scope_layer(ctx) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, true) + push!(ctx.scope_layers, new_layer) + new_layer.id +end + +# Create new local variable names with the same names as `names`, but with a +# new scope_layer so that they become independent variables during scope +# resolution. 
+function similar_identifiers(ctx, names) + scope_layer = new_scope_layer(ctx) + new_names = SyntaxList(ctx) + for name in names + @assert kind(name) == K"Identifier" + push!(new_names, makeleaf(ctx, name, name, kind=K"Identifier", scope_layer=scope_layer)) + end + new_names +end + diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index f932d48181d03..731d4e7d25e48 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1330,18 +1330,19 @@ function expand_function_def(ctx, ex, docs) end function_name = nothing - func_var = ssavar(ctx, name, "func_var") + func_self = ssavar(ctx, name, "func_self") if kind(name) == K"::" if numchildren(name) == 1 farg_name = @ast ctx name "#self#"::K"Placeholder" - farg_type = name[1] + farg_type_ = name[1] else @chk numchildren(name) == 2 farg_name = name[1] - farg_type = name[2] + farg_type_ = name[2] end function_name = nothing_(ctx, name) - function_obj = farg_type + function_obj = farg_type_ + farg_type = func_self else if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) @@ -1356,7 +1357,7 @@ function expand_function_def(ctx, ex, docs) farg_name = @ast ctx callex "#self#"::K"Placeholder" farg_type = @ast ctx callex [K"call" "Typeof"::K"core" - func_var + func_self ] end pushfirst!(arg_names, farg_name) @@ -1375,7 +1376,7 @@ function expand_function_def(ctx, ex, docs) @ast ctx ex [K"scope_block"(scope_type=:hard) [K"block" - [K"=" func_var function_obj] + [K"=" func_self function_obj] typevar_stmts... 
# metadata contains svec(types, sparms, location) method_metadata := [K"call"(callex) @@ -1403,12 +1404,12 @@ function expand_function_def(ctx, ex, docs) if !isnothing(docs) [K"call"(docs) bind_docs!::K"Value" - func_var + func_self docs[1] method_metadata ] end - [K"unnecessary" func_var] + [K"unnecessary" func_self] ] ] elseif kind(name) == K"tuple" @@ -1661,6 +1662,110 @@ function _collect_struct_fields(ctx, field_names, field_types, field_attrs, fiel end end +# generate call to `convert()` for `(call new ...)` expressions +function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, val) + if kind(field_type) == K"core" && field_type.name_val == "Any" + return val + end + # kt = kind(field_type) + # FIXME: Allow kt == K"Identifier" && kt in static_params to avoid fieldtype call + @ast ctx field_type [K"block" + tmp_type := [K"call" + "fieldtype"::K"core" + full_struct_type + field_index::K"Integer" + ] + convert_for_type_decl(ctx, field_type, val, tmp_type, false) + ] +end + +function default_inner_constructors(ctx, srcref, outer_struct_var, + typevar_names, field_names, field_types) + # TODO: Consider using srcref = @HERE ? + exact_ctor = if isempty(typevar_names) + # Definition with exact types for all arguments + field_decls = SyntaxList(ctx) + @ast ctx srcref [K"function" + [K"call" + [K"::" [K"curly" "Type"::K"core" outer_struct_var]] + [[K"::" n t] for (n,t) in zip(field_names, field_types)]... + ] + [K"new" + outer_struct_var + field_names... + ] + ] + end + maybe_non_Any_field_types = filter(field_types) do ft + !(kind(ft) == K"core" && ft.name_val == "Any") + end + converting_ctor = if !isempty(typevar_names) || !isempty(maybe_non_Any_field_types) + # Definition which takes `Any` for all arguments and uses + # `Base.convert()` to convert those to the exact field type. Only + # defined if at least one field type is not Any. 
+ ctor_self = new_mutable_var(ctx, srcref, "#ctor-self#"; kind=:argument) + @ast ctx srcref [K"function" + [K"call" + [K"::" + ctor_self + if isempty(typevar_names) + [K"curly" "Type"::K"core" outer_struct_var] + else + # `Type{S{X,Y}} where {X, Y}` but with X and Y already allocated `TypeVar`s + body = [K"curly" + "Type"::K"core" + [K"curly" + outer_struct_var + typevar_names... + ] + ] + for v in reverse(typevar_names) + body = [K"call" "UnionAll"::K"core" v body] + end + body + end + ] + field_names... + ] + [K"block" + [K"new" + ctor_self + [_new_call_convert_arg(ctx, ctor_self, type, i, name) + for (i, (name,type)) in enumerate(zip(field_names, field_types))]... + ] + ] + ] + end + if isnothing(exact_ctor) + converting_ctor + else + if isnothing(converting_ctor) + exact_ctor + else + @ast ctx srcref [K"block" + [K"if" + # Only define converting_ctor if at least one field type is not Any. + mapfoldl(t -> [K"call" "==="::K"core" "Any"::K"core" t], + (t,u) -> [K"&&" u t], + maybe_non_Any_field_types) + [K"block"] + converting_ctor + ] + exact_ctor + ] + end + end +end + +function _new_call(ctx, ex, typevar_names, field_names, field_types) + if has_keywords(ex) + throw(LoweringError("")) + end +end + +function _rewrite_constructor_new_calls(ctx, ex, typevar_names, field_names, field_types) +end + function _constructor_min_initalized(ex::SyntaxTree) if kind(ex) == K"call" && ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") || (kind(ex[1]) == K"curly" && kind(ex[1][1]) == K"Identifier" && ex[1][1].name_val == "new")) @@ -1705,6 +1810,9 @@ function expand_struct_def(ctx, ex, docs) end if isempty(defs) end + + default_constructor_args = similar_identifiers(ctx, field_names) + @ast ctx ex [K"block" [K"global" struct_name] [K"const" struct_name] @@ -1758,15 +1866,12 @@ function expand_struct_def(ctx, ex, docs) struct_name [K"call" "svec"::K"core" field_types...] 
] + # Inner constructors + default_inner_constructors(ctx, ex, outer_struct_var, + typevar_names, field_names, field_types) ] ] - # Inner constructors - # TODO - # [K"scope_block"(scope_type=:hard) - # [K"block" - # [K"global" struct_name] - # ] - # ] + if !isnothing(docs) || !isempty(field_docs) [K"call"(isnothing(docs) ? ex : docs) bind_docs!::K"Value" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 0111d3e1db384..ce05037f89387 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -246,6 +246,7 @@ function to_lowered_expr(mod, ex) # enter leave pop_exception inbounds boundscheck loopinfo copyast meta # foreigncall new_opaque_closure lambda head = k == K"call" ? :call : + k == K"new" ? :new : k == K"=" ? :(=) : k == K"global" ? :global : k == K"const" ? :const : diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 3595f876193b3..1c34b6b46f668 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -584,7 +584,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end - elseif k == K"call" + elseif k == K"call" || k == K"new" # TODO k ∈ splatnew foreigncall cfunction new_opaque_closure cglobal args = compile_args(ctx, children(ex)) callex = makenode(ctx, ex, k, args) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 10786ad8fae77..ba8c6e9aa54d1 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -613,3 +613,26 @@ end function Base.copy(v::SyntaxList) SyntaxList(v.graph, copy(v.ids)) end + +function Base.filter(f, exs::SyntaxList) + out = SyntaxList(syntax_graph(exs)) + for ex in exs + if f(ex) + push!(out, ex) + end + end + out +end + +# Would like the following to be an overload of Base.map() ... but need +# somewhat arcane trickery to ensure that this only tries to collect into a +# SyntaxList when `f` yields a SyntaxTree. 
+# +# function mapsyntax(f, exs::SyntaxList) +# out = SyntaxList(syntax_graph(exs)) +# for ex in exs +# push!(out, f(ex)) +# end +# out +# end + diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9c3a496898670..d09d9cbccfa19 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -532,6 +532,27 @@ function f() end """ +src = """ +function f(::T, ::U, ::S) where T where {U,S} + println(T) + println(U) + println(S) +end +""" + +src = """ +function (x::XXX)(y) + println("hi", " ", x, " ", y) +end +""" + +src = """ +struct X + x + y::String +end +""" + src = """ struct X{U,V} x::U @@ -540,10 +561,7 @@ end """ src = """ -function f(::T, ::U, ::S) where T where {U,S} - println(T) - println(U) - println(S) +struct X end """ @@ -554,6 +572,7 @@ ex = ensure_attributes(ex, var_id=Int) module MMM end in_mod = MMM +# in_mod=Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) @info "Macro expanded" ex_macroexpand formatsrc(ex_macroexpand, color_by=:scope_layer) #@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.flattened_provenance(e)[1:end-1]) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 59a56eb410dc3..ee6785e390a4d 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -213,14 +213,13 @@ function (::T)(x) end #--------------------- 1 TestMod.T -2 TestMod.T -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +2 (call core.svec %₁ core.Any) +3 (call core.svec) +4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 1 =#))))) +5 --- method core.nothing %₄ 1 slot₂/x 2 (return %₁) -7 (return %₁) +6 (return %₁) ######################################## # Callable type with instance @@ -229,14 +228,13 @@ function (y::T)(x) end #--------------------- 1 TestMod.T -2 TestMod.T -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 (call 
core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +2 (call core.svec %₁ core.Any) +3 (call core.svec) +4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 1 =#))))) +5 --- method core.nothing %₄ 1 (call core.tuple slot₁/y slot₂/x) 2 (return %₁) -7 (return %₁) +6 (return %₁) ######################################## # Function with module ref in name diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index 865d26e78a141..d0bbf3841d1ea 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -50,4 +50,70 @@ end @test fieldtypes(test_mod.S1{Int,String}) == (Int, String, Any) @test supertype(test_mod.S1) == test_mod.A +# Inner constructors: one field non-Any +@test JuliaLowering.include_string(test_mod, """ +struct S2 + x::Int + y +end +""") === nothing +@test length(methods(test_mod.S2)) == 2 +let s = test_mod.S2(42, "hi") + # exact types + @test s.x === 42 + @test s.y == "hi" +end +let s = test_mod.S2(42.0, "hi") + # converted types + @test s.x === 42 + @test s.y == "hi" +end + +# Constructors: All fields Any +@test JuliaLowering.include_string(test_mod, """ +struct S3 + x + y +end +""") === nothing +@test length(methods(test_mod.S3)) == 1 +let s = test_mod.S3(42, "hi") + @test s.x === 42 + @test s.y == "hi" +end + +# Inner constructors: All fields Any; dynamically tested against whatever +# S4_Field resolves to +@test JuliaLowering.include_string(test_mod, """ +S4_Field = Any # actually Any! 
+ +struct S4 + x::S4_Field + y +end +""") === nothing +@test length(methods(test_mod.S4)) == 1 +let s = test_mod.S4(42, "hi") + @test s.x === 42 + @test s.y == "hi" +end + +# Inner constructors; parameterized types +@test JuliaLowering.include_string(test_mod, """ +struct S5{U} + x::U + y +end +""") === nothing +@test length(methods(test_mod.S5)) == 0 +@test length(methods(test_mod.S5{Int})) == 1 +let s = test_mod.S5{Int}(42.0, "hi") + @test s.x === 42 + @test s.y == "hi" +end +let s = test_mod.S5{Any}(42.0, "hi") + @test s.x === 42.0 + @test s.y == "hi" +end + end diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index d254af4de4ed0..454556e9a2da8 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -297,6 +297,46 @@ primitive type P P_nbits() end 16 (= TestMod.P %₄) 17 (return core.nothing) +######################################## +# Empty struct +struct X +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (call core.svec) +4 (call core.svec) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 0) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (isdefined TestMod.X) +10 (gotoifnot %₉ label₂₀) +11 TestMod.X +12 (call core._equiv_typedef %₁₁ %₆) +13 (gotoifnot %₁₂ label₁₇) +14 TestMod.X +15 (= slot₁/X %₁₄) +16 (goto label₁₉) +17 slot₁/X +18 (= TestMod.X %₁₇) +19 (goto label₂₂) +20 slot₁/X +21 (= TestMod.X %₂₀) +22 slot₁/X +23 (call core.svec) +24 (call core._typebody! %₂₂ %₂₃) +25 TestMod.X +26 (call core.apply_type core.Type %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) +30 --- method core.nothing %₂₉ + 1 TestMod.X + 2 (new %₁) + 3 (return %₂) +31 (return core.nothing) + ######################################## # Basic struct struct X @@ -330,7 +370,39 @@ end 23 TestMod.T 24 (call core.svec core.Any %₂₃ core.Any) 25 (call core._typebody! 
%₂₂ %₂₄) -26 (return core.nothing) +26 TestMod.T +27 (call core.=== core.Any %₂₆) +28 (gotoifnot %₂₇ label₃₀) +29 (goto label₃₆) +30 TestMod.X +31 (call core.apply_type core.Type %₃₀) +32 (call core.svec %₃₁ core.Any core.Any core.Any) +33 (call core.svec) +34 (call core.svec %₃₂ %₃₃ :($(QuoteNode(:(#= line 1 =#))))) +35 --- method core.nothing %₃₄ + 1 (call core.fieldtype slot₁/#ctor-self# 2) + 2 slot₃/b + 3 (= slot₅/tmp %₂) + 4 slot₅/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₀) + 8 slot₅/tmp + 9 (= slot₅/tmp (call top.convert %₁ %₈)) + 10 slot₅/tmp + 11 (new slot₁/#ctor-self# slot₂/a %₁₀ slot₄/c) + 12 (return %₁₁) +36 TestMod.X +37 (call core.apply_type core.Type %₃₆) +38 TestMod.T +39 (call core.svec %₃₇ core.Any %₃₈ core.Any) +40 (call core.svec) +41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 1 =#))))) +42 --- method core.nothing %₄₁ + 1 TestMod.X + 2 (new %₁ slot₂/a slot₃/b slot₄/c) + 3 (return %₂) +43 (return core.nothing) ######################################## # Struct with supertype and type params @@ -378,7 +450,22 @@ end 39 slot₄/X 40 (call core.svec) 41 (call core._typebody! %₃₉ %₄₀) -42 (return core.nothing) +42 slot₂/U +43 slot₃/V +44 TestMod.X +45 slot₂/U +46 slot₃/V +47 (call core.apply_type %₄₄ %₄₅ %₄₆) +48 (call core.apply_type core.Type %₄₇) +49 (call core.UnionAll %₄₃ %₄₈) +50 (call core.UnionAll %₄₂ %₄₉) +51 (call core.svec %₅₀) +52 (call core.svec) +53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 1 =#))))) +54 --- method core.nothing %₅₃ + 1 (new slot₁/#ctor-self#) + 2 (return %₁) +55 (return core.nothing) ######################################## # Struct with const and atomic fields @@ -412,7 +499,16 @@ end 22 slot₁/X 23 (call core.svec core.Any core.Any core.Any) 24 (call core._typebody! 
%₂₂ %₂₃) -25 (return core.nothing) +25 TestMod.X +26 (call core.apply_type core.Type %₂₅) +27 (call core.svec %₂₆ core.Any core.Any core.Any) +28 (call core.svec) +29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) +30 --- method core.nothing %₂₉ + 1 TestMod.X + 2 (new %₁ slot₂/a slot₃/b slot₄/c) + 3 (return %₂) +31 (return core.nothing) ######################################## # Documented struct @@ -450,15 +546,24 @@ end 22 slot₁/X 23 (call core.svec core.Any core.Any) 24 (call core._typebody! %₂₂ %₂₃) -25 JuliaLowering.bind_docs! -26 (call core.tuple :field_docs) -27 (call core.apply_type core.NamedTuple %₂₆) -28 (call core.svec 1 "field a docs" 2 "field b docs") -29 (call core.tuple %₂₈) -30 (call %₂₇ %₂₉) -31 TestMod.X -32 (call core.kwcall %₃₀ %₂₅ %₃₁ "X docs\n" :($(QuoteNode(:(#= line 4 =#))))) -33 (return core.nothing) +25 TestMod.X +26 (call core.apply_type core.Type %₂₅) +27 (call core.svec %₂₆ core.Any core.Any) +28 (call core.svec) +29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 4 =#))))) +30 --- method core.nothing %₂₉ + 1 TestMod.X + 2 (new %₁ slot₂/a slot₃/b) + 3 (return %₂) +31 JuliaLowering.bind_docs! +32 (call core.tuple :field_docs) +33 (call core.apply_type core.NamedTuple %₃₂) +34 (call core.svec 1 "field a docs" 2 "field b docs") +35 (call core.tuple %₃₄) +36 (call %₃₃ %₃₅) +37 TestMod.X +38 (call core.kwcall %₃₆ %₃₁ %₃₇ "X docs\n" :($(QuoteNode(:(#= line 4 =#))))) +39 (return core.nothing) ######################################## # Error: Struct not at top level From ab916a0d2fdb457f40afa476c71fe50682bd1303 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 15 Oct 2024 18:51:49 +1000 Subject: [PATCH 0889/1109] Remove unused first argument to Expr(:method) Perhaps this was used historically but it's now only used for method tables in method overlays. 
--- JuliaLowering/src/desugaring.jl | 25 +++++++++++++------------ JuliaLowering/test/assignments_ir.jl | 2 +- JuliaLowering/test/functions_ir.jl | 18 +++++++++--------- JuliaLowering/test/macros_ir.jl | 4 ++-- JuliaLowering/test/scopes_ir.jl | 4 ++-- 5 files changed, 27 insertions(+), 26 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 731d4e7d25e48..b5e4430bb74f5 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1329,30 +1329,29 @@ function expand_function_def(ctx, ex, docs) push!(arg_types, atype) end - function_name = nothing func_self = ssavar(ctx, name, "func_self") if kind(name) == K"::" if numchildren(name) == 1 + # function (::T)() ... farg_name = @ast ctx name "#self#"::K"Placeholder" farg_type_ = name[1] else + # function (f::T)() ... @chk numchildren(name) == 2 farg_name = name[1] farg_type_ = name[2] end - function_name = nothing_(ctx, name) - function_obj = farg_type_ + func_self_val = farg_type_ # Here we treat the type itself as the function farg_type = func_self else if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) - end - if is_identifier_like(name) - function_name = @ast ctx name name=>K"Symbol" - function_obj = @ast ctx name [K"method" function_name] + elseif is_identifier_like(name) + # function f() ... + func_self_val = @ast ctx name [K"method" name=>K"Symbol"] else - function_name = nothing_(ctx, name) - function_obj = name + # function A.B.f() ... + func_self_val = name end farg_name = @ast ctx callex "#self#"::K"Placeholder" farg_type = @ast ctx callex [K"call" @@ -1374,9 +1373,11 @@ function expand_function_def(ctx, ex, docs) ret_var = nothing end + method_table = nothing_(ctx, name) # TODO: method overlays + @ast ctx ex [K"scope_block"(scope_type=:hard) [K"block" - [K"=" func_self function_obj] + [K"=" func_self func_self_val] typevar_stmts... 
# metadata contains svec(types, sparms, location) method_metadata := [K"call"(callex) @@ -1392,7 +1393,7 @@ function expand_function_def(ctx, ex, docs) QuoteNode(source_location(LineNumberNode, callex))::K"Value" ] [K"method" - function_name + method_table method_metadata [K"lambda"(body, is_toplevel_thunk=false) [K"block" arg_names...] @@ -1668,7 +1669,7 @@ function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, v return val end # kt = kind(field_type) - # FIXME: Allow kt == K"Identifier" && kt in static_params to avoid fieldtype call + # TODO: Allow kt == K"Identifier" && kt in static_params to avoid fieldtype call? @ast ctx field_type [K"block" tmp_type := [K"call" "fieldtype"::K"core" diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 76f23e7bba97c..efd7c360d365a 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -34,7 +34,7 @@ end 3 (call core.svec %₂) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 2 =#))))) -6 --- method :b %₅ +6 --- method core.nothing %₅ 1 TestMod.d 2 (= slot₂/c %₁) 3 (return %₁) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index ee6785e390a4d..6bef6e2f98b88 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -9,7 +9,7 @@ end 3 (call core.svec %₂ core.Any core.Any core.Any) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method :f %₅ +6 --- method core.nothing %₅ 1 TestMod.+ 2 (call %₁ slot₂/x slot₄/y) 3 (return %₂) @@ -27,7 +27,7 @@ end 4 (call core.svec %₂ %₃ core.Any) 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method :f %₆ +7 --- method core.nothing %₆ 1 slot₃/x 2 (return %₁) 8 (return %₁) @@ -44,7 +44,7 @@ end 4 (call core.svec %₂ core.Any %₃) 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method :f %₆ +7 --- method 
core.nothing %₆ 1 TestMod.body 2 (return %₁) 8 (return %₁) @@ -61,7 +61,7 @@ end 4 (call core.svec %₂ core.Any %₃) 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method :f %₆ +7 --- method core.nothing %₆ 1 TestMod.body 2 (return %₁) 8 (return %₁) @@ -79,7 +79,7 @@ end 5 (call core.svec %₂ core.Any %₄) 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method :f %₇ +8 --- method core.nothing %₇ 1 TestMod.body 2 (return %₁) 9 (return %₁) @@ -116,7 +116,7 @@ end 12 slot₁/T 13 (call core.svec %₁₀ %₁₁ %₁₂) 14 (call core.svec %₉ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) -15 --- method :f %₁₄ +15 --- method core.nothing %₁₄ 1 static_parameter₃ 2 static_parameter₁ 3 static_parameter₂ @@ -142,7 +142,7 @@ end 10 slot₁/T 11 (call core.svec %₁₀) 12 (call core.svec %₉ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method :f %₁₂ +13 --- method core.nothing %₁₂ 1 static_parameter₁ 2 (return %₁) 14 (return %₁) @@ -191,7 +191,7 @@ end 3 (call core.svec %₂ core.Any) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method :f %₅ +6 --- method core.nothing %₅ 1 TestMod.Int 2 (gotoifnot slot₂/x label₃) 3 (= slot₃/tmp 0xff) @@ -342,7 +342,7 @@ end 3 (call core.svec %₂) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 4 =#))))) -6 --- method :f %₅ +6 --- method core.nothing %₅ 1 (return core.nothing) 7 (call JuliaLowering.bind_docs! 
%₁ "some docs\n" %₅) 8 (return %₁) diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 56e4efc749494..0a7cb046e9d68 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -11,7 +11,7 @@ end 3 (call core.svec %₂ JuliaLowering.MacroContext core.Any) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method :@add_one %₅ +6 --- method core.nothing %₅ 1 (call core.tuple slot₃/ex) 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) 3 (return %₂) @@ -28,7 +28,7 @@ end 3 (call core.svec %₂ JuliaLowering.MacroContext core.Any) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method :@foo %₅ +6 --- method core.nothing %₅ 1 slot₂/__context__ 2 (= slot₄/ctx %₁) 3 (return %₁) diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index b2c5abaf51a8f..9d0ac4688ba80 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -40,7 +40,7 @@ end 5 (call core.svec %₄ core.Any) 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 2 =#))))) -8 --- method :f %₇ +8 --- method core.nothing %₇ 1 (call core.tuple false true true) 2 (return %₁) 9 (return %₃) @@ -83,7 +83,7 @@ end 3 (call core.svec %₂ core.Any) 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method :f %₅ +6 --- method core.nothing %₅ 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (isdefined slot₂/z) From 998838340b68b2db2aadd0dc8395c2fe92f6944a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 17 Oct 2024 11:24:07 +1000 Subject: [PATCH 0890/1109] Default outer constructors --- JuliaLowering/src/desugaring.jl | 178 +++++++++++++++++++++++------- JuliaLowering/src/kinds.jl | 1 + JuliaLowering/test/typedefs.jl | 45 +++++++- JuliaLowering/test/typedefs_ir.jl | 170 ++++++++++++++++++++++++++++ JuliaLowering/test/utils.jl | 33 ++++-- 5 files changed, 
374 insertions(+), 53 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index b5e4430bb74f5..932d8a743b9dc 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1253,13 +1253,18 @@ end function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) if kind(ex) == K"where" && numchildren(ex) == 2 - params = kind(ex[2]) == K"braces" ? ex[2][1:end] : ex[2:2] - for param in params - bounds = analyze_typevar(ctx, param) - n = bounds[1] - push!(typevar_names, n) - push!(typevar_stmts, @ast ctx param [K"local" n]) - push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) + vars_kind = kind(ex[2]) + if vars_kind == K"_typevars" + append!(typevar_names, children(ex[2])) + else + params = vars_kind == K"braces" ? ex[2][1:end] : ex[2:2] + for param in params + bounds = analyze_typevar(ctx, param) + n = bounds[1] + push!(typevar_names, n) + push!(typevar_stmts, @ast ctx param [K"local" n]) + push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) + end end _split_wheres!(ctx, typevar_names, typevar_stmts, ex[1]) else @@ -1374,7 +1379,6 @@ function expand_function_def(ctx, ex, docs) end method_table = nothing_(ctx, name) # TODO: method overlays - @ast ctx ex [K"scope_block"(scope_type=:hard) [K"block" [K"=" func_self func_self_val] @@ -1508,48 +1512,53 @@ function bounds_to_TypeVar(ctx, srcref, bounds) ] end -# Analyze type signatures such as `A <: B where C` +# Analyze type signatures such as `A{C} <: B where C` # # Return (name, typevar_names, typevar_stmts, supertype) where # - `name` is the name of the type -# - `typevar_names` are the names of the types's type parameters -# - `typevar_stmts` are a list of statements to define a `TypeVar` for each -# name in `typevar_names`, to be emitted prior to uses of `typevar_names` # - `supertype` is the super type of the type function analyze_type_sig(ctx, ex) k = kind(ex) if k == K"Identifier" name = ex - 
params = () + type_params = () supertype = @ast ctx ex "Any"::K"core" elseif k == K"curly" && numchildren(ex) >= 1 && kind(ex[1]) == K"Identifier" - # name{params} + # name{type_params} name = ex[1] - params = ex[2:end] + type_params = ex[2:end] supertype = @ast ctx ex "Any"::K"core" elseif k == K"<:" && numchildren(ex) == 2 if kind(ex[1]) == K"Identifier" name = ex[1] - params = () + type_params = () supertype = ex[2] elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier" name = ex[1][1] - params = ex[1][2:end] + type_params = ex[1][2:end] supertype = ex[2] end end @isdefined(name) || throw(LoweringError(ex, "invalid type signature")) + return (name, type_params, supertype) +end + +# Expand type_params into (typevar_names, typevar_stmts) where +# - `typevar_names` are the names of the types's type parameters +# - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter +# name in `typevar_names`, to be emitted prior to uses of `typevar_names` +function expand_typevars(ctx, type_params) typevar_names = SyntaxList(ctx) typevar_stmts = SyntaxList(ctx) - for param in params + for param in type_params bounds = analyze_typevar(ctx, param) n = bounds[1] push!(typevar_names, n) push!(typevar_stmts, @ast ctx param [K"local" n]) push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) end - return (name, typevar_names, typevar_stmts, supertype) + return (typevar_names, typevar_stmts) end function expand_abstract_or_primitive_type(ctx, ex) @@ -1561,7 +1570,8 @@ function expand_abstract_or_primitive_type(ctx, ex) @chk numchildren(ex) == 2 nbits = ex[2] end - name, typevar_names, typevar_stmts, supertype = analyze_type_sig(ctx, ex[1]) + name, type_params, supertype = analyze_type_sig(ctx, ex[1]) + typevar_names, typevar_stmts = expand_typevars(ctx, type_params) newtype_var = ssavar(ctx, ex, "new_type") @ast ctx ex [K"block" [K"scope_block"(scope_type=:hard) @@ -1629,11 +1639,11 @@ 
function _match_struct_field(x0) end end -function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, defs, exs) +function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, constructors, exs) for e in exs if kind(e) == K"block" _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, - defs, children(e)) + constructors, children(e)) elseif kind(e) == K"=" throw(LoweringError(e, "assignment syntax in structure fields is reserved")) else @@ -1657,7 +1667,7 @@ function _collect_struct_fields(ctx, field_names, field_types, field_attrs, fiel end else # Inner constructors - push!(defs, e) + push!(constructors, e) end end end @@ -1712,18 +1722,16 @@ function default_inner_constructors(ctx, srcref, outer_struct_var, if isempty(typevar_names) [K"curly" "Type"::K"core" outer_struct_var] else - # `Type{S{X,Y}} where {X, Y}` but with X and Y already allocated `TypeVar`s - body = [K"curly" - "Type"::K"core" + [K"where" [K"curly" - outer_struct_var - typevar_names... + "Type"::K"core" + [K"curly" + outer_struct_var + typevar_names... + ] ] + [K"_typevars" typevar_names...] ] - for v in reverse(typevar_names) - body = [K"call" "UnionAll"::K"core" v body] - end - body end ] field_names... @@ -1758,6 +1766,36 @@ function default_inner_constructors(ctx, srcref, outer_struct_var, end end +# Generate outer constructor for structs with type parameters. 
Eg, for +# struct X{U,V} +# x::U +# y::V +# end +# +# We basically generate +# function (::Type{X})(x::U, y::V) where {U,V} +# new(X{U,V}, x, y) +# end +# +function default_outer_constructor(ctx, srcref, outer_struct_var, + typevar_names, field_names, field_types) + @ast ctx srcref [K"function" + [K"where" + [K"call" + # We use `::Type{$outer_struct_var}` here rather than just + # `struct_name` because outer_struct_var is a binding to a + # type - we know we're not creating a new `Function` and + # there's no reason to emit the 1-arg `Expr(:method, name)` in + # the next phase of expansion. + [K"::" [K"curly" "Type"::K"core" outer_struct_var]] + [[K"::" n t] for (n,t) in zip(field_names, field_types)]... + ] + [K"_typevars" typevar_names...] + ] + [K"new" [K"curly" outer_struct_var typevar_names...] field_names...] + ] +end + function _new_call(ctx, ex, typevar_names, field_names, field_types) if has_keywords(ex) throw(LoweringError("")) @@ -1789,16 +1827,17 @@ function expand_struct_def(ctx, ex, docs) if kind(type_body) != K"block" throw(LoweringError(type_body, "expected block for `struct` fields")) end - struct_name, typevar_names, typevar_stmts, supertype = analyze_type_sig(ctx, type_sig) + struct_name, type_params, supertype = analyze_type_sig(ctx, type_sig) + typevar_names, typevar_stmts = expand_typevars(ctx, type_params) field_names = SyntaxList(ctx) field_types = SyntaxList(ctx) field_attrs = SyntaxList(ctx) field_docs = SyntaxList(ctx) - defs = SyntaxList(ctx) + constructors = SyntaxList(ctx) _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, - defs, children(type_body)) + constructors, children(type_body)) is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG) - min_initialized = min(_constructor_min_initalized(defs), length(field_names)) + min_initialized = min(_constructor_min_initalized(constructors), length(field_names)) newtype_var = ssavar(ctx, ex, "struct_type") outer_struct_var = alias_binding(ctx, struct_name) if 
!isempty(typevar_names) @@ -1809,11 +1848,52 @@ function expand_struct_def(ctx, ex, docs) end prev_typevars = @ast ctx type_sig [K"." prev_typevars "parameters"::K"Symbol"] end - if isempty(defs) - end - default_constructor_args = similar_identifiers(ctx, field_names) + field_names_2 = similar_identifiers(ctx, field_names) + + need_outer_constructor = false + if isempty(constructors) && !isempty(typevar_names) + # To generate an outer constructor each struct type parameter must be + # able to be inferred from the list of fields passed as constuctor + # arguments. + # + # More precisely, it must occur in a field type, or in the bounds of a + # subsequent type parameter. For example the following won't work + # struct X{T} + # a::Int + # end + # X(a::Int) where T = #... construct X{T} ?? + # + # But the following does + # struct X{T} + # a::T + # end + # X(a::T) where {T} = # construct X{typeof(a)}(a) + need_outer_constructor = true + for i in 1:length(typevar_names) + typevar_name = typevar_names[i] + typevar_in_fields = any(contains_identifier(ft, typevar_name) for ft in field_types) + if !typevar_in_fields + typevar_in_bounds = any(type_params[i+1:end]) do param + # Check the bounds of subsequent type params + (_,lb,ub) = analyze_typevar(ctx, param) + # TODO: flisp lowering tests `lb` here so we also do. But + # in practice this doesn't seem to constrain `typevar_name` + # and the generated constructor doesn't work? + (!isnothing(ub) && contains_identifier(ub, typevar_name)) || + (!isnothing(lb) && contains_identifier(lb, typevar_name)) + end + if !typevar_in_bounds + need_outer_constructor = false + break + end + end + end + end + # The following lowering covers several subtle issues in the ordering of + # typevars when "redefining" structs. 
+ # See https://github.com/JuliaLang/julia/pull/36121 @ast ctx ex [K"block" [K"global" struct_name] [K"const" struct_name] @@ -1868,8 +1948,16 @@ function expand_struct_def(ctx, ex, docs) [K"call" "svec"::K"core" field_types...] ] # Inner constructors - default_inner_constructors(ctx, ex, outer_struct_var, - typevar_names, field_names, field_types) + if isempty(constructors) + default_inner_constructors(ctx, ex, outer_struct_var, + typevar_names, field_names_2, field_types) + else + TODO(ex, "Convert new-calls to new-expressions in user-defined constructors") + end + if need_outer_constructor + default_outer_constructor(ctx, ex, outer_struct_var, + typevar_names, field_names_2, field_types) + end ] ] @@ -1902,10 +1990,18 @@ function expand_wheres(ctx, ex) body = ex[1] rhs = ex[2] if kind(rhs) == K"braces" + # S{X,Y} where {X,Y} for r in reverse(children(rhs)) body = expand_where(ctx, ex, body, r) end + elseif kind(rhs) == K"_typevars" + # Eg, `S{X,Y} where {X, Y}` but with X and Y + # already allocated `TypeVar`s + for r in reverse(children(rhs)) + body = @ast ctx ex [K"call" "UnionAll"::K"core" r body] + end else + # S{X} where X body = expand_where(ctx, ex, body, rhs) end body diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index a3618f2c1cbdd..e7da05bf04667 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -53,6 +53,7 @@ function _register_kinds() "local_def" # TODO: Replace with K"local" plus BindingFlags attribute? 
"_while" "_do_while" + "_typevars" # used for supplying already-allocated `TypeVar`s to `where` "with_static_parameters" "top" "core" diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index d0bbf3841d1ea..92a0202d01ee6 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -1,6 +1,6 @@ @testset "Type definitions" begin -test_mod = Module() +test_mod = Module(:TestMod) @test JuliaLowering.include_string(test_mod, """ abstract type A end @@ -98,22 +98,61 @@ let s = test_mod.S4(42, "hi") @test s.y == "hi" end -# Inner constructors; parameterized types +# Inner & outer constructors; parameterized types @test JuliaLowering.include_string(test_mod, """ struct S5{U} x::U y end """) === nothing -@test length(methods(test_mod.S5)) == 0 +@test length(methods(test_mod.S5)) == 1 +let s = test_mod.S5(42, "hi") + @test s isa test_mod.S5{Int} + @test s.x === 42 + @test s.y == "hi" +end @test length(methods(test_mod.S5{Int})) == 1 let s = test_mod.S5{Int}(42.0, "hi") + @test s isa test_mod.S5{Int} @test s.x === 42 @test s.y == "hi" end let s = test_mod.S5{Any}(42.0, "hi") + @test s isa test_mod.S5{Any} @test s.x === 42.0 @test s.y == "hi" end +# Test cases from +# https://github.com/JuliaLang/julia/issues/36104 +# https://github.com/JuliaLang/julia/pull/36121 +JuliaLowering.include_string(test_mod, """ +# issue #36104 +module M36104 +struct T36104 + v::Vector{M36104.T36104} +end +struct T36104 # check that redefining it works, issue #21816 + v::Vector{T36104} +end +end +""") +@test fieldtypes(test_mod.M36104.T36104) == (Vector{test_mod.M36104.T36104},) +@test_throws ErrorException("expected") JuliaLowering.include_string(test_mod, """struct X36104; x::error("expected"); end""") +@test isdefined(test_mod, :X36104) +JuliaLowering.include_string(test_mod, "struct X36104; x::Int; end") +@test fieldtypes(test_mod.X36104) == (Int,) +JuliaLowering.include_string(test_mod, "primitive type P36104 8 end") +@test_throws 
ErrorException("invalid redefinition of constant TestMod.P36104") #= + =# JuliaLowering.include_string(test_mod, "primitive type P36104 16 end") + +# Struct with outer constructor where one typevar is constrained by the other +# See https://github.com/JuliaLang/julia/issues/27269) +@test JuliaLowering.include_string(test_mod, """ +struct X27269{T, S <: Vector{T}} + v::Vector{S} +end +""") === nothing +@test test_mod.X27269([[1,2]]) isa test_mod.X27269{Int, Vector{Int}} + end diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 454556e9a2da8..cdfbffe0e7472 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -565,6 +565,176 @@ end 38 (call core.kwcall %₃₆ %₃₁ %₃₇ "X docs\n" :($(QuoteNode(:(#= line 4 =#))))) 39 (return core.nothing) +######################################## +# Struct with outer constructor +struct X{U} + x::U +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (= slot₁/U (call core.TypeVar :U)) +4 slot₁/U +5 (call core.svec %₄) +6 (call core.svec :x) +7 (call core.svec) +8 (call core._structtype TestMod :X %₅ %₆ %₇ false 1) +9 (= slot₂/X %₈) +10 (call core._setsuper! %₈ core.Any) +11 (isdefined TestMod.X) +12 (gotoifnot %₁₁ label₂₇) +13 TestMod.X +14 (call core._equiv_typedef %₁₃ %₈) +15 (gotoifnot %₁₄ label₂₄) +16 TestMod.X +17 (= slot₂/X %₁₆) +18 TestMod.X +19 (call top.getproperty %₁₈ :body) +20 (call top.getproperty %₁₉ :parameters) +21 (call top.indexed_iterate %₂₀ 1) +22 (= slot₁/U (call core.getfield %₂₁ 1)) +23 (goto label₂₆) +24 slot₂/X +25 (= TestMod.X %₂₄) +26 (goto label₂₉) +27 slot₂/X +28 (= TestMod.X %₂₇) +29 slot₂/X +30 slot₁/U +31 (call core.svec %₃₀) +32 (call core._typebody! 
%₂₉ %₃₁) +33 slot₁/U +34 TestMod.X +35 slot₁/U +36 (call core.apply_type %₃₄ %₃₅) +37 (call core.apply_type core.Type %₃₆) +38 (call core.UnionAll %₃₃ %₃₇) +39 (call core.svec %₃₈ core.Any) +40 (call core.svec) +41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 1 =#))))) +42 --- method core.nothing %₄₁ + 1 (call core.fieldtype slot₁/#ctor-self# 1) + 2 slot₂/x + 3 (= slot₃/tmp %₂) + 4 slot₃/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₀) + 8 slot₃/tmp + 9 (= slot₃/tmp (call top.convert %₁ %₈)) + 10 slot₃/tmp + 11 (new slot₁/#ctor-self# %₁₀) + 12 (return %₁₁) +43 TestMod.X +44 (call core.apply_type core.Type %₄₃) +45 slot₁/U +46 (call core.svec %₄₄ %₄₅) +47 slot₁/U +48 (call core.svec %₄₇) +49 (call core.svec %₄₆ %₄₈ :($(QuoteNode(:(#= line 1 =#))))) +50 --- method core.nothing %₄₉ + 1 TestMod.X + 2 static_parameter₁ + 3 (call core.apply_type %₁ %₂) + 4 (new %₃ slot₂/x) + 5 (return %₄) +51 (return core.nothing) + +######################################## +# Struct with outer constructor where one typevar is constrained by the other +# See https://github.com/JuliaLang/julia/issues/27269) +struct X{T, S <: Vector{T}} + v::Vector{S} +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (= slot₃/T (call core.TypeVar :T)) +4 TestMod.Vector +5 slot₃/T +6 (call core.apply_type %₄ %₅) +7 (= slot₂/S (call core.TypeVar :S %₆)) +8 slot₃/T +9 slot₂/S +10 (call core.svec %₈ %₉) +11 (call core.svec :v) +12 (call core.svec) +13 (call core._structtype TestMod :X %₁₀ %₁₁ %₁₂ false 1) +14 (= slot₄/X %₁₃) +15 (call core._setsuper! 
%₁₃ core.Any) +16 (isdefined TestMod.X) +17 (gotoifnot %₁₆ label₃₇) +18 TestMod.X +19 (call core._equiv_typedef %₁₈ %₁₃) +20 (gotoifnot %₁₉ label₃₄) +21 TestMod.X +22 (= slot₄/X %₂₁) +23 TestMod.X +24 (call top.getproperty %₂₃ :body) +25 (call top.getproperty %₂₄ :body) +26 (call top.getproperty %₂₅ :parameters) +27 (call top.indexed_iterate %₂₆ 1) +28 (= slot₃/T (call core.getfield %₂₇ 1)) +29 (= slot₁/iterstate (call core.getfield %₂₇ 2)) +30 slot₁/iterstate +31 (call top.indexed_iterate %₂₆ 2 %₃₀) +32 (= slot₂/S (call core.getfield %₃₁ 1)) +33 (goto label₃₆) +34 slot₄/X +35 (= TestMod.X %₃₄) +36 (goto label₃₉) +37 slot₄/X +38 (= TestMod.X %₃₇) +39 slot₄/X +40 TestMod.Vector +41 slot₂/S +42 (call core.apply_type %₄₀ %₄₁) +43 (call core.svec %₄₂) +44 (call core._typebody! %₃₉ %₄₃) +45 slot₃/T +46 slot₂/S +47 TestMod.X +48 slot₃/T +49 slot₂/S +50 (call core.apply_type %₄₇ %₄₈ %₄₉) +51 (call core.apply_type core.Type %₅₀) +52 (call core.UnionAll %₄₆ %₅₁) +53 (call core.UnionAll %₄₅ %₅₂) +54 (call core.svec %₅₃ core.Any) +55 (call core.svec) +56 (call core.svec %₅₄ %₅₅ :($(QuoteNode(:(#= line 1 =#))))) +57 --- method core.nothing %₅₆ + 1 (call core.fieldtype slot₁/#ctor-self# 1) + 2 slot₂/v + 3 (= slot₃/tmp %₂) + 4 slot₃/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₀) + 8 slot₃/tmp + 9 (= slot₃/tmp (call top.convert %₁ %₈)) + 10 slot₃/tmp + 11 (new slot₁/#ctor-self# %₁₀) + 12 (return %₁₁) +58 TestMod.X +59 (call core.apply_type core.Type %₅₈) +60 TestMod.Vector +61 slot₂/S +62 (call core.apply_type %₆₀ %₆₁) +63 (call core.svec %₅₉ %₆₂) +64 slot₃/T +65 slot₂/S +66 (call core.svec %₆₄ %₆₅) +67 (call core.svec %₆₃ %₆₆ :($(QuoteNode(:(#= line 1 =#))))) +68 --- method core.nothing %₆₇ + 1 TestMod.X + 2 static_parameter₁ + 3 static_parameter₂ + 4 (call core.apply_type %₁ %₂ %₃) + 5 (new %₄ slot₂/v) + 6 (return %₅) +69 (return core.nothing) + ######################################## # Error: Struct not at top level function f() diff --git 
a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 4747b6994fab0..7367bf4613ff5 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -98,12 +98,20 @@ function desugar(mod::Module, src::String) JuliaLowering.expand_forms_2(ctx, ex) end +function uncomment_description(desc) + replace(desc, r"^# ?"m=>"") +end + +function comment_description(desc) + replace(desc, r"^"m=>"# ") +end + function match_ir_test_case(case_str) - m = match(r"# *([^\n]*)\n((?:.|\n)*)"m, strip(case_str)) + m = match(r"(^#(?:.|\n)*?)^([^#](?:.|\n)*)"m, strip(case_str)) if isnothing(m) error("Malformatted IR test case:\n$(repr(case_str))") end - description = strip(m[1]) + description = uncomment_description(m[1]) inout = split(m[2], r"#----*") input, output = length(inout) == 2 ? inout : length(inout) == 1 ? (inout[1], "") : @@ -153,7 +161,7 @@ function test_ir_cases(filename::AbstractString) output = format_ir_for_test(test_mod, input, expect_error) @testset "$description" begin if output != ref - # Do our own error dumping, as @test will + # Do additional error dumping, as @test will not format errors in a nice way @error "Test \"$description\" failed" output=Text(output) ref=Text(ref) end @test output == ref @@ -163,8 +171,11 @@ end """ Update all IR test cases in `filename` when the IR format has changed. + +When `pattern` is supplied, update only those tests where +`occursin(pattern, description)` is true. 
""" -function refresh_ir_test_cases(filename) +function refresh_ir_test_cases(filename, pattern=nothing) preamble, cases = read_ir_test_cases(filename) test_mod = Module(:TestMod) Base.include_string(test_mod, preamble) @@ -173,15 +184,19 @@ function refresh_ir_test_cases(filename) println(io, preamble, "\n") println(io, "#*******************************************************************************") end - for (expect_error, description,input,ref) in cases - ir = format_ir_for_test(test_mod, input, expect_error) - if ir != ref - @info "Refreshing test case $(repr(description)) in $filename" + for (expect_error, description, input, ref) in cases + if isnothing(pattern) || occursin(pattern, description) + ir = format_ir_for_test(test_mod, input, expect_error) + if ir != ref + @info "Refreshing test case $(repr(description)) in $filename" + end + else + ir = ref end println(io, """ ######################################## - # $description + $(comment_description(description)) $(strip(input)) #--------------------- $ir From d5852c920d397955f3948543bd9b63326dd02ba4 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 17 Oct 2024 12:51:15 +1000 Subject: [PATCH 0891/1109] Use scope layers for struct lowering rather than `K"alias_binding"` As much as alias_binding is a neat idea, it seems like using a scope layer to distinguish the global vs local bindings might be good enough and allow us to remove the alias_binding concept. As a side effect, this may allow us to avoid needing support arbitrary bindings in some early lowering code. 
--- JuliaLowering/src/ast.jl | 48 ++++++++++++++++-------- JuliaLowering/src/desugaring.jl | 55 +++++++++++++++------------- JuliaLowering/src/macro_expansion.jl | 18 --------- JuliaLowering/src/scope_analysis.jl | 2 + 4 files changed, 65 insertions(+), 58 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 7d9c9a89338dd..347c6c8bf2945 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -489,19 +489,46 @@ function copy_ast(ctx, ex) return ex2 end +#------------------------------------------------------------------------------- +function set_scope_layer(ctx, ex, layer_id, force) + k = kind(ex) + scope_layer = force ? layer_id : get(ex, :scope_layer, layer_id) + if k == K"module" || k == K"toplevel" || k == K"inert" + makenode(ctx, ex, ex, children(ex); + scope_layer=scope_layer) + elseif k == K"." + makenode(ctx, ex, ex, set_scope_layer(ctx, ex[1], layer_id, force), ex[2], + scope_layer=scope_layer) + elseif !is_leaf(ex) + mapchildren(e->set_scope_layer(ctx, e, layer_id, force), ctx, ex; + scope_layer=scope_layer) + else + makeleaf(ctx, ex, ex; + scope_layer=scope_layer) + end +end + """ adopt_scope(ex, ref) Copy `ex`, adopting the scope layer of `ref`. 
""" -function adopt_scope(ex, scope_layer::LayerId) +function adopt_scope(ex::SyntaxTree, scope_layer::LayerId) set_scope_layer(ex, ex, scope_layer, true) end -function adopt_scope(ex, ref::SyntaxTree) +function adopt_scope(ex::SyntaxTree, ref::SyntaxTree) adopt_scope(ex, ref.scope_layer) end +function adopt_scope(exs::SyntaxList, ref) + out = SyntaxList(syntax_graph(exs)) + for e in exs + push!(out, adopt_scope(e, ref)) + end + return out +end + #------------------------------------------------------------------------------- # Predicates and accessors working on expression trees @@ -586,22 +613,13 @@ function to_symbol(ctx, ex) @ast ctx ex ex=>K"Symbol" end -function new_scope_layer(ctx) +function new_scope_layer(ctx, mod_ref::Module=ctx.mod) new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, true) push!(ctx.scope_layers, new_layer) new_layer.id end -# Create new local variable names with the same names as `names`, but with a -# new scope_layer so that they become independent variables during scope -# resolution. 
-function similar_identifiers(ctx, names) - scope_layer = new_scope_layer(ctx) - new_names = SyntaxList(ctx) - for name in names - @assert kind(name) == K"Identifier" - push!(new_names, makeleaf(ctx, name, name, kind=K"Identifier", scope_layer=scope_layer)) - end - new_names +function new_scope_layer(ctx, mod_ref::SyntaxTree) + @assert kind(mod_ref) == K"Identifier" + new_scope_layer(ctx, ctx.scope_layers[mod_ref.scope_layer].mod) end - diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 932d8a743b9dc..4c22ef1f81874 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1690,7 +1690,7 @@ function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, v ] end -function default_inner_constructors(ctx, srcref, outer_struct_var, +function default_inner_constructors(ctx, srcref, global_struct_name, typevar_names, field_names, field_types) # TODO: Consider using srcref = @HERE ? exact_ctor = if isempty(typevar_names) @@ -1698,11 +1698,11 @@ function default_inner_constructors(ctx, srcref, outer_struct_var, field_decls = SyntaxList(ctx) @ast ctx srcref [K"function" [K"call" - [K"::" [K"curly" "Type"::K"core" outer_struct_var]] + [K"::" [K"curly" "Type"::K"core" global_struct_name]] [[K"::" n t] for (n,t) in zip(field_names, field_types)]... ] [K"new" - outer_struct_var + global_struct_name field_names... ] ] @@ -1720,13 +1720,13 @@ function default_inner_constructors(ctx, srcref, outer_struct_var, [K"::" ctor_self if isempty(typevar_names) - [K"curly" "Type"::K"core" outer_struct_var] + [K"curly" "Type"::K"core" global_struct_name] else [K"where" [K"curly" "Type"::K"core" [K"curly" - outer_struct_var + global_struct_name typevar_names... 
] ] @@ -1777,22 +1777,22 @@ end # new(X{U,V}, x, y) # end # -function default_outer_constructor(ctx, srcref, outer_struct_var, +function default_outer_constructor(ctx, srcref, global_struct_name, typevar_names, field_names, field_types) @ast ctx srcref [K"function" [K"where" [K"call" - # We use `::Type{$outer_struct_var}` here rather than just - # `struct_name` because outer_struct_var is a binding to a + # We use `::Type{$global_struct_name}` here rather than just + # `struct_name` because global_struct_name is a binding to a # type - we know we're not creating a new `Function` and # there's no reason to emit the 1-arg `Expr(:method, name)` in # the next phase of expansion. - [K"::" [K"curly" "Type"::K"core" outer_struct_var]] + [K"::" [K"curly" "Type"::K"core" global_struct_name]] [[K"::" n t] for (n,t) in zip(field_names, field_types)]... ] [K"_typevars" typevar_names...] ] - [K"new" [K"curly" outer_struct_var typevar_names...] field_names...] + [K"new" [K"curly" global_struct_name typevar_names...] field_names...] ] end @@ -1839,17 +1839,22 @@ function expand_struct_def(ctx, ex, docs) is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG) min_initialized = min(_constructor_min_initalized(constructors), length(field_names)) newtype_var = ssavar(ctx, ex, "struct_type") - outer_struct_var = alias_binding(ctx, struct_name) + layer = new_scope_layer(ctx, struct_name) + global_struct_name = adopt_scope(struct_name, layer) if !isempty(typevar_names) # Generate expression like `prev_struct.body.body.parameters` - prev_typevars = outer_struct_var + prev_typevars = global_struct_name for _ in 1:length(typevar_names) prev_typevars = @ast ctx type_sig [K"." prev_typevars "body"::K"Symbol"] end prev_typevars = @ast ctx type_sig [K"." 
prev_typevars "parameters"::K"Symbol"] end - field_names_2 = similar_identifiers(ctx, field_names) + # New local variable names for constructor args to avoid clashing with any + # type names + if isempty(constructors) + field_names_2 = adopt_scope(field_names, layer) + end need_outer_constructor = false if isempty(constructors) && !isempty(typevar_names) @@ -1895,12 +1900,11 @@ function expand_struct_def(ctx, ex, docs) # typevars when "redefining" structs. # See https://github.com/JuliaLang/julia/pull/36121 @ast ctx ex [K"block" - [K"global" struct_name] - [K"const" struct_name] - [K"alias_binding" outer_struct_var struct_name] [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] [K"scope_block"(scope_type=:hard) [K"block" + [K"global" global_struct_name] + [K"const" global_struct_name] [K"local_def" struct_name] typevar_stmts... [K"=" @@ -1919,14 +1923,14 @@ function expand_struct_def(ctx, ex, docs) [K"=" struct_name newtype_var] [K"call"(supertype) "_setsuper!"::K"core" newtype_var supertype] [K"if" - [K"isdefined" outer_struct_var] + [K"isdefined" global_struct_name] [K"if" - [K"call" "_equiv_typedef"::K"core" outer_struct_var newtype_var] + [K"call" "_equiv_typedef"::K"core" global_struct_name newtype_var] [K"block" # If this is compatible with an old definition, use # the existing type object and throw away # NB away the new type - [K"=" struct_name outer_struct_var] + [K"=" struct_name global_struct_name] if !isempty(typevar_names) # And resassign the typevar_names - these may be # referenced in the definition of the field @@ -1938,29 +1942,30 @@ function expand_struct_def(ctx, ex, docs) end ] # Otherwise do an assignment to trigger an error - [K"=" outer_struct_var struct_name] + [K"=" global_struct_name struct_name] ] - [K"=" outer_struct_var struct_name] + [K"=" global_struct_name struct_name] ] [K"call"(type_body) "_typebody!"::K"core" struct_name [K"call" "svec"::K"core" field_types...] 
] - # Inner constructors + # Default constructors if isempty(constructors) - default_inner_constructors(ctx, ex, outer_struct_var, + default_inner_constructors(ctx, ex, global_struct_name, typevar_names, field_names_2, field_types) else TODO(ex, "Convert new-calls to new-expressions in user-defined constructors") end if need_outer_constructor - default_outer_constructor(ctx, ex, outer_struct_var, + default_outer_constructor(ctx, ex, global_struct_name, typevar_names, field_names_2, field_types) end ] ] - + + # Documentation if !isnothing(docs) || !isempty(field_docs) [K"call"(isnothing(docs) ? ex : docs) bind_docs!::K"Value" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 5d14d900889e6..fd94d156227ae 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -113,24 +113,6 @@ function Base.showerror(io::IO, exc::MacroExpansionError) highlight(io, src.file, byterange, note=exc.msg) end -function set_scope_layer(ctx, ex, layer_id, force) - k = kind(ex) - scope_layer = force ? layer_id : get(ex, :scope_layer, layer_id) - if k == K"module" || k == K"toplevel" || k == K"inert" - makenode(ctx, ex, ex, children(ex); - scope_layer=scope_layer) - elseif k == K"." - makenode(ctx, ex, ex, set_scope_layer(ctx, ex[1], layer_id, force), ex[2], - scope_layer=scope_layer) - elseif !is_leaf(ex) - mapchildren(e->set_scope_layer(ctx, e, layer_id, force), ctx, ex; - scope_layer=scope_layer) - else - makeleaf(ctx, ex, ex; - scope_layer=scope_layer) - end -end - function eval_macro_name(ctx, ex) # `ex1` might contain a nontrivial mix of scope layers so we can't just # `eval()` it, as it's already been partially lowered by this point. 
diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index e8be823968575..c1c6a43b70329 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -349,6 +349,8 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end + # TODO: Remove alias bindings? Dynamically generated scope layers are + # simpler and probably sufficient? for (varkey, id) in alias_bindings @assert !haskey(ctx.alias_map, id) ctx.alias_map[id] = get(var_ids, varkey) do From 13af6adb02ce86d7655df916cbc979070dd687b0 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 19 Oct 2024 14:15:35 +1000 Subject: [PATCH 0892/1109] Rewrite new() calls for inner constructors and functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detangling this ball of string ... felt quite epic 😬😅 Here we take a different approach from the flisp code - we don't try to reproduce the function signature matching logic of `expand_function_def` to rewrite constructor signatures within the struct expansion code. Instead, we harness that existing logic by calling expand_function_def with custom rewrite functions for the inner part of the signature expression and the function body where `new()` occurs. 
--- JuliaLowering/Project.toml | 4 +- JuliaLowering/src/ast.jl | 7 +- JuliaLowering/src/desugaring.jl | 270 ++++++++++++++++++--- JuliaLowering/src/eval.jl | 17 +- JuliaLowering/src/kinds.jl | 3 +- JuliaLowering/src/linear_ir.jl | 4 +- JuliaLowering/src/runtime.jl | 17 +- JuliaLowering/test/demo.jl | 7 +- JuliaLowering/test/functions.jl | 14 ++ JuliaLowering/test/functions_ir.jl | 24 +- JuliaLowering/test/typedefs.jl | 106 ++++++++ JuliaLowering/test/typedefs_ir.jl | 374 +++++++++++++++++++++++++++++ JuliaLowering/test/utils.jl | 25 ++ 13 files changed, 815 insertions(+), 57 deletions(-) diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml index f781c0888d4a0..72362a643b714 100644 --- a/JuliaLowering/Project.toml +++ b/JuliaLowering/Project.toml @@ -11,7 +11,9 @@ julia = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [targets] -test = ["Test", "FileWatching"] +test = ["Test", "FileWatching", "Markdown", "REPL"] diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 347c6c8bf2945..17fc6bf77fdf9 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -147,10 +147,11 @@ end #------------------------------------------------------------------------------- # AST creation utilities -_node_id(ex::NodeId) = ex -_node_id(ex::SyntaxTree) = ex._id - _node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_compatible_graph(graph, ex); ex._id) +function _node_id(graph::SyntaxGraph, ex) + # Fallback to give a comprehensible error message for use with the @ast macro + error("Attempt to use `$(repr(ex))` of type `$(typeof(ex))` as an AST node. Try annotating with `::K\"your_intended_kind\"?`") +end _node_ids(graph::SyntaxGraph) = () _node_ids(graph::SyntaxGraph, ::Nothing, cs...) = _node_ids(graph, cs...) 
diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 4c22ef1f81874..eb47d985e978e 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1272,7 +1272,7 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) end end -function expand_function_def(ctx, ex, docs) +function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity) @chk numchildren(ex) in (1,2) name = ex[1] if numchildren(ex) == 1 && is_identifier_like(name) @@ -1302,10 +1302,9 @@ function expand_function_def(ctx, ex, docs) return_type = name[2] name = name[1] end - + if kind(name) == K"call" - callex = name - body = ex[2] + callex = rewrite_call(name) # TODO # nospecialize # argument destructuring @@ -1367,6 +1366,7 @@ function expand_function_def(ctx, ex, docs) pushfirst!(arg_names, farg_name) pushfirst!(arg_types, farg_type) + body = rewrite_body(ex[2]) if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") body = @ast ctx body [ @@ -1381,8 +1381,8 @@ function expand_function_def(ctx, ex, docs) method_table = nothing_(ctx, name) # TODO: method overlays @ast ctx ex [K"scope_block"(scope_type=:hard) [K"block" - [K"=" func_self func_self_val] typevar_stmts... 
+ [K"=" func_self func_self_val] # metadata contains svec(types, sparms, location) method_metadata := [K"call"(callex) "svec" ::K"core" @@ -1639,11 +1639,11 @@ function _match_struct_field(x0) end end -function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, constructors, exs) +function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, inner_defs, exs) for e in exs if kind(e) == K"block" _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, - constructors, children(e)) + inner_defs, children(e)) elseif kind(e) == K"=" throw(LoweringError(e, "assignment syntax in structure fields is reserved")) else @@ -1666,8 +1666,9 @@ function _collect_struct_fields(ctx, field_names, field_types, field_attrs, fiel push!(field_docs, @ast ctx e m.docs) end else - # Inner constructors - push!(constructors, e) + # Inner constructors and inner functions + # TODO: Disallow arbitrary expressions inside `struct`? + push!(inner_defs, e) end end end @@ -1802,24 +1803,234 @@ function _new_call(ctx, ex, typevar_names, field_names, field_types) end end -function _rewrite_constructor_new_calls(ctx, ex, typevar_names, field_names, field_types) +function _is_new_call(ex) + kind(ex) == K"call" && + ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") || + (kind(ex[1]) == K"curly" && kind(ex[1][1]) == K"Identifier" && ex[1][1].name_val == "new")) +end + +# Rewrite inner constructor signatures for struct `X` from `X(...)` +# to `(ctor_self::Type{X})(...)` +function _rewrite_ctor_sig(ctx, callex, struct_name, global_struct_name, struct_typevars, ctor_self) + @assert kind(callex) == K"call" + name = callex[1] + if is_same_identifier_like(struct_name, name) + # X(x,y) ==> (#ctor-self#::Type{X})(x,y) + ctor_self[] = new_mutable_var(ctx, callex, "#ctor-self#"; kind=:argument) + @ast ctx callex [K"call" + [K"::" + ctor_self[] + [K"curly" "Type"::K"core" global_struct_name] + ] + callex[2:end]... 
+ ] + elseif kind(name) == K"curly" && is_same_identifier_like(struct_name, name[1]) + # X{T}(x,y) ==> (#ctor-self#::Type{X{T}})(x,y) + self = new_mutable_var(ctx, callex, "#ctor-self#"; kind=:argument) + if numchildren(name) - 1 == length(struct_typevars) + # Self fully parameterized - can be used as the full type to + # rewrite new() calls in constructor body. + ctor_self[] = self + end + @ast ctx callex [K"call" + [K"::" + self + [K"curly" + "Type"::K"core" + [K"curly" + global_struct_name + name[2:end]... + ] + ] + ] + callex[2:end]... + ] + else + callex + end +end + +# Rewrite calls to `new` in bodies of inner constructors and inner functions +# into `new` or `splatnew` expressions. For example: +# +# struct X{T,S} +# X() = new() +# X() = new{A,B}() +# X{T,S}() where {T,S} = new() +# X{A,B}() = new() +# X{A}() = new() +# (t::Type{X})() = new{A,B}() +# f() = new() +# f() = new{A,B}() +# f() = new{Ts...}() +# end +# +# Map to the following +# +# X() = ERROR +# (#ctor-self#::Type{X})() = (new X{A,B}) +# (Type{X{T,S}}() where {T,S} = (new #ctor-self#) +# X{A,B}() = (new #ctor-self#) +# X{A}() = ERROR +# (t::Type{X})() = (new X{A,B}) +# f() = ERROR +# f() = (new X{A,B}) +# f() = (new X{Ts...}) +# +# TODO: Arguably the following "could also work", but any symbolic match of +# this case would be heuristic and rely on assuming Type == Core.Type. So +# runtime checks would really be required and flisp lowering doesn't catch +# this case either. 
+# +# (t::Type{X{A,B}})() = new() +function _rewrite_ctor_new_calls(ctx, ex, struct_name, global_struct_name, ctor_self, + struct_typevars, field_types) + if is_leaf(ex) + return ex + elseif !_is_new_call(ex) + return mapchildren( + e->_rewrite_ctor_new_calls(ctx, e, struct_name, global_struct_name, + ctor_self, struct_typevars, field_types), + ctx, ex + ) + end + # Rewrite a call to new() + kw_arg_i = findfirst(e->(k = kind(e); k == K"=" || k == K"parameters"), children(ex)) + if !isnothing(kw_arg_i) + throw(LoweringError(ex[kw_arg_i], "`new` does not accept keyword arguments")) + end + full_struct_type = if kind(ex[1]) == K"curly" + # new{A,B}(...) + new_type_params = ex[1][2:end] + n_type_splat = sum(kind(t) == K"..." for t in new_type_params) + n_type_nonsplat = length(new_type_params) - n_type_splat + if n_type_splat == 0 && n_type_nonsplat < length(struct_typevars) + throw(LoweringError(ex[1], "too few type parameters specified in `new{...}`")) + elseif n_type_nonsplat > length(struct_typevars) + throw(LoweringError(ex[1], "too many type parameters specified in `new{...}`")) + end + @ast ctx ex[1] [K"curly" global_struct_name new_type_params...] + elseif !isnothing(ctor_self) + # new(...) in constructors + ctor_self + else + # new(...) inside non-constructor inner functions + if isempty(struct_typevars) + global_struct_name + else + throw(LoweringError(ex[1], "too few type parameters specified in `new`")) + end + end + new_args = ex[2:end] + n_splat = sum(kind(t) == K"..." 
for t in new_args) + n_nonsplat = length(new_args) - n_splat + n_fields = length(field_types) + function throw_n_fields_error(desc) + @ast ctx ex [K"call" + "throw"::K"core" + [K"call" + "ArgumentError"::K"top" + "too $desc arguments in `new` (expected $n_fields)"::K"String" + ] + ] + end + if n_nonsplat > n_fields + return throw_n_fields_error("many") + else + # "Too few" args are allowed in partially initialized structs + end + if n_splat == 0 + @ast ctx ex [K"block" + struct_type := full_struct_type + [K"new" + struct_type + [_new_call_convert_arg(ctx, struct_type, type, i, name) + for (i, (name,type)) in enumerate(zip(ex[2:end], field_types))]... + ] + ] + else + fields_all_Any = all(kind(ft) == K"core" && ft.name_val == "Any" for ft in field_types) + if fields_all_Any + @ast ctx ex [K"block" + struct_type := full_struct_type + [K"splatnew" + struct_type + # Note: `jl_new_structt` ensures length of this tuple is + # exactly the number of fields. + [K"call" "tuple"::K"core" ex[2:end]...] + ] + ] + else + # `new` with splatted args which are symbolically not `Core.Any` + # (might be `Any` at runtime but we can't know that here.) + @ast ctx ex [K"block" + args := [K"call" "tuple"::K"core" ex[2:end]...] + n_args := [K"call" "nfields"::K"core" args] + [K"if" + [K"call" "ult_int"::K"top" n_args n_fields::K"Integer"] + throw_n_fields_error("few") + ] + [K"if" + [K"call" "ult_int"::K"top" n_fields::K"Integer" n_args] + throw_n_fields_error("many") + ] + struct_type := full_struct_type + [K"new" + struct_type + [_new_call_convert_arg(ctx, struct_type, type, i, + [K"call" "getfield"::K"core" args i::K"Integer"]) + for (i, type) in enumerate(field_types)]... + ] + ] + end + end +end + +# Rewrite calls to `new( ... )` to `new` expressions on the appropriate +# type, determined by the containing type and constructor definitions. +# +# This is mainly for constructors, but also needs to work for inner functions +# which may call new() but are not constructors. 
+function rewrite_new_calls(ctx, ex, struct_name, global_struct_name, + typevar_names, field_names, field_types) + if kind(ex) == K"doc" + docs = ex[1] + ex = ex[2] + else + docs = nothing + end + if kind(ex) != K"function" + return ex + end + if !(numchildren(ex) == 2 && is_eventually_call(ex[1])) + throw(LoweringError(ex, "Expected constructor or named inner function")) + end + + ctor_self = Ref{Union{Nothing,SyntaxTree}}(nothing) + expand_function_def(ctx, ex, docs, + callex->_rewrite_ctor_sig(ctx, callex, struct_name, + global_struct_name, typevar_names, ctor_self), + body->_rewrite_ctor_new_calls(ctx, body, struct_name, global_struct_name, + ctor_self[], typevar_names, field_types) + ) end function _constructor_min_initalized(ex::SyntaxTree) - if kind(ex) == K"call" && ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") || - (kind(ex[1]) == K"curly" && kind(ex[1][1]) == K"Identifier" && ex[1][1].name_val == "new")) - numchildren(ex) - 1 + if _is_new_call(ex) + if any(kind(e) == K"..." for e in ex[2:end]) + # Lowering ensures new with splats always inits all fields + # or in the case of splatnew this is enforced by the runtime. 
+ typemax(Int) + else + numchildren(ex) - 1 + end elseif !is_leaf(ex) - _constructor_min_initalized(children(ex)) + minimum((_constructor_min_initalized(e) for e in children(ex)), init=typemax(Int)) else typemax(Int) end end -function _constructor_min_initalized(exs::AbstractVector) - minimum((_constructor_min_initalized(e) for e in exs), init=typemax(Int)) -end - function expand_struct_def(ctx, ex, docs) @chk numchildren(ex) == 2 type_sig = ex[1] @@ -1833,11 +2044,12 @@ function expand_struct_def(ctx, ex, docs) field_types = SyntaxList(ctx) field_attrs = SyntaxList(ctx) field_docs = SyntaxList(ctx) - constructors = SyntaxList(ctx) + inner_defs = SyntaxList(ctx) _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, - constructors, children(type_body)) + inner_defs, children(type_body)) is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG) - min_initialized = min(_constructor_min_initalized(constructors), length(field_names)) + min_initialized = minimum((_constructor_min_initalized(e) for e in inner_defs), + init=length(field_names)) newtype_var = ssavar(ctx, ex, "struct_type") layer = new_scope_layer(ctx, struct_name) global_struct_name = adopt_scope(struct_name, layer) @@ -1852,12 +2064,12 @@ function expand_struct_def(ctx, ex, docs) # New local variable names for constructor args to avoid clashing with any # type names - if isempty(constructors) + if isempty(inner_defs) field_names_2 = adopt_scope(field_names, layer) end need_outer_constructor = false - if isempty(constructors) && !isempty(typevar_names) + if isempty(inner_defs) && !isempty(typevar_names) # To generate an outer constructor each struct type parameter must be # able to be inferred from the list of fields passed as constuctor # arguments. 
@@ -1928,8 +2140,8 @@ function expand_struct_def(ctx, ex, docs) [K"call" "_equiv_typedef"::K"core" global_struct_name newtype_var] [K"block" # If this is compatible with an old definition, use - # the existing type object and throw away - # NB away the new type + # the existing type object and throw away the new + # type [K"=" struct_name global_struct_name] if !isempty(typevar_names) # And resassign the typevar_names - these may be @@ -1952,11 +2164,15 @@ function expand_struct_def(ctx, ex, docs) [K"call" "svec"::K"core" field_types...] ] # Default constructors - if isempty(constructors) + if isempty(inner_defs) default_inner_constructors(ctx, ex, global_struct_name, typevar_names, field_names_2, field_types) else - TODO(ex, "Convert new-calls to new-expressions in user-defined constructors") + map!(inner_defs, inner_defs) do def + rewrite_new_calls(ctx, def, struct_name, global_struct_name, + typevar_names, field_names, field_types) + end + [K"block" inner_defs...] end if need_outer_constructor default_outer_constructor(ctx, ex, global_struct_name, diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index ce05037f89387..46d3202a1b19b 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -245,14 +245,15 @@ function to_lowered_expr(mod, ex) # primitive_type global const new splatnew isdefined # enter leave pop_exception inbounds boundscheck loopinfo copyast meta # foreigncall new_opaque_closure lambda - head = k == K"call" ? :call : - k == K"new" ? :new : - k == K"=" ? :(=) : - k == K"global" ? :global : - k == K"const" ? :const : - k == K"leave" ? :leave : - k == K"pop_exception" ? :pop_exception : - k == K"isdefined" ? :isdefined : + head = k == K"call" ? :call : + k == K"new" ? :new : + k == K"splatnew" ? :splatnew : + k == K"=" ? :(=) : + k == K"global" ? :global : + k == K"const" ? :const : + k == K"leave" ? :leave : + k == K"isdefined" ? :isdefined : + k == K"pop_exception" ? 
:pop_exception : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index e7da05bf04667..9b49b2c79ac0a 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -29,8 +29,9 @@ function _register_kinds() "symbolic_label" # Goto named label "symbolic_goto" - # Internal initializer for structures, called from inner constructor + # Internal initializer for struct types, for inner constructors/functions "new" + "splatnew" # Catch-all for additional syntax extensions without the need to # extend `Kind`. Known extensions include: # locals, islocal diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 1c34b6b46f668..39c2159ac7b9a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -584,8 +584,8 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end - elseif k == K"call" || k == K"new" - # TODO k ∈ splatnew foreigncall cfunction new_opaque_closure cglobal + elseif k == K"call" || k == K"new" || k == K"splatnew" + # TODO k ∈ foreigncall cfunction new_opaque_closure cglobal args = compile_args(ctx, children(ex)) callex = makenode(ctx, ex, k, args) if in_tail_pos diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 2fd3018d98e4d..6231ecf1c7a87 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -165,7 +165,7 @@ Base.@assume_effects :removable :nothrow function current_exception() @ccall jl_current_exception(current_task()::Any)::Any end -function bind_docs!(f::Function, docstr, method_metadata) +function _bind_func_docs!(f, docstr, method_metadata::Core.SimpleVector) mod = parentmodule(f) bind = Base.Docs.Binding(mod, nameof(f)) full_sig = method_metadata[1] @@ -181,7 +181,20 @@ function bind_docs!(f::Function, docstr, method_metadata) Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig) end -function 
bind_docs!(type::Type, docstr, lineno; field_docs=Core.svec()) +function bind_docs!(f::Function, docstr, method_metadata::Core.SimpleVector) + _bind_func_docs!(f, docstr, method_metadata) +end + +# Document constructors +function bind_docs!(::Type{Type{T}}, docstr, method_metadata::Core.SimpleVector) where T + _bind_func_docs!(T, docstr, method_metadata) +end + +function bind_docs!(type::Type, docstr, method_metadata::Core.SimpleVector) + _bind_func_docs!(type, docstr, method_metadata) +end + +function bind_docs!(type::Type, docstr, lineno::LineNumberNode; field_docs=Core.svec()) mod = parentmodule(type) bind = Base.Docs.Binding(mod, nameof(type)) metadata = Dict{Symbol, Any}( diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index d09d9cbccfa19..921f1ba756b49 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -561,7 +561,12 @@ end """ src = """ -struct X +struct S9{T} + x + y + + "Docs for S9" + S9{Int}(xs) = new(xs...) end """ diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index c6da226834478..7f392520ceab5 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -100,6 +100,20 @@ begin end """) === ("fallback", (Number, Float64), (Int, Int), "fallback") +Base.eval(test_mod, +:(struct X1{T} end) +) + +# `where` params used in function obj type +@test JuliaLowering.include_string(test_mod, """ +begin + function (x::X1{T})() where T + T + end + X1{Int}()() +end +""") === Int + Base.include_string(test_mod, """ struct X end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 6bef6e2f98b88..204ef2fa95c75 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -102,11 +102,11 @@ function f(::T, ::U, ::V) where T where {U,V} (T,U,V) end #--------------------- -1 (method :f) -2 (= slot₂/U (call core.TypeVar :U)) -3 (= slot₃/V (call core.TypeVar :V)) -4 (= slot₁/T (call core.TypeVar :T)) -5 
(call core.Typeof %₁) +1 (= slot₂/U (call core.TypeVar :U)) +2 (= slot₃/V (call core.TypeVar :V)) +3 (= slot₁/T (call core.TypeVar :T)) +4 (method :f) +5 (call core.Typeof %₄) 6 slot₁/T 7 slot₂/U 8 slot₃/V @@ -122,7 +122,7 @@ end 3 static_parameter₂ 4 (call core.tuple %₁ %₂ %₃) 5 (return %₄) -16 (return %₁) +16 (return %₄) ######################################## # Static parameter with bounds and used with apply_type in argument @@ -130,11 +130,11 @@ function f(::S{T}) where X <: T <: Y T end #--------------------- -1 (method :f) -2 TestMod.X -3 TestMod.Y -4 (= slot₁/T (call core.TypeVar :T %₂ %₃)) -5 (call core.Typeof %₁) +1 TestMod.X +2 TestMod.Y +3 (= slot₁/T (call core.TypeVar :T %₁ %₂)) +4 (method :f) +5 (call core.Typeof %₄) 6 TestMod.S 7 slot₁/T 8 (call core.apply_type %₆ %₇) @@ -145,7 +145,7 @@ end 13 --- method core.nothing %₁₂ 1 static_parameter₁ 2 (return %₁) -14 (return %₁) +14 (return %₄) ######################################## # Error: Duplicate function argument names diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index 92a0202d01ee6..ebb4e0d81a323 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -123,6 +123,112 @@ let s = test_mod.S5{Any}(42.0, "hi") @test s.y == "hi" end +# User defined inner constructors and helper functions for structs without type params +@test JuliaLowering.include_string(test_mod, """ +struct S6 + x + S6_f() = new(42) + + "some docs" + S6() = S6_f() + S6(x) = new(x) +end +""") === nothing +let s = test_mod.S6() + @test s isa test_mod.S6 + @test s.x === 42 +end +let s = test_mod.S6(2) + @test s isa test_mod.S6 + @test s.x === 2 +end +@test docstrings_equal(@doc(test_mod.S6), Markdown.doc"some docs") + +# User defined inner constructors and helper functions for structs with type params +@test JuliaLowering.include_string(test_mod, """ +struct S7{S,T} + x::S + y + + # Cases where full struct type may be deduced and used in body + S7{Int,String}() = new(10.0, "y1") 
+ S7{S,T}() where {S,T} = new(10.0, "y2") + S7{Int,T}() where {T} = new(10.0, "y3") + (::Type{S7{Int,UInt8}})() = new{Int,UInt8}(10.0, "y4") + + # Cases where new{...} is called + S7() = new{Int,Int}(10.0, "y5") + S7{UInt8}() = S7_f() + S7_f() = new{UInt8,UInt8}(10.0, "y6") +end +""") === nothing +let s = test_mod.S7{Int,String}() + @test s isa test_mod.S7{Int,String} + @test s.x === 10 + @test s.y === "y1" +end +let s = test_mod.S7{UInt16,UInt16}() + @test s isa test_mod.S7{UInt16,UInt16} + @test s.x === UInt16(10) + @test s.y === "y2" +end +let s = test_mod.S7{Int,UInt16}() + @test s isa test_mod.S7{Int,UInt16} + @test s.x === 10 + @test s.y === "y3" +end +let s = test_mod.S7{Int,UInt8}() + @test s isa test_mod.S7{Int,UInt8} + @test s.x === 10 + @test s.y === "y4" +end +let s = test_mod.S7() + @test s isa test_mod.S7{Int,Int} + @test s.x === 10 + @test s.y === "y5" +end +let s = test_mod.S7{UInt8}() + @test s isa test_mod.S7{UInt8,UInt8} + @test s.x === UInt8(10) + @test s.y === "y6" +end + +# new() with splats and typed fields +@test JuliaLowering.include_string(test_mod, """ +struct S8 + x::Int + y::Float64 + + S8(xs, ys) = new(xs..., ys...) +end +""") === nothing +let s = test_mod.S8((10.0,), (20,)) + @test s isa test_mod.S8 + @test s.x === 10 + @test s.y === 20.0 +end +# Wrong number of args checked by lowering +@test_throws ArgumentError test_mod.S8((1,), ()) +@test_throws ArgumentError test_mod.S8((1,2,3), ()) + +# new() with splats and untyped fields +@test JuliaLowering.include_string(test_mod, """ +struct S9 + x + y + + S9(xs) = new(xs...) 
+end +""") === nothing +let s = test_mod.S9((10.0,20)) + @test s isa test_mod.S9 + @test s.x === 10.0 + @test s.y === 20 +end +# Wrong number of args checked by the runtime +@test_throws ArgumentError test_mod.S9((1,)) +@test_throws ArgumentError test_mod.S9((1,2,3)) + # Test cases from # https://github.com/JuliaLang/julia/issues/36104 # https://github.com/JuliaLang/julia/pull/36121 diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index cdfbffe0e7472..ff86a897f6029 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -735,6 +735,380 @@ end 6 (return %₅) 69 (return core.nothing) +######################################## +# User defined inner constructors and helper functions for structs without type params +struct X + x + f() = new(1) + X() = f() + X(x) = new(x) + X(y,z)::ReallyXIPromise = new(y+z) + """ + Docs for X constructor + """ + X(a,b,c) = new(a) +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (call core.svec) +4 (call core.svec :x) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 1) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (isdefined TestMod.X) +10 (gotoifnot %₉ label₂₀) +11 TestMod.X +12 (call core._equiv_typedef %₁₁ %₆) +13 (gotoifnot %₁₂ label₁₇) +14 TestMod.X +15 (= slot₁/X %₁₄) +16 (goto label₁₉) +17 slot₁/X +18 (= TestMod.X %₁₇) +19 (goto label₂₂) +20 slot₁/X +21 (= TestMod.X %₂₀) +22 slot₁/X +23 (call core.svec core.Any) +24 (call core._typebody! 
%₂₂ %₂₃) +25 (method :f) +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 3 =#))))) +30 --- method core.nothing %₂₉ + 1 TestMod.X + 2 (new %₁ 1) + 3 (return %₂) +31 TestMod.X +32 (call core.apply_type core.Type %₃₁) +33 (call core.svec %₃₂) +34 (call core.svec) +35 (call core.svec %₃₃ %₃₄ :($(QuoteNode(:(#= line 4 =#))))) +36 --- method core.nothing %₃₅ + 1 TestMod.f + 2 (call %₁) + 3 (return %₂) +37 TestMod.X +38 (call core.apply_type core.Type %₃₇) +39 (call core.svec %₃₈ core.Any) +40 (call core.svec) +41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 5 =#))))) +42 --- method core.nothing %₄₁ + 1 slot₁/#ctor-self# + 2 (new %₁ slot₂/x) + 3 (return %₂) +43 TestMod.X +44 (call core.apply_type core.Type %₄₃) +45 (call core.svec %₄₄ core.Any core.Any) +46 (call core.svec) +47 (call core.svec %₄₅ %₄₆ :($(QuoteNode(:(#= line 6 =#))))) +48 --- method core.nothing %₄₇ + 1 TestMod.ReallyXIPromise + 2 slot₁/#ctor-self# + 3 TestMod.+ + 4 (call %₃ slot₂/y slot₃/z) + 5 (= slot₄/tmp (new %₂ %₄)) + 6 slot₄/tmp + 7 (call core.isa %₆ %₁) + 8 (gotoifnot %₇ label₁₀) + 9 (goto label₁₃) + 10 slot₄/tmp + 11 (call top.convert %₁ %₁₀) + 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) + 13 slot₄/tmp + 14 (return %₁₃) +49 TestMod.X +50 (call core.apply_type core.Type %₄₉) +51 (call core.svec %₅₀ core.Any core.Any core.Any) +52 (call core.svec) +53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 10 =#))))) +54 --- method core.nothing %₅₃ + 1 slot₁/#ctor-self# + 2 (new %₁ slot₂/a) + 3 (return %₂) +55 (call JuliaLowering.bind_docs! 
%₅₀ "Docs for X constructor\n" %₅₃) +56 (return core.nothing) + +######################################## +# User defined inner constructors and helper functions for structs with type params +struct X{S,T} + x + X{A,B}() = new(1) + X{U,V}() where {U,V} = new(1) + f() = new{A,B}(1) +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (= slot₂/S (call core.TypeVar :S)) +4 (= slot₃/T (call core.TypeVar :T)) +5 slot₂/S +6 slot₃/T +7 (call core.svec %₅ %₆) +8 (call core.svec :x) +9 (call core.svec) +10 (call core._structtype TestMod :X %₇ %₈ %₉ false 1) +11 (= slot₄/X %₁₀) +12 (call core._setsuper! %₁₀ core.Any) +13 (isdefined TestMod.X) +14 (gotoifnot %₁₃ label₃₄) +15 TestMod.X +16 (call core._equiv_typedef %₁₅ %₁₀) +17 (gotoifnot %₁₆ label₃₁) +18 TestMod.X +19 (= slot₄/X %₁₈) +20 TestMod.X +21 (call top.getproperty %₂₀ :body) +22 (call top.getproperty %₂₁ :body) +23 (call top.getproperty %₂₂ :parameters) +24 (call top.indexed_iterate %₂₃ 1) +25 (= slot₂/S (call core.getfield %₂₄ 1)) +26 (= slot₁/iterstate (call core.getfield %₂₄ 2)) +27 slot₁/iterstate +28 (call top.indexed_iterate %₂₃ 2 %₂₇) +29 (= slot₃/T (call core.getfield %₂₈ 1)) +30 (goto label₃₃) +31 slot₄/X +32 (= TestMod.X %₃₁) +33 (goto label₃₆) +34 slot₄/X +35 (= TestMod.X %₃₄) +36 slot₄/X +37 (call core.svec core.Any) +38 (call core._typebody! 
%₃₆ %₃₇) +39 TestMod.X +40 TestMod.A +41 TestMod.B +42 (call core.apply_type %₃₉ %₄₀ %₄₁) +43 (call core.apply_type core.Type %₄₂) +44 (call core.svec %₄₃) +45 (call core.svec) +46 (call core.svec %₄₄ %₄₅ :($(QuoteNode(:(#= line 3 =#))))) +47 --- method core.nothing %₄₆ + 1 slot₁/#ctor-self# + 2 (new %₁ 1) + 3 (return %₂) +48 (= slot₅/U (call core.TypeVar :U)) +49 (= slot₆/V (call core.TypeVar :V)) +50 TestMod.X +51 slot₅/U +52 slot₆/V +53 (call core.apply_type %₅₀ %₅₁ %₅₂) +54 (call core.apply_type core.Type %₅₃) +55 (call core.svec %₅₄) +56 slot₅/U +57 slot₆/V +58 (call core.svec %₅₆ %₅₇) +59 (call core.svec %₅₅ %₅₈ :($(QuoteNode(:(#= line 4 =#))))) +60 --- method core.nothing %₅₉ + 1 slot₁/#ctor-self# + 2 (new %₁ 1) + 3 (return %₂) +61 (method :f) +62 (call core.Typeof %₆₁) +63 (call core.svec %₆₂) +64 (call core.svec) +65 (call core.svec %₆₃ %₆₄ :($(QuoteNode(:(#= line 5 =#))))) +66 --- method core.nothing %₆₅ + 1 TestMod.X + 2 TestMod.A + 3 TestMod.B + 4 (call core.apply_type %₁ %₂ %₃) + 5 (new %₄ 1) + 6 (return %₅) +67 (return core.nothing) + +######################################## +# new() calls with splats; `Any` fields +struct X + x + y + X(xs) = new(xs...) +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (call core.svec) +4 (call core.svec :x :y) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 2) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (isdefined TestMod.X) +10 (gotoifnot %₉ label₂₀) +11 TestMod.X +12 (call core._equiv_typedef %₁₁ %₆) +13 (gotoifnot %₁₂ label₁₇) +14 TestMod.X +15 (= slot₁/X %₁₄) +16 (goto label₁₉) +17 slot₁/X +18 (= TestMod.X %₁₇) +19 (goto label₂₂) +20 slot₁/X +21 (= TestMod.X %₂₀) +22 slot₁/X +23 (call core.svec core.Any core.Any) +24 (call core._typebody! 
%₂₂ %₂₃) +25 TestMod.X +26 (call core.apply_type core.Type %₂₅) +27 (call core.svec %₂₆ core.Any) +28 (call core.svec) +29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 4 =#))))) +30 --- method core.nothing %₂₉ + 1 slot₁/#ctor-self# + 2 (call core._apply_iterate top.iterate core.tuple slot₂/xs) + 3 (splatnew %₁ %₂) + 4 (return %₃) +31 (return core.nothing) + +######################################## +# new() calls with splats; typed fields +struct X{T} + x::T + y::A + X{T}(xs) where {T} = new(xs...) +end +#--------------------- +1 (global TestMod.X) +2 (const TestMod.X) +3 (= slot₁/T (call core.TypeVar :T)) +4 slot₁/T +5 (call core.svec %₄) +6 (call core.svec :x :y) +7 (call core.svec) +8 (call core._structtype TestMod :X %₅ %₆ %₇ false 2) +9 (= slot₂/X %₈) +10 (call core._setsuper! %₈ core.Any) +11 (isdefined TestMod.X) +12 (gotoifnot %₁₁ label₂₇) +13 TestMod.X +14 (call core._equiv_typedef %₁₃ %₈) +15 (gotoifnot %₁₄ label₂₄) +16 TestMod.X +17 (= slot₂/X %₁₆) +18 TestMod.X +19 (call top.getproperty %₁₈ :body) +20 (call top.getproperty %₁₉ :parameters) +21 (call top.indexed_iterate %₂₀ 1) +22 (= slot₁/T (call core.getfield %₂₁ 1)) +23 (goto label₂₆) +24 slot₂/X +25 (= TestMod.X %₂₄) +26 (goto label₂₉) +27 slot₂/X +28 (= TestMod.X %₂₇) +29 slot₂/X +30 slot₁/T +31 TestMod.A +32 (call core.svec %₃₀ %₃₁) +33 (call core._typebody! 
%₂₉ %₃₂) +34 (= slot₃/T (call core.TypeVar :T)) +35 TestMod.X +36 slot₃/T +37 (call core.apply_type %₃₅ %₃₆) +38 (call core.apply_type core.Type %₃₇) +39 (call core.svec %₃₈ core.Any) +40 slot₃/T +41 (call core.svec %₄₀) +42 (call core.svec %₃₉ %₄₁ :($(QuoteNode(:(#= line 4 =#))))) +43 --- method core.nothing %₄₂ + 1 (call core._apply_iterate top.iterate core.tuple slot₂/xs) + 2 (call core.nfields %₁) + 3 (call top.ult_int %₂ 2) + 4 (gotoifnot %₃ label₇) + 5 (call top.ArgumentError "too few arguments in `new` (expected 2)") + 6 (call core.throw %₅) + 7 (call top.ult_int 2 %₂) + 8 (gotoifnot %₇ label₁₁) + 9 (call top.ArgumentError "too many arguments in `new` (expected 2)") + 10 (call core.throw %₉) + 11 slot₁/#ctor-self# + 12 (call core.fieldtype %₁₁ 1) + 13 (= slot₃/tmp (call core.getfield %₁ 1)) + 14 slot₃/tmp + 15 (call core.isa %₁₄ %₁₂) + 16 (gotoifnot %₁₅ label₁₈) + 17 (goto label₂₀) + 18 slot₃/tmp + 19 (= slot₃/tmp (call top.convert %₁₂ %₁₈)) + 20 slot₃/tmp + 21 (call core.fieldtype %₁₁ 2) + 22 (= slot₄/tmp (call core.getfield %₁ 2)) + 23 slot₄/tmp + 24 (call core.isa %₂₃ %₂₁) + 25 (gotoifnot %₂₄ label₂₇) + 26 (goto label₂₉) + 27 slot₄/tmp + 28 (= slot₄/tmp (call top.convert %₂₁ %₂₇)) + 29 slot₄/tmp + 30 (new %₁₁ %₂₀ %₂₉) + 31 (return %₃₀) +44 (return core.nothing) + +######################################## +# Error: new doesn't accept keywords +struct X + X() = new(a=1) +end +#--------------------- +LoweringError: +struct X + X() = new(a=1) +# └─┘ ── `new` does not accept keyword arguments +end + +######################################## +# Error: new doesn't accept keywords (params block) +struct X + X() = new(; a=1) +end +#--------------------- +LoweringError: +struct X + X() = new(; a=1) +# └───┘ ── `new` does not accept keyword arguments +end + +######################################## +# Error: User defined inner constructors without enough type params +struct X{S,T} + X() = new{A}() +end +#--------------------- +LoweringError: +struct X{S,T} + X() = 
new{A}() +# └────┘ ── too few type parameters specified in `new{...}` +end + +######################################## +# Error: User defined inner constructors without enough type params +struct X{S,T} + X{A}() = new() +end +#--------------------- +LoweringError: +struct X{S,T} + X{A}() = new() +# └─┘ ── too few type parameters specified in `new` +end + +######################################## +# Error: User defined inner constructors with too many type params +struct X{S,T} + X() = new{A,B,C}() +end +#--------------------- +LoweringError: +struct X{S,T} + X() = new{A,B,C}() +# └────────┘ ── too many type parameters specified in `new{...}` +end + ######################################## # Error: Struct not at top level function f() diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 7367bf4613ff5..157b1627ca010 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -4,6 +4,12 @@ using JuliaLowering using JuliaSyntax import FileWatching +# The following are for docstrings testing. We need to load the REPL module +# here for `Base.@doc` lookup to work at all. Yes this does seem really, +# really, REALLY messed up. 
+using Markdown +import REPL + using JuliaSyntax: sourcetext using JuliaLowering: @@ -227,3 +233,22 @@ function watch_ir_tests(dir, delay=0.5) end end end + +# See Julia Base tests in "test/docs.jl" +function docstrings_equal(d1, d2; debug=true) + io1 = IOBuffer() + io2 = IOBuffer() + show(io1, MIME"text/markdown"(), d1) + show(io2, MIME"text/markdown"(), d2) + s1 = String(take!(io1)) + s2 = String(take!(io2)) + if debug && s1 != s2 + print(s1) + println("--------------------------------------------------------------------------------") + print(s2) + println("================================================================================") + end + return s1 == s2 +end +docstrings_equal(d1::Docs.DocStr, d2) = docstrings_equal(Docs.parsedoc(d1), d2) + From cca242b07cf21b6e9b612355d71f856e9a8615ae Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 19 Nov 2024 15:08:34 +1000 Subject: [PATCH 0893/1109] Various small accumulated fixes/cleanups * Remove outterref - this has been removed upstream * Make expand_unionall_def its own function - this will be required shortly to match some changes upstream. * JuliaSyntax has removed the `convert` overload for `Kind` in the latest dev version --- JuliaLowering/README.md | 2 +- JuliaLowering/src/ast.jl | 2 +- JuliaLowering/src/desugaring.jl | 54 +++++++++++++++++++++-------- JuliaLowering/src/kinds.jl | 1 - JuliaLowering/src/linear_ir.jl | 18 ++++------ JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/test/ccall_demo.jl | 2 +- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/demo_include.jl | 2 +- 9 files changed, 51 insertions(+), 34 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 4da9079e86428..3b8697f8fa590 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -28,7 +28,7 @@ This work is intended to Note this is a very early work in progress; most things probably don't work! -1. Use a recent dev version of Julia (need at least version 1.12.0-DEV.512) +1. 
You need a 1.12-DEV build of Julia: At least 1.12.0-DEV.512; commit `263928f9ad4` currently works. Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. (In fact it is currently broken on the latest `1.12-DEV`.) 2. Check out the main branch of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) 3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) 4. Run the demo `include("test/demo.jl")` diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 17fc6bf77fdf9..28cd0c18e19e7 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -534,7 +534,7 @@ end # Predicates and accessors working on expression trees function is_quoted(ex) - kind(ex) in KSet"Symbol quote top core globalref outerref break inert + kind(ex) in KSet"Symbol quote top core globalref break inert meta inbounds inline noinline loopinfo" end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index eb47d985e978e..10039b53f9621 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -518,6 +518,19 @@ function expand_setindex(ctx, ex) ] end +# Expand UnionAll definitions, eg `X{T} = Y{T,T}` +function expand_unionall_def(ctx, srcref, lhs, rhs) + if numchildren(lhs) <= 1 + throw(LoweringError(lhs, "empty type parameter list in type alias")) + end + name = lhs[1] + @ast ctx srcref [K"block" + [K"const_if_global" name] + unionall_type = expand_forms_2(ctx, [K"where" rhs lhs[2:end]...]) + expand_forms_2([K"=" name unionall_type]) + ] +end + # Expand general assignment syntax, including # * UnionAll definitions # * Chained assignments @@ -531,19 +544,7 @@ function expand_assignment(ctx, ex) rhs = ex[2] kl = kind(lhs) if kl == K"curly" - # Expand UnionAll definitions - if numchildren(lhs) <= 1 - throw(LoweringError(lhs, "empty type parameter list in type alias")) - end - name = lhs[1] - unionall_def = @ast ctx ex 
[K"=" - name - [K"where" ex[2] lhs[2:end]...] - ] - @ast ctx ex [K"block" - [K"const_if_global" name] - expand_forms_2(ctx, unionall_def) - ] + expand_unionall_def(ctx, ex, lhs, rhs) elseif kind(rhs) == K"=" # Expand chains of assignments # a = b = c ==> b=c; a=c @@ -1206,7 +1207,7 @@ function expand_decls(ctx, ex) makenode(ctx, ex, K"block", stmts) end -function analyze_function_arg(full_ex) +function match_function_arg(full_ex) name = nothing type = nothing default = nothing @@ -1318,8 +1319,10 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= arg_names = SyntaxList(ctx) arg_types = SyntaxList(ctx) + first_default = 0 + arg_defaults = SyntaxList(ctx) for (i,arg) in enumerate(args) - info = analyze_function_arg(arg) + info = match_function_arg(arg) aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" push!(arg_names, aname) atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) @@ -1330,6 +1333,27 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] end + if isnothing(info.default) + if !isempty(arg_defaults) && !info.is_slurp + # TODO: Referring to multiple pieces of syntax in one error message is necessary. + # TODO: Poision ASTs with error nodes and continue rather than immediately throwing. + # + # We should make something like the following kind of thing work! + # arg_defaults[1] = @ast_error ctx arg_defaults[1] """ + # Positional arguments with defaults must occur at the end. 
+ # + # We found a [non-optional position argument]($arg) *after* + # one with a [default value]($(first(arg_defaults))) + # """ + # + throw(LoweringError(args[first_default], "optional positional arguments must occur at end")) + end + else + if isempty(arg_defaults) + first_default = i + end + push!(arg_defaults, info.default) + end push!(arg_types, atype) end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 9b49b2c79ac0a..cfd5530ea4373 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -62,7 +62,6 @@ function _register_kinds() "toplevel_butfirst" "const_if_global" "moved_local" - "outerref" "label" "trycatchelse" "tryfinally" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 39c2159ac7b9a..1bee7f16aaa5f 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -8,8 +8,6 @@ function is_simple_atom(ctx, ex) (k == K"core" && ex.name_val == "nothing") end -# This assumes that resolve-scopes has run, so outerref is equivalent to a -# global in the current scope. function is_valid_ir_argument(ctx, ex) k = kind(ex) if is_simple_atom(ctx, ex) || k == K"inert" || k == K"top" || k == K"core" @@ -26,8 +24,6 @@ function is_valid_ir_argument(ctx, ex) # broken when precompiling a module `B` in the presence of a badly # behaved module `A`, which inconsistently defines globals during # `A.__init__()`??) 
- # - # TODO (k == K"outerref" && nothrow_julia_global(ex[1])) is_defined_nothrow_global(binfo.mod, Symbol(binfo.name)) else false @@ -126,7 +122,7 @@ end function is_simple_arg(ctx, ex) k = kind(ex) return is_simple_atom(ctx, ex) || k == K"BindingId" || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"globalref" || k == K"outerref" + k == K"top" || k == K"core" || k == K"globalref" end function is_single_assign_var(ctx::LinearIRContext, ex) @@ -150,7 +146,7 @@ function is_valid_ir_rvalue(ctx, lhs, rhs) return is_ssa(ctx, lhs) || is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && - # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref + # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref kind(rhs) in KSet"new call foreigncall") end @@ -465,7 +461,7 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) finally_handler = FinallyHandler(new_mutable_var(ctx, finally_block, "finally_tag"), JumpTarget(end_label, ctx)) push!(ctx.finally_handlers, finally_handler) - emit(ctx, @ast ctx finally_block [K"=" finally_handler.tagvar -1::K"Integer"]) + emit(ctx, @ast ctx finally_block [K"=" finally_handler.tagvar (-1)::K"Integer"]) end push!(ctx.handler_token_stack, handler_token) @@ -569,7 +565,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || k == K"Placeholder" - # TODO: other kinds: copyast $ globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall + # TODO: other kinds: copyast $ globalref thismodule cdecl stdcall fastcall thiscall llvmcall if needs_value && k == K"Placeholder" # TODO: ensure outterref, globalref work here throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in 
expressions")) @@ -789,13 +785,11 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing - elseif k == K"isdefined" + elseif k == K"isdefined" # TODO || k == K"throw_undef_if_not" (See upstream #53875) if in_tail_pos emit_return(ctx, ex) elseif needs_value ex - else - emit(ctx, ex) end else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) @@ -860,7 +854,7 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) makeleaf(ctx, ex, K"globalref", binfo.name, mod=binfo.mod) end end - elseif k == K"outerref" || k == K"meta" + elseif k == K"meta" TODO(ex, "_renumber $k") elseif is_literal(k) || is_quoted(k) ex diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index c1c6a43b70329..87d3bdcfe9dc2 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -63,7 +63,7 @@ function _find_scope_vars!(assignments, locals, globals, used_names, used_bindin # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) - if !(kind(v) in KSet"BindingId globalref outerref Placeholder") + if !(kind(v) in KSet"BindingId globalref Placeholder") get!(assignments, NameKey(v), v) end _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, ex[2]) diff --git a/JuliaLowering/test/ccall_demo.jl b/JuliaLowering/test/ccall_demo.jl index 62c859a7e4d56..266b161178587 100644 --- a/JuliaLowering/test/ccall_demo.jl +++ b/JuliaLowering/test/ccall_demo.jl @@ -5,7 +5,7 @@ using JuliaLowering: is_identifier_like, numchildren, children, MacroExpansionEr # Hacky utils # macro K_str(str) -# convert(JuliaSyntax.Kind, str[1].value) +# JuliaSyntax.Kind(str[1].value) # end # # # Needed because we can't lower kwarg calls yet ehehe :-/ diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 921f1ba756b49..e92c5e690208a 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -47,7 +47,7 
@@ baremodule M using JuliaSyntax macro K_str(str) - convert(JuliaSyntax.Kind, str) + JuliaSyntax.Kind(str) end function var"@inert"(__context__::JuliaLowering.MacroContext, ex) diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl index 2f720060ca0b6..b28789ba61335 100644 --- a/JuliaLowering/test/demo_include.jl +++ b/JuliaLowering/test/demo_include.jl @@ -71,7 +71,7 @@ macro outer() end macro K_str(str) - convert(JuliaSyntax.Kind, str[1].value) + JuliaSyntax.Kind(str[1].value) end # Recursive macro call From 8909a9887ea4e48effc6940adcd2e628899e95e1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 22 Nov 2024 14:57:46 +1000 Subject: [PATCH 0894/1109] Desugaring of positional arguments with defaults --- JuliaLowering/src/desugaring.jl | 250 +++++++++++++++++------ JuliaLowering/test/demo.jl | 57 ++++++ JuliaLowering/test/functions.jl | 56 ++++++ JuliaLowering/test/functions_ir.jl | 306 +++++++++++++++++++++++++++++ 4 files changed, 612 insertions(+), 57 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 10039b53f9621..6e8456e4c8038 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -25,18 +25,36 @@ end # Return true when `x` and `y` are "the same identifier", but also works with # bindings (and hence ssa vars). 
See also `is_identifier_like()` -function is_same_identifier_like(x, y) - return (kind(x) == K"Identifier" && kind(y) == K"Identifier" && NameKey(x) == NameKey(y)) || - (kind(x) == K"BindingId" && kind(y) == K"BindingId" && x.var_id == y.var_id) +function is_same_identifier_like(ex::SyntaxTree, y::SyntaxTree) + return (kind(ex) == K"Identifier" && kind(y) == K"Identifier" && NameKey(ex) == NameKey(y)) || + (kind(ex) == K"BindingId" && kind(y) == K"BindingId" && ex.var_id == y.var_id) end -function is_same_identifier_like(x, name::AbstractString) - return kind(x) == K"Identifier" && x.name_val == name +function is_same_identifier_like(ex::SyntaxTree, name::AbstractString) + return kind(ex) == K"Identifier" && ex.name_val == name end -function contains_identifier(ex, idents...) - return any(is_same_identifier_like(ex, id) for id in idents) || - (!is_leaf(ex) && any(contains_identifier(e, idents...) for e in children(ex))) +function contains_identifier(ex::SyntaxTree, idents::AbstractVector{<:SyntaxTree}) + contains_unquoted(ex) do e + any(is_same_identifier_like(e, id) for id in idents) + end +end + +function contains_identifier(ex::SyntaxTree, idents...) + contains_unquoted(ex) do e + any(is_same_identifier_like(e, id) for id in idents) + end +end + +# Return true if `f(e)` is true for any unquoted child of `ex`, recursively. 
+function contains_unquoted(f::Function, ex::SyntaxTree) + if f(ex) + return true + elseif !is_leaf(ex) && !(kind(ex) in KSet"quote inert meta") + return any(contains_unquoted(f, e) for e in children(ex)) + else + return false + end end # Identify some expressions that are safe to repeat @@ -526,7 +544,7 @@ function expand_unionall_def(ctx, srcref, lhs, rhs) name = lhs[1] @ast ctx srcref [K"block" [K"const_if_global" name] - unionall_type = expand_forms_2(ctx, [K"where" rhs lhs[2:end]...]) + unionall_type := expand_forms_2(ctx, [K"where" rhs lhs[2:end]...]) expand_forms_2([K"=" name unionall_type]) ] end @@ -1238,7 +1256,9 @@ function match_function_arg(full_ex) is_nospecialize = true ex = ex[2] elseif k == K"=" - @chk full_ex isnothing(default) && !is_slurp + if !isnothing(default) + throw(full_ex, "multiple defaults provided with `=` in function argument") + end default = ex[2] ex = ex[1] else @@ -1252,6 +1272,11 @@ function match_function_arg(full_ex) is_nospecialize=is_nospecialize) end +# Expand `where` clause(s) of a function into (typevar_names, typevar_stmts) where +# - `typevar_names` are the names of the type's type parameters +# - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter +# name in `typevar_names`, to be emitted prior to uses of `typevar_names`. +# There is exactly one statement from each typevar. function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) if kind(ex) == K"where" && numchildren(ex) == 2 vars_kind = kind(ex[2]) @@ -1259,13 +1284,7 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) append!(typevar_names, children(ex[2])) else params = vars_kind == K"braces" ? 
ex[2][1:end] : ex[2:2] - for param in params - bounds = analyze_typevar(ctx, param) - n = bounds[1] - push!(typevar_names, n) - push!(typevar_stmts, @ast ctx param [K"local" n]) - push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) - end + expand_typevars!(ctx, typevar_names, typevar_stmts, params) end _split_wheres!(ctx, typevar_names, typevar_stmts, ex[1]) else @@ -1273,6 +1292,120 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) end end +function _method_def_expr(ctx, srcref, callex, func_self, method_table, + docs, typevar_names, arg_names, arg_types, ret_var, body) + # metadata contains svec(types, sparms, location) + @ast ctx srcref [K"block" + method_metadata := [K"call"(callex) + "svec" ::K"core" + [K"call" + "svec" ::K"core" + arg_types... + ] + [K"call" + "svec" ::K"core" + typevar_names... + ] + QuoteNode(source_location(LineNumberNode, callex))::K"Value" + ] + [K"method" + method_table + method_metadata + [K"lambda"(body, is_toplevel_thunk=false) + [K"block" arg_names...] + [K"block" typevar_names...] + body + ret_var # might be `nothing` and hence removed + ] + ] + if !isnothing(docs) + [K"call"(docs) + bind_docs!::K"Value" + func_self + docs[1] + method_metadata + ] + end + ] +end + +function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) + n_typevars = length(typevar_names) + @assert n_typevars == length(typevar_stmts) + # Filter typevar names down to those which are directly used in the arg list + typevar_used = [contains_identifier(tn, arg_types) for tn in typevar_names] + # _Or_ used transitively via other typevars. The following code + # computes this by incrementally coloring the graph of dependencies + # between type vars. 
+ found_used = true + while found_used + found_used = false + for (i,tn) in enumerate(typevar_names) + if typevar_used[i] + continue + end + for j = i+1:n_typevars + if typevar_used[j] && contains_identifier(typevar_stmts[j], tn) + found_used = true + typevar_used[i] = true + break + end + end + end + end + trimmed_typevar_names = SyntaxList(ctx) + for (used,tn) in zip(typevar_used, typevar_names) + if used + push!(trimmed_typevar_names, tn) + end + end + return trimmed_typevar_names +end + +# Generate a method for every number of allowed optional arguments +# For example for `f(x, y=1, z=2)` we generate two additional methods +# f(x) = f(x, 1, 2) +# f(x, y) = f(x, y, 2) +function _optional_positional_defs!(ctx, method_stmts, srcref, callex, func_self, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, arg_defaults, ret_var) + # Replace placeholder arguments with variables - we need to pass them to + # the inner method for dispatch even when unused in the inner method body + def_arg_names = map(arg_names) do arg + kind(arg) == K"Placeholder" ? + new_mutable_var(ctx, arg, arg.name_val; kind=:argument) : + arg + end + for def_idx = 1:length(arg_defaults) + first_omitted = first_default + def_idx - 1 + trimmed_arg_names = def_arg_names[1:first_omitted-1] + # Call the full method directly if no arguments are reused in + # subsequent defaults. Otherwise conservatively call the function with + # only one additional default argument supplied and let the chain of + # function calls eventually lead to the full method. + any_args_in_trailing_defaults = + any(arg_defaults[def_idx+1:end]) do defaultval + contains_identifier(defaultval, def_arg_names[first_omitted:end]) + end + last_used_default = any_args_in_trailing_defaults ? + def_idx : lastindex(arg_defaults) + body = @ast ctx callex [K"block" + [K"call" + trimmed_arg_names... + arg_defaults[def_idx:last_used_default]... 
+ ] + ] + trimmed_arg_types = arg_types[1:first_omitted-1] + trimmed_typevar_names = trim_used_typevars(ctx, trimmed_arg_types, + typevar_names, typevar_stmts) + # TODO: Ensure we preserve @nospecialize metadata in args + push!(method_stmts, + _method_def_expr(ctx, srcref, callex, func_self, method_table, nothing, + trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types, + ret_var, body)) + end +end + function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity) @chk numchildren(ex) in (1,2) name = ex[1] @@ -1336,7 +1469,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if isnothing(info.default) if !isempty(arg_defaults) && !info.is_slurp # TODO: Referring to multiple pieces of syntax in one error message is necessary. - # TODO: Poision ASTs with error nodes and continue rather than immediately throwing. + # TODO: Poison ASTs with error nodes and continue rather than immediately throwing. # # We should make something like the following kind of thing work! # arg_defaults[1] = @ast_error ctx arg_defaults[1] """ @@ -1354,6 +1487,9 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end push!(arg_defaults, info.default) end + # TODO: Ideally, ensure side effects of evaluating arg_types only + # happen once - we should create an ssavar if there's any following + # defaults. (flisp lowering doesn't ensure this either) push!(arg_types, atype) end @@ -1387,6 +1523,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= func_self ] end + # Add self argument pushfirst!(arg_names, farg_name) pushfirst!(arg_types, farg_type) @@ -1402,42 +1539,33 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ret_var = nothing end - method_table = nothing_(ctx, name) # TODO: method overlays + method_table_val = nothing # TODO: method overlays + method_table = isnothing(method_table_val) ? 
+ @ast(ctx, callex, "nothing"::K"core") : + ssavar(ctx, ex, "method_table") + method_stmts = SyntaxList(ctx) + + if !isempty(arg_defaults) + # For self argument added above + first_default += 1 + _optional_positional_defs!(ctx, method_stmts, ex, callex, func_self, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, arg_defaults, ret_var) + end + + # The method with all non-default arguments + push!(method_stmts, + _method_def_expr(ctx, ex, callex, func_self, method_table, docs, + typevar_names, arg_names, arg_types, ret_var, body)) + @ast ctx ex [K"scope_block"(scope_type=:hard) [K"block" typevar_stmts... - [K"=" func_self func_self_val] - # metadata contains svec(types, sparms, location) - method_metadata := [K"call"(callex) - "svec" ::K"core" - [K"call" - "svec" ::K"core" - arg_types... - ] - [K"call" - "svec" ::K"core" - typevar_names... - ] - QuoteNode(source_location(LineNumberNode, callex))::K"Value" - ] - [K"method" - method_table - method_metadata - [K"lambda"(body, is_toplevel_thunk=false) - [K"block" arg_names...] - [K"block" typevar_names...] - body - ret_var # might be `nothing` and hence removed - ] - ] - if !isnothing(docs) - [K"call"(docs) - bind_docs!::K"Value" - func_self - docs[1] - method_metadata - ] + if !isnothing(method_table_val) + [K"=" method_table method_table_val] end + [K"=" func_self func_self_val] + method_stmts... 
[K"unnecessary" func_self] ] ] @@ -1569,19 +1697,27 @@ function analyze_type_sig(ctx, ex) end # Expand type_params into (typevar_names, typevar_stmts) where -# - `typevar_names` are the names of the types's type parameters +# - `typevar_names` are the names of the type's type parameters # - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter -# name in `typevar_names`, to be emitted prior to uses of `typevar_names` -function expand_typevars(ctx, type_params) - typevar_names = SyntaxList(ctx) - typevar_stmts = SyntaxList(ctx) +# name in `typevar_names`, to be emitted prior to uses of `typevar_names`. +# There is exactly one statement from each typevar. +function expand_typevars!(ctx, typevar_names, typevar_stmts, type_params) for param in type_params bounds = analyze_typevar(ctx, param) n = bounds[1] push!(typevar_names, n) - push!(typevar_stmts, @ast ctx param [K"local" n]) - push!(typevar_stmts, @ast ctx param [K"=" n bounds_to_TypeVar(ctx, param, bounds)]) + push!(typevar_stmts, @ast ctx param [K"block" + [K"local" n] + [K"=" n bounds_to_TypeVar(ctx, param, bounds)] + ]) end + return nothing +end + +function expand_typevars(ctx, type_params) + typevar_names = SyntaxList(ctx) + typevar_stmts = SyntaxList(ctx) + expand_typevars!(ctx, typevar_names, typevar_stmts, type_params) return (typevar_names, typevar_stmts) end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index e92c5e690208a..4eb3625342afc 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -570,6 +570,63 @@ struct S9{T} end """ +# Default positional args with missing arg names +src = """ +function f(::Int, y=1, z=2) + (y, z) +end +""" + +# Default positional args with placeholders +src = """ +function f(_::Int, x=1) + x +end +""" + +# Positional args and type parameters with transitive dependencies +# Bug in flisp lowering - see https://github.com/JuliaLang/julia/issues/49275 +src = """ +function f(x, y::S=[1], z) where {T, 
S<:AbstractVector{T}} + (x, y, z, T) +end +""" + +# Default positional args before trailing slurp are allowed +src = """ +function f(x=1, ys...) + ys +end +""" + +# Default positional args after a slurp is an error +src = """ +function f(x=1, ys..., z=2) + ys +end +""" + +# Positional arg with slurp and default +src = """ +function f(x=1, ys...="hi") + ys +end +""" + +# Positional arg with slurp and splat +src = """ +function f(x=1, ys...=(1,2)...) + ys +end +""" + +# TODO: fix this - it's interpreted in a bizarre way as a kw call. +# src = """ +# function f(x=y=1) +# x +# end +# """ + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 7f392520ceab5..2acc83674359a 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -131,4 +131,60 @@ begin end """) +# Default positional arguments +@test JuliaLowering.include_string(test_mod, """ +begin + function f_def_simple(x=1, y=2, z=x) + (x,y,z) + end + + (f_def_simple(), f_def_simple(10), f_def_simple(10,20), f_def_simple(10,20,30)) +end +""") == ((1,2,1), (10,2,10), (10,20,10), (10,20,30)) + +@test JuliaLowering.include_string(test_mod, """ +begin + function f_def_placeholders(::T=1, _::S=1.0) where {T,S} + (T,S) + end + + (f_def_placeholders(), f_def_placeholders(1.0), f_def_placeholders(1.0, 1)) +end +""") == ((Int,Float64), (Float64,Float64), (Float64,Int)) + +@test JuliaLowering.include_string(test_mod, """ +begin + function f_def_typevars(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} + (x, y, z, T, S, U) + end + + (f_def_typevars(1), f_def_typevars(1,[1.0]), f_def_typevars(1,[1.0],-1.0)) +end +""") == ((1, [1], 2, Int, Vector{Int}, Int), + (1, [1.0], 2, Float64, Vector{Float64}, Int), + (1, [1.0], -1.0, Float64, Vector{Float64}, Float64)) + +@test JuliaLowering.include_string(test_mod, """ +begin + function f_def_slurp(x=1, ys...) 
+ (x, ys) + end + + (f_def_slurp(), f_def_slurp(2), f_def_slurp(2,3)) +end +""") == ((1, ()), + (2, ()), + (2, (3,))) + +@test JuliaLowering.include_string(test_mod, """ +begin + function f_def_slurp_splat(ys...=(1,2)...) + ys + end + + (f_def_slurp_splat(), f_def_slurp_splat(10,20)) +end +""") == ((1,2), + (10,20)) + end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 204ef2fa95c75..16bc02742461c 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -329,6 +329,312 @@ x^42.0 3 (call %₁ %₂ 42.0) 4 (return %₃) +######################################## +# Simple positional args with defaults +function f(x::T, y::S=1, z::U=2) + (x,y) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 TestMod.T +4 (call core.svec %₂ %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ + 1 (call slot₁/#self# slot₂/x 1 2) + 2 (return %₁) +8 (call core.Typeof %₁) +9 TestMod.T +10 TestMod.S +11 (call core.svec %₈ %₉ %₁₀) +12 (call core.svec) +13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) +14 --- method core.nothing %₁₃ + 1 (call slot₁/#self# slot₂/x slot₃/y 2) + 2 (return %₁) +15 (call core.Typeof %₁) +16 TestMod.T +17 TestMod.S +18 TestMod.U +19 (call core.svec %₁₅ %₁₆ %₁₇ %₁₈) +20 (call core.svec) +21 (call core.svec %₁₉ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) +22 --- method core.nothing %₂₁ + 1 (call core.tuple slot₂/x slot₃/y) + 2 (return %₁) +23 (return %₁) + +######################################## +# Default positional args which depend on other args +function f(x=1, y=x) + (x,y) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call slot₁/#self# 1) + 2 (return %₁) +7 (call core.Typeof %₁) +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 (call 
core.svec %₈ %₉ :($(QuoteNode(:(#= line 1 =#))))) +11 --- method core.nothing %₁₀ + 1 (call slot₁/#self# slot₂/x slot₂/x) + 2 (return %₁) +12 (call core.Typeof %₁) +13 (call core.svec %₁₂ core.Any core.Any) +14 (call core.svec) +15 (call core.svec %₁₃ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) +16 --- method core.nothing %₁₅ + 1 (call core.tuple slot₂/x slot₃/y) + 2 (return %₁) +17 (return %₁) + +######################################## +# Default positional args with missing arg names (implicit placeholders) +function f(::Int, y=1, z=2) + (y, z) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 TestMod.Int +4 (call core.svec %₂ %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ + 1 (call slot₁/#self# slot₂/_ 1 2) + 2 (return %₁) +8 (call core.Typeof %₁) +9 TestMod.Int +10 (call core.svec %₈ %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + 1 (call slot₁/#self# slot₂/_ slot₃/y 2) + 2 (return %₁) +14 (call core.Typeof %₁) +15 TestMod.Int +16 (call core.svec %₁₄ %₁₅ core.Any core.Any) +17 (call core.svec) +18 (call core.svec %₁₆ %₁₇ :($(QuoteNode(:(#= line 1 =#))))) +19 --- method core.nothing %₁₈ + 1 (call core.tuple slot₃/y slot₄/z) + 2 (return %₁) +20 (return %₁) + +######################################## +# Default positional args with placeholders +function f(_::Int, x=1) + x +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 TestMod.Int +4 (call core.svec %₂ %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ + 1 (call slot₁/#self# slot₂/_ 1) + 2 (return %₁) +8 (call core.Typeof %₁) +9 TestMod.Int +10 (call core.svec %₈ %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + 1 slot₃/x + 2 (return %₁) +14 (return %₁) + 
+######################################## +# Positional args with defaults and `where` clauses +function f(x::T, y::S=1, z::U=2) where {T,S<:T,U<:S} + (x,y,z) +end +#--------------------- +1 (= slot₂/T (call core.TypeVar :T)) +2 slot₂/T +3 (= slot₁/S (call core.TypeVar :S %₂)) +4 slot₁/S +5 (= slot₃/U (call core.TypeVar :U %₄)) +6 (method :f) +7 (call core.Typeof %₆) +8 slot₂/T +9 (call core.svec %₇ %₈) +10 slot₂/T +11 (call core.svec %₁₀) +12 (call core.svec %₉ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + 1 (call slot₁/#self# slot₂/x 1 2) + 2 (return %₁) +14 (call core.Typeof %₆) +15 slot₂/T +16 slot₁/S +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 slot₂/T +19 slot₁/S +20 (call core.svec %₁₈ %₁₉) +21 (call core.svec %₁₇ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) +22 --- method core.nothing %₂₁ + 1 (call slot₁/#self# slot₂/x slot₃/y 2) + 2 (return %₁) +23 (call core.Typeof %₆) +24 slot₂/T +25 slot₁/S +26 slot₃/U +27 (call core.svec %₂₃ %₂₄ %₂₅ %₂₆) +28 slot₂/T +29 slot₁/S +30 slot₃/U +31 (call core.svec %₂₈ %₂₉ %₃₀) +32 (call core.svec %₂₇ %₃₁ :($(QuoteNode(:(#= line 1 =#))))) +33 --- method core.nothing %₃₂ + 1 (call core.tuple slot₂/x slot₃/y slot₄/z) + 2 (return %₁) +34 (return %₆) + +######################################## +# Positional args and type parameters with transitive dependencies +# See https://github.com/JuliaLang/julia/issues/49275 - the first method +# generated here for only `x` should contain zero type parameters. 
+function f(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} + (x, y, z, T, S, U) +end +#--------------------- +1 (= slot₂/T (call core.TypeVar :T)) +2 TestMod.AbstractVector +3 slot₂/T +4 (call core.apply_type %₂ %₃) +5 (= slot₁/S (call core.TypeVar :S %₄)) +6 (= slot₃/U (call core.TypeVar :U)) +7 (method :f) +8 (call core.Typeof %₇) +9 (call core.svec %₈ core.Any) +10 (call core.svec) +11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) +12 --- method core.nothing %₁₁ + 1 (call top.vect 1) + 2 (call slot₁/#self# slot₂/x %₁ 2) + 3 (return %₂) +13 (call core.Typeof %₇) +14 slot₁/S +15 (call core.svec %₁₃ core.Any %₁₄) +16 slot₂/T +17 slot₁/S +18 (call core.svec %₁₆ %₁₇) +19 (call core.svec %₁₅ %₁₈ :($(QuoteNode(:(#= line 1 =#))))) +20 --- method core.nothing %₁₉ + 1 (call slot₁/#self# slot₂/x slot₃/y 2) + 2 (return %₁) +21 (call core.Typeof %₇) +22 slot₁/S +23 slot₃/U +24 (call core.svec %₂₁ core.Any %₂₂ %₂₃) +25 slot₂/T +26 slot₁/S +27 slot₃/U +28 (call core.svec %₂₅ %₂₆ %₂₇) +29 (call core.svec %₂₄ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) +30 --- method core.nothing %₂₉ + 1 static_parameter₁ + 2 static_parameter₂ + 3 static_parameter₃ + 4 (call core.tuple slot₂/x slot₃/y slot₄/z %₁ %₂ %₃) + 5 (return %₄) +31 (return %₇) + +######################################## +# Default positional args are allowed before trailing slurp with no default +function f(x=1, ys...) 
+ ys +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call slot₁/#self# 1) + 2 (return %₁) +7 (call core.Typeof %₁) +8 (call core.apply_type core.Vararg core.Any) +9 (call core.svec %₇ core.Any %₈) +10 (call core.svec) +11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) +12 --- method core.nothing %₁₁ + 1 slot₃/ys + 2 (return %₁) +13 (return %₁) + +######################################## +# Error: Default positional args after a slurp +function f(x=1, ys..., z=2) + ys +end +#--------------------- +LoweringError: +function f(x=1, ys..., z=2) +# └────┘ ── `...` may only be used for the last function argument + ys +end + +######################################## +# Positional arg with slurp and default +function f(xs...=1) + xs +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call slot₁/#self# 1) + 2 (return %₁) +7 (call core.Typeof %₁) +8 (call core.apply_type core.Vararg core.Any) +9 (call core.svec %₇ %₈) +10 (call core.svec) +11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) +12 --- method core.nothing %₁₁ + 1 slot₂/xs + 2 (return %₁) +13 (return %₁) + +######################################## +# Positional arg with slurp and splatted default value +function f(xs...=(1,2)...) 
+ xs +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call core.tuple 1 2) + 2 (call core._apply_iterate top.iterate slot₁/#self# %₁) + 3 (return %₂) +7 (call core.Typeof %₁) +8 (call core.apply_type core.Vararg core.Any) +9 (call core.svec %₇ %₈) +10 (call core.svec) +11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) +12 --- method core.nothing %₁₁ + 1 slot₂/xs + 2 (return %₁) +13 (return %₁) + ######################################## # Binding docs to functions """ From 3423cb58790d9d09c48ec7a918a4cec3fc5d22ce Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 29 Nov 2024 11:39:58 +1000 Subject: [PATCH 0895/1109] Desugaring for function argument destructuring --- JuliaLowering/src/desugaring.jl | 21 ++++++-- JuliaLowering/test/functions.jl | 10 ++++ JuliaLowering/test/functions_ir.jl | 77 ++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 6e8456e4c8038..3da62e388dfa4 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1452,11 +1452,20 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= arg_names = SyntaxList(ctx) arg_types = SyntaxList(ctx) + body_stmts = SyntaxList(ctx) first_default = 0 arg_defaults = SyntaxList(ctx) for (i,arg) in enumerate(args) info = match_function_arg(arg) aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" + if kind(aname) == K"tuple" + # Argument destructuring + n = new_mutable_var(ctx, aname, "destructured_arg_$i"; kind=:argument) + # TODO: Tag these destructured locals somehow so we can trigger + # the "function argument name not unique" error if they're repeated? 
+ push!(body_stmts, @ast ctx aname [K"local" [K"=" aname n]]) + aname = n + end push!(arg_names, aname) atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) @assert !info.is_nospecialize # TODO @@ -1527,16 +1536,20 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= pushfirst!(arg_names, farg_name) pushfirst!(arg_types, farg_type) - body = rewrite_body(ex[2]) if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") + push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type]) + else + ret_var = nothing + end + + body = rewrite_body(ex[2]) + if !isempty(body_stmts) body = @ast ctx body [ K"block" - [K"=" ret_var return_type] + body_stmts... body ] - else - ret_var = nothing end method_table_val = nothing # TODO: method overlays diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 2acc83674359a..5046531d34b05 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -187,4 +187,14 @@ end """) == ((1,2), (10,20)) +@test JuliaLowering.include_string(test_mod, """ +begin + function f_destructure(x, (y,z)::Tuple{Int,Int}, (w,)...=(4,)...) 
+ (x,y,z,w) + end + + f_destructure(1, (2,3)) +end +""") == (1,2,3,4) + end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 16bc02742461c..7ed351b998b93 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -635,6 +635,83 @@ end 2 (return %₁) 13 (return %₁) +######################################## +# Trivial function argument destructuring +function f(x, (y,z), w) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂ core.Any core.Any core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call top.indexed_iterate slot₃/destructured_arg_2 1) + 2 (= slot₆/y (call core.getfield %₁ 1)) + 3 (= slot₅/iterstate (call core.getfield %₁ 2)) + 4 slot₅/iterstate + 5 (call top.indexed_iterate slot₃/destructured_arg_2 2 %₄) + 6 (= slot₇/z (call core.getfield %₅ 1)) + 7 (return core.nothing) +7 (return %₁) + +######################################## +# Function argument destructuring combined with splats, types and and defaults +function f((x,)::T...=rhs) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 TestMod.rhs + 2 (call slot₁/#self# %₁) + 3 (return %₂) +7 (call core.Typeof %₁) +8 TestMod.T +9 (call core.apply_type core.Vararg %₈) +10 (call core.svec %₇ %₉) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) + 2 (= slot₃/x (call core.getfield %₁ 1)) + 3 (return core.nothing) +14 (return %₁) + +######################################## +# Broken: the following repeated destructured args should probably be an error +# but they're just normal locals so it's a bit hard to trigger. 
+function f((x,), (x,)) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂ core.Any core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (call top.indexed_iterate slot₃/destructured_arg_2 1) + 4 (= slot₄/x (call core.getfield %₃ 1)) + 5 (return core.nothing) +7 (return %₁) + +######################################## +# Error: Function argument destructuring conflicting with a global decl +function f((x,)) + global x +end +#--------------------- +LoweringError: +function f((x,)) +# ╙ ── Variable `x` declared both local and global + global x +end + ######################################## # Binding docs to functions """ From 055a7ef18eb54768df90d4aba472b1edab459f59 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 29 Nov 2024 14:50:50 +1000 Subject: [PATCH 0896/1109] Tests for all variable scope conflicts + make duplicate destructured args an error Here we introduce a `meta` attribute rather than - or perhaps in addition to - the `K"meta"` kind and use it to tag local variables which derived from function argument destructuring. We use this to make it an error to have duplicate destructured argument names. This is technically breaking, but probably only a good thing - without this users will silently have the intial duplicate argument names overwritten with the result of the last destructuring assignment. Also add tests for the various variable scope conflict errors: argument/local, static-parameter/local, local/global etc. 
--- JuliaLowering/src/desugaring.jl | 26 ++-- JuliaLowering/src/macro_expansion.jl | 10 +- JuliaLowering/src/scope_analysis.jl | 80 +++++++------ JuliaLowering/test/functions_ir.jl | 51 +------- JuliaLowering/test/scopes_ir.jl | 173 +++++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 93 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 3da62e388dfa4..60f27531fb9ff 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1171,24 +1171,30 @@ end # (x::T, (y::U, z)) # strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z) # and return (x, (y, z)) -function strip_decls!(ctx, stmts, declkind, declkind2, ex) +function strip_decls!(ctx, stmts, declkind, declkind2, declmeta, ex) k = kind(ex) if k == K"Identifier" - push!(stmts, makenode(ctx, ex, declkind, ex)) + if !isnothing(declmeta) + push!(stmts, makenode(ctx, ex, declkind, ex; meta=declmeta)) + else + push!(stmts, makenode(ctx, ex, declkind, ex)) + end if !isnothing(declkind2) push!(stmts, makenode(ctx, ex, declkind2, ex)) end ex + elseif k == K"Placeholder" + ex elseif k == K"::" @chk numchildren(ex) == 2 name = ex[1] @chk kind(name) == K"Identifier" push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) - strip_decls!(ctx, stmts, declkind, declkind2, ex[1]) + strip_decls!(ctx, stmts, declkind, declkind2, declmeta, ex[1]) elseif k == K"tuple" || k == K"parameters" cs = SyntaxList(ctx) for e in children(ex) - push!(cs, strip_decls!(ctx, stmts, declkind, declkind2, e)) + push!(cs, strip_decls!(ctx, stmts, declkind, declkind2, declmeta, e)) end makenode(ctx, ex, k, cs) end @@ -1199,6 +1205,7 @@ end # global x::T = 1 ==> (block (global x) (decl x T) (x = 1)) function expand_decls(ctx, ex) declkind = kind(ex) + declmeta = get(ex, :meta, nothing) if numchildren(ex) == 1 && kind(ex[1]) ∈ KSet"const global local" declkind2 = kind(ex[1]) bindings = children(ex[1]) @@ -1211,13 +1218,13 @@ function expand_decls(ctx, ex) kb = 
kind(binding) if is_prec_assignment(kb) @chk numchildren(binding) == 2 - lhs = strip_decls!(ctx, stmts, declkind, declkind2, binding[1]) + lhs = strip_decls!(ctx, stmts, declkind, declkind2, declmeta, binding[1]) push!(stmts, @ast ctx binding [kb lhs binding[2]]) elseif is_sym_decl(binding) if declkind == K"const" || declkind2 == K"const" throw(LoweringError(ex, "expected assignment after `const`")) end - strip_decls!(ctx, stmts, declkind, declkind2, binding) + strip_decls!(ctx, stmts, declkind, declkind2, declmeta, binding) else throw(LoweringError(ex, "invalid syntax in variable declaration")) end @@ -1461,9 +1468,8 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if kind(aname) == K"tuple" # Argument destructuring n = new_mutable_var(ctx, aname, "destructured_arg_$i"; kind=:argument) - # TODO: Tag these destructured locals somehow so we can trigger - # the "function argument name not unique" error if they're repeated? - push!(body_stmts, @ast ctx aname [K"local" [K"=" aname n]]) + push!(body_stmts, @ast ctx aname [K"local"(meta=CompileHints(:is_destructured_arg, true)) + [K"=" aname n]]) aname = n end push!(arg_names, aname) @@ -2624,7 +2630,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) # Don't recurse when already simplified - `local x`, etc ex else - expand_forms_2(ctx, expand_decls(ctx, ex)) # FIXME + expand_forms_2(ctx, expand_decls(ctx, ex)) end elseif k == K"where" expand_forms_2(ctx, expand_wheres(ctx, ex)) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index fd94d156227ae..15a966048f0af 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -14,6 +14,13 @@ struct ScopeLayer is_macro_expansion::Bool # FIXME end +# Type for `meta` attribute, to replace `Expr(:meta)`. +# It's unclear how much flexibility we need here - is a dict good, or could we +# just use a struct? Likely this will be sparse. 
Alternatively we could just +# use individual attributes but those aren't easy to add on an ad-hoc basis in +# the middle of a pass. +const CompileHints = Base.ImmutableDict{Symbol,Any} + struct MacroExpansionContext{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings @@ -250,7 +257,8 @@ function expand_forms_1(mod::Module, ex::SyntaxTree) graph = ensure_attributes(syntax_graph(ex), var_id=IdTag, scope_layer=LayerId, - __macro_ctx__=Nothing) + __macro_ctx__=Nothing, + meta=CompileHints) layers = ScopeLayer[ScopeLayer(1, mod, false)] ctx = MacroExpansionContext(graph, Bindings(), layers, layers[1]) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 87d3bdcfe9dc2..cfbd6a900cde3 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -45,7 +45,9 @@ struct NameKey end #------------------------------------------------------------------------------- -function _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, ex) +_insert_if_not_present!(dict, key, val) = get!(dict, key, val) + +function _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex) k = kind(ex) if k == K"Identifier" push!(used_names, NameKey(ex)) @@ -57,19 +59,23 @@ function _find_scope_vars!(assignments, locals, globals, used_names, used_bindin k in KSet"scope_block lambda module toplevel" return elseif k == K"local" || k == K"local_def" - get!(locals, NameKey(ex[1]), ex) + if (meta = get(ex, :meta, nothing); !isnothing(meta) && get(meta, :is_destructured_arg, false)) + push!(destructured_args, ex[1]) + else + _insert_if_not_present!(locals, NameKey(ex[1]), ex) + end elseif k == K"global" - get!(globals, NameKey(ex[1]), ex) + _insert_if_not_present!(globals, NameKey(ex[1]), ex) # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) if 
!(kind(v) in KSet"BindingId globalref Placeholder") - get!(assignments, NameKey(v), v) + _insert_if_not_present!(assignments, NameKey(v), v) end - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, ex[2]) + _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex[2]) else for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, e) + _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) end end end @@ -82,12 +88,13 @@ function find_scope_vars(ex) ExT = typeof(ex) assignments = Dict{NameKey,ExT}() locals = Dict{NameKey,ExT}() + destructured_args = Vector{ExT}() globals = Dict{NameKey,ExT}() used_names = Set{NameKey}() used_bindings = Set{IdTag}() alias_bindings = Vector{Pair{NameKey,IdTag}}() for e in children(ex) - _find_scope_vars!(assignments, locals, globals, used_names, used_bindings, alias_bindings, e) + _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) end # Sort by key so that id generation is deterministic @@ -97,7 +104,7 @@ function find_scope_vars(ex) used_names = sort(collect(used_names)) used_bindings = sort(collect(used_bindings)) - return assignments, locals, globals, used_names, used_bindings, alias_bindings + return assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings end function Base.isless(a::NameKey, b::NameKey) @@ -196,6 +203,27 @@ function init_binding(ctx, varkey::NameKey, kind::Symbol, is_ambiguous_local=fal id end +# Add lambda arguments and static parameters +function add_lambda_args(ctx, var_ids, args, args_kind) + for arg in args + ka = kind(arg) + if ka == K"Identifier" + varkey = NameKey(arg) + if haskey(var_ids, varkey) + vk = lookup_binding(ctx, var_ids[varkey]).kind + _is_arg(k) = k == :argument || k == :local + msg = 
_is_arg(vk) && _is_arg(args_kind) ? "function argument name not unique" : + vk == :static_parameter && args_kind == :static_parameter ? "function static parameter name not unique" : + "static parameter name not distinct from function argument" + throw(LoweringError(arg, msg)) + end + var_ids[varkey] = init_binding(ctx, varkey, args_kind) + elseif ka != K"BindingId" && ka != K"Placeholder" + throw(LoweringError(arg, "Unexpected lambda arg kind")) + end + end +end + # Analyze identifier usage within a scope, adding all newly discovered # identifiers to ctx.bindings and returning a lookup table from identifier # names to their variable IDs @@ -206,42 +234,22 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, in_toplevel_thunk = is_toplevel_global_scope || (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) - assignments, locals, globals, used, used_bindings, alias_bindings = find_scope_vars(ex) + assignments, locals, destructured_args, globals, + used, used_bindings, alias_bindings = find_scope_vars(ex) # Create new lookup table for variables in this scope which differ from the # parent scope. var_ids = Dict{NameKey,IdTag}() - # Add lambda arguments and static parameters - function add_lambda_args(args, var_kind) - for arg in args - ka = kind(arg) - if ka == K"Identifier" - varkey = NameKey(arg) - if haskey(var_ids, varkey) - vk = lookup_binding(ctx, var_ids[varkey]).kind - msg = vk == :argument && var_kind == vk ? "function argument name not unique" : - vk == :static_parameter && var_kind == vk ? 
"function static parameter name not unique" : - "static parameter name not distinct from function argument" - throw(LoweringError(arg, msg)) - end - var_ids[varkey] = init_binding(ctx, varkey, var_kind) - elseif ka != K"BindingId" && ka != K"Placeholder" - throw(LoweringError(a, "Unexpected lambda arg kind")) - end - end - end if !isnothing(lambda_args) - add_lambda_args(lambda_args, :argument) - add_lambda_args(lambda_static_parameters, :static_parameter) + add_lambda_args(ctx, var_ids, lambda_args, :argument) + add_lambda_args(ctx, var_ids, lambda_static_parameters, :static_parameter) + add_lambda_args(ctx, var_ids, destructured_args, :local) end - global_keys = Set(first(g) for g in globals) # Add explicit locals for (varkey,e) in locals - if varkey in global_keys - throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global")) - elseif haskey(var_ids, varkey) + if haskey(var_ids, varkey) vk = lookup_binding(ctx, var_ids[varkey]).kind if vk === :argument && is_outer_lambda_scope throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with an argument")) @@ -258,7 +266,9 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, for (varkey,e) in globals if haskey(var_ids, varkey) vk = lookup_binding(ctx, var_ids[varkey]).kind - if vk === :argument && is_outer_lambda_scope + if vk === :local + throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global")) + elseif vk === :argument && is_outer_lambda_scope throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with an argument")) elseif vk === :static_parameter throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 7ed351b998b93..79a67c166e57e 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -147,36 +147,6 @@ end 2 (return %₁) 14 (return %₄) 
-######################################## -# Error: Duplicate function argument names -function f(x, x) -end -#--------------------- -LoweringError: -function f(x, x) -# ╙ ── function argument name not unique -end - -######################################## -# Error: Static parameter name not unique -function f() where T where T -end -#--------------------- -LoweringError: -function f() where T where T -# ╙ ── function static parameter name not unique -end - -######################################## -# Error: static parameter colliding with argument names -function f(x::x) where x -end -#--------------------- -LoweringError: -function f(x::x) where x -# ╙ ── static parameter name not distinct from function argument -end - ######################################## # Return types function f(x)::Int @@ -682,9 +652,8 @@ end 14 (return %₁) ######################################## -# Broken: the following repeated destructured args should probably be an error -# but they're just normal locals so it's a bit hard to trigger. 
-function f((x,), (x,)) +# Duplicate destructured placeholders ok +function f((_,), (_,)) end #--------------------- 1 (method :f) @@ -694,24 +663,12 @@ end 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) 6 --- method core.nothing %₅ 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) - 2 (= slot₄/x (call core.getfield %₁ 1)) + 2 (call core.getfield %₁ 1) 3 (call top.indexed_iterate slot₃/destructured_arg_2 1) - 4 (= slot₄/x (call core.getfield %₃ 1)) + 4 (call core.getfield %₃ 1) 5 (return core.nothing) 7 (return %₁) -######################################## -# Error: Function argument destructuring conflicting with a global decl -function f((x,)) - global x -end -#--------------------- -LoweringError: -function f((x,)) -# ╙ ── Variable `x` declared both local and global - global x -end - ######################################## # Binding docs to functions """ diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 9d0ac4688ba80..7d84b4e3d0407 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -92,3 +92,176 @@ end 6 (return %₂) 7 (return %₁) +######################################## +# Error: Duplicate function argument names +function f(x, x) +end +#--------------------- +LoweringError: +function f(x, x) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Duplicate function argument with destructured arg +function f(x, (x,)) +end +#--------------------- +LoweringError: +function f(x, (x,)) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Static parameter name not unique +function f() where T where T +end +#--------------------- +LoweringError: +function f() where T where T +# ╙ ── function static parameter name not unique +end + +######################################## +# Error: static parameter colliding with argument names +function f(x::x) where x +end +#--------------------- 
+LoweringError: +function f(x::x) where x +# ╙ ── static parameter name not distinct from function argument +end + +######################################## +# Error: duplicate destructure args +function f((x,), (x,)) +end +#--------------------- +LoweringError: +function f((x,), (x,)) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Conflicting local and global decls +let + local x + global x +end +#--------------------- +LoweringError: +let + local x + global x +# └──────┘ ── Variable `x` declared both local and global +end + +######################################## +# Error: Conflicting argument and local +function f(x) + local x +end +#--------------------- +LoweringError: +function f(x) + local x +# └─────┘ ── local variable name `x` conflicts with an argument +end + +######################################## +# Error: Conflicting argument and global +function f(x) + global x +end +#--------------------- +LoweringError: +function f(x) + global x +# └──────┘ ── global variable name `x` conflicts with an argument +end + +######################################## +# Error: Conflicting destructured argument and global +# TODO: The error could probably be a bit better here +function f((x,)) + global x +end +#--------------------- +LoweringError: +function f((x,)) + global x +# └──────┘ ── Variable `x` declared both local and global +end + +######################################## +# Error: Conflicting static parameter and local +function f() where T + local T +end +#--------------------- +LoweringError: +function f() where T + local T +# └─────┘ ── local variable name `T` conflicts with a static parameter +end + +######################################## +# Error: Conflicting static parameter and global +function f() where T + global T +end +#--------------------- +LoweringError: +function f() where T + global T +# └──────┘ ── global variable name `T` conflicts with a static parameter +end + 
+######################################## +# Error: Conflicting static parameter and local in nested scope +function f() where T + let + local T + end +end +#--------------------- +LoweringError: +function f() where T + let + local T +# └─────┘ ── local variable name `T` conflicts with a static parameter + end +end + +######################################## +# Error: Conflicting static parameter and global in nested scope +function f() where T + let + global T + end +end +#--------------------- +LoweringError: +function f() where T + let + global T +# └──────┘ ── global variable name `T` conflicts with a static parameter + end +end + +######################################## +# Error: Conflicting static parameter and implicit local +function f() where T + let + T = rhs + end +end +#--------------------- +LoweringError: +function f() where T + let + T = rhs +# ╙ ── local variable name `T` conflicts with a static parameter + end +end + From 6e0345689a633cbffd2afd9f32ce8d9ef21339c5 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 30 Nov 2024 13:52:40 +1000 Subject: [PATCH 0897/1109] Move all "standard syntax extension" macros like `@goto` into JuliaLowering These macros are a part of the language itself because they emit special syntax trees which is known to lowering. This is regardless of the fact that they don't have a surface syntax form. Where a `Base` form of these exists we add a method to that macro so it can be used as usual without needing to import from JuliaLowering. 
--- JuliaLowering/src/ast.jl | 1 - JuliaLowering/src/desugaring.jl | 5 +- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 6 +- JuliaLowering/src/runtime.jl | 107 +++++++++++++++++++++++++++ JuliaLowering/test/branching_ir.jl | 14 ---- JuliaLowering/test/demo.jl | 29 +------- JuliaLowering/test/demo_include.jl | 4 +- JuliaLowering/test/functions.jl | 3 + JuliaLowering/test/macros.jl | 4 +- JuliaLowering/test/scopes_ir.jl | 28 +++---- JuliaLowering/test/typedefs_ir.jl | 10 --- 12 files changed, 136 insertions(+), 77 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 28cd0c18e19e7..6bdc4b0dd37e9 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -226,7 +226,6 @@ end # Convenience functions to create leaf nodes referring to identifiers within # the Core and Top modules. core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) -Any_type(ctx, ex) = core_ref(ctx, ex, "Any") svec_type(ctx, ex) = core_ref(ctx, ex, "svec") nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 60f27531fb9ff..8a89eac140a1e 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1473,8 +1473,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= aname = n end push!(arg_names, aname) - atype = !isnothing(info.type) ? info.type : Any_type(ctx, arg) - @assert !info.is_nospecialize # TODO + atype = !isnothing(info.type) ? info.type : @ast ctx arg "Any"::K"core" if info.is_slurp if i != length(args) throw(LoweringError(arg, "`...` may only be used for the last function argument")) @@ -1628,7 +1627,7 @@ function expand_macro_def(ctx, ex) [K"call"(sig) _make_macro_name(ctx, name) [K"::" - adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), + adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), kind(name) == K"." ? 
name[1] : name) MacroContext::K"Value" ] diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 1bee7f16aaa5f..a7cb64fc533e0 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -844,7 +844,7 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) binfo = lookup_binding(ctx, id) if !isnothing(new_id) sk = binfo.kind == :local || binfo.kind == :argument ? K"slot" : - binfo.kind == :static_parameter ? K"static_parameter" : + binfo.kind == :static_parameter ? K"static_parameter" : throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)")) makeleaf(ctx, ex, sk; var_id=new_id) else diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 15a966048f0af..f5e907b5003d2 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -73,7 +73,7 @@ end #-------------------------------------------------- struct MacroContext <: AbstractLoweringContext graph::SyntaxGraph - macroname::SyntaxTree + macrocall::SyntaxTree scope_layer::ScopeLayer end @@ -99,7 +99,7 @@ function Base.showerror(io::IO, exc::MacroExpansionError) print(io, "MacroExpansionError") ctx = exc.context if !isnothing(ctx) - print(io, " while expanding ", ctx.macroname, + print(io, " while expanding ", ctx.macrocall[1], " in module ", ctx.scope_layer.mod) end print(io, ":\n") @@ -147,7 +147,7 @@ function expand_macro(ctx, ex) # arguments to the macro call. macro_args = [set_scope_layer(ctx, e, ctx.current_layer.id, false) for e in children(ex)[2:end]] - mctx = MacroContext(ctx.graph, macname, ctx.current_layer) + mctx = MacroContext(ctx.graph, ex, ctx.current_layer) expanded = try # TODO: Allow invoking old-style macros for compat invokelatest(macfunc, mctx, macro_args...) 
diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 6231ecf1c7a87..1644ee4353724 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -245,3 +245,110 @@ function is_defined_nothrow_global(mod, name) isdefined(b.owner, :value) end +#------------------------------------------------------------------------------- +# The following are versions of macros from Base which act as "standard syntax +# extensions" with special semantics known to lowering. +# +# In order to implement these here without getting into bootstrapping +# difficulties, we just write them as plain old macro-named functions and add +# the required __context__ argument ourselves. +# +# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @isdefined +# +# TODO: Eventually we should move these to proper `macro` definitions and use +# JuliaLowering.include() or something, then we'll be in the fun little +# world of bootstrapping but it shouldn't be too painful :) + +function Base.var"@atomic"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration") + @ast __context__ __context__.macrocall [K"atomic" ex] +end + +function Base.var"@label"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex ex=>K"symbolic_label" +end + +function Base.var"@goto"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex ex=>K"symbolic_goto" +end + +function Base.var"@locals"(__context__::MacroContext) + @ast __context__ __context__.macrocall [K"extension" "locals"::K"Symbol"] +end + +function Base.var"@isdefined"(__context__::MacroContext, ex) + @ast __context__ __context__.macrocall [K"isdefined" ex] +end + +# The following `@islocal` and `@inert` are macros for special syntax known to +# lowering which don't exist in Base but arguably should. 
+# +# For now we have our own versions +function var"@islocal"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ __context__.macrocall [K"extension" + "islocal"::K"Symbol" + ex + ] +end + +""" +A non-interpolating quoted expression. + +For example, + +```julia +@inert quote + \$x +end +``` + +does not take `x` from the surrounding scope - instead it leaves the +interpolation `\$x` intact as part of the expression tree. + +TODO: What is the correct way for `@inert` to work? ie which of the following +should work? + +```julia +@inert quote + body +end + +@inert begin + body +end + +@inert x + +@inert \$x +``` + +The especially tricky cases involve nested interpolation ... +```julia +quote + @inert \$x +end + +@inert quote + quote + \$x + end +end + +@inert quote + quote + \$\$x + end +end +``` + +etc. Needs careful thought - we should probably just copy what lisp does with +quote+quasiquote 😅 +""" +function var"@inert"(__context__::MacroContext, ex) + @chk kind(ex) == K"quote" + @ast __context__ __context__.macrocall [K"inert" ex] +end + diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index 4d92070a0f328..6fb47f5c7d996 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -1,17 +1,3 @@ -using JuliaLowering: JuliaLowering, @ast, @chk -using JuliaSyntax - -function var"@label"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" - @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" -end - -function var"@goto"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" - @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" -end - -#******************************************************************************* ######################################## # Basic branching tail && value begin diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 4eb3625342afc..67a3a9b923f66 100644 
--- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -45,38 +45,13 @@ baremodule M using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode using JuliaSyntax + using JuliaLowering: @inert, @label, @goto, @islocal + using Base: @locals macro K_str(str) JuliaSyntax.Kind(str) end - function var"@inert"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == K"quote" - @ast __context__ ex [K"inert" ex] - end - - function var"@label"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ ex ex=>K"symbolic_label" - end - - function var"@goto"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ ex ex=>K"symbolic_goto" - end - - function var"@islocal"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ ex [K"extension" - "islocal"::K"Symbol" - ex - ] - end - - function var"@locals"(__context__::JuliaLowering.MacroContext) - @ast __context__ __context__.macroname [K"extension" "locals"::K"Symbol"] - end - # JuliaLowering.include(M, "demo_include.jl") end diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl index b28789ba61335..34417b43745d9 100644 --- a/JuliaLowering/test/demo_include.jl +++ b/JuliaLowering/test/demo_include.jl @@ -4,11 +4,11 @@ macro __MODULE__() end macro __FILE__() - JuliaLowering.filename(__context__.macroname) + JuliaLowering.filename(__context__.macrocall) end macro __LINE__() - JuliaLowering.source_location(__context__.macroname)[1] + JuliaLowering.source_location(__context__.macrocall)[1] end # Macro with local variables diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 5046531d34b05..10863a0e33660 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -27,6 +27,9 @@ begin values(kws) end + # Note this definition generates an arguably-spurious warning when run via + 
# `Pkg.test()` due to the use of `--warn-override=true` in the test + # harness. function kwtest() "non-kw version of kwtest" end diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 2338e0e440a8c..6e25c32608b51 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -13,11 +13,11 @@ module M end macro __FILE__() - JuliaLowering.filename(__context__.macroname) + JuliaLowering.filename(__context__.macrocall) end macro __LINE__() - JuliaLowering.source_location(__context__.macroname)[1] + JuliaLowering.source_location(__context__.macrocall)[1] end someglobal = "global in module M" diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 7d84b4e3d0407..ee6bad1190670 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -1,17 +1,5 @@ -using JuliaLowering -using JuliaLowering: kind, @chk, @ast, @K_str - -function var"@islocal"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ ex [K"extension" - "islocal"::K"Symbol" - ex - ] -end - -function var"@locals"(__context__::JuliaLowering.MacroContext) - @ast __context__ __context__.macroname [K"extension" "locals"::K"Symbol"] -end +using JuliaLowering: @islocal +using Base: @locals #******************************************************************************* ######################################## @@ -265,3 +253,15 @@ function f() where T end end +######################################## +# @isdefined with defined variables +let x = 1 + @isdefined x + @isdefined y +end +#--------------------- +1 1 +2 (= slot₁/x %₁) +3 (isdefined TestMod.y) +4 (return %₃) + diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index ff86a897f6029..759e36e7a04a3 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -1,13 +1,3 @@ -using JuliaLowering: JuliaLowering, @ast, @chk -using JuliaSyntax - -function 
var"@atomic"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" || kind(ex) == JuliaSyntax.K"::" (ex, - "Expected identifier or declaration") - @ast __context__ ex [K"atomic" ex] -end - -#******************************************************************************* ######################################## # where expression without type bounds A where X From b0c57a5ba0c49a0a02678a140fc75cdfd4a89e24 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 30 Nov 2024 16:17:11 +1000 Subject: [PATCH 0898/1109] `@ nospecialize` for function arguments I've chosen to attach the nospecialize metadata here as an attribute on the function argument names. This ensures it travels with the function arguments, without otherwise disturbing the AST. TODO: `@ nospecialise` within the function body is not done yet - it's not very natural in this scheme but I guess we should still be able to recognize it during scope analysis and turn it into a tombstone, moving the metadata onto the lambda's argument bindings. 
--- JuliaLowering/src/ast.jl | 64 ++++++++++++--- JuliaLowering/src/desugaring.jl | 18 ++--- JuliaLowering/src/eval.jl | 112 +++++++++++++++------------ JuliaLowering/src/linear_ir.jl | 7 +- JuliaLowering/src/macro_expansion.jl | 7 -- JuliaLowering/src/runtime.jl | 18 +++++ JuliaLowering/src/scope_analysis.jl | 14 ++-- JuliaLowering/src/syntax_graph.jl | 12 +++ JuliaLowering/src/utils.jl | 2 +- JuliaLowering/test/demo.jl | 5 ++ JuliaLowering/test/functions.jl | 13 ++++ JuliaLowering/test/functions_ir.jl | 14 ++++ JuliaLowering/test/utils.jl | 8 +- 13 files changed, 204 insertions(+), 90 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 6bdc4b0dd37e9..2fd5b266dcf13 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -66,6 +66,7 @@ struct BindingInfo is_always_defined::Bool # A local that we know has an assignment that dominates all usages (is never undef) is_internal::Bool # True for internal bindings generated by the compiler is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) + is_nospecialize::Bool # @nospecialize on this argument (only valid for kind == :argument) end function BindingInfo(name::AbstractString, kind::Symbol; @@ -75,9 +76,10 @@ function BindingInfo(name::AbstractString, kind::Symbol; is_ssa::Bool = false, is_always_defined::Bool = is_ssa, is_internal::Bool = false, - is_ambiguous_local::Bool = false) + is_ambiguous_local::Bool = false, + is_nospecialize::Bool = false) BindingInfo(name, kind, mod, type, is_const, is_ssa, is_always_defined, - is_internal, is_ambiguous_local) + is_internal, is_ambiguous_local, is_nospecialize) end """ @@ -124,6 +126,7 @@ function update_binding!(bindings::Bindings, x; isnothing(is_always_defined) ? b.is_always_defined : is_always_defined, b.is_internal, b.is_ambiguous_local, + b.is_nospecialize ) end @@ -246,10 +249,9 @@ end # Create a new local mutable variable or lambda argument # (TODO: rename this?) 
-function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; is_always_defined=false, kind=:local) +function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; kind=:local, kws...) @assert kind == :local || kind == :argument - id = new_binding(ctx.bindings, BindingInfo(name, kind; is_internal=true, - is_always_defined=is_always_defined)) + id = new_binding(ctx.bindings, BindingInfo(name, kind; is_internal=true, kws...)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) add_lambda_local!(ctx, id) @@ -435,23 +437,24 @@ function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}, all=false) end end -function mapchildren(f, ctx, ex; extra_attrs...) +function mapchildren(f::Function, ctx, ex::SyntaxTree, do_map_child::Function; + extra_attrs...) if is_leaf(ex) return ex end orig_children = children(ex) cs = isempty(extra_attrs) ? nothing : SyntaxList(ctx) for (i,e) in enumerate(orig_children) - c = f(e) + newchild = do_map_child(i) ? f(e) : e if isnothing(cs) - if c == e + if newchild == e continue else cs = SyntaxList(ctx) append!(cs, orig_children[1:i-1]) end end - push!(cs::SyntaxList, c) + push!(cs::SyntaxList, newchild) end if isnothing(cs) # This function should be allocation-free if no children were changed @@ -465,6 +468,26 @@ function mapchildren(f, ctx, ex; extra_attrs...) return ex2 end +function mapchildren(f::Function, ctx, ex::SyntaxTree, mapped_children::AbstractVector{<:Integer}; + extra_attrs...) + j = Ref(firstindex(mapped_children)) + function do_map_child(i) + ind = j[] + if ind <= lastindex(mapped_children) && mapped_children[ind] == i + j[] += 1 + true + else + false + end + end + mapchildren(f, ctx, ex, do_map_child; extra_attrs...) +end + +function mapchildren(f::Function, ctx, ex::SyntaxTree; extra_attrs...) + mapchildren(f, ctx, ex, i->true; extra_attrs...) 
+end + + """ Copy AST `ex` into `ctx` """ @@ -529,6 +552,29 @@ function adopt_scope(exs::SyntaxList, ref) return out end +# Type for `meta` attribute, to replace `Expr(:meta)`. +# It's unclear how much flexibility we need here - is a dict good, or could we +# just use a struct? Likely this will be sparse. Alternatively we could just +# use individual attributes but those aren't easy to add on an ad-hoc basis in +# the middle of a pass. +const CompileHints = Base.ImmutableDict{Symbol,Any} + +function setmeta(ex::SyntaxTree; kws...) + @assert length(kws) == 1 # todo relax later ? + key = first(keys(kws)) + value = first(values(kws)) + meta = begin + m = get(ex, :meta, nothing) + isnothing(m) ? CompileHints(key, value) : CompileHints(m, key, value) + end + setattr(ex; meta=meta) +end + +function getmeta(ex::SyntaxTree, name::Symbol, default) + meta = get(ex, :meta, nothing) + isnothing(meta) ? default : get(meta, name, default) +end + #------------------------------------------------------------------------------- # Predicates and accessors working on expression trees diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 8a89eac140a1e..39a569b749b8a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1237,7 +1237,6 @@ function match_function_arg(full_ex) type = nothing default = nothing is_slurp = false - is_nospecialize = false ex = full_ex while true k = kind(ex) @@ -1258,10 +1257,6 @@ function match_function_arg(full_ex) @chk numchildren(ex) == 1 is_slurp = true ex = ex[1] - elseif k == K"meta" - @chk ex[1].name_val == "nospecialize" - is_nospecialize = true - ex = ex[2] elseif k == K"=" if !isnothing(default) throw(full_ex, "multiple defaults provided with `=` in function argument") @@ -1275,8 +1270,7 @@ function match_function_arg(full_ex) return (name=name, type=type, default=default, - is_slurp=is_slurp, - is_nospecialize=is_nospecialize) + is_slurp=is_slurp) end # Expand `where` clause(s) of a 
function into (typevar_names, typevar_stmts) where @@ -1467,9 +1461,13 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" if kind(aname) == K"tuple" # Argument destructuring - n = new_mutable_var(ctx, aname, "destructured_arg_$i"; kind=:argument) - push!(body_stmts, @ast ctx aname [K"local"(meta=CompileHints(:is_destructured_arg, true)) - [K"=" aname n]]) + is_nospecialize = getmeta(arg, :nospecialize, false) + n = new_mutable_var(ctx, aname, "destructured_arg_$i"; + kind=:argument, is_nospecialize=is_nospecialize) + push!(body_stmts, @ast ctx aname [ + K"local"(meta=CompileHints(:is_destructured_arg, true)) + [K"=" aname n] + ]) aname = n end push!(arg_names, aname) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 46d3202a1b19b..14f544ef6a3f9 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -51,44 +51,45 @@ function _compress_debuginfo(info) Core.DebugInfo(Symbol(filename), nothing, edges, codelocs) end -function ir_debug_info(ex) - code = children(ex) - +function ir_debug_info_state(ex) e1 = first(flattened_provenance(ex)) topfile = filename(e1) + [(topfile, [], Vector{Int32}())] +end - current_codelocs_stack = [(topfile, [], Vector{Int32}())] - for i in 1:length(code) - locstk = [(filename(e), source_location(e)[1]) for e in flattened_provenance(code[i])] - for j in 1:max(length(locstk), length(current_codelocs_stack)) - if j > length(locstk) || (length(current_codelocs_stack) >= j && - current_codelocs_stack[j][1] != locstk[j][1]) - while length(current_codelocs_stack) >= j - info = pop!(current_codelocs_stack) - push!(last(current_codelocs_stack)[2], info) - end - end - if j > length(locstk) - break - elseif j > length(current_codelocs_stack) - push!(current_codelocs_stack, (locstk[j][1], [], Vector{Int32}())) +function add_ir_debug_info!(current_codelocs_stack, stmt) + locstk = [(filename(e), 
source_location(e)[1]) for e in flattened_provenance(stmt)] + for j in 1:max(length(locstk), length(current_codelocs_stack)) + if j > length(locstk) || (length(current_codelocs_stack) >= j && + current_codelocs_stack[j][1] != locstk[j][1]) + while length(current_codelocs_stack) >= j + info = pop!(current_codelocs_stack) + push!(last(current_codelocs_stack)[2], info) end end - for (j, (file,line)) in enumerate(locstk) - fn, edges, codelocs = current_codelocs_stack[j] - @assert fn == file - if j < length(locstk) - edge_index = length(edges) + 1 - edge_codeloc_index = fld1(length(current_codelocs_stack[j+1][3]) + 1, 3) - else - edge_index = 0 - edge_codeloc_index = 0 - end - push!(codelocs, line) - push!(codelocs, edge_index) - push!(codelocs, edge_codeloc_index) + if j > length(locstk) + break + elseif j > length(current_codelocs_stack) + push!(current_codelocs_stack, (locstk[j][1], [], Vector{Int32}())) + end + end + for (j, (file,line)) in enumerate(locstk) + fn, edges, codelocs = current_codelocs_stack[j] + @assert fn == file + if j < length(locstk) + edge_index = length(edges) + 1 + edge_codeloc_index = fld1(length(current_codelocs_stack[j+1][3]) + 1, 3) + else + edge_index = 0 + edge_codeloc_index = 0 end + push!(codelocs, line) + push!(codelocs, edge_index) + push!(codelocs, edge_codeloc_index) end +end + +function finish_ir_debug_info!(current_codelocs_stack) while length(current_codelocs_stack) > 1 info = pop!(current_codelocs_stack) push!(last(current_codelocs_stack)[2], info) @@ -101,15 +102,9 @@ end # Julia runtime function to_code_info(ex, mod, funcname, slots) input_code = children(ex) - code = Any[to_lowered_expr(mod, ex) for ex in input_code] + stmts = Any[] - debuginfo = ir_debug_info(ex) - - # TODO: Set ssaflags based on call site annotations: - # - @inbounds annotations - # - call site @inline / @noinline - # - call site @assume_effects - ssaflags = zeros(UInt32, length(code)) + current_codelocs_stack = ir_debug_info_state(ex) nargs = 
sum((s.kind==:argument for s in slots), init=0) slotnames = Vector{Symbol}(undef, length(slots)) @@ -122,10 +117,29 @@ function to_code_info(ex, mod, funcname, slots) if ni > 0 name = "$name@$ni" end - slotnames[i] = Symbol(name) + sname = Symbol(name) + slotnames[i] = sname slotflags[i] = 0x00 # FIXME!! + if slot.is_nospecialize + add_ir_debug_info!(current_codelocs_stack, ex) + push!(stmts, Expr(:meta, :nospecialize, Core.SlotNumber(i))) + end end + prefix_len = length(stmts) + for stmt in children(ex) + push!(stmts, to_lowered_expr(mod, stmt, prefix_len)) + add_ir_debug_info!(current_codelocs_stack, stmt) + end + + debuginfo = finish_ir_debug_info!(current_codelocs_stack) + + # TODO: Set ssaflags based on call site annotations: + # - @inbounds annotations + # - call site @inline / @noinline + # - call site @assume_effects + ssaflags = zeros(UInt32, length(stmts)) + # TODO: Set true for @propagate_inbounds propagate_inbounds = false # TODO: Set true if there's a foreigncall @@ -141,7 +155,7 @@ function to_code_info(ex, mod, funcname, slots) # The following CodeInfo fields always get their default values for # uninferred code. - ssavaluetypes = length(code) # Why does the runtime code do this? + ssavaluetypes = length(stmts) # Why does the runtime code do this? 
slottypes = nothing parent = nothing method_for_inference_limit_heuristics = nothing @@ -152,7 +166,7 @@ function to_code_info(ex, mod, funcname, slots) inlining_cost = 0xffff _CodeInfo( - code, + stmts, debuginfo, ssavaluetypes, ssaflags, @@ -176,7 +190,7 @@ function to_code_info(ex, mod, funcname, slots) ) end -function to_lowered_expr(mod, ex) +function to_lowered_expr(mod, ex, prefix_len=0) k = kind(ex) if is_literal(k) ex.value @@ -203,9 +217,9 @@ function to_lowered_expr(mod, ex) elseif k == K"static_parameter" Expr(:static_parameter, ex.var_id) elseif k == K"SSAValue" - Core.SSAValue(ex.var_id) + Core.SSAValue(ex.var_id + prefix_len) elseif k == K"return" - Core.ReturnNode(to_lowered_expr(mod, ex[1])) + Core.ReturnNode(to_lowered_expr(mod, ex[1], prefix_len)) elseif is_quoted(k) if k == K"inert" ex[1] @@ -227,14 +241,14 @@ function to_lowered_expr(mod, ex) elseif k == K"goto" Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, ex[1]), ex[2].id) + Core.GotoIfNot(to_lowered_expr(mod, ex[1], prefix_len), ex[2].id) elseif k == K"enter" catch_idx = ex[1].id numchildren(ex) == 1 ? Core.EnterNode(catch_idx) : - Core.EnterNode(catch_idx, to_lowered_expr(ex[2])) + Core.EnterNode(catch_idx, to_lowered_expr(mod, ex[2], prefix_len)) elseif k == K"method" - cs = map(e->to_lowered_expr(mod, e), children(ex)) + cs = map(e->to_lowered_expr(mod, e, prefix_len), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] Expr(:method, c1, cs[2:end]...) @@ -258,7 +272,7 @@ function to_lowered_expr(mod, ex) if isnothing(head) TODO(ex, "Unhandled form for kind $k") end - Expr(head, map(e->to_lowered_expr(mod, e), children(ex))...) + Expr(head, map(e->to_lowered_expr(mod, e, prefix_len), children(ex))...) 
end end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index a7cb64fc533e0..6ef2ab282a22a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -914,6 +914,7 @@ end struct Slot name::String kind::Symbol + is_nospecialize::Bool # <- todo: flags here etc end @@ -929,13 +930,13 @@ function compile_lambda(outer_ctx, ex) for arg in children(lambda_args) if kind(arg) == K"Placeholder" # Unused functions arguments like: `_` or `::T` - push!(slots, Slot(arg.name_val, :argument)) + push!(slots, Slot(arg.name_val, :argument, false)) else @assert kind(arg) == K"BindingId" id = arg.var_id info = lookup_binding(ctx.bindings, id) @assert info.kind == :local || info.kind == :argument - push!(slots, Slot(info.name, :argument)) + push!(slots, Slot(info.name, :argument, info.is_nospecialize)) slot_rewrites[id] = length(slots) end end @@ -943,7 +944,7 @@ function compile_lambda(outer_ctx, ex) for id in sort(collect(ex.lambda_locals)) info = lookup_binding(ctx.bindings, id) @assert info.kind == :local - push!(slots, Slot(info.name, :local)) + push!(slots, Slot(info.name, :local, false)) slot_rewrites[id] = length(slots) end for (i,arg) in enumerate(children(static_parameters)) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index f5e907b5003d2..249c57bda14c7 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -14,13 +14,6 @@ struct ScopeLayer is_macro_expansion::Bool # FIXME end -# Type for `meta` attribute, to replace `Expr(:meta)`. -# It's unclear how much flexibility we need here - is a dict good, or could we -# just use a struct? Likely this will be sparse. Alternatively we could just -# use individual attributes but those aren't easy to add on an ad-hoc basis in -# the middle of a pass. 
-const CompileHints = Base.ImmutableDict{Symbol,Any} - struct MacroExpansionContext{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 1644ee4353724..0d819034f9c43 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -259,6 +259,24 @@ end # JuliaLowering.include() or something, then we'll be in the fun little # world of bootstrapping but it shouldn't be too painful :) +function _apply_nospecialize(ctx, ex) + k = kind(ex) + if k == K"Identifier" || k == K"Placeholder" || k == K"tuple" + setmeta(ex; nospecialize=true) + elseif k == K"..." || k == K"::" || k == K"=" + if k == K"::" && numchildren(ex) == 1 + ex = @ast ctx ex [K"::" "_"::K"Placeholder" ex[1]] + end + mapchildren(c->_apply_nospecialize(ctx, c), ctx, ex, 1:1) + else + throw(LoweringError(ex, "Invalid function argument")) + end +end + +function Base.var"@nospecialize"(__context__::MacroContext, ex) + _apply_nospecialize(__context__, ex) +end + function Base.var"@atomic"(__context__::MacroContext, ex) @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration") @ast __context__ __context__.macrocall [K"atomic" ex] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index cfbd6a900cde3..7b4b4761a09e5 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -59,7 +59,7 @@ function _find_scope_vars!(assignments, locals, destructured_args, globals, used k in KSet"scope_block lambda module toplevel" return elseif k == K"local" || k == K"local_def" - if (meta = get(ex, :meta, nothing); !isnothing(meta) && get(meta, :is_destructured_arg, false)) + if getmeta(ex, :is_destructured_arg, false) push!(destructured_args, ex[1]) else _insert_if_not_present!(locals, NameKey(ex[1]), ex) @@ -189,13 +189,11 @@ function var_kind(ctx, varkey::NameKey, exclude_toplevel_globals=false) isnothing(id) 
? nothing : lookup_binding(ctx, id).kind end -function init_binding(ctx, varkey::NameKey, kind::Symbol, is_ambiguous_local=false) +function init_binding(ctx, varkey::NameKey, kind::Symbol; kws...) id = kind === :global ? get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) mod = kind === :global ? ctx.scope_layers[varkey.layer].mod : nothing - id = new_binding(ctx.bindings, - BindingInfo(varkey.name, kind; - mod=mod, is_ambiguous_local=is_ambiguous_local)) + id = new_binding(ctx.bindings, BindingInfo(varkey.name, kind; mod=mod, kws...)) end if kind === :global ctx.global_vars[varkey] = id @@ -217,7 +215,8 @@ function add_lambda_args(ctx, var_ids, args, args_kind) "static parameter name not distinct from function argument" throw(LoweringError(arg, msg)) end - var_ids[varkey] = init_binding(ctx, varkey, args_kind) + var_ids[varkey] = init_binding(ctx, varkey, args_kind; + is_nospecialize=getmeta(arg, :nospecialize, false)) elseif ka != K"BindingId" && ka != K"Placeholder" throw(LoweringError(arg, "Unexpected lambda arg kind")) end @@ -334,7 +333,8 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end end - var_ids[varkey] = init_binding(ctx, varkey, :local, is_ambiguous_local) + var_ids[varkey] = init_binding(ctx, varkey, :local; + is_ambiguous_local=is_ambiguous_local) end end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index ba8c6e9aa54d1..8847e434d547b 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -241,6 +241,18 @@ function attrnames(ex::SyntaxTree) [name for (name, value) in pairs(attrs) if haskey(value, ex._id)] end +function setattr(ex::SyntaxTree; extra_attrs...) + graph = syntax_graph(ex) + id = newnode!(graph) + if !is_leaf(ex) + setchildren!(graph, id, _node_ids(graph, children(ex)...)) + end + ex2 = SyntaxTree(graph, id) + copy_attrs!(ex2, ex, true) + setattr!(ex2; extra_attrs...) 
+ ex2 +end + function setattr!(ex::SyntaxTree; attrs...) setattr!(ex._graph, ex._id; attrs...) end diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 7a3c742e115ff..a7cdb666b88e4 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -77,7 +77,7 @@ function print_ir(io::IO, ex, indent="") if kind(e) == K"method" && numchildren(e) == 3 println(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) @assert kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" - print_ir(io, e[3], " ") + print_ir(io, e[3], indent*" ") else code = string(e) println(io, indent, lno, " ", code) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 67a3a9b923f66..291aede15d176 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -595,6 +595,11 @@ function f(x=1, ys...=(1,2)...) end """ +src = """ +function f((x,), (x,)) +end +""" + # TODO: fix this - it's interpreted in a bizarre way as a kw call. # src = """ # function f(x=y=1) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 10863a0e33660..0119935797e32 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -200,4 +200,17 @@ begin end """) == (1,2,3,4) +@test JuliaLowering.include_string(test_mod, """ +begin + function f_nospecialize(u, v, @nospecialize(x), y, @nospecialize(z)) + (u, v, x, y, z) + end + + f_nospecialize(1,2,3,4,5) +end +""") == (1,2,3,4,5) +# We dig into the internal of `Method` here to check which slots have been +# flagged as nospecialize. 
+@test only(methods(test_mod.f_nospecialize)).nospecialize == 0b10100 + end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 79a67c166e57e..0403de848e119 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -669,6 +669,20 @@ end 5 (return core.nothing) 7 (return %₁) +######################################## +# Functions with @nospecialize argument metadata +function f(@nospecialize(x)) +end +#--------------------- +1 (method :f) +2 (call core.Typeof %₁) +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + 1 (return core.nothing) +7 (return %₁) + ######################################## # Binding docs to functions """ diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 157b1627ca010..7586d41ef1b6e 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -141,12 +141,12 @@ function read_ir_test_cases(filename) [match_ir_test_case(s) for s in split(cases_str, r"####*") if strip(s) != ""]) end -function format_ir_for_test(mod, input, expect_error=false) +function format_ir_for_test(mod, description, input, expect_error=false) ex = parsestmt(SyntaxTree, input) try x = JuliaLowering.lower(mod, ex) if expect_error - error("Expected a lowering error in test case") + error("Expected a lowering error in test case \"$description\"") end ir = strip(sprint(JuliaLowering.print_ir, x)) return replace(ir, string(mod)=>"TestMod") @@ -164,7 +164,7 @@ function test_ir_cases(filename::AbstractString) test_mod = Module(:TestMod) Base.include_string(test_mod, preamble) for (expect_error, description, input, ref) in cases - output = format_ir_for_test(test_mod, input, expect_error) + output = format_ir_for_test(test_mod, description, input, expect_error) @testset "$description" begin if output != ref # Do additional error dumping, as @test will not format errors in a nice 
way @@ -192,7 +192,7 @@ function refresh_ir_test_cases(filename, pattern=nothing) end for (expect_error, description, input, ref) in cases if isnothing(pattern) || occursin(pattern, description) - ir = format_ir_for_test(test_mod, input, expect_error) + ir = format_ir_for_test(test_mod, description, input, expect_error) if ir != ref @info "Refreshing test case $(repr(description)) in $filename" end From 5e7de9c079172fa60944d509cf8efd69a80012ac Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 1 Dec 2024 16:34:56 +1000 Subject: [PATCH 0899/1109] Big rearrangement and improvements to README * Logically arrange algorithm description into lowering passes. Move existing descriptions there as necessary and write at least a little about each pass where it doesn't exist already. * Describe high level outline of closure conversion pass. * Remove some older descriptions of existing lowering mechanisms which are not really needed anymore. --- JuliaLowering/README.md | 302 ++++++++++++++++++++-------------------- 1 file changed, 154 insertions(+), 148 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 3b8697f8fa590..14eca8bacbfc3 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -26,30 +26,14 @@ This work is intended to ## Trying it out -Note this is a very early work in progress; most things probably don't work! +Note this is a work in progress; many types of syntax are not yet handled. -1. You need a 1.12-DEV build of Julia: At least 1.12.0-DEV.512; commit `263928f9ad4` currently works. Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. (In fact it is currently broken on the latest `1.12-DEV`.) +1. You need a 1.12-DEV build of Julia: At least 1.12.0-DEV.512. Commit `263928f9ad4` is currentl known to work. Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. 
(In fact it is currently broken on the latest `1.12-DEV`.) 2. Check out the main branch of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) 3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) 4. Run the demo `include("test/demo.jl")` -# Design Notes - -Lowering has five symbolic simplification passes: - -1. Macro expansion - expanding user-defined syntactic constructs by running the - user's macros. This pass also includes a small amount of other symbolic - simplification. -2. Syntax desugaring - simplifying Julia's rich surface syntax down to a small - number of syntactic forms. -3. Scope analysis - analyzing identifier names used in the code to discover - local variables, closure captures, and associate global variables to the - appropriate module. Transform all names (kind `K"Identifier"`) into binding - IDs (kind `K"BindingId"`) which can be looked up in a table of bindings. -4. Closure conversion - convert closures to types and deal with captured - variables efficiently where possible. -5. Flattening to linear IR - convert code in hierarchical tree form to a - flat array of statements; convert control flow into gotos. +# Design notes ## Syntax trees @@ -189,9 +173,7 @@ M.@outer() # @ foo.jl:1 ``` -## Hygiene - -### Problems with Hygiene in Julia's exiting macro system +## Problems with Hygiene in Julia's exiting macro system To write correct hygienic macros in Julia (as of 2024), macro authors must use `esc()` on any any syntax passed to the macro so that passed identifiers escape @@ -215,6 +197,37 @@ symbols. For example, a macro call `@foo x` gets passed the symbol `:x` which is just a name without any information attached to indicate that it came from the scope where `@foo` was called. 
+### Hygiene References + +* [Toward Fearless Macros](https://lambdaland.org/posts/2023-10-17_fearless_macros) - + a blog post by Ashton Wiersdorf +* [Towards the Essence of Hygiene](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) - a paper by Michael Adams +* [Bindings as sets of scopes](https://www-old.cs.utah.edu/plt/scope-sets/) - a description of Racket's scope set mechanism by Matthew Flatt + +# Overview of lowering passes + +JuliaLowering uses six symbolic transformation passes: + +1. Macro expansion - expanding user-defined syntactic constructs by running the + user's macros. This pass also includes a small amount of other symbolic + simplification. +2. Syntax desugaring - simplifying Julia's rich surface syntax down to a small + number of syntactic forms. +3. Scope analysis - analyzing identifier names used in the code to discover + local variables, closure captures, and associate global variables to the + appropriate module. Transform all names (kind `K"Identifier"`) into binding + IDs (kind `K"BindingId"`) which can be looked up in a table of bindings. +4. Closure conversion - convert closures to types and deal with captured + variables efficiently where possible. +5. Flattening to untyped IR - convert code in hierarchical tree form to a + flat array of statements; convert control flow into gotos. +6. Convert untyped IR to `CodeInfo` form for integration with the Julia runtime. + +## Pass 1: Macro expansion + +This pass expands macros and quoted syntax, and does some very light conversion +of a few syntax `Kind`s in preparation for syntax desugaring. + ### Hygiene in JuliaLowering In JuliaLowering we make hygiene automatic and remove `esc()` by combining names @@ -246,14 +259,104 @@ discussed in Adams' paper: TODO: Write more here... 
-#### References +## Pass 2: Syntax desugaring -* [Toward Fearless Macros](https://lambdaland.org/posts/2023-10-17_fearless_macros) - - a blog post by Ashton Wiersdorf -* [Towards the Essence of Hygiene](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) - a paper by Michael Adams -* [Bindings as sets of scopes](https://www-old.cs.utah.edu/plt/scope-sets/) - a description of Racket's scope set mechanism by Matthew Flatt +This pass recursively converts many special surface syntax forms to a smaller +set of syntax `Kind`s, following the AST's hierarchical tree structure. Some +such as `K"scope_block"` are internal to lowering and removed during later +passes. See `kinds.jl` for a list of these internal forms. + +This pass is implemented in `desugaring.jl`. It's quite large because Julia has +many special syntax features. + +## Pass 3: Scope analysis / binding resolution + +This pass replaces variables with bindings of kind `K"BindingId"`, +disambiguating variables when the same name is used in different scopes. It +also fills in the list of non-global bindings within each lambda and metadata +about such bindings as will be used later during closure conversion. + +Scopes are documented in the Juila documentation on +[Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/) + +During scope resolution, we maintain a stack of `ScopeInfo` data structures. + +When a new `lambda` or `scope_block` is discovered, we create a new `ScopeInfo` by +1. Find all identifiers bound or used within a scope. New *bindings* may be + introduced by one of the `local`, `global` keywords, implicitly by + assignment, as function arguments to a `lambda`, or as type arguments in a + method ("static parameters"). Identifiers are *used* when they are + referenced. +2. Infer which bindings are newly introduced local or global variables (and + thus require a distinct identity from names already in the stack) +3. 
Assign a `BindingId` (unique integer) to each new binding + +We then push this `ScopeInfo` onto the stack and traverse the expressions +within the scope translating each `K"Identifier"` into the associated +`K"BindingId"`. While we're doing this we also resolve some special forms like +`islocal` by making use of the scope stack. + +The detailed rules for whether assignment introduces a new variable depend on +the `scope_block`'s `scope_type` attribute when we are processing top-level +code. +* `scope_type == :hard` (as for bindings inside a `let` block) means an + assignment always introduces a new binding +* `scope_type == :neutral` - inherit soft or hard scope from the parent scope. +* `scope_type == :soft` - assignments are to globals if the variable + exists in global module scope. Soft scope doesn't have surface syntax and is + introduced for top-level code by REPL-like environments. + +## Pass 4: Closure conversion / lower bindings + +The main goal of this pass is closure conversion, but it's also used for +lowering typed bindings and global assignments. Roughly, this is passes 3 and 4 +in the original `julia-syntax.scm`. In JuliaLowering it also comes in two steps: -## Lowering of exception handlers +The first step (part of `scope_resolution.jl`) is to compute metadata related +to bindings, both per-binding and per-binding-per-closure-scope. + +Properties which are computed per-binding which can help with symbolic +optimizations include: +* Type is declared (`x::T` syntax in a statement): type conversions must be + inserted at every assignment of `x`. +* Never undefined: value is always assigned to the binding before being read + hence this binding doesn't require the use of `Core.NewvarNode`. +* Single assignment: (TODO how is this defined, what is it for and does it go + here or below?) + +Properties of non-globals which are computed per-binding-per-closure include: +* Read: the value of the binding is used. +* Write: the binding is asssigned to. 
Such bindings often need to become + `Core.Box` so their value can be shared between the defining scope and the + closure body. +* Captured: Bindings defined outside the closure which are either Read or Write + within the closure are "captured" and need to be one of the closure's fields. +* Called: the binding is called as a function, ie, `x()`. (TODO - what is this + for?) + +The second step uses this metadata to +* Convert closures into `struct` types +* Lower bindings captured by closures into references to boxes as necessary +* Deal with typed bindings (`K"decl"`) and their assignments +* Lower const and non-const global assignments +* TODO: probably more here. + +## Pass 5: Convert to untyped IR + +This pass is implemented in `linear_ir.jl`. + +### Untyped IR (JuliaLowering form) + +JuliaLowering's untyped IR is very close to the runtime's `CodeInfo` form (see +below), but is more concretely typed as `JuliaLowering.SyntaxTree`. + +Metadata is generally represented differently: +* The statements retain full code provenance information as `SyntaxTree` + objects. See `kinds.jl` for a list of which `Kind`s occur in the output IR + but not in surface syntax. +* The list of slots is `Vector{Slot}`, including `@nospecialize` metadata + +### Lowering of exception handlers Exception handling involves a careful interplay between lowering and the Julia runtime. The forms `enter`, `leave` and `pop_exception` dynamically modify the @@ -300,7 +403,7 @@ exception-related state restoration which need to happen. Note also that the "handler state restoration" actually includes several pieces of runtime state including GC flags - see `jl_eh_restore_state` in the runtime for that. -### Lowering finally code paths +#### Lowering finally code paths When lowering `finally` blocks we want to emit the user's finally code once but multiple code paths may traverse the finally block. 
For example, consider the @@ -344,130 +447,33 @@ multiple `return`s create multiple tags rather than assigning to a single variable. Collapsing these into a single case might be worth considering? But also might be worse for type inference in some cases?) -## Untyped IR +## Pass 6: Convert IR to `CodeInfo` representation -Julia's untyped IR as held in the `CodeInfo` data structure is an array of -statements of type `Expr` with a small number of allowed forms. The IR obeys -certain invariants which are checked by the downstream code in -base/compiler/validation.jl. +This pass convert's JuliaLowering's internal representation of untyped IR into +a form the Julia runtime understands. This is a necessary decoupling which +separates the development of JuliaLowering.jl from the evolution of the Julia +runtime itself. -## Scope resolution +### Untyped IR (`CodeInfo` form) -Scopes are documented in the Juila documentation on -[Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/) - -The scope resolution pass disambiguates variables which have the same name in -different scopes and fills in the list of local variables within each lambda. - -During scope resolution, we maintain a stack of `ScopeInfo` data structures. +The final lowered IR is expressed as `CodeInfo` objects which are a sequence of +`code` statments containing +* Literals +* Restricted forms of `Expr` (with semantics different from surface syntax, + even for the same `head`! for example the arguments to `Expr(:call)` in IR + must be "simple" and aren't evaluated in order) +* `Core.SlotNumber` +* Other special forms from `Core` like `Core.ReturnNode`, `Core.EnterNode`, etc. +* `Core.SSAValue`, indexing any value generated from a statement in the `code` + array. +* Etc (todo) -When a new `lambda` or `scope_block` is discovered, we create a new `ScopeInfo` by -1. Find all identifiers bound or used within a scope. 
New *bindings* may be - introduced by one of the `local`, `global` keywords, implicitly by - assignment, as function arguments to a `lambda`, or as type arguments in a - method ("static parameters"). Identifiers are *used* when they are - referenced. -2. Infer which bindings are newly introduced local or global variables (and - thus require a distinct identity from names already in the stack) -3. Assign a `BindingId` (unique integer) to each new binding - -We then push this `ScopeInfo` onto the stack and traverse the expressions -within the scope translating each `K"Identifier"` into the associated -`K"BindingId"`. While we're doing this we also resolve some special forms like -`islocal` by making use of the scope stack. - -The detailed rules for whether assignment introduces a new variable depend on -the `scope_block`'s `scope_type` attribute when we are processing top-level -code. -* `scope_type == :hard` (as for bindings inside a `let` block) means an - assignment always introduces a new binding -* `scope_type == :neutral` - inherit soft or hard scope from the parent scope. -* `scope_type == :soft` - assignments are to globals if the variable - exists in global module scope. Soft scope doesn't have surface syntax and is - introduced for top-level code by REPL-like environments. - -## Julia's existing lowering implementation - -### How does macro expansion work? - -`macroexpand(m::Module, x)` calls `jl_macroexpand` in ast.c: +The IR obeys certain invariants which are checked by the downstream code in +base/compiler/validation.jl. -``` -jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule) -{ - expr = jl_copy_ast(expr); - expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_world_counter, 0); - expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule); - return expr; -} -``` +See also https://docs.julialang.org/en/v1/devdocs/ast/#Lowered-form -First we copy the AST here. 
This is mostly a trivial deep copy of `Expr`s and -shallow copy of their non-`Expr` children, except for when they contain -embedded `CodeInfo/phi/phic` nodes which are also deep copied. - -Second we expand macros recursively by calling - -`jl_expand_macros(expr, inmodule, macroctx, onelevel, world, throw_load_error)` - -This relies on state indexed by `inmodule` and `world`, which gives it some -funny properties: -* `module` expressions can't be expanded: macro expansion depends on macro - lookup within the module, but we can't do that without `eval`. - -Expansion proceeds from the outermost to innermost macros. So macros see any -macro calls or quasiquote (`quote/$`) in their children as unexpanded forms. - -Things which are expanded: -* `quote` is expanded using flisp code in `julia-bq-macro` - - symbol / ssavalue -> `QuoteNode` (inert) - - atom -> itself - - at depth zero, `$` expands to its content - - Expressions `x` without `$` expand to `(copyast (inert x))` - - Other expressions containing a `$` expand to a call to `_expr` with all the - args mapped through `julia-bq-expand-`. Roughly! - - Special handling exists for multi-splatting arguments as in `quote quote $$(x...) end end` -* `macrocall` proceeds with - - Expand with `jl_invoke_julia_macro` - - Call `eval` on the macro name (!!) to get the macro function. Look up - the method. - - Set up arguments for the macro calling convention - - Wraps errors in macro invocation in `LoadError` - - Returns the expression, as well as the module at - which that method of that macro was defined and `LineNumberNode` where - the macro was invoked in the source. - - Deep copy the AST - - Recursively expand child macros in the context of the module where the - macrocall method was defined - - Wrap the result in `(hygienic-scope ,result ,newctx.m ,lineinfo)` (except - for special case optimizations) -* `hygenic-scope` expands `args[1]` with `jl_expand_macros`, with the module - of expansion set to `args[2]`. 
Ie, it's the `Expr` representation of the - module and expression arguments to `macroexpand`. The way this returns - either `hygenic-scope` or unwraps is a bit confusing. -* "`do` macrocalls" have their own special handling because the macrocall is - the child of the `do`. This seems like a mess!! - - -### Intermediate forms used in lowering - -* `local-def` - flisp code explains this as - - "a local that we know has an assignment that dominates all usages" - - "local declaration of a defined variable" - -There's also this comment in https://github.com/JuliaLang/julia/issues/22314: - -> mark the [...] variable as local-def, which would prevent it from getting Core.Boxed during the closure conversion it'll be detected as known-SSA - -But maybe that's confusing. It seems like `local-def` is a local which lowering -asserts is "always defined" / "definitely initialized before use". But it's not -necessarily single-assign, so not SSA. - -### Lowered IR - -See https://docs.julialang.org/en/v1/devdocs/ast/#Lowered-form - -#### CodeInfo +CodeInfo layout (as of early 1.12-DEV): ```julia mutable struct CodeInfo @@ -502,7 +508,7 @@ mutable struct CodeInfo end ``` -### Notes on toplevel-only forms and eval-related functions +## Notes on toplevel-only forms and eval-related functions In the current Julia runtime, From 6bf145fe7f0e82781e77ca8270d8d7d44ece1bf0 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 1 Dec 2024 16:37:53 +1000 Subject: [PATCH 0900/1109] Prepare for storing info on closure captures * Generalize attribute lambda_locals -> lambda_bindings * New structs LambdaBindings + CaptureInfo; rearrange existing code to use the bindings struct (capture info currently unused) --- JuliaLowering/src/closure_conversion.jl | 8 +++---- JuliaLowering/src/linear_ir.jl | 14 +++++------ JuliaLowering/src/scope_analysis.jl | 31 ++++++++++++++++++------- JuliaLowering/test/demo.jl | 5 ---- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git 
a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index b486b0cb72e69..b33c5cd4e2ec5 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -2,11 +2,11 @@ struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings mod::Module - lambda_locals::Set{IdTag} + lambda_bindings::LambdaBindings end function add_lambda_local!(ctx::ClosureConversionCtx, id) - push!(ctx.lambda_locals, id) + push!(ctx.lambda_bindings.locals, id) end # Convert `ex` to `type` by calling `convert(type, ex)` when necessary. @@ -164,7 +164,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) children(ex)... ]) elseif k == K"lambda" - ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_locals) + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_bindings) mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) else mapchildren(e->_convert_closures(ctx, e), ctx, ex) @@ -187,7 +187,7 @@ Invariants: """ function convert_closures(ctx::ScopeResolutionContext, ex) @assert kind(ex) == K"lambda" - ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_locals) + ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_bindings) ex1 = _convert_closures(ctx, ex) ctx, ex1 end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 6ef2ab282a22a..9abe2f3d12e23 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -84,7 +84,7 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext bindings::Bindings next_label_id::Ref{Int} is_toplevel_thunk::Bool - lambda_locals::Set{IdTag} + lambda_bindings::LambdaBindings return_type::Union{Nothing, SyntaxTree{GraphType}} break_targets::Dict{String, JumpTarget{GraphType}} handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} @@ -95,12 +95,12 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext 
mod::Module end -function LinearIRContext(ctx, is_toplevel_thunk, lambda_locals, return_type) +function LinearIRContext(ctx, is_toplevel_thunk, lambda_bindings, return_type) graph = syntax_graph(ctx) rett = isnothing(return_type) ? nothing : reparent(graph, return_type) GraphType = typeof(graph) LinearIRContext(graph, SyntaxList(ctx), ctx.bindings, Ref(0), - is_toplevel_thunk, lambda_locals, rett, + is_toplevel_thunk, lambda_bindings, rett, Dict{String,JumpTarget{GraphType}}(), SyntaxList(ctx), SyntaxList(ctx), Vector{FinallyHandler{GraphType}}(), Dict{String,JumpTarget{GraphType}}(), Vector{JumpOrigin{GraphType}}(), ctx.mod) @@ -399,7 +399,7 @@ function compile_conditional(ctx, ex, false_label) end function add_lambda_local!(ctx::LinearIRContext, id) - push!(ctx.lambda_locals, id) + push!(ctx.lambda_bindings.locals, id) end # Lowering of exception handling must ensure that @@ -923,7 +923,7 @@ function compile_lambda(outer_ctx, ex) static_parameters = ex[2] ret_var = numchildren(ex) == 4 ? ex[4] : nothing # TODO: Add assignments for reassigned arguments to body using lambda_args - ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, ex.lambda_locals, ret_var) + ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, ex.lambda_bindings, ret_var) compile_body(ctx, ex[3]) slots = Vector{Slot}() slot_rewrites = Dict{IdTag,Int}() @@ -941,7 +941,7 @@ function compile_lambda(outer_ctx, ex) end end # Sorting the lambda locals is required to remove dependence on Dict iteration order. - for id in sort(collect(ex.lambda_locals)) + for id in sort(collect(ex.lambda_bindings.locals)) info = lookup_binding(ctx.bindings, id) @assert info.kind == :local push!(slots, Slot(info.name, :local, false)) @@ -980,7 +980,7 @@ function linearize_ir(ctx, ex) # required to call reparent() ... 
GraphType = typeof(graph) _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, - Ref(0), false, Set{IdTag}(), nothing, + Ref(0), false, LambdaBindings(), nothing, Dict{String,JumpTarget{typeof(graph)}}(), SyntaxList(graph), SyntaxList(graph), Vector{FinallyHandler{GraphType}}(), diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 7b4b4761a09e5..9f7e1acd29049 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -123,6 +123,21 @@ function NameKey(ex::SyntaxTree) NameKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) end +struct CaptureInfo + is_read::Bool + is_assigned::Bool + is_called::Bool +end + +struct LambdaBindings + # Local bindings within the lambda + locals::Set{IdTag} + captures::Dict{IdTag,CaptureInfo} +end + +LambdaBindings() = LambdaBindings(Set{IdTag}(), Dict{IdTag,CaptureInfo}()) + + struct ScopeInfo # True if scope is the global top level scope is_toplevel_global_scope::Bool @@ -136,8 +151,8 @@ struct ScopeInfo # parent scope # TODO: Rename to `locals` or local_bindings? var_ids::Dict{NameKey,IdTag} - # Variables used by the enclosing lambda - lambda_locals::Set{IdTag} + # Bindings used by the enclosing lambda + lambda_bindings::LambdaBindings end struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext @@ -157,7 +172,7 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext end function ScopeResolutionContext(ctx) - graph = ensure_attributes(ctx.graph, lambda_locals=Set{IdTag}) + graph = ensure_attributes(ctx.graph, lambda_bindings=LambdaBindings) ScopeResolutionContext(graph, ctx.bindings, ctx.mod, @@ -345,17 +360,17 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end - lambda_locals = is_outer_lambda_scope ? Set{IdTag}() : parentscope.lambda_locals + lambda_bindings = is_outer_lambda_scope ? 
LambdaBindings() : parentscope.lambda_bindings for id in values(var_ids) vk = var_kind(ctx, id) if vk === :local - push!(lambda_locals, id) + push!(lambda_bindings.locals, id) end end for id in used_bindings info = lookup_binding(ctx, id) if !info.is_ssa && info.kind == :local - push!(lambda_locals, id) + push!(lambda_bindings.locals, id) end end @@ -369,7 +384,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end return ScopeInfo(is_toplevel_global_scope, in_toplevel_thunk, is_soft_scope, - is_hard_scope, var_ids, lambda_locals) + is_hard_scope, var_ids, lambda_bindings) end # Do some things which are better done after converting to BindingId. @@ -433,7 +448,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) ret_var = numchildren(ex) == 4 ? _resolve_scopes(ctx, ex[4]) : nothing pop!(ctx.scope_stack) - @ast ctx ex [K"lambda"(lambda_locals=scope.lambda_locals, + @ast ctx ex [K"lambda"(lambda_bindings=scope.lambda_bindings, is_toplevel_thunk=is_toplevel_thunk) arg_bindings sparm_bindings diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 291aede15d176..67a3a9b923f66 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -595,11 +595,6 @@ function f(x=1, ys...=(1,2)...) end """ -src = """ -function f((x,), (x,)) -end -""" - # TODO: fix this - it's interpreted in a bizarre way as a kw call. 
# src = """ # function f(x=y=1) From 41235665020ac71b34a3501ef5018a769fac5591 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 3 Dec 2024 10:44:25 +1000 Subject: [PATCH 0901/1109] Minor fix to README --- JuliaLowering/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 14eca8bacbfc3..8e974cb7fde4e 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -326,9 +326,7 @@ optimizations include: Properties of non-globals which are computed per-binding-per-closure include: * Read: the value of the binding is used. -* Write: the binding is asssigned to. Such bindings often need to become - `Core.Box` so their value can be shared between the defining scope and the - closure body. +* Write: the binding is asssigned to. * Captured: Bindings defined outside the closure which are either Read or Write within the closure are "captured" and need to be one of the closure's fields. * Called: the binding is called as a function, ie, `x()`. (TODO - what is this From feffa3f22296123a74f3a8a1e5dc6d18b4bcb413 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 3 Dec 2024 12:11:53 +1000 Subject: [PATCH 0902/1109] `@ SyntaxTree` macro for `SyntaxTree` literals in normal lowering This macro is quite hacky but also really cool - it allows you to use - what appear to be - normal `quote` blocks to extract `SyntaxTree` syntax literals rather than `Expr`. Including basic syntax interpolation (!?!). Honestly I'm kind of amazed this works as well as it seems to work. It even works in the REPL courtesy of some extremely sketchy digging into the REPL history internals. 
--- JuliaLowering/src/syntax_graph.jl | 83 ++++++++++++++++++++++++++++++ JuliaLowering/test/syntax_graph.jl | 14 +++++ 2 files changed, 97 insertions(+) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 8847e434d547b..26b4650934e09 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -546,6 +546,89 @@ end Base.Expr(ex::SyntaxTree) = JuliaSyntax.to_expr(ex) +#-------------------------------------------------- +function _find_SyntaxTree_macro(ex, line) + @assert !is_leaf(ex) + for c in children(ex) + rng = byte_range(c) + firstline = JuliaSyntax.source_line(sourcefile(c), first(rng)) + lastline = JuliaSyntax.source_line(sourcefile(c), last(rng)) + if line < firstline || lastline < line + continue + end + # We're in the line range. Either + if firstline == line && kind(c) == K"macrocall" && begin + name = c[1] + if kind(name) == K"." + name = name[2] + end + @assert kind(name) == K"MacroName" + name.name_val == "@SyntaxTree" + end + # We find the node we're looking for. NB: Currently assuming a max + # of one @SyntaxTree invocation per line. Though we could relax + # this with more heuristic matching of the Expr-AST... + @assert numchildren(c) == 2 + return c[2] + elseif !is_leaf(c) + # Recurse + ex1 = _find_SyntaxTree_macro(c, line) + if !isnothing(ex1) + return ex1 + end + end + end + return nothing # Will get here if multiple children are on the same line. +end + +""" +Macro to construct quoted SyntaxTree literals (instead of quoted Expr literals) +in normal Julia source code. + +Example: + +```julia +tree1 = @SyntaxTree :(some_unique_identifier) +tree2 = @SyntaxTree quote + x = 1 + \$tree1 = x +end +``` +""" +macro SyntaxTree(ex_old) + # The implementation here is hilarious and arguably very janky: we + # 1. 
Briefly check but throw away the Expr-AST + if !(Meta.isexpr(ex_old, :quote) || ex_old isa QuoteNode) + throw(ArgumentError("@SyntaxTree expects a `quote` block or `:`-quoted expression")) + end + # 2. Re-parse the current source file as SyntaxTree instead + fname = String(__source__.file) + if occursin(r"REPL\[\d+\]", fname) + # Assume we should look at last history entry in REPL + try + # Wow digging in like this is an awful hack but `@SyntaxTree` is + # already a hack so let's go for it I guess 😆 + text = Base.active_repl.mistate.interface.modes[1].hist.history[end] + if !occursin("@SyntaxTree", text) + error("Text not found in last REPL history line") + end + catch + error("Text not found in REPL history") + end + else + text = read(fname, String) + end + full_ex = parseall(SyntaxTree, text) + # 3. Using the current file and line number, dig into the re-parsed tree and + # discover the piece of AST which should be returned. + ex = _find_SyntaxTree_macro(full_ex, __source__.line) + # 4. Do the first step of JuliaLowering's syntax lowering to get + # synax interpolations to work + _, ex1 = expand_forms_1(__module__, ex) + @assert kind(ex1) == K"call" && ex1[1].value == interpolate_ast + esc(Expr(:call, interpolate_ast, ex1[2][1], map(Expr, ex1[3:end])...)) +end + #------------------------------------------------------------------------------- # Lightweight vector of nodes ids with associated pointer to graph stored separately. 
struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree} diff --git a/JuliaLowering/test/syntax_graph.jl b/JuliaLowering/test/syntax_graph.jl index 282be30cc6479..153dbc88265f2 100644 --- a/JuliaLowering/test/syntax_graph.jl +++ b/JuliaLowering/test/syntax_graph.jl @@ -2,4 +2,18 @@ # Expr conversion @test Expr(parsestmt(SyntaxTree, "begin a + b ; c end", filename="none")) == Meta.parse("begin a + b ; c end") + + tree1 = JuliaLowering.@SyntaxTree :(some_unique_identifier) + @test tree1 isa SyntaxTree + @test kind(tree1) == K"Identifier" + @test tree1.name_val == "some_unique_identifier" + + tree2 = JuliaLowering.@SyntaxTree quote + x + $tree1 + end + @test tree2 isa SyntaxTree + @test kind(tree2) == K"block" + @test kind(tree2[1]) == K"Identifier" && tree2[1].name_val == "x" + @test kind(tree2[2]) == K"Identifier" && tree2[2].name_val == "some_unique_identifier" end From 1ebe863839fe11f3dd93f32933a9e54744ec7a76 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 4 Dec 2024 16:56:02 +1000 Subject: [PATCH 0903/1109] Move demo includes into main demo file --- JuliaLowering/test/demo.jl | 123 +++++++++++++++++++++++++-- JuliaLowering/test/demo_include.jl | 101 ---------------------- JuliaLowering/test/demo_include_2.jl | 12 --- 3 files changed, 114 insertions(+), 122 deletions(-) delete mode 100644 JuliaLowering/test/demo_include.jl delete mode 100644 JuliaLowering/test/demo_include_2.jl diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 67a3a9b923f66..c247d49d1e04e 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -40,7 +40,7 @@ end #------------------------------------------------------------------------------- # Module containing macros used in the demo. 
-baremodule M +eval(JuliaLowering.@SyntaxTree :(baremodule M using Base using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode @@ -52,8 +52,118 @@ baremodule M JuliaSyntax.Kind(str) end - # JuliaLowering.include(M, "demo_include.jl") -end + # Introspection + macro __MODULE__() + __context__.scope_layer.mod + end + + macro __FILE__() + JuliaLowering.filename(__context__.macrocall) + end + + macro __LINE__() + JuliaLowering.source_location(__context__.macrocall)[1] + end + + # Macro with local variables + module A + another_global = "global in A" + + macro bar(ex) + quote + x = "`x` in @bar" + (x, another_global, $ex) + end + end + end + + someglobal = "global in module M" + + # Macro with local variables + macro foo(ex) + quote + x = "`x` from @foo" + (x, someglobal, A.@bar $ex) + #(x, someglobal, $ex, A.@bar($ex), A.@bar(x)) + end + end + + macro set_a_global(val) + quote + global a_global = $val + end + end + + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + $e1 = $ex + end + end + + macro baz(ex) + quote + let $ex = 10 + $ex + end + end + end + + macro make_module() + :(module X + blah = 10 + end) + end + + macro return_a_value() + 42 + end + + macro nested_return_a_value() + :( + @return_a_value + ) + end + + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end + + macro K_str(str) + JuliaSyntax.Kind(str[1].value) + end + + # Recursive macro call + macro recursive(N) + Nval = if kind(N) == K"Integer" || kind(N) == K"Value" + N.value + end + if !(Nval isa Integer) + throw(MacroExpansionError(N, "argument must be an integer")) + end + if Nval < 1 + return N + end + quote + x = $N + (@recursive($(Nval-1)), x) + end + end + + xx = "xx in M" + + macro test_inert_quote() + println(xx) + @inert quote + ($xx, xx) + end + end + +end)) #------------------------------------------------------------------------------- # Demos of the prototype @@ -488,10 +598,6 @@ src = """ 
(; a=1, a=2) """ -function f(args...; kws...) - @info "" args kws -end - src = """ begin kws = (c=3, d=4) @@ -607,8 +713,7 @@ ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @info "Input code" formatsrc(ex) -module MMM end -in_mod = MMM +in_mod = M # in_mod=Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) @info "Macro expanded" ex_macroexpand formatsrc(ex_macroexpand, color_by=:scope_layer) diff --git a/JuliaLowering/test/demo_include.jl b/JuliaLowering/test/demo_include.jl deleted file mode 100644 index 34417b43745d9..0000000000000 --- a/JuliaLowering/test/demo_include.jl +++ /dev/null @@ -1,101 +0,0 @@ -# Introspection -macro __MODULE__() - __context__.scope_layer.mod -end - -macro __FILE__() - JuliaLowering.filename(__context__.macrocall) -end - -macro __LINE__() - JuliaLowering.source_location(__context__.macrocall)[1] -end - -# Macro with local variables -JuliaLowering.include(M, "demo_include_2.jl") - -someglobal = "global in module M" - -# Macro with local variables -macro foo(ex) - quote - x = "`x` from @foo" - (x, someglobal, A.@bar $ex) - #(x, someglobal, $ex, A.@bar($ex), A.@bar(x)) - end -end - -macro set_a_global(val) - quote - global a_global = $val - end -end - -macro set_global_in_parent(ex) - e1 = adopt_scope(:(sym_introduced_from_M), __context__) - quote - $e1 = $ex - end -end - -macro baz(ex) - quote - let $ex = 10 - $ex - end - end -end - -macro make_module() - :(module X - blah = 10 - end) -end - -macro return_a_value() - 42 -end - -macro nested_return_a_value() - :( - @return_a_value - ) -end - -macro inner() - :(2) -end - -macro outer() - :((1, @inner)) -end - -macro K_str(str) - JuliaSyntax.Kind(str[1].value) -end - -# Recursive macro call -macro recursive(N) - Nval = if kind(N) == K"Integer" || kind(N) == K"Value" - N.value - end - if !(Nval isa Integer) - throw(MacroExpansionError(N, "argument must be an integer")) - end - if Nval < 1 - return N - end - quote - x = $N - (@recursive($(Nval-1)), x) - end 
-end - -xx = "xx in M" - -macro test_inert_quote() - println(xx) - @inert quote - ($xx, xx) - end -end diff --git a/JuliaLowering/test/demo_include_2.jl b/JuliaLowering/test/demo_include_2.jl deleted file mode 100644 index ee084f9b29c29..0000000000000 --- a/JuliaLowering/test/demo_include_2.jl +++ /dev/null @@ -1,12 +0,0 @@ - - module A - another_global = "global in A" - - macro bar(ex) - quote - x = "`x` in @bar" - (x, another_global, $ex) - end - end - end - From 63e00aeed81d24bb63f0f28fdb176de177199ba7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 5 Dec 2024 18:01:48 +1000 Subject: [PATCH 0904/1109] Record captured locals per lambda --- JuliaLowering/src/closure_conversion.jl | 2 +- JuliaLowering/src/linear_ir.jl | 15 ++- JuliaLowering/src/scope_analysis.jl | 141 +++++++++++++++++++----- 3 files changed, 125 insertions(+), 33 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index b33c5cd4e2ec5..139d2ef004c84 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -6,7 +6,7 @@ struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext end function add_lambda_local!(ctx::ClosureConversionCtx, id) - push!(ctx.lambda_bindings.locals, id) + init_lambda_binding(ctx.lambda_bindings, id) end # Convert `ex` to `type` by calling `convert(type, ex)` when necessary. 
diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 9abe2f3d12e23..ed60d9044521f 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -399,7 +399,7 @@ function compile_conditional(ctx, ex, false_label) end function add_lambda_local!(ctx::LinearIRContext, id) - push!(ctx.lambda_bindings.locals, id) + init_lambda_binding(ctx.lambda_bindings, id) end # Lowering of exception handling must ensure that @@ -941,11 +941,14 @@ function compile_lambda(outer_ctx, ex) end end # Sorting the lambda locals is required to remove dependence on Dict iteration order. - for id in sort(collect(ex.lambda_bindings.locals)) - info = lookup_binding(ctx.bindings, id) - @assert info.kind == :local - push!(slots, Slot(info.name, :local, false)) - slot_rewrites[id] = length(slots) + for (id, lbinfo) in sort(collect(pairs(ex.lambda_bindings.bindings)), by=first) + if !lbinfo.is_captured + info = lookup_binding(ctx.bindings, id) + if info.kind == :local + push!(slots, Slot(info.name, :local, false)) + slot_rewrites[id] = length(slots) + end + end end for (i,arg) in enumerate(children(static_parameters)) @assert kind(arg) == K"BindingId" diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 9f7e1acd29049..7188cfed7b83e 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -123,19 +123,49 @@ function NameKey(ex::SyntaxTree) NameKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) end -struct CaptureInfo +# Metadata about how a binding is used within some enclosing lambda +struct LambdaBindingInfo + is_captured::Bool is_read::Bool is_assigned::Bool is_called::Bool end +LambdaBindingInfo() = LambdaBindingInfo(false, false, false, false) + +function LambdaBindingInfo(parent::LambdaBindingInfo; + is_captured = nothing, + is_read = nothing, + is_assigned = nothing, + is_called = nothing) + LambdaBindingInfo( + isnothing(is_captured) ? 
parent.is_captured : is_captured, + isnothing(is_read) ? parent.is_read : is_read, + isnothing(is_assigned) ? parent.is_assigned : is_assigned, + isnothing(is_called) ? parent.is_called : is_called, + ) +end + struct LambdaBindings - # Local bindings within the lambda - locals::Set{IdTag} - captures::Dict{IdTag,CaptureInfo} + # Bindings used within the lambda + bindings::Dict{IdTag,LambdaBindingInfo} end -LambdaBindings() = LambdaBindings(Set{IdTag}(), Dict{IdTag,CaptureInfo}()) +function init_lambda_binding(binds::LambdaBindings, id; kws...) + @assert !haskey(binds.bindings, id) + binds.bindings[id] = LambdaBindingInfo(LambdaBindingInfo(); kws...) +end + +function update_lambda_binding!(binds::LambdaBindings, id; kws...) + binfo = binds.bindings[id] + binds.bindings[id] = LambdaBindingInfo(binfo; kws...) +end + +function update_lambda_binding!(ctx::AbstractLoweringContext, id; kws...) + update_lambda_binding!(last(ctx.scope_stack).lambda_bindings, id; kws...) +end + +LambdaBindings() = LambdaBindings(Dict{IdTag,LambdaBindings}()) struct ScopeInfo @@ -230,17 +260,19 @@ function add_lambda_args(ctx, var_ids, args, args_kind) "static parameter name not distinct from function argument" throw(LoweringError(arg, msg)) end - var_ids[varkey] = init_binding(ctx, varkey, args_kind; - is_nospecialize=getmeta(arg, :nospecialize, false)) + id = init_binding(ctx, varkey, args_kind; + is_nospecialize=getmeta(arg, :nospecialize, false)) + var_ids[varkey] = id elseif ka != K"BindingId" && ka != K"Placeholder" throw(LoweringError(arg, "Unexpected lambda arg kind")) end end end -# Analyze identifier usage within a scope, adding all newly discovered -# identifiers to ctx.bindings and returning a lookup table from identifier -# names to their variable IDs +# Analyze identifier usage within a scope +# * Allocate a new binding for each identifier which the scope introduces. 
+# * Record the identifier=>binding mapping in a lookup table +# * Return a `ScopeInfo` with the mapping plus additional scope metadata function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, lambda_args=nothing, lambda_static_parameters=nothing) parentscope = isempty(ctx.scope_stack) ? nothing : ctx.scope_stack[end] @@ -251,8 +283,13 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, assignments, locals, destructured_args, globals, used, used_bindings, alias_bindings = find_scope_vars(ex) - # Create new lookup table for variables in this scope which differ from the - # parent scope. + # Construct a mapping from identifiers to bindings + # + # This will contain a binding ID for each variable which is introduced by + # the scope, including + # * Explicit locals + # * Explicit globals + # * Implicit locals created by assignment var_ids = Dict{NameKey,IdTag}() if !isnothing(lambda_args) @@ -272,8 +309,9 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end elseif var_kind(ctx, varkey) === :static_parameter throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) + else + var_ids[varkey] = init_binding(ctx, varkey, :local) end - var_ids[varkey] = init_binding(ctx, varkey, :local) end # Add explicit globals @@ -353,24 +391,67 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end - for varkey in used - if lookup_var(ctx, varkey) === nothing - # Add other newly discovered identifiers as globals - init_binding(ctx, varkey, :global) - end - end - + #-------------------------------------------------- + # At this point we've discovered all the bindings defined in this scope and + # added them to `var_ids`. 
+ # + # Next we record information about how the new bindings relate to the + # enclosing lambda + # * All non-globals are recorded (kind :local and :argument will later be turned into slots) + # * Captured variables are detected and recorded lambda_bindings = is_outer_lambda_scope ? LambdaBindings() : parentscope.lambda_bindings + for id in values(var_ids) - vk = var_kind(ctx, id) - if vk === :local - push!(lambda_bindings.locals, id) + binfo = lookup_binding(ctx, id) + if !binfo.is_ssa && binfo.kind !== :global + init_lambda_binding(lambda_bindings, id) end end + + # FIXME: This assumes used bindings are internal to the lambda and cannot + # be from the environment, and also assumes they are assigned. That's + # correct for now but in general we should go by the same code path that + # identifiers do. for id in used_bindings - info = lookup_binding(ctx, id) - if !info.is_ssa && info.kind == :local - push!(lambda_bindings.locals, id) + binfo = lookup_binding(ctx, id) + if !binfo.is_ssa && binfo.kind !== :global + if !haskey(lambda_bindings.bindings, id) + init_lambda_binding(lambda_bindings, id, is_read=true, is_assigned=true) + end + end + end + + for varkey in used + id = haskey(var_ids, varkey) ? var_ids[varkey] : lookup_var(ctx, varkey) + if id === nothing + # Identifiers which are used but not defined in some scope are + # newly discovered global bindings + init_binding(ctx, varkey, :global) + elseif !in_toplevel_thunk + binfo = lookup_binding(ctx, id) + if binfo.kind !== :global + if !haskey(lambda_bindings.bindings, id) + # Used vars from a scope *outside* the current lambda are captured + init_lambda_binding(lambda_bindings, id, is_captured=true, is_read=true) + else + update_lambda_binding!(lambda_bindings, id, is_read=true) + end + end + end + end + + if !in_toplevel_thunk + for (varkey,_) in assignments + id = haskey(var_ids, varkey) ? 
var_ids[varkey] : lookup_var(ctx, varkey) + binfo = lookup_binding(ctx, id) + if binfo.kind !== :global + if !haskey(lambda_bindings.bindings, id) + # Assigned vars from a scope *outside* the current lambda are captured + init_lambda_binding(lambda_bindings, id, is_captured=true, is_assigned=true) + else + update_lambda_binding!(lambda_bindings, id, is_assigned=true) + end + end end end @@ -410,6 +491,14 @@ function maybe_update_bindings!(ctx, ex) throw(LoweringError(ex, "unsupported `const` declaration on local variable")) end update_binding!(ctx, id; is_const=true) + elseif k == K"call" + name = ex[1] + if kind(name) == K"BindingId" + id = name.var_id + if haskey(last(ctx.scope_stack).lambda_bindings.bindings, id) + update_lambda_binding!(ctx, id, is_called=true) + end + end end nothing end From 17ed5127ec076f278576fadb6f7b2bf95b05bc45 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 8 Dec 2024 20:32:50 +1000 Subject: [PATCH 0905/1109] Fix incomplete_tag generation with trailing whitespace (JuliaLang/JuliaSyntax.jl#518) Previously we relied on the last SyntaxNode consuming all trailing whitespace when detecting incomplete syntax. However this assumption was broken by JuliaLang/JuliaSyntax.jl#397 and is generally fragile with respect to any extra bump_trivia() calls. Fix this by just comparing to the stream position before any bumping of remaining trivia. 
--- JuliaSyntax/src/hooks.jl | 3 ++- JuliaSyntax/test/hooks.jl | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index afb8ba8bf01d3..a392b61d7cfba 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -170,6 +170,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end end parse!(stream; rule=options) + pos_before_trivia = last_byte(stream) if options === :statement bump_trivia(stream; skip_newlines=false) if peek(stream) == K"NewlineWs" @@ -179,7 +180,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti if any_error(stream) tree = build_tree(SyntaxNode, stream, first_line=lineno, filename=filename) - tag = _incomplete_tag(tree, lastindex(code)) + tag = _incomplete_tag(tree, pos_before_trivia) if _has_v1_10_hooks exc = ParseError(stream, filename=filename, first_line=lineno, incomplete_tag=tag) diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index d0111aaca1c9c..61772ce00d106 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -181,6 +181,9 @@ end "1, " => :other "1,\n" => :other "1, \n" => :other + "f(1, " => :other + "[x " => :other + "( " => :other # Reference parser fails to detect incomplete exprs in this case "(x for y" => :other From e7de2a5561e8a226426db4855d514a44a04a3b59 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 21 Dec 2024 07:53:15 +1000 Subject: [PATCH 0906/1109] Make child list construction more efficient for ASTs Rather than forcing construction of individual `SyntaxTree`s and splatting of SyntaxList into such trees, collect children into a single SyntaxList directly. This is basically doing our own lowering of _apply_iterate where we happen to know the concrete type of the list of arguments. 
--- JuliaLowering/src/ast.jl | 40 ++++++++++++++++++++++++---- JuliaLowering/src/linear_ir.jl | 13 ++++----- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/test/utils.jl | 2 +- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 2fd5b266dcf13..e1aa9cf786789 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -169,6 +169,22 @@ _unpack_srcref(graph, srcref::SyntaxTree) = _node_id(graph, srcref) _unpack_srcref(graph, srcref::Tuple) = _node_ids(graph, srcref...) _unpack_srcref(graph, srcref) = srcref +function _push_nodeid!(graph::SyntaxGraph, ids::Vector{NodeId}, val) + push!(ids, _node_id(graph, val)) +end +function _push_nodeid!(graph::SyntaxGraph, ids::Vector{NodeId}, val::Nothing) + nothing +end +function _append_nodeids!(graph::SyntaxGraph, ids::Vector{NodeId}, vals) + for v in vals + _push_nodeid!(graph, ids, v) + end +end +function _append_nodeids!(graph::SyntaxGraph, ids::Vector{NodeId}, vals::SyntaxList) + check_compatible_graph(graph, vals) + append!(ids, vals.ids) +end + function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...) id = newnode!(graph) ex = SyntaxTree(graph, id) @@ -177,17 +193,20 @@ function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...) return ex end -function makenode(graph::SyntaxGraph, srcref, proto, children...; attrs...) +function _makenode(graph::SyntaxGraph, srcref, proto, children; attrs...) id = newnode!(graph) - setchildren!(graph, id, _node_ids(graph, children...)) + setchildren!(graph, id, children) ex = SyntaxTree(graph, id) copy_attrs!(ex, proto, true) setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...) return SyntaxTree(graph, id) end +function _makenode(ctx, srcref, proto, children; attrs...) + _makenode(syntax_graph(ctx), srcref, proto, children; attrs...) +end function makenode(ctx, srcref, proto, children...; attrs...) - makenode(syntax_graph(ctx), srcref, proto, children...; attrs...) 
+ _makenode(ctx, srcref, proto, _node_ids(syntax_graph(ctx), children...); attrs...) end function makeleaf(ctx, srcref, proto; kws...) @@ -335,9 +354,20 @@ function _expand_ast_tree(ctx, srcref, tree) push!(flatargs, a) end end + children_ex = :(let child_ids = Vector{NodeId}(), graph = syntax_graph($ctx) + end) + child_stmts = children_ex.args[2].args + for a in flatargs[2:end] + child = _expand_ast_tree(ctx, srcref, a) + if Meta.isexpr(child, :(...)) + push!(child_stmts, :(_append_nodeids!(graph, child_ids, $(child.args[1])))) + else + push!(child_stmts, :(_push_nodeid!(graph, child_ids, $child))) + end + end + push!(child_stmts, :(child_ids)) _match_kind(srcref, flatargs[1]) do kind, srcref, kws - children = map(a->_expand_ast_tree(ctx, srcref, a), flatargs[2:end]) - :(makenode($ctx, $srcref, $kind, $(children...), $(kws...))) + :(_makenode($ctx, $srcref, $kind, $children_ex; $(kws...))) end elseif Meta.isexpr(tree, :(:=)) lhs = tree.args[1] diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index ed60d9044521f..ab3d0d4836014 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -203,7 +203,7 @@ function compile_leave_handler(ctx, srcref, src_tokens, dest_tokens) jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) jump_ok || throw(LoweringError(srcref, "Attempt to jump into try block")) if n < length(src_tokens) - @ast ctx srcref [K"leave" src_tokens[n+1:end]] + @ast ctx srcref [K"leave" src_tokens[n+1:end]...] else nothing end @@ -959,11 +959,12 @@ function compile_lambda(outer_ctx, ex) end # @info "" @ast ctx ex [K"block" ctx.code] code = renumber_body(ctx, ctx.code, slot_rewrites) - makenode(ctx, ex, K"code_info", - makenode(ctx, ex[1], K"block", code), - is_toplevel_thunk=ex.is_toplevel_thunk, - slots=slots - ) + @ast ctx ex [K"code_info"(is_toplevel_thunk=ex.is_toplevel_thunk, + slots=slots) + [K"block"(ex[3]) + code... 
+ ] + ] end """ diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 249c57bda14c7..bdbf7e4607a6a 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -41,7 +41,7 @@ function collect_unquoted!(ctx, unquoted, ex, depth) end function expand_quote(ctx, ex) - unquoted = SyntaxTree[] + unquoted = SyntaxList(ctx) collect_unquoted!(ctx, unquoted, ex, 0) # Unlike user-defined macro expansion, we don't call append_sourceref for # the entire expression produced by `quote` expansion. We could, but it diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 7586d41ef1b6e..03ada78397202 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -38,7 +38,7 @@ macro ast_(tree) quote graph = _ast_test_graph() srcref = _source_node(graph, $(QuoteNode(__source__))) - $(JuliaLowering._expand_ast_tree(:graph, :srcref, tree)) + @ast graph srcref $tree end end From ad0cc8f1df30fcf6c67a0d1d090dcd947e37e9d9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 6 Dec 2024 16:15:47 +1000 Subject: [PATCH 0907/1109] Prepare for moving methods of closures to top level --- JuliaLowering/src/closure_conversion.jl | 36 +++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 139d2ef004c84..7cfec71bbf0e2 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -3,6 +3,7 @@ struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext bindings::Bindings mod::Module lambda_bindings::LambdaBindings + toplevel_stmts::SyntaxList{GraphType} end function add_lambda_local!(ctx::ClosureConversionCtx, id) @@ -164,13 +165,39 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) children(ex)... 
]) elseif k == K"lambda" - ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_bindings) - mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) + closure_convert_lambda(ctx, ex) else mapchildren(e->_convert_closures(ctx, e), ctx, ex) end end +function closure_convert_lambda(ctx, ex) + @assert kind(ex) == K"lambda" + body_stmts = SyntaxList(ctx) + toplevel_stmts = ex.is_toplevel_thunk ? body_stmts : ctx.toplevel_stmts + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ex.lambda_bindings, toplevel_stmts) + lambda_children = SyntaxList(ctx) + push!(lambda_children, _convert_closures(ctx2, ex[1])) + push!(lambda_children, _convert_closures(ctx2, ex[2])) + + # Convert body. This is done as a special case to allow inner calls to + # _convert_closures to also add to body_stmts in the case that + # ex.is_toplevel_thunk is true. + in_body_stmts = kind(ex[3]) != K"block" ? ex[3:3] : ex[3][1:end] + for e in in_body_stmts + push!(body_stmts, _convert_closures(ctx2, e)) + end + push!(lambda_children, @ast ctx2 ex[3] [K"block" body_stmts...]) + + if numchildren(ex) > 3 + @assert numchildren(ex) == 4 + push!(lambda_children, _convert_closures(ctx2, ex[4])) + end + + makenode(ctx, ex, ex, lambda_children) +end + """ Closure conversion and lowering of bindings @@ -186,8 +213,7 @@ Invariants: * Any new binding IDs must be added to the enclosing lambda locals """ function convert_closures(ctx::ScopeResolutionContext, ex) - @assert kind(ex) == K"lambda" - ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_bindings) - ex1 = _convert_closures(ctx, ex) + ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_bindings, SyntaxList(ctx)) + ex1 = closure_convert_lambda(ctx, ex) ctx, ex1 end From 9750b1a467316e6317e6473dcc5efc7a4fd5d4c1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 17 Dec 2024 05:52:04 +1000 Subject: [PATCH 0908/1109] Get basic closure conversion somewhat working Implement basic creation of 
closure types with all variables boxed. Variables outside the closure are not handled correctly yet. Desugaring of functions which become closures is tricky because we don't yet know that they're closures and we might move their method definitions to top level. To aid with this, introduce three intermediate forms: * `K"function_decl"` to create a local or global binding for the function name, * `K"function_type"` to evaluate the type (either global generic function or closure converted struct) * `K"method_defs"` to encapsulate code which creates methods and might be moved to top level. (Just using `K"method"` isn't ideal for this because we might want to evaluate some intermediate forms and reuse them for multiple method definitions for convenience and to avoid multiple evaluation of expressions with side effects. Eg when lowering functions with default positional args to multiple method defs.) --- JuliaLowering/README.md | 125 ++++++++++++++ JuliaLowering/src/ast.jl | 6 + JuliaLowering/src/closure_conversion.jl | 215 +++++++++++++++++++++++- JuliaLowering/src/desugaring.jl | 89 +++++----- JuliaLowering/src/eval.jl | 20 ++- JuliaLowering/src/kinds.jl | 14 ++ JuliaLowering/src/linear_ir.jl | 8 + JuliaLowering/src/scope_analysis.jl | 134 ++++++++------- 8 files changed, 494 insertions(+), 117 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 8e974cb7fde4e..e604c4432de37 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -339,6 +339,131 @@ The second step uses this metadata to * Lower const and non-const global assignments * TODO: probably more here. + +### Q&A + +#### When does `function` introduce a closure? + +Closures are just functions where the name of the function is *local* in scope. +How does the function name become a local? The `function` keyword acts like an +assignment to the function name for the purposes of scope resolution. 
Thus +`function f() body end` is rather like `f = ()->body` and may result in the +symbol `f` being either `local` or `global`. Like other assignments, `f` may be +declared global or local explicitly, but if not `f` is subject to the usual +rules for assignments inside scopes. For example, inside a `let` scope +`function f() ...` would result in the symbol `f` being local. + +Examples: + +```julia +begin + # f is global because `begin ... end` does not introduce a scope + function f() + body + end + + # g is a closure because `g` is explicitly declared local + local g + function g() + body + end +end + +let + # f is local so this is a closure becuase `let ... end` introduces a scope + function f() + body + end + + # g is not a closure because `g` is declared global + global g + function g() + body + end +end +``` + +#### How do captures work with non-closures? + +Yes it's true, you can capture local variables into global methods. For example: + +```julia +begin + local x = 1 + function f(y) + x + y + end + x = 2 +end +``` + +The way this works is to put `x` in a `Box` and interpolate it into the AST of +`f` (the `Box` can be eliminated in some cases, but not here). Essentially this +lowers to code which is almost-equivalent to the following: + +```julia +begin + local x = Core.Box(1) + @eval function f(y) + $(x.contents) + y + end + x.contents = 2 +end +``` + +#### How do captures work with closures with multiple methods? + +Sometimes you might want a closure with multiple methods, but those methods +might capture different local variables. 
For example, + +```julia +let + x = 1 + y = 1.5 + function f(xx::Int) + xx + x + end + function f(yy::Float64) + yy + y + end + + f(42) +end +``` + +In this case, the closure type must capture both `x` and `y` and the generated +code looks rather like this: + +```julia +struct TheClosureType + x + y +end + +let + x = 1 + y = 1.5 + f = TheClosureType(x,y) + function (self::TheClosureType)(xx::Int) + xx + self.x + end + function (self::TheClosureType)(yy::Int) + yy + self.y + end + + f(42) +end +``` + +#### When are `method` defs lifted to top level? + +Closure method definitions must be lifted to top level whenever the definitions +appear inside a function. This is allow efficient compilation and avoid world +age issues. + +Conversely, when method defs appear in top level code, they are executed +inline. + ## Pass 5: Convert to untyped IR This pass is implemented in `linear_ir.jl`. diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index e1aa9cf786789..098c425c84165 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -277,6 +277,12 @@ function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; kind=:local var end +function new_global_binding(ctx::AbstractLoweringContext, srcref, name, mod; kws...) 
+ id = new_binding(ctx.bindings, BindingInfo(name, :global; is_internal=true, mod=mod, kws...)) + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + makeleaf(ctx, nameref, K"BindingId", var_id=id) +end + function alias_binding(ctx::AbstractLoweringContext, srcref) id = new_binding(ctx.bindings, BindingInfo("alias", :alias; is_internal=true)) makeleaf(ctx, srcref, K"BindingId", var_id=id) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 7cfec71bbf0e2..e058ab72362a3 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -1,9 +1,30 @@ +struct ClosureInfo{GraphType} + # Global name of the type of the closure + type_name::SyntaxTree{GraphType} + # Names of fields as K"Symbol" nodes, in order + field_syms::SyntaxList{GraphType} + # Map from the original BindingId of closed-over vars to the index of the + # associated field in the closure type. + field_name_inds::Dict{IdTag,Int} +end + struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings mod::Module + closure_bindings::Dict{IdTag,ClosureBindings} + closure_info::Union{Nothing,ClosureInfo{GraphType}} lambda_bindings::LambdaBindings toplevel_stmts::SyntaxList{GraphType} + closure_infos::Dict{IdTag,ClosureInfo{GraphType}} +end + +function ClosureConversionCtx(graph::GraphType, bindings::Bindings, + mod::Module, closure_bindings::Dict{IdTag,ClosureBindings}, + lambda_bindings::LambdaBindings) where {GraphType} + ClosureConversionCtx{GraphType}( + graph, bindings, mod, closure_bindings, nothing, lambda_bindings, SyntaxList(graph), + Dict{IdTag,ClosureInfo{GraphType}}()) end function add_lambda_local!(ctx::ClosureConversionCtx, id) @@ -132,11 +153,127 @@ function convert_assignment(ctx, ex) end end +# Compute fields for a closure type, one field for each captured variable. 
+function closure_type_fields(ctx, srcref, closure_binds) + capture_ids = Vector{IdTag}() + for lambda_bindings in closure_binds.lambdas + for (id, lbinfo) in lambda_bindings.bindings + if lbinfo.is_captured + push!(capture_ids, id) + end + end + end + # sort here to avoid depending on undefined Dict iteration order. + capture_ids = sort!(unique(capture_ids)) + field_names = Dict{String,IdTag}() + for (i, id) in enumerate(capture_ids) + binfo = lookup_binding(ctx, id) + # We name each field of the closure after the variable which was closed + # over, for clarity. Adding a suffix can be necessary when collisions + # occur due to macro expansion and generated bindings + name0 = binfo.name + name = name0 + i = 1 + while haskey(field_names, name) + name = "$name0#$i" + i += 1 + end + field_names[name] = id + end + field_syms = SyntaxList(ctx) + field_orig_bindings = Vector{IdTag}() + field_name_inds = Dict{IdTag,Int}() + for (name,id) in sort!(collect(field_names)) + push!(field_syms, @ast ctx srcref name::K"Symbol") + push!(field_orig_bindings, id) + field_name_inds[id] = lastindex(field_syms) + end + + return field_syms, field_orig_bindings, field_name_inds +end + +# Return a thunk which creates a new type for a closure with `field_syms` named +# fields. The new type will be named `name_str`, which must be unique. +function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_syms) + # New closure types always belong to the module we're expanding into - they + # need to be serialized there during precompile. + mod = ctx.mod + type_binding = new_global_binding(ctx, srcref, name_str, mod) + type_ex = @ast ctx srcref [K"lambda"(is_toplevel_thunk=true, lambda_bindings=LambdaBindings()) + [K"block"] + [K"block"] + [K"block" + [K"global" type_binding] + closure_type := [K"call" + "_structtype"::K"core" + mod::K"Value" + name_str::K"Symbol" + [K"call" "svec"::K"core"] + [K"call" + "svec"::K"core" + field_syms... 
+ ] + [K"call" "svec"::K"core"] + false::K"Bool" + length(field_syms)::K"Integer" + ] + [K"call" "_setsuper!"::K"core" closure_type "Function"::K"core"] + # TODO: Need K"const_decl" or whatever when we upgrade to the latest Julia. + [K"const" type_binding] + [K"=" type_binding closure_type] + [K"call" + "_typebody!"::K"core" + closure_type + [K"call" "svec"::K"core" ["Box"::K"core" for _ in field_syms]...] + ] + "nothing"::K"core" + ] + ] + type_ex, type_binding +end + function _convert_closures(ctx::ClosureConversionCtx, ex) k = kind(ex) if k == K"BindingId" - # TODO: Captures etc - ex + id = ex.var_id + lbinfo = get(ctx.lambda_bindings.bindings, id, nothing) + if !isnothing(lbinfo) && lbinfo.is_captured + cinfo = ctx.closure_info + field_sym = cinfo.field_syms[cinfo.field_name_inds[id]] + undef_var = new_mutable_var(ctx, ex, lookup_binding(ctx, id).name) + @ast ctx ex [K"block" + box := [K"call" + "getfield"::K"core" + ctx.lambda_bindings.self::K"BindingId" + field_sym + ] + # Lower in an UndefVar check to a similarly named variable + # (ref #20016) so that closure lowering Box introduction + # doesn't impact the error message and the compiler is expected + # to fold away the extraneous null check + # + # TODO: Ideally the runtime would rely on provenance info for + # this error and we can remove isdefined check. 
+ [K"if" [K"call" + "isdefined"::K"core" + box + "contents"::K"Symbol" + ] + "nothing"::K"core" + [K"block" + [K"newvar" undef_var] + undef_var + ] + ] + [K"call" + "getfield"::K"core" + box + "contents"::K"Symbol" + ] + ] + else + ex + end elseif is_leaf(ex) || k == K"inert" ex elseif k == K"=" @@ -166,6 +303,74 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ]) elseif k == K"lambda" closure_convert_lambda(ctx, ex) + elseif k == K"function_decl" + func_name = ex[1] + @assert kind(func_name) == K"BindingId" + func_name_id = func_name.var_id + if haskey(ctx.closure_bindings, func_name_id) + closure_info = get(ctx.closure_infos, func_name_id, nothing) + needs_def = isnothing(closure_info) + if needs_def + # TODO: Names for closures without relying on gensym + name_str = string(gensym("closure")) + field_syms, field_orig_bindings, field_name_inds = + closure_type_fields(ctx, ex, ctx.closure_bindings[func_name_id]) + closure_type_def, closure_type = + type_for_closure(ctx, ex, name_str, field_syms) + push!(ctx.toplevel_stmts, closure_type_def) + closure_info = ClosureInfo(closure_type, field_syms, field_name_inds) + ctx.closure_infos[func_name_id] = closure_info + init_closure_args = SyntaxList(ctx) + for id in field_orig_bindings + # FIXME: This isn't actually correct: we need to convert + # all outer references to boxes too! + push!(init_closure_args, _convert_closures(ctx, + @ast ctx ex [K"call" "Box"::K"core" id::K"BindingId"])) + end + @ast ctx ex [K"block" + [K"=" func_name + [K"new" + closure_type + init_closure_args... + ] + ] + func_name + ] + else + func_name + end + else + # Single-arg K"method" has the side effect of creating a global + # binding for `func_name` if it doesn't exist. 
+ @ast ctx ex [K"method" func_name] + end + elseif k == K"function_type" + func_name = ex[1] + @assert kind(func_name) == K"BindingId" + if lookup_binding(ctx, func_name.var_id).kind == :global + @ast ctx ex [K"call" "Typeof"::K"core" func_name] + else + ctx.closure_infos[func_name.var_id].type_name + end + elseif k == K"method_defs" + name = ex[1] + is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind == :local + cinfo = is_closure ? ctx.closure_infos[name.var_id] : nothing + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, cinfo, ctx.lambda_bindings, + ctx.toplevel_stmts, ctx.closure_infos) + body = _convert_closures(ctx2, ex[2]) + if is_closure + # Move methods to top level + # FIXME: Probably lots more work to do to make this correct + # Especially + # * Renumbering SSA vars + # * Ensuring that moved locals become slots in the top level thunk + push!(ctx.toplevel_stmts, body) + name + else + _convert_closures(ctx, body) + end else mapchildren(e->_convert_closures(ctx, e), ctx, ex) end @@ -176,7 +381,8 @@ function closure_convert_lambda(ctx, ex) body_stmts = SyntaxList(ctx) toplevel_stmts = ex.is_toplevel_thunk ? 
body_stmts : ctx.toplevel_stmts ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ex.lambda_bindings, toplevel_stmts) + ctx.closure_bindings, ctx.closure_info, ex.lambda_bindings, + toplevel_stmts, ctx.closure_infos) lambda_children = SyntaxList(ctx) push!(lambda_children, _convert_closures(ctx2, ex[1])) push!(lambda_children, _convert_closures(ctx2, ex[2])) @@ -213,7 +419,8 @@ Invariants: * Any new binding IDs must be added to the enclosing lambda locals """ function convert_closures(ctx::ScopeResolutionContext, ex) - ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ex.lambda_bindings, SyntaxList(ctx)) + ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, ex.lambda_bindings) ex1 = closure_convert_lambda(ctx, ex) ctx, ex1 end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 39a569b749b8a..9be05d2b14f2b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1293,7 +1293,7 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) end end -function _method_def_expr(ctx, srcref, callex, func_self, method_table, +function _method_def_expr(ctx, srcref, callex, method_table, docs, typevar_names, arg_names, arg_types, ret_var, body) # metadata contains svec(types, sparms, location) @ast ctx srcref [K"block" @@ -1319,14 +1319,7 @@ function _method_def_expr(ctx, srcref, callex, func_self, method_table, ret_var # might be `nothing` and hence removed ] ] - if !isnothing(docs) - [K"call"(docs) - bind_docs!::K"Value" - func_self - docs[1] - method_metadata - ] - end + [K"unnecessary" method_metadata] ] end @@ -1367,7 +1360,7 @@ end # For example for `f(x, y=1, z=2)` we generate two additional methods # f(x) = f(x, 1, 2) # f(x, y) = f(x, y, 2) -function _optional_positional_defs!(ctx, method_stmts, srcref, callex, func_self, +function _optional_positional_defs!(ctx, method_stmts, srcref, callex, method_table, typevar_names, typevar_stmts, 
arg_names, arg_types, first_default, arg_defaults, ret_var) # Replace placeholder arguments with variables - we need to pass them to @@ -1401,7 +1394,7 @@ function _optional_positional_defs!(ctx, method_stmts, srcref, callex, func_self typevar_names, typevar_stmts) # TODO: Ensure we preserve @nospecialize metadata in args push!(method_stmts, - _method_def_expr(ctx, srcref, callex, func_self, method_table, nothing, + _method_def_expr(ctx, srcref, callex, method_table, nothing, trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types, ret_var, body)) end @@ -1441,8 +1434,6 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if kind(name) == K"call" callex = rewrite_call(name) # TODO - # nospecialize - # argument destructuring # dotop names # overlays static_parameters = SyntaxList(ctx) @@ -1505,37 +1496,40 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= push!(arg_types, atype) end - func_self = ssavar(ctx, name, "func_self") + bare_func_name = nothing + doc_obj = nothing + farg_name = nothing if kind(name) == K"::" + # Add methods to an existing type if numchildren(name) == 1 # function (::T)() ... - farg_name = @ast ctx name "#self#"::K"Placeholder" - farg_type_ = name[1] + farg_type = name[1] else # function (f::T)() ... @chk numchildren(name) == 2 farg_name = name[1] - farg_type_ = name[2] + farg_type = name[2] end - func_self_val = farg_type_ # Here we treat the type itself as the function - farg_type = func_self + doc_obj = farg_type else if !is_valid_name(name) throw(LoweringError(name, "Invalid function name")) - elseif is_identifier_like(name) - # function f() ... - func_self_val = @ast ctx name [K"method" name=>K"Symbol"] + end + if is_identifier_like(name) + # Add methods to a global `Function` object, or local closure + # type function f() ... + bare_func_name = name else + # Add methods to an existing Function # function A.B.f() ... 
- func_self_val = name end - farg_name = @ast ctx callex "#self#"::K"Placeholder" - farg_type = @ast ctx callex [K"call" - "Typeof"::K"core" - func_self - ] + doc_obj = name # todo: can closures be documented? + farg_type = @ast ctx name [K"function_type" name] end # Add self argument + if isnothing(farg_name) + farg_name = new_mutable_var(ctx, name, "#self#"; kind=:argument) + end pushfirst!(arg_names, farg_name) pushfirst!(arg_types, farg_type) @@ -1564,25 +1558,42 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if !isempty(arg_defaults) # For self argument added above first_default += 1 - _optional_positional_defs!(ctx, method_stmts, ex, callex, func_self, + _optional_positional_defs!(ctx, method_stmts, ex, callex, method_table, typevar_names, typevar_stmts, arg_names, arg_types, first_default, arg_defaults, ret_var) end # The method with all non-default arguments push!(method_stmts, - _method_def_expr(ctx, ex, callex, func_self, method_table, docs, + _method_def_expr(ctx, ex, callex, method_table, docs, typevar_names, arg_names, arg_types, ret_var, body)) + if !isnothing(docs) + method_stmts[end] = @ast ctx docs [K"block" + method_metadata := method_stmts[end] + @ast ctx docs [K"call" + bind_docs!::K"Value" + doc_obj + docs[1] + method_metadata + ] + ] + end - @ast ctx ex [K"scope_block"(scope_type=:hard) - [K"block" - typevar_stmts... - if !isnothing(method_table_val) - [K"=" method_table method_table_val] - end - [K"=" func_self func_self_val] - method_stmts... - [K"unnecessary" func_self] + @ast ctx ex [K"block" + if !isnothing(bare_func_name) + [K"function_decl"(bare_func_name) bare_func_name] + end + [K"scope_block"(scope_type=:hard) + [K"method_defs" + isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name + [K"block" + typevar_stmts... + if !isnothing(method_table_val) + [K"=" method_table method_table_val] + end + method_stmts... 
+ ] + ] ] ] elseif kind(name) == K"tuple" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 14f544ef6a3f9..d647695f9657c 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -126,9 +126,9 @@ function to_code_info(ex, mod, funcname, slots) end end - prefix_len = length(stmts) + ssa_offset = length(stmts) for stmt in children(ex) - push!(stmts, to_lowered_expr(mod, stmt, prefix_len)) + push!(stmts, to_lowered_expr(mod, stmt, ssa_offset)) add_ir_debug_info!(current_codelocs_stack, stmt) end @@ -190,7 +190,7 @@ function to_code_info(ex, mod, funcname, slots) ) end -function to_lowered_expr(mod, ex, prefix_len=0) +function to_lowered_expr(mod, ex, ssa_offset=0) k = kind(ex) if is_literal(k) ex.value @@ -217,9 +217,9 @@ function to_lowered_expr(mod, ex, prefix_len=0) elseif k == K"static_parameter" Expr(:static_parameter, ex.var_id) elseif k == K"SSAValue" - Core.SSAValue(ex.var_id + prefix_len) + Core.SSAValue(ex.var_id + ssa_offset) elseif k == K"return" - Core.ReturnNode(to_lowered_expr(mod, ex[1], prefix_len)) + Core.ReturnNode(to_lowered_expr(mod, ex[1], ssa_offset)) elseif is_quoted(k) if k == K"inert" ex[1] @@ -241,17 +241,19 @@ function to_lowered_expr(mod, ex, prefix_len=0) elseif k == K"goto" Core.GotoNode(ex[1].id) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, ex[1], prefix_len), ex[2].id) + Core.GotoIfNot(to_lowered_expr(mod, ex[1], ssa_offset), ex[2].id) elseif k == K"enter" catch_idx = ex[1].id numchildren(ex) == 1 ? Core.EnterNode(catch_idx) : - Core.EnterNode(catch_idx, to_lowered_expr(mod, ex[2], prefix_len)) + Core.EnterNode(catch_idx, to_lowered_expr(mod, ex[2], ssa_offset)) elseif k == K"method" - cs = map(e->to_lowered_expr(mod, e, prefix_len), children(ex)) + cs = map(e->to_lowered_expr(mod, e, ssa_offset), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] Expr(:method, c1, cs[2:end]...) 
+ elseif k == K"newvar" + Core.NewvarNode(to_lowered_expr(mod, ex[1], ssa_offset)) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # @@ -272,7 +274,7 @@ function to_lowered_expr(mod, ex, prefix_len=0) if isnothing(head) TODO(ex, "Unhandled form for kind $k") end - Expr(head, map(e->to_lowered_expr(mod, e, prefix_len), children(ex))...) + Expr(head, map(e->to_lowered_expr(mod, e, ssa_offset), children(ex))...) end end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index cfd5530ea4373..6f290a70e58bb 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -59,6 +59,17 @@ function _register_kinds() "top" "core" "lambda" + # [K"function_decl" name] + # Declare a zero-method generic function with global `name` or + # creates a closure object and binds it to the local `name`. + "function_decl" + # [K"function_type name] + # Evaluates to the type of the function or closure with given `name` + "function_type" + # [K"method_defs" name block] + # The code in `block` defines methods for generic function `name` + "method_defs" + # The enclosed statements must be executed at top level "toplevel_butfirst" "const_if_global" "moved_local" @@ -89,6 +100,9 @@ function _register_kinds() "pop_exception" # Lowering targets for method definitions arising from `function` etc "method" + # (re-)initialize a slot to undef + # See Core.NewvarNode + "newvar" # Result of lowering a `K"lambda"` after bindings have been # converted to slot/globalref/SSAValue. 
"code_info" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index ab3d0d4836014..0f9ec39420ce3 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -791,6 +791,14 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif needs_value ex end + elseif k == K"newvar" + @assert !needs_value + is_duplicate = !isempty(ctx.code) && + (e = last(ctx.code); kind(e) == K"newvar" && e[1].var_id == ex[1].var_id) + if !is_duplicate + # TODO: also exclude deleted vars + emit(ctx, ex) + end else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 7188cfed7b83e..ff0765a8fe545 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -1,40 +1,4 @@ -# Lowering pass 3: analyze scopes (passes 2+3 in flisp code) - -#------------------------------------------------------------------------------- -# AST traversal functions - useful for performing non-recursive AST traversals -# function _schedule_traverse(stack, e) -# push!(stack, e) -# return nothing -# end -# function _schedule_traverse(stack, es::Union{Tuple,AbstractVector,Base.Generator}) -# append!(stack, es) -# return nothing -# end -# -# function traverse_ast(f, exs) -# todo = SyntaxList(first(exs).graph) -# append!(todo, exs) -# while !isempty(todo) -# f(pop!(todo), e->_schedule_traverse(todo, e)) -# end -# end -# -# function traverse_ast(f, ex::SyntaxTree) -# traverse_ast(f, (ex,)) -# end -# -# function find_in_ast(f, ex::SyntaxTree) -# todo = SyntaxList(ex._graph) -# push!(todo, ex) -# while !isempty(todo) -# e1 = pop!(todo) -# res = f(e1, e->_schedule_traverse(todo, e)) -# if !isnothing(res) -# return res -# end -# end -# return nothing -# end +# Lowering pass 3: scope and variable analysis """ Key to use when transforming names into bindings @@ -44,6 +8,22 @@ struct NameKey layer::LayerId end +function 
Base.isless(a::NameKey, b::NameKey) + (a.name, a.layer) < (b.name, b.layer) +end + +# Identifiers produced by lowering will have the following layer by default. +# +# To make new mutable variables without colliding names, lowering can +# - generate new var_id's directly (like the gensyms used by the old system) +# - create additional layers, though this may be unnecessary +const _lowering_internal_layer = -1 + +function NameKey(ex::SyntaxTree) + @chk kind(ex) == K"Identifier" + NameKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) +end + #------------------------------------------------------------------------------- _insert_if_not_present!(dict, key, val) = get!(dict, key, val) @@ -66,13 +46,16 @@ function _find_scope_vars!(assignments, locals, destructured_args, globals, used end elseif k == K"global" _insert_if_not_present!(globals, NameKey(ex[1]), ex) - # elseif k == K"method" TODO static parameters elseif k == K"=" v = decl_var(ex[1]) if !(kind(v) in KSet"BindingId globalref Placeholder") _insert_if_not_present!(assignments, NameKey(v), v) end _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex[2]) + elseif k == K"function_decl" + v = ex[1] + @assert kind(v) == K"Identifier" + _insert_if_not_present!(assignments, NameKey(v), v) else for e in children(ex) _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) @@ -107,22 +90,6 @@ function find_scope_vars(ex) return assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings end -function Base.isless(a::NameKey, b::NameKey) - (a.name, a.layer) < (b.name, b.layer) -end - -# Identifiers produced by lowering will have the following layer by default. 
-# -# To make new mutable variables without colliding names, lowering can -# - generate new var_id's directly (like the gensyms used by the old system) -# - create additional layers, though this may be unnecessary -const _lowering_internal_layer = -1 - -function NameKey(ex::SyntaxTree) - @chk kind(ex) == K"Identifier" - NameKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) -end - # Metadata about how a binding is used within some enclosing lambda struct LambdaBindingInfo is_captured::Bool @@ -148,9 +115,12 @@ end struct LambdaBindings # Bindings used within the lambda + self::IdTag bindings::Dict{IdTag,LambdaBindingInfo} end +LambdaBindings(self::IdTag = 0) = LambdaBindings(self, Dict{IdTag,LambdaBindings}()) + function init_lambda_binding(binds::LambdaBindings, id; kws...) @assert !haskey(binds.bindings, id) binds.bindings[id] = LambdaBindingInfo(LambdaBindingInfo(); kws...) @@ -165,8 +135,11 @@ function update_lambda_binding!(ctx::AbstractLoweringContext, id; kws...) update_lambda_binding!(last(ctx.scope_stack).lambda_bindings, id; kws...) end -LambdaBindings() = LambdaBindings(Dict{IdTag,LambdaBindings}()) +struct ClosureBindings + lambdas::Vector{LambdaBindings} +end +ClosureBindings() = ClosureBindings(Vector{LambdaBindings}()) struct ScopeInfo # True if scope is the global top level scope @@ -196,9 +169,12 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext alias_map::Dict{IdTag,IdTag} # Stack of name=>id mappings for each scope, innermost scope last. 
scope_stack::Vector{ScopeInfo} + method_def_stack::SyntaxList{GraphType} # Variables which were implicitly global due to being assigned to in top # level code implicit_toplevel_globals::Set{NameKey} + # + closure_bindings::Dict{IdTag,ClosureBindings} end function ScopeResolutionContext(ctx) @@ -210,7 +186,9 @@ function ScopeResolutionContext(ctx) Dict{NameKey,IdTag}(), Dict{IdTag,IdTag}(), Vector{ScopeInfo}(), - Set{NameKey}()) + SyntaxList(graph), + Set{NameKey}(), + Dict{IdTag,ClosureBindings}()) end function lookup_var(ctx, varkey::NameKey, exclude_toplevel_globals=false) @@ -340,7 +318,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, # a macro expansion for (varkey,e) in assignments vk = haskey(var_ids, varkey) ? - lookup_binding(ctx, var_ids[varkey]).kind : + lookup_binding(ctx, var_ids[varkey]).kind : var_kind(ctx, varkey, true) if vk === nothing if ctx.scope_layers[varkey.layer].is_macro_expansion @@ -399,7 +377,18 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, # enclosing lambda # * All non-globals are recorded (kind :local and :argument will later be turned into slots) # * Captured variables are detected and recorded - lambda_bindings = is_outer_lambda_scope ? LambdaBindings() : parentscope.lambda_bindings + lambda_bindings = if is_outer_lambda_scope + if isempty(lambda_args) + LambdaBindings() + else + selfarg = first(lambda_args) + selfid = kind(selfarg) == K"BindingId" ? + selfarg.var_id : var_ids[NameKey(selfarg)] + LambdaBindings(selfid) + end + else + parentscope.lambda_bindings + end for id in values(var_ids) binfo = lookup_binding(ctx, id) @@ -537,7 +526,21 @@ function _resolve_scopes(ctx, ex::SyntaxTree) ret_var = numchildren(ex) == 4 ? 
_resolve_scopes(ctx, ex[4]) : nothing pop!(ctx.scope_stack) - @ast ctx ex [K"lambda"(lambda_bindings=scope.lambda_bindings, + lambda_bindings = scope.lambda_bindings + if !is_toplevel_thunk + func_name = last(ctx.method_def_stack) + if kind(func_name) == K"BindingId" + func_name_id = func_name.var_id + if lookup_binding(ctx, func_name_id).kind == :local + cbinds = get!(ctx.closure_bindings, func_name_id) do + ClosureBindings() + end + push!(cbinds.lambdas, lambda_bindings) + end + end + end + + @ast ctx ex [K"lambda"(lambda_bindings=lambda_bindings, is_toplevel_thunk=is_toplevel_thunk) arg_bindings sparm_bindings @@ -627,6 +630,11 @@ function _resolve_scopes(ctx, ex::SyntaxTree) else makeleaf(ctx, ex, K"TOMBSTONE") end + elseif k == K"method_defs" + push!(ctx.method_def_stack, _resolve_scopes(ctx, ex[1])) + ex_mapped = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + pop!(ctx.method_def_stack) + ex_mapped else ex_mapped = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) maybe_update_bindings!(ctx, ex_mapped) @@ -658,12 +666,8 @@ Names of kind `K"Identifier"` are transformed into binding identifiers of kind `K"BindingId"`. The associated `Bindings` table in the context records metadata about each binding. -This pass also records the set of binding IDs are locals within the enclosing -lambda form. - -TODO: This pass should also record information about variables used by closure -conversion, find which variables are assigned or captured, and record variable -type declarations. +This pass also records the set of binding IDs used locally within the +enclosing lambda form and information about variables captured by closures. 
""" function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) From 990ce773d10ca0564b41703d17b8cf1e0f8b3daf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 18 Dec 2024 21:59:51 +1000 Subject: [PATCH 0909/1109] Conversion of assignments to closure captures Convert captured variables to `Box`es for both read and write, both inside and outside the closure. Also ensure we emit newvar nodes for variables which might be used uninitialized (currently pessimistic). --- JuliaLowering/src/ast.jl | 20 ++- JuliaLowering/src/closure_conversion.jl | 169 ++++++++++++++---------- JuliaLowering/src/scope_analysis.jl | 37 +++++- JuliaLowering/src/utils.jl | 3 + JuliaLowering/test/closures_ir.jl | 89 +++++++++++++ 5 files changed, 236 insertions(+), 82 deletions(-) create mode 100644 JuliaLowering/test/closures_ir.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 098c425c84165..9d6459117f663 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -63,6 +63,7 @@ struct BindingInfo type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 is_const::Bool # Constant, cannot be reassigned is_ssa::Bool # Single assignment, defined before use + is_captured::Bool # Variable is captured by some lambda is_always_defined::Bool # A local that we know has an assignment that dominates all usages (is never undef) is_internal::Bool # True for internal bindings generated by the compiler is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) @@ -74,11 +75,12 @@ function BindingInfo(name::AbstractString, kind::Symbol; type::Union{Nothing,SyntaxTree} = nothing, is_const::Bool = false, is_ssa::Bool = false, + is_captured::Bool = false, is_always_defined::Bool = is_ssa, is_internal::Bool = false, is_ambiguous_local::Bool = false, is_nospecialize::Bool = false) - BindingInfo(name, kind, mod, type, is_const, is_ssa, is_always_defined, + BindingInfo(name, kind, mod, type, is_const, 
is_ssa, is_captured, is_always_defined, is_internal, is_ambiguous_local, is_nospecialize) end @@ -113,7 +115,7 @@ function _binding_id(ex::SyntaxTree) end function update_binding!(bindings::Bindings, x; - type=nothing, is_const=nothing, is_always_defined=nothing) + type=nothing, is_const=nothing, is_always_defined=nothing, is_captured=nothing) id = _binding_id(x) b = lookup_binding(bindings, id) bindings.info[id] = BindingInfo( @@ -123,6 +125,7 @@ function update_binding!(bindings::Bindings, x; isnothing(type) ? b.type : type, isnothing(is_const) ? b.is_const : is_const, b.is_ssa, + isnothing(is_captured) ? b.captured : is_captured, isnothing(is_always_defined) ? b.is_always_defined : is_always_defined, b.is_internal, b.is_ambiguous_local, @@ -224,6 +227,8 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) makeleaf(graph, srcref, k; id=value, kws...) elseif k == K"symbolic_label" makeleaf(graph, srcref, k; name_val=value, kws...) + elseif k == K"TOMBSTONE" + makeleaf(graph, srcref, k; kws...) else val = k == K"Integer" ? convert(Int, value) : k == K"Float" ? 
convert(Float64, value) : @@ -342,8 +347,15 @@ end function _expand_ast_tree(ctx, srcref, tree) if Meta.isexpr(tree, :(::)) # Leaf node - _match_kind(srcref, tree.args[2]) do kind, srcref, kws - :(makeleaf($ctx, $srcref, $kind, $(esc(tree.args[1])), $(kws...))) + if length(tree.args) == 2 + val = esc(tree.args[1]) + kindspec = tree.args[2] + else + val = nothing + kindspec = tree.args[1] + end + _match_kind(srcref, kindspec) do kind, srcref, kws + :(makeleaf($ctx, $srcref, $kind, $(val), $(kws...))) end elseif Meta.isexpr(tree, :call) && tree.args[1] === :(=>) # Leaf node with copied attributes diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index e058ab72362a3..713618a6566d7 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -31,6 +31,49 @@ function add_lambda_local!(ctx::ClosureConversionCtx, id) init_lambda_binding(ctx.lambda_bindings, id) end +# Access captured variable from inside a closure +function captured_var_access(ctx, ex) + cinfo = ctx.closure_info + field_sym = cinfo.field_syms[cinfo.field_name_inds[ex.var_id]] + @ast ctx ex [K"call" + "getfield"::K"core" + # FIXME: attributing the self binding to srcref=ex gives misleading printing. + # We should carry provenance with each binding to fix this. + ctx.lambda_bindings.self::K"BindingId" + field_sym + ] +end + +function get_box_contents(ctx::ClosureConversionCtx, var, box_ex) + undef_var = new_mutable_var(ctx, var, lookup_binding(ctx, var.var_id).name) + @ast ctx var [K"block" + box := box_ex + # Lower in an UndefVar check to a similarly named variable + # (ref #20016) so that closure lowering Box introduction + # doesn't impact the error message and the compiler is expected + # to fold away the extraneous null check + # + # TODO: Ideally the runtime would rely on provenance info for + # this error and we can remove isdefined check. 
+ [K"if" [K"call" + "isdefined"::K"core" + box + "contents"::K"Symbol" + ] + ::K"TOMBSTONE" + [K"block" + [K"newvar" undef_var] + undef_var + ] + ] + [K"call" + "getfield"::K"core" + box + "contents"::K"Symbol" + ] + ] +end + # Convert `ex` to `type` by calling `convert(type, ex)` when necessary. # # Used for converting the right hand side of an assignment to a typed local or @@ -118,37 +161,32 @@ function convert_assignment(ctx, ex) if binfo.kind == :global convert_global_assignment(ctx, ex, var, rhs0) else - closed = false # TODO - captured = false # TODO - @assert binfo.kind == :local - if isnothing(binfo.type) && !closed && !captured + @assert binfo.kind == :local || binfo.kind == :argument + lbinfo = get(ctx.lambda_bindings.bindings, var.var_id, nothing) + self_captured = !isnothing(lbinfo) && lbinfo.is_captured + captured = binfo.is_captured + if isnothing(binfo.type) && !self_captured && !captured @ast ctx ex [K"=" var rhs0] else - @assert binfo.kind == :local # Typed local - tmp_rhs0 = is_simple_atom(ctx, rhs0) ? nothing : ssavar(ctx, rhs0) - rhs1 = isnothing(tmp_rhs0) ? rhs0 : tmp_rhs0 - rhs = isnothing(binfo.type) ? rhs1 : - convert_for_type_decl(ctx, ex, rhs1, _convert_closures(ctx, binfo.type), true) - assgn = if closed - @assert false # TODO - elseif captured - @assert false # TODO - else - @ast ctx ex [K"=" var rhs] - end - if isnothing(tmp_rhs0) - @ast ctx ex [K"block" - assgn - rhs0 + tmp_rhs0 = ssavar(ctx, rhs0) + rhs = isnothing(binfo.type) ? tmp_rhs0 : + convert_for_type_decl(ctx, ex, tmp_rhs0, _convert_closures(ctx, binfo.type), true) + assignment = if self_captured || captured + @ast ctx ex [K"call" + "setfield!"::K"core" + self_captured ? 
captured_var_access(ctx, var) : var + "contents"::K"Symbol" + rhs ] else - @ast ctx ex [K"block" - [K"=" tmp_rhs0 rhs0] - assgn - tmp_rhs0 - ] + @ast ctx ex [K"=" var rhs] end + @ast ctx ex [K"block" + [K"=" tmp_rhs0 rhs0] + assignment + tmp_rhs0 + ] end end end @@ -237,40 +275,10 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) if k == K"BindingId" id = ex.var_id lbinfo = get(ctx.lambda_bindings.bindings, id, nothing) - if !isnothing(lbinfo) && lbinfo.is_captured - cinfo = ctx.closure_info - field_sym = cinfo.field_syms[cinfo.field_name_inds[id]] - undef_var = new_mutable_var(ctx, ex, lookup_binding(ctx, id).name) - @ast ctx ex [K"block" - box := [K"call" - "getfield"::K"core" - ctx.lambda_bindings.self::K"BindingId" - field_sym - ] - # Lower in an UndefVar check to a similarly named variable - # (ref #20016) so that closure lowering Box introduction - # doesn't impact the error message and the compiler is expected - # to fold away the extraneous null check - # - # TODO: Ideally the runtime would rely on provenance info for - # this error and we can remove isdefined check. - [K"if" [K"call" - "isdefined"::K"core" - box - "contents"::K"Symbol" - ] - "nothing"::K"core" - [K"block" - [K"newvar" undef_var] - undef_var - ] - ] - [K"call" - "getfield"::K"core" - box - "contents"::K"Symbol" - ] - ] + if !isnothing(lbinfo) && lbinfo.is_captured # TODO: && vinfo:asgn cv ?? 
+ get_box_contents(ctx, ex, captured_var_access(ctx, ex)) + elseif lookup_binding(ctx, id).is_captured # TODO: && vinfo:asgn vi + get_box_contents(ctx, ex, ex) else ex end @@ -295,6 +303,16 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) _convert_closures(ctx, ex[2]) ] end + elseif k == K"local" + var = ex[1] + binfo = lookup_binding(ctx, var) + if binfo.is_captured + @ast ctx ex [K"=" var [K"call" "Box"::K"core"]] + elseif !binfo.is_always_defined + @ast ctx ex [K"newvar" var] + else + makeleaf(ctx, ex, K"TOMBSTONE") + end elseif k == K"::" _convert_closures(ctx, @ast ctx ex [K"call" @@ -322,10 +340,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ctx.closure_infos[func_name_id] = closure_info init_closure_args = SyntaxList(ctx) for id in field_orig_bindings - # FIXME: This isn't actually correct: we need to convert - # all outer references to boxes too! - push!(init_closure_args, _convert_closures(ctx, - @ast ctx ex [K"call" "Box"::K"core" id::K"BindingId"])) + push!(init_closure_args, @ast ctx ex id::K"BindingId") end @ast ctx ex [K"block" [K"=" func_name @@ -380,18 +395,30 @@ function closure_convert_lambda(ctx, ex) @assert kind(ex) == K"lambda" body_stmts = SyntaxList(ctx) toplevel_stmts = ex.is_toplevel_thunk ? body_stmts : ctx.toplevel_stmts + lambda_bindings = ex.lambda_bindings ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, ctx.closure_info, ex.lambda_bindings, + ctx.closure_bindings, ctx.closure_info, lambda_bindings, toplevel_stmts, ctx.closure_infos) lambda_children = SyntaxList(ctx) - push!(lambda_children, _convert_closures(ctx2, ex[1])) - push!(lambda_children, _convert_closures(ctx2, ex[2])) + args = ex[1] + push!(lambda_children, args) + push!(lambda_children, ex[2]) - # Convert body. This is done as a special case to allow inner calls to - # _convert_closures to also add to body_stmts in the case that - # ex.is_toplevel_thunk is true. - in_body_stmts = kind(ex[3]) != K"block" ? 
ex[3:3] : ex[3][1:end] - for e in in_body_stmts + # Add box initializations for arguments which are captured by an inner lambda + for arg in children(args) + kind(arg) != K"Placeholder" || continue + binfo = lookup_binding(ctx, arg) + if binfo.is_captured # TODO: && binfo.is_assigned + push!(body_stmts, @ast ctx arg [K"=" + arg + [K"call" "Box"::K"core" arg] + ]) + end + end + # Convert body. Note that _convert_closures may call `push!(body_stmts, e)` + # internally for any expressions `e` which need to be moved to top level. + input_body_stmts = kind(ex[3]) != K"block" ? ex[3:3] : ex[3][1:end] + for e in input_body_stmts push!(body_stmts, _convert_closures(ctx2, e)) end push!(lambda_children, @ast ctx2 ex[3] [K"block" body_stmts...]) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index ff0765a8fe545..c0ddb4318500a 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -64,9 +64,9 @@ function _find_scope_vars!(assignments, locals, destructured_args, globals, used end # Find names of all identifiers used in the given expression, grouping them -# into sets by type. +# into sets by type of usage. 
# -# NB: This only works propery after desugaring has already processed assignments +# NB: This only works propery after desugaring function find_scope_vars(ex) ExT = typeof(ex) assignments = Dict{NameKey,ExT}() @@ -422,6 +422,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, if !haskey(lambda_bindings.bindings, id) # Used vars from a scope *outside* the current lambda are captured init_lambda_binding(lambda_bindings, id, is_captured=true, is_read=true) + update_binding!(ctx, id; is_captured=true) else update_lambda_binding!(lambda_bindings, id, is_read=true) end @@ -437,6 +438,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, if !haskey(lambda_bindings.bindings, id) # Assigned vars from a scope *outside* the current lambda are captured init_lambda_binding(lambda_bindings, id, is_captured=true, is_assigned=true) + update_binding!(ctx, id; is_captured=true) else update_lambda_binding!(lambda_bindings, id, is_assigned=true) end @@ -457,6 +459,17 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, is_hard_scope, var_ids, lambda_bindings) end +function add_local_decls!(ctx, stmts, srcref, scope) + # Add local decls to start of block so that closure conversion can + # initialize if necessary. + for id in values(scope.var_ids) + binfo = lookup_binding(ctx, id) + if binfo.kind == :local + push!(stmts, @ast ctx srcref [K"local" id::K"BindingId"]) + end + end +end + # Do some things which are better done after converting to BindingId. 
function maybe_update_bindings!(ctx, ex) k = kind(ex) @@ -522,12 +535,20 @@ function _resolve_scopes(ctx, ex::SyntaxTree) push!(ctx.scope_stack, scope) arg_bindings = _resolve_scopes(ctx, ex[1]) sparm_bindings = _resolve_scopes(ctx, ex[2]) + body_stmts = SyntaxList(ctx) + add_local_decls!(ctx, body_stmts, ex, scope) body = _resolve_scopes(ctx, ex[3]) + if kind(body) == K"block" + append!(body_stmts, children(body)) + else + push!(body_stmts, body) + end ret_var = numchildren(ex) == 4 ? _resolve_scopes(ctx, ex[4]) : nothing pop!(ctx.scope_stack) lambda_bindings = scope.lambda_bindings if !is_toplevel_thunk + # Record all lambdas for the same closure type in one place func_name = last(ctx.method_def_stack) if kind(func_name) == K"BindingId" func_name_id = func_name.var_id @@ -544,19 +565,21 @@ function _resolve_scopes(ctx, ex::SyntaxTree) is_toplevel_thunk=is_toplevel_thunk) arg_bindings sparm_bindings - body + [K"block" + body_stmts... + ] ret_var ] elseif k == K"scope_block" scope = analyze_scope(ctx, ex, ex.scope_type) push!(ctx.scope_stack, scope) - body = SyntaxList(ctx) + stmts = SyntaxList(ctx) + add_local_decls!(ctx, stmts, ex, scope) for e in children(ex) - push!(body, _resolve_scopes(ctx, e)) + push!(stmts, _resolve_scopes(ctx, e)) end - body pop!(ctx.scope_stack) - @ast ctx ex [K"block" body...] + @ast ctx ex [K"block" stmts...] 
elseif k == K"extension" etype = extension_type(ex) if etype == "islocal" diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index a7cdb666b88e4..0259bf51381a2 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -78,6 +78,9 @@ function print_ir(io::IO, ex, indent="") println(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) @assert kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" print_ir(io, e[3], indent*" ") + elseif kind(e) == K"code_info" && e.is_toplevel_thunk + println(io, indent, lno, " --- thunk") + print_ir(io, e, indent*" ") else code = string(e) println(io, indent, lno, " ", code) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl new file mode 100644 index 0000000000000..5973bc6a9b8c6 --- /dev/null +++ b/JuliaLowering/test/closures_ir.jl @@ -0,0 +1,89 @@ +######################################## +# Simple closure +let + x = 1 + function f(y) + x + y + end +end +#--------------------- +1 (newvar slot₁) +2 (= slot₂ (call core.Box)) +3 --- thunk + 1 (global TestMod.##closure#277) + 2 (call core.svec) + 3 (call core.svec :x) + 4 (call core.svec) + 5 (call core._structtype TestMod :##closure#277 %₂ %₃ %₄ false 1) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.##closure#277) + 8 (= TestMod.##closure#277 %₅) + 9 (call core.svec core.Box) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +4 TestMod.##closure#277 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 3 =#))))) +8 --- method core.nothing %₇ + 1 TestMod.+ + 2 (call core.getfield slot₁/x :x) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₃/x) + 7 slot₃/x + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈ slot₂/y) + 10 (return %₉) +9 1 +10 slot₂/x +11 (call core.setfield! 
%₁₀ :contents %₉) +12 TestMod.##closure#277 +13 slot₂/f +14 (= slot₁/f (new %₁₂ %₁₃)) +15 slot₁/f +16 slot₁/f +17 (return %₁₆) + +######################################## +# Closure which sets the value of a captured variable +let + x = 1 + function f(y) + x = 2 + end +end +#--------------------- +1 (newvar slot₁) +2 (= slot₂ (call core.Box)) +3 --- thunk + 1 (global TestMod.##closure#278) + 2 (call core.svec) + 3 (call core.svec :x) + 4 (call core.svec) + 5 (call core._structtype TestMod :##closure#278 %₂ %₃ %₄ false 1) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.##closure#278) + 8 (= TestMod.##closure#278 %₅) + 9 (call core.svec core.Box) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +4 TestMod.##closure#278 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 3 =#))))) +8 --- method core.nothing %₇ + 1 2 + 2 (call core.getfield slot₁/x :x) + 3 (call core.setfield! %₂ :contents %₁) + 4 (return %₁) +9 1 +10 slot₂/x +11 (call core.setfield! %₁₀ :contents %₉) +12 TestMod.##closure#278 +13 slot₂/f +14 (= slot₁/f (new %₁₂ %₁₃)) +15 slot₁/f +16 slot₁/f +17 (return %₁₆) From d88ae8853fa3ab50c36f2881c87cbed99377a972 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 20 Dec 2024 12:43:01 +1000 Subject: [PATCH 0910/1109] Fix closure type naming to use list of nested functions This greatly improves the naming stability over using a global gensym(), allowing our closure tests to be stable. This follow the naming scheme from https://github.com/JuliaLang/julia/pull/53719 with a fixes for closures nested in non-function scopes. 
--- JuliaLowering/src/closure_conversion.jl | 23 +++-- JuliaLowering/src/runtime.jl | 18 +++- JuliaLowering/src/scope_analysis.jl | 16 +++- JuliaLowering/test/closures_ir.jl | 111 ++++++++++++++++++++---- 4 files changed, 141 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 713618a6566d7..fff24ae41a697 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -54,7 +54,7 @@ function get_box_contents(ctx::ClosureConversionCtx, var, box_ex) # to fold away the extraneous null check # # TODO: Ideally the runtime would rely on provenance info for - # this error and we can remove isdefined check. + # this error and we can remove the isdefined check. [K"if" [K"call" "isdefined"::K"core" box @@ -230,8 +230,21 @@ function closure_type_fields(ctx, srcref, closure_binds) return field_syms, field_orig_bindings, field_name_inds end +function closure_name(mod, name_stack) + basename = "#$(join(name_stack, "#"))##" + i = 0 + while true + name = "$basename$i" + if reserve_module_binding(mod, Symbol(name)) + return name + end + i += 1 + end +end + # Return a thunk which creates a new type for a closure with `field_syms` named -# fields. The new type will be named `name_str`, which must be unique. +# fields. The new type will be named `name_str` which must be an unassigned +# name in the module. function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_syms) # New closure types always belong to the module we're expanding into - they # need to be serialized there during precompile. 
@@ -329,10 +342,10 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) closure_info = get(ctx.closure_infos, func_name_id, nothing) needs_def = isnothing(closure_info) if needs_def - # TODO: Names for closures without relying on gensym - name_str = string(gensym("closure")) + closure_binds = ctx.closure_bindings[func_name_id] field_syms, field_orig_bindings, field_name_inds = - closure_type_fields(ctx, ex, ctx.closure_bindings[func_name_id]) + closure_type_fields(ctx, ex, closure_binds) + name_str = closure_name(ctx.mod, closure_binds.name_stack) closure_type_def, closure_type = type_for_closure(ctx, ex, name_str, field_syms) push!(ctx.toplevel_stmts, closure_type_def) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 0d819034f9c43..db926db33892d 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -221,8 +221,8 @@ end # Get the binding for `name` if one is already resolved in module `mod`. Note # that we cannot use `isdefined(::Module, ::Symbol)` here, because that causes # binding resolution which is a massive side effect we must avoid in lowering. -function _get_module_binding(mod, name) - b = @ccall jl_get_module_binding(mod::Module, name::Symbol, 0::Cint)::Ptr{Core.Binding} +function _get_module_binding(mod, name; create=false) + b = @ccall jl_get_module_binding(mod::Module, name::Symbol, create::Cint)::Ptr{Core.Binding} b == C_NULL ? nothing : unsafe_pointer_to_objref(b) end @@ -245,6 +245,20 @@ function is_defined_nothrow_global(mod, name) isdefined(b.owner, :value) end +# "Reserve" a binding: create the binding if it doesn't exist but do not assign +# to it. +function reserve_module_binding(mod, name) + # TODO: Fix the race condition here: We should really hold the Module's + # binding lock during this test-and-set type operation. But the binding + # lock is only accessible from C. See also the C code in + # `fl_module_unique_name`. 
+ if _get_module_binding(mod, name; create=false) === nothing + _get_module_binding(mod, name; create=true) !== nothing + else + return false + end +end + #------------------------------------------------------------------------------- # The following are versions of macros from Base which act as "standard syntax # extensions" with special semantics known to lowering. diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index c0ddb4318500a..3fedc1dbc2c2b 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -136,10 +136,11 @@ function update_lambda_binding!(ctx::AbstractLoweringContext, id; kws...) end struct ClosureBindings - lambdas::Vector{LambdaBindings} + name_stack::Vector{String} # Names of functions the closure is nested within + lambdas::Vector{LambdaBindings} # Bindings for each method of the closure end -ClosureBindings() = ClosureBindings(Vector{LambdaBindings}()) +ClosureBindings(name_stack) = ClosureBindings(name_stack, Vector{LambdaBindings}()) struct ScopeInfo # True if scope is the global top level scope @@ -173,7 +174,8 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext # Variables which were implicitly global due to being assigned to in top # level code implicit_toplevel_globals::Set{NameKey} - # + # Collection of information about each closure, principally which methods + # are part of the closure (and hence captures). 
closure_bindings::Dict{IdTag,ClosureBindings} end @@ -554,7 +556,13 @@ function _resolve_scopes(ctx, ex::SyntaxTree) func_name_id = func_name.var_id if lookup_binding(ctx, func_name_id).kind == :local cbinds = get!(ctx.closure_bindings, func_name_id) do - ClosureBindings() + name_stack = Vector{String}() + for fname in ctx.method_def_stack + if kind(fname) == K"BindingId" + push!(name_stack, lookup_binding(ctx, fname).name) + end + end + ClosureBindings(name_stack) end push!(cbinds.lambdas, lambda_bindings) end diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 5973bc6a9b8c6..d31293bcc78c6 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -7,21 +7,21 @@ let end end #--------------------- -1 (newvar slot₁) -2 (= slot₂ (call core.Box)) +1 (= slot₂ (call core.Box)) +2 (newvar slot₁) 3 --- thunk - 1 (global TestMod.##closure#277) + 1 (global TestMod.#f##0) 2 (call core.svec) 3 (call core.svec :x) 4 (call core.svec) - 5 (call core._structtype TestMod :##closure#277 %₂ %₃ %₄ false 1) + 5 (call core._structtype TestMod :#f##0 %₂ %₃ %₄ false 1) 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.##closure#277) - 8 (= TestMod.##closure#277 %₅) + 7 (const TestMod.#f##0) + 8 (= TestMod.#f##0 %₅) 9 (call core.svec core.Box) 10 (call core._typebody! %₅ %₉) 11 (return core.nothing) -4 TestMod.##closure#277 +4 TestMod.#f##0 5 (call core.svec %₄ core.Any) 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 3 =#))))) @@ -39,7 +39,7 @@ end 9 1 10 slot₂/x 11 (call core.setfield! 
%₁₀ :contents %₉) -12 TestMod.##closure#277 +12 TestMod.#f##0 13 slot₂/f 14 (= slot₁/f (new %₁₂ %₁₃)) 15 slot₁/f @@ -55,21 +55,21 @@ let end end #--------------------- -1 (newvar slot₁) -2 (= slot₂ (call core.Box)) +1 (= slot₂ (call core.Box)) +2 (newvar slot₁) 3 --- thunk - 1 (global TestMod.##closure#278) + 1 (global TestMod.#f##1) 2 (call core.svec) 3 (call core.svec :x) 4 (call core.svec) - 5 (call core._structtype TestMod :##closure#278 %₂ %₃ %₄ false 1) + 5 (call core._structtype TestMod :#f##1 %₂ %₃ %₄ false 1) 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.##closure#278) - 8 (= TestMod.##closure#278 %₅) + 7 (const TestMod.#f##1) + 8 (= TestMod.#f##1 %₅) 9 (call core.svec core.Box) 10 (call core._typebody! %₅ %₉) 11 (return core.nothing) -4 TestMod.##closure#278 +4 TestMod.#f##1 5 (call core.svec %₄ core.Any) 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 3 =#))))) @@ -81,9 +81,88 @@ end 9 1 10 slot₂/x 11 (call core.setfield! %₁₀ :contents %₉) -12 TestMod.##closure#278 +12 TestMod.#f##1 13 slot₂/f 14 (= slot₁/f (new %₁₂ %₁₃)) 15 slot₁/f 16 slot₁/f 17 (return %₁₆) + +######################################## +# Function where arguments are captured into a closure +function f(x) + function g() + x = 10 + end + g() + x +end +#--------------------- +1 (method TestMod.f) +2 --- thunk + 1 (global TestMod.#f#g##0) + 2 (call core.svec) + 3 (call core.svec :x) + 4 (call core.svec) + 5 (call core._structtype TestMod :#f#g##0 %₂ %₃ %₄ false 1) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#f#g##0) + 8 (= TestMod.#f#g##0 %₅) + 9 (call core.svec core.Box) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +3 TestMod.#f#g##0 +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) +7 --- method core.nothing %₆ + 1 10 + 2 (call core.getfield slot₁/x :x) + 3 (call core.setfield! 
%₂ :contents %₁) + 4 (return %₁) +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + 1 (= slot₂/x (call core.Box slot₂/x)) + 2 slot₂/x + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₅/x) + 7 slot₅/x + 8 (call core.getfield %₂ :contents) + 9 (call core.Box %₈) + 10 (call core.setfield! slot₂/x :contents %₉) + 11 (newvar slot₃) + 12 TestMod.#f#g##0 + 13 slot₂/g + 14 (call core.isdefined %₁₃ :contents) + 15 (gotoifnot %₁₄ label₁₇) + 16 (goto label₁₉) + 17 (newvar slot₆/x) + 18 slot₆/x + 19 (call core.getfield %₁₃ :contents) + 20 (= slot₃/g (new %₁₂ %₁₉)) + 21 slot₃/g + 22 slot₃/g + 23 slot₃/g + 24 (call %₂₃) + 25 slot₂/x + 26 (call core.isdefined %₂₅ :contents) + 27 (gotoifnot %₂₆ label₂₉) + 28 (goto label₃₁) + 29 (newvar slot₇/x) + 30 slot₇/x + 31 (call core.getfield %₂₅ :contents) + 32 (call core.isdefined %₃₁ :contents) + 33 (gotoifnot %₃₂ label₃₅) + 34 (goto label₃₇) + 35 (newvar slot₄/x) + 36 slot₄/x + 37 (call core.getfield %₃₁ :contents) + 38 (return %₃₇) +14 (return %₁₂) + From 0726c50f6049d7db4a0f9983d1936e6deebd60df Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 21 Dec 2024 07:40:10 +1000 Subject: [PATCH 0911/1109] Minor restructure: Early return in expand_function_def --- JuliaLowering/src/desugaring.jl | 306 ++++++++++++++++---------------- 1 file changed, 153 insertions(+), 153 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 9be05d2b14f2b..78d29e4c10fe4 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1431,176 +1431,176 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= name = name[1] end - if kind(name) == K"call" - callex = rewrite_call(name) - # TODO - # dotop names - # overlays - static_parameters = SyntaxList(ctx) - - # Add self argument 
where necessary - args = callex[2:end] - name = callex[1] - - arg_names = SyntaxList(ctx) - arg_types = SyntaxList(ctx) - body_stmts = SyntaxList(ctx) - first_default = 0 - arg_defaults = SyntaxList(ctx) - for (i,arg) in enumerate(args) - info = match_function_arg(arg) - aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" - if kind(aname) == K"tuple" - # Argument destructuring - is_nospecialize = getmeta(arg, :nospecialize, false) - n = new_mutable_var(ctx, aname, "destructured_arg_$i"; - kind=:argument, is_nospecialize=is_nospecialize) - push!(body_stmts, @ast ctx aname [ - K"local"(meta=CompileHints(:is_destructured_arg, true)) - [K"=" aname n] - ]) - aname = n - end - push!(arg_names, aname) - atype = !isnothing(info.type) ? info.type : @ast ctx arg "Any"::K"core" - if info.is_slurp - if i != length(args) - throw(LoweringError(arg, "`...` may only be used for the last function argument")) - end - atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] + if kind(name) == K"tuple" + TODO(name, "Anon function lowering") + elseif kind(name) != K"call" + throw(LoweringError(name, "Bad function definition")) + end + + callex = rewrite_call(name) + # TODO + # dotop names + # overlays + static_parameters = SyntaxList(ctx) + + # Add self argument where necessary + args = callex[2:end] + name = callex[1] + + arg_names = SyntaxList(ctx) + arg_types = SyntaxList(ctx) + body_stmts = SyntaxList(ctx) + first_default = 0 + arg_defaults = SyntaxList(ctx) + for (i,arg) in enumerate(args) + info = match_function_arg(arg) + aname = !isnothing(info.name) ? 
info.name : @ast ctx arg "_"::K"Placeholder" + if kind(aname) == K"tuple" + # Argument destructuring + is_nospecialize = getmeta(arg, :nospecialize, false) + n = new_mutable_var(ctx, aname, "destructured_arg_$i"; + kind=:argument, is_nospecialize=is_nospecialize) + push!(body_stmts, @ast ctx aname [ + K"local"(meta=CompileHints(:is_destructured_arg, true)) + [K"=" aname n] + ]) + aname = n + end + push!(arg_names, aname) + atype = !isnothing(info.type) ? info.type : @ast ctx arg "Any"::K"core" + if info.is_slurp + if i != length(args) + throw(LoweringError(arg, "`...` may only be used for the last function argument")) end - if isnothing(info.default) - if !isempty(arg_defaults) && !info.is_slurp - # TODO: Referring to multiple pieces of syntax in one error message is necessary. - # TODO: Poison ASTs with error nodes and continue rather than immediately throwing. - # - # We should make something like the following kind of thing work! - # arg_defaults[1] = @ast_error ctx arg_defaults[1] """ - # Positional arguments with defaults must occur at the end. - # - # We found a [non-optional position argument]($arg) *after* - # one with a [default value]($(first(arg_defaults))) - # """ - # - throw(LoweringError(args[first_default], "optional positional arguments must occur at end")) - end - else - if isempty(arg_defaults) - first_default = i - end - push!(arg_defaults, info.default) + atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] + end + if isnothing(info.default) + if !isempty(arg_defaults) && !info.is_slurp + # TODO: Referring to multiple pieces of syntax in one error message is necessary. + # TODO: Poison ASTs with error nodes and continue rather than immediately throwing. + # + # We should make something like the following kind of thing work! + # arg_defaults[1] = @ast_error ctx arg_defaults[1] """ + # Positional arguments with defaults must occur at the end. 
+ # + # We found a [non-optional position argument]($arg) *after* + # one with a [default value]($(first(arg_defaults))) + # """ + # + throw(LoweringError(args[first_default], "optional positional arguments must occur at end")) end - # TODO: Ideally, ensure side effects of evaluating arg_types only - # happen once - we should create an ssavar if there's any following - # defaults. (flisp lowering doesn't ensure this either) - push!(arg_types, atype) - end - - bare_func_name = nothing - doc_obj = nothing - farg_name = nothing - if kind(name) == K"::" - # Add methods to an existing type - if numchildren(name) == 1 - # function (::T)() ... - farg_type = name[1] - else - # function (f::T)() ... - @chk numchildren(name) == 2 - farg_name = name[1] - farg_type = name[2] - end - doc_obj = farg_type else - if !is_valid_name(name) - throw(LoweringError(name, "Invalid function name")) - end - if is_identifier_like(name) - # Add methods to a global `Function` object, or local closure - # type function f() ... - bare_func_name = name - else - # Add methods to an existing Function - # function A.B.f() ... + if isempty(arg_defaults) + first_default = i end - doc_obj = name # todo: can closures be documented? - farg_type = @ast ctx name [K"function_type" name] - end - # Add self argument - if isnothing(farg_name) - farg_name = new_mutable_var(ctx, name, "#self#"; kind=:argument) + push!(arg_defaults, info.default) end - pushfirst!(arg_names, farg_name) - pushfirst!(arg_types, farg_type) + # TODO: Ideally, ensure side effects of evaluating arg_types only + # happen once - we should create an ssavar if there's any following + # defaults. 
(flisp lowering doesn't ensure this either) + push!(arg_types, atype) + end - if !isnothing(return_type) - ret_var = ssavar(ctx, return_type, "return_type") - push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type]) + bare_func_name = nothing + doc_obj = nothing + farg_name = nothing + if kind(name) == K"::" + # Add methods to an existing type + if numchildren(name) == 1 + # function (::T)() ... + farg_type = name[1] else - ret_var = nothing + # function (f::T)() ... + @chk numchildren(name) == 2 + farg_name = name[1] + farg_type = name[2] end - - body = rewrite_body(ex[2]) - if !isempty(body_stmts) - body = @ast ctx body [ - K"block" - body_stmts... - body - ] + doc_obj = farg_type + else + if !is_valid_name(name) + throw(LoweringError(name, "Invalid function name")) + end + if is_identifier_like(name) + # Add methods to a global `Function` object, or local closure + # type function f() ... + bare_func_name = name + else + # Add methods to an existing Function + # function A.B.f() ... end + doc_obj = name # todo: can closures be documented? + farg_type = @ast ctx name [K"function_type" name] + end + # Add self argument + if isnothing(farg_name) + farg_name = new_mutable_var(ctx, name, "#self#"; kind=:argument) + end + pushfirst!(arg_names, farg_name) + pushfirst!(arg_types, farg_type) - method_table_val = nothing # TODO: method overlays - method_table = isnothing(method_table_val) ? 
- @ast(ctx, callex, "nothing"::K"core") : - ssavar(ctx, ex, "method_table") - method_stmts = SyntaxList(ctx) + if !isnothing(return_type) + ret_var = ssavar(ctx, return_type, "return_type") + push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type]) + else + ret_var = nothing + end - if !isempty(arg_defaults) - # For self argument added above - first_default += 1 - _optional_positional_defs!(ctx, method_stmts, ex, callex, - method_table, typevar_names, typevar_stmts, - arg_names, arg_types, first_default, arg_defaults, ret_var) - end + body = rewrite_body(ex[2]) + if !isempty(body_stmts) + body = @ast ctx body [ + K"block" + body_stmts... + body + ] + end - # The method with all non-default arguments - push!(method_stmts, - _method_def_expr(ctx, ex, callex, method_table, docs, - typevar_names, arg_names, arg_types, ret_var, body)) - if !isnothing(docs) - method_stmts[end] = @ast ctx docs [K"block" - method_metadata := method_stmts[end] - @ast ctx docs [K"call" - bind_docs!::K"Value" - doc_obj - docs[1] - method_metadata - ] + method_table_val = nothing # TODO: method overlays + method_table = isnothing(method_table_val) ? 
+ @ast(ctx, callex, "nothing"::K"core") : + ssavar(ctx, ex, "method_table") + method_stmts = SyntaxList(ctx) + + if !isempty(arg_defaults) + # For self argument added above + first_default += 1 + _optional_positional_defs!(ctx, method_stmts, ex, callex, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, arg_defaults, ret_var) + end + + # The method with all non-default arguments + push!(method_stmts, + _method_def_expr(ctx, ex, callex, method_table, docs, + typevar_names, arg_names, arg_types, ret_var, body)) + if !isnothing(docs) + method_stmts[end] = @ast ctx docs [K"block" + method_metadata := method_stmts[end] + @ast ctx docs [K"call" + bind_docs!::K"Value" + doc_obj + docs[1] + method_metadata ] - end + ] + end - @ast ctx ex [K"block" - if !isnothing(bare_func_name) - [K"function_decl"(bare_func_name) bare_func_name] - end - [K"scope_block"(scope_type=:hard) - [K"method_defs" - isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name - [K"block" - typevar_stmts... - if !isnothing(method_table_val) - [K"=" method_table method_table_val] - end - method_stmts... - ] + @ast ctx ex [K"block" + if !isnothing(bare_func_name) + [K"function_decl"(bare_func_name) bare_func_name] + end + [K"scope_block"(scope_type=:hard) + [K"method_defs" + isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name + [K"block" + typevar_stmts... + if !isnothing(method_table_val) + [K"=" method_table method_table_val] + end + method_stmts... 
] ] ] - elseif kind(name) == K"tuple" - TODO(name, "Anon function lowering") - else - throw(LoweringError(name, "Bad function definition")) - end + ] end function _make_macro_name(ctx, ex) From 25cf0ca70727d7beb4423380b8730fc0073c9a30 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 22 Dec 2024 11:30:08 +1000 Subject: [PATCH 0912/1109] Add `TRAILING_COMMA_FLAG` to distinguish `(a,b)` vs `(a,b,)` (JuliaLang/JuliaSyntax.jl#521) This syntax flag allows the stylistic choice of adding trailing commas to be easily detected. For example, `f(x)` vs `f(x,)` and `(a,b)` vs `(a,b,)`. --- JuliaSyntax/docs/src/api.md | 1 + JuliaSyntax/src/parse_stream.jl | 26 ++++++++++----- JuliaSyntax/src/parser.jl | 56 ++++++++++++++++++++------------- JuliaSyntax/test/parser.jl | 29 +++++++++++------ 4 files changed, 73 insertions(+), 39 deletions(-) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md index d79c86a343104..5dfbec6e4fcc3 100644 --- a/JuliaSyntax/docs/src/api.md +++ b/JuliaSyntax/docs/src/api.md @@ -115,6 +115,7 @@ JuliaSyntax.has_flags JuliaSyntax.TRIPLE_STRING_FLAG JuliaSyntax.RAW_STRING_FLAG JuliaSyntax.PARENS_FLAG +JuliaSyntax.TRAILING_COMMA_FLAG JuliaSyntax.COLON_QUOTE JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG JuliaSyntax.MUTABLE_FLAG diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 5b04b42c71b9a..02c0307e9c032 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -39,6 +39,13 @@ Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses """ const PARENS_FLAG = RawFlags(1<<5) +""" +Set for various delimited constructs when they contains a trailing comma. For +example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where +this applies are: `tuple call dotcall macrocall vect curly braces <: >:`. 
+""" +const TRAILING_COMMA_FLAG = RawFlags(1<<6) + """ Set for K"quote" for the short form `:x` as opposed to long form `quote x end` """ @@ -139,22 +146,27 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_prefix_op_call(head) && (str = str*"-pre") is_postfix_op_call(head) && (str = str*"-post") - if kind(head) in KSet"string cmdstring Identifier" + k = kind(head) + if k in KSet"string cmdstring Identifier" has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") - elseif kind(head) in KSet"tuple block macrocall" + elseif k in KSet"tuple block macrocall" has_flags(head, PARENS_FLAG) && (str = str*"-p") - elseif kind(head) == K"quote" + elseif k == K"quote" has_flags(head, COLON_QUOTE) && (str = str*"-:") - elseif kind(head) == K"toplevel" + elseif k == K"toplevel" has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") - elseif kind(head) == K"function" + elseif k == K"function" has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") - elseif kind(head) == K"struct" + elseif k == K"struct" has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") - elseif kind(head) == K"module" + elseif k == K"module" has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") end + if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && + has_flags(head, TRAILING_COMMA_FLAG) + str *= "-," + end is_suffixed(head) && (str = str*"-suf") n = numeric_flags(head) n != 0 && (str = str*"-"*string(n)) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0cd65f7aa33e1..bb08134d83186 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1302,10 +1302,10 @@ function parse_unary(ps::ParseState) # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) if is_type_operator(op_t) # <:(a,) ==> (<: a) - emit(ps, mark, op_k) + emit(ps, mark, op_k, opts.delim_flags) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) else - emit(ps, mark, K"call") + emit(ps, mark, K"call", 
opts.delim_flags) end parse_call_chain(ps, mark) parse_factor_with_initial_ex(ps, mark) @@ -1552,13 +1552,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f (a) ==> (call f (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")") + opts = parse_call_arglist(ps, K")") if peek(ps) == K"do" # f(x) do y body end ==> (call f x (do (tuple y) (block body))) parse_do(ps) end emit(ps, mark, is_macrocall ? K"macrocall" : K"call", - is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) + # TODO: Add PARENS_FLAG to all calls which use them? + (is_macrocall ? PARENS_FLAG : EMPTY_FLAGS)|opts.delim_flags) if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) # A.@x(y) ==> (macrocall-p (. A @x) y) @@ -1634,8 +1635,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f. (x) ==> (dotcall f (error-t) x) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")") - emit(ps, mark, K"dotcall") + opts = parse_call_arglist(ps, K")") + emit(ps, mark, K"dotcall", opts.delim_flags) elseif k == K":" # A.:+ ==> (. A (quote-: +)) # A.: + ==> (. A (error-t) (quote-: +)) @@ -1697,20 +1698,20 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # S {a} ==> (curly S (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K"}") + opts = parse_call_arglist(ps, K"}") if is_macrocall # @S{a,b} ==> (macrocall S (braces a b)) # A.@S{a} ==> (macrocall (. A @S) (braces a)) # @S{a}.b ==> (. 
(macrocall @S (braces a)) b) fix_macro_name_kind!(ps, macro_name_position) - emit(ps, m, K"braces") + emit(ps, m, K"braces", opts.delim_flags) emit(ps, mark, K"macrocall") min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`") is_macrocall = false macro_atname_range = nothing else # S{a,b} ==> (curly S a b) - emit(ps, mark, K"curly") + emit(ps, mark, K"curly", opts.delim_flags) end elseif k in KSet" \" \"\"\" ` ``` " && !preceding_whitespace(t) && maybe_strmac && @@ -2151,7 +2152,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (f(x),) end ==> (function (tuple-p (call f x)) (block)) ambiguous_parens = opts.maybe_grouping_parens && peek_behind(ps).kind in KSet"macrocall $" - emit(ps, mark, K"tuple", PARENS_FLAG) + emit(ps, mark, K"tuple", PARENS_FLAG|opts.delim_flags) if ambiguous_parens # Got something like `(@f(x))`. Is it anon `(@f(x),)` or named sig `@f(x)` ?? emit(ps, mark, K"error", error="Ambiguous signature. Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") @@ -2716,16 +2717,21 @@ end # surrounding brackets. 
# # flisp: parse-vect -function parse_vect(ps::ParseState, closer) +function parse_vect(ps::ParseState, closer, prefix_trailing_comma) # [x, y] ==> (vect x y) # [x, y] ==> (vect x y) # [x,y ; z] ==> (vect x y (parameters z)) # [x=1, y=2] ==> (vect (= x 1) (= y 2)) # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) - parse_brackets(ps, closer) do _, _, _, _ - return (needs_parameters=true,) + opts = parse_brackets(ps, closer) do _, _, _, num_subexprs + return (needs_parameters=true, + num_subexprs=num_subexprs) + end + delim_flags = opts.delim_flags + if opts.num_subexprs == 0 && prefix_trailing_comma + delim_flags |= TRAILING_COMMA_FLAG end - return (K"vect", EMPTY_FLAGS) + return (K"vect", delim_flags) end # Parse generators @@ -2988,7 +2994,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) mark = position(ps) if k == closer # [] ==> (vect) - return parse_vect(ps, closer) + return parse_vect(ps, closer, false) elseif k == K";" #v1.8: [;] ==> (ncat-1) #v1.8: [;;] ==> (ncat-2) @@ -3003,14 +3009,15 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) parse_eq_star(ps) k = peek(ps, skip_newlines=true) if k == K"," || (is_closing_token(ps, k) && k != K";") - if k == K"," + prefix_trailing_comma = k == K"," + if prefix_trailing_comma # [x,] ==> (vect x) bump(ps, TRIVIA_FLAG; skip_newlines = true) end # [x] ==> (vect x) # [x \n ] ==> (vect x) # [x ==> (vect x (error-t)) - parse_vect(ps, closer) + parse_vect(ps, closer, prefix_trailing_comma) elseif k == K"for" # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) @@ -3087,7 +3094,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) - emit(ps, mark, K"tuple", PARENS_FLAG) + emit(ps, mark, 
K"tuple", PARENS_FLAG|opts.delim_flags) elseif opts.is_block # Blocks # (;;) ==> (block-p) @@ -3135,6 +3142,7 @@ function parse_brackets(after_parse::Function, had_commas = false had_splat = false param_start = nothing + trailing_comma = false while true k = peek(ps) if k == closing_kind @@ -3150,11 +3158,13 @@ function parse_brackets(after_parse::Function, bump(ps, TRIVIA_FLAG) bump_trivia(ps) elseif is_closing_token(ps, k) + trailing_comma = false # Error; handled below in bump_closing_token break else mark = position(ps) parse_eq_star(ps) + trailing_comma = false num_subexprs += 1 if num_subexprs == 1 had_splat = peek_behind(ps).kind == K"..." @@ -3172,6 +3182,7 @@ function parse_brackets(after_parse::Function, if k == K"," had_commas = true bump(ps, TRIVIA_FLAG) + trailing_comma = true elseif k == K";" || k == closing_kind # Handled above continue @@ -3193,7 +3204,7 @@ function parse_brackets(after_parse::Function, end release_positions(ps.stream, params_positions) bump_closing_token(ps, closing_kind, " or `,`") - return opts + return (; opts..., delim_flags=trailing_comma ? TRAILING_COMMA_FLAG : EMPTY_FLAGS) end _is_indentation(b::UInt8) = (b == u8" " || b == u8"\t") @@ -3420,14 +3431,15 @@ end function emit_braces(ps, mark, ckind, cflags) if ckind == K"hcat" # {x y} ==> (bracescat (row x y)) - emit(ps, mark, K"row", cflags) + emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG) elseif ckind == K"ncat" # {x ;;; y} ==> (bracescat (nrow-3 x y)) - emit(ps, mark, K"nrow", cflags) + emit(ps, mark, K"nrow", cflags & ~TRAILING_COMMA_FLAG) end check_ncat_compat(ps, mark, ckind) outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat" - emit(ps, mark, outk) + delim_flags = outk == K"braces" ? (cflags & TRAILING_COMMA_FLAG) : EMPTY_FLAGS + emit(ps, mark, outk, delim_flags) end # parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. 
diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e6115ad474c40..e36ba73e8a889 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -234,9 +234,10 @@ tests = [ ".*(x)" => "(call (. *) x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" - ".+(a,)" => "(call (. +) a)" + "+(a,)" => "(call-, + a)" + ".+(a,)" => "(call-, (. +) a)" "(.+)(a)" => "(call (parens (. +)) a)" - "+(a=1,)" => "(call + (= a 1))" + "+(a=1,)" => "(call-, + (= a 1))" "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" @@ -251,7 +252,7 @@ tests = [ # Prefix calls have higher precedence than ^ "+(a,b)^2" => "(call-i (call + a b) ^ 2)" "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" - "<:(a,)" => "(<: a)" + "<:(a,)" => "(<:-, a)" # Unary function calls with brackets as grouping, not an arglist ".+(a)" => "(dotcall-pre + (parens a))" "+(a;b)" => "(call-pre + (block-p a b))" @@ -306,6 +307,7 @@ tests = [ # Really for parse_where "x where \n {T}" => "(where x (braces T))" "x where {T,S}" => "(where x (braces T S))" + "x where {T,S,}" => "(where x (braces-, T S))" "x where {T S}" => "(where x (bracescat (row T S)))" "x where {y for y in ys}" => "(where x (braces (generator y (iteration (in y ys)))))" "x where T" => "(where x T)" @@ -364,11 +366,13 @@ tests = [ # calls with brackets "f(a,b)" => "(call f a b)" + "f(a,)" => "(call-, f a)" "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" "f(a; b; c)" => "(call f a (parameters b) (parameters c))" "(a=1)()" => "(call (parens (= a 1)))" "f (a)" => "(call f (error-t) a)" "@x(a, b)" => "(macrocall-p @x a b)" + "@x(a, b,)" => "(macrocall-p-, @x a b)" "A.@x(y)" => "(macrocall-p (. A @x) y)" "A.@x(y).z" => "(. (macrocall-p (. A @x) y) z)" "f(y for x = xs; a)" => "(call f (generator y (iteration (in x xs))) (parameters a))" @@ -407,6 +411,7 @@ tests = [ "A.@B.x" => "(macrocall (. (. 
A B) (error-t) @x))" "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" "f.(a,b)" => "(dotcall f a b)" + "f.(a,b,)" => "(dotcall-, f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" "(a=1).()" => "(dotcall (parens (= a 1)))" "f. (x)" => "(dotcall f (error-t) x)" @@ -577,9 +582,10 @@ tests = [ "macro (\$f)() end" => "(macro (call (parens (\$ f))) (block))" "function (x) body end"=> "(function (tuple-p x) (block body))" "function (x,y) end" => "(function (tuple-p x y) (block))" + "function (x,y,) end" => "(function (tuple-p-, x y) (block))" "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" - "function (f(x),) end" => "(function (tuple-p (call f x)) (block))" + "function (f(x),) end" => "(function (tuple-p-, (call f x)) (block))" "function (@f(x);) end" => "(function (tuple-p (macrocall-p @f x) (parameters)) (block))" "function (@f(x)...) end" => "(function (tuple-p (... (macrocall-p @f x))) (block))" "function (@f(x)) end" => "(function (error (tuple-p (macrocall-p @f x))) (block))" @@ -715,7 +721,7 @@ tests = [ JuliaSyntax.parse_paren => [ # Tuple syntax with commas "()" => "(tuple-p)" - "(x,)" => "(tuple-p x)" + "(x,)" => "(tuple-p-, x)" "(x,y)" => "(tuple-p x y)" "(x=1, y=2)" => "(tuple-p (= x 1) (= y 2))" # Named tuples with initial semicolon @@ -827,11 +833,12 @@ tests = [ "=" => "(error =)" # parse_cat "[]" => "(vect)" - "[x,]" => "(vect x)" - "[x\n,,]" => "(vect x (error-t ✘))" + "[x,]" => "(vect-, x)" + "[x,y,]" => "(vect-, x y)" + "[x\n,,]" => "(vect-, x (error-t ✘))" "[x]" => "(vect x)" "[x \n ]" => "(vect x)" - "[x \n, ]" => "(vect x)" + "[x \n, ]" => "(vect-, x)" "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" "[x for a in as]" => "(comprehension (generator x (iteration (in a as))))" @@ -849,10 +856,10 @@ tests = [ "(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))" # parse_vect "[x, y]" => "(vect x y)" - "[x, 
y]" => "(vect x y)" + "[x, y,]" => "(vect-, x y)" "[x,\n y]" => "(vect x y)" "[x\n, y]" => "(vect x y)" - "[x\n,, y]" => "(vect x (error-t ✘ y))" + "[x\n,, y]" => "(vect-, x (error-t ✘ y))" "[x,y ; z]" => "(vect x y (parameters z))" "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" @@ -862,6 +869,8 @@ tests = [ ":(::\n)" => "(quote-: (parens ::))" "(function f \n end)" => "(parens (function f))" # braces + "{x,y}" => "(braces x y)" + "{x,y,}" => "(braces-, x y)" "{x y}" => "(bracescat (row x y))" ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" # Macro names can be keywords From 39a5b3567bff72646f676aa321fc5d7e2432c33d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 22 Dec 2024 15:31:01 +1000 Subject: [PATCH 0913/1109] Desugaring of anonymous closures * Cleanup expand_function_def to have more logical code extracting the self type and name. * Initially desugar anonymous functions to use `K"Placeholder"` for the function name, then expand this name to an ssa var early on so that closure conversion can tie together the closure instantiation in function_decl with the return of method_defs. * `->` generates ssavars labelled with `->` in the printed IR. `function` keyword anon functions uses `#anon#`. * While we're at it, ensure we emit an error for dotop-named functions. Note *_ir.jl test cases are not yet fixed in most cases because we still have excess newvar nodes. 
--- JuliaLowering/src/ast.jl | 19 +-- JuliaLowering/src/closure_conversion.jl | 9 +- JuliaLowering/src/desugaring.jl | 201 ++++++++++++++++-------- JuliaLowering/src/scope_analysis.jl | 29 ++-- JuliaLowering/test/closures_ir.jl | 58 +++++++ JuliaLowering/test/functions_ir.jl | 43 ++++- JuliaLowering/test/utils.jl | 4 +- 7 files changed, 256 insertions(+), 107 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 9d6459117f663..f98b7b3b9250c 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -125,7 +125,7 @@ function update_binding!(bindings::Bindings, x; isnothing(type) ? b.type : type, isnothing(is_const) ? b.is_const : is_const, b.is_ssa, - isnothing(is_captured) ? b.captured : is_captured, + isnothing(is_captured) ? b.is_captured : is_captured, isnothing(is_always_defined) ? b.is_always_defined : is_always_defined, b.is_internal, b.is_ambiguous_local, @@ -274,7 +274,7 @@ end # Create a new local mutable variable or lambda argument # (TODO: rename this?) function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; kind=:local, kws...) - @assert kind == :local || kind == :argument + @assert kind === :local || kind === :argument id = new_binding(ctx.bindings, BindingInfo(name, kind; is_internal=true, kws...)) nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) var = makeleaf(ctx, nameref, K"BindingId", var_id=id) @@ -670,21 +670,6 @@ function any_assignment(exs) any(kind(e) == K"=" for e in exs) end -# Check valid identifier/function names -function is_valid_name(ex) - k = kind(ex) - if k == K"Identifier" - name = ex.name_val - elseif k == K"var" - name = ex[1].name_val - elseif k == K"." && kind(ex[2]) == K"Symbol" - name = ex[2].name_val - else - return false - end - return name != "ccall" && name != "cglobal" -end - function is_valid_modref(ex) return kind(ex) == K"." 
&& kind(ex[2]) == K"Symbol" && (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index fff24ae41a697..e1e283340af20 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -374,15 +374,14 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) end elseif k == K"function_type" func_name = ex[1] - @assert kind(func_name) == K"BindingId" - if lookup_binding(ctx, func_name.var_id).kind == :global - @ast ctx ex [K"call" "Typeof"::K"core" func_name] - else + if kind(func_name) == K"BindingId" && lookup_binding(ctx, func_name).kind == :local ctx.closure_infos[func_name.var_id].type_name + else + @ast ctx ex [K"call" "Typeof"::K"core" func_name] end elseif k == K"method_defs" name = ex[1] - is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind == :local + is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind === :local cinfo = is_closure ? 
ctx.closure_infos[name.var_id] : nothing ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, cinfo, ctx.lambda_bindings, diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 78d29e4c10fe4..c70032e148ac9 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1293,10 +1293,10 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) end end -function _method_def_expr(ctx, srcref, callex, method_table, - docs, typevar_names, arg_names, arg_types, ret_var, body) - # metadata contains svec(types, sparms, location) +function method_def_expr(ctx, srcref, callex, method_table, + docs, typevar_names, arg_names, arg_types, ret_var, body) @ast ctx srcref [K"block" + # metadata contains svec(types, sparms, location) method_metadata := [K"call"(callex) "svec" ::K"core" [K"call" @@ -1360,9 +1360,10 @@ end # For example for `f(x, y=1, z=2)` we generate two additional methods # f(x) = f(x, 1, 2) # f(x, y) = f(x, y, 2) -function _optional_positional_defs!(ctx, method_stmts, srcref, callex, - method_table, typevar_names, typevar_stmts, - arg_names, arg_types, first_default, arg_defaults, ret_var) +function optional_positional_defs!(ctx, method_stmts, srcref, callex, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, + arg_defaults, ret_var) # Replace placeholder arguments with variables - we need to pass them to # the inner method for dispatch even when unused in the inner method body def_arg_names = map(arg_names) do arg @@ -1394,10 +1395,24 @@ function _optional_positional_defs!(ctx, method_stmts, srcref, callex, typevar_names, typevar_stmts) # TODO: Ensure we preserve @nospecialize metadata in args push!(method_stmts, - _method_def_expr(ctx, srcref, callex, method_table, nothing, - trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types, - ret_var, body)) + method_def_expr(ctx, srcref, callex, method_table, nothing, + trimmed_typevar_names, 
trimmed_arg_names, trimmed_arg_types, + ret_var, body)) + end +end + +# Check valid identifier/function names +function is_valid_func_name(ex) + k = kind(ex) + if k == K"Identifier" + name = ex.name_val + elseif k == K"." && numchildren(ex) == 2 && kind(ex[2]) == K"Symbol" + # `function A.f(x,y) ...` + name = ex[2].name_val + else + return false end + return name != "ccall" && name != "cglobal" end function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity) @@ -1405,7 +1420,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= name = ex[1] if numchildren(ex) == 1 && is_identifier_like(name) # Function declaration with no methods - if !is_valid_name(name) + if !is_valid_func_name(name) throw(LoweringError(name, "Invalid function name")) end return @ast ctx ex [K"method" name=>K"Symbol"] @@ -1431,24 +1446,73 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= name = name[1] end - if kind(name) == K"tuple" - TODO(name, "Anon function lowering") - elseif kind(name) != K"call" + callex = if kind(name) == K"call" + name + elseif kind(name) == K"tuple" + # Anonymous function syntax `function (x,y) ... end` + @ast ctx name [K"call" + "#anon#"::K"Placeholder" + children(name)... + ] + else throw(LoweringError(name, "Bad function definition")) end + # Fixup for `new` constructor sigs if necessary + callex = rewrite_call(callex) - callex = rewrite_call(name) - # TODO - # dotop names - # overlays - static_parameters = SyntaxList(ctx) - - # Add self argument where necessary - args = callex[2:end] + # Construct method argument lists of names and types. + # + # First, match the "self" argument: In the method signature, each function + # gets a self argument name+type. For normal generic functions, this is a + # singleton and subtype of `Function`. But objects of any type can be made + # callable when the self argument is explicitly given using `::` syntax in + # the function name. 
name = callex[1] + bare_func_name = nothing + doc_obj = nothing + self_name = nothing + if kind(name) == K"::" + # Self argument is specified by user + if numchildren(name) == 1 + # function (::T)() ... + self_type = name[1] + else + # function (f::T)() ... + @chk numchildren(name) == 2 + self_name = name[1] + self_type = name[2] + end + doc_obj = self_type + else + if kind(name) == K"Placeholder" + # Anonymous function. In this case we may use an ssavar for the + # closure's value. + name = ssavar(ctx, name, name.name_val) + bare_func_name = name + elseif !is_valid_func_name(name) + throw(LoweringError(name, "Invalid function name")) + elseif is_identifier_like(name) + # Add methods to a global `Function` object, or local closure + # type function f() ... + bare_func_name = name + else + # Add methods to an existing Function + # function A.B.f() ... + end + doc_obj = name # todo: can closures be documented? + self_type = @ast ctx name [K"function_type" name] + end + # Add self argument + if isnothing(self_name) + self_name = new_mutable_var(ctx, name, "#self#"; kind=:argument) + end + # Expand remaining argument names and types arg_names = SyntaxList(ctx) arg_types = SyntaxList(ctx) + push!(arg_names, self_name) + push!(arg_types, self_type) + args = callex[2:end] body_stmts = SyntaxList(ctx) first_default = 0 arg_defaults = SyntaxList(ctx) @@ -1467,6 +1531,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= aname = n end push!(arg_names, aname) + atype = !isnothing(info.type) ? info.type : @ast ctx arg "Any"::K"core" if info.is_slurp if i != length(args) @@ -1497,47 +1562,12 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end # TODO: Ideally, ensure side effects of evaluating arg_types only # happen once - we should create an ssavar if there's any following - # defaults. (flisp lowering doesn't ensure this either) + # defaults. (flisp lowering doesn't ensure this either). 
Beware if + # fixing this that optional_positional_defs! depends on filtering the + # *symbolic* representation of arg_types. push!(arg_types, atype) end - bare_func_name = nothing - doc_obj = nothing - farg_name = nothing - if kind(name) == K"::" - # Add methods to an existing type - if numchildren(name) == 1 - # function (::T)() ... - farg_type = name[1] - else - # function (f::T)() ... - @chk numchildren(name) == 2 - farg_name = name[1] - farg_type = name[2] - end - doc_obj = farg_type - else - if !is_valid_name(name) - throw(LoweringError(name, "Invalid function name")) - end - if is_identifier_like(name) - # Add methods to a global `Function` object, or local closure - # type function f() ... - bare_func_name = name - else - # Add methods to an existing Function - # function A.B.f() ... - end - doc_obj = name # todo: can closures be documented? - farg_type = @ast ctx name [K"function_type" name] - end - # Add self argument - if isnothing(farg_name) - farg_name = new_mutable_var(ctx, name, "#self#"; kind=:argument) - end - pushfirst!(arg_names, farg_name) - pushfirst!(arg_types, farg_type) - if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type]) @@ -1561,17 +1591,16 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= method_stmts = SyntaxList(ctx) if !isempty(arg_defaults) - # For self argument added above - first_default += 1 - _optional_positional_defs!(ctx, method_stmts, ex, callex, - method_table, typevar_names, typevar_stmts, - arg_names, arg_types, first_default, arg_defaults, ret_var) + first_default += 1 # Offset for self argument + optional_positional_defs!(ctx, method_stmts, ex, callex, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, arg_defaults, ret_var) end # The method with all non-default arguments push!(method_stmts, - _method_def_expr(ctx, ex, callex, method_table, docs, - typevar_names, 
arg_names, arg_types, ret_var, body)) + method_def_expr(ctx, ex, callex, method_table, docs, + typevar_names, arg_names, arg_types, ret_var, body)) if !isnothing(docs) method_stmts[end] = @ast ctx docs [K"block" method_metadata := method_stmts[end] @@ -1603,6 +1632,36 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] end +function expand_arrow_arglist(ctx, arglist) + k = kind(arglist) + if k == K"where" + @ast ctx arglist [K"where" + expand_arrow_arglist(ctx, arglist[1]) + argslist[2] + ] + else + # The arglist can sometimes be parsed as a block, or something else, and + # fixing this is extremely awkward when nested inside `where`. See + # https://github.com/JuliaLang/JuliaSyntax.jl/pull/522 + if k == K"block" + @chk numchildren(arglist) == 2 + arglist = @ast ctx arglist [K"tuple" + ex[1] + [K"parameters" ex[2]] + ] + elseif k != K"tuple" + # `x::Int -> body` + arglist = @ast ctx arglist [K"tuple" + ex[1] + ] + end + @ast ctx arglist [K"call" + "->"::K"Placeholder" + children(arglist)... 
+ ] + end +end + function _make_macro_name(ctx, ex) k = kind(ex) if k == K"Identifier" || k == K"Symbol" @@ -2615,6 +2674,13 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) sig = expand_forms_2(ctx, ex[2], ex) elseif k == K"for" expand_forms_2(ctx, expand_for(ctx, ex)) + elseif k == K"->" + expand_forms_2(ctx, + @ast ctx ex [K"function" + expand_arrow_arglist(ctx, ex[1]) + ex[2] + ] + ) elseif k == K"function" expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) elseif k == K"macro" @@ -2642,9 +2708,6 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) end elseif k == K"where" expand_forms_2(ctx, expand_wheres(ctx, ex)) - elseif k == K"char" || k == K"var" - @chk numchildren(ex) == 1 - ex[1] elseif k == K"string" if numchildren(ex) == 1 && kind(ex[1]) == K"String" ex[1] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 3fedc1dbc2c2b..7c4a4935a05dd 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -27,7 +27,7 @@ end #------------------------------------------------------------------------------- _insert_if_not_present!(dict, key, val) = get!(dict, key, val) -function _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex) +function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex) k = kind(ex) if k == K"Identifier" push!(used_names, NameKey(ex)) @@ -51,14 +51,22 @@ function _find_scope_vars!(assignments, locals, destructured_args, globals, used if !(kind(v) in KSet"BindingId globalref Placeholder") _insert_if_not_present!(assignments, NameKey(v), v) end - _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex[2]) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex[2]) 
elseif k == K"function_decl" v = ex[1] - @assert kind(v) == K"Identifier" - _insert_if_not_present!(assignments, NameKey(v), v) + kv = kind(v) + if kv == K"Identifier" + _insert_if_not_present!(assignments, NameKey(v), v) + elseif kv == K"BindingId" + if !lookup_binding(ctx, v).is_ssa + TODO(v, "BindingId as function name") + end + else + @assert false + end else for e in children(ex) - _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) end end end @@ -67,7 +75,7 @@ end # into sets by type of usage. # # NB: This only works propery after desugaring -function find_scope_vars(ex) +function find_scope_vars(ctx, ex) ExT = typeof(ex) assignments = Dict{NameKey,ExT}() locals = Dict{NameKey,ExT}() @@ -77,7 +85,7 @@ function find_scope_vars(ex) used_bindings = Set{IdTag}() alias_bindings = Vector{Pair{NameKey,IdTag}}() for e in children(ex) - _find_scope_vars!(assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) end # Sort by key so that id generation is deterministic @@ -261,7 +269,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) assignments, locals, destructured_args, globals, - used, used_bindings, alias_bindings = find_scope_vars(ex) + used, used_bindings, alias_bindings = find_scope_vars(ctx, ex) # Construct a mapping from identifiers to bindings # @@ -405,7 +413,8 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, # identifiers do. 
for id in used_bindings binfo = lookup_binding(ctx, id) - if !binfo.is_ssa && binfo.kind !== :global + if (binfo.kind === :local && !binfo.is_ssa) || binfo.kind === :argument || + binfo.kind === :static_parameter if !haskey(lambda_bindings.bindings, id) init_lambda_binding(lambda_bindings, id, is_read=true, is_assigned=true) end @@ -554,7 +563,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) func_name = last(ctx.method_def_stack) if kind(func_name) == K"BindingId" func_name_id = func_name.var_id - if lookup_binding(ctx, func_name_id).kind == :local + if lookup_binding(ctx, func_name_id).kind === :local cbinds = get!(ctx.closure_bindings, func_name_id) do name_stack = Vector{String}() for fname in ctx.method_def_stack diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index d31293bcc78c6..f857ea91c6ac4 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -166,3 +166,61 @@ end 38 (return %₃₇) 14 (return %₁₂) +######################################## +# Anonymous function syntax with -> +x -> x*x +#--------------------- +1 --- thunk + 1 (global TestMod.#->##0) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##0 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##0) + 8 (= TestMod.#->##0 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##0 +3 (new %₂) +4 TestMod.#->##0 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ + 1 TestMod.* + 2 (call %₁ slot₂/x slot₂/x) + 3 (return %₂) +9 (return %₃) + +######################################## +# Anonymous function syntax with `function` +function (x) + x*x +end +#--------------------- +1 --- thunk + 1 (global TestMod.##anon###0) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :##anon###0 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.##anon###0) + 8 (= TestMod.##anon###0 %₅) + 9 (call core.svec) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +2 TestMod.##anon###0 +3 (new %₂) +4 TestMod.##anon###0 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ + 1 TestMod.* + 2 (call %₁ slot₂/x slot₂/x) + 3 (return %₂) +9 (return %₃) + diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 0403de848e119..af1a4611381cf 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -222,7 +222,7 @@ end 8 (return %₂) ######################################## -# Error: Invalid function name +# Error: Invalid function name ccall function ccall() end #--------------------- @@ -232,7 +232,7 @@ function ccall() end ######################################## -# Error: Invalid function name +# Error: Invalid function name ccall function A.ccall() end #--------------------- @@ -241,6 +241,26 @@ function A.ccall() # └─────┘ ── Invalid function name end +######################################## +# Error: Invalid dotop function name +function (.+)(x,y) +end +#--------------------- +LoweringError: +function (.+)(x,y) +# └┘ ── Invalid function name +end + +######################################## +# 
Error: Invalid function name +function f[](x,y) +end +#--------------------- +LoweringError: +function f[](x,y) +# └─┘ ── Invalid function name +end + ######################################## # Keyword calls f(x; a=1, b=2) @@ -697,7 +717,22 @@ end 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 4 =#))))) 6 --- method core.nothing %₅ + +######################################## +# Binding docs to callable type +""" +some docs +""" +function (x::T)() +end +#--------------------- +1 TestMod.T +2 (call core.svec %₁) +3 (call core.svec) +4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 4 =#))))) +5 --- method core.nothing %₄ 1 (return core.nothing) -7 (call JuliaLowering.bind_docs! %₁ "some docs\n" %₅) -8 (return %₁) +6 TestMod.T +7 (call JuliaLowering.bind_docs! %₆ "some docs\n" %₄) +8 (return %₇) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 03ada78397202..a6c3a831ac699 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -138,7 +138,7 @@ function read_ir_test_cases(filename) cases_str = only(parts) end (preamble_str, - [match_ir_test_case(s) for s in split(cases_str, r"####*") if strip(s) != ""]) + [match_ir_test_case(s) for s in split(cases_str, r"######*") if strip(s) != ""]) end function format_ir_for_test(mod, description, input, expect_error=false) @@ -154,7 +154,7 @@ function format_ir_for_test(mod, description, input, expect_error=false) if expect_error && (exc isa LoweringError) return sprint(io->Base.showerror(io, exc, show_detail=false)) else - rethrow() + throw("Error in test case \"$description\"") end end end From a0c35e0d840859e0569ddc95f3388e599332c2a2 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 22 Dec 2024 19:01:18 +1000 Subject: [PATCH 0914/1109] Filter out some unnecessary newvar nodes and fix test cases * Filter out newvar nodes which are assigned-before-branch * Set return value of `function` to `nothing` when adding a method to a callable type or function in 
another module (this is compatible with existing lowering. Is it sensible though?) * Fix IR tests so that they pass again. However, some extra newvar nodes are still present so there's extra churn here. --- JuliaLowering/src/closure_conversion.jl | 11 +- JuliaLowering/src/desugaring.jl | 6 + JuliaLowering/src/kinds.jl | 2 +- JuliaLowering/src/linear_ir.jl | 53 +- JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/src/syntax_graph.jl | 4 + JuliaLowering/test/assignments_ir.jl | 56 +- JuliaLowering/test/branching_ir.jl | 78 +-- JuliaLowering/test/closures.jl | 67 +++ JuliaLowering/test/closures_ir.jl | 113 ++-- JuliaLowering/test/decls_ir.jl | 30 +- JuliaLowering/test/demo.jl | 66 ++- JuliaLowering/test/destructuring_ir.jl | 35 +- JuliaLowering/test/exceptions_ir.jl | 54 +- JuliaLowering/test/functions_ir.jl | 682 +++++++++++++----------- JuliaLowering/test/macros_ir.jl | 32 +- JuliaLowering/test/runtests.jl | 1 + JuliaLowering/test/scopes_ir.jl | 58 +- JuliaLowering/test/typedefs.jl | 4 +- JuliaLowering/test/typedefs_ir.jl | 287 +++++----- 20 files changed, 975 insertions(+), 666 deletions(-) create mode 100644 JuliaLowering/test/closures.jl diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index e1e283340af20..81e07d489e909 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -362,10 +362,10 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) init_closure_args... 
] ] - func_name + ::K"TOMBSTONE" ] else - func_name + @ast ctx ex (::K"TOMBSTONE") end else # Single-arg K"method" has the side effect of creating a global @@ -394,9 +394,12 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) # * Renumbering SSA vars # * Ensuring that moved locals become slots in the top level thunk push!(ctx.toplevel_stmts, body) - name + @ast ctx ex (::K"TOMBSTONE") else - _convert_closures(ctx, body) + @ast ctx ex [K"block" + body + ::K"TOMBSTONE" + ] end else mapchildren(e->_convert_closures(ctx, e), ctx, ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index c70032e148ac9..ae071d32164b7 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1629,6 +1629,12 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] ] ] + if !isnothing(bare_func_name) + # K"function_decl" ensures this name is defined + bare_func_name + else + "nothing"::K"core" + end ] end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 6f290a70e58bb..817865e12358e 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -61,7 +61,7 @@ function _register_kinds() "lambda" # [K"function_decl" name] # Declare a zero-method generic function with global `name` or - # creates a closure object and binds it to the local `name`. + # creates a closure object and assigns it to the local `name`. 
"function_decl" # [K"function_type name] # Evaluates to the type of the function or closure with given `name` diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 0f9ec39420ce3..df78dfc6b63d7 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -804,6 +804,52 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end end +function _remove_vars_with_isdefined_check!(vars, ex) + if is_leaf(ex) || is_quoted(ex) + return + elseif kind(ex) == K"isdefined" + delete!(vars, ex[1].var_id) + else + for e in children(ex) + _remove_vars_with_isdefined_check!(vars, e) + end + end +end + +# Find newvar nodes that are unnecessary because +# 1. The variable is not captured and +# 2. The variable is assigned before any branches. +# +# This is used to remove newvar nodes that are not needed for re-initializing +# variables to undefined (see Julia issue #11065). It doesn't look for variable +# *uses*, because any variables used-before-def that also pass this test are +# *always* used undefined, and therefore don't need to be reinitialized. The +# one exception to that is `@isdefined`, which can observe an undefined +# variable without throwing an error. 
+function unnecessary_newvar_ids(ctx, stmts) + vars = Set{IdTag}() + ids_assigned_before_branch = Set{IdTag}() + for ex in stmts + _remove_vars_with_isdefined_check!(vars, ex) + k = kind(ex) + if k == K"newvar" + id = ex[1].var_id + if !lookup_binding(ctx, id).is_captured + push!(vars, id) + end + elseif k == K"goto" || k == K"gotoifnot" || (k == K"=" && kind(ex[2]) == K"enter") + empty!(vars) + elseif k == K"=" + id = ex[1].var_id + if id in vars + delete!(vars, id) + push!(ids_assigned_before_branch, id) + end + end + end + ids_assigned_before_branch +end + # flisp: compile-body function compile_body(ctx, ex) compile(ctx, ex, true, true) @@ -834,7 +880,12 @@ function compile_body(ctx, ex) @assert kind(ctx.code[i]) == K"TOMBSTONE" ctx.code[i] = @ast ctx origin.goto [K"goto" target.label] end - # TODO: Filter out any newvar nodes where the arg is definitely initialized + + # Filter out unnecessary newvar nodes + ids_assigned_before_branch = unnecessary_newvar_ids(ctx, ctx.code) + filter!(ctx.code) do ex + !(kind(ex) == K"newvar" && ex[1].var_id in ids_assigned_before_branch) + end end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 7c4a4935a05dd..b6bda4c3f391d 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -473,7 +473,7 @@ end function add_local_decls!(ctx, stmts, srcref, scope) # Add local decls to start of block so that closure conversion can # initialize if necessary. 
- for id in values(scope.var_ids) + for id in sort!(collect(values(scope.var_ids))) binfo = lookup_binding(ctx, id) if binfo.kind == :local push!(stmts, @ast ctx srcref [K"local" id::K"BindingId"]) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 26b4650934e09..2c651565f03c1 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -705,6 +705,10 @@ function Base.empty!(v::SyntaxList) v end +function Base.deleteat!(v::SyntaxList, inds) + deleteat!(v.ids, inds) +end + function Base.copy(v::SyntaxList) SyntaxList(v.graph, copy(v.ids)) end diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index efd7c360d365a..e20ea56353f16 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -25,21 +25,24 @@ end ######################################## # short form function def, not chain of assignments -let +begin + local a a = b() = c = d end #--------------------- -1 (method :b) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 2 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.b) +2 TestMod.b +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) +7 --- method core.nothing %₆ 1 TestMod.d 2 (= slot₂/c %₁) 3 (return %₁) -7 (= slot₁/a %₁) -8 (return %₁) +8 TestMod.b +9 (= slot₁/a %₈) +10 (return %₈) ######################################## # a.b = ... => setproperty! 
assignment @@ -73,23 +76,24 @@ let x end #--------------------- -1 TestMod.f -2 (call %₁) -3 (= slot₂/tmp %₂) -4 slot₂/tmp -5 TestMod.T -6 (call core.isa %₄ %₅) -7 (gotoifnot %₆ label₉) -8 (goto label₁₄) -9 TestMod.T -10 slot₂/tmp -11 (call top.convert %₉ %₁₀) -12 TestMod.T -13 (= slot₂/tmp (call core.typeassert %₁₁ %₁₂)) -14 slot₂/tmp -15 (= slot₁/x %₁₄) -16 slot₁/x -17 (return %₁₆) +1 (newvar slot₁) +2 TestMod.f +3 (call %₂) +4 (= slot₂/tmp %₃) +5 slot₂/tmp +6 TestMod.T +7 (call core.isa %₅ %₆) +8 (gotoifnot %₇ label₁₀) +9 (goto label₁₅) +10 TestMod.T +11 slot₂/tmp +12 (call top.convert %₁₀ %₁₁) +13 TestMod.T +14 (= slot₂/tmp (call core.typeassert %₁₂ %₁₃)) +15 slot₂/tmp +16 (= slot₁/x %₁₅) +17 slot₁/x +18 (return %₁₇) ######################################## # "complex lhs" of `::T` => type-assert, not decl diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index 6fb47f5c7d996..48929f3856e77 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -7,11 +7,13 @@ begin end end #--------------------- -1 slot₁/a -2 (gotoifnot %₁ label₅) -3 slot₂/b -4 (return %₃) -5 (return core.nothing) +1 (newvar slot₁) +2 (newvar slot₂) +3 slot₁/a +4 (gotoifnot %₃ label₇) +5 slot₂/b +6 (return %₅) +7 (return core.nothing) ######################################## # Branching, !tail && !value @@ -23,11 +25,14 @@ begin c end #--------------------- -1 slot₁/a -2 (gotoifnot %₁ label₄) -3 slot₂/b -4 slot₃/c -5 (return %₄) +1 (newvar slot₁) +2 (newvar slot₂) +3 (newvar slot₃) +4 slot₁/a +5 (gotoifnot %₄ label₇) +6 slot₂/b +7 slot₃/c +8 (return %₇) ######################################## # Branching with else @@ -40,12 +45,15 @@ begin end end #--------------------- -1 slot₁/a -2 (gotoifnot %₁ label₅) -3 slot₂/b -4 (return %₃) -5 slot₃/c -6 (return %₅) +1 (newvar slot₁) +2 (newvar slot₂) +3 (newvar slot₃) +4 slot₁/a +5 (gotoifnot %₄ label₈) +6 slot₂/b +7 (return %₆) +8 slot₃/c +9 (return %₈) 
######################################## # Branching with else, !tail && !value @@ -59,13 +67,17 @@ begin d end #--------------------- -1 slot₁/a -2 (gotoifnot %₁ label₅) -3 slot₂/b -4 (goto label₆) -5 slot₃/c -6 slot₄/d -7 (return %₆) +1 (newvar slot₁) +2 (newvar slot₂) +3 (newvar slot₃) +4 (newvar slot₄) +5 slot₁/a +6 (gotoifnot %₅ label₉) +7 slot₂/b +8 (goto label₁₀) +9 slot₃/c +10 slot₄/d +11 (return %₁₀) ######################################## # Blocks compile directly to branches @@ -76,14 +88,18 @@ begin end end #--------------------- -1 slot₁/a -2 slot₂/b -3 (gotoifnot %₂ label₈) -4 slot₃/c -5 (gotoifnot %₄ label₈) -6 slot₄/d -7 (return %₆) -8 (return core.nothing) +1 (newvar slot₁) +2 (newvar slot₂) +3 (newvar slot₃) +4 (newvar slot₄) +5 slot₁/a +6 slot₂/b +7 (gotoifnot %₆ label₁₂) +8 slot₃/c +9 (gotoifnot %₈ label₁₂) +10 slot₄/d +11 (return %₁₀) +12 (return core.nothing) ######################################## # symbolic goto forward jump diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl new file mode 100644 index 0000000000000..81253a185dc06 --- /dev/null +++ b/JuliaLowering/test/closures.jl @@ -0,0 +1,67 @@ + +@testset "Functions" begin + +test_mod = Module() + +# Capture assigned before closure +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + f(y) = x+y + f(2), f(3) +end +""") == (3,4) + +# Capture assigned after closure +@test JuliaLowering.include_string(test_mod, """ +let + f(y) = x+y + x = 1 + f(2) +end +""") == 3 + +# Capture assigned inside closure +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + function f(y) + x = y + end + f(100) + x +end +""") == 100 + +# Anon function syntax +@test JuliaLowering.include_string(test_mod, """ +begin + call_it(f, arg) = f(arg) + local y = 2 + call_it(x->x+y, 3) +end +""") == 5 + +# Anon function syntax with `where` +@test JuliaLowering.include_string(test_mod, """ +begin + call_it(f, arg) = f(arg) + local y = 2 + call_it((x::T where 
{T<:Integer})->x+y, 3) +end +""") == 5 + + +# Attempt to reference capture which is not assigned +@test_throws UndefVarError(:x, :local) JuliaLowering.include_string(test_mod, """ +let + function f() + x + end + f() + x = 1 +end +""") + + +end diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index f857ea91c6ac4..16d61a179a3b6 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -8,8 +8,7 @@ let end #--------------------- 1 (= slot₂ (call core.Box)) -2 (newvar slot₁) -3 --- thunk +2 --- thunk 1 (global TestMod.#f##0) 2 (call core.svec) 3 (call core.svec :x) @@ -21,11 +20,11 @@ end 9 (call core.svec core.Box) 10 (call core._typebody! %₅ %₉) 11 (return core.nothing) -4 TestMod.#f##0 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 3 =#))))) -8 --- method core.nothing %₇ +3 TestMod.#f##0 +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) +7 --- method core.nothing %₆ 1 TestMod.+ 2 (call core.getfield slot₁/x :x) 3 (call core.isdefined %₂ :contents) @@ -36,15 +35,14 @@ end 8 (call core.getfield %₂ :contents) 9 (call %₁ %₈ slot₂/y) 10 (return %₉) -9 1 -10 slot₂/x -11 (call core.setfield! %₁₀ :contents %₉) -12 TestMod.#f##0 -13 slot₂/f -14 (= slot₁/f (new %₁₂ %₁₃)) -15 slot₁/f -16 slot₁/f -17 (return %₁₆) +8 1 +9 slot₂/x +10 (call core.setfield! %₉ :contents %₈) +11 TestMod.#f##0 +12 slot₂/f +13 (= slot₁/f (new %₁₁ %₁₂)) +14 slot₁/f +15 (return %₁₄) ######################################## # Closure which sets the value of a captured variable @@ -56,8 +54,7 @@ let end #--------------------- 1 (= slot₂ (call core.Box)) -2 (newvar slot₁) -3 --- thunk +2 --- thunk 1 (global TestMod.#f##1) 2 (call core.svec) 3 (call core.svec :x) @@ -69,24 +66,23 @@ end 9 (call core.svec core.Box) 10 (call core._typebody! 
%₅ %₉) 11 (return core.nothing) -4 TestMod.#f##1 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 3 =#))))) -8 --- method core.nothing %₇ +3 TestMod.#f##1 +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) +7 --- method core.nothing %₆ 1 2 2 (call core.getfield slot₁/x :x) 3 (call core.setfield! %₂ :contents %₁) 4 (return %₁) -9 1 -10 slot₂/x -11 (call core.setfield! %₁₀ :contents %₉) -12 TestMod.#f##1 -13 slot₂/f -14 (= slot₁/f (new %₁₂ %₁₃)) -15 slot₁/f -16 slot₁/f -17 (return %₁₆) +8 1 +9 slot₂/x +10 (call core.setfield! %₉ :contents %₈) +11 TestMod.#f##1 +12 slot₂/f +13 (= slot₁/f (new %₁₁ %₁₂)) +14 slot₁/f +15 (return %₁₄) ######################################## # Function where arguments are captured into a closure @@ -127,44 +123,21 @@ end 12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) 13 --- method core.nothing %₁₂ 1 (= slot₂/x (call core.Box slot₂/x)) - 2 slot₂/x - 3 (call core.isdefined %₂ :contents) - 4 (gotoifnot %₃ label₆) - 5 (goto label₈) - 6 (newvar slot₅/x) - 7 slot₅/x - 8 (call core.getfield %₂ :contents) - 9 (call core.Box %₈) - 10 (call core.setfield! 
slot₂/x :contents %₉) - 11 (newvar slot₃) - 12 TestMod.#f#g##0 - 13 slot₂/g - 14 (call core.isdefined %₁₃ :contents) - 15 (gotoifnot %₁₄ label₁₇) - 16 (goto label₁₉) - 17 (newvar slot₆/x) - 18 slot₆/x - 19 (call core.getfield %₁₃ :contents) - 20 (= slot₃/g (new %₁₂ %₁₉)) - 21 slot₃/g - 22 slot₃/g - 23 slot₃/g - 24 (call %₂₃) - 25 slot₂/x - 26 (call core.isdefined %₂₅ :contents) - 27 (gotoifnot %₂₆ label₂₉) - 28 (goto label₃₁) - 29 (newvar slot₇/x) - 30 slot₇/x - 31 (call core.getfield %₂₅ :contents) - 32 (call core.isdefined %₃₁ :contents) - 33 (gotoifnot %₃₂ label₃₅) - 34 (goto label₃₇) - 35 (newvar slot₄/x) - 36 slot₄/x - 37 (call core.getfield %₃₁ :contents) - 38 (return %₃₇) -14 (return %₁₂) + 2 TestMod.#f#g##0 + 3 (= slot₃/g (new %₂ slot₂/g)) + 4 slot₃/g + 5 slot₃/g + 6 (call %₅) + 7 slot₂/x + 8 (call core.isdefined %₇ :contents) + 9 (gotoifnot %₈ label₁₁) + 10 (goto label₁₃) + 11 (newvar slot₄/x) + 12 slot₄/x + 13 (call core.getfield %₇ :contents) + 14 (return %₁₃) +14 TestMod.f +15 (return %₁₄) ######################################## # Anonymous function syntax with -> diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 9648b3a764572..7082d300fd713 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -2,20 +2,22 @@ # Local declaration with type local x::T = 1 #--------------------- -1 (= slot₂/tmp 1) -2 slot₂/tmp -3 TestMod.T -4 (call core.isa %₂ %₃) -5 (gotoifnot %₄ label₇) -6 (goto label₁₂) -7 TestMod.T -8 slot₂/tmp -9 (call top.convert %₇ %₈) -10 TestMod.T -11 (= slot₂/tmp (call core.typeassert %₉ %₁₀)) -12 slot₂/tmp -13 (= slot₁/x %₁₂) -14 (return 1) +1 (newvar slot₁) +2 1 +3 (= slot₂/tmp %₂) +4 slot₂/tmp +5 TestMod.T +6 (call core.isa %₄ %₅) +7 (gotoifnot %₆ label₉) +8 (goto label₁₄) +9 TestMod.T +10 slot₂/tmp +11 (call top.convert %₉ %₁₀) +12 TestMod.T +13 (= slot₂/tmp (call core.typeassert %₁₁ %₁₂)) +14 slot₂/tmp +15 (= slot₁/x %₁₄) +16 (return %₂) ######################################## 
# const diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index c247d49d1e04e..cc70cb9c8663e 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -13,7 +13,10 @@ function var_kind(ctx, ex) if isnothing(id) return nothing end - return lookup_binding(ctx, id).kind + binfo = lookup_binding(ctx, id) + return binfo.kind == :local ? + (binfo.is_captured ? :local_captured : :local) : + binfo.kind end # Extract module of globals for highlighting @@ -163,8 +166,21 @@ eval(JuliaLowering.@SyntaxTree :(baremodule M end end + macro mmm(ex) + :(let + local x + function f() + (x, $ex) + end + f() + end) + end + end)) +# module M +# end +# #------------------------------------------------------------------------------- # Demos of the prototype @@ -701,6 +717,52 @@ function f(x=1, ys...=(1,2)...) end """ +src = """ +let + x = 10 + function f(y) + x + y + end +end +""" + +src = """ +begin + local f, set_x + local x = 10 + local y = 100 + function f() + z = 1 + y - x + z + end + function set_x() + x = 1 + end + println("f = ", f()) + set_x() + y = 10 + println("f = ", f()) +end +""" + +src = """ +x->y +""" + +src = """ +struct X + x + f() = new(1) + X() = f() + X(x) = new(x) + X(y,z)::ReallyXIPromise = new(y+z) + " + Docs for X constructor + " + X(a,b,c) = new(a) +end +""" + # TODO: fix this - it's interpreted in a bizarre way as a kw call. 
# src = """ # function f(x=y=1) @@ -723,7 +785,7 @@ ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) @info "Desugared" ex_desugar formatsrc(ex_desugar, color_by=:scope_layer) ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) -@info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=:var_id) +@info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e)) ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) @info "Closure converted" ex_converted formatsrc(ex_converted, color_by=:var_id) diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index d7d110f619701..b73d737e9c68c 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -245,23 +245,24 @@ let (; x::T) = rhs end #--------------------- -1 TestMod.rhs -2 (call top.getproperty %₁ :x) -3 (= slot₂/tmp %₂) -4 slot₂/tmp -5 TestMod.T -6 (call core.isa %₄ %₅) -7 (gotoifnot %₆ label₉) -8 (goto label₁₄) -9 TestMod.T -10 slot₂/tmp -11 (call top.convert %₉ %₁₀) -12 TestMod.T -13 (= slot₂/tmp (call core.typeassert %₁₁ %₁₂)) -14 slot₂/tmp -15 (= slot₁/x %₁₄) -16 TestMod.rhs -17 (return %₁₆) +1 (newvar slot₁) +2 TestMod.rhs +3 (call top.getproperty %₂ :x) +4 (= slot₂/tmp %₃) +5 slot₂/tmp +6 TestMod.T +7 (call core.isa %₅ %₆) +8 (gotoifnot %₇ label₁₀) +9 (goto label₁₅) +10 TestMod.T +11 slot₂/tmp +12 (call top.convert %₁₀ %₁₁) +13 TestMod.T +14 (= slot₂/tmp (call core.typeassert %₁₂ %₁₃)) +15 slot₂/tmp +16 (= slot₁/x %₁₅) +17 TestMod.rhs +18 (return %₁₇) ######################################## # Error: Property destructuring with frankentuple diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl index 49f4a116dc482..faf09371e465a 100644 --- a/JuliaLowering/test/exceptions_ir.jl +++ b/JuliaLowering/test/exceptions_ir.jl @@ -115,18 +115,19 @@ let end end #--------------------- -1 (enter label₇) -2 TestMod.a -3 (leave %₁) -4 TestMod.c 
-5 (= slot₂/try_result %₄) -6 (goto label₁₀) -7 TestMod.b -8 (= slot₂/try_result %₇) -9 (pop_exception %₁) -10 slot₂/try_result -11 (= slot₁/z %₁₀) -12 (return %₁₀) +1 (newvar slot₁) +2 (enter label₈) +3 TestMod.a +4 (leave %₂) +5 TestMod.c +6 (= slot₂/try_result %₅) +7 (goto label₁₁) +8 TestMod.b +9 (= slot₂/try_result %₈) +10 (pop_exception %₂) +11 slot₂/try_result +12 (= slot₁/z %₁₁) +13 (return %₁₁) ######################################## # try/catch/else, not value/tail @@ -183,20 +184,21 @@ let end end #--------------------- -1 (enter label₇) -2 (= slot₃/finally_tag -1) -3 TestMod.a -4 (= slot₂/try_result %₃) -5 (leave %₁) -6 (goto label₈) -7 (= slot₃/finally_tag 1) -8 TestMod.b -9 (call core.=== slot₃/finally_tag 1) -10 (gotoifnot %₉ label₁₂) -11 (call top.rethrow) -12 slot₂/try_result -13 (= slot₁/z %₁₂) -14 (return %₁₂) +1 (newvar slot₁) +2 (enter label₈) +3 (= slot₃/finally_tag -1) +4 TestMod.a +5 (= slot₂/try_result %₄) +6 (leave %₂) +7 (goto label₉) +8 (= slot₃/finally_tag 1) +9 TestMod.b +10 (call core.=== slot₃/finally_tag 1) +11 (gotoifnot %₁₀ label₁₃) +12 (call top.rethrow) +13 slot₂/try_result +14 (= slot₁/z %₁₃) +15 (return %₁₃) ######################################## # basic try/finally, not value/tail diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index af1a4611381cf..79cf6d48d63c9 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -4,16 +4,18 @@ function f(x, _, y) x + y end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂ core.Any core.Any core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any core.Any core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 TestMod.+ 2 (call %₁ slot₂/x slot₄/y) 3 (return %₂) -7 
(return %₁) +8 TestMod.f +9 (return %₈) ######################################## # Functions with argument types only, no name @@ -21,16 +23,18 @@ function f(::T, x) x end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 TestMod.T -4 (call core.svec %₂ %₃ core.Any) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 TestMod.T +5 (call core.svec %₃ %₄ core.Any) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ 1 slot₃/x 2 (return %₁) -8 (return %₁) +9 TestMod.f +10 (return %₉) ######################################## # Functions argument types @@ -38,16 +42,18 @@ function f(x, y::T) body end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 TestMod.T -4 (call core.svec %₂ core.Any %₃) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 TestMod.T +5 (call core.svec %₃ core.Any %₄) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ 1 TestMod.body 2 (return %₁) -8 (return %₁) +9 TestMod.f +10 (return %₉) ######################################## # Functions with slurp of Any @@ -55,16 +61,18 @@ function f(x, ys...) 
body end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.apply_type core.Vararg core.Any) -4 (call core.svec %₂ core.Any %₃) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.apply_type core.Vararg core.Any) +5 (call core.svec %₃ core.Any %₄) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ 1 TestMod.body 2 (return %₁) -8 (return %₁) +9 TestMod.f +10 (return %₉) ######################################## # Functions with slurp of T @@ -72,17 +80,19 @@ function f(x, ys::T...) body end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 TestMod.T -4 (call core.apply_type core.Vararg %₃) -5 (call core.svec %₂ core.Any %₄) -6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 TestMod.T +5 (call core.apply_type core.Vararg %₄) +6 (call core.svec %₃ core.Any %₅) +7 (call core.svec) +8 (call core.svec %₆ %₇ :($(QuoteNode(:(#= line 1 =#))))) +9 --- method core.nothing %₈ 1 TestMod.body 2 (return %₁) -9 (return %₁) +10 TestMod.f +11 (return %₁₀) ######################################## # Error: Function with slurp not in last position arg @@ -102,27 +112,29 @@ function f(::T, ::U, ::V) where T where {U,V} (T,U,V) end #--------------------- -1 (= slot₂/U (call core.TypeVar :U)) -2 (= slot₃/V (call core.TypeVar :V)) -3 (= slot₁/T (call core.TypeVar :T)) -4 (method :f) -5 (call core.Typeof %₄) -6 slot₁/T -7 slot₂/U -8 slot₃/V -9 (call core.svec %₅ %₆ %₇ %₈) -10 slot₂/U -11 slot₃/V -12 slot₁/T -13 (call core.svec %₁₀ %₁₁ %₁₂) -14 (call core.svec %₉ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) -15 --- method core.nothing %₁₄ +1 (method TestMod.f) +2 (= slot₂/U (call core.TypeVar :U)) +3 (= slot₃/V (call core.TypeVar :V)) +4 (= slot₁/T (call 
core.TypeVar :T)) +5 TestMod.f +6 (call core.Typeof %₅) +7 slot₁/T +8 slot₂/U +9 slot₃/V +10 (call core.svec %₆ %₇ %₈ %₉) +11 slot₂/U +12 slot₃/V +13 slot₁/T +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 (call core.svec %₁₀ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) +16 --- method core.nothing %₁₅ 1 static_parameter₃ 2 static_parameter₁ 3 static_parameter₂ 4 (call core.tuple %₁ %₂ %₃) 5 (return %₄) -16 (return %₄) +17 TestMod.f +18 (return %₁₇) ######################################## # Static parameter with bounds and used with apply_type in argument @@ -130,22 +142,24 @@ function f(::S{T}) where X <: T <: Y T end #--------------------- -1 TestMod.X -2 TestMod.Y -3 (= slot₁/T (call core.TypeVar :T %₁ %₂)) -4 (method :f) -5 (call core.Typeof %₄) -6 TestMod.S -7 slot₁/T -8 (call core.apply_type %₆ %₇) -9 (call core.svec %₅ %₈) -10 slot₁/T -11 (call core.svec %₁₀) -12 (call core.svec %₉ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +1 (method TestMod.f) +2 TestMod.X +3 TestMod.Y +4 (= slot₁/T (call core.TypeVar :T %₂ %₃)) +5 TestMod.f +6 (call core.Typeof %₅) +7 TestMod.S +8 slot₁/T +9 (call core.apply_type %₇ %₈) +10 (call core.svec %₆ %₉) +11 slot₁/T +12 (call core.svec %₁₁) +13 (call core.svec %₁₀ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) +14 --- method core.nothing %₁₃ 1 static_parameter₁ 2 (return %₁) -14 (return %₄) +15 TestMod.f +16 (return %₁₅) ######################################## # Return types @@ -156,12 +170,13 @@ function f(x)::Int 0xff end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 TestMod.Int 2 (gotoifnot slot₂/x label₃) 3 (= slot₃/tmp 0xff) @@ -174,7 +189,8 @@ end 10 (= slot₃/tmp (call 
core.typeassert %₉ %₁)) 11 slot₃/tmp 12 (return %₁₁) -7 (return %₁) +8 TestMod.f +9 (return %₈) ######################################## # Callable type @@ -189,7 +205,7 @@ end 5 --- method core.nothing %₄ 1 slot₂/x 2 (return %₁) -6 (return %₁) +6 (return core.nothing) ######################################## # Callable type with instance @@ -204,7 +220,26 @@ end 5 --- method core.nothing %₄ 1 (call core.tuple slot₁/y slot₂/x) 2 (return %₁) -6 (return %₁) +6 (return core.nothing) + +######################################## +# `where` params used in callable object type +function (x::X1{T})() where T + T +end +#--------------------- +1 (= slot₁/T (call core.TypeVar :T)) +2 TestMod.X1 +3 slot₁/T +4 (call core.apply_type %₂ %₃) +5 (call core.svec %₄) +6 slot₁/T +7 (call core.svec %₆) +8 (call core.svec %₅ %₇ :($(QuoteNode(:(#= line 1 =#))))) +9 --- method core.nothing %₈ + 1 static_parameter₁ + 2 (return %₁) +10 (return core.nothing) ######################################## # Function with module ref in name @@ -219,7 +254,7 @@ end 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ 1 (return core.nothing) -8 (return %₂) +8 (return core.nothing) ######################################## # Error: Invalid function name ccall @@ -325,35 +360,39 @@ function f(x::T, y::S=1, z::U=2) (x,y) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 TestMod.T -4 (call core.svec %₂ %₃) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 TestMod.T +5 (call core.svec %₃ %₄) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) -8 (call core.Typeof %₁) -9 TestMod.T -10 TestMod.S -11 (call core.svec %₈ %₉ %₁₀) -12 (call core.svec) -13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) -14 --- method core.nothing 
%₁₃ +9 TestMod.f +10 (call core.Typeof %₉) +11 TestMod.T +12 TestMod.S +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 (call core.svec) +15 (call core.svec %₁₃ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) +16 --- method core.nothing %₁₅ 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -15 (call core.Typeof %₁) -16 TestMod.T -17 TestMod.S -18 TestMod.U -19 (call core.svec %₁₅ %₁₆ %₁₇ %₁₈) -20 (call core.svec) -21 (call core.svec %₁₉ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) -22 --- method core.nothing %₂₁ +17 TestMod.f +18 (call core.Typeof %₁₇) +19 TestMod.T +20 TestMod.S +21 TestMod.U +22 (call core.svec %₁₈ %₁₉ %₂₀ %₂₁) +23 (call core.svec) +24 (call core.svec %₂₂ %₂₃ :($(QuoteNode(:(#= line 1 =#))))) +25 --- method core.nothing %₂₄ 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) -23 (return %₁) +26 TestMod.f +27 (return %₂₆) ######################################## # Default positional args which depend on other args @@ -361,29 +400,33 @@ function f(x=1, y=x) (x,y) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call slot₁/#self# 1) 2 (return %₁) -7 (call core.Typeof %₁) -8 (call core.svec %₇ core.Any) -9 (call core.svec) -10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 1 =#))))) -11 --- method core.nothing %₁₀ +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ 1 (call slot₁/#self# slot₂/x slot₂/x) 2 (return %₁) -12 (call core.Typeof %₁) -13 (call core.svec %₁₂ core.Any core.Any) -14 (call core.svec) -15 (call core.svec %₁₃ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) -16 --- method core.nothing %₁₅ +14 
TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any core.Any) +17 (call core.svec) +18 (call core.svec %₁₆ %₁₇ :($(QuoteNode(:(#= line 1 =#))))) +19 --- method core.nothing %₁₈ 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) -17 (return %₁) +20 TestMod.f +21 (return %₂₀) ######################################## # Default positional args with missing arg names (implicit placeholders) @@ -391,32 +434,36 @@ function f(::Int, y=1, z=2) (y, z) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 TestMod.Int -4 (call core.svec %₂ %₃) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 TestMod.Int +5 (call core.svec %₃ %₄) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ 1 (call slot₁/#self# slot₂/_ 1 2) 2 (return %₁) -8 (call core.Typeof %₁) -9 TestMod.Int -10 (call core.svec %₈ %₉ core.Any) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +9 TestMod.f +10 (call core.Typeof %₉) +11 TestMod.Int +12 (call core.svec %₁₀ %₁₁ core.Any) +13 (call core.svec) +14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) +15 --- method core.nothing %₁₄ 1 (call slot₁/#self# slot₂/_ slot₃/y 2) 2 (return %₁) -14 (call core.Typeof %₁) -15 TestMod.Int -16 (call core.svec %₁₄ %₁₅ core.Any core.Any) -17 (call core.svec) -18 (call core.svec %₁₆ %₁₇ :($(QuoteNode(:(#= line 1 =#))))) -19 --- method core.nothing %₁₈ +16 TestMod.f +17 (call core.Typeof %₁₆) +18 TestMod.Int +19 (call core.svec %₁₇ %₁₈ core.Any core.Any) +20 (call core.svec) +21 (call core.svec %₁₉ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) +22 --- method core.nothing %₂₁ 1 (call core.tuple slot₃/y slot₄/z) 2 (return %₁) -20 (return %₁) +23 TestMod.f +24 (return %₂₃) ######################################## # Default positional args with placeholders @@ 
-424,24 +471,27 @@ function f(_::Int, x=1) x end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 TestMod.Int -4 (call core.svec %₂ %₃) -5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 TestMod.Int +5 (call core.svec %₃ %₄) +6 (call core.svec) +7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) +8 --- method core.nothing %₇ 1 (call slot₁/#self# slot₂/_ 1) 2 (return %₁) -8 (call core.Typeof %₁) -9 TestMod.Int -10 (call core.svec %₈ %₉ core.Any) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +9 TestMod.f +10 (call core.Typeof %₉) +11 TestMod.Int +12 (call core.svec %₁₀ %₁₁ core.Any) +13 (call core.svec) +14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) +15 --- method core.nothing %₁₄ 1 slot₃/x 2 (return %₁) -14 (return %₁) +16 TestMod.f +17 (return %₁₆) ######################################## # Positional args with defaults and `where` clauses @@ -449,46 +499,50 @@ function f(x::T, y::S=1, z::U=2) where {T,S<:T,U<:S} (x,y,z) end #--------------------- -1 (= slot₂/T (call core.TypeVar :T)) -2 slot₂/T -3 (= slot₁/S (call core.TypeVar :S %₂)) -4 slot₁/S -5 (= slot₃/U (call core.TypeVar :U %₄)) -6 (method :f) -7 (call core.Typeof %₆) -8 slot₂/T -9 (call core.svec %₇ %₈) -10 slot₂/T -11 (call core.svec %₁₀) -12 (call core.svec %₉ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +1 (method TestMod.f) +2 (= slot₂/T (call core.TypeVar :T)) +3 slot₂/T +4 (= slot₁/S (call core.TypeVar :S %₃)) +5 slot₁/S +6 (= slot₃/U (call core.TypeVar :U %₅)) +7 TestMod.f +8 (call core.Typeof %₇) +9 slot₂/T +10 (call core.svec %₈ %₉) +11 slot₂/T +12 (call core.svec %₁₁) +13 (call core.svec %₁₀ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) +14 --- method core.nothing %₁₃ 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) -14 (call core.Typeof %₆) -15 slot₂/T 
-16 slot₁/S -17 (call core.svec %₁₄ %₁₅ %₁₆) -18 slot₂/T -19 slot₁/S -20 (call core.svec %₁₈ %₁₉) -21 (call core.svec %₁₇ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) -22 --- method core.nothing %₂₁ +15 TestMod.f +16 (call core.Typeof %₁₅) +17 slot₂/T +18 slot₁/S +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 slot₂/T +21 slot₁/S +22 (call core.svec %₂₀ %₂₁) +23 (call core.svec %₁₉ %₂₂ :($(QuoteNode(:(#= line 1 =#))))) +24 --- method core.nothing %₂₃ 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -23 (call core.Typeof %₆) -24 slot₂/T -25 slot₁/S -26 slot₃/U -27 (call core.svec %₂₃ %₂₄ %₂₅ %₂₆) -28 slot₂/T -29 slot₁/S -30 slot₃/U -31 (call core.svec %₂₈ %₂₉ %₃₀) -32 (call core.svec %₂₇ %₃₁ :($(QuoteNode(:(#= line 1 =#))))) -33 --- method core.nothing %₃₂ +25 TestMod.f +26 (call core.Typeof %₂₅) +27 slot₂/T +28 slot₁/S +29 slot₃/U +30 (call core.svec %₂₆ %₂₇ %₂₈ %₂₉) +31 slot₂/T +32 slot₁/S +33 slot₃/U +34 (call core.svec %₃₁ %₃₂ %₃₃) +35 (call core.svec %₃₀ %₃₄ :($(QuoteNode(:(#= line 1 =#))))) +36 --- method core.nothing %₃₅ 1 (call core.tuple slot₂/x slot₃/y slot₄/z) 2 (return %₁) -34 (return %₆) +37 TestMod.f +38 (return %₃₇) ######################################## # Positional args and type parameters with transitive dependencies @@ -498,47 +552,51 @@ function f(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} (x, y, z, T, S, U) end #--------------------- -1 (= slot₂/T (call core.TypeVar :T)) -2 TestMod.AbstractVector -3 slot₂/T -4 (call core.apply_type %₂ %₃) -5 (= slot₁/S (call core.TypeVar :S %₄)) -6 (= slot₃/U (call core.TypeVar :U)) -7 (method :f) -8 (call core.Typeof %₇) -9 (call core.svec %₈ core.Any) -10 (call core.svec) -11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) -12 --- method core.nothing %₁₁ +1 (method TestMod.f) +2 (= slot₂/T (call core.TypeVar :T)) +3 TestMod.AbstractVector +4 slot₂/T +5 (call core.apply_type %₃ %₄) +6 (= slot₁/S (call core.TypeVar :S %₅)) +7 (= slot₃/U (call core.TypeVar :U)) +8 TestMod.f +9 (call core.Typeof %₈) +10 
(call core.svec %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ 1 (call top.vect 1) 2 (call slot₁/#self# slot₂/x %₁ 2) 3 (return %₂) -13 (call core.Typeof %₇) -14 slot₁/S -15 (call core.svec %₁₃ core.Any %₁₄) -16 slot₂/T -17 slot₁/S -18 (call core.svec %₁₆ %₁₇) -19 (call core.svec %₁₅ %₁₈ :($(QuoteNode(:(#= line 1 =#))))) -20 --- method core.nothing %₁₉ +14 TestMod.f +15 (call core.Typeof %₁₄) +16 slot₁/S +17 (call core.svec %₁₅ core.Any %₁₆) +18 slot₂/T +19 slot₁/S +20 (call core.svec %₁₈ %₁₉) +21 (call core.svec %₁₇ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) +22 --- method core.nothing %₂₁ 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -21 (call core.Typeof %₇) -22 slot₁/S -23 slot₃/U -24 (call core.svec %₂₁ core.Any %₂₂ %₂₃) -25 slot₂/T -26 slot₁/S -27 slot₃/U -28 (call core.svec %₂₅ %₂₆ %₂₇) -29 (call core.svec %₂₄ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) -30 --- method core.nothing %₂₉ +23 TestMod.f +24 (call core.Typeof %₂₃) +25 slot₁/S +26 slot₃/U +27 (call core.svec %₂₄ core.Any %₂₅ %₂₆) +28 slot₂/T +29 slot₁/S +30 slot₃/U +31 (call core.svec %₂₈ %₂₉ %₃₀) +32 (call core.svec %₂₇ %₃₁ :($(QuoteNode(:(#= line 1 =#))))) +33 --- method core.nothing %₃₂ 1 static_parameter₁ 2 static_parameter₂ 3 static_parameter₃ 4 (call core.tuple slot₂/x slot₃/y slot₄/z %₁ %₂ %₃) 5 (return %₄) -31 (return %₇) +34 TestMod.f +35 (return %₃₄) ######################################## # Default positional args are allowed before trailing slurp with no default @@ -546,23 +604,26 @@ function f(x=1, ys...) 
ys end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call slot₁/#self# 1) 2 (return %₁) -7 (call core.Typeof %₁) -8 (call core.apply_type core.Vararg core.Any) -9 (call core.svec %₇ core.Any %₈) -10 (call core.svec) -11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) -12 --- method core.nothing %₁₁ +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.apply_type core.Vararg core.Any) +11 (call core.svec %₉ core.Any %₁₀) +12 (call core.svec) +13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) +14 --- method core.nothing %₁₃ 1 slot₃/ys 2 (return %₁) -13 (return %₁) +15 TestMod.f +16 (return %₁₅) ######################################## # Error: Default positional args after a slurp @@ -582,23 +643,26 @@ function f(xs...=1) xs end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call slot₁/#self# 1) 2 (return %₁) -7 (call core.Typeof %₁) -8 (call core.apply_type core.Vararg core.Any) -9 (call core.svec %₇ %₈) -10 (call core.svec) -11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) -12 --- method core.nothing %₁₁ +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.apply_type core.Vararg core.Any) +11 (call core.svec %₉ %₁₀) +12 (call core.svec) +13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) +14 --- method core.nothing %₁₃ 1 slot₂/xs 2 (return %₁) -13 
(return %₁) +15 TestMod.f +16 (return %₁₅) ######################################## # Positional arg with slurp and splatted default value @@ -606,36 +670,40 @@ function f(xs...=(1,2)...) xs end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call core.tuple 1 2) 2 (call core._apply_iterate top.iterate slot₁/#self# %₁) 3 (return %₂) -7 (call core.Typeof %₁) -8 (call core.apply_type core.Vararg core.Any) -9 (call core.svec %₇ %₈) -10 (call core.svec) -11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) -12 --- method core.nothing %₁₁ +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.apply_type core.Vararg core.Any) +11 (call core.svec %₉ %₁₀) +12 (call core.svec) +13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) +14 --- method core.nothing %₁₃ 1 slot₂/xs 2 (return %₁) -13 (return %₁) +15 TestMod.f +16 (return %₁₅) ######################################## # Trivial function argument destructuring function f(x, (y,z), w) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂ core.Any core.Any core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any core.Any core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call top.indexed_iterate slot₃/destructured_arg_2 1) 2 (= slot₆/y (call core.getfield %₁ 1)) 3 (= slot₅/iterstate (call core.getfield %₁ 2)) @@ -643,65 +711,73 @@ end 5 (call top.indexed_iterate slot₃/destructured_arg_2 2 %₄) 6 (= slot₇/z (call 
core.getfield %₅ 1)) 7 (return core.nothing) -7 (return %₁) +8 TestMod.f +9 (return %₈) ######################################## # Function argument destructuring combined with splats, types and and defaults function f((x,)::T...=rhs) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 TestMod.rhs 2 (call slot₁/#self# %₁) 3 (return %₂) -7 (call core.Typeof %₁) -8 TestMod.T -9 (call core.apply_type core.Vararg %₈) -10 (call core.svec %₇ %₉) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +8 TestMod.f +9 (call core.Typeof %₈) +10 TestMod.T +11 (call core.apply_type core.Vararg %₁₀) +12 (call core.svec %₉ %₁₁) +13 (call core.svec) +14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) +15 --- method core.nothing %₁₄ 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) 2 (= slot₃/x (call core.getfield %₁ 1)) 3 (return core.nothing) -14 (return %₁) +16 TestMod.f +17 (return %₁₆) ######################################## # Duplicate destructured placeholders ok function f((_,), (_,)) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂ core.Any core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) 2 (call core.getfield %₁ 1) 3 (call top.indexed_iterate slot₃/destructured_arg_2 1) 4 (call core.getfield 
%₃ 1) 5 (return core.nothing) -7 (return %₁) +8 TestMod.f +9 (return %₈) ######################################## # Functions with @nospecialize argument metadata function f(@nospecialize(x)) end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (return core.nothing) -7 (return %₁) +8 TestMod.f +9 (return %₈) ######################################## # Binding docs to functions @@ -711,12 +787,18 @@ some docs function f() end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 4 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 4 =#))))) +7 --- method core.nothing %₆ + 1 (return core.nothing) +8 TestMod.f +9 (call JuliaLowering.bind_docs! %₈ "some docs\n" %₆) +10 TestMod.f +11 (return %₁₀) ######################################## # Binding docs to callable type @@ -734,5 +816,5 @@ end 1 (return core.nothing) 6 TestMod.T 7 (call JuliaLowering.bind_docs! 
%₆ "some docs\n" %₄) -8 (return %₇) +8 (return core.nothing) diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 0a7cb046e9d68..a994d6d351df2 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -6,16 +6,18 @@ macro add_one(ex) end end #--------------------- -1 (method :@add_one) -2 (call core.Typeof %₁) -3 (call core.svec %₂ JuliaLowering.MacroContext core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.@add_one) +2 TestMod.@add_one +3 (call core.Typeof %₂) +4 (call core.svec %₃ JuliaLowering.MacroContext core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call core.tuple slot₃/ex) 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) 3 (return %₂) -7 (return %₁) +8 TestMod.@add_one +9 (return %₈) ######################################## # Macro using `__context__` @@ -23,16 +25,18 @@ macro foo(ex) ctx = __context__ end #--------------------- -1 (method :@foo) -2 (call core.Typeof %₁) -3 (call core.svec %₂ JuliaLowering.MacroContext core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.@foo) +2 TestMod.@foo +3 (call core.Typeof %₂) +4 (call core.svec %₃ JuliaLowering.MacroContext core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 slot₂/__context__ 2 (= slot₄/ctx %₁) 3 (return %₁) -7 (return %₁) +8 TestMod.@foo +9 (return %₈) ######################################## # Error: Macro with kw args diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 138f49ac376c9..71e9d92a1262d 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -13,6 +13,7 @@ include("utils.jl") include("desugaring.jl") include("exceptions.jl") 
include("functions.jl") + include("closures.jl") include("import.jl") include("loops.jl") include("macros.jl") diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index ee6bad1190670..1f05df5e51ee9 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -15,23 +15,28 @@ end ######################################## # @islocal with function arguments -let y = 2 +begin + local y = 2 function f(x) @islocal(a), @islocal(x), @islocal(y) end end #--------------------- -1 2 -2 (= slot₁/y %₁) -3 (method :f) -4 (call core.Typeof %₃) -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 2 =#))))) -8 --- method core.nothing %₇ +1 (= slot₁ (call core.Box)) +2 2 +3 slot₁/y +4 (call core.setfield! %₃ :contents %₂) +5 (method TestMod.f) +6 TestMod.f +7 (call core.Typeof %₆) +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 3 =#))))) +11 --- method core.nothing %₁₀ 1 (call core.tuple false true true) 2 (return %₁) -9 (return %₃) +12 TestMod.f +13 (return %₁₂) ######################################## # @islocal with global @@ -51,14 +56,15 @@ begin @locals end #--------------------- -1 (global TestMod.x) -2 (call core.apply_type top.Dict core.Symbol core.Any) -3 (call %₂) -4 (isdefined slot₁/y) -5 (gotoifnot %₄ label₈) -6 slot₁/y -7 (call top.setindex! %₃ %₆ :y) -8 (return %₃) +1 (newvar slot₁) +2 (global TestMod.x) +3 (call core.apply_type top.Dict core.Symbol core.Any) +4 (call %₃) +5 (isdefined slot₁/y) +6 (gotoifnot %₅ label₉) +7 slot₁/y +8 (call top.setindex! 
%₄ %₇ :y) +9 (return %₄) ######################################## # @locals with function args (TODO: static parameters) @@ -66,19 +72,21 @@ function f(z) @locals end #--------------------- -1 (method :f) -2 (call core.Typeof %₁) -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (isdefined slot₂/z) 4 (gotoifnot %₃ label₆) 5 (call top.setindex! %₂ slot₂/z :z) 6 (return %₂) -7 (return %₁) +8 TestMod.f +9 (return %₈) ######################################## # Error: Duplicate function argument names diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index ebb4e0d81a323..1842e442ad054 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -130,7 +130,7 @@ struct S6 S6_f() = new(42) "some docs" - S6() = S6_f() + S6() = new(42) # FIXME: call S6_f() S6(x) = new(x) end """) === nothing @@ -158,7 +158,7 @@ struct S7{S,T} # Cases where new{...} is called S7() = new{Int,Int}(10.0, "y5") - S7{UInt8}() = S7_f() + S7{UInt8}() = new{UInt8,UInt8}(10.0, "y6") # FIXME: call S7_f() S7_f() = new{UInt8,UInt8}(10.0, "y6") end """) === nothing diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 759e36e7a04a3..27c3f80a22d95 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -730,7 +730,7 @@ end struct X x f() = new(1) - X() = f() + #X() = f() # FIXME: this X() captures `f` (in flisp, as a Box :-/ ) X(x) = new(x) X(y,z)::ReallyXIPromise = new(y+z) """ @@ -739,63 +739,69 @@ struct X X(a,b,c) = new(a) end #--------------------- -1 (global TestMod.X) -2 (const TestMod.X) -3 (call core.svec) -4 (call core.svec 
:x) -5 (call core.svec) -6 (call core._structtype TestMod :X %₃ %₄ %₅ false 1) -7 (= slot₁/X %₆) -8 (call core._setsuper! %₆ core.Any) -9 (isdefined TestMod.X) -10 (gotoifnot %₉ label₂₀) -11 TestMod.X -12 (call core._equiv_typedef %₁₁ %₆) -13 (gotoifnot %₁₂ label₁₇) -14 TestMod.X -15 (= slot₁/X %₁₄) -16 (goto label₁₉) -17 slot₁/X -18 (= TestMod.X %₁₇) -19 (goto label₂₂) -20 slot₁/X -21 (= TestMod.X %₂₀) -22 slot₁/X -23 (call core.svec core.Any) -24 (call core._typebody! %₂₂ %₂₃) -25 (method :f) -26 (call core.Typeof %₂₅) -27 (call core.svec %₂₆) -28 (call core.svec) -29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 3 =#))))) -30 --- method core.nothing %₂₉ +1 --- thunk + 1 (global TestMod.#f##0) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#f##0 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#f##0) + 8 (= TestMod.#f##0 %₅) + 9 (call core.svec) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +2 TestMod.#f##0 +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 3 =#))))) +6 --- method core.nothing %₅ 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -31 TestMod.X -32 (call core.apply_type core.Type %₃₁) -33 (call core.svec %₃₂) -34 (call core.svec) -35 (call core.svec %₃₃ %₃₄ :($(QuoteNode(:(#= line 4 =#))))) -36 --- method core.nothing %₃₅ - 1 TestMod.f - 2 (call %₁) - 3 (return %₂) -37 TestMod.X -38 (call core.apply_type core.Type %₃₇) -39 (call core.svec %₃₈ core.Any) -40 (call core.svec) -41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 5 =#))))) -42 --- method core.nothing %₄₁ +7 (newvar slot₂) +8 (global TestMod.X) +9 (const TestMod.X) +10 (call core.svec) +11 (call core.svec :x) +12 (call core.svec) +13 (call core._structtype TestMod :X %₁₀ %₁₁ %₁₂ false 1) +14 (= slot₁/X %₁₃) +15 (call core._setsuper! 
%₁₃ core.Any) +16 (isdefined TestMod.X) +17 (gotoifnot %₁₆ label₂₇) +18 TestMod.X +19 (call core._equiv_typedef %₁₈ %₁₃) +20 (gotoifnot %₁₉ label₂₄) +21 TestMod.X +22 (= slot₁/X %₂₁) +23 (goto label₂₆) +24 slot₁/X +25 (= TestMod.X %₂₄) +26 (goto label₂₉) +27 slot₁/X +28 (= TestMod.X %₂₇) +29 slot₁/X +30 (call core.svec core.Any) +31 (call core._typebody! %₂₉ %₃₀) +32 TestMod.#f##0 +33 (= slot₂/f (new %₃₂)) +34 slot₂/f +35 TestMod.X +36 (call core.apply_type core.Type %₃₅) +37 (call core.svec %₃₆ core.Any) +38 (call core.svec) +39 (call core.svec %₃₇ %₃₈ :($(QuoteNode(:(#= line 5 =#))))) +40 --- method core.nothing %₃₉ 1 slot₁/#ctor-self# 2 (new %₁ slot₂/x) 3 (return %₂) -43 TestMod.X -44 (call core.apply_type core.Type %₄₃) -45 (call core.svec %₄₄ core.Any core.Any) -46 (call core.svec) -47 (call core.svec %₄₅ %₄₆ :($(QuoteNode(:(#= line 6 =#))))) -48 --- method core.nothing %₄₇ +41 TestMod.X +42 (call core.apply_type core.Type %₄₁) +43 (call core.svec %₄₂ core.Any core.Any) +44 (call core.svec) +45 (call core.svec %₄₃ %₄₄ :($(QuoteNode(:(#= line 6 =#))))) +46 --- method core.nothing %₄₅ 1 TestMod.ReallyXIPromise 2 slot₁/#ctor-self# 3 TestMod.+ @@ -810,16 +816,18 @@ end 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) 13 slot₄/tmp 14 (return %₁₃) -49 TestMod.X -50 (call core.apply_type core.Type %₄₉) -51 (call core.svec %₅₀ core.Any core.Any core.Any) -52 (call core.svec) -53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 10 =#))))) -54 --- method core.nothing %₅₃ +47 TestMod.X +48 (call core.apply_type core.Type %₄₇) +49 (call core.svec %₄₈ core.Any core.Any core.Any) +50 (call core.svec) +51 (call core.svec %₄₉ %₅₀ :($(QuoteNode(:(#= line 10 =#))))) +52 --- method core.nothing %₅₁ 1 slot₁/#ctor-self# 2 (new %₁ slot₂/a) 3 (return %₂) -55 (call JuliaLowering.bind_docs! %₅₀ "Docs for X constructor\n" %₅₃) +53 TestMod.X +54 (call core.apply_type core.Type %₅₃) +55 (call JuliaLowering.bind_docs! 
%₅₄ "Docs for X constructor\n" %₅₁) 56 (return core.nothing) ######################################## @@ -831,85 +839,100 @@ struct X{S,T} f() = new{A,B}(1) end #--------------------- -1 (global TestMod.X) -2 (const TestMod.X) -3 (= slot₂/S (call core.TypeVar :S)) -4 (= slot₃/T (call core.TypeVar :T)) -5 slot₂/S -6 slot₃/T -7 (call core.svec %₅ %₆) -8 (call core.svec :x) -9 (call core.svec) -10 (call core._structtype TestMod :X %₇ %₈ %₉ false 1) -11 (= slot₄/X %₁₀) -12 (call core._setsuper! %₁₀ core.Any) -13 (isdefined TestMod.X) -14 (gotoifnot %₁₃ label₃₄) -15 TestMod.X -16 (call core._equiv_typedef %₁₅ %₁₀) -17 (gotoifnot %₁₆ label₃₁) -18 TestMod.X -19 (= slot₄/X %₁₈) -20 TestMod.X -21 (call top.getproperty %₂₀ :body) -22 (call top.getproperty %₂₁ :body) -23 (call top.getproperty %₂₂ :parameters) -24 (call top.indexed_iterate %₂₃ 1) -25 (= slot₂/S (call core.getfield %₂₄ 1)) -26 (= slot₁/iterstate (call core.getfield %₂₄ 2)) -27 slot₁/iterstate -28 (call top.indexed_iterate %₂₃ 2 %₂₇) -29 (= slot₃/T (call core.getfield %₂₈ 1)) -30 (goto label₃₃) -31 slot₄/X -32 (= TestMod.X %₃₁) -33 (goto label₃₆) -34 slot₄/X -35 (= TestMod.X %₃₄) -36 slot₄/X -37 (call core.svec core.Any) -38 (call core._typebody! 
%₃₆ %₃₇) -39 TestMod.X -40 TestMod.A -41 TestMod.B -42 (call core.apply_type %₃₉ %₄₀ %₄₁) -43 (call core.apply_type core.Type %₄₂) -44 (call core.svec %₄₃) -45 (call core.svec) -46 (call core.svec %₄₄ %₄₅ :($(QuoteNode(:(#= line 3 =#))))) -47 --- method core.nothing %₄₆ - 1 slot₁/#ctor-self# - 2 (new %₁ 1) - 3 (return %₂) -48 (= slot₅/U (call core.TypeVar :U)) -49 (= slot₆/V (call core.TypeVar :V)) -50 TestMod.X -51 slot₅/U -52 slot₆/V -53 (call core.apply_type %₅₀ %₅₁ %₅₂) -54 (call core.apply_type core.Type %₅₃) -55 (call core.svec %₅₄) -56 slot₅/U -57 slot₆/V -58 (call core.svec %₅₆ %₅₇) -59 (call core.svec %₅₅ %₅₈ :($(QuoteNode(:(#= line 4 =#))))) -60 --- method core.nothing %₅₉ - 1 slot₁/#ctor-self# - 2 (new %₁ 1) - 3 (return %₂) -61 (method :f) -62 (call core.Typeof %₆₁) -63 (call core.svec %₆₂) -64 (call core.svec) -65 (call core.svec %₆₃ %₆₄ :($(QuoteNode(:(#= line 5 =#))))) -66 --- method core.nothing %₆₅ +1 --- thunk + 1 (global TestMod.#f##1) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#f##1 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#f##1) + 8 (= TestMod.#f##1 %₅) + 9 (call core.svec) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +2 TestMod.#f##1 +3 (call core.svec %₂) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 5 =#))))) +6 --- method core.nothing %₅ 1 TestMod.X 2 TestMod.A 3 TestMod.B 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ 1) 6 (return %₅) -67 (return core.nothing) +7 (newvar slot₅) +8 (global TestMod.X) +9 (const TestMod.X) +10 (= slot₂/S (call core.TypeVar :S)) +11 (= slot₃/T (call core.TypeVar :T)) +12 slot₂/S +13 slot₃/T +14 (call core.svec %₁₂ %₁₃) +15 (call core.svec :x) +16 (call core.svec) +17 (call core._structtype TestMod :X %₁₄ %₁₅ %₁₆ false 1) +18 (= slot₄/X %₁₇) +19 (call core._setsuper! 
%₁₇ core.Any) +20 (isdefined TestMod.X) +21 (gotoifnot %₂₀ label₄₁) +22 TestMod.X +23 (call core._equiv_typedef %₂₂ %₁₇) +24 (gotoifnot %₂₃ label₃₈) +25 TestMod.X +26 (= slot₄/X %₂₅) +27 TestMod.X +28 (call top.getproperty %₂₇ :body) +29 (call top.getproperty %₂₈ :body) +30 (call top.getproperty %₂₉ :parameters) +31 (call top.indexed_iterate %₃₀ 1) +32 (= slot₂/S (call core.getfield %₃₁ 1)) +33 (= slot₁/iterstate (call core.getfield %₃₁ 2)) +34 slot₁/iterstate +35 (call top.indexed_iterate %₃₀ 2 %₃₄) +36 (= slot₃/T (call core.getfield %₃₅ 1)) +37 (goto label₄₀) +38 slot₄/X +39 (= TestMod.X %₃₈) +40 (goto label₄₃) +41 slot₄/X +42 (= TestMod.X %₄₁) +43 slot₄/X +44 (call core.svec core.Any) +45 (call core._typebody! %₄₃ %₄₄) +46 TestMod.X +47 TestMod.A +48 TestMod.B +49 (call core.apply_type %₄₆ %₄₇ %₄₈) +50 (call core.apply_type core.Type %₄₉) +51 (call core.svec %₅₀) +52 (call core.svec) +53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 3 =#))))) +54 --- method core.nothing %₅₃ + 1 slot₁/#ctor-self# + 2 (new %₁ 1) + 3 (return %₂) +55 (= slot₆/U (call core.TypeVar :U)) +56 (= slot₇/V (call core.TypeVar :V)) +57 TestMod.X +58 slot₆/U +59 slot₇/V +60 (call core.apply_type %₅₇ %₅₈ %₅₉) +61 (call core.apply_type core.Type %₆₀) +62 (call core.svec %₆₁) +63 slot₆/U +64 slot₇/V +65 (call core.svec %₆₃ %₆₄) +66 (call core.svec %₆₂ %₆₅ :($(QuoteNode(:(#= line 4 =#))))) +67 --- method core.nothing %₆₆ + 1 slot₁/#ctor-self# + 2 (new %₁ 1) + 3 (return %₂) +68 TestMod.#f##1 +69 (= slot₅/f (new %₆₈)) +70 slot₅/f +71 (return core.nothing) ######################################## # new() calls with splats; `Any` fields From ec7b47e728107d3bfe45230baec0fe29315ff799 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 23 Dec 2024 06:50:52 +1000 Subject: [PATCH 0915/1109] Store binding provenance in BindingInfo Here we give every binding a source reference (stored as node_id) so that if we only have the BindingInfo we can still insert it into the AST with sensible proveneance. 
For example, for `@locals`, we're required to reconstruct a list of bindings without any source construct to hang them off other than the `@locals` invocation. With this change we can include the original location where the binding was defined. This also fixes some pretty misleading printing of the `#self#` binding for closures. Additional cleanup: * Regroup all binding-related stuff into its own source file * Remove alias bindings. These seemed like a good idea at the time but so far, there's other ways to achieve the same thing. * Rename new_mutable_var to new_local_binding --- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/ast.jl | 138 ++---------------------- JuliaLowering/src/bindings.jl | 136 +++++++++++++++++++++++ JuliaLowering/src/closure_conversion.jl | 8 +- JuliaLowering/src/desugaring.jl | 16 +-- JuliaLowering/src/kinds.jl | 2 - JuliaLowering/src/linear_ir.jl | 8 +- JuliaLowering/src/scope_analysis.jl | 77 +++++-------- JuliaLowering/test/assignments_ir.jl | 2 +- JuliaLowering/test/branching_ir.jl | 32 +++--- JuliaLowering/test/closures_ir.jl | 16 +-- JuliaLowering/test/decls_ir.jl | 2 +- JuliaLowering/test/demo.jl | 36 ++----- JuliaLowering/test/destructuring_ir.jl | 2 +- JuliaLowering/test/exceptions_ir.jl | 4 +- JuliaLowering/test/scopes_ir.jl | 4 +- JuliaLowering/test/typedefs_ir.jl | 4 +- 17 files changed, 233 insertions(+), 255 deletions(-) create mode 100644 JuliaLowering/src/bindings.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 0b64588edaa88..3f42bf5b0c737 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -20,6 +20,7 @@ _register_kinds() _include("syntax_graph.jl") _include("ast.jl") +_include("bindings.jl") _include("utils.jl") _include("macro_expansion.jl") diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index f98b7b3b9250c..8843a654864a4 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -48,109 +48,24 
@@ end #------------------------------------------------------------------------------- abstract type AbstractLoweringContext end +function add_lambda_local!(ctx::AbstractLoweringContext, id) + # empty - early passes don't need to record lambda locals +end + +function syntax_graph(ctx::AbstractLoweringContext) + ctx.graph +end + """ Unique symbolic identity for a variable, constant, label, or other entity """ const IdTag = Int """ -Metadata about a binding -""" -struct BindingInfo - name::String - kind::Symbol # :local :global :argument :static_parameter - mod::Union{Nothing,Module} # Set when `kind === :global` - type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 - is_const::Bool # Constant, cannot be reassigned - is_ssa::Bool # Single assignment, defined before use - is_captured::Bool # Variable is captured by some lambda - is_always_defined::Bool # A local that we know has an assignment that dominates all usages (is never undef) - is_internal::Bool # True for internal bindings generated by the compiler - is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) - is_nospecialize::Bool # @nospecialize on this argument (only valid for kind == :argument) -end - -function BindingInfo(name::AbstractString, kind::Symbol; - mod::Union{Nothing,Module} = nothing, - type::Union{Nothing,SyntaxTree} = nothing, - is_const::Bool = false, - is_ssa::Bool = false, - is_captured::Bool = false, - is_always_defined::Bool = is_ssa, - is_internal::Bool = false, - is_ambiguous_local::Bool = false, - is_nospecialize::Bool = false) - BindingInfo(name, kind, mod, type, is_const, is_ssa, is_captured, is_always_defined, - is_internal, is_ambiguous_local, is_nospecialize) -end - -""" -Metadata about "entities" (variables, constants, etc) in the program. Each -entity is associated to a unique integer id, the BindingId. A binding will be -inferred for each *name* in the user's source program by symbolic analysis of -the source. 
- -However, bindings can also be introduced programmatically during lowering or -macro expansion: the primary key for bindings is the `BindingId` integer, not -a name. +Id for scope layers in macro expansion """ -struct Bindings - info::Vector{BindingInfo} -end - -Bindings() = Bindings(Vector{BindingInfo}()) - -function new_binding(bindings::Bindings, info::BindingInfo) - push!(bindings.info, info) - return length(bindings.info) -end - -function _binding_id(id::Integer) - id -end - -function _binding_id(ex::SyntaxTree) - @chk kind(ex) == K"BindingId" - ex.var_id -end - -function update_binding!(bindings::Bindings, x; - type=nothing, is_const=nothing, is_always_defined=nothing, is_captured=nothing) - id = _binding_id(x) - b = lookup_binding(bindings, id) - bindings.info[id] = BindingInfo( - b.name, - b.kind, - b.mod, - isnothing(type) ? b.type : type, - isnothing(is_const) ? b.is_const : is_const, - b.is_ssa, - isnothing(is_captured) ? b.is_captured : is_captured, - isnothing(is_always_defined) ? b.is_always_defined : is_always_defined, - b.is_internal, - b.is_ambiguous_local, - b.is_nospecialize - ) -end - -function lookup_binding(bindings::Bindings, x) - bindings.info[_binding_id(x)] -end - -function lookup_binding(ctx::AbstractLoweringContext, x) - lookup_binding(ctx.bindings, x) -end - -function update_binding!(ctx::AbstractLoweringContext, x; kws...) - update_binding!(ctx.bindings, x; kws...) -end - const LayerId = Int -function syntax_graph(ctx::AbstractLoweringContext) - ctx.graph -end - #------------------------------------------------------------------------------- # AST creation utilities _node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_compatible_graph(graph, ex); ex._id) @@ -258,41 +173,6 @@ nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) -# Create a new SSA binding -function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") - # TODO: Store this name in only one place? 
Probably use the provenance chain? - id = new_binding(ctx.bindings, BindingInfo(name, :local; is_ssa=true, is_internal=true)) - # Create an identifier - nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) - makeleaf(ctx, nameref, K"BindingId", var_id=id) -end - -function add_lambda_local!(ctx::AbstractLoweringContext, id) - # empty - early passes don't need to record lambda locals -end - -# Create a new local mutable variable or lambda argument -# (TODO: rename this?) -function new_mutable_var(ctx::AbstractLoweringContext, srcref, name; kind=:local, kws...) - @assert kind === :local || kind === :argument - id = new_binding(ctx.bindings, BindingInfo(name, kind; is_internal=true, kws...)) - nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) - var = makeleaf(ctx, nameref, K"BindingId", var_id=id) - add_lambda_local!(ctx, id) - var -end - -function new_global_binding(ctx::AbstractLoweringContext, srcref, name, mod; kws...) - id = new_binding(ctx.bindings, BindingInfo(name, :global; is_internal=true, mod=mod, kws...)) - nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) - makeleaf(ctx, nameref, K"BindingId", var_id=id) -end - -function alias_binding(ctx::AbstractLoweringContext, srcref) - id = new_binding(ctx.bindings, BindingInfo("alias", :alias; is_internal=true)) - makeleaf(ctx, srcref, K"BindingId", var_id=id) -end - # Assign `ex` to an SSA variable. 
# Return (variable, assignment_node) function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp") diff --git a/JuliaLowering/src/bindings.jl b/JuliaLowering/src/bindings.jl new file mode 100644 index 0000000000000..5a18956984e7c --- /dev/null +++ b/JuliaLowering/src/bindings.jl @@ -0,0 +1,136 @@ +""" +Metadata about a binding +""" +struct BindingInfo + id::IdTag # Unique integer identifying this binding + name::String + kind::Symbol # :local :global :argument :static_parameter + node_id::Int # ID of associated K"BindingId" node in the syntax graph + mod::Union{Nothing,Module} # Set when `kind === :global` + type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 + is_const::Bool # Constant, cannot be reassigned + is_ssa::Bool # Single assignment, defined before use + is_captured::Bool # Variable is captured by some lambda + is_always_defined::Bool # A local that we know has an assignment that dominates all usages (is never undef) + is_internal::Bool # True for internal bindings generated by the compiler + is_ambiguous_local::Bool # Local, but would be global in soft scope (ie, the REPL) + is_nospecialize::Bool # @nospecialize on this argument (only valid for kind == :argument) +end + +function BindingInfo(id::IdTag, name::AbstractString, kind::Symbol, node_id::Integer; + mod::Union{Nothing,Module} = nothing, + type::Union{Nothing,SyntaxTree} = nothing, + is_const::Bool = false, + is_ssa::Bool = false, + is_captured::Bool = false, + is_always_defined::Bool = is_ssa, + is_internal::Bool = false, + is_ambiguous_local::Bool = false, + is_nospecialize::Bool = false) + BindingInfo(id, name, kind, node_id, mod, type, is_const, is_ssa, is_captured, is_always_defined, + is_internal, is_ambiguous_local, is_nospecialize) +end + +""" +Metadata about "entities" (variables, constants, etc) in the program. Each +entity is associated to a unique integer id, the BindingId. 
A binding will be +inferred for each *name* in the user's source program by symbolic analysis of +the source. + +However, bindings can also be introduced programmatically during lowering or +macro expansion: the primary key for bindings is the `BindingId` integer, not +a name. +""" +struct Bindings + info::Vector{BindingInfo} +end + +Bindings() = Bindings(Vector{BindingInfo}()) + +next_binding_id(bindings::Bindings) = length(bindings.info) + 1 + +function add_binding(bindings::Bindings, binding) + if next_binding_id(bindings) != binding.id + error("Use next_binding_id() to create a valid binding id") + end + push!(bindings.info, binding) +end + +function _binding_id(id::Integer) + id +end + +function _binding_id(ex::SyntaxTree) + @chk kind(ex) == K"BindingId" + ex.var_id +end + +function update_binding!(bindings::Bindings, x; + type=nothing, is_const=nothing, is_always_defined=nothing, is_captured=nothing) + id = _binding_id(x) + b = lookup_binding(bindings, id) + bindings.info[id] = BindingInfo( + b.id, + b.name, + b.kind, + b.node_id, + b.mod, + isnothing(type) ? b.type : type, + isnothing(is_const) ? b.is_const : is_const, + b.is_ssa, + isnothing(is_captured) ? b.is_captured : is_captured, + isnothing(is_always_defined) ? b.is_always_defined : is_always_defined, + b.is_internal, + b.is_ambiguous_local, + b.is_nospecialize + ) +end + +function lookup_binding(bindings::Bindings, x) + bindings.info[_binding_id(x)] +end + +function lookup_binding(ctx::AbstractLoweringContext, x) + lookup_binding(ctx.bindings, x) +end + +function update_binding!(ctx::AbstractLoweringContext, x; kws...) + update_binding!(ctx.bindings, x; kws...) +end + +function new_binding(ctx::AbstractLoweringContext, srcref::SyntaxTree, + name::AbstractString, kind::Symbol; kws...) 
+ binding_id = next_binding_id(ctx.bindings) + ex = @ast ctx srcref binding_id::K"BindingId" + add_binding(ctx.bindings, BindingInfo(binding_id, name, kind, ex._id; kws...)) + ex +end + +# Create a new SSA binding +function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp") + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + new_binding(ctx, nameref, name, :local; is_ssa=true, is_internal=true) +end + +# Create a new local mutable binding or lambda argument +function new_local_binding(ctx::AbstractLoweringContext, srcref, name; kind=:local, kws...) + @assert kind === :local || kind === :argument + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + ex = new_binding(ctx, nameref, name, kind; is_internal=true, kws...) + add_lambda_local!(ctx, ex.var_id) + ex +end + +function new_global_binding(ctx::AbstractLoweringContext, srcref, name, mod; kws...) + nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) + new_binding(ctx, nameref, name, :global; is_internal=true, mod=mod, kws...) +end + +function binding_ex(ctx::AbstractLoweringContext, id::IdTag) + # Reconstruct the SyntaxTree for this binding. We keep only the node_id + # here, because that's got a concrete type. Whereas if we stored SyntaxTree + # that would contain the type of the graph used in the pass where the + # bindings were created and we'd need to call reparent(), etc. + SyntaxTree(syntax_graph(ctx), lookup_binding(ctx, id).node_id) +end + diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 81e07d489e909..c723a7226f6cb 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -39,13 +39,13 @@ function captured_var_access(ctx, ex) "getfield"::K"core" # FIXME: attributing the self binding to srcref=ex gives misleading printing. # We should carry provenance with each binding to fix this. 
- ctx.lambda_bindings.self::K"BindingId" + binding_ex(ctx, ctx.lambda_bindings.self) field_sym ] end function get_box_contents(ctx::ClosureConversionCtx, var, box_ex) - undef_var = new_mutable_var(ctx, var, lookup_binding(ctx, var.var_id).name) + undef_var = new_local_binding(ctx, var, lookup_binding(ctx, var.var_id).name) @ast ctx var [K"block" box := box_ex # Lower in an UndefVar check to a similarly named variable @@ -85,7 +85,7 @@ function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) kt = kind(type) @assert (kt == K"Identifier" || kt == K"BindingId" || is_literal(kt)) # Use a slot to permit union-splitting this in inference - tmp = new_mutable_var(ctx, srcref, "tmp", is_always_defined=true) + tmp = new_local_binding(ctx, srcref, "tmp", is_always_defined=true) @ast ctx srcref [K"block" # [K"=" type_ssa renumber_assigned_ssavalues(type)] @@ -353,7 +353,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ctx.closure_infos[func_name_id] = closure_info init_closure_args = SyntaxList(ctx) for id in field_orig_bindings - push!(init_closure_args, @ast ctx ex id::K"BindingId") + push!(init_closure_args, binding_ex(ctx, id)) end @ast ctx ex [K"block" [K"=" func_name diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index ae071d32164b7..41221cdbf26b0 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -227,7 +227,7 @@ end function _destructure(ctx, assignment_srcref, stmts, lhs, rhs) n_lhs = numchildren(lhs) if n_lhs > 0 - iterstate = new_mutable_var(ctx, rhs, "iterstate") + iterstate = new_local_binding(ctx, rhs, "iterstate") end end_stmts = SyntaxList(ctx) @@ -1031,7 +1031,7 @@ function expand_for(ctx, ex) end iter_ex = iterspec[2] - next = new_mutable_var(ctx, iterspec, "next") + next = new_local_binding(ctx, iterspec, "next") state = ssavar(ctx, iterspec, "state") collection = ssavar(ctx, iter_ex, "collection") @@ -1368,7 +1368,7 @@ function optional_positional_defs!(ctx, 
method_stmts, srcref, callex, # the inner method for dispatch even when unused in the inner method body def_arg_names = map(arg_names) do arg kind(arg) == K"Placeholder" ? - new_mutable_var(ctx, arg, arg.name_val; kind=:argument) : + new_local_binding(ctx, arg, arg.name_val; kind=:argument) : arg end for def_idx = 1:length(arg_defaults) @@ -1504,7 +1504,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end # Add self argument if isnothing(self_name) - self_name = new_mutable_var(ctx, name, "#self#"; kind=:argument) + self_name = new_local_binding(ctx, name, "#self#"; kind=:argument) end # Expand remaining argument names and types @@ -1522,7 +1522,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if kind(aname) == K"tuple" # Argument destructuring is_nospecialize = getmeta(arg, :nospecialize, false) - n = new_mutable_var(ctx, aname, "destructured_arg_$i"; + n = new_local_binding(ctx, aname, "destructured_arg_$i"; kind=:argument, is_nospecialize=is_nospecialize) push!(body_stmts, @ast ctx aname [ K"local"(meta=CompileHints(:is_destructured_arg, true)) @@ -1967,7 +1967,7 @@ function default_inner_constructors(ctx, srcref, global_struct_name, # Definition which takes `Any` for all arguments and uses # `Base.convert()` to convert those to the exact field type. Only # defined if at least one field type is not Any. 
- ctor_self = new_mutable_var(ctx, srcref, "#ctor-self#"; kind=:argument) + ctor_self = new_local_binding(ctx, srcref, "#ctor-self#"; kind=:argument) @ast ctx srcref [K"function" [K"call" [K"::" @@ -2068,7 +2068,7 @@ function _rewrite_ctor_sig(ctx, callex, struct_name, global_struct_name, struct_ name = callex[1] if is_same_identifier_like(struct_name, name) # X(x,y) ==> (#ctor-self#::Type{X})(x,y) - ctor_self[] = new_mutable_var(ctx, callex, "#ctor-self#"; kind=:argument) + ctor_self[] = new_local_binding(ctx, callex, "#ctor-self#"; kind=:argument) @ast ctx callex [K"call" [K"::" ctor_self[] @@ -2078,7 +2078,7 @@ function _rewrite_ctor_sig(ctx, callex, struct_name, global_struct_name, struct_ ] elseif kind(name) == K"curly" && is_same_identifier_like(struct_name, name[1]) # X{T}(x,y) ==> (#ctor-self#::Type{X{T}})(x,y) - self = new_mutable_var(ctx, callex, "#ctor-self#"; kind=:argument) + self = new_local_binding(ctx, callex, "#ctor-self#"; kind=:argument) if numchildren(name) - 1 == length(struct_typevars) # Self fully parameterized - can be used as the full type to # rewrite new() calls in constructor body. diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 817865e12358e..a2cbeca68d3b0 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -43,8 +43,6 @@ function _register_kinds() # Semantic assertions used by lowering. The content of an assertion # is not considered to be quoted, so use K"Symbol" etc inside where necessary. "assert" - # Alias a name to a given binding - "alias_binding" # Unique identifying integer for bindings (of variables, constants, etc) "BindingId" # Various heads harvested from flisp lowering. 
diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index df78dfc6b63d7..0e70a7eb68b91 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -278,7 +278,7 @@ function emit_return(ctx, srcref, ex) # TODO: Why does flisp lowering create a mutable variable here even # though we don't mutate it? # tmp = ssavar(ctx, srcref, "returnval_via_finally") # <- can we use this? - tmp = new_mutable_var(ctx, srcref, "returnval_via_finally") + tmp = new_local_binding(ctx, srcref, "returnval_via_finally") emit(ctx, @ast ctx srcref [K"=" tmp ex]) tmp else @@ -447,7 +447,7 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end end_label = !in_tail_pos || !isnothing(finally_block) ? make_label(ctx, ex) : nothing - try_result = needs_value && !in_tail_pos ? new_mutable_var(ctx, ex, "try_result") : nothing + try_result = needs_value && !in_tail_pos ? new_local_binding(ctx, ex, "try_result") : nothing # Exception handler block prefix handler_token = ssavar(ctx, ex, "handler_token") @@ -458,7 +458,7 @@ function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if !isnothing(finally_block) # TODO: Trivial finally block optimization from JuliaLang/julia#52593 (or # support a special form for @with)? 
- finally_handler = FinallyHandler(new_mutable_var(ctx, finally_block, "finally_tag"), + finally_handler = FinallyHandler(new_local_binding(ctx, finally_block, "finally_tag"), JumpTarget(end_label, ctx)) push!(ctx.finally_handlers, finally_handler) emit(ctx, @ast ctx finally_block [K"=" finally_handler.tagvar (-1)::K"Integer"]) @@ -693,7 +693,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing else - val = needs_value && new_mutable_var(ctx, ex, "if_val") + val = needs_value && new_local_binding(ctx, ex, "if_val") v1 = compile(ctx, ex[2], needs_value, in_tail_pos) if needs_value emit_assignment(ctx, ex, val, v1) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index b6bda4c3f391d..6e1864963e660 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -27,14 +27,12 @@ end #------------------------------------------------------------------------------- _insert_if_not_present!(dict, key, val) = get!(dict, key, val) -function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex) +function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex) k = kind(ex) if k == K"Identifier" - push!(used_names, NameKey(ex)) + _insert_if_not_present!(used_names, NameKey(ex), ex) elseif k == K"BindingId" push!(used_bindings, ex.var_id) - elseif k == K"alias_binding" - push!(alias_bindings, NameKey(ex[2])=>ex[1].var_id) elseif is_leaf(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return @@ -51,7 +49,7 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, if !(kind(v) in KSet"BindingId globalref Placeholder") _insert_if_not_present!(assignments, NameKey(v), v) end - _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, ex[2]) + _find_scope_vars!(ctx, 
assignments, locals, destructured_args, globals, used_names, used_bindings, ex[2]) elseif k == K"function_decl" v = ex[1] kv = kind(v) @@ -66,7 +64,7 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, end else for e in children(ex) - _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, e) end end end @@ -81,21 +79,20 @@ function find_scope_vars(ctx, ex) locals = Dict{NameKey,ExT}() destructured_args = Vector{ExT}() globals = Dict{NameKey,ExT}() - used_names = Set{NameKey}() + used_names = Dict{NameKey,ExT}() used_bindings = Set{IdTag}() - alias_bindings = Vector{Pair{NameKey,IdTag}}() for e in children(ex) - _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings, e) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, e) end # Sort by key so that id generation is deterministic - assignments = sort(collect(pairs(assignments)), by=first) - locals = sort(collect(pairs(locals)), by=first) - globals = sort(collect(pairs(globals)), by=first) - used_names = sort(collect(used_names)) - used_bindings = sort(collect(used_bindings)) + assignments = sort!(collect(pairs(assignments)), by=first) + locals = sort!(collect(pairs(locals)), by=first) + globals = sort!(collect(pairs(globals)), by=first) + used_names = sort!(collect(pairs(used_names)), by=first) + used_bindings = sort!(collect(used_bindings)) - return assignments, locals, destructured_args, globals, used_names, used_bindings, alias_bindings + return assignments, locals, destructured_args, globals, used_names, used_bindings end # Metadata about how a binding is used within some enclosing lambda @@ -174,8 +171,6 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext scope_layers::Vector{ScopeLayer} # 
name=>id mappings for all discovered global vars global_vars::Dict{NameKey,IdTag} - # Map for rewriting binding aliases - alias_map::Dict{IdTag,IdTag} # Stack of name=>id mappings for each scope, innermost scope last. scope_stack::Vector{ScopeInfo} method_def_stack::SyntaxList{GraphType} @@ -194,7 +189,6 @@ function ScopeResolutionContext(ctx) ctx.mod, ctx.scope_layers, Dict{NameKey,IdTag}(), - Dict{IdTag,IdTag}(), Vector{ScopeInfo}(), SyntaxList(graph), Set{NameKey}(), @@ -222,11 +216,12 @@ function var_kind(ctx, varkey::NameKey, exclude_toplevel_globals=false) isnothing(id) ? nothing : lookup_binding(ctx, id).kind end -function init_binding(ctx, varkey::NameKey, kind::Symbol; kws...) +function init_binding(ctx, srcref, varkey::NameKey, kind::Symbol; kws...) id = kind === :global ? get(ctx.global_vars, varkey, nothing) : nothing if isnothing(id) mod = kind === :global ? ctx.scope_layers[varkey.layer].mod : nothing - id = new_binding(ctx.bindings, BindingInfo(varkey.name, kind; mod=mod, kws...)) + ex = new_binding(ctx, srcref, varkey.name, kind; mod=mod, kws...) 
+ id = ex.var_id end if kind === :global ctx.global_vars[varkey] = id @@ -248,7 +243,7 @@ function add_lambda_args(ctx, var_ids, args, args_kind) "static parameter name not distinct from function argument" throw(LoweringError(arg, msg)) end - id = init_binding(ctx, varkey, args_kind; + id = init_binding(ctx, arg, varkey, args_kind; is_nospecialize=getmeta(arg, :nospecialize, false)) var_ids[varkey] = id elseif ka != K"BindingId" && ka != K"Placeholder" @@ -269,7 +264,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) assignments, locals, destructured_args, globals, - used, used_bindings, alias_bindings = find_scope_vars(ctx, ex) + used_names, used_bindings = find_scope_vars(ctx, ex) # Construct a mapping from identifiers to bindings # @@ -298,7 +293,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, elseif var_kind(ctx, varkey) === :static_parameter throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) else - var_ids[varkey] = init_binding(ctx, varkey, :local) + var_ids[varkey] = init_binding(ctx, e[1], varkey, :local) end end @@ -316,7 +311,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, elseif var_kind(ctx, varkey) === :static_parameter throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) end - var_ids[varkey] = init_binding(ctx, varkey, :global) + var_ids[varkey] = init_binding(ctx, e[1], varkey, :global) end # Compute implicit locals and globals @@ -332,9 +327,9 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, var_kind(ctx, varkey, true) if vk === nothing if ctx.scope_layers[varkey.layer].is_macro_expansion - var_ids[varkey] = init_binding(ctx, varkey, :local) + var_ids[varkey] = init_binding(ctx, e, varkey, :local) else - init_binding(ctx, varkey, :global) + init_binding(ctx, e, varkey, 
:global) push!(ctx.implicit_toplevel_globals, varkey) end end @@ -365,7 +360,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, # like assignments to locals do inside a function. if is_soft_scope # Soft scope (eg, for loop in REPL) => treat as a global - init_binding(ctx, varkey, :global) + init_binding(ctx, e, varkey, :global) continue else # Ambiguous case (eg, nontrivial scopes in package top level code) @@ -374,7 +369,7 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end end - var_ids[varkey] = init_binding(ctx, varkey, :local; + var_ids[varkey] = init_binding(ctx, e, varkey, :local; is_ambiguous_local=is_ambiguous_local) end end @@ -421,12 +416,12 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end - for varkey in used + for (varkey, e) in used_names id = haskey(var_ids, varkey) ? var_ids[varkey] : lookup_var(ctx, varkey) if id === nothing # Identifiers which are used but not defined in some scope are # newly discovered global bindings - init_binding(ctx, varkey, :global) + init_binding(ctx, e, varkey, :global) elseif !in_toplevel_thunk binfo = lookup_binding(ctx, id) if binfo.kind !== :global @@ -457,15 +452,6 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, end end - # TODO: Remove alias bindings? Dynamically generated scope layers are - # simpler and probably sufficient? 
- for (varkey, id) in alias_bindings - @assert !haskey(ctx.alias_map, id) - ctx.alias_map[id] = get(var_ids, varkey) do - lookup_var(ctx, varkey) - end - end - return ScopeInfo(is_toplevel_global_scope, in_toplevel_thunk, is_soft_scope, is_hard_scope, var_ids, lambda_bindings) end @@ -476,7 +462,7 @@ function add_local_decls!(ctx, stmts, srcref, scope) for id in sort!(collect(values(scope.var_ids))) binfo = lookup_binding(ctx, id) if binfo.kind == :local - push!(stmts, @ast ctx srcref [K"local" id::K"BindingId"]) + push!(stmts, @ast ctx srcref [K"local" binding_ex(ctx, id)]) end end end @@ -521,18 +507,11 @@ function _resolve_scopes(ctx, ex::SyntaxTree) if k == K"Identifier" id = lookup_var(ctx, NameKey(ex)) @ast ctx ex id::K"BindingId" - elseif k == K"BindingId" - mapped_id = get(ctx.alias_map, ex.var_id, nothing) - if isnothing(mapped_id) - ex - else - @ast ctx ex mapped_id::K"BindingId" - end elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" ex # elseif k == K"global" # ex - elseif k == K"local" || k == K"alias_binding" + elseif k == K"local" makeleaf(ctx, ex, K"TOMBSTONE") elseif k == K"local_def" id = lookup_var(ctx, NameKey(ex[1])) @@ -623,7 +602,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) if binfo.kind == :global || binfo.is_internal continue end - binding = @ast ctx (@ast ctx ex binfo.name::K"Identifier") id::K"BindingId" + binding = binding_ex(ctx, id) push!(stmts, @ast ctx ex [K"if" [K"isdefined" binding] [K"call" diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index e20ea56353f16..79130339ce620 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -76,7 +76,7 @@ let x end #--------------------- -1 (newvar slot₁) +1 (newvar slot₁/x) 2 TestMod.f 3 (call %₂) 4 (= slot₂/tmp %₃) diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index 48929f3856e77..dd45d37c6f07e 100644 --- a/JuliaLowering/test/branching_ir.jl +++ 
b/JuliaLowering/test/branching_ir.jl @@ -7,8 +7,8 @@ begin end end #--------------------- -1 (newvar slot₁) -2 (newvar slot₂) +1 (newvar slot₁/a) +2 (newvar slot₂/b) 3 slot₁/a 4 (gotoifnot %₃ label₇) 5 slot₂/b @@ -25,9 +25,9 @@ begin c end #--------------------- -1 (newvar slot₁) -2 (newvar slot₂) -3 (newvar slot₃) +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) 4 slot₁/a 5 (gotoifnot %₄ label₇) 6 slot₂/b @@ -45,9 +45,9 @@ begin end end #--------------------- -1 (newvar slot₁) -2 (newvar slot₂) -3 (newvar slot₃) +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) 4 slot₁/a 5 (gotoifnot %₄ label₈) 6 slot₂/b @@ -67,10 +67,10 @@ begin d end #--------------------- -1 (newvar slot₁) -2 (newvar slot₂) -3 (newvar slot₃) -4 (newvar slot₄) +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) +4 (newvar slot₄/d) 5 slot₁/a 6 (gotoifnot %₅ label₉) 7 slot₂/b @@ -88,10 +88,10 @@ begin end end #--------------------- -1 (newvar slot₁) -2 (newvar slot₂) -3 (newvar slot₃) -4 (newvar slot₄) +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) +4 (newvar slot₄/d) 5 slot₁/a 6 slot₂/b 7 (gotoifnot %₆ label₁₂) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 16d61a179a3b6..fe723435d91f5 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -7,7 +7,7 @@ let end end #--------------------- -1 (= slot₂ (call core.Box)) +1 (= slot₂/x (call core.Box)) 2 --- thunk 1 (global TestMod.#f##0) 2 (call core.svec) @@ -26,7 +26,7 @@ end 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) 7 --- method core.nothing %₆ 1 TestMod.+ - 2 (call core.getfield slot₁/x :x) + 2 (call core.getfield slot₁/#self# :x) 3 (call core.isdefined %₂ :contents) 4 (gotoifnot %₃ label₆) 5 (goto label₈) @@ -39,7 +39,7 @@ end 9 slot₂/x 10 (call core.setfield! 
%₉ :contents %₈) 11 TestMod.#f##0 -12 slot₂/f +12 slot₂/x 13 (= slot₁/f (new %₁₁ %₁₂)) 14 slot₁/f 15 (return %₁₄) @@ -53,7 +53,7 @@ let end end #--------------------- -1 (= slot₂ (call core.Box)) +1 (= slot₂/x (call core.Box)) 2 --- thunk 1 (global TestMod.#f##1) 2 (call core.svec) @@ -72,14 +72,14 @@ end 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) 7 --- method core.nothing %₆ 1 2 - 2 (call core.getfield slot₁/x :x) + 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! %₂ :contents %₁) 4 (return %₁) 8 1 9 slot₂/x 10 (call core.setfield! %₉ :contents %₈) 11 TestMod.#f##1 -12 slot₂/f +12 slot₂/x 13 (= slot₁/f (new %₁₁ %₁₂)) 14 slot₁/f 15 (return %₁₄) @@ -113,7 +113,7 @@ end 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) 7 --- method core.nothing %₆ 1 10 - 2 (call core.getfield slot₁/x :x) + 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! %₂ :contents %₁) 4 (return %₁) 8 TestMod.f @@ -124,7 +124,7 @@ end 13 --- method core.nothing %₁₂ 1 (= slot₂/x (call core.Box slot₂/x)) 2 TestMod.#f#g##0 - 3 (= slot₃/g (new %₂ slot₂/g)) + 3 (= slot₃/g (new %₂ slot₂/x)) 4 slot₃/g 5 slot₃/g 6 (call %₅) diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 7082d300fd713..5474c03df1441 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -2,7 +2,7 @@ # Local declaration with type local x::T = 1 #--------------------- -1 (newvar slot₁) +1 (newvar slot₁/x) 2 1 3 (= slot₂/tmp %₂) 4 slot₂/tmp diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index cc70cb9c8663e..3209f521a3f34 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -43,6 +43,10 @@ end #------------------------------------------------------------------------------- # Module containing macros used in the demo. 
+define_macros = false +if !define_macros + eval(:(module M end)) +else eval(JuliaLowering.@SyntaxTree :(baremodule M using Base @@ -177,9 +181,7 @@ eval(JuliaLowering.@SyntaxTree :(baremodule M end end)) - -# module M -# end +end # #------------------------------------------------------------------------------- # Demos of the prototype @@ -745,24 +747,6 @@ begin end """ -src = """ -x->y -""" - -src = """ -struct X - x - f() = new(1) - X() = f() - X(x) = new(x) - X(y,z)::ReallyXIPromise = new(y+z) - " - Docs for X constructor - " - X(a,b,c) = new(a) -end -""" - # TODO: fix this - it's interpreted in a bizarre way as a kw call. # src = """ # function f(x=y=1) @@ -778,20 +762,20 @@ ex = ensure_attributes(ex, var_id=Int) in_mod = M # in_mod=Main ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) -@info "Macro expanded" ex_macroexpand formatsrc(ex_macroexpand, color_by=:scope_layer) +@info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) #@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.flattened_provenance(e)[1:end-1]) ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) -@info "Desugared" ex_desugar formatsrc(ex_desugar, color_by=:scope_layer) +@info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) -@info "Resolved scopes" ex_scoped formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e)) +@info "Resolved scopes" formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e)) ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) -@info "Closure converted" ex_converted formatsrc(ex_converted, color_by=:var_id) +@info "Closure converted" formatsrc(ex_converted, color_by=:var_id) ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) -@info "Linear IR" ex_compiled formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) +@info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) 
Text(sprint(JuliaLowering.print_ir, ex_compiled)) ex_expr = JuliaLowering.to_lowered_expr(in_mod, ex_compiled) @info "CodeInfo" ex_expr diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index b73d737e9c68c..51d3ead88251d 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -245,7 +245,7 @@ let (; x::T) = rhs end #--------------------- -1 (newvar slot₁) +1 (newvar slot₁/x) 2 TestMod.rhs 3 (call top.getproperty %₂ :x) 4 (= slot₂/tmp %₃) diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl index faf09371e465a..874f34a57bf99 100644 --- a/JuliaLowering/test/exceptions_ir.jl +++ b/JuliaLowering/test/exceptions_ir.jl @@ -115,7 +115,7 @@ let end end #--------------------- -1 (newvar slot₁) +1 (newvar slot₁/z) 2 (enter label₈) 3 TestMod.a 4 (leave %₂) @@ -184,7 +184,7 @@ let end end #--------------------- -1 (newvar slot₁) +1 (newvar slot₁/z) 2 (enter label₈) 3 (= slot₃/finally_tag -1) 4 TestMod.a diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 1f05df5e51ee9..7d34278e1bb67 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -22,7 +22,7 @@ begin end end #--------------------- -1 (= slot₁ (call core.Box)) +1 (= slot₁/y (call core.Box)) 2 2 3 slot₁/y 4 (call core.setfield! 
%₃ :contents %₂) @@ -56,7 +56,7 @@ begin @locals end #--------------------- -1 (newvar slot₁) +1 (newvar slot₁/y) 2 (global TestMod.x) 3 (call core.apply_type top.Dict core.Symbol core.Any) 4 (call %₃) diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 27c3f80a22d95..492fa75ea5ab7 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -759,7 +759,7 @@ end 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -7 (newvar slot₂) +7 (newvar slot₂/f) 8 (global TestMod.X) 9 (const TestMod.X) 10 (call core.svec) @@ -862,7 +862,7 @@ end 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ 1) 6 (return %₅) -7 (newvar slot₅) +7 (newvar slot₅/f) 8 (global TestMod.X) 9 (const TestMod.X) 10 (= slot₂/S (call core.TypeVar :S)) From c32f75d1c780e8150d4636efb39b39dfe6730566 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 23 Dec 2024 07:02:50 +1000 Subject: [PATCH 0916/1109] Fix method overwrite warning in tests --- JuliaLowering/test/closures.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 81253a185dc06..daf9d7ca13685 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -33,10 +33,11 @@ let end """) == 100 +Base.eval(test_mod, :(call_it(f, args...) 
= f(args...))) + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin - call_it(f, arg) = f(arg) local y = 2 call_it(x->x+y, 3) end @@ -45,7 +46,6 @@ end # Anon function syntax with `where` @test JuliaLowering.include_string(test_mod, """ begin - call_it(f, arg) = f(arg) local y = 2 call_it((x::T where {T<:Integer})->x+y, 3) end From 0bfb33f87f4d18dbb15b35a9c0da5ee8d7dc2000 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 27 Dec 2024 11:21:00 +1000 Subject: [PATCH 0917/1109] Make left hand side of `->` a tuple of arguments (JuliaLang/JuliaSyntax.jl#522) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes argument lists on the left hand side of `->` parse as tuples. In particular, this fixes a very inconsistent case where the left hand side previously parsed as a `K"block"`; we now have: * `(a;x=1)->b` ==> `(-> (tuple-p a (parameters (= x 1))) b` rather than `(block a (= x 1))` on the left hand side. In addition, the following forms are treated consistently * `a->b` ==> `(-> (tuple a) b)` * `(a)->b` ==> `(-> (tuple-p a) b)` * `(a,)->b` ==> `(-> (tuple-p-, a) b)` The upside of this is that expression processing of `->` syntax should be much easier. There's one aberrant case involving `where` which is particularly difficult and still not dealt with: `(x where T) -> b` does not parse as `(where (tuple x) T)` on the left hand side. However, `where` precedence involving `::` and `->` is already horribly broken and this syntax will always be awful to write unless we make breaking changes. So I'm too tempted to call this a lost cause for now 😬. Compatibility shims for converting the `SyntaxNode` form back to `Expr` in order to keep `Expr` stable are included. (At some point we should consider fixing this and deleting these shims because the new form is so much more consistent and would be reflected neatly into `Expr` form.) 
--- JuliaSyntax/docs/src/reference.md | 1 + JuliaSyntax/src/expr.jl | 26 +++++++++++++++++++++++++- JuliaSyntax/src/parser.jl | 20 +++++++++++++++++--- JuliaSyntax/test/expr.jl | 18 ++++++++++++++++++ JuliaSyntax/test/parser.jl | 13 +++++++++++-- 5 files changed, 72 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index 100110cbfac60..aeb1b44a59a19 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -70,6 +70,7 @@ class of tokenization errors and lets the parser deal with them. * Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. *) x y)` (#240) * The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103) * Unadorned postfix adjoint is parsed as `call` rather than as a syntactic operator for consistency with suffixed versions like `x'ᵀ` (#124) +* The argument list in the left hand side of `->` is always a tuple. For example, `x->y` parses as `(-> (tuple x) y)` rather than `(-> x y)` (#522) ### Improvements to awkward AST forms diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index b436e744e0a39..aaa57c72849bf 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -388,11 +388,35 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # Block for conditional's source location args[1] = Expr(:block, loc, args[1]) elseif k == K"->" + a1 = args[1] + if @isexpr(a1, :tuple) + # TODO: This makes the Expr form objectively worse for the sake of + # compatibility. We should consider deleting this special case in + # the future as a minor change. 
+ if length(a1.args) == 1 && + (!has_flags(childheads[1], PARENS_FLAG) || + !has_flags(childheads[1], TRAILING_COMMA_FLAG)) && + !Meta.isexpr(a1.args[1], :parameters) + # `(a) -> c` is parsed without tuple on lhs in Expr form + args[1] = a1.args[1] + elseif length(a1.args) == 2 && (a11 = a1.args[1]; @isexpr(a11, :parameters) && + length(a11.args) <= 1 && !Meta.isexpr(a1.args[2], :(...))) + # `(a; b=1) -> c` parses args as `block` in Expr form :-( + if length(a11.args) == 0 + args[1] = Expr(:block, a1.args[2]) + else + a111 = only(a11.args) + assgn = @isexpr(a111, :kw) ? Expr(:(=), a111.args...) : a111 + argloc = source_location(LineNumberNode, source, last(childranges[1])) + args[1] = Expr(:block, a1.args[2], argloc, assgn) + end + end + end a2 = args[2] + # Add function source location to rhs; add block if necessary if @isexpr(a2, :block) pushfirst!(a2.args, loc) else - # Add block for source locations args[2] = Expr(:block, loc, args[2]) end elseif k == K"function" diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index bb08134d83186..b640b03daa6e2 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1409,8 +1409,21 @@ function parse_decl_with_initial_ex(ps::ParseState, mark) emit(ps, mark, K"::", INFIX_FLAG) end if peek(ps) == K"->" - # x -> y ==> (-> x y) - # a::b->c ==> (-> (::-i a b) c) + kb = peek_behind(ps).kind + if kb == K"tuple" + # (x,y) -> z + # (x) -> y + # (x; a=1) -> y + elseif kb == K"where" + # `where` and `->` have the "wrong" precedence when writing anon functons. + # So ignore this case to allow use of grouping brackets with `where`. + # This needs to worked around in lowering :-( + # (x where T) -> y ==> (-> (x where T) y) + else + # x -> y ==> (-> (tuple x) y) + # a::b->c ==> (-> (tuple (::-i a b)) c) + emit(ps, mark, K"tuple") + end bump(ps, TRIVIA_FLAG) # -> is unusual: it binds tightly on the left and loosely on the right. 
parse_eq_star(ps) @@ -3073,7 +3086,8 @@ function parse_paren(ps::ParseState, check_identifiers=true) initial_semi = peek(ps) == K";" opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs is_tuple = had_commas || (had_splat && num_semis >= 1) || - (initial_semi && (num_semis == 1 || num_subexprs > 0)) + (initial_semi && (num_semis == 1 || num_subexprs > 0)) || + (peek(ps, 2) == K"->" && peek_behind(ps).kind != K"where") return (needs_parameters=is_tuple, is_tuple=is_tuple, is_block=num_semis > 0) diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 9361937f31460..417ec2fd18745 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -130,6 +130,10 @@ @testset "->" begin @test parsestmt("a -> b") == Expr(:->, :a, Expr(:block, LineNumberNode(1), :b)) + @test parsestmt("(a,) -> b") == + Expr(:->, Expr(:tuple, :a), Expr(:block, LineNumberNode(1), :b)) + @test parsestmt("(a where T) -> b") == + Expr(:->, Expr(:where, :a, :T), Expr(:block, LineNumberNode(1), :b)) # @test parsestmt("a -> (\nb;c)") == # Expr(:->, :a, Expr(:block, LineNumberNode(1), :b)) @test parsestmt("a -> begin\nb\nc\nend") == @@ -137,6 +141,20 @@ LineNumberNode(1), LineNumberNode(2), :b, LineNumberNode(3), :c)) + @test parsestmt("(a;b=1) -> c") == + Expr(:->, + Expr(:block, :a, LineNumberNode(1), Expr(:(=), :b, 1)), + Expr(:block, LineNumberNode(1), :c)) + @test parsestmt("(a...;b...) 
-> c") == + Expr(:->, + Expr(:tuple, Expr(:parameters, Expr(:(...), :b)), Expr(:(...), :a)), + Expr(:block, LineNumberNode(1), :c)) + @test parsestmt("(;) -> c") == + Expr(:->, + Expr(:tuple, Expr(:parameters)), + Expr(:block, LineNumberNode(1), :c)) + @test parsestmt("a::T -> b") == + Expr(:->, Expr(:(::), :a, :T), Expr(:block, LineNumberNode(1), :b)) end @testset "elseif" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index e36ba73e8a889..584a8d66efa21 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -288,9 +288,18 @@ tests = [ "begin x end::T" => "(::-i (block x) T)" # parse_decl_with_initial_ex "a::b" => "(::-i a b)" - "a->b" => "(-> a b)" "a::b::c" => "(::-i (::-i a b) c)" - "a::b->c" => "(-> (::-i a b) c)" + "a->b" => "(-> (tuple a) b)" + "(a,b)->c" => "(-> (tuple-p a b) c)" + "(a;b=1)->c" => "(-> (tuple-p a (parameters (= b 1))) c)" + "x::T->c" => "(-> (tuple (::-i x T)) c)" + # `where` combined with `->` still parses strangely. However: + # * It's extra hard to add a tuple around the `x` in this syntax corner case. + # * The user already needs to add additional, ugly, parens to get this + # to parse correctly because the precendence of `where` is + # inconsistent with `::` and `->` in this case. + "(x where T)->c" => "(-> (parens (where x T)) c)" + "((x::T) where T)->c" => "(-> (parens (where (parens (::-i x T)) T)) c)" ], JuliaSyntax.parse_unary_subtype => [ "<: )" => "<:" From a9c0eb38a0783004011f020df3d167bb74d2742c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 28 Dec 2024 22:22:00 +1000 Subject: [PATCH 0918/1109] Variable analysis cleanup + implement slot flags * Code to set slot flags from analysis of the untyped IR * Print slot flags for each code info so they can be included in the IR test cases. * Move most variable analysis into its own sub-pass. This makes it easier to set is_read and is_called correctly. 
* maybe-undef and single-assign slot flags are still TODO (is_read also needs some work in the cases where closure conversion adds variable usages) * Also noticed we can desugar `x::T` to a typeassert earlier so that's in here, though it's a bit unrelated oops. --- JuliaLowering/src/ast.jl | 10 +- JuliaLowering/src/bindings.jl | 82 ++++++- JuliaLowering/src/closure_conversion.jl | 27 +-- JuliaLowering/src/desugaring.jl | 6 + JuliaLowering/src/eval.jl | 10 +- JuliaLowering/src/linear_ir.jl | 39 ++-- JuliaLowering/src/scope_analysis.jl | 294 +++++++++++++----------- JuliaLowering/src/syntax_graph.jl | 4 +- JuliaLowering/src/utils.jl | 31 ++- JuliaLowering/test/assignments_ir.jl | 1 + JuliaLowering/test/closures_ir.jl | 7 + JuliaLowering/test/demo.jl | 6 + JuliaLowering/test/functions.jl | 29 ++- JuliaLowering/test/functions_ir.jl | 54 ++++- JuliaLowering/test/macros_ir.jl | 2 + JuliaLowering/test/scopes_ir.jl | 2 + JuliaLowering/test/typedefs_ir.jl | 19 ++ 17 files changed, 430 insertions(+), 193 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 8843a654864a4..ece141b4b92bc 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -48,9 +48,13 @@ end #------------------------------------------------------------------------------- abstract type AbstractLoweringContext end -function add_lambda_local!(ctx::AbstractLoweringContext, id) - # empty - early passes don't need to record lambda locals -end +""" +Bindings for the current lambda being processed. + +Lowering passes prior to scope resolution return `nothing` and bindings are +collected later. 
+""" +current_lambda_bindings(ctx::AbstractLoweringContext) = nothing function syntax_graph(ctx::AbstractLoweringContext) ctx.graph diff --git a/JuliaLowering/src/bindings.jl b/JuliaLowering/src/bindings.jl index 5a18956984e7c..e6fda3c23681d 100644 --- a/JuliaLowering/src/bindings.jl +++ b/JuliaLowering/src/bindings.jl @@ -8,6 +8,7 @@ struct BindingInfo node_id::Int # ID of associated K"BindingId" node in the syntax graph mod::Union{Nothing,Module} # Set when `kind === :global` type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10 + n_assigned::Int32 # Number of times variable is assigned to is_const::Bool # Constant, cannot be reassigned is_ssa::Bool # Single assignment, defined before use is_captured::Bool # Variable is captured by some lambda @@ -20,6 +21,7 @@ end function BindingInfo(id::IdTag, name::AbstractString, kind::Symbol, node_id::Integer; mod::Union{Nothing,Module} = nothing, type::Union{Nothing,SyntaxTree} = nothing, + n_assigned::Integer = 0, is_const::Bool = false, is_ssa::Bool = false, is_captured::Bool = false, @@ -27,7 +29,8 @@ function BindingInfo(id::IdTag, name::AbstractString, kind::Symbol, node_id::Int is_internal::Bool = false, is_ambiguous_local::Bool = false, is_nospecialize::Bool = false) - BindingInfo(id, name, kind, node_id, mod, type, is_const, is_ssa, is_captured, is_always_defined, + BindingInfo(id, name, kind, node_id, mod, type, n_assigned, is_const, + is_ssa, is_captured, is_always_defined, is_internal, is_ambiguous_local, is_nospecialize) end @@ -66,7 +69,8 @@ function _binding_id(ex::SyntaxTree) end function update_binding!(bindings::Bindings, x; - type=nothing, is_const=nothing, is_always_defined=nothing, is_captured=nothing) + type=nothing, is_const=nothing, add_assigned=0, + is_always_defined=nothing, is_captured=nothing) id = _binding_id(x) b = lookup_binding(bindings, id) bindings.info[id] = BindingInfo( @@ -76,6 +80,7 @@ function update_binding!(bindings::Bindings, x; b.node_id, b.mod, 
isnothing(type) ? b.type : type, + b.n_assigned + add_assigned, isnothing(is_const) ? b.is_const : is_const, b.is_ssa, isnothing(is_captured) ? b.is_captured : is_captured, @@ -117,7 +122,10 @@ function new_local_binding(ctx::AbstractLoweringContext, srcref, name; kind=:loc @assert kind === :local || kind === :argument nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name) ex = new_binding(ctx, nameref, name, kind; is_internal=true, kws...) - add_lambda_local!(ctx, ex.var_id) + lbindings = current_lambda_bindings(ctx) + if !isnothing(lbindings) + init_lambda_binding(lbindings, ex.var_id) + end ex end @@ -134,3 +142,71 @@ function binding_ex(ctx::AbstractLoweringContext, id::IdTag) SyntaxTree(syntax_graph(ctx), lookup_binding(ctx, id).node_id) end + +#------------------------------------------------------------------------------- +""" +Metadata about how a binding is used within some enclosing lambda +""" +struct LambdaBindingInfo + is_captured::Bool + is_read::Bool + is_assigned::Bool + # Binding was the function name in a call. Used for specialization + # heuristics in the optimizer. + is_called::Bool +end + +LambdaBindingInfo() = LambdaBindingInfo(false, false, false, false) + +function LambdaBindingInfo(parent::LambdaBindingInfo; + is_captured = nothing, + is_read = nothing, + is_assigned = nothing, + is_called = nothing) + LambdaBindingInfo( + isnothing(is_captured) ? parent.is_captured : is_captured, + isnothing(is_read) ? parent.is_read : is_read, + isnothing(is_assigned) ? parent.is_assigned : is_assigned, + isnothing(is_called) ? parent.is_called : is_called, + ) +end + +struct LambdaBindings + # Bindings used within the lambda + self::IdTag + bindings::Dict{IdTag,LambdaBindingInfo} +end + +LambdaBindings(self::IdTag = 0) = LambdaBindings(self, Dict{IdTag,LambdaBindings}()) + +function init_lambda_binding(bindings::LambdaBindings, id; kws...) 
+ @assert !haskey(bindings.bindings, id) + bindings.bindings[id] = LambdaBindingInfo(LambdaBindingInfo(); kws...) +end + +function update_lambda_binding!(bindings::LambdaBindings, x; kws...) + id = _binding_id(x) + binfo = bindings.bindings[id] + bindings.bindings[id] = LambdaBindingInfo(binfo; kws...) +end + +function update_lambda_binding!(ctx::AbstractLoweringContext, x; kws...) + update_lambda_binding!(current_lambda_bindings(ctx), x; kws...) +end + +function lookup_lambda_binding(bindings::LambdaBindings, x) + get(bindings.bindings, _binding_id(x), nothing) +end + +function lookup_lambda_binding(ctx::AbstractLoweringContext, x) + lookup_lambda_binding(current_lambda_bindings(ctx), x) +end + +function has_lambda_binding(bindings::LambdaBindings, x) + haskey(bindings.bindings, _binding_id(x)) +end + +function has_lambda_binding(ctx::AbstractLoweringContext, x) + has_lambda_binding(current_lambda_bindings(ctx), x) +end + diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index c723a7226f6cb..e45506aab42aa 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -27,8 +27,8 @@ function ClosureConversionCtx(graph::GraphType, bindings::Bindings, Dict{IdTag,ClosureInfo{GraphType}}()) end -function add_lambda_local!(ctx::ClosureConversionCtx, id) - init_lambda_binding(ctx.lambda_bindings, id) +function current_lambda_bindings(ctx::ClosureConversionCtx) + ctx.lambda_bindings end # Access captured variable from inside a closure @@ -39,7 +39,7 @@ function captured_var_access(ctx, ex) "getfield"::K"core" # FIXME: attributing the self binding to srcref=ex gives misleading printing. # We should carry provenance with each binding to fix this. 
- binding_ex(ctx, ctx.lambda_bindings.self) + binding_ex(ctx, current_lambda_bindings(ctx).self) field_sym ] end @@ -162,7 +162,7 @@ function convert_assignment(ctx, ex) convert_global_assignment(ctx, ex, var, rhs0) else @assert binfo.kind == :local || binfo.kind == :argument - lbinfo = get(ctx.lambda_bindings.bindings, var.var_id, nothing) + lbinfo = lookup_lambda_binding(ctx, var) self_captured = !isnothing(lbinfo) && lbinfo.is_captured captured = binfo.is_captured if isnothing(binfo.type) && !self_captured && !captured @@ -287,7 +287,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) k = kind(ex) if k == K"BindingId" id = ex.var_id - lbinfo = get(ctx.lambda_bindings.bindings, id, nothing) + lbinfo = lookup_lambda_binding(ctx, id) if !isnothing(lbinfo) && lbinfo.is_captured # TODO: && vinfo:asgn cv ?? get_box_contents(ctx, ex, captured_var_access(ctx, ex)) elseif lookup_binding(ctx, id).is_captured # TODO: && vinfo:asgn vi @@ -326,12 +326,6 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) else makeleaf(ctx, ex, K"TOMBSTONE") end - elseif k == K"::" - _convert_closures(ctx, - @ast ctx ex [K"call" - "typeassert"::K"core" - children(ex)... - ]) elseif k == K"lambda" closure_convert_lambda(ctx, ex) elseif k == K"function_decl" @@ -370,11 +364,14 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) else # Single-arg K"method" has the side effect of creating a global # binding for `func_name` if it doesn't exist. 
- @ast ctx ex [K"method" func_name] + @ast ctx ex [K"block" + [K"method" func_name] + ::K"TOMBSTONE" + ] end elseif k == K"function_type" func_name = ex[1] - if kind(func_name) == K"BindingId" && lookup_binding(ctx, func_name).kind == :local + if kind(func_name) == K"BindingId" && lookup_binding(ctx, func_name).kind === :local ctx.closure_infos[func_name.var_id].type_name else @ast ctx ex [K"call" "Typeof"::K"core" func_name] @@ -443,7 +440,7 @@ function closure_convert_lambda(ctx, ex) push!(lambda_children, _convert_closures(ctx2, ex[4])) end - makenode(ctx, ex, ex, lambda_children) + makenode(ctx, ex, ex, lambda_children; lambda_bindings=lambda_bindings) end @@ -460,7 +457,7 @@ Invariants: * This pass must not introduce new K"Identifier" - only K"BindingId". * Any new binding IDs must be added to the enclosing lambda locals """ -function convert_closures(ctx::ScopeResolutionContext, ex) +function convert_closures(ctx::VariableAnalysisContext, ex) ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, ex.lambda_bindings) ex1 = closure_convert_lambda(ctx, ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 41221cdbf26b0..e8199a17b3427 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2668,6 +2668,12 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else @ast ctx ex [K"if" cond true::K"Bool" cs[end]] end + elseif k == K"::" && numchildren(ex) == 2 + @ast ctx ex [K"call" + "typeassert"::K"core" + expand_forms_2(ctx, ex[1]) + expand_forms_2(ctx, ex[2]) + ] elseif k == K"=" expand_assignment(ctx, ex) elseif k == K"break" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index d647695f9657c..f14b3ee6bfdb8 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -112,6 +112,9 @@ function to_code_info(ex, mod, funcname, slots) slotflags = Vector{UInt8}(undef, length(slots)) for (i, slot) in enumerate(slots) 
name = slot.name + # TODO: Do we actually want unique names here? The C code in + # `jl_new_code_info_from_ir` has logic to simplify gensym'd names and + # use the empty string for compiler-generated bindings. ni = get(slot_rename_inds, name, 0) slot_rename_inds[name] = ni + 1 if ni > 0 @@ -119,8 +122,13 @@ function to_code_info(ex, mod, funcname, slots) end sname = Symbol(name) slotnames[i] = sname - slotflags[i] = 0x00 # FIXME!! + slotflags[i] = # Inference | Codegen + slot.is_read << 3 | # SLOT_USED | jl_vinfo_sa + slot.is_single_assign << 4 | # SLOT_ASSIGNEDONCE | - + slot.is_maybe_undef << 5 | # SLOT_USEDUNDEF | jl_vinfo_usedundef + slot.is_called << 6 # SLOT_CALLED | - if slot.is_nospecialize + # Ideally this should be a slot flag instead add_ir_debug_info!(current_codelocs_stack, ex) push!(stmts, Expr(:meta, :nospecialize, Core.SlotNumber(i))) end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 0e70a7eb68b91..06e6c85244599 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -106,6 +106,10 @@ function LinearIRContext(ctx, is_toplevel_thunk, lambda_bindings, return_type) Vector{JumpOrigin{GraphType}}(), ctx.mod) end +function current_lambda_bindings(ctx::LinearIRContext) + ctx.lambda_bindings +end + function is_valid_body_ir_argument(ctx, ex) if is_valid_ir_argument(ctx, ex) true @@ -398,10 +402,6 @@ function compile_conditional(ctx, ex, false_label) end end -function add_lambda_local!(ctx::LinearIRContext, id) - init_lambda_binding(ctx.lambda_bindings, id) -end - # Lowering of exception handling must ensure that # # * Each `enter` is matched with a `leave` on every possible non-exceptional @@ -974,37 +974,46 @@ struct Slot name::String kind::Symbol is_nospecialize::Bool - # <- todo: flags here etc + is_read::Bool + is_single_assign::Bool + is_maybe_undef::Bool + is_called::Bool end function compile_lambda(outer_ctx, ex) lambda_args = ex[1] static_parameters = ex[2] ret_var = numchildren(ex) == 
4 ? ex[4] : nothing - # TODO: Add assignments for reassigned arguments to body using lambda_args - ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, ex.lambda_bindings, ret_var) + # TODO: Add assignments for reassigned arguments to body + lambda_bindings = ex.lambda_bindings + ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, lambda_bindings, ret_var) compile_body(ctx, ex[3]) slots = Vector{Slot}() slot_rewrites = Dict{IdTag,Int}() for arg in children(lambda_args) if kind(arg) == K"Placeholder" # Unused functions arguments like: `_` or `::T` - push!(slots, Slot(arg.name_val, :argument, false)) + push!(slots, Slot(arg.name_val, :argument, false, false, false, false, false)) else @assert kind(arg) == K"BindingId" id = arg.var_id - info = lookup_binding(ctx.bindings, id) - @assert info.kind == :local || info.kind == :argument - push!(slots, Slot(info.name, :argument, info.is_nospecialize)) + binfo = lookup_binding(ctx, id) + lbinfo = lookup_lambda_binding(ctx, id) + @assert binfo.kind == :local || binfo.kind == :argument + # FIXME: is_single_assign, is_maybe_undef + push!(slots, Slot(binfo.name, :argument, binfo.is_nospecialize, + lbinfo.is_read, false, false, lbinfo.is_called)) slot_rewrites[id] = length(slots) end end # Sorting the lambda locals is required to remove dependence on Dict iteration order. 
- for (id, lbinfo) in sort(collect(pairs(ex.lambda_bindings.bindings)), by=first) + for (id, lbinfo) in sort(collect(pairs(lambda_bindings.bindings)), by=first) if !lbinfo.is_captured - info = lookup_binding(ctx.bindings, id) - if info.kind == :local - push!(slots, Slot(info.name, :local, false)) + binfo = lookup_binding(ctx.bindings, id) + if binfo.kind == :local + # FIXME: is_single_assign, is_maybe_undef + push!(slots, Slot(binfo.name, :local, false, + lbinfo.is_read, false, false, lbinfo.is_called)) slot_rewrites[id] = length(slots) end end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 6e1864963e660..1ab7494855453 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -95,58 +95,6 @@ function find_scope_vars(ctx, ex) return assignments, locals, destructured_args, globals, used_names, used_bindings end -# Metadata about how a binding is used within some enclosing lambda -struct LambdaBindingInfo - is_captured::Bool - is_read::Bool - is_assigned::Bool - is_called::Bool -end - -LambdaBindingInfo() = LambdaBindingInfo(false, false, false, false) - -function LambdaBindingInfo(parent::LambdaBindingInfo; - is_captured = nothing, - is_read = nothing, - is_assigned = nothing, - is_called = nothing) - LambdaBindingInfo( - isnothing(is_captured) ? parent.is_captured : is_captured, - isnothing(is_read) ? parent.is_read : is_read, - isnothing(is_assigned) ? parent.is_assigned : is_assigned, - isnothing(is_called) ? parent.is_called : is_called, - ) -end - -struct LambdaBindings - # Bindings used within the lambda - self::IdTag - bindings::Dict{IdTag,LambdaBindingInfo} -end - -LambdaBindings(self::IdTag = 0) = LambdaBindings(self, Dict{IdTag,LambdaBindings}()) - -function init_lambda_binding(binds::LambdaBindings, id; kws...) - @assert !haskey(binds.bindings, id) - binds.bindings[id] = LambdaBindingInfo(LambdaBindingInfo(); kws...) 
-end - -function update_lambda_binding!(binds::LambdaBindings, id; kws...) - binfo = binds.bindings[id] - binds.bindings[id] = LambdaBindingInfo(binfo; kws...) -end - -function update_lambda_binding!(ctx::AbstractLoweringContext, id; kws...) - update_lambda_binding!(last(ctx.scope_stack).lambda_bindings, id; kws...) -end - -struct ClosureBindings - name_stack::Vector{String} # Names of functions the closure is nested within - lambdas::Vector{LambdaBindings} # Bindings for each method of the closure -end - -ClosureBindings(name_stack) = ClosureBindings(name_stack, Vector{LambdaBindings}()) - struct ScopeInfo # True if scope is the global top level scope is_toplevel_global_scope::Bool @@ -173,13 +121,9 @@ struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext global_vars::Dict{NameKey,IdTag} # Stack of name=>id mappings for each scope, innermost scope last. scope_stack::Vector{ScopeInfo} - method_def_stack::SyntaxList{GraphType} # Variables which were implicitly global due to being assigned to in top # level code implicit_toplevel_globals::Set{NameKey} - # Collection of information about each closure, principally which methods - # are part of the closure (and hence captures). 
- closure_bindings::Dict{IdTag,ClosureBindings} end function ScopeResolutionContext(ctx) @@ -190,9 +134,11 @@ function ScopeResolutionContext(ctx) ctx.scope_layers, Dict{NameKey,IdTag}(), Vector{ScopeInfo}(), - SyntaxList(graph), - Set{NameKey}(), - Dict{IdTag,ClosureBindings}()) + Set{NameKey}()) +end + +function current_lambda_bindings(ctx::ScopeResolutionContext) + last(ctx.scope_stack).lambda_bindings end function lookup_var(ctx, varkey::NameKey, exclude_toplevel_globals=false) @@ -382,6 +328,8 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, # enclosing lambda # * All non-globals are recorded (kind :local and :argument will later be turned into slots) # * Captured variables are detected and recorded + # + # TODO: Move most or-all of this to the VariableAnalysis sub-pass lambda_bindings = if is_outer_lambda_scope if isempty(lambda_args) LambdaBindings() @@ -410,8 +358,8 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, binfo = lookup_binding(ctx, id) if (binfo.kind === :local && !binfo.is_ssa) || binfo.kind === :argument || binfo.kind === :static_parameter - if !haskey(lambda_bindings.bindings, id) - init_lambda_binding(lambda_bindings, id, is_read=true, is_assigned=true) + if !has_lambda_binding(lambda_bindings, id) + init_lambda_binding(lambda_bindings, id) end end end @@ -425,12 +373,10 @@ function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, elseif !in_toplevel_thunk binfo = lookup_binding(ctx, id) if binfo.kind !== :global - if !haskey(lambda_bindings.bindings, id) + if !has_lambda_binding(lambda_bindings, id) # Used vars from a scope *outside* the current lambda are captured - init_lambda_binding(lambda_bindings, id, is_captured=true, is_read=true) + init_lambda_binding(lambda_bindings, id, is_captured=true) update_binding!(ctx, id; is_captured=true) - else - update_lambda_binding!(lambda_bindings, id, is_read=true) end end end @@ -441,12 +387,10 @@ function 
analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, id = haskey(var_ids, varkey) ? var_ids[varkey] : lookup_var(ctx, varkey) binfo = lookup_binding(ctx, id) if binfo.kind !== :global - if !haskey(lambda_bindings.bindings, id) + if !has_lambda_binding(lambda_bindings, id) # Assigned vars from a scope *outside* the current lambda are captured - init_lambda_binding(lambda_bindings, id, is_captured=true, is_assigned=true) + init_lambda_binding(lambda_bindings, id, is_captured=true) update_binding!(ctx, id; is_captured=true) - else - update_lambda_binding!(lambda_bindings, id, is_assigned=true) end end end @@ -467,52 +411,26 @@ function add_local_decls!(ctx, stmts, srcref, scope) end end -# Do some things which are better done after converting to BindingId. -function maybe_update_bindings!(ctx, ex) - k = kind(ex) - if k == K"decl" - @chk numchildren(ex) == 2 - id = ex[1] - if kind(id) != K"Placeholder" - binfo = lookup_binding(ctx, id) - if !isnothing(binfo.type) - throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) - end - if binfo.kind == :global && !ctx.scope_stack[end].in_toplevel_thunk - throw(LoweringError(ex, "type declarations for global variables must be at top level, not inside a function")) - # set_binding_type! 
- end - update_binding!(ctx, id; type=ex[2]) - end - elseif k == K"const" - id = ex[1] - if lookup_binding(ctx, id).kind == :local - throw(LoweringError(ex, "unsupported `const` declaration on local variable")) - end - update_binding!(ctx, id; is_const=true) - elseif k == K"call" - name = ex[1] - if kind(name) == K"BindingId" - id = name.var_id - if haskey(last(ctx.scope_stack).lambda_bindings.bindings, id) - update_lambda_binding!(ctx, id, is_called=true) - end - end - end - nothing -end - function _resolve_scopes(ctx, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" - id = lookup_var(ctx, NameKey(ex)) - @ast ctx ex id::K"BindingId" + @ast ctx ex lookup_var(ctx, NameKey(ex))::K"BindingId" elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" ex # elseif k == K"global" # ex elseif k == K"local" makeleaf(ctx, ex, K"TOMBSTONE") + elseif k == K"decl" + ex_out = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + name = ex_out[1] + if kind(name) != K"Placeholder" + binfo = lookup_binding(ctx, name) + if binfo.kind == :global && !ctx.scope_stack[end].in_toplevel_thunk + throw(LoweringError(ex, "type declarations for global variables must be at top level, not inside a function")) + end + end + ex_out elseif k == K"local_def" id = lookup_var(ctx, NameKey(ex[1])) update_binding!(ctx, id; is_always_defined=true) @@ -536,28 +454,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) ret_var = numchildren(ex) == 4 ? 
_resolve_scopes(ctx, ex[4]) : nothing pop!(ctx.scope_stack) - lambda_bindings = scope.lambda_bindings - if !is_toplevel_thunk - # Record all lambdas for the same closure type in one place - func_name = last(ctx.method_def_stack) - if kind(func_name) == K"BindingId" - func_name_id = func_name.var_id - if lookup_binding(ctx, func_name_id).kind === :local - cbinds = get!(ctx.closure_bindings, func_name_id) do - name_stack = Vector{String}() - for fname in ctx.method_def_stack - if kind(fname) == K"BindingId" - push!(name_stack, lookup_binding(ctx, fname).name) - end - end - ClosureBindings(name_stack) - end - push!(cbinds.lambdas, lambda_bindings) - end - end - end - - @ast ctx ex [K"lambda"(lambda_bindings=lambda_bindings, + @ast ctx ex [K"lambda"(lambda_bindings=scope.lambda_bindings, is_toplevel_thunk=is_toplevel_thunk) arg_bindings sparm_bindings @@ -649,15 +546,8 @@ function _resolve_scopes(ctx, ex::SyntaxTree) else makeleaf(ctx, ex, K"TOMBSTONE") end - elseif k == K"method_defs" - push!(ctx.method_def_stack, _resolve_scopes(ctx, ex[1])) - ex_mapped = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) - pop!(ctx.method_def_stack) - ex_mapped else - ex_mapped = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) - maybe_update_bindings!(ctx, ex_mapped) - ex_mapped + mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) end end @@ -669,13 +559,139 @@ function _resolve_scopes(ctx, exs::AbstractVector) out end +#------------------------------------------------------------------------------- +# Sub-pass to compute additional information about variable usage as required +# by closure conversion, etc +struct ClosureBindings + name_stack::Vector{String} # Names of functions the closure is nested within + lambdas::Vector{LambdaBindings} # Bindings for each method of the closure +end + +ClosureBindings(name_stack) = ClosureBindings(name_stack, Vector{LambdaBindings}()) + +struct VariableAnalysisContext{GraphType} <: AbstractLoweringContext + graph::GraphType + bindings::Bindings + 
mod::Module + lambda_bindings::LambdaBindings + # Stack of method definitions for closure naming + method_def_stack::SyntaxList{GraphType} + # Collection of information about each closure, principally which methods + # are part of the closure (and hence captures). + closure_bindings::Dict{IdTag,ClosureBindings} +end + +function VariableAnalysisContext(graph, bindings, mod, lambda_bindings) + VariableAnalysisContext(graph, bindings, mod, lambda_bindings, + SyntaxList(graph), Dict{IdTag,ClosureBindings}()) +end + +function current_lambda_bindings(ctx::VariableAnalysisContext) + ctx.lambda_bindings +end + +# Update ctx.bindings and ctx.lambda_bindings metadata based on binding usage +function analyze_variables!(ctx, ex) + k = kind(ex) + if k == K"BindingId" + if has_lambda_binding(ctx, ex) + # FIXME: Move this after closure conversion so that we don't need + # to model the closure conversion transformations here. + update_lambda_binding!(ctx, ex, is_read=true) + end + elseif is_leaf(ex) || is_quoted(ex) + return + elseif k == K"local" || k == K"global" + # Uses of bindings which don't count as uses. 
+ return + elseif k == K"=" + lhs = ex[1] + if kind(lhs) != K"Placeholder" + update_binding!(ctx, lhs, add_assigned=1) + if has_lambda_binding(ctx, lhs) + update_lambda_binding!(ctx, lhs, is_assigned=true) + end + end + analyze_variables!(ctx, ex[2]) + elseif k == K"function_decl" + name = ex[1] + update_binding!(ctx, name, add_assigned=1) + if has_lambda_binding(ctx, name) + update_lambda_binding!(ctx, name, is_assigned=true) + end + elseif k == K"function_type" + if kind(ex[1]) != K"BindingId" || lookup_binding(ctx, ex[1]).kind !== :local + analyze_variables!(ctx, ex[1]) + end + elseif k == K"decl" + @chk numchildren(ex) == 2 + id = ex[1] + if kind(id) != K"Placeholder" + binfo = lookup_binding(ctx, id) + if !isnothing(binfo.type) + throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) + end + update_binding!(ctx, id; type=ex[2]) + end + analyze_variables!(ctx, ex[2]) + elseif k == K"const" + id = ex[1] + if lookup_binding(ctx, id).kind == :local + throw(LoweringError(ex, "unsupported `const` declaration on local variable")) + end + update_binding!(ctx, id; is_const=true) + elseif k == K"call" + name = ex[1] + if kind(name) == K"BindingId" + id = name.var_id + if has_lambda_binding(ctx, id) + # FIXME: Move this after closure conversion so that we don't need + # to model the closure conversion transformations. 
+ update_lambda_binding!(ctx, id, is_called=true) + end + end + foreach(e->analyze_variables!(ctx, e), children(ex)) + elseif k == K"method_defs" + push!(ctx.method_def_stack, ex[1]) + analyze_variables!(ctx, ex[2]) + pop!(ctx.method_def_stack) + elseif k == K"lambda" + lambda_bindings = ex.lambda_bindings + if !ex.is_toplevel_thunk + # Record all lambdas for the same closure type in one place + func_name = last(ctx.method_def_stack) + if kind(func_name) == K"BindingId" + func_name_id = func_name.var_id + if lookup_binding(ctx, func_name_id).kind === :local + cbinds = get!(ctx.closure_bindings, func_name_id) do + name_stack = Vector{String}() + for fname in ctx.method_def_stack + if kind(fname) == K"BindingId" + push!(name_stack, lookup_binding(ctx, fname).name) + end + end + ClosureBindings(name_stack) + end + push!(cbinds.lambdas, lambda_bindings) + end + end + end + ctx2 = VariableAnalysisContext(ctx.graph, ctx.bindings, ctx.mod, lambda_bindings, + ctx.method_def_stack, ctx.closure_bindings) + foreach(e->analyze_variables!(ctx2, e), ex[3:end]) + else + foreach(e->analyze_variables!(ctx, e), children(ex)) + end + nothing +end + function resolve_scopes(ctx::ScopeResolutionContext, ex) thunk = @ast ctx ex [K"lambda"(is_toplevel_thunk=true) [K"block"] [K"block"] ex ] - return _resolve_scopes(ctx, thunk) + _resolve_scopes(ctx, thunk) end """ @@ -691,5 +707,7 @@ enclosing lambda form and information about variables captured by closures. 
function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) ex2 = resolve_scopes(ctx2, reparent(ctx2, ex)) - ctx2, ex2 + ctx3 = VariableAnalysisContext(ctx2.graph, ctx2.bindings, ctx2.mod, ex2.lambda_bindings) + analyze_variables!(ctx3, ex2) + ctx3, ex2 end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 2c651565f03c1..e39215eb66ed2 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -438,9 +438,7 @@ function _value_string(ex) id = get(ex, :id, nothing) end if !isnothing(id) - idstr = replace(string(id), - "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄", - "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") + idstr = subscript_str(id) str = "$(str)$idstr" end if k == K"slot" || k == K"BindingId" diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 0259bf51381a2..f461124a1b641 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -69,18 +69,45 @@ function showprov(x; kws...) showprov(stdout, x; kws...) 
end +function subscript_str(i) + replace(string(i), + "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄", + "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") +end + function print_ir(io::IO, ex, indent="") + added_indent = " " @assert (kind(ex) == K"lambda" || kind(ex) == K"code_info") && kind(ex[1]) == K"block" + if !ex.is_toplevel_thunk && kind(ex) == K"code_info" + slots = ex.slots + print(io, indent, "slots: [") + for (i,slot) in enumerate(slots) + print(io, "slot$(subscript_str(i))/$(slot.name)") + flags = String[] + slot.is_nospecialize && push!(flags, "nospecialize") + !slot.is_read && push!(flags, "!read") + slot.is_single_assign && push!(flags, "single_assign") + slot.is_maybe_undef && push!(flags, "maybe_undef") + slot.is_called && push!(flags, "called") + if !isempty(flags) + print(io, "($(join(flags, ",")))") + end + if i < length(slots) + print(io, " ") + end + end + println(io, "]") + end stmts = children(ex[1]) for (i, e) in enumerate(stmts) lno = rpad(i, 3) if kind(e) == K"method" && numchildren(e) == 3 println(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) @assert kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" - print_ir(io, e[3], indent*" ") + print_ir(io, e[3], indent*added_indent) elseif kind(e) == K"code_info" && e.is_toplevel_thunk println(io, indent, lno, " --- thunk") - print_ir(io, e, indent*" ") + print_ir(io, e, indent*added_indent) else code = string(e) println(io, indent, lno, " ", code) diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 79130339ce620..072831b5f6b83 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -37,6 +37,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/c(!read)] 1 TestMod.d 2 (= slot₂/c %₁) 3 (return %₁) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 
fe723435d91f5..3e4acccf01d0b 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -1,5 +1,6 @@ ######################################## # Simple closure +# (FIXME: #self# should have `read` flag set) let x = 1 function f(y) @@ -25,6 +26,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)] 1 TestMod.+ 2 (call core.getfield slot₁/#self# :x) 3 (call core.isdefined %₂ :contents) @@ -71,6 +73,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 2 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! %₂ :contents %₁) @@ -112,6 +115,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read)] 1 10 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! 
%₂ :contents %₁) @@ -122,6 +126,7 @@ end 11 (call core.svec) 12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) 13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(!read) slot₂/x slot₃/g(called) slot₄/x(!read)] 1 (= slot₂/x (call core.Box slot₂/x)) 2 TestMod.#f#g##0 3 (= slot₃/g (new %₂ slot₂/x)) @@ -162,6 +167,7 @@ x -> x*x 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) @@ -192,6 +198,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 3209f521a3f34..9bf5b57e3731a 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -754,6 +754,12 @@ end # end # """ +src = """ +function f_slotflags(x, y, f, z) + f() + x + y +end +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 0119935797e32..6a61f5139938a 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -200,17 +200,28 @@ begin end """) == (1,2,3,4) -@test JuliaLowering.include_string(test_mod, """ -begin - function f_nospecialize(u, v, @nospecialize(x), y, @nospecialize(z)) - (u, v, x, y, z) +@testset "Slot flags" begin + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_nospecialize(u, v, @nospecialize(x), y, @nospecialize(z)) + (u, v, x, y, z) + end + + f_nospecialize(1,2,3,4,5) + end + """) == (1,2,3,4,5) + # We dig into the internal of `Method` here to check which slots have been + # flagged as nospecialize. 
+ @test only(methods(test_mod.f_nospecialize)).nospecialize == 0b10100 + + JuliaLowering.include_string(test_mod, """ + function f_slotflags(x, y, f, z) + f() + x + y end + """) + @test only(methods(test_mod.f_slotflags)).called == 0b0100 - f_nospecialize(1,2,3,4,5) end -""") == (1,2,3,4,5) -# We dig into the internal of `Method` here to check which slots have been -# flagged as nospecialize. -@test only(methods(test_mod.f_nospecialize)).nospecialize == 0b10100 end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 79cf6d48d63c9..d448fa308600d 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -11,6 +11,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/x slot₃/_(!read) slot₄/y] 1 TestMod.+ 2 (call %₁ slot₂/x slot₄/y) 3 (return %₂) @@ -31,6 +32,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] 1 slot₃/x 2 (return %₁) 9 TestMod.f @@ -50,6 +52,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] 1 TestMod.body 2 (return %₁) 9 TestMod.f @@ -69,6 +72,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] 1 TestMod.body 2 (return %₁) 9 TestMod.f @@ -89,6 +93,7 @@ end 7 (call core.svec) 8 (call core.svec %₆ %₇ :($(QuoteNode(:(#= line 1 =#))))) 9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] 1 TestMod.body 2 (return %₁) 10 TestMod.f @@ -128,6 +133,7 @@ end 14 (call core.svec %₁₁ %₁₂ %₁₃) 15 (call core.svec %₁₀ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) 16 --- method core.nothing %₁₅ + slots: 
[slot₁/#self#(!read) slot₂/_(!read) slot₃/_(!read) slot₄/_(!read)] 1 static_parameter₃ 2 static_parameter₁ 3 static_parameter₂ @@ -156,6 +162,7 @@ end 12 (call core.svec %₁₁) 13 (call core.svec %₁₀ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) 14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 static_parameter₁ 2 (return %₁) 15 TestMod.f @@ -177,6 +184,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read)] 1 TestMod.Int 2 (gotoifnot slot₂/x label₃) 3 (= slot₃/tmp 0xff) @@ -203,6 +211,7 @@ end 3 (call core.svec) 4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 1 =#))))) 5 --- method core.nothing %₄ + slots: [slot₁/#self#(!read) slot₂/x] 1 slot₂/x 2 (return %₁) 6 (return core.nothing) @@ -218,6 +227,7 @@ end 3 (call core.svec) 4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 1 =#))))) 5 --- method core.nothing %₄ + slots: [slot₁/y slot₂/x] 1 (call core.tuple slot₁/y slot₂/x) 2 (return %₁) 6 (return core.nothing) @@ -237,6 +247,7 @@ end 7 (call core.svec %₆) 8 (call core.svec %₅ %₇ :($(QuoteNode(:(#= line 1 =#))))) 9 --- method core.nothing %₈ + slots: [slot₁/x(!read)] 1 static_parameter₁ 2 (return %₁) 10 (return core.nothing) @@ -253,6 +264,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read)] 1 (return core.nothing) 8 (return core.nothing) @@ -368,6 +380,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) 9 TestMod.f @@ -378,6 +391,7 @@ end 14 (call core.svec) 15 (call core.svec %₁₃ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) 16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) 17 TestMod.f @@ -389,6 +403,7 @@ end 23 (call 
core.svec) 24 (call core.svec %₂₂ %₂₃ :($(QuoteNode(:(#= line 1 =#))))) 25 --- method core.nothing %₂₄ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z(!read)] 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) 26 TestMod.f @@ -407,6 +422,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) 8 TestMod.f @@ -415,6 +431,7 @@ end 11 (call core.svec) 12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) 13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x slot₂/x) 2 (return %₁) 14 TestMod.f @@ -423,6 +440,7 @@ end 17 (call core.svec) 18 (call core.svec %₁₆ %₁₇ :($(QuoteNode(:(#= line 1 =#))))) 19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y] 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) 20 TestMod.f @@ -442,6 +460,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(called) slot₂/_] 1 (call slot₁/#self# slot₂/_ 1 2) 2 (return %₁) 9 TestMod.f @@ -451,6 +470,7 @@ end 13 (call core.svec) 14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) 15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(called) slot₂/_ slot₃/y] 1 (call slot₁/#self# slot₂/_ slot₃/y 2) 2 (return %₁) 16 TestMod.f @@ -460,6 +480,7 @@ end 20 (call core.svec) 21 (call core.svec %₁₉ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) 22 --- method core.nothing %₂₁ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/y slot₄/z] 1 (call core.tuple slot₃/y slot₄/z) 2 (return %₁) 23 TestMod.f @@ -479,6 +500,7 @@ end 6 (call core.svec) 7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) 8 --- method core.nothing %₇ + slots: [slot₁/#self#(called) slot₂/_] 1 (call slot₁/#self# slot₂/_ 1) 2 (return %₁) 9 TestMod.f @@ -488,6 +510,7 @@ end 13 (call core.svec) 14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 
=#))))) 15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] 1 slot₃/x 2 (return %₁) 16 TestMod.f @@ -513,6 +536,7 @@ end 12 (call core.svec %₁₁) 13 (call core.svec %₁₀ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) 14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) 15 TestMod.f @@ -525,6 +549,7 @@ end 22 (call core.svec %₂₀ %₂₁) 23 (call core.svec %₁₉ %₂₂ :($(QuoteNode(:(#= line 1 =#))))) 24 --- method core.nothing %₂₃ + slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) 25 TestMod.f @@ -539,6 +564,7 @@ end 34 (call core.svec %₃₁ %₃₂ %₃₃) 35 (call core.svec %₃₀ %₃₄ :($(QuoteNode(:(#= line 1 =#))))) 36 --- method core.nothing %₃₅ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] 1 (call core.tuple slot₂/x slot₃/y slot₄/z) 2 (return %₁) 37 TestMod.f @@ -565,6 +591,7 @@ end 11 (call core.svec) 12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) 13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(called) slot₂/x] 1 (call top.vect 1) 2 (call slot₁/#self# slot₂/x %₁ 2) 3 (return %₂) @@ -577,6 +604,7 @@ end 20 (call core.svec %₁₈ %₁₉) 21 (call core.svec %₁₇ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) 22 --- method core.nothing %₂₁ + slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) 23 TestMod.f @@ -590,6 +618,7 @@ end 31 (call core.svec %₂₈ %₂₉ %₃₀) 32 (call core.svec %₂₇ %₃₁ :($(QuoteNode(:(#= line 1 =#))))) 33 --- method core.nothing %₃₂ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] 1 static_parameter₁ 2 static_parameter₂ 3 static_parameter₃ @@ -611,6 +640,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) 8 TestMod.f @@ -620,6 +650,7 @@ end 12 (call core.svec) 13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) 14 --- 
method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys] 1 slot₃/ys 2 (return %₁) 15 TestMod.f @@ -650,6 +681,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) 8 TestMod.f @@ -659,6 +691,7 @@ end 12 (call core.svec) 13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) 14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/xs] 1 slot₂/xs 2 (return %₁) 15 TestMod.f @@ -677,6 +710,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#] 1 (call core.tuple 1 2) 2 (call core._apply_iterate top.iterate slot₁/#self# %₁) 3 (return %₂) @@ -687,6 +721,7 @@ end 12 (call core.svec) 13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) 14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/xs] 1 slot₂/xs 2 (return %₁) 15 TestMod.f @@ -704,6 +739,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/destructured_arg_2 slot₄/w(!read) slot₅/iterstate slot₆/y(!read) slot₇/z(!read)] 1 (call top.indexed_iterate slot₃/destructured_arg_2 1) 2 (= slot₆/y (call core.getfield %₁ 1)) 3 (= slot₅/iterstate (call core.getfield %₁ 2)) @@ -726,6 +762,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(called)] 1 TestMod.rhs 2 (call slot₁/#self# %₁) 3 (return %₂) @@ -737,6 +774,7 @@ end 13 (call core.svec) 14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) 15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/x(!read)] 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) 2 (= slot₃/x (call core.getfield %₁ 1)) 3 (return core.nothing) @@ -755,6 +793,7 @@ end 5 (call core.svec) 6 
(call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/destructured_arg_2] 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) 2 (call core.getfield %₁ 1) 3 (call top.indexed_iterate slot₃/destructured_arg_2 1) @@ -764,18 +803,23 @@ end 9 (return %₈) ######################################## -# Functions with @nospecialize argument metadata -function f(@nospecialize(x)) +# Slot flags +function f(@nospecialize(x), g, y) + g() + y end #--------------------- 1 (method TestMod.f) 2 TestMod.f 3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any) +4 (call core.svec %₃ core.Any core.Any core.Any) 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ - 1 (return core.nothing) + slots: [slot₁/#self#(!read) slot₂/x(nospecialize,!read) slot₃/g(called) slot₄/y] + 1 TestMod.+ + 2 (call slot₃/g) + 3 (call %₁ %₂ slot₄/y) + 4 (return %₃) 8 TestMod.f 9 (return %₈) @@ -794,6 +838,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 4 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read)] 1 (return core.nothing) 8 TestMod.f 9 (call JuliaLowering.bind_docs! %₈ "some docs\n" %₆) @@ -813,6 +858,7 @@ end 3 (call core.svec) 4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 4 =#))))) 5 --- method core.nothing %₄ + slots: [slot₁/x(!read)] 1 (return core.nothing) 6 TestMod.T 7 (call JuliaLowering.bind_docs! 
%₆ "some docs\n" %₄) diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index a994d6d351df2..c98facff21ed2 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -13,6 +13,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/ex] 1 (call core.tuple slot₃/ex) 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) 3 (return %₂) @@ -32,6 +33,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/__context__ slot₃/ex(!read) slot₄/ctx(!read)] 1 slot₂/__context__ 2 (= slot₄/ctx %₁) 3 (return %₁) diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 7d34278e1bb67..87335922c296f 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -33,6 +33,7 @@ end 9 (call core.svec) 10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 3 =#))))) 11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 (call core.tuple false true true) 2 (return %₁) 12 TestMod.f @@ -79,6 +80,7 @@ end 5 (call core.svec) 6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) 7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/z] 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (isdefined slot₂/z) diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 492fa75ea5ab7..a907917362dca 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -322,6 +322,7 @@ end 28 (call core.svec) 29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) 30 --- method core.nothing %₂₉ + slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁) 3 (return %₂) @@ -370,6 +371,7 @@ end 33 (call core.svec) 34 (call core.svec %₃₂ %₃₃ :($(QuoteNode(:(#= line 1 
=#))))) 35 --- method core.nothing %₃₄ + slots: [slot₁/#ctor-self# slot₂/a slot₃/b slot₄/c slot₅/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 2) 2 slot₃/b 3 (= slot₅/tmp %₂) @@ -389,6 +391,7 @@ end 40 (call core.svec) 41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 1 =#))))) 42 --- method core.nothing %₄₁ + slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b slot₄/c) 3 (return %₂) @@ -453,6 +456,7 @@ end 52 (call core.svec) 53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 1 =#))))) 54 --- method core.nothing %₅₃ + slots: [slot₁/#ctor-self#] 1 (new slot₁/#ctor-self#) 2 (return %₁) 55 (return core.nothing) @@ -495,6 +499,7 @@ end 28 (call core.svec) 29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) 30 --- method core.nothing %₂₉ + slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b slot₄/c) 3 (return %₂) @@ -542,6 +547,7 @@ end 28 (call core.svec) 29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 4 =#))))) 30 --- method core.nothing %₂₉ + slots: [slot₁/#self#(!read) slot₂/a slot₃/b] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b) 3 (return %₂) @@ -603,6 +609,7 @@ end 40 (call core.svec) 41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 1 =#))))) 42 --- method core.nothing %₄₁ + slots: [slot₁/#ctor-self# slot₂/x slot₃/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 1) 2 slot₂/x 3 (= slot₃/tmp %₂) @@ -623,6 +630,7 @@ end 48 (call core.svec %₄₇) 49 (call core.svec %₄₆ %₄₈ :($(QuoteNode(:(#= line 1 =#))))) 50 --- method core.nothing %₄₉ + slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.X 2 static_parameter₁ 3 (call core.apply_type %₁ %₂) @@ -694,6 +702,7 @@ end 55 (call core.svec) 56 (call core.svec %₅₄ %₅₅ :($(QuoteNode(:(#= line 1 =#))))) 57 --- method core.nothing %₅₆ + slots: [slot₁/#ctor-self# slot₂/v slot₃/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 1) 2 slot₂/v 3 (= slot₃/tmp %₂) @@ -717,6 +726,7 @@ end 66 (call core.svec %₆₄ %₆₅) 67 (call core.svec %₆₃ %₆₆ :($(QuoteNode(:(#= 
line 1 =#))))) 68 --- method core.nothing %₆₇ + slots: [slot₁/#self#(!read) slot₂/v] 1 TestMod.X 2 static_parameter₁ 3 static_parameter₂ @@ -756,6 +766,7 @@ end 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 3 =#))))) 6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) @@ -793,6 +804,7 @@ end 38 (call core.svec) 39 (call core.svec %₃₇ %₃₈ :($(QuoteNode(:(#= line 5 =#))))) 40 --- method core.nothing %₃₉ + slots: [slot₁/#ctor-self# slot₂/x] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/x) 3 (return %₂) @@ -802,6 +814,7 @@ end 44 (call core.svec) 45 (call core.svec %₄₃ %₄₄ :($(QuoteNode(:(#= line 6 =#))))) 46 --- method core.nothing %₄₅ + slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)] 1 TestMod.ReallyXIPromise 2 slot₁/#ctor-self# 3 TestMod.+ @@ -822,6 +835,7 @@ end 50 (call core.svec) 51 (call core.svec %₄₉ %₅₀ :($(QuoteNode(:(#= line 10 =#))))) 52 --- method core.nothing %₅₁ + slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/a) 3 (return %₂) @@ -856,6 +870,7 @@ end 4 (call core.svec) 5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 5 =#))))) 6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read)] 1 TestMod.X 2 TestMod.A 3 TestMod.B @@ -910,6 +925,7 @@ end 52 (call core.svec) 53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 3 =#))))) 54 --- method core.nothing %₅₃ + slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) @@ -926,6 +942,7 @@ end 65 (call core.svec %₆₃ %₆₄) 66 (call core.svec %₆₂ %₆₅ :($(QuoteNode(:(#= line 4 =#))))) 67 --- method core.nothing %₆₆ + slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) @@ -972,6 +989,7 @@ end 28 (call core.svec) 29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 4 =#))))) 30 --- method core.nothing %₂₉ + slots: [slot₁/#ctor-self# slot₂/xs] 1 slot₁/#ctor-self# 2 (call core._apply_iterate top.iterate core.tuple slot₂/xs) 3 (splatnew %₁ %₂) @@ -1029,6 +1047,7 
@@ end 41 (call core.svec %₄₀) 42 (call core.svec %₃₉ %₄₁ :($(QuoteNode(:(#= line 4 =#))))) 43 --- method core.nothing %₄₂ + slots: [slot₁/#ctor-self# slot₂/xs slot₃/tmp slot₄/tmp] 1 (call core._apply_iterate top.iterate core.tuple slot₂/xs) 2 (call core.nfields %₁) 3 (call top.ult_int %₂ 2) From d50def1689b339c7062ee11521da0a4c12de9a9d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 31 Dec 2024 06:30:47 +1000 Subject: [PATCH 0919/1109] Parse non-syntactic operator tokens as `K"Identifier"` kind (JuliaLang/JuliaSyntax.jl#523) Most operators are semantically just normal identifiers after parsing so should get the Kind `K"Identifier"`. For example, after this change `a + b` parses with `K"Identifier"` kind for the `+` token. As an exception, standalone syntactic ops keep their kind - they can't really be used in a sane way as identifiers or interpolated into expressions in the normal way because they have their own syntactic forms. This also helps us in `Expr` conversion where they also have their own rules for coalescing with dots, when dotted. Also introduce a new keyword `operators_as_identifiers` to the `tokenize()` API to accommodate some simple uses of this API to colour token strings by operator type, even when the operator is semantically in identifier-position. 
--- JuliaSyntax/src/expr.jl | 2 +- JuliaSyntax/src/kinds.jl | 9 ++++++ JuliaSyntax/src/parse_stream.jl | 3 +- JuliaSyntax/src/parser.jl | 52 ++++++++++++++++----------------- JuliaSyntax/src/parser_api.jl | 19 ++++++++++-- JuliaSyntax/test/green_node.jl | 2 +- JuliaSyntax/test/parser.jl | 46 ++++++++++++++++++++++++++--- JuliaSyntax/test/parser_api.jl | 21 ++++++++++++- JuliaSyntax/test/syntax_tree.jl | 6 ++-- 9 files changed, 119 insertions(+), 41 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index aaa57c72849bf..fdc05a881f6d2 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -297,7 +297,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if !@isexpr(a2, :quote) && !(a2 isa QuoteNode) args[2] = QuoteNode(a2) end - elseif length(args) == 1 && is_operator(childheads[1]) + elseif length(args) == 1 # Hack: Here we preserve the head of the operator to determine whether # we need to coalesce it with the dot into a single symbol later on. args[1] = (childheads[1], args[1]) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 554dc08da5c8a..dafc91deb0f45 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1230,3 +1230,12 @@ function is_whitespace(x) k = kind(x) return k == K"Whitespace" || k == K"NewlineWs" || k == K"Comment" end + +function is_syntactic_operator(x) + k = kind(x) + # TODO: Do we need to disallow dotted and suffixed forms when this is used + # in the parser? The lexer itself usually disallows such tokens, so it's + # not clear whether we need to handle them. (Though note `.->` is a + # token...) + return k in KSet"&& || . ... 
->" || is_syntactic_assignment(k) +end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 02c0307e9c032..42bedc49f52b7 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -890,7 +890,8 @@ function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} for (i, (nbyte, k, f)) in enumerate(split_spec) h = SyntaxHead(k, f) b = (i == length(split_spec)) ? tok.next_byte : b + nbyte - push!(stream.tokens, SyntaxToken(h, kind(tok), false, b)) + orig_k = k == K"." ? K"." : kind(tok) + push!(stream.tokens, SyntaxToken(h, orig_k, false, b)) end stream.peek_count = 0 return position(stream) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index b640b03daa6e2..0ef7342a2583a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -382,14 +382,14 @@ function parse_LtoR(ps::ParseState, down, is_op) down(ps) while is_op(peek(ps)) t = peek_token(ps) - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") down(ps) emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end end # parse right-to-left binary operator -# produces structures like (= a (= b (= c d))) +# produces structures like (=> a (=> b (=> c d))) # # flisp: parse-RtoL function parse_RtoL(ps::ParseState, down, is_op, self) @@ -397,7 +397,7 @@ function parse_RtoL(ps::ParseState, down, is_op, self) down(ps) t = peek_token(ps) if is_op(kind(t)) - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") self(ps) emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end @@ -624,7 +624,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # a .~ b ==> (dotcall-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) # [a~b] ==> (vect (call-i a ~ b)) - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") bump_trivia(ps) parse_assignment(ps, down) emit(ps, mark, is_dotted(t) ? 
K"dotcall" : K"call", INFIX_FLAG) @@ -759,7 +759,7 @@ function parse_arrow(ps::ParseState) # x <--> y ==> (call-i x <--> y) # x .--> y ==> (dotcall-i x --> y) # x -->₁ y ==> (call-i x -->₁ y) - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") parse_arrow(ps) emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end @@ -821,7 +821,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) while (t = peek_token(ps); is_prec_comparison(t)) n_comparisons += 1 op_dotted = is_dotted(t) - op_pos = bump_dotsplit(ps, emit_dot_node=true) + op_pos = bump_dotsplit(ps, emit_dot_node=true, remap_kind=K"Identifier") parse_pipe_lt(ps) end if n_comparisons == 1 @@ -881,7 +881,7 @@ function parse_range(ps::ParseState) # a..b ==> (call-i a .. b) # a … b ==> (call-i a … b) # a .… b ==> (dotcall-i a … b) - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") parse_invalid_ops(ps) emit(ps, mark, is_dotted(initial_tok) ? K"dotcall" : K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled @@ -904,9 +904,9 @@ function parse_range(ps::ParseState) # a :> b ==> (call-i a (error : >) b) bump_trivia(ps, skip_newlines=false) emark = position(ps) - bump(ps) # K":" + bump(ps, remap_kind=K"Identifier") # K":" ks = untokenize(peek(ps)) - bump(ps) # K"<" or K">" + bump(ps, remap_kind=K"Identifier") # K"<" or K">" emit(ps, emark, K"error", error="Invalid `:$ks` found, maybe replace with `$ks:`") parse_invalid_ops(ps) @@ -914,7 +914,7 @@ function parse_range(ps::ParseState) break end n_colons += 1 - bump(ps, n_colons == 1 ? EMPTY_FLAGS : TRIVIA_FLAG) + bump(ps, n_colons == 1 ? 
EMPTY_FLAGS : TRIVIA_FLAG; remap_kind=K"Identifier") had_newline = peek(ps) == K"NewlineWs" t = peek_token(ps) if is_closing_token(ps, kind(t)) @@ -1008,7 +1008,7 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) # [x+y + z] ==> (vect (call-i x + y z)) break end - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") down(ps) if kind(t) in chain_ops && !is_decorated(t) # a + b + c ==> (call-i a + b c) @@ -1217,7 +1217,7 @@ function parse_unary(ps::ParseState) # unary negation # -2^x ==> (call-pre - (call-i 2 ^ x)) # -2[1, 3] ==> (call-pre - (ref 2 1 3)) - bump(ps) + bump(ps, remap_kind=K"Identifier") parse_factor(ps) emit(ps, mark, K"call", PREFIX_OP_FLAG) else @@ -1256,7 +1256,7 @@ function parse_unary(ps::ParseState) # # (The flisp parser only considers commas before `;` and thus gets this # last case wrong) - op_pos = bump_dotsplit(ps, emit_dot_node=true) + op_pos = bump_dotsplit(ps, emit_dot_node=true, remap_kind=K"Identifier") space_before_paren = preceding_whitespace(t2) if space_before_paren @@ -1303,7 +1303,7 @@ function parse_unary(ps::ParseState) if is_type_operator(op_t) # <:(a,) ==> (<: a) emit(ps, mark, op_k, opts.delim_flags) - reset_node!(ps, op_pos, flags=TRIVIA_FLAG) + reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k) else emit(ps, mark, K"call", opts.delim_flags) end @@ -1329,7 +1329,7 @@ function parse_unary(ps::ParseState) if is_type_operator(op_t) # <:(a) ==> (<:-pre (parens a)) emit(ps, mark, op_k, PREFIX_OP_FLAG) - reset_node!(ps, op_pos, flags=TRIVIA_FLAG) + reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k) else if is_dotted(op_t) emit(ps, mark, K"dotcall", PREFIX_OP_FLAG) @@ -1349,12 +1349,12 @@ function parse_unary(ps::ParseState) # -0x1 ==> (call-pre - 0x01) # - 2 ==> (call-pre - 2) # .-2 ==> (dotcall-pre - 2) - op_pos = bump_dotsplit(ps, EMPTY_FLAGS) + op_pos = bump_dotsplit(ps, EMPTY_FLAGS, remap_kind=K"Identifier") else # /x ==> (call-pre (error /) x) # +₁ x ==> (call-pre (error +₁) x) # .<: x ==> 
(dotcall-pre (error (. <:)) x) - bump_dotsplit(ps, EMPTY_FLAGS, emit_dot_node=true) + bump_dotsplit(ps, EMPTY_FLAGS, emit_dot_node=true, remap_kind=K"Identifier") op_pos = emit(ps, mark, K"error", error="not a unary operator") end parse_unary(ps) @@ -1385,7 +1385,7 @@ end function parse_factor_with_initial_ex(ps::ParseState, mark) parse_decl_with_initial_ex(ps, mark) if (t = peek_token(ps); is_prec_power(kind(t))) - bump_dotsplit(ps) + bump_dotsplit(ps, remap_kind=K"Identifier") parse_factor_after(ps) emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) end @@ -1687,11 +1687,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = (m, position(ps)) emit(ps, mark, K".") elseif k == K"'" + # f.' => f (error-t (. ')) + bump_dotsplit(ps, remap_kind=K"Identifier") # TODO: Reclaim dotted postfix operators :-) - # f.' => f (error-t ') - bump(ps) - emit(ps, emark, K"error", TRIVIA_FLAG, + emit(ps, emark, K"error", error="the .' operator for transpose is discontinued") + emit(ps, mark, K"dotcall", POSTFIX_OP_FLAG) else # Field/property syntax # f.x.y ==> (. (. f x) y) @@ -1703,7 +1704,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k == K"'" && !preceding_whitespace(t) # f' ==> (call-post f ') # f'ᵀ ==> (call-post f 'ᵀ) - bump(ps) + bump(ps, remap_kind=K"Identifier") emit(ps, mark, K"call", POSTFIX_OP_FLAG) elseif k == K"{" # Type parameter curlies and macro calls @@ -3554,11 +3555,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) # + ==> + # .+ ==> (. +) # .= ==> (. =) - if is_dotted(peek_token(ps)) - bump_dotsplit(ps, emit_dot_node=true) - else - bump(ps, remap_kind=K"Identifier") - end + bump_dotsplit(ps, emit_dot_node=true, remap_kind= + is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier") if check_identifiers && !is_valid_identifier(leading_kind) # += ==> (error +=) # ? ==> (error ?) 
diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 83a9ff3af8090..7931ef31d3d2f 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -174,15 +174,20 @@ Token() = Token(SyntaxHead(K"None", EMPTY_FLAGS), 0:0) head(t::Token) = t.head """ - tokenize(text) + tokenize(text; operators_as_identifiers=true) Returns the tokenized UTF-8 encoded `text` as a vector of `Token`s. The text for the token can be retrieved by using `untokenize()`. The full text can be reconstructed with, for example, `join(untokenize.(tokenize(text), text))`. This interface works on UTF-8 encoded string or buffer data only. + +The keyword `operators_as_identifiers` specifies whether operators in +identifier-position should have `K"Identifier"` as their kind, or be emitted as +more specific operator kinds. For example, whether the `+` in `a + b` should be +emitted as `K"Identifier"` (the default) or as `K"+"`. """ -function tokenize(text) +function tokenize(text; operators_as_identifiers=true) ps = ParseStream(text) parse!(ps, rule=:all) ts = ps.tokens @@ -192,7 +197,15 @@ function tokenize(text) continue end r = ts[i-1].next_byte:ts[i].next_byte-1 - push!(output_tokens, Token(head(ts[i]), r)) + k = kind(ts[i]) + if k == K"Identifier" && !operators_as_identifiers + orig_k = ts[i].orig_kind + if is_operator(orig_k) && !is_word_operator(orig_k) + k = orig_k + end + end + f = flags(ts[i]) + push!(output_tokens, Token(SyntaxHead(k,f), r)) end output_tokens end diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl index c3c4da40960ee..42d20f5217e1f 100644 --- a/JuliaSyntax/test/green_node.jl +++ b/JuliaSyntax/test/green_node.jl @@ -8,7 +8,7 @@ @test head.(children(t)) == [ SyntaxHead(K"Identifier", 0x0000) SyntaxHead(K"Whitespace", 0x0001) - SyntaxHead(K"+", 0x0000) + SyntaxHead(K"Identifier", 0x0000) SyntaxHead(K"Whitespace", 0x0001) SyntaxHead(K"Identifier", 0x0000) ] diff --git a/JuliaSyntax/test/parser.jl 
b/JuliaSyntax/test/parser.jl index 584a8d66efa21..a747e1c7e871b 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -1,14 +1,14 @@ """ Parse string to SyntaxNode tree and show as an sexpression """ -function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6") +function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", show_kws...) stream = ParseStream(code, version=v) production(ParseState(stream)) JuliaSyntax.validate_tokens(stream) t = build_tree(GreenNode, stream) source = SourceFile(code) s = SyntaxNode(source, t, keep_parens=true) - return sprint(show, MIME("text/x.sexpression"), s) + return sprint(io->show(io, MIME("text/x.sexpression"), s; show_kws...)) end function test_parse(production, input, output) @@ -29,7 +29,7 @@ function test_parse(inout::Pair) test_parse(JuliaSyntax.parse_toplevel, inout...) end -const PARSE_ERROR = r"\(error-t " +PARSE_ERROR = r"\(error-t " with_version(v::VersionNumber, (i,o)::Pair) = ((;v=v), i) => o @@ -436,7 +436,7 @@ tests = [ "A.@x a" => "(macrocall (. A @x) a)" "@A.B.@x a" => "(macrocall (. (. A B) (error-t) @x) a)" # .' discontinued - "f.'" => "(wrapper f (error-t '))" + "f.'" => "(dotcall-post f (error '))" # Field/property syntax "f.x.y" => "(. (. f x) y)" "x .y" => "(. 
x (error-t) y)" @@ -1112,6 +1112,44 @@ parsestmt_test_specs = [ end end +parsestmt_with_kind_tests = [ + # Most operators are semantically just normal identifiers after parsing so + # get the Kind K"Identifier" + "+" => "+::Identifier" + "a + b" => "(call-i a::Identifier +::Identifier b::Identifier)" + "a .+ b" => "(dotcall-i a::Identifier +::Identifier b::Identifier)" + "a |> b" => "(call-i a::Identifier |>::Identifier b::Identifier)" + "a => b" => "(call-i a::Identifier =>::Identifier b::Identifier)" + "a → b" => "(call-i a::Identifier →::Identifier b::Identifier)" + "a < b < c" => "(comparison a::Identifier <::Identifier b::Identifier <::Identifier c::Identifier)" + "a .<: b"=> "(dotcall-i a::Identifier <:::Identifier b::Identifier)" + "a .. b" => "(call-i a::Identifier ..::Identifier b::Identifier)" + "a : b" => "(call-i a::Identifier :::Identifier b::Identifier)" + "-2^x" => "(call-pre -::Identifier (call-i 2::Integer ^::Identifier x::Identifier))" + "-(2)" => "(call-pre -::Identifier (parens 2::Integer))" + "<:(a,)" => "(<:-, a::Identifier)" + "- 2" => "(call-pre -::Identifier 2::Integer)" + "/x" => "(call-pre (error /::Identifier) x::Identifier)" + "a^b" => "(call-i a::Identifier ^::Identifier b::Identifier)" + "f.'" => "(dotcall-post f::Identifier (error '::Identifier))" + "f'" => "(call-post f::Identifier '::Identifier)" + # Standalone syntactic ops which keep their kind - they can't really be + # used in a sane way as identifiers or interpolated into expressions + # because they have their own syntactic forms. 
+ ":(::)" => "(quote-: (parens ::::::))" + ":(\$)" => "(quote-: (parens \$::\$))" + ":(<:)" => "(quote-: (parens <:::<:))" + ":(&&)" => "(quote-: (parens &&::&&))" + ":(=)" => "(quote-: (parens =::=))" +] + +@testset "parser `Kind` remapping" begin + @testset "$(repr(input))" for (input, output) in parsestmt_with_kind_tests + input = ((show_kind=true,), input) + test_parse(JuliaSyntax.parse_stmts, input, output) + end +end + @testset "Trivia attachment" begin # TODO: Need to expand this greatly to cover as many forms as possible! @test show_green_tree("f(a;b)") == """ diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl index 11570ce9242c4..10a09d3ace585 100644 --- a/JuliaSyntax/test/parser_api.jl +++ b/JuliaSyntax/test/parser_api.jl @@ -170,7 +170,7 @@ end end end -tokensplit(str) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str)] +tokensplit(str; kws...) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str; kws...)] @testset "tokenize() API" begin # tokenize() is eager @@ -178,6 +178,17 @@ tokensplit(str) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str)] # . is a separate token from + in `.+` @test tokensplit("a .+ β") == [ + K"Identifier" => "a", + K"Whitespace" => " ", + K"." => ".", + K"Identifier" => "+", + K"Whitespace" => " ", + K"Identifier" => "β", + ] + + # + is kind K"+" when operators in identifier position are emitted as + # operator kinds. + @test tokensplit("a .+ β"; operators_as_identifiers=false) == [ K"Identifier" => "a", K"Whitespace" => " ", K"." 
=> ".", @@ -194,6 +205,14 @@ tokensplit(str) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str)] K"Whitespace" => " ", K"Integer" => "1", ] + # Including word operators + @test tokensplit("where = 1"; operators_as_identifiers=false) == [ + K"Identifier" => "where", + K"Whitespace" => " ", + K"=" => "=", + K"Whitespace" => " ", + K"Integer" => "1", + ] # A predicate based on flags() @test JuliaSyntax.is_suffixed(tokenize("+₁")[1]) diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index f647f1aec7dab..c6b673c7245e6 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -25,7 +25,7 @@ @test sprint(show, t) == "(call-i (call-i a * b) + c)" @test sprint(io->show(io, MIME("text/x.sexpression"), t, show_kind=true)) == - "(call-i (call-i a::Identifier *::* b::Identifier) +::+ c::Identifier)" + "(call-i (call-i a::Identifier *::Identifier b::Identifier) +::Identifier c::Identifier)" @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" @@ -75,7 +75,7 @@ end f :: Identifier [call-i] a :: Identifier - * :: * + * :: Identifier b :: Identifier c :: Identifier """ @@ -88,7 +88,7 @@ end 1:1 │ 1:1 │ f :: Identifier 1:3 │ 3:5 │ [call-i] 1:3 │ 3:3 │ a :: Identifier - 1:4 │ 4:4 │ * :: * + 1:4 │ 4:4 │ * :: Identifier 1:5 │ 5:5 │ b :: Identifier 2:3 │ 10:10 │ c :: Identifier """ From 4b334634c9dc9107b8ceeadc2f64830483c86acf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 31 Dec 2024 16:05:39 +1000 Subject: [PATCH 0920/1109] Remove duplicate method Somehow this deletion was forgotten from the previous PR --- JuliaSyntax/src/parser.jl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0ef7342a2583a..cbe6985669d0a 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -274,14 +274,6 @@ function is_block_form(k) abstract primitive struct try module" end -function 
is_syntactic_operator(k) - k = kind(k) - # TODO: Do we need to disallow dotted and suffixed forms here? - # The lexer itself usually disallows such tokens, so it's not clear whether - # we need to handle them. (Though note `.->` is a token...) - return k in KSet"&& || . ... ->" || is_syntactic_assignment(k) -end - function is_syntactic_unary_op(k) kind(k) in KSet"$ & ::" end From a5f25b41394db973839e7b80608df43e4e3a0acb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 31 Dec 2024 16:25:54 +1000 Subject: [PATCH 0921/1109] Add error for attempting to add methods to a function argument --- JuliaLowering/src/scope_analysis.jl | 3 +++ JuliaLowering/test/closures_ir.jl | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 1ab7494855453..1eac9b41d8cdf 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -615,6 +615,9 @@ function analyze_variables!(ctx, ex) analyze_variables!(ctx, ex[2]) elseif k == K"function_decl" name = ex[1] + if kind(name) == K"BindingId" && lookup_binding(ctx, name).kind == :argument + throw(LoweringError(name, "Cannot add method to a function argument")) + end update_binding!(ctx, name, add_assigned=1) if has_lambda_binding(ctx, name) update_lambda_binding!(ctx, name, is_assigned=true) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 3e4acccf01d0b..7bb0f060c261c 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -204,3 +204,31 @@ end 3 (return %₂) 9 (return %₃) +######################################## +# Error: Attempt to add methods to a function argument +function f(g) + function g() + end +end +#--------------------- +LoweringError: +function f(g) + function g() +# ╙ ── Cannot add method to a function argument + end +end + +######################################## +# Error: Static parameter clashing with closure 
name +function f() where {g} + function g() + end +end +#--------------------- +LoweringError: +function f() where {g} + function g() +# ╙ ── local variable name `g` conflicts with a static parameter + end +end + From 733eaec295cee2f07981fc28c275c2473edea54a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 7 Jan 2025 20:00:13 +1000 Subject: [PATCH 0922/1109] Support for unboxed closure captures Currently we box most things, but arguments and static parameters of an outer function may be unboxed. --- JuliaLowering/src/closure_conversion.jl | 109 +++++++++++++++++------- JuliaLowering/src/scope_analysis.jl | 5 +- JuliaLowering/test/closures.jl | 16 ++++ JuliaLowering/test/closures_ir.jl | 105 ++++++++++++++++++++++- JuliaLowering/test/decls_ir.jl | 49 +++++++++++ 5 files changed, 249 insertions(+), 35 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index e45506aab42aa..b23d715b76a42 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -162,20 +162,18 @@ function convert_assignment(ctx, ex) convert_global_assignment(ctx, ex, var, rhs0) else @assert binfo.kind == :local || binfo.kind == :argument - lbinfo = lookup_lambda_binding(ctx, var) - self_captured = !isnothing(lbinfo) && lbinfo.is_captured - captured = binfo.is_captured - if isnothing(binfo.type) && !self_captured && !captured + boxed = is_boxed(binfo) + if isnothing(binfo.type) && !boxed @ast ctx ex [K"=" var rhs0] else # Typed local tmp_rhs0 = ssavar(ctx, rhs0) rhs = isnothing(binfo.type) ? tmp_rhs0 : convert_for_type_decl(ctx, ex, tmp_rhs0, _convert_closures(ctx, binfo.type), true) - assignment = if self_captured || captured + assignment = if boxed @ast ctx ex [K"call" "setfield!"::K"core" - self_captured ? captured_var_access(ctx, var) : var + is_self_captured(ctx, var) ? 
captured_var_access(ctx, var) : var "contents"::K"Symbol" rhs ] @@ -221,13 +219,15 @@ function closure_type_fields(ctx, srcref, closure_binds) field_syms = SyntaxList(ctx) field_orig_bindings = Vector{IdTag}() field_name_inds = Dict{IdTag,Int}() + field_is_box = Vector{Bool}() for (name,id) in sort!(collect(field_names)) push!(field_syms, @ast ctx srcref name::K"Symbol") push!(field_orig_bindings, id) + push!(field_is_box, is_boxed(ctx, id)) field_name_inds[id] = lastindex(field_syms) end - return field_syms, field_orig_bindings, field_name_inds + return field_syms, field_orig_bindings, field_name_inds, field_is_box end function closure_name(mod, name_stack) @@ -245,21 +245,36 @@ end # Return a thunk which creates a new type for a closure with `field_syms` named # fields. The new type will be named `name_str` which must be an unassigned # name in the module. -function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_syms) +function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_syms, field_is_box) # New closure types always belong to the module we're expanding into - they # need to be serialized there during precompile. mod = ctx.mod type_binding = new_global_binding(ctx, srcref, name_str, mod) + typevar_stmts = SyntaxList(ctx) + type_params = SyntaxList(ctx) + field_types = SyntaxList(ctx) + for (name, isbox) in zip(field_syms, field_is_box) + if !isbox + typevar_name = "$(name.name_val)_type" + tv = ssavar(ctx, name) + push!(typevar_stmts, @ast ctx name [K"=" tv [K"call" "TypeVar"::K"core" typevar_name::K"Symbol"]]) + push!(type_params, tv) + push!(field_types, tv) + else + push!(field_types, @ast ctx name "Box"::K"core") + end + end type_ex = @ast ctx srcref [K"lambda"(is_toplevel_thunk=true, lambda_bindings=LambdaBindings()) [K"block"] [K"block"] [K"block" [K"global" type_binding] + typevar_stmts... 
closure_type := [K"call" "_structtype"::K"core" mod::K"Value" name_str::K"Symbol" - [K"call" "svec"::K"core"] + [K"call" "svec"::K"core" type_params...] [K"call" "svec"::K"core" field_syms... @@ -275,7 +290,7 @@ function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_sym [K"call" "_typebody!"::K"core" closure_type - [K"call" "svec"::K"core" ["Box"::K"core" for _ in field_syms]...] + [K"call" "svec"::K"core" field_types...] ] "nothing"::K"core" ] @@ -283,38 +298,53 @@ function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_sym type_ex, type_binding end +function is_boxed(binfo::BindingInfo) + # True for + # * :argument when it's not reassigned + # * :static_parameter (these can't be reassigned) + defined_but_not_assigned = binfo.is_always_defined && binfo.n_assigned == 0 + # For now, we box almost everything but later we'll want to do dominance + # analysis on the untyped IR. + return binfo.is_captured && !defined_but_not_assigned +end + +function is_boxed(ctx, x) + is_boxed(lookup_binding(ctx, x)) +end + +# Is captured in the closure's `self` argument +function is_self_captured(ctx, x) + lbinfo = lookup_lambda_binding(ctx, x) + !isnothing(lbinfo) && lbinfo.is_captured +end + function _convert_closures(ctx::ClosureConversionCtx, ex) k = kind(ex) if k == K"BindingId" - id = ex.var_id - lbinfo = lookup_lambda_binding(ctx, id) - if !isnothing(lbinfo) && lbinfo.is_captured # TODO: && vinfo:asgn cv ?? - get_box_contents(ctx, ex, captured_var_access(ctx, ex)) - elseif lookup_binding(ctx, id).is_captured # TODO: && vinfo:asgn vi - get_box_contents(ctx, ex, ex) + access = is_self_captured(ctx, ex) ? 
captured_var_access(ctx, ex) : ex + if is_boxed(ctx, ex) + get_box_contents(ctx, ex, access) else - ex + access end elseif is_leaf(ex) || k == K"inert" ex elseif k == K"=" convert_assignment(ctx, ex) + # elseif k == K"isdefined" TODO + # Convert isdefined expr to function for closure converted variables elseif k == K"decl" - if kind(ex[1]) != K"BindingId" - # TODO: This case might be better dealt with in an earlier pass, - # emitting `K"::"`?? - TODO(ex, "assertions for decls with non-bindings") - end + @assert kind(ex[1]) == K"BindingId" binfo = lookup_binding(ctx, ex[1]) - if binfo.kind == :local - makeleaf(ctx, ex, K"TOMBSTONE") - else + if binfo.kind == :global @ast ctx ex [K"call" "set_binding_type!"::K"core" binfo.mod::K"Value" binfo.name::K"Symbol" _convert_closures(ctx, ex[2]) ] + else + makeleaf(ctx, ex, K"TOMBSTONE") end elseif k == K"local" var = ex[1] @@ -337,19 +367,33 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) needs_def = isnothing(closure_info) if needs_def closure_binds = ctx.closure_bindings[func_name_id] - field_syms, field_orig_bindings, field_name_inds = + field_syms, field_orig_bindings, field_name_inds, field_is_box = closure_type_fields(ctx, ex, closure_binds) name_str = closure_name(ctx.mod, closure_binds.name_stack) - closure_type_def, closure_type = - type_for_closure(ctx, ex, name_str, field_syms) + closure_type_def, closure_type_ = + type_for_closure(ctx, ex, name_str, field_syms, field_is_box) push!(ctx.toplevel_stmts, closure_type_def) - closure_info = ClosureInfo(closure_type, field_syms, field_name_inds) + closure_info = ClosureInfo(closure_type_, field_syms, field_name_inds) ctx.closure_infos[func_name_id] = closure_info + type_params = SyntaxList(ctx) init_closure_args = SyntaxList(ctx) - for id in field_orig_bindings - push!(init_closure_args, binding_ex(ctx, id)) + for (id,boxed) in zip(field_orig_bindings, field_is_box) + field_val = binding_ex(ctx, id) + push!(init_closure_args, field_val) + if !boxed + 
push!(type_params, @ast ctx ex [K"call" + # TODO: Update to use _typeof_captured_variable (#40985) + #"_typeof_captured_variable"::K"core" + "typeof"::K"core" + field_val]) + end end @ast ctx ex [K"block" + closure_type := if isempty(type_params) + closure_type_ + else + [K"call" "apply_type"::K"core" closure_type_ type_params...] + end [K"=" func_name [K"new" closure_type @@ -419,8 +463,7 @@ function closure_convert_lambda(ctx, ex) # Add box initializations for arguments which are captured by an inner lambda for arg in children(args) kind(arg) != K"Placeholder" || continue - binfo = lookup_binding(ctx, arg) - if binfo.is_captured # TODO: && binfo.is_assigned + if is_boxed(ctx, arg) push!(body_stmts, @ast ctx arg [K"=" arg [K"call" "Box"::K"core" arg] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 1eac9b41d8cdf..3171d81988103 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -189,8 +189,10 @@ function add_lambda_args(ctx, var_ids, args, args_kind) "static parameter name not distinct from function argument" throw(LoweringError(arg, msg)) end + is_always_defined = args_kind == :argument || args_kind == :static_parameter id = init_binding(ctx, arg, varkey, args_kind; - is_nospecialize=getmeta(arg, :nospecialize, false)) + is_nospecialize=getmeta(arg, :nospecialize, false), + is_always_defined=is_always_defined) var_ids[varkey] = id elseif ka != K"BindingId" && ka != K"Placeholder" throw(LoweringError(arg, "Unexpected lambda arg kind")) @@ -681,6 +683,7 @@ function analyze_variables!(ctx, ex) end ctx2 = VariableAnalysisContext(ctx.graph, ctx.bindings, ctx.mod, lambda_bindings, ctx.method_def_stack, ctx.closure_bindings) + # TODO: Types of any assigned captured vars will also be used and might be captured. 
foreach(e->analyze_variables!(ctx2, e), ex[3:end]) else foreach(e->analyze_variables!(ctx, e), children(ex)) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index daf9d7ca13685..76524ca9a4b0f 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -35,6 +35,22 @@ end Base.eval(test_mod, :(call_it(f, args...) = f(args...))) +# Closure where a local `x` is captured but not boxed +@test JuliaLowering.include_string(test_mod, """ +begin + function f(x) + z = 0 + function g() + y = x # x will not be boxed + (y + 1, z) + end + z = 2 # will be boxed + (x, g()) + end + f(10) +end +""") == (10,(11,2)) + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 7bb0f060c261c..82e53bc055332 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -88,7 +88,7 @@ end 15 (return %₁₄) ######################################## -# Function where arguments are captured into a closure +# Function where arguments are captured into a closure and assigned function f(x) function g() x = 10 @@ -144,6 +144,109 @@ end 14 TestMod.f 15 (return %₁₄) +######################################## +# Closure where a local `x` is captured but not boxed +function f(x) + function g() + y = x + end + z = x +end +#--------------------- +1 (method TestMod.f) +2 --- thunk + 1 (global TestMod.#f#g##1) + 2 (call core.TypeVar :x_type) + 3 (call core.svec %₂) + 4 (call core.svec :x) + 5 (call core.svec) + 6 (call core._structtype TestMod :#f#g##1 %₃ %₄ %₅ false 1) + 7 (call core._setsuper! %₆ core.Function) + 8 (const TestMod.#f#g##1) + 9 (= TestMod.#f#g##1 %₆) + 10 (call core.svec %₂) + 11 (call core._typebody! 
%₆ %₁₀) + 12 (return core.nothing) +3 TestMod.#f#g##1 +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/y(!read)] + 1 (call core.getfield slot₁/#self# :x) + 2 (= slot₂/y %₁) + 3 (return %₁) +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/z(!read)] + 1 TestMod.#f#g##1 + 2 (call core.typeof slot₂/x) + 3 (call core.apply_type %₁ %₂) + 4 (= slot₃/g (new %₃ slot₂/x)) + 5 slot₃/g + 6 slot₂/x + 7 (= slot₄/z %₆) + 8 (return %₆) +14 TestMod.f +15 (return %₁₄) + +######################################## +# Closure where a static parameter of an outer function is captured +function f(::T) where T + function g() + use(T) + end +end +#--------------------- +1 (method TestMod.f) +2 --- thunk + 1 (global TestMod.#f#g##2) + 2 (call core.TypeVar :T_type) + 3 (call core.svec %₂) + 4 (call core.svec :T) + 5 (call core.svec) + 6 (call core._structtype TestMod :#f#g##2 %₃ %₄ %₅ false 1) + 7 (call core._setsuper! %₆ core.Function) + 8 (const TestMod.#f#g##2) + 9 (= TestMod.#f#g##2 %₆) + 10 (call core.svec %₂) + 11 (call core._typebody! 
%₆ %₁₀) + 12 (return core.nothing) +3 TestMod.#f#g##2 +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read)] + 1 TestMod.use + 2 (call core.getfield slot₁/#self# :T) + 3 (call %₁ %₂) + 4 (return %₃) +8 (= slot₁/T (call core.TypeVar :T)) +9 TestMod.f +10 (call core.Typeof %₉) +11 slot₁/T +12 (call core.svec %₁₀ %₁₁) +13 slot₁/T +14 (call core.svec %₁₃) +15 (call core.svec %₁₂ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) +16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/g] + 1 TestMod.#f#g##2 + 2 static_parameter₁ + 3 (call core.typeof %₂) + 4 (call core.apply_type %₁ %₃) + 5 static_parameter₁ + 6 (= slot₃/g (new %₄ %₅)) + 7 slot₃/g + 8 (return %₇) +17 TestMod.f +18 (return %₁₇) + ######################################## # Anonymous function syntax with -> x -> x*x diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 5474c03df1441..ec2e3f21c76e7 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -115,6 +115,55 @@ let # ╙ ── unsupported `const` declaration on local variable end +######################################## +# Type decl on function argument +function f(x) + x::Int = 1 + x = 2.0 + x +end +#--------------------- +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read) slot₄/tmp(!read)] + 1 1 + 2 (= slot₃/tmp %₁) + 3 slot₃/tmp + 4 TestMod.Int + 5 (call core.isa %₃ %₄) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₃) + 8 TestMod.Int + 9 slot₃/tmp + 10 (call top.convert %₈ %₉) + 11 TestMod.Int + 12 (= slot₃/tmp (call core.typeassert %₁₀ %₁₁)) + 13 slot₃/tmp + 14 (= slot₂/x %₁₃) + 15 2.0 + 16 (= slot₄/tmp %₁₅) + 17 slot₄/tmp + 18 TestMod.Int + 19 (call core.isa %₁₇ %₁₈) + 20 
(gotoifnot %₁₉ label₂₂) + 21 (goto label₂₇) + 22 TestMod.Int + 23 slot₄/tmp + 24 (call top.convert %₂₂ %₂₃) + 25 TestMod.Int + 26 (= slot₄/tmp (call core.typeassert %₂₄ %₂₅)) + 27 slot₄/tmp + 28 (= slot₂/x %₂₇) + 29 slot₂/x + 30 (return %₂₉) +8 TestMod.f +9 (return %₈) + ######################################## # Error: global type decls only allowed at top level function f() From 362d368ef0566d0e58445bc3be2cd0f85b7bc9e6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 7 Jan 2025 20:26:02 +1000 Subject: [PATCH 0923/1109] Special case `isdefined` handling for closure captures --- JuliaLowering/src/closure_conversion.jl | 17 ++++++- JuliaLowering/test/closures.jl | 23 ++++++++- JuliaLowering/test/closures_ir.jl | 64 +++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index b23d715b76a42..069fe7393d20f 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -331,8 +331,23 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ex elseif k == K"=" convert_assignment(ctx, ex) - # elseif k == K"isdefined" TODO + elseif k == K"isdefined" # Convert isdefined expr to function for closure converted variables + var = ex[1] + binfo = lookup_binding(ctx, var) + if is_boxed(binfo) + access = is_self_captured(ctx, var) ? 
captured_var_access(ctx, var) : var + @ast ctx ex [K"call" + "isdefined"::K"core" + access + "contents"::K"Symbol" + ] + elseif binfo.is_always_defined || is_self_captured(ctx, var) + # Captured but unboxed vars are always defined + @ast ctx ex true::K"Bool" + else + ex + end elseif k == K"decl" @assert kind(ex[1]) == K"BindingId" binfo = lookup_binding(ctx, ex[1]) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 76524ca9a4b0f..f5532cc5fc03a 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -38,7 +38,7 @@ Base.eval(test_mod, :(call_it(f, args...) = f(args...))) # Closure where a local `x` is captured but not boxed @test JuliaLowering.include_string(test_mod, """ begin - function f(x) + function f_unboxed_test(x) z = 0 function g() y = x # x will not be boxed @@ -47,10 +47,29 @@ begin z = 2 # will be boxed (x, g()) end - f(10) + f_unboxed_test(10) end """) == (10,(11,2)) +# Use of isdefined +@test JuliaLowering.include_string(test_mod, """ +begin + function f_isdefined(x) + local w + function g() + z = 3 + (@isdefined(x), # unboxed, always defined capture + @isdefined(y), # boxed capture + @isdefined(z), # normal local var + @isdefined(w)) # boxed undefined var + end + y = 2 + (@isdefined(y), @isdefined(w), g()) + end + f_isdefined(1) +end +""") == (true, false, (true, true, true, false)) + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 82e53bc055332..0bf3d3493e367 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -247,6 +247,70 @@ end 17 TestMod.f 18 (return %₁₇) +######################################## +# Use of isdefined +function f(x) + function g() + z = 3 + (@isdefined(x), # unboxed, always defined capture + @isdefined(y), # boxed capture + @isdefined(z)) # normal local var + end + y = 2 + (@isdefined(y), # boxed local + @isdefined(x)) 
# always defined local (function arg) +end +#--------------------- +1 (method TestMod.f) +2 --- thunk + 1 (global TestMod.#f#g##3) + 2 (call core.TypeVar :x_type) + 3 (call core.svec %₂) + 4 (call core.svec :x :y) + 5 (call core.svec) + 6 (call core._structtype TestMod :#f#g##3 %₃ %₄ %₅ false 2) + 7 (call core._setsuper! %₆ core.Function) + 8 (const TestMod.#f#g##3) + 9 (= TestMod.#f#g##3 %₆) + 10 (call core.svec %₂ core.Box) + 11 (call core._typebody! %₆ %₁₀) + 12 (return core.nothing) +3 TestMod.#f#g##3 +4 (call core.svec %₃) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/z] + 1 (= slot₂/z 3) + 2 (call core.getfield slot₁/#self# :y) + 3 (call core.isdefined %₂ :contents) + 4 (isdefined slot₂/z) + 5 (call core.tuple true %₃ %₄) + 6 (return %₅) +8 TestMod.f +9 (call core.Typeof %₈) +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) +13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/y] + 1 (= slot₄/y (call core.Box)) + 2 TestMod.#f#g##3 + 3 (call core.typeof slot₂/x) + 4 (call core.apply_type %₂ %₃) + 5 slot₄/y + 6 (= slot₃/g (new %₄ slot₂/x %₅)) + 7 slot₃/g + 8 2 + 9 slot₄/y + 10 (call core.setfield! %₉ :contents %₈) + 11 slot₄/y + 12 (call core.isdefined %₁₁ :contents) + 13 (call core.tuple %₁₂ true) + 14 (return %₁₃) +14 TestMod.f +15 (return %₁₄) + ######################################## # Anonymous function syntax with -> x -> x*x From 99352ff139e5cf941fe595eeec1193c0ea287cb9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 8 Jan 2025 13:26:49 +1000 Subject: [PATCH 0924/1109] Local variable capture for methods of global functions Here we introduce a new intermediate form `K"captured_local"` containing an index into a list of the boxed local variables. 
After the `CodeInfo` is constructed we then apply an extra rewrite step to replace any `Expr(:captured_local)` with the associated boxed local variable. Using a captured_local form allows us to finish lowering the lambda expression (including linear IR construction) and eagerly construct a CodeInfo, preserving the normal lowering workflow. In contrast, the approach in the flisp code quoted the syntax for the lambda and ran the macro `$` expansion pass again. --- JuliaLowering/src/closure_conversion.jl | 55 ++++++++++++++++++------- JuliaLowering/src/eval.jl | 1 + JuliaLowering/src/kinds.jl | 4 ++ JuliaLowering/src/linear_ir.jl | 5 +-- JuliaLowering/src/runtime.jl | 10 +++++ JuliaLowering/src/utils.jl | 14 ++++--- JuliaLowering/test/closures.jl | 12 ++++++ JuliaLowering/test/closures_ir.jl | 42 ++++++++++++++++++- JuliaLowering/test/scopes_ir.jl | 7 ++-- 9 files changed, 123 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 069fe7393d20f..65aa4de35ebf2 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -13,7 +13,7 @@ struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext bindings::Bindings mod::Module closure_bindings::Dict{IdTag,ClosureBindings} - closure_info::Union{Nothing,ClosureInfo{GraphType}} + capture_rewriting::Union{Nothing,ClosureInfo{GraphType},SyntaxList{GraphType}} lambda_bindings::LambdaBindings toplevel_stmts::SyntaxList{GraphType} closure_infos::Dict{IdTag,ClosureInfo{GraphType}} @@ -33,15 +33,22 @@ end # Access captured variable from inside a closure function captured_var_access(ctx, ex) - cinfo = ctx.closure_info - field_sym = cinfo.field_syms[cinfo.field_name_inds[ex.var_id]] - @ast ctx ex [K"call" - "getfield"::K"core" - # FIXME: attributing the self binding to srcref=ex gives misleading printing. - # We should carry provenance with each binding to fix this. 
- binding_ex(ctx, current_lambda_bindings(ctx).self) - field_sym - ] + cap_rewrite = ctx.capture_rewriting + if cap_rewrite isa ClosureInfo + field_sym = cap_rewrite.field_syms[cap_rewrite.field_name_inds[ex.var_id]] + @ast ctx ex [K"call" + "getfield"::K"core" + binding_ex(ctx, current_lambda_bindings(ctx).self) + field_sym + ] + else + interpolations = cap_rewrite + @assert !isnothing(cap_rewrite) + if isempty(interpolations) || !is_same_identifier_like(interpolations[end], ex) + push!(interpolations, ex) + end + @ast ctx ex [K"captured_local" length(interpolations)::K"Integer"] + end end function get_box_contents(ctx::ClosureConversionCtx, var, box_ex) @@ -438,9 +445,9 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) elseif k == K"method_defs" name = ex[1] is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind === :local - cinfo = is_closure ? ctx.closure_infos[name.var_id] : nothing + cap_rewrite = is_closure ? ctx.closure_infos[name.var_id] : nothing ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, cinfo, ctx.lambda_bindings, + ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, ctx.toplevel_stmts, ctx.closure_infos) body = _convert_closures(ctx2, ex[2]) if is_closure @@ -467,8 +474,15 @@ function closure_convert_lambda(ctx, ex) body_stmts = SyntaxList(ctx) toplevel_stmts = ex.is_toplevel_thunk ? 
body_stmts : ctx.toplevel_stmts lambda_bindings = ex.lambda_bindings + interpolations = nothing + if isnothing(ctx.capture_rewriting) + interpolations = SyntaxList(ctx) + cap_rewrite = interpolations + else + cap_rewrite = ctx.capture_rewriting + end ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, ctx.closure_info, lambda_bindings, + ctx.closure_bindings, cap_rewrite, lambda_bindings, toplevel_stmts, ctx.closure_infos) lambda_children = SyntaxList(ctx) args = ex[1] @@ -494,11 +508,24 @@ function closure_convert_lambda(ctx, ex) push!(lambda_children, @ast ctx2 ex[3] [K"block" body_stmts...]) if numchildren(ex) > 3 + # Convert return type @assert numchildren(ex) == 4 push!(lambda_children, _convert_closures(ctx2, ex[4])) end - makenode(ctx, ex, ex, lambda_children; lambda_bindings=lambda_bindings) + lam = makenode(ctx, ex, ex, lambda_children; lambda_bindings=lambda_bindings) + if !isnothing(interpolations) && !isempty(interpolations) + @ast ctx ex [K"call" + replace_captured_locals!::K"Value" + lam + [K"call" + "svec"::K"core" + interpolations... + ] + ] + else + lam + end end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index f14b3ee6bfdb8..e2541150e40f9 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -278,6 +278,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"leave" ? :leave : k == K"isdefined" ? :isdefined : k == K"pop_exception" ? :pop_exception : + k == K"captured_local" ? :captured_local : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index a2cbeca68d3b0..c0e3976f1534d 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -76,6 +76,10 @@ function _register_kinds() "tryfinally" "unnecessary" "decl" + # [K"captured_local" index] + # A local variable captured into a global method. Contains the + # `index` of the associated `Box` in the rewrite list. 
+ "captured_local" "END_LOWERING_KINDS" # The following kinds are emitted by lowering and used in Julia's untyped IR diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 06e6c85244599..a17a5f463f54a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -743,8 +743,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if kind(lam) == K"lambda" lam = compile_lambda(ctx, lam) else - # lam = emit_assign_tmp(ctx, compile(ctx, lam, true, false)) - TODO(lam, "non-lambda method argument??") + lam = emit_assign_tmp(ctx, compile(ctx, lam, true, false)) end emit(ctx, ex, K"method", fname, sig, lam) @assert !needs_value && !in_tail_pos @@ -785,7 +784,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing - elseif k == K"isdefined" # TODO || k == K"throw_undef_if_not" (See upstream #53875) + elseif k == K"isdefined" || k == K"captured_local" # TODO || k == K"throw_undef_if_not" (See upstream #53875) if in_tail_pos emit_return(ctx, ex) elseif needs_value diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index db926db33892d..6a91d641773f0 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -98,6 +98,16 @@ function interpolate_ast(ex, values...) 
end end +# Interpolate captured local variables into the CodeInfo for a global method +function replace_captured_locals!(codeinfo, locals) + for (i, ex) in enumerate(codeinfo.code) + if Meta.isexpr(ex, :captured_local) + codeinfo.code[i] = locals[ex.args[1]] + end + end + codeinfo +end + # Construct new bare module including only the "default names" # # using Core diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index f461124a1b641..7e9ee3e47b580 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -102,11 +102,15 @@ function print_ir(io::IO, ex, indent="") for (i, e) in enumerate(stmts) lno = rpad(i, 3) if kind(e) == K"method" && numchildren(e) == 3 - println(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) - @assert kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" - print_ir(io, e[3], indent*added_indent) - elseif kind(e) == K"code_info" && e.is_toplevel_thunk - println(io, indent, lno, " --- thunk") + print(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) + if kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" + println(io) + print_ir(io, e[3], indent*added_indent) + else + println(io, " ", string(e[3])) + end + elseif kind(e) == K"code_info" + println(io, indent, lno, " --- ", e.is_toplevel_thunk ? 
"thunk" : "code_info") print_ir(io, e, indent*added_indent) else code = string(e) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index f5532cc5fc03a..c26a691e54b1f 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -70,6 +70,18 @@ begin end """) == (true, false, (true, true, true, false)) +# Global method capturing local variables +JuliaLowering.include_string(test_mod, """ +begin + local x = 1 + function f_global_method_capturing_local() + x = x + 1 + end +end +""") +@test test_mod.f_global_method_capturing_local() == 2 +@test test_mod.f_global_method_capturing_local() == 3 + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 0bf3d3493e367..9c8853cf05c93 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -248,7 +248,7 @@ end 18 (return %₁₇) ######################################## -# Use of isdefined +# Closure captures with `isdefined` function f(x) function g() z = 3 @@ -311,6 +311,46 @@ end 14 TestMod.f 15 (return %₁₄) +######################################## +# Global method capturing local variables +begin + local x = 1 + function f() + x = x + 1 + end +end +#--------------------- +1 (= slot₁/x (call core.Box)) +2 1 +3 slot₁/x +4 (call core.setfield! %₃ :contents %₂) +5 (method TestMod.f) +6 TestMod.f +7 (call core.Typeof %₆) +8 (call core.svec %₇) +9 (call core.svec) +10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 3 =#))))) +11 --- code_info + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 TestMod.+ + 2 (captured_local 1) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₂/x) + 7 slot₂/x + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈ 1) + 10 (captured_local 1) + 11 (call core.setfield! 
%₁₀ :contents %₉) + 12 (return %₉) +12 slot₁/x +13 (call core.svec %₁₂) +14 (call JuliaLowering.replace_captured_locals! %₁₁ %₁₃) +15 --- method core.nothing %₁₀ %₁₄ +16 TestMod.f +17 (return %₁₆) + ######################################## # Anonymous function syntax with -> x -> x*x diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 87335922c296f..e7e6700da3430 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -83,10 +83,9 @@ end slots: [slot₁/#self#(!read) slot₂/z] 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) - 3 (isdefined slot₂/z) - 4 (gotoifnot %₃ label₆) - 5 (call top.setindex! %₂ slot₂/z :z) - 6 (return %₂) + 3 (gotoifnot true label₅) + 4 (call top.setindex! %₂ slot₂/z :z) + 5 (return %₂) 8 TestMod.f 9 (return %₈) From 59bacedd7f9ebd05b0bd2de640506ca38ec1108a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 8 Jan 2025 22:16:35 +1000 Subject: [PATCH 0925/1109] Add errors for use of braces `{ }` syntax --- JuliaLowering/src/desugaring.jl | 2 ++ JuliaLowering/test/misc_ir.jl | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index e8199a17b3427..dcb879c6901c7 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2720,6 +2720,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) end elseif k == K"where" expand_forms_2(ctx, expand_wheres(ctx, ex)) + elseif k == K"braces" || k == K"bracescat" + throw(LoweringError(ex, "{ } syntax is reserved for future use")) elseif k == K"string" if numchildren(ex) == 1 && kind(ex[1]) == K"String" ex[1] diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 0acd015948fba..1546983a7a03c 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -183,3 +183,19 @@ x::T 3 (call core.typeassert %₁ %₂) 4 (return %₃) 
+######################################## +# Error: braces vector syntax +{x, y} +#--------------------- +LoweringError: +{x, y} +└────┘ ── { } syntax is reserved for future use + +######################################## +# Error: braces matrix syntax +{x y; y z} +#--------------------- +LoweringError: +{x y; y z} +└────────┘ ── { } syntax is reserved for future use + From c4ec290f03adb7806b4b04d60bb70027487bd1d1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 9 Jan 2025 10:53:52 +1000 Subject: [PATCH 0926/1109] Desugaring: vcat, hcat, hvcat --- JuliaLowering/src/desugaring.jl | 79 ++++++++++++++++++++++++- JuliaLowering/test/arrays.jl | 37 ++++++++++++ JuliaLowering/test/arrays_ir.jl | 101 ++++++++++++++++++++++++++++++++ JuliaLowering/test/misc_ir.jl | 8 +++ JuliaLowering/test/runtests.jl | 1 + 5 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 JuliaLowering/test/arrays.jl create mode 100644 JuliaLowering/test/arrays_ir.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index dcb879c6901c7..7241296d598b9 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -536,6 +536,70 @@ function expand_setindex(ctx, ex) ] end +function check_no_assignment(exs) + assign_pos = findfirst(kind(e) == K"=" for e in exs) + if !isnothing(assign_pos) + throw(LoweringError(exs[assign_pos], "misplaced assignment statement in `[ ... ]`")) + end +end + +function expand_vcat(ctx, ex) + if has_parameters(ex) + throw(LoweringError(ex, "unexpected semicolon in array expression")) + end + check_no_assignment(children(ex)) + had_row = false + had_row_splat = false + for e in children(ex) + k = kind(e) + if k == K"row" + had_row = true + had_row_splat = had_row_splat || any(kind(e1) == K"..." 
for e1 in children(e)) + end + end + if had_row_splat + # In case there is splatting inside `hvcat`, collect each row as a + # separate tuple and pass those to `hvcat_rows` instead (ref #38844) + rows = SyntaxList(ctx) + for e in children(ex) + if kind(e) == K"row" + push!(rows, @ast ctx e [K"tuple" children(e)...]) + else + push!(rows, @ast ctx e [K"tuple" e]) + end + end + @ast ctx ex [K"call" + "hvcat_rows"::K"top" + rows... + ] + else + row_sizes = SyntaxList(ctx) + elements = SyntaxList(ctx) + for e in children(ex) + if kind(e) == K"row" + rowsize = numchildren(e) + append!(elements, children(e)) + else + rowsize = 1 + push!(elements, e) + end + push!(row_sizes, @ast ctx e rowsize::K"Integer") + end + if had_row + @ast ctx ex [K"call" + "hvcat"::K"top" + [K"tuple" row_sizes...] + elements... + ] + else + @ast ctx ex [K"call" + "vcat"::K"top" + elements... + ] + end + end +end + # Expand UnionAll definitions, eg `X{T} = Y{T,T}` function expand_unionall_def(ctx, srcref, lhs, rhs) if numchildren(lhs) <= 1 @@ -2668,7 +2732,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) else @ast ctx ex [K"if" cond true::K"Bool" cs[end]] end - elseif k == K"::" && numchildren(ex) == 2 + elseif k == K"::" + @chk numchildren(ex) == 2 "`::` must be written `value::type` outside function argument lists" @ast ctx ex [K"call" "typeassert"::K"core" expand_forms_2(ctx, ex[1]) @@ -2787,10 +2852,22 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] ] elseif k == K"vect" + if has_parameters(ex) + throw(LoweringError(ex, "unexpected semicolon in array expression")) + end + check_no_assignment(children(ex)) @ast ctx ex [K"call" "vect"::K"top" expand_forms_2(ctx, children(ex))... ] + elseif k == K"hcat" + check_no_assignment(children(ex)) + @ast ctx ex [K"call" + "hcat"::K"top" + expand_forms_2(ctx, children(ex))... 
+ ] + elseif k == K"vcat" + expand_forms_2(ctx, expand_vcat(ctx, ex)) elseif k == K"while" @chk numchildren(ex) == 2 @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl new file mode 100644 index 0000000000000..ce99708ba8a76 --- /dev/null +++ b/JuliaLowering/test/arrays.jl @@ -0,0 +1,37 @@ +@testset "Array syntax" begin + +test_mod = Module() + +# vect +@test JuliaLowering.include_string(test_mod, """ +[1,2,3] +""") == [1,2,3] + +# hcat +@test JuliaLowering.include_string(test_mod, """ +[1 2 3] +""") == [1 2 3] + +@test JuliaLowering.include_string(test_mod, """ +let + xs = (1,2) + [xs...; xs...] +end +""") == [1,2,1,2] + +# hvcat +@test JuliaLowering.include_string(test_mod, """ +[1 2 3; 4 5 6] +""") == [1 2 3; + 4 5 6] + +# hvcat_rows +@test JuliaLowering.include_string(test_mod, """ +let + xs = (1,2) + [xs... 3; 4 xs...] +end +""") == [1 2 3; + 4 1 2] + +end diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl new file mode 100644 index 0000000000000..3745a18c7af30 --- /dev/null +++ b/JuliaLowering/test/arrays_ir.jl @@ -0,0 +1,101 @@ +######################################## +# vect syntax +[a, b, c] +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (call top.vect %₁ %₂ %₃) +5 (return %₄) + +######################################## +# Error: vect syntax with parameters +[a, b; c] +#--------------------- +LoweringError: +[a, b; c] +└───────┘ ── unexpected semicolon in array expression + +######################################## +# Error: vect syntax with embedded assignments +[a=b, c] +#--------------------- +LoweringError: +[a=b, c] +#└─┘ ── misplaced assignment statement in `[ ... 
]` + +######################################## +# hcat syntax +[a b c] +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (call top.hcat %₁ %₂ %₃) +5 (return %₄) + +######################################## +# Error: hcat syntax with embedded assignments +[a b c=d] +#--------------------- +LoweringError: +[a b c=d] +# └──┘ ── misplaced assignment statement in `[ ... ]` + +######################################## +# vcat syntax +[a; b; c] +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (call top.vcat %₁ %₂ %₃) +5 (return %₄) + +######################################## +# vcat with splats +[a...; b; c] +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (call core.tuple %₂ %₃) +5 (call core._apply_iterate top.iterate top.vcat %₁ %₄) +6 (return %₅) + +######################################## +# hvcat syntax +[a; b c; d e f] +#--------------------- +1 (call core.tuple 1 2 3) +2 TestMod.a +3 TestMod.b +4 TestMod.c +5 TestMod.d +6 TestMod.e +7 TestMod.f +8 (call top.hvcat %₁ %₂ %₃ %₄ %₅ %₆ %₇) +9 (return %₈) + +######################################## +# hvcat with splats nested within rows +[a; b c...] +#--------------------- +1 TestMod.a +2 (call core.tuple %₁) +3 TestMod.b +4 (call core.tuple %₃) +5 TestMod.c +6 (call core._apply_iterate top.iterate core.tuple %₄ %₅) +7 (call core.tuple %₂ %₆) +8 (call top.hvcat_rows %₇) +9 (return %₈) + +######################################## +# Error: vcat syntax with assignments +[a=b; c] +#--------------------- +LoweringError: +[a=b; c] +#└─┘ ── misplaced assignment statement in `[ ... 
]` + diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 1546983a7a03c..aa61e4eb0cc04 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -183,6 +183,14 @@ x::T 3 (call core.typeassert %₁ %₂) 4 (return %₃) +######################################## +# Error: Invalid :: syntax outside function arg list +::T +#--------------------- +LoweringError: +::T +└─┘ ── `::` must be written `value::type` outside function argument lists + ######################################## # Error: braces vector syntax {x, y} diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 71e9d92a1262d..0c8d0a2608b36 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -8,6 +8,7 @@ include("utils.jl") include("ir_tests.jl") + include("arrays.jl") include("branching.jl") include("decls.jl") include("desugaring.jl") From f5edbcf1626961280122422e83a28cf6926ed610 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 9 Jan 2025 12:11:11 +1000 Subject: [PATCH 0927/1109] Fix hvcat_rows test case --- JuliaLowering/test/arrays_ir.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 3745a18c7af30..901e21defa244 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -87,9 +87,8 @@ LoweringError: 4 (call core.tuple %₃) 5 TestMod.c 6 (call core._apply_iterate top.iterate core.tuple %₄ %₅) -7 (call core.tuple %₂ %₆) -8 (call top.hvcat_rows %₇) -9 (return %₈) +7 (call top.hvcat_rows %₂ %₆) +8 (return %₇) ######################################## # Error: vcat syntax with assignments From e8684c2bc5ce5dc91bef8bb1500157ddc40d41b1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 9 Jan 2025 12:31:29 +1000 Subject: [PATCH 0928/1109] Desugaring: typed_hcat, typed_vcat --- JuliaLowering/src/desugaring.jl | 39 +++++++++++++++++-------- JuliaLowering/test/arrays.jl | 51 
++++++++++++++++++++++++++++----- JuliaLowering/test/arrays_ir.jl | 51 +++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 19 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 7241296d598b9..10d61db9d3116 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -550,7 +550,10 @@ function expand_vcat(ctx, ex) check_no_assignment(children(ex)) had_row = false had_row_splat = false - for e in children(ex) + is_typed = kind(ex) == K"typed_vcat" + eltype = is_typed ? ex[1] : nothing + elements = is_typed ? ex[2:end] : ex[1:end] + for e in elements k = kind(e) if k == K"row" had_row = true @@ -561,40 +564,46 @@ function expand_vcat(ctx, ex) # In case there is splatting inside `hvcat`, collect each row as a # separate tuple and pass those to `hvcat_rows` instead (ref #38844) rows = SyntaxList(ctx) - for e in children(ex) + for e in elements if kind(e) == K"row" push!(rows, @ast ctx e [K"tuple" children(e)...]) else push!(rows, @ast ctx e [K"tuple" e]) end end + fname = is_typed ? "typed_hvcat_rows" : "hvcat_rows" @ast ctx ex [K"call" - "hvcat_rows"::K"top" + fname::K"top" + eltype rows... ] else row_sizes = SyntaxList(ctx) - elements = SyntaxList(ctx) - for e in children(ex) + elem_list = SyntaxList(ctx) + for e in elements if kind(e) == K"row" rowsize = numchildren(e) - append!(elements, children(e)) + append!(elem_list, children(e)) else rowsize = 1 - push!(elements, e) + push!(elem_list, e) end push!(row_sizes, @ast ctx e rowsize::K"Integer") end if had_row + fname = is_typed ? "typed_hvcat" : "hvcat" @ast ctx ex [K"call" - "hvcat"::K"top" + fname::K"top" + eltype [K"tuple" row_sizes...] - elements... + elem_list... ] else + fname = is_typed ? "typed_vcat" : "vcat" @ast ctx ex [K"call" - "vcat"::K"top" - elements... + fname::K"top" + eltype + elem_list... 
] end end @@ -2866,7 +2875,13 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) "hcat"::K"top" expand_forms_2(ctx, children(ex))... ] - elseif k == K"vcat" + elseif k == K"typed_hcat" + check_no_assignment(children(ex)) + @ast ctx ex [K"call" + "typed_hcat"::K"top" + expand_forms_2(ctx, children(ex))... + ] + elseif k == K"vcat" || k == K"typed_vcat" expand_forms_2(ctx, expand_vcat(ctx, ex)) elseif k == K"while" @chk numchildren(ex) == 2 diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl index ce99708ba8a76..162ef86a62a8d 100644 --- a/JuliaLowering/test/arrays.jl +++ b/JuliaLowering/test/arrays.jl @@ -2,28 +2,45 @@ test_mod = Module() +# Test that two array element types are equal and that they are also equal +# elementwise +function ≅(a, b) + eltype(a) == eltype(b) && a == b +end + # vect @test JuliaLowering.include_string(test_mod, """ [1,2,3] -""") == [1,2,3] +""") ≅ [1,2,3] # hcat @test JuliaLowering.include_string(test_mod, """ [1 2 3] -""") == [1 2 3] +""") ≅ [1 2 3] + +# typed_hcat +@test JuliaLowering.include_string(test_mod, """ +Int[1.0 2.0 3.0] +""") ≅ [1 2 3] + + +# vcat +@test JuliaLowering.include_string(test_mod, """ +[1;2;3] +""") ≅ [1; 2; 3] @test JuliaLowering.include_string(test_mod, """ let xs = (1,2) [xs...; xs...] end -""") == [1,2,1,2] +""") ≅ [1,2,1,2] # hvcat @test JuliaLowering.include_string(test_mod, """ [1 2 3; 4 5 6] -""") == [1 2 3; - 4 5 6] +""") ≅ [1 2 3; + 4 5 6] # hvcat_rows @test JuliaLowering.include_string(test_mod, """ @@ -31,7 +48,27 @@ let xs = (1,2) [xs... 3; 4 xs...] end -""") == [1 2 3; - 4 1 2] +""") ≅ [1 2 3; + 4 1 2] + +# typed_vcat +@test JuliaLowering.include_string(test_mod, """ +Int[1.0; 2.0; 3.0] +""") ≅ [1; 2; 3] + +# typed_hvcat +@test JuliaLowering.include_string(test_mod, """ +Int[1.0 2.0 3.0; 4.0 5.0 6.0] +""") ≅ [1 2 3; + 4 5 6] + +# typed_hvcat_rows +@test JuliaLowering.include_string(test_mod, """ +let + xs = (1.0,2.0) + Int[xs... 3; 4 xs...] 
+end +""") ≅ [1 2 3; + 4 1 2] end diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 901e21defa244..53fa022bcd9bd 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -34,6 +34,17 @@ LoweringError: 4 (call top.hcat %₁ %₂ %₃) 5 (return %₄) +######################################## +# typed hcat syntax +T[a b c] +#--------------------- +1 TestMod.T +2 TestMod.a +3 TestMod.b +4 TestMod.c +5 (call top.typed_hcat %₁ %₂ %₃ %₄) +6 (return %₅) + ######################################## # Error: hcat syntax with embedded assignments [a b c=d] @@ -98,3 +109,43 @@ LoweringError: [a=b; c] #└─┘ ── misplaced assignment statement in `[ ... ]` +######################################## +# typed_vcat syntax +T[a; b; c] +#--------------------- +1 TestMod.T +2 TestMod.a +3 TestMod.b +4 TestMod.c +5 (call top.typed_vcat %₁ %₂ %₃ %₄) +6 (return %₅) + +######################################## +# typed_hvcat syntax +T[a; b c; d e f] +#--------------------- +1 TestMod.T +2 (call core.tuple 1 2 3) +3 TestMod.a +4 TestMod.b +5 TestMod.c +6 TestMod.d +7 TestMod.e +8 TestMod.f +9 (call top.typed_hvcat %₁ %₂ %₃ %₄ %₅ %₆ %₇ %₈) +10 (return %₉) + +######################################## +# typed_hvcat with splats nested within rows +T[a; b c...] +#--------------------- +1 TestMod.T +2 TestMod.a +3 (call core.tuple %₂) +4 TestMod.b +5 (call core.tuple %₄) +6 TestMod.c +7 (call core._apply_iterate top.iterate core.tuple %₅ %₆) +8 (call top.typed_hvcat_rows %₁ %₃ %₇) +9 (return %₈) + From 8409b93d0d33db3007ee24d0450201003ea9e095 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 9 Jan 2025 13:11:52 +1000 Subject: [PATCH 0929/1109] Simplify arrays_ir test cases using literals These don't need separate SSA values so result in more compact IR which is easier to read. 
--- JuliaLowering/test/arrays_ir.jl | 138 ++++++++++++-------------------- 1 file changed, 53 insertions(+), 85 deletions(-) diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 53fa022bcd9bd..23e0f21d88f31 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -1,151 +1,119 @@ ######################################## # vect syntax -[a, b, c] +[10, 20, 30] #--------------------- -1 TestMod.a -2 TestMod.b -3 TestMod.c -4 (call top.vect %₁ %₂ %₃) -5 (return %₄) +1 (call top.vect 10 20 30) +2 (return %₁) ######################################## # Error: vect syntax with parameters -[a, b; c] +[10, 20; 30] #--------------------- LoweringError: -[a, b; c] -└───────┘ ── unexpected semicolon in array expression +[10, 20; 30] +└──────────┘ ── unexpected semicolon in array expression ######################################## # Error: vect syntax with embedded assignments -[a=b, c] +[a=20, 30] #--------------------- LoweringError: -[a=b, c] -#└─┘ ── misplaced assignment statement in `[ ... ]` +[a=20, 30] +#└──┘ ── misplaced assignment statement in `[ ... ]` ######################################## # hcat syntax -[a b c] +[10 20 30] #--------------------- -1 TestMod.a -2 TestMod.b -3 TestMod.c -4 (call top.hcat %₁ %₂ %₃) -5 (return %₄) +1 (call top.hcat 10 20 30) +2 (return %₁) ######################################## # typed hcat syntax -T[a b c] +T[10 20 30] #--------------------- 1 TestMod.T -2 TestMod.a -3 TestMod.b -4 TestMod.c -5 (call top.typed_hcat %₁ %₂ %₃ %₄) -6 (return %₅) +2 (call top.typed_hcat %₁ 10 20 30) +3 (return %₂) ######################################## # Error: hcat syntax with embedded assignments -[a b c=d] +[10 20 a=40] #--------------------- LoweringError: -[a b c=d] -# └──┘ ── misplaced assignment statement in `[ ... ]` +[10 20 a=40] +# └───┘ ── misplaced assignment statement in `[ ... 
]` ######################################## # vcat syntax -[a; b; c] +[10; 20; 30] #--------------------- -1 TestMod.a -2 TestMod.b -3 TestMod.c -4 (call top.vcat %₁ %₂ %₃) -5 (return %₄) +1 (call top.vcat 10 20 30) +2 (return %₁) ######################################## # vcat with splats -[a...; b; c] +[a...; 20; 30] #--------------------- 1 TestMod.a -2 TestMod.b -3 TestMod.c -4 (call core.tuple %₂ %₃) -5 (call core._apply_iterate top.iterate top.vcat %₁ %₄) -6 (return %₅) +2 (call core.tuple 20 30) +3 (call core._apply_iterate top.iterate top.vcat %₁ %₂) +4 (return %₃) ######################################## # hvcat syntax -[a; b c; d e f] +[10; 20 30; 40 e f] #--------------------- 1 (call core.tuple 1 2 3) -2 TestMod.a -3 TestMod.b -4 TestMod.c -5 TestMod.d -6 TestMod.e -7 TestMod.f -8 (call top.hvcat %₁ %₂ %₃ %₄ %₅ %₆ %₇) -9 (return %₈) +2 TestMod.e +3 TestMod.f +4 (call top.hvcat %₁ 10 20 30 40 %₂ %₃) +5 (return %₄) ######################################## # hvcat with splats nested within rows -[a; b c...] +[10; 20 a...] #--------------------- -1 TestMod.a -2 (call core.tuple %₁) -3 TestMod.b -4 (call core.tuple %₃) -5 TestMod.c -6 (call core._apply_iterate top.iterate core.tuple %₄ %₅) -7 (call top.hvcat_rows %₂ %₆) -8 (return %₇) +1 (call core.tuple 10) +2 (call core.tuple 20) +3 TestMod.a +4 (call core._apply_iterate top.iterate core.tuple %₂ %₃) +5 (call top.hvcat_rows %₁ %₄) +6 (return %₅) ######################################## # Error: vcat syntax with assignments -[a=b; c] +[a=20; 30] #--------------------- LoweringError: -[a=b; c] -#└─┘ ── misplaced assignment statement in `[ ... ]` +[a=20; 30] +#└──┘ ── misplaced assignment statement in `[ ... 
]` ######################################## # typed_vcat syntax -T[a; b; c] +T[10; 20; 30] #--------------------- 1 TestMod.T -2 TestMod.a -3 TestMod.b -4 TestMod.c -5 (call top.typed_vcat %₁ %₂ %₃ %₄) -6 (return %₅) +2 (call top.typed_vcat %₁ 10 20 30) +3 (return %₂) ######################################## # typed_hvcat syntax -T[a; b c; d e f] +T[10; 20 30; 40 50 60] #--------------------- 1 TestMod.T 2 (call core.tuple 1 2 3) -3 TestMod.a -4 TestMod.b -5 TestMod.c -6 TestMod.d -7 TestMod.e -8 TestMod.f -9 (call top.typed_hvcat %₁ %₂ %₃ %₄ %₅ %₆ %₇ %₈) -10 (return %₉) +3 (call top.typed_hvcat %₁ %₂ 10 20 30 40 50 60) +4 (return %₃) ######################################## # typed_hvcat with splats nested within rows -T[a; b c...] +T[10; 20 a...] #--------------------- 1 TestMod.T -2 TestMod.a -3 (call core.tuple %₂) -4 TestMod.b -5 (call core.tuple %₄) -6 TestMod.c -7 (call core._apply_iterate top.iterate core.tuple %₅ %₆) -8 (call top.typed_hvcat_rows %₁ %₃ %₇) -9 (return %₈) - +2 (call core.tuple 10) +3 (call core.tuple 20) +4 TestMod.a +5 (call core._apply_iterate top.iterate core.tuple %₃ %₄) +6 (call top.typed_hvcat_rows %₁ %₂ %₅) +7 (return %₆) From 1a54b10e51d3b1fa00abd1f05d81ae09f63f33c9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 17 Jan 2025 12:44:24 +1000 Subject: [PATCH 0930/1109] Allow testing lowering of AST fragments with no source form --- JuliaLowering/test/misc_ir.jl | 9 +++++++++ JuliaLowering/test/utils.jl | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index aa61e4eb0cc04..c5f0ab376dad4 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -207,3 +207,12 @@ LoweringError: {x y; y z} └────────┘ ── { } syntax is reserved for future use +######################################## +# Error: Test AST which has no source form and thus must have been constructed +# programmatically (eg, a malformed if) +@ast_ 
[K"if"] +#--------------------- +LoweringError: +#= line 1 =# - expected `numchildren(ex) >= 2` + + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index a6c3a831ac699..b90e928eed798 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -23,7 +23,7 @@ using JuliaLowering: function _ast_test_graph() graph = SyntaxGraph() ensure_attributes!(graph, - kind=Kind, source=Union{SourceRef,NodeId,LineNumberNode}, + kind=Kind, source=Union{SourceRef,NodeId,Tuple,LineNumberNode}, var_id=Int, value=Any, name_val=String) end @@ -35,6 +35,7 @@ function _source_node(graph, src) end macro ast_(tree) + # TODO: Implement this in terms of new-style macros. quote graph = _ast_test_graph() srcref = _source_node(graph, $(QuoteNode(__source__))) @@ -141,9 +142,21 @@ function read_ir_test_cases(filename) [match_ir_test_case(s) for s in split(cases_str, r"######*") if strip(s) != ""]) end +function setup_ir_test_module(preamble) + test_mod = Module(:TestMod) + Base.include_string(test_mod, preamble) + Base.eval(test_mod, :(const var"@ast_" = $(var"@ast_"))) + test_mod +end + function format_ir_for_test(mod, description, input, expect_error=false) ex = parsestmt(SyntaxTree, input) try + if kind(ex) == K"macrocall" && ex[1].name_val == "@ast_" + # Total hack, until @ast_ can be implemented in terms of new-style + # macros. 
+ ex = JuliaLowering.eval(mod, Expr(ex)) + end x = JuliaLowering.lower(mod, ex) if expect_error error("Expected a lowering error in test case \"$description\"") @@ -161,8 +174,7 @@ end function test_ir_cases(filename::AbstractString) preamble, cases = read_ir_test_cases(filename) - test_mod = Module(:TestMod) - Base.include_string(test_mod, preamble) + test_mod = setup_ir_test_module(preamble) for (expect_error, description, input, ref) in cases output = format_ir_for_test(test_mod, description, input, expect_error) @testset "$description" begin @@ -183,8 +195,7 @@ When `pattern` is supplied, update only those tests where """ function refresh_ir_test_cases(filename, pattern=nothing) preamble, cases = read_ir_test_cases(filename) - test_mod = Module(:TestMod) - Base.include_string(test_mod, preamble) + test_mod = setup_ir_test_module(preamble) io = IOBuffer() if !isempty(preamble) println(io, preamble, "\n") From fee47788149ff9d06f0f7e497c1ae8be4ae66c14 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 17 Jan 2025 17:09:23 +1000 Subject: [PATCH 0931/1109] Fix `SyntaxNode->Expr` conversion with SubString source code (JuliaLang/JuliaSyntax.jl#528) --- JuliaSyntax/src/expr.jl | 12 +++++++----- JuliaSyntax/test/expr.jl | 6 ++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index fdc05a881f6d2..f7832f1e99d16 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -68,7 +68,7 @@ function _expr_leaf_val(node::SyntaxNode) node.val end -function _leaf_to_Expr(source, txtbuf, head, srcrange, node) +function _leaf_to_Expr(source, txtbuf, txtbuf_offset, head, srcrange, node) k = kind(head) if k == K"MacroName" && view(source, srcrange) == "." return Symbol("@__dot__") @@ -77,7 +77,9 @@ function _leaf_to_Expr(source, txtbuf, head, srcrange, node) Expr(:error) : Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") else - val = isnothing(node) ? 
parse_julia_literal(txtbuf, head, srcrange) : _expr_leaf_val(node) + val = isnothing(node) ? + parse_julia_literal(txtbuf, head, srcrange .+ txtbuf_offset) : + _expr_leaf_val(node) if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr @@ -547,7 +549,7 @@ function build_tree(::Type{Expr}, stream::ParseStream; end k = kind(head) if isnothing(nodechildren) - ex = _leaf_to_Expr(source, txtbuf, head, srcrange, nothing) + ex = _leaf_to_Expr(source, txtbuf, 0, head, srcrange, nothing) else resize!(childranges, length(nodechildren)) resize!(childheads, length(nodechildren)) @@ -568,8 +570,8 @@ end function _to_expr(node) file = sourcefile(node) if is_leaf(node) - offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) - return _leaf_to_Expr(file, txtbuf, head(node), byte_range(node) .+ offset, node) + txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) + return _leaf_to_Expr(file, txtbuf, txtbuf_offset, head(node), byte_range(node), node) end cs = children(node) args = Any[_to_expr(c) for c in cs] diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 417ec2fd18745..eb998229eb5de 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -824,3 +824,9 @@ Expr(:import, Expr(:as, Expr(:., :A, :+), :y)) end end + +@testset "SyntaxNode->Expr conversion" begin + src = repeat('a', 1000) * '\n' * "@hi" + @test Expr(parsestmt(SyntaxNode, SubString(src, 1001:lastindex(src)))) == + Expr(:macrocall, Symbol("@hi"), LineNumberNode(2)) +end From e59d0b10717a43f8c829424a9e0db4afe175c13e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 18 Jan 2025 08:16:26 +1000 Subject: [PATCH 0932/1109] Desugaring of ncat syntax --- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/desugaring.jl | 167 +++++++++++++++++++++++++++-- JuliaLowering/test/arrays.jl | 10 ++ JuliaLowering/test/arrays_ir.jl | 135 +++++++++++++++++++++++ 
JuliaLowering/test/utils.jl | 7 +- 5 files changed, 311 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 3f42bf5b0c737..b692aef7fa58e 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -10,7 +10,7 @@ using Core: eval using JuliaSyntax using JuliaSyntax: highlight, Kind, @KSet_str -using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags +using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags, numeric_flags using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 10d61db9d3116..3b9cddf426160 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -579,14 +579,14 @@ function expand_vcat(ctx, ex) ] else row_sizes = SyntaxList(ctx) - elem_list = SyntaxList(ctx) + flat_elems = SyntaxList(ctx) for e in elements if kind(e) == K"row" rowsize = numchildren(e) - append!(elem_list, children(e)) + append!(flat_elems, children(e)) else rowsize = 1 - push!(elem_list, e) + push!(flat_elems, e) end push!(row_sizes, @ast ctx e rowsize::K"Integer") end @@ -596,19 +596,172 @@ function expand_vcat(ctx, ex) fname::K"top" eltype [K"tuple" row_sizes...] - elem_list... + flat_elems... ] else fname = is_typed ? "typed_vcat" : "vcat" @ast ctx ex [K"call" fname::K"top" eltype - elem_list... + flat_elems... ] end end end +function ncat_contains_row(ex) + k = kind(ex) + if k == K"row" + return true + elseif k == K"nrow" + return any(ncat_contains_row(e) for e in children(ex)) + else + return false + end +end + +# flip first and second dimension for row major layouts +function nrow_flipdim(row_major, d) + return !row_major ? 
d : + d == 1 ? 2 : + d == 2 ? 1 : d +end + +function flatten_ncat_rows!(flat_elems, nrow_spans, row_major, parent_layout_dim, ex) + # Note that most of the checks for valid nesting here are also checked in + # the parser - they can only fail when nrcat is constructed + # programmatically (eg, by a macro). + k = kind(ex) + if k == K"row" + layout_dim = 1 + @chk parent_layout_dim != 1 (ex,"Badly nested rows in `ncat`") + elseif k == K"nrow" + dim = numeric_flags(ex) + @chk dim > 0 (ex,"Unsupported dimension $dim in ncat") + @chk !row_major || dim != 2 (ex,"2D `nrow` cannot be mixed with `row` in `ncat`") + layout_dim = nrow_flipdim(row_major, dim) + elseif kind(ex) == K"..." + throw(LoweringError(ex, "Splatting ... in an `ncat` with multiple dimensions is not supported")) + else + push!(flat_elems, ex) + for ld in parent_layout_dim-1:-1:1 + push!(nrow_spans, (ld, 1)) + end + return + end + row_start = length(flat_elems) + @chk parent_layout_dim > layout_dim (ex, "Badly nested rows in `ncat`") + for e in children(ex) + if layout_dim == 1 + @chk kind(e) ∉ KSet"nrow row" (e,"Badly nested rows in `ncat`") + end + flatten_ncat_rows!(flat_elems, nrow_spans, row_major, layout_dim, e) + end + n_elems_in_row = length(flat_elems) - row_start + for ld in parent_layout_dim-1:-1:layout_dim + push!(nrow_spans, (ld, n_elems_in_row)) + end +end + +# ncat comes in various layouts which we need to lower to special cases +# - one dimensional along some dimension +# - balanced column first or row first +# - ragged colum first or row first +function expand_ncat(ctx, ex) + is_typed = kind(ex) == K"typed_ncat" + outer_dim = numeric_flags(ex) + @chk outer_dim > 0 (ex,"Unsupported dimension in ncat") + eltype = is_typed ? ex[1] : nothing + elements = is_typed ? ex[2:end] : ex[1:end] + hvncat_name = is_typed ? 
"typed_hvncat" : "hvncat" + if !any(kind(e) in KSet"row nrow" for e in elements) + # One-dimensional ncat along some dimension + # [a ;;; b ;;; c] + return @ast ctx ex [K"call" + hvncat_name::K"top" + eltype + outer_dim::K"Integer" + elements... + ] + end + # N-dimensional case. May be + # * column first or row first: + # [a;b ;;; c;d] + # [a b ;;; c d] + # * balanced or ragged: + # [a ; b ;;; c ; d] + # [a ; b ;;; c] + row_major = any(ncat_contains_row, elements) + @chk !row_major || outer_dim != 2 (ex,"2D `nrow` cannot be mixed with `row` in `ncat`") + flat_elems = SyntaxList(ctx) + # `ncat` syntax nests lower dimensional `nrow` inside higher dimensional + # ones (with the exception of K"row" when `row_major` is true). Each nrow + # spans a number of elements and we first extract that. + nrow_spans = Vector{Tuple{Int,Int}}() + for e in elements + flatten_ncat_rows!(flat_elems, nrow_spans, row_major, + nrow_flipdim(row_major, outer_dim), e) + end + push!(nrow_spans, (outer_dim, length(flat_elems))) + # Construct the shape specification by postprocessing the flat list of + # spans. + sort!(nrow_spans, by=first) # depends on a stable sort + is_balanced = true + i = 1 + dim_lengths = zeros(outer_dim) + prev_dimspan = 1 + while i <= length(nrow_spans) + layout_dim, dimspan = nrow_spans[i] + while i <= length(nrow_spans) && nrow_spans[i][1] == layout_dim + if dimspan != nrow_spans[i][2] + is_balanced = false + break + end + i += 1 + end + is_balanced || break + @assert dimspan % prev_dimspan == 0 + dim_lengths[layout_dim] = dimspan ÷ prev_dimspan + prev_dimspan = dimspan + end + shape_spec = SyntaxList(ctx) + if is_balanced + if row_major + dim_lengths[1], dim_lengths[2] = dim_lengths[2], dim_lengths[1] + end + # For balanced concatenations, the shape is specified by the length + # along each dimension. 
+ for dl in dim_lengths + push!(shape_spec, @ast ctx ex dl::K"Integer") + end + else + # For unbalanced/ragged concatenations, the shape is specified by the + # number of elements in each ND slice of the array, from layout + # dimension 1 to N. See the documentation for `hvncat` for details. + i = 1 + while i <= length(nrow_spans) + groups_for_dim = Int[] + layout_dim = nrow_spans[i][1] + while i <= length(nrow_spans) && nrow_spans[i][1] == layout_dim + push!(groups_for_dim, nrow_spans[i][2]) + i += 1 + end + push!(shape_spec, + @ast ctx ex [K"tuple" + [i::K"Integer" for i in groups_for_dim]... + ] + ) + end + end + @ast ctx ex [K"call" + hvncat_name::K"top" + eltype + [K"tuple" shape_spec...] + row_major::K"Bool" + flat_elems... + ] +end + # Expand UnionAll definitions, eg `X{T} = Y{T,T}` function expand_unionall_def(ctx, srcref, lhs, rhs) if numchildren(lhs) <= 1 @@ -2735,7 +2888,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) cs[1] : makenode(ctx, ex, k, cs[1:end-1]) # This transformation assumes the type assertion `cond::Bool` will be - # added by a later pass. 
+ # added by a later compiler pass (currently done in codegen) if k == K"&&" @ast ctx ex [K"if" cond cs[end] false::K"Bool"] else @@ -2883,6 +3036,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"vcat" || k == K"typed_vcat" expand_forms_2(ctx, expand_vcat(ctx, ex)) + elseif k == K"ncat" || k == K"typed_ncat" + expand_forms_2(ctx, expand_ncat(ctx, ex)) elseif k == K"while" @chk numchildren(ex) == 2 @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl index 162ef86a62a8d..1c327d438a16d 100644 --- a/JuliaLowering/test/arrays.jl +++ b/JuliaLowering/test/arrays.jl @@ -71,4 +71,14 @@ end """) ≅ [1 2 3; 4 1 2] +# ncat with a single dimension +@test JuliaLowering.include_string(test_mod, """ +[1 ;;; 2 ;;; 3] +""") ≅ [1 ;;; 2 ;;; 3] + +@test JuliaLowering.include_string(test_mod, """ +Int[1.0 ;;; 2.0 ;;; 3.0] +""") ≅ [1 ;;; 2 ;;; 3] + + end diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 23e0f21d88f31..6b41d433a5599 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -117,3 +117,138 @@ T[10; 20 a...] 
5 (call core._apply_iterate top.iterate core.tuple %₃ %₄) 6 (call top.typed_hvcat_rows %₁ %₂ %₅) 7 (return %₆) + +######################################## +# ncat with a single dimension +[10 ;;; 20 ;;; 30] +#--------------------- +1 (call top.hvncat 3 10 20 30) +2 (return %₁) + +######################################## +# typed_ncat with a single dimension +T[10 ;;; 20 ;;; 30] +#--------------------- +1 TestMod.T +2 (call top.typed_hvncat %₁ 3 10 20 30) +3 (return %₂) + +######################################## +# ncat with balanced column major element layout +[10 ; 20 ; 30 ;;; 40 ; 50 ; 60] +#--------------------- +1 (call core.tuple 3 1 2) +2 (call top.hvncat %₁ false 10 20 30 40 50 60) +3 (return %₂) + +######################################## +# typed multidimensional ncat +T[10 ; 20 ; 30 ;;; 40 ; 50 ; 60] +#--------------------- +1 TestMod.T +2 (call core.tuple 3 1 2) +3 (call top.typed_hvncat %₁ %₂ false 10 20 30 40 50 60) +4 (return %₃) + +######################################## +# ncat with balanced row major element layout +[10 20 30 ; 40 50 60 ;;;] +#--------------------- +1 (call core.tuple 2 3 1) +2 (call top.hvncat %₁ true 10 20 30 40 50 60) +3 (return %₂) + +######################################## +# ncat of 3D array with balanced layout +[10 ; 20 ;; 30 ; 40 ;;; 50 ; 60 ;; 70 ; 80] +#--------------------- +1 (call core.tuple 2 2 2) +2 (call top.hvncat %₁ false 10 20 30 40 50 60 70 80) +3 (return %₂) + +######################################## +# ncat with unbalanced column major layout +[10 ; 20 ;; 30 ;;; 40 ;;;;] +#--------------------- +1 (call core.tuple 2 1 1) +2 (call core.tuple 3 1) +3 (call core.tuple 4) +4 (call core.tuple 4) +5 (call core.tuple %₁ %₂ %₃ %₄) +6 (call top.hvncat %₅ false 10 20 30 40) +7 (return %₆) + +######################################## +# ncat with unbalanced row major layout +[10 20 ; 30 40 ; 50 60 ;;; 70 ;;; 80 ;;;;] +#--------------------- +1 (call core.tuple 2 2 2 1 1) +2 (call core.tuple 6 1 1) +3 (call 
core.tuple 8) +4 (call core.tuple 8) +5 (call core.tuple %₁ %₂ %₃ %₄) +6 (call top.hvncat %₅ true 10 20 30 40 50 60 70 80) +7 (return %₆) + +######################################## +# Splatting with 1D ncat +[xs ;;; ys... ;;; zs] +#--------------------- +1 TestMod.xs +2 (call core.tuple 3 %₁) +3 TestMod.ys +4 TestMod.zs +5 (call core.tuple %₄) +6 (call core._apply_iterate top.iterate top.hvncat %₂ %₃ %₅) +7 (return %₆) + +######################################## +# Error: splatting with multi-dimensional ncat +[xs ; ys ;;; zs...] +#--------------------- +LoweringError: +[xs ; ys ;;; zs...] +# └───┘ ── Splatting ... in an `ncat` with multiple dimensions is not supported + +######################################## +# Error: bad nrow nesting +@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) + [K"nrow"(syntax_flags=set_numeric_flags(1)) + [K"nrow"(syntax_flags=set_numeric_flags(1)) + 1::K"Integer" + ] + ] +] +#--------------------- +LoweringError: +#= line 1 =# - Badly nested rows in `ncat` + + +######################################## +# Error: bad nrow nesting +@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) + [K"nrow"(syntax_flags=set_numeric_flags(2)) + [K"row" + 1::K"Integer" + ] + ] +] +#--------------------- +LoweringError: +#= line 1 =# - 2D `nrow` cannot be mixed with `row` in `ncat` + + +######################################## +# Error: bad nrow nesting +@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) + [K"row" + [K"row" + 1::K"Integer" + ] + ] +] +#--------------------- +LoweringError: +#= line 1 =# - Badly nested rows in `ncat` + + diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index b90e928eed798..998beaf404608 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -10,7 +10,7 @@ import FileWatching using Markdown import REPL -using JuliaSyntax: sourcetext +using JuliaSyntax: sourcetext, set_numeric_flags using JuliaLowering: SyntaxGraph, newnode!, ensure_attributes!, @@ -23,7 +23,8 @@ using 
JuliaLowering: function _ast_test_graph() graph = SyntaxGraph() ensure_attributes!(graph, - kind=Kind, source=Union{SourceRef,NodeId,Tuple,LineNumberNode}, + kind=Kind, syntax_flags=UInt16, + source=Union{SourceRef,NodeId,Tuple,LineNumberNode}, var_id=Int, value=Any, name_val=String) end @@ -204,7 +205,7 @@ function refresh_ir_test_cases(filename, pattern=nothing) for (expect_error, description, input, ref) in cases if isnothing(pattern) || occursin(pattern, description) ir = format_ir_for_test(test_mod, description, input, expect_error) - if ir != ref + if rstrip(ir) != ref @info "Refreshing test case $(repr(description)) in $filename" end else From 329e2351e4376249989a69dfc82b7e6b6b750f37 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 18 Jan 2025 09:33:35 +1000 Subject: [PATCH 0933/1109] Move literal_pow expansion into expand_forms_1 This is used in both broadcasting and normal call expansion, so it seems like the sensible thing to move this to the first pass and do it in one place. --- JuliaLowering/src/desugaring.jl | 16 ---------------- JuliaLowering/src/macro_expansion.jl | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 3b9cddf426160..a637f9c118897 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1165,22 +1165,6 @@ function expand_call(ctx, ex) expand_forms_2(ctx, farg) expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... ] - elseif length(args) == 2 && kind(farg) == K"Identifier" && farg.name_val == "^" && - kind(args[2]) == K"Integer" - expand_forms_2(ctx, - @ast ctx ex [K"call" - "literal_pow"::K"top" - farg - args[1] - [K"call" - [K"call" - "apply_type"::K"core" - "Val"::K"top" - args[2] - ] - ] - ] - ) else @ast ctx ex [K"call" expand_forms_2(ctx, farg) expand_forms_2(ctx, args)...] 
end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index bdbf7e4607a6a..806e98005d638 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -239,6 +239,25 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) e2 = @ast ctx e2 e2=>K"Symbol" end @ast ctx ex [K"." expand_forms_1(ctx, ex[1]) e2] + elseif (k == K"call" || k == K"dotcall") && numchildren(ex) == 3 && begin + fname = is_infix_op_call(ex) ? ex[2] : ex[1] + is_same_identifier_like(fname, "^") && kind(ex[3]) == K"Integer" + end + # Do literal-pow expansion here as it's later used in both call and + # dotcall expansion. + arg1 = is_infix_op_call(ex) ? ex[1] : ex[2] + @ast ctx ex [k + "literal_pow"::K"top" + expand_forms_1(ctx, fname) + expand_forms_1(ctx, arg1) + [K"call" + [K"call" + "apply_type"::K"core" + "Val"::K"top" + ex[3] + ] + ] + ] elseif is_leaf(ex) ex else From 81d206ac3b4de2469703abe949420e3cdb51da6c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 18 Jan 2025 10:32:26 +1000 Subject: [PATCH 0934/1109] Additional tests for bad `K"call"` ASTs --- JuliaLowering/test/functions_ir.jl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index d448fa308600d..c88d7ac50c9ae 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -864,3 +864,31 @@ end 7 (call JuliaLowering.bind_docs! 
%₆ "some docs\n" %₄) 8 (return core.nothing) +######################################## +# Error: infix call without enough arguments +@ast_ [K"call"(syntax_flags=JuliaSyntax.INFIX_FLAG) + "x"::K"Identifier" +] +#--------------------- +LoweringError: +#= line 1 =# - Postfix/infix operators must have at least two positional arguments + + +######################################## +# Error: postfix call without enough arguments +@ast_ [K"call"(syntax_flags=JuliaSyntax.POSTFIX_OP_FLAG) + "x"::K"Identifier" +] +#--------------------- +LoweringError: +#= line 1 =# - Postfix/infix operators must have at least two positional arguments + + +######################################## +# Error: Call with no function name +@ast_ [K"call"] +#--------------------- +LoweringError: +#= line 1 =# - Call expressions must have a function name + + From d96198f271b7390cbe68ae6d020ccd1c574a4426 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 18 Jan 2025 20:17:42 +1000 Subject: [PATCH 0935/1109] Move infix/prefix/postfix call desugaring into pass 1 This small amount of extra desugaring in pass 1 normalizes things a bit for pass 2. 
--- JuliaLowering/src/desugaring.jl | 20 ++++------ JuliaLowering/src/macro_expansion.jl | 57 +++++++++++++++++++--------- JuliaLowering/src/syntax_graph.jl | 6 ++- JuliaLowering/test/arrays_ir.jl | 3 -- JuliaLowering/test/functions_ir.jl | 4 +- JuliaLowering/test/misc_ir.jl | 1 - JuliaLowering/test/utils.jl | 21 +++++++--- 7 files changed, 67 insertions(+), 45 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a637f9c118897..294287bcc5cdd 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1020,9 +1020,10 @@ function expand_named_tuple(ctx, ex, kws; end name = to_symbol(ctx, kw[2]) value = kw - elseif k == K"call" && is_infix_op_call(kw) && numchildren(kw) == 3 && kw[2].name_val == "=>" + elseif k == K"call" && is_infix_op_call(kw) && numchildren(kw) == 3 && + is_same_identifier_like(kw[1], "=>") # a=>b ==> $a=b - appended_nt = _named_tuple_expr(ctx, kw, (kw[1],), (kw[3],)) + appended_nt = _named_tuple_expr(ctx, kw, (kw[2],), (kw[3],)) nothing, nothing elseif k == K"..." # args... ==> splat pairs @@ -1142,17 +1143,8 @@ function remove_kw_args!(ctx, args::SyntaxList) end function expand_call(ctx, ex) - args = SyntaxList(ctx) - if is_infix_op_call(ex) || is_postfix_op_call(ex) - @chk numchildren(ex) >= 2 "Postfix/infix operators must have at least two positional arguments" - farg = ex[2] - push!(args, ex[1]) - append!(args, ex[3:end]) - else - @chk numchildren(ex) > 0 "Call expressions must have a function name" - farg = ex[1] - append!(args, ex[2:end]) - end + farg = ex[1] + args = copy(ex[2:end]) kws = remove_kw_args!(ctx, args) if !isnothing(kws) return expand_forms_2(ctx, expand_kw_call(ctx, ex, farg, args, kws)) @@ -1664,6 +1656,8 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= "#anon#"::K"Placeholder" children(name)... 
] + elseif kind(name) == K"dotcall" + throw(LoweringError(name, "Cannot define function using `.` broadcast syntax")) else throw(LoweringError(name, "Bad function definition")) end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 806e98005d638..2fc846637492a 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -204,8 +204,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) k = kind(ex) if k == K"Identifier" && all(==('_'), ex.name_val) @ast ctx ex ex=>K"Placeholder" - elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || - (is_operator(k) && is_leaf(ex)) # <- TODO: fix upstream: make operator *tokens* into identifiers + elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" layerid = get(ex, :scope_layer, ctx.current_layer.id) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" @@ -239,25 +238,49 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) e2 = @ast ctx e2 e2=>K"Symbol" end @ast ctx ex [K"." expand_forms_1(ctx, ex[1]) e2] - elseif (k == K"call" || k == K"dotcall") && numchildren(ex) == 3 && begin - fname = is_infix_op_call(ex) ? ex[2] : ex[1] - is_same_identifier_like(fname, "^") && kind(ex[3]) == K"Integer" + elseif (k == K"call" || k == K"dotcall") + # Do some initial desugaring of call and dotcall here to simplify + # the later desugaring pass + args = SyntaxList(ctx) + if is_infix_op_call(ex) || is_postfix_op_call(ex) + @chk numchildren(ex) >= 2 "Postfix/infix operators must have at least two positional arguments" + farg = ex[2] + push!(args, ex[1]) + append!(args, ex[3:end]) + else + @chk numchildren(ex) > 0 "Call expressions must have a function name" + farg = ex[1] + append!(args, ex[2:end]) end - # Do literal-pow expansion here as it's later used in both call and - # dotcall expansion. - arg1 = is_infix_op_call(ex) ? 
ex[1] : ex[2] - @ast ctx ex [k - "literal_pow"::K"top" - expand_forms_1(ctx, fname) - expand_forms_1(ctx, arg1) - [K"call" + if length(args) == 2 && is_same_identifier_like(farg, "^") && kind(args[2]) == K"Integer" + # Do literal-pow expansion here as it's later used in both call and + # dotcall expansion. + @ast ctx ex [k + "literal_pow"::K"top" + expand_forms_1(ctx, farg) + expand_forms_1(ctx, args[1]) [K"call" - "apply_type"::K"core" - "Val"::K"top" - ex[3] + [K"call" + "apply_type"::K"core" + "Val"::K"top" + args[2] + ] ] ] - ] + else + if kind(farg) == K"." && numchildren(farg) == 1 + # (.+)(x,y) is treated as a dotcall + k = K"dotcall" + farg = farg[1] + end + # Preserve call type flags (mostly ignored in the next pass as + # we've already reordered arguments.) + callflags = JuliaSyntax.call_type_flags(ex) + @ast ctx ex [k(syntax_flags=(callflags == 0 ? nothing : callflags)) + expand_forms_1(ctx, farg) + (expand_forms_1(ctx, a) for a in args)... + ] + end elseif is_leaf(ex) ex else diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index e39215eb66ed2..552a7a36e8872 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -122,7 +122,9 @@ end # TODO: Probably terribly non-inferrable? function setattr!(graph::SyntaxGraph, id; attrs...) for (k,v) in pairs(attrs) - getattr(graph, k)[id] = v + if !isnothing(v) + getattr(graph, k)[id] = v + end end end @@ -310,7 +312,7 @@ JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode, byte_in JuliaSyntax.filename(src::LineNumberNode) = string(src.file) function JuliaSyntax.highlight(io::IO, src::LineNumberNode; note="") - print(io, src, " - ", note, "\n") + print(io, src, " - ", note) end function JuliaSyntax.highlight(io::IO, src::SourceRef; kws...) 
diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 6b41d433a5599..79e7d823a7991 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -223,7 +223,6 @@ LoweringError: LoweringError: #= line 1 =# - Badly nested rows in `ncat` - ######################################## # Error: bad nrow nesting @ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) @@ -237,7 +236,6 @@ LoweringError: LoweringError: #= line 1 =# - 2D `nrow` cannot be mixed with `row` in `ncat` - ######################################## # Error: bad nrow nesting @ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) @@ -251,4 +249,3 @@ LoweringError: LoweringError: #= line 1 =# - Badly nested rows in `ncat` - diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index c88d7ac50c9ae..4e2039545df6d 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -295,7 +295,7 @@ end #--------------------- LoweringError: function (.+)(x,y) -# └┘ ── Invalid function name +# └───────┘ ── Cannot define function using `.` broadcast syntax end ######################################## @@ -873,7 +873,6 @@ end LoweringError: #= line 1 =# - Postfix/infix operators must have at least two positional arguments - ######################################## # Error: postfix call without enough arguments @ast_ [K"call"(syntax_flags=JuliaSyntax.POSTFIX_OP_FLAG) @@ -883,7 +882,6 @@ LoweringError: LoweringError: #= line 1 =# - Postfix/infix operators must have at least two positional arguments - ######################################## # Error: Call with no function name @ast_ [K"call"] diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index c5f0ab376dad4..e77c937d58f53 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -215,4 +215,3 @@ LoweringError: LoweringError: #= line 1 =# - expected `numchildren(ex) >= 2` - diff --git a/JuliaLowering/test/utils.jl 
b/JuliaLowering/test/utils.jl index 998beaf404608..25d8aad4d4ed0 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -125,7 +125,9 @@ function match_ir_test_case(case_str) length(inout) == 1 ? (inout[1], "") : error("Too many sections in IR test case") expect_error = startswith(description, "Error") - (; expect_error=expect_error, description=strip(description), + is_todo = startswith(description, "TODO") + (; expect_error=expect_error, is_todo=is_todo, + description=strip(description), input=strip(input), output=strip(output)) end @@ -150,7 +152,7 @@ function setup_ir_test_module(preamble) test_mod end -function format_ir_for_test(mod, description, input, expect_error=false) +function format_ir_for_test(mod, description, input, expect_error=false, is_todo=false) ex = parsestmt(SyntaxTree, input) try if kind(ex) == K"macrocall" && ex[1].name_val == "@ast_" @@ -165,8 +167,12 @@ function format_ir_for_test(mod, description, input, expect_error=false) ir = strip(sprint(JuliaLowering.print_ir, x)) return replace(ir, string(mod)=>"TestMod") catch exc - if expect_error && (exc isa LoweringError) + if exc isa InterruptException + rethrow() + elseif expect_error && (exc isa LoweringError) return sprint(io->Base.showerror(io, exc, show_detail=false)) + elseif is_todo + return sprint(io->Base.showerror(io, exc)) else throw("Error in test case \"$description\"") end @@ -176,7 +182,10 @@ end function test_ir_cases(filename::AbstractString) preamble, cases = read_ir_test_cases(filename) test_mod = setup_ir_test_module(preamble) - for (expect_error, description, input, ref) in cases + for (expect_error, is_todo, description, input, ref) in cases + if is_todo + continue + end output = format_ir_for_test(test_mod, description, input, expect_error) @testset "$description" begin if output != ref @@ -202,9 +211,9 @@ function refresh_ir_test_cases(filename, pattern=nothing) println(io, preamble, "\n") println(io, 
"#*******************************************************************************") end - for (expect_error, description, input, ref) in cases + for (expect_error, is_todo, description, input, ref) in cases if isnothing(pattern) || occursin(pattern, description) - ir = format_ir_for_test(test_mod, description, input, expect_error) + ir = format_ir_for_test(test_mod, description, input, expect_error, is_todo) if rstrip(ir) != ref @info "Refreshing test case $(repr(description)) in $filename" end From ab69066b846d5086e0d7a488f503919b2249be1e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 18 Jan 2025 22:20:55 +1000 Subject: [PATCH 0936/1109] Desugaring of broadcast syntax Complete except for a couple of todos which will come with their associated syntax is dealt with: In-place updating broadcast with ref on left hand side and comparison chains. --- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/desugaring.jl | 77 +++++++++++++++- JuliaLowering/test/functions.jl | 21 +++++ JuliaLowering/test/functions_ir.jl | 137 +++++++++++++++++++++++++++++ 4 files changed, 235 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index b692aef7fa58e..eb99309fe1e4b 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -13,7 +13,7 @@ using JuliaSyntax: highlight, Kind, @KSet_str using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags, numeric_flags using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext -using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error +using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error, is_dotted _include("kinds.jl") _register_kinds() diff --git a/JuliaLowering/src/desugaring.jl 
b/JuliaLowering/src/desugaring.jl index 294287bcc5cdd..eb25c3a9ffdfb 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -536,6 +536,75 @@ function expand_setindex(ctx, ex) ] end +function expand_dotcall(ctx, ex) + k = kind(ex) + if k == K"dotcall" + @chk numchildren(ex) >= 1 + farg = ex[1] + args = SyntaxList(ctx) + append!(args, ex[2:end]) + kws = remove_kw_args!(ctx, args) + @ast ctx ex [K"call" + (isnothing(kws) ? "broadcasted" : "broadcasted_kwsyntax")::K"top" + farg # todo: What about (z=f).(x,y) ? + (expand_dotcall(ctx, arg) for arg in args)... + if !isnothing(kws) + [K"parameters" + kws... + ] + end + ] + elseif k == K"comparison" + TODO(ex, "call expand-compare-chain inside expand_dotcall") + elseif (k == K"&&" || k == K"||") && is_dotted(ex) + @ast ctx ex [K"call" + "broadcasted"::K"top" + (k == K"&&" ? "andand" : "oror")::K"top" + (expand_dotcall(ctx, arg) for arg in children(ex))... + ] + else + ex + end +end + +function expand_fuse_broadcast(ctx, ex) + if kind(ex) == K"=" + @assert is_dotted(ex) + @chk numchildren(ex) == 2 + lhs = ex[1] + kl = kind(lhs) + rhs = expand_dotcall(ctx, ex[2]) + @ast ctx ex [K"call" + "materialize!"::K"top" + if kl == K"ref" + TODO(lhs, "Need to call partially-expand-ref") + elseif kl == K"." && numchildren(lhs) == 2 + [K"call" + "dotgetproperty"::K"top" + children(lhs)... 
+ ] + else + lhs + end + if !(kind(rhs) == K"call" && kind(rhs[1]) == K"top" && rhs[1].name_val == "broadcasted") + # Ensure the rhs of .= is always wrapped in a call to `broadcasted()` + [K"call"(rhs) + "broadcasted"::K"top" + "identity"::K"top" + rhs + ] + else + rhs + end + ] + else + @ast ctx ex [K"call" + "materialize"::K"top" + expand_dotcall(ctx, ex) + ] + end +end + function check_no_assignment(exs) assign_pos = findfirst(kind(e) == K"=" for e in exs) if !isnothing(assign_pos) @@ -2850,6 +2919,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) k = kind(ex) if k == K"call" expand_call(ctx, ex) + elseif k == K"dotcall" || ((k == K"&&" || k == K"||") && is_dotted(ex)) + expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) elseif k == K"." expand_dot(ctx, ex) elseif k == K"?" @@ -2880,7 +2951,11 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_forms_2(ctx, ex[2]) ] elseif k == K"=" - expand_assignment(ctx, ex) + if is_dotted(ex) + expand_fuse_broadcast(ctx, ex) + else + expand_assignment(ctx, ex) + end elseif k == K"break" numchildren(ex) > 0 ? 
ex : @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"] diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 6a61f5139938a..5d22f99aeb5e2 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -224,4 +224,25 @@ end end +@testset "Broadcast" begin + @test JuliaLowering.include_string(test_mod, """ + let x = [1,2], y = [3,4], z = [5,6] + x .* y .+ z + end + """) == [8, 14] + + @test JuliaLowering.include_string(test_mod, """ + let nums = [1, 2, 3] + string.(nums, base=2; pad=2) + end + """) == ["01", "10", "11"] + + @test JuliaLowering.include_string(test_mod, """ + let lhs = [0,0], x = [1,2], y = [3,4], z = [5,6] + lhs .= x .* y .+ z + lhs + end + """) == [8, 14] +end + end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 4e2039545df6d..1a5f079c4c3d8 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -889,4 +889,141 @@ LoweringError: LoweringError: #= line 1 =# - Call expressions must have a function name +######################################## +# Simple broadcast +x .* y .+ f.(z) +#--------------------- +1 TestMod.+ +2 TestMod.* +3 TestMod.x +4 TestMod.y +5 (call top.broadcasted %₂ %₃ %₄) +6 TestMod.f +7 TestMod.z +8 (call top.broadcasted %₆ %₇) +9 (call top.broadcasted %₁ %₅ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Broadcast with unary function calls +.+x +#--------------------- +1 TestMod.+ +2 TestMod.x +3 (call top.broadcasted %₁ %₂) +4 (call top.materialize %₃) +5 (return %₄) + +######################################## +# Broadcast with short circuit operators +x .&& y .|| z +#--------------------- +1 TestMod.x +2 TestMod.y +3 (call top.broadcasted top.andand %₁ %₂) +4 TestMod.z +5 (call top.broadcasted top.oror %₃ %₄) +6 (call top.materialize %₅) +7 (return %₆) + +######################################## +# TODO: Broadcast with comparison chains +x .< y .< 
z +#--------------------- +LoweringError: +x .< y .< z +# └┘ ── expected `numchildren(ex) == 2` + +Detailed provenance: +(. <) +└─ (. <) + └─ @ :1 + + +######################################## +# Broadcast with literal_pow +x.^3 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call core.apply_type top.Val 3) +4 (call %₃) +5 (call top.broadcasted top.literal_pow %₁ %₂ %₄) +6 (call top.materialize %₅) +7 (return %₆) + +######################################## +# Broadcast with keywords +f.(x, y, z = 1; w = 2) +#--------------------- +1 top.broadcasted_kwsyntax +2 (call core.tuple :z :w) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1 2) +5 (call %₃ %₄) +6 TestMod.f +7 TestMod.x +8 TestMod.y +9 (call core.kwcall %₅ %₁ %₆ %₇ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Broadcast with unary dot syntax +(.+)(x,y) +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.y +4 (call top.broadcasted %₁ %₂ %₃) +5 (call top.materialize %₄) +6 (return %₅) + +######################################## +# Trivial in-place broadcast update +x .= y +#--------------------- +1 TestMod.x +2 TestMod.y +3 (call top.broadcasted top.identity %₂) +4 (call top.materialize! %₁ %₃) +5 (return %₄) + +######################################## +# Fused in-place broadcast update +x .= y .+ z +#--------------------- +1 TestMod.x +2 TestMod.+ +3 TestMod.y +4 TestMod.z +5 (call top.broadcasted %₂ %₃ %₄) +6 (call top.materialize! %₁ %₅) +7 (return %₆) + +######################################## +# In-place broadcast update with property assignment on left hand side +x.prop .= y +#--------------------- +1 TestMod.x +2 (call top.dotgetproperty %₁ :prop) +3 TestMod.y +4 (call top.broadcasted top.identity %₃) +5 (call top.materialize! 
%₂ %₄) +6 (return %₅) + +######################################## +# TODO: In-place broadcast update with ref on left hand side +x[i] .= y +#--------------------- +LoweringError: +x[i] .= y +└──┘ ── Lowering TODO: Need to call partially-expand-ref + +Detailed provenance: +(ref x i) +└─ (ref x i) + └─ @ :1 + From 1d16d9b4d7624918a3997988c07cf5e2cf512843 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 07:36:36 +1000 Subject: [PATCH 0937/1109] Desugar standalone dotted operators `.+` Also add sections to desugaring.jl which is getting quite long. --- JuliaLowering/src/desugaring.jl | 91 +++++++++++++++++++++++++++----- JuliaLowering/test/desugaring.jl | 16 ------ JuliaLowering/test/misc_ir.jl | 31 +++++++++++ 3 files changed, 110 insertions(+), 28 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index eb25c3a9ffdfb..18d9fe1b86d59 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -69,6 +69,9 @@ function is_effect_free(ex) # because this calls the user-defined getproperty? end +#------------------------------------------------------------------------------- +# Destructuring + # Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the # tuple. Includes support for slurping/splatting. 
# @@ -398,6 +401,8 @@ function expand_tuple_destruct(ctx, ex) makenode(ctx, ex, K"block", stmts) end +#------------------------------------------------------------------------------- +# Expansion of array indexing function _arg_to_temp(ctx, stmts, ex, eq_is_kw=false) k = kind(ex) if is_effect_free(ex) @@ -536,6 +541,9 @@ function expand_setindex(ctx, ex) ] end +#------------------------------------------------------------------------------- +# Expansion of broadcast notation `f.(x .+ y)` + function expand_dotcall(ctx, ex) k = kind(ex) if k == K"dotcall" @@ -605,6 +613,9 @@ function expand_fuse_broadcast(ctx, ex) end end +#------------------------------------------------------------------------------- +# Expansion of array concatenation notation `[a b ; c d]` etc + function check_no_assignment(exs) assign_pos = findfirst(kind(e) == K"=" for e in exs) if !isnothing(assign_pos) @@ -831,6 +842,9 @@ function expand_ncat(ctx, ex) ] end +#------------------------------------------------------------------------------- +# Expand assignments + # Expand UnionAll definitions, eg `X{T} = Y{T,T}` function expand_unionall_def(ctx, srcref, lhs, rhs) if numchildren(lhs) <= 1 @@ -958,6 +972,9 @@ function expand_assignment(ctx, ex) end end +#------------------------------------------------------------------------------- +# Expand logical conditional statements + # Flatten nested && or || nodes and expand their children function expand_cond_children(ctx, ex, cond_kind=kind(ex), flat_children=SyntaxList(ctx)) for e in children(ex) @@ -993,6 +1010,9 @@ function expand_condition(ctx, ex) end end +#------------------------------------------------------------------------------- +# Expand let blocks + function expand_let(ctx, ex) @chk numchildren(ex) == 2 bindings = ex[1] @@ -1036,6 +1056,9 @@ function expand_let(ctx, ex) return blk end +#------------------------------------------------------------------------------- +# Expand named tuples + function _named_tuple_expr(ctx, srcref, names, 
values) if isempty(names) @ast ctx srcref [K"call" "NamedTuple"::K"core"] @@ -1134,6 +1157,9 @@ function expand_named_tuple(ctx, ex, kws; current_nt end +#------------------------------------------------------------------------------- +# Call expansion + function expand_kw_call(ctx, srcref, farg, args, kws) @ast ctx srcref [K"block" func := farg @@ -1231,24 +1257,37 @@ function expand_call(ctx, ex) end end +#------------------------------------------------------------------------------- + function expand_dot(ctx, ex) - @chk numchildren(ex) == 2 # TODO: bare `.+` syntax - rhs = ex[2] - # Required to support the possibly dubious syntax `a."b"`. See - # https://github.com/JuliaLang/julia/issues/26873 - # Syntax edition TODO: reconsider this; possibly restrict to only K"String"? - if !(kind(rhs) == K"string" || is_leaf(rhs)) - throw(LoweringError(rhs, "Unrecognized field access syntax")) - end - expand_forms_2(ctx, + @chk numchildren(ex) in (1,2) (ex, "`.` form requires either one or two children") + + if numchildren(ex) == 1 + # eg, `f = .+` + @ast ctx ex [K"call" + "BroadcastFunction"::K"top" + ex[1] + ] + elseif numchildren(ex) == 2 + # eg, `x.a` syntax + rhs = ex[2] + # Required to support the possibly dubious syntax `a."b"`. See + # https://github.com/JuliaLang/julia/issues/26873 + # Syntax edition TODO: reconsider this; possibly restrict to only K"String"? 
+ if !(kind(rhs) == K"string" || is_leaf(rhs)) + throw(LoweringError(rhs, "Unrecognized field access syntax")) + end @ast ctx ex [K"call" "getproperty"::K"top" ex[1] rhs ] - ) + end end +#------------------------------------------------------------------------------- +# Expand for loops + function foreach_lhs_var(f::Function, ex) k = kind(ex) if k == K"Identifier" @@ -1367,6 +1406,9 @@ function expand_for(ctx, ex) ] end +#------------------------------------------------------------------------------- +# Expand try/catch/finally + function match_try(ex) @chk numchildren(ex) > 1 "Invalid `try` form" try_ = ex[1] @@ -1436,6 +1478,9 @@ function expand_try(ctx, ex) end end +#------------------------------------------------------------------------------- +# Expand local/global/const declarations + # Strip variable type declarations from within a `local` or `global`, returning # the stripped expression. Works recursively with complex left hand side # assignments containing tuple destructuring. 
Eg, given @@ -1503,6 +1548,9 @@ function expand_decls(ctx, ex) makenode(ctx, ex, K"block", stmts) end +#------------------------------------------------------------------------------- +# Expansion of function definitions + function match_function_arg(full_ex) name = nothing type = nothing @@ -1941,6 +1989,9 @@ function expand_arrow_arglist(ctx, arglist) end end +#------------------------------------------------------------------------------- +# Expand macro definitions + function _make_macro_name(ctx, ex) k = kind(ex) if k == K"Identifier" || k == K"Symbol" @@ -1986,6 +2037,9 @@ function expand_macro_def(ctx, ex) ] end +#------------------------------------------------------------------------------- +# Expand type definitions + # Match `x<:T<:y` etc, returning `(name, lower_bound, upper_bound)` # A bound is `nothing` if not specified function analyze_typevar(ctx, ex) @@ -2723,6 +2777,9 @@ function expand_struct_def(ctx, ex, docs) ] end +#------------------------------------------------------------------------------- +# Expand `where` syntax + function expand_where(ctx, srcref, lhs, rhs) bounds = analyze_typevar(ctx, rhs) v = bounds[1] @@ -2753,6 +2810,9 @@ function expand_wheres(ctx, ex) body end +#------------------------------------------------------------------------------- +# Expand import / using / export + function _append_importpath(ctx, path_spec, path) prev_was_dot = true for component in children(path) @@ -2830,6 +2890,9 @@ function expand_import(ctx, ex) ] end +#------------------------------------------------------------------------------- +# Expand module definitions + function expand_module(ctx::DesugaringContext, ex::SyntaxTree) modname_ex = ex[1] @chk kind(modname_ex) == K"Identifier" @@ -2908,9 +2971,13 @@ function expand_module(ctx::DesugaringContext, ex::SyntaxTree) ] end +#------------------------------------------------------------------------------- +# Desugaring's "big switch": expansion of some simple forms; dispatch to other +# expansion 
functions for the rest. + """ Lowering pass 2 - desugaring - + This pass simplifies expressions by expanding complicated syntax sugar into a small set of core syntactic forms. For example, field access syntax `a.b` is expanded to a function call `getproperty(a, :b)`. @@ -2922,7 +2989,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"dotcall" || ((k == K"&&" || k == K"||") && is_dotted(ex)) expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) elseif k == K"." - expand_dot(ctx, ex) + expand_forms_2(ctx, expand_dot(ctx, ex)) elseif k == K"?" @chk numchildren(ex) == 3 expand_forms_2(ctx, @ast ctx ex [K"if" children(ex)...]) diff --git a/JuliaLowering/test/desugaring.jl b/JuliaLowering/test/desugaring.jl index 3aad0e152e594..834d0fb50865c 100644 --- a/JuliaLowering/test/desugaring.jl +++ b/JuliaLowering/test/desugaring.jl @@ -2,22 +2,6 @@ test_mod = Module(:TestMod) -@test desugar(test_mod, """ -a.b -""") ~ @ast_ [K"call" - "getproperty"::K"top" - "a"::K"Identifier" - "b"::K"Symbol" -] - -@test desugar(test_mod, """ -a."b" -""") ~ @ast_ [K"call" - "getproperty"::K"top" - "a"::K"Identifier" - "b"::K"String" -] - # @test desugar(test_mod, """ # let # y = 0 diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index e77c937d58f53..9c724da2642c1 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -1,3 +1,34 @@ +######################################## +# Getproperty syntax +x.a +#--------------------- +1 TestMod.x +2 (call top.getproperty %₁ :a) +3 (return %₂) + +######################################## +# Getproperty syntax with a string on right hand side +x."b" +#--------------------- +1 TestMod.x +2 (call top.getproperty %₁ "b") +3 (return %₂) + +######################################## +# Standalone dot syntax +.* +#--------------------- +1 TestMod.* +2 (call top.BroadcastFunction %₁) +3 (return %₂) + +######################################## +# Error: Wrong number of 
children in `.` +@ast_ [K"." "x"::K"Identifier" "a"::K"Identifier" 3::K"Integer"] +#--------------------- +LoweringError: +#= line 1 =# - `.` form requires either one or two children + ######################################## # Error: Placeholder value used _ + 1 From bbe6231599c408cf8a13eb5049aa8e6ed4f555c8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 08:06:41 +1000 Subject: [PATCH 0938/1109] Add REPL mode for testing JuliaLowering --- JuliaLowering/test/repl_mode.jl | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 JuliaLowering/test/repl_mode.jl diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl new file mode 100644 index 0000000000000..d50b493e44e6b --- /dev/null +++ b/JuliaLowering/test/repl_mode.jl @@ -0,0 +1,71 @@ +module JuliaLoweringREPL + +import ReplMaker +import REPL + +using JuliaLowering: JuliaLowering, SyntaxTree, children +using JuliaSyntax + +function is_incomplete(prompt_state) + str = String(take!(copy(REPL.LineEdit.buffer(prompt_state)))) + stream = JuliaSyntax.ParseStream(str) + JuliaSyntax.parse!(stream, rule=:all) + if JuliaSyntax.any_error(stream) + tree = JuliaSyntax.build_tree(SyntaxNode, stream) + tag = JuliaSyntax._incomplete_tag(tree, 1) + return tag != :none + else + return false + end +end + +function eval_ish(mod, ex, do_eval) + k = kind(ex) + if k == K"toplevel" + x = nothing + for e in children(ex) + x = eval_ish(mod, e, do_eval) + end + return x + end + linear_ir = JuliaLowering.lower(mod, ex) + JuliaLowering.print_ir(stdout, linear_ir) + println(stdout, "#----------------------") + if do_eval + expr_form = JuliaLowering.to_lowered_expr(mod, linear_ir) + Base.eval(mod, expr_form) + end +end + +DO_EVAL::Bool = false +function opts(; do_eval=false) + global DO_EVAL = do_eval +end + +function handle_input(str) + global DO_EVAL + if str == "DO_EVAL" + DO_EVAL = true + return + elseif str == "!DO_EVAL" + DO_EVAL = false + return + end + ex = 
parseall(SyntaxTree, str; filename="REPL") + eval_ish(Main, ex, DO_EVAL) +end + +function init() + ReplMaker.initrepl(handle_input, + valid_input_checker = !is_incomplete, + prompt_text="Lowering> ", + prompt_color = :blue, + start_key=")", + mode_name=:JuliaLowering) +end + +function __init__() + init() +end + +end From fd6bb66475863540bfb3aa17839bf9bd67219fcf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 13:48:06 +1000 Subject: [PATCH 0939/1109] Add devdocs regarding `@ chk` vs `@ assert` vs `throw(LoweringError())` --- JuliaLowering/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index e604c4432de37..4a7f5552abdab 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -71,6 +71,35 @@ to use some of these tricks to make `SyntaxTree` faster, eventually. See, for example, [Building Games in ECS with Entity Relationships](https://ajmmertens.medium.com/building-games-in-ecs-with-entity-relationships-657275ba2c6c) +### Structural assertions / checking validity of syntax trees + +Syntax trees in Julia `Expr` form are very close to lisp lists: a symbol at the +`head` of the list which specifies the syntactic form, and a sequence of +children in the syntax tree. This is a representation which `JuliaSyntax` and +`JuliaLowering` follow but it does come with certain disadvantages. One of the +most problematic is that the number of children affects the validity (and +sometimes semantics) of an AST node, as much as the `head` symbol does. + +In `JuliaSyntax` we've greatly reduced the overloading of `head` in order to +simplify the interpretation of child structures in the tree. For example, +broadcast calls like `f.(x,y)` use the `K"dotcall"` kind rather than being a +node with `head == Symbol(".")` and a tuple as children. 
+ +However, there's still many ways for lowering to encounter invalid expressions +of type `SyntaxTree` and these must be checked. In JuliaSyntax we have several +levels of effort corresponding to the type of errors conditions we desire to +check and report: + +* For invalid syntax which is accepted by the `JuliaSyntax` + parser but is invalid in lowering we use manual `if` blocks followed by + throwing a `LoweringError`. This is more programming effort but allows for + the highest quality error messages for the typical end user. +* For invalid syntax which can only be produced by macros (ie, not by the + parser) we mostly use the `@chk` macro. This is a quick tool for validating + input but gives lesser quality error messages. +* For JuliaLowering's internal invariants we just use `@assert` - these should + never be hit and can be compiled out in principle. + ## Provenance tracking Expression provenance is tracked through lowering by attaching provenance From fa60e5f7b6d8bf8890b024e0c5d272e3bc949f32 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 16:34:49 +1000 Subject: [PATCH 0940/1109] Desugaring of comparison chains --- JuliaLowering/src/desugaring.jl | 87 +++++++++++++++++++++- JuliaLowering/test/functions.jl | 4 ++ JuliaLowering/test/functions_ir.jl | 111 ++++++++++++++++++++++++++--- JuliaLowering/test/repl_mode.jl | 2 +- 4 files changed, 194 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 18d9fe1b86d59..73767af055e7d 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -401,6 +401,89 @@ function expand_tuple_destruct(ctx, ex) makenode(ctx, ex, K"block", stmts) end +#------------------------------------------------------------------------------- +# Expand comparison chains + +function expand_scalar_compare_chain(ctx, srcref, terms, i) + comparisons = nothing + while i + 2 <= length(terms) + lhs = terms[i] + op = terms[i+1] + rhs = 
terms[i+2] + if kind(op) == K"." + break + end + comp = @ast ctx op [K"call" + op + lhs + rhs + ] + if isnothing(comparisons) + comparisons = comp + else + comparisons = @ast ctx srcref [K"&&" + comparisons + comp + ] + end + i += 2 + end + (comparisons, i) +end + +# Expanding comparison chains: (comparison a op b op c ...) +# +# We use && to combine pairs of adjacent scalar comparisons and .& to combine +# vector-vector and vector-scalar comparisons. Combining scalar comparisons are +# treated as having higher precedence than vector comparisons, thus: +# +# a < b < c ==> (a < b) && (b < c) +# a .< b .< c ==> (a .< b) .& (b .< c) +# a < b < c .< d .< e ==> (a < b && b < c) .& (c .< d) .& (d .< e) +# a .< b .< c < d < e ==> (a .< b) .& (b .< c) .& (c < d && d < e) +function expand_compare_chain(ctx, ex) + @assert kind(ex) == K"comparison" + terms = children(ex) + @chk numchildren(ex) >= 3 + @chk isodd(numchildren(ex)) + i = 1 + comparisons = nothing + # Combine any number of dotted comparisons + while i + 2 <= length(terms) + if kind(terms[i+1]) != K"." + (comp, i) = expand_scalar_compare_chain(ctx, ex, terms, i) + else + lhs = terms[i] + op = terms[i+1] + rhs = terms[i+2] + i += 2 + comp = @ast ctx op [K"dotcall" + op[1] + lhs + rhs + ] + end + if isnothing(comparisons) + comparisons = comp + else + comparisons = @ast ctx ex [K"dotcall" + "&"::K"top" + # ^^ NB: Flisp bug. Flisp lowering essentially does + # adopt_scope("&"::K"Identifier", ctx.mod) + # here which seems wrong if the comparison chain arose from + # a macro in a different module. One fix would be to use + # adopt_scope("&"::K"Identifier", ex) + # to get the module of the comparison expression for the + # `&` operator. But a simpler option is probably to always + # use `Base.&` so we do that. 
+ comparisons + comp + ] + end + end + comparisons +end + #------------------------------------------------------------------------------- # Expansion of array indexing function _arg_to_temp(ctx, stmts, ex, eq_is_kw=false) @@ -563,7 +646,7 @@ function expand_dotcall(ctx, ex) end ] elseif k == K"comparison" - TODO(ex, "call expand-compare-chain inside expand_dotcall") + expand_dotcall(ctx, expand_compare_chain(ctx, ex)) elseif (k == K"&&" || k == K"||") && is_dotted(ex) @ast ctx ex [K"call" "broadcasted"::K"top" @@ -3028,6 +3111,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"] elseif k == K"continue" @ast ctx ex [K"break" "loop_cont"::K"symbolic_label"] + elseif k == K"comparison" + expand_forms_2(ctx, expand_compare_chain(ctx, ex)) elseif k == K"doc" @chk numchildren(ex) == 2 sig = expand_forms_2(ctx, ex[2], ex) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 5d22f99aeb5e2..5cd41093d0d4a 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -243,6 +243,10 @@ end lhs end """) == [8, 14] + + @test JuliaLowering.include_string(test_mod, """ + [1,2] .+ ([3,4] .< [5,6] .< [7,1]) + """) == [2, 2] end end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 1a5f079c4c3d8..362b71f7fa4f8 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -928,18 +928,113 @@ x .&& y .|| z 7 (return %₆) ######################################## -# TODO: Broadcast with comparison chains -x .< y .< z +# Scalar comparison chain +x < y < z #--------------------- -LoweringError: +1 TestMod.< +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (gotoifnot %₄ label₁₁) +6 TestMod.< +7 TestMod.y +8 TestMod.z +9 (call %₆ %₇ %₈) +10 (return %₉) +11 (return false) + +######################################## +# Broadcasted comparison chain x .< y .< z -# └┘ ── expected 
`numchildren(ex) == 2` +#--------------------- +1 TestMod.< +2 TestMod.x +3 TestMod.y +4 (call top.broadcasted %₁ %₂ %₃) +5 TestMod.< +6 TestMod.y +7 TestMod.z +8 (call top.broadcasted %₅ %₆ %₇) +9 (call top.broadcasted top.& %₄ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) -Detailed provenance: -(. <) -└─ (. <) - └─ @ :1 +######################################## +# Mixed scalar / broadcasted comparison chain +a < b < c .< d .< e +#--------------------- +1 TestMod.< +2 TestMod.a +3 TestMod.b +4 (call %₁ %₂ %₃) +5 (gotoifnot %₄ label₁₁) +6 TestMod.< +7 TestMod.b +8 TestMod.c +9 (= slot₁/if_val (call %₆ %₇ %₈)) +10 (goto label₁₂) +11 (= slot₁/if_val false) +12 slot₁/if_val +13 TestMod.< +14 TestMod.c +15 TestMod.d +16 (call top.broadcasted %₁₃ %₁₄ %₁₅) +17 (call top.broadcasted top.& %₁₂ %₁₆) +18 TestMod.< +19 TestMod.d +20 TestMod.e +21 (call top.broadcasted %₁₈ %₁₉ %₂₀) +22 (call top.broadcasted top.& %₁₇ %₂₁) +23 (call top.materialize %₂₂) +24 (return %₂₃) + +######################################## +# Mixed scalar / broadcasted comparison chain +a .< b .< c < d < e +#--------------------- +1 TestMod.< +2 TestMod.a +3 TestMod.b +4 (call top.broadcasted %₁ %₂ %₃) +5 TestMod.< +6 TestMod.b +7 TestMod.c +8 (call top.broadcasted %₅ %₆ %₇) +9 (call top.broadcasted top.& %₄ %₈) +10 TestMod.< +11 TestMod.c +12 TestMod.d +13 (call %₁₀ %₁₁ %₁₂) +14 (gotoifnot %₁₃ label₂₀) +15 TestMod.< +16 TestMod.d +17 TestMod.e +18 (= slot₁/if_val (call %₁₅ %₁₆ %₁₇)) +19 (goto label₂₁) +20 (= slot₁/if_val false) +21 slot₁/if_val +22 (call top.broadcasted top.& %₉ %₂₁) +23 (call top.materialize %₂₂) +24 (return %₂₃) +######################################## +# Comparison chain fused with other broadcasting +x .+ (a .< b .< c) +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.< +4 TestMod.a +5 TestMod.b +6 (call top.broadcasted %₃ %₄ %₅) +7 TestMod.< +8 TestMod.b +9 TestMod.c +10 (call top.broadcasted %₇ %₈ %₉) +11 (call top.broadcasted top.& %₆ %₁₀) +12 (call 
top.broadcasted %₁ %₂ %₁₁) +13 (call top.materialize %₁₂) +14 (return %₁₃) ######################################## # Broadcast with literal_pow diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl index d50b493e44e6b..cf69659dfabd1 100644 --- a/JuliaLowering/test/repl_mode.jl +++ b/JuliaLowering/test/repl_mode.jl @@ -30,8 +30,8 @@ function eval_ish(mod, ex, do_eval) end linear_ir = JuliaLowering.lower(mod, ex) JuliaLowering.print_ir(stdout, linear_ir) - println(stdout, "#----------------------") if do_eval + println(stdout, "#----------------------") expr_form = JuliaLowering.to_lowered_expr(mod, linear_ir) Base.eval(mod, expr_form) end From 601b425386593d9fc5927c72c046fa5a80a3a214 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 20:52:29 +1000 Subject: [PATCH 0941/1109] Fix UnionAll assignment desugaring + add tests --- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/test/assignments.jl | 5 +++-- JuliaLowering/test/assignments_ir.jl | 33 ++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 73767af055e7d..94367e8e9886e 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -937,7 +937,7 @@ function expand_unionall_def(ctx, srcref, lhs, rhs) @ast ctx srcref [K"block" [K"const_if_global" name] unionall_type := expand_forms_2(ctx, [K"where" rhs lhs[2:end]...]) - expand_forms_2([K"=" name unionall_type]) + expand_forms_2(ctx, [K"=" name unionall_type]) ] end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 3171d81988103..698df95f77809 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -544,7 +544,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) elseif k == K"const_if_global" id = _resolve_scopes(ctx, ex[1]) if lookup_binding(ctx, id).kind == :global 
- @ast ctx ex [K"const" ex[1]] + @ast ctx ex [K"const" id] else makeleaf(ctx, ex, K"TOMBSTONE") end diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 07b482228c715..75728d94ae588 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -11,9 +11,10 @@ end """) # TODO: Desugaring of assignment done, but needs `where` lowering -@test_broken JuliaLowering.include_string(test_mod, """ +JuliaLowering.include_string(test_mod, """ MyVector{T} = Array{1,T} -""") == 42 +""") +@test test_mod.MyVector{Int} == Array{1,Int} # Chained assignment @test JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 072831b5f6b83..9dd7bd2eb4cba 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -114,6 +114,39 @@ end 9 TestMod.x 10 (return %₉) +######################################## +# UnionAll expansion at global scope results in const decl +X{T} = Y{T,T} +#--------------------- +1 (const TestMod.X) +2 (call core.TypeVar :T) +3 (= slot₁/T %₂) +4 slot₁/T +5 TestMod.Y +6 slot₁/T +7 slot₁/T +8 (call core.apply_type %₅ %₆ %₇) +9 (call core.UnionAll %₄ %₈) +10 (= TestMod.X %₉) +11 (return %₉) + +######################################## +# UnionAll expansion in local scope +let + X{T} = Y{T,T} +end +#--------------------- +1 (call core.TypeVar :T) +2 (= slot₂/T %₁) +3 slot₂/T +4 TestMod.Y +5 slot₂/T +6 slot₂/T +7 (call core.apply_type %₄ %₅ %₆) +8 (call core.UnionAll %₃ %₇) +9 (= slot₁/X %₈) +10 (return %₈) + ######################################## # simple setindex! 
a[i] = x From 7e7aa7c72fa67ab56db1d768665a70123a5608e3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 21:20:45 +1000 Subject: [PATCH 0942/1109] Full desugaring of ref `a[i]` syntax and broadcast ref assignment `a[i] .= rhs` --- JuliaLowering/src/ast.jl | 46 ++++++++++ JuliaLowering/src/desugaring.jl | 128 +++++++++++++++++---------- JuliaLowering/src/linear_ir.jl | 7 -- JuliaLowering/test/arrays.jl | 23 +++++ JuliaLowering/test/arrays_ir.jl | 101 ++++++++++++++++++++- JuliaLowering/test/assignments_ir.jl | 86 +++++++++--------- JuliaLowering/test/functions.jl | 8 ++ JuliaLowering/test/functions_ir.jl | 21 +++-- 8 files changed, 308 insertions(+), 112 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index ece141b4b92bc..24a6643f54cfc 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -186,6 +186,9 @@ function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp") end function emit_assign_tmp(stmts::SyntaxList, ctx, ex, name="tmp") + if is_ssa(ctx, ex) + return ex + end var = ssavar(ctx, ex, name) push!(stmts, makenode(ctx, ex, K"=", var, ex)) var @@ -559,6 +562,13 @@ function is_valid_modref(ex) (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) end +function is_simple_atom(ctx, ex) + k = kind(ex) + # TODO thismodule + is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || + (k == K"core" && ex.name_val == "nothing") +end + function decl_var(ex) kind(ex) == K"::" ? ex[1] : ex end @@ -586,3 +596,39 @@ function new_scope_layer(ctx, mod_ref::SyntaxTree) @assert kind(mod_ref) == K"Identifier" new_scope_layer(ctx, ctx.scope_layers[mod_ref.scope_layer].mod) end + +#------------------------------------------------------------------------------- +# Context wrapper which helps to construct a list of statements to be executed +# prior to some expression. Useful when we need to use subexpressions multiple +# times. 
+struct StatementListCtx{Ctx, GraphType} <: AbstractLoweringContext + ctx::Ctx + stmts::SyntaxList{GraphType} +end + +function Base.getproperty(ctx::StatementListCtx, field::Symbol) + if field === :ctx + getfield(ctx, :ctx) + elseif field === :stmts + getfield(ctx, :stmts) + else + getproperty(getfield(ctx, :ctx), field) + end +end + +function emit(ctx::StatementListCtx, ex) + push!(ctx.stmts, ex) +end + +function emit_assign_tmp(ctx::StatementListCtx, ex, name="tmp") + emit_assign_tmp(ctx.stmts, ctx.ctx, ex, name) +end + +with_stmts(ctx, stmts) = StatementListCtx(ctx, stmts) +with_stmts(ctx::StatementListCtx, stmts) = StatementListCtx(ctx.ctx, stmts) + +function with_stmts(ctx) + StatementListCtx(ctx, SyntaxList(ctx)) +end + +with_stmts(ctx::StatementListCtx) = StatementListCtx(ctx.ctx) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 94367e8e9886e..9e12395c2320f 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -18,6 +18,8 @@ function DesugaringContext(ctx) DesugaringContext(graph, ctx.bindings, ctx.scope_layers, ctx.current_layer.mod) end +#------------------------------------------------------------------------------- + function is_identifier_like(ex) k = kind(ex) k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" @@ -69,6 +71,22 @@ function is_effect_free(ex) # because this calls the user-defined getproperty? end +function check_no_parameters(ex::SyntaxTree, msg) + if numchildren(ex) >= 1 + pars = ex[end] + if kind(pars) == K"parameters" + throw(LoweringError(pars, msg)) + end + end +end + +function check_no_assignment(exs) + assign_pos = findfirst(kind(e) == K"=" for e in exs) + if !isnothing(assign_pos) + throw(LoweringError(exs[assign_pos], "misplaced assignment statement in `[ ... 
]`")) + end +end + #------------------------------------------------------------------------------- # Destructuring @@ -574,9 +592,9 @@ end # Go through indices and replace the `begin` or `end` symbol # `arr` - array being indexed # `idxs` - list of indices -# returns `idxs_out`; any statements that need to execute first are appended to -# `stmts`. -function process_indices(ctx, stmts, arr, idxs, expand_stmts) +# returns the expanded indices. Any statements that need to execute first are +# added to ctx.stmts. +function process_indices(ctx::StatementListCtx, arr, idxs) has_splats = any(kind(i) == K"..." for i in idxs) idxs_out = SyntaxList(ctx) splats = SyntaxList(ctx) @@ -585,8 +603,7 @@ function process_indices(ctx, stmts, arr, idxs, expand_stmts) val = replace_beginend(ctx, is_splat ? idx0[1] : idx0, arr, n, splats, n == length(idxs)) # TODO: kwarg? - idx = !has_splats || is_simple_atom(ctx, val) ? - val : emit_assign_tmp(stmts, ctx, expand_stmts ? expand_forms_2(ctx, val) : val) + idx = !has_splats || is_simple_atom(ctx, val) ? 
val : emit_assign_tmp(ctx, val) if is_splat push!(splats, idx) end @@ -595,30 +612,36 @@ function process_indices(ctx, stmts, arr, idxs, expand_stmts) return idxs_out end +# Expand things like `f()[i,end]`, add to `ctx.stmts` (temporaries for +# computing indices) and return +# * `arr` - The array (may be a temporary ssa value) +# * `idxs` - List of indices +function expand_ref_components(ctx::StatementListCtx, ex) + check_no_parameters(ex, "unexpected semicolon in array expression") + @assert kind(ex) == K"ref" + @chk numchildren(ex) >= 1 + arr = ex[1] + idxs = ex[2:end] + if any(contains_identifier(e, "begin", "end") for e in idxs) + arr = emit_assign_tmp(ctx, arr) + end + new_idxs = process_indices(ctx, arr, idxs) + return (arr, new_idxs) +end + function expand_setindex(ctx, ex) @assert kind(ex) == K"=" && numchildren(ex) == 2 lhs = ex[1] - @assert kind(lhs) == K"ref" - @chk numchildren(lhs) >= 1 - arr = lhs[1] - idxs = lhs[2:end] - rhs = ex[2] - - stmts = SyntaxList(ctx) - if !is_leaf(arr) && any(contains_identifier(e, "begin", "end") for e in idxs) - arr = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, arr)) - end - new_idxs = process_indices(ctx, stmts, arr, idxs, true) - if !is_ssa(ctx, rhs) && !is_quoted(rhs) - rhs = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) - end + sctx = with_stmts(ctx) + (arr, idxs) = expand_ref_components(sctx, lhs) + rhs = emit_assign_tmp(sctx, ex[2]) @ast ctx ex [K"block" - stmts... + sctx.stmts... expand_forms_2(ctx, [K"call" "setindex!"::K"top" arr rhs - new_idxs... + idxs... ]) [K"unnecessary" rhs] ] @@ -668,7 +691,16 @@ function expand_fuse_broadcast(ctx, ex) @ast ctx ex [K"call" "materialize!"::K"top" if kl == K"ref" - TODO(lhs, "Need to call partially-expand-ref") + sctx = with_stmts(ctx) + (arr, idxs) = expand_ref_components(sctx, lhs) + [K"block" + sctx.stmts... + [K"call" + "dotview"::K"top" + arr + idxs... + ] + ] elseif kl == K"." 
&& numchildren(lhs) == 2 [K"call" "dotgetproperty"::K"top" @@ -699,17 +731,8 @@ end #------------------------------------------------------------------------------- # Expansion of array concatenation notation `[a b ; c d]` etc -function check_no_assignment(exs) - assign_pos = findfirst(kind(e) == K"=" for e in exs) - if !isnothing(assign_pos) - throw(LoweringError(exs[assign_pos], "misplaced assignment statement in `[ ... ]`")) - end -end - function expand_vcat(ctx, ex) - if has_parameters(ex) - throw(LoweringError(ex, "unexpected semicolon in array expression")) - end + check_no_parameters(ex, "unexpected semicolon in array expression") check_no_assignment(children(ex)) had_row = false had_row_splat = false @@ -1014,7 +1037,7 @@ function expand_assignment(ctx, ex) end elseif kl == K"ref" # a[i1, i2] = rhs - expand_setindex(ctx, ex) + expand_forms_2(ctx, expand_setindex(ctx, ex)) elseif kl == K"::" && numchildren(lhs) == 2 x = lhs[1] T = lhs[2] @@ -1336,7 +1359,10 @@ function expand_call(ctx, ex) expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... ] else - @ast ctx ex [K"call" expand_forms_2(ctx, farg) expand_forms_2(ctx, args)...] + @ast ctx ex [K"call" + expand_forms_2(ctx, farg) + expand_forms_2(ctx, args)... 
+ ] end end @@ -2976,7 +3002,7 @@ end #------------------------------------------------------------------------------- # Expand module definitions -function expand_module(ctx::DesugaringContext, ex::SyntaxTree) +function expand_module(ctx, ex::SyntaxTree) modname_ex = ex[1] @chk kind(modname_ex) == K"Identifier" modname = modname_ex.name_val @@ -3102,7 +3128,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"=" if is_dotted(ex) - expand_fuse_broadcast(ctx, ex) + expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) else expand_assignment(ctx, ex) end @@ -3192,14 +3218,20 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"struct" expand_forms_2(ctx, expand_struct_def(ctx, ex, docs)) elseif k == K"ref" - if numchildren(ex) > 2 - TODO(ex, "ref expansion") - end - expand_forms_2(ctx, @ast ctx ex [K"call" "getindex"::K"top" ex[1] ex[2]]) + sctx = with_stmts(ctx) + (arr, idxs) = expand_ref_components(sctx, ex) + expand_forms_2(ctx, + @ast ctx ex [K"block" + sctx.stmts... + [K"call" + "getindex"::K"top" + arr + idxs... 
+ ] + ] + ) elseif k == K"curly" - if has_parameters(ex) - throw(LoweringError(ex[end], "unexpected semicolon in type parameter list")) - end + check_no_parameters(ex, "unexpected semicolon in type parameter list") for c in children(ex) if kind(c) == K"=" throw(LoweringError(c, "misplace assignment in type parameter list")) @@ -3219,9 +3251,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] ] elseif k == K"vect" - if has_parameters(ex) - throw(LoweringError(ex, "unexpected semicolon in array expression")) - end + check_no_parameters(ex, "unexpected semicolon in array expression") check_no_assignment(children(ex)) @ast ctx ex [K"call" "vect"::K"top" @@ -3272,7 +3302,11 @@ function expand_forms_2(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector} res end -function expand_forms_2(ctx, ex::SyntaxTree) +function expand_forms_2(ctx::StatementListCtx, args...) + expand_forms_2(ctx.ctx, args...) +end + +function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree) ctx1 = DesugaringContext(ctx) ex1 = expand_forms_2(ctx1, reparent(ctx1, ex)) ctx1, ex1 diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index a17a5f463f54a..0ee1a3bb8287a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -1,13 +1,6 @@ #------------------------------------------------------------------------------- # Lowering pass 5: Flatten to linear IR -function is_simple_atom(ctx, ex) - k = kind(ex) - # TODO thismodule - is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || - (k == K"core" && ex.name_val == "nothing") -end - function is_valid_ir_argument(ctx, ex) k = kind(ex) if is_simple_atom(ctx, ex) || k == K"inert" || k == K"top" || k == K"core" diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl index 1c327d438a16d..9699ad866bc52 100644 --- a/JuliaLowering/test/arrays.jl +++ b/JuliaLowering/test/arrays.jl @@ -80,5 +80,28 @@ end Int[1.0 ;;; 2.0 ;;; 3.0] """) ≅ [1 
;;; 2 ;;; 3] +# getindex +@test JuliaLowering.include_string(test_mod, """ +let + x = [1 2; + 3 4] + (x[end,begin], x[begin,end]) +end +""") == (3, 2) + +# getindex with splats +@test JuliaLowering.include_string(test_mod, """ +let + x = [1 2; + 3 4 + ;;; + 5 6; + 7 8] + inds = (2,1) + ind1 = (1,) + (x[inds..., begin], x[inds..., end], x[1, inds...], + x[ind1..., ind1..., end]) +end +""") == (3, 7, 2, 5) end diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 79e7d823a7991..ca31db9e8d845 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -11,7 +11,7 @@ #--------------------- LoweringError: [10, 20; 30] -└──────────┘ ── unexpected semicolon in array expression +# └──┘ ── unexpected semicolon in array expression ######################################## # Error: vect syntax with embedded assignments @@ -249,3 +249,102 @@ LoweringError: LoweringError: #= line 1 =# - Badly nested rows in `ncat` +######################################## +# Simple getindex +a[i] +#--------------------- +1 TestMod.a +2 TestMod.i +3 (call top.getindex %₁ %₂) +4 (return %₃) + +######################################## +# simple 1D getindex with begin +a[begin] +#--------------------- +1 TestMod.a +2 (call top.firstindex %₁) +3 (call top.getindex %₁ %₂) +4 (return %₃) + +######################################## +# simple 1D getindex with end +a[end] +#--------------------- +1 TestMod.a +2 (call top.lastindex %₁) +3 (call top.getindex %₁ %₂) +4 (return %₃) + +######################################## +# multidimensional getindex with begin +a[i, begin] +#--------------------- +1 TestMod.a +2 TestMod.i +3 (call top.firstindex %₁ 2) +4 (call top.getindex %₁ %₂ %₃) +5 (return %₄) + +######################################## +# multidimensional getindex with end +a[i, end] +#--------------------- +1 TestMod.a +2 TestMod.i +3 (call top.lastindex %₁ 2) +4 (call top.getindex %₁ %₂ %₃) +5 (return %₄) + 
+######################################## +# multidimensional getindex with begin/end and splats +a[is..., end, js..., begin] +#--------------------- +1 TestMod.a +2 TestMod.is +3 (call top.length %₂) +4 (call top.+ 1 %₃) +5 (call top.lastindex %₁ %₄) +6 TestMod.js +7 (call top.length %₂) +8 (call top.length %₆) +9 (call top.+ 2 %₇ %₈) +10 (call top.firstindex %₁ %₉) +11 (call core.tuple %₁) +12 (call core.tuple %₅) +13 (call core.tuple %₁₀) +14 (call core._apply_iterate top.iterate top.getindex %₁₁ %₂ %₁₂ %₆ %₁₃) +15 (return %₁₄) + +######################################## +# getindex with nontrivial array expression and begin/end +f()[end] +#--------------------- +1 TestMod.f +2 (call %₁) +3 (call top.lastindex %₂) +4 (call top.getindex %₂ %₃) +5 (return %₄) + +######################################## +# nested refs with getindex and begin/end +b[a[begin, end], begin, end] +#--------------------- +1 TestMod.b +2 TestMod.a +3 (call top.firstindex %₂ 1) +4 (call top.lastindex %₂ 2) +5 (call top.getindex %₂ %₃ %₄) +6 (call top.firstindex %₁ 2) +7 (call top.lastindex %₁ 3) +8 (call top.getindex %₁ %₅ %₆ %₇) +9 (return %₈) + +######################################## +# Error: parameters in array ref +a[i, j; w=1] +#--------------------- +LoweringError: +a[i, j; w=1] +# └───┘ ── unexpected semicolon in array expression + diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 9dd7bd2eb4cba..40c7b81b2e87d 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -161,70 +161,64 @@ a[i] = x # simple setindex! with begin a[begin] = x #--------------------- -1 TestMod.x -2 TestMod.a -3 TestMod.a -4 (call top.firstindex %₃) -5 (call top.setindex! %₂ %₁ %₄) -6 (return %₁) +1 TestMod.a +2 TestMod.x +3 (call top.firstindex %₁) +4 (call top.setindex! %₁ %₂ %₃) +5 (return %₂) ######################################## # simple setindex! 
with end a[end] = x #--------------------- -1 TestMod.x -2 TestMod.a -3 TestMod.a -4 (call top.lastindex %₃) -5 (call top.setindex! %₂ %₁ %₄) -6 (return %₁) +1 TestMod.a +2 TestMod.x +3 (call top.lastindex %₁) +4 (call top.setindex! %₁ %₂ %₃) +5 (return %₂) ######################################## # multidimensional setindex! with begin a[i, begin] = x #--------------------- -1 TestMod.x -2 TestMod.a +1 TestMod.a +2 TestMod.x 3 TestMod.i -4 TestMod.a -5 (call top.firstindex %₄ 2) -6 (call top.setindex! %₂ %₁ %₃ %₅) -7 (return %₁) +4 (call top.firstindex %₁ 2) +5 (call top.setindex! %₁ %₂ %₃ %₄) +6 (return %₂) ######################################## # multidimensional setindex! with end a[i, end] = x #--------------------- -1 TestMod.x -2 TestMod.a +1 TestMod.a +2 TestMod.x 3 TestMod.i -4 TestMod.a -5 (call top.lastindex %₄ 2) -6 (call top.setindex! %₂ %₁ %₃ %₅) -7 (return %₁) +4 (call top.lastindex %₁ 2) +5 (call top.setindex! %₁ %₂ %₃ %₄) +6 (return %₂) ######################################## # multidimensional setindex! with begin/end and splats a[is..., end, js..., begin] = x #--------------------- -1 TestMod.is -2 TestMod.a -3 (call top.length %₁) +1 TestMod.a +2 TestMod.is +3 (call top.length %₂) 4 (call top.+ 1 %₃) -5 (call top.lastindex %₂ %₄) +5 (call top.lastindex %₁ %₄) 6 TestMod.js -7 TestMod.a -8 (call top.length %₁) -9 (call top.length %₆) -10 (call top.+ 2 %₈ %₉) -11 (call top.firstindex %₇ %₁₀) -12 TestMod.x -13 TestMod.a -14 (call core.tuple %₁₃ %₁₂) -15 (call core.tuple %₅) -16 (call core.tuple %₁₁) -17 (call core._apply_iterate top.iterate top.setindex! %₁₄ %₁ %₁₅ %₆ %₁₆) -18 (return %₁₂) +7 (call top.length %₂) +8 (call top.length %₆) +9 (call top.+ 2 %₇ %₈) +10 (call top.firstindex %₁ %₉) +11 TestMod.x +12 (call core.tuple %₁ %₁₁) +13 (call core.tuple %₅) +14 (call core.tuple %₁₀) +15 (call core._apply_iterate top.iterate top.setindex! %₁₂ %₂ %₁₃ %₆ %₁₄) +16 (return %₁₁) ######################################## # setindex! 
with nontrivial array expression and begin/end @@ -238,16 +232,16 @@ f()[end] = x 6 (return %₃) ######################################## -# nested refs (fixme!) +# nested refs b[a[begin]] = x #--------------------- -1 TestMod.x -2 TestMod.b +1 TestMod.b +2 TestMod.x 3 TestMod.a -4 TestMod.begin +4 (call top.firstindex %₃) 5 (call top.getindex %₃ %₄) -6 (call top.setindex! %₂ %₁ %₅) -7 (return %₁) +6 (call top.setindex! %₁ %₂ %₅) +7 (return %₂) ######################################## # empty ref and setindex! diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 5cd41093d0d4a..bd80aae675e4d 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -247,6 +247,14 @@ end @test JuliaLowering.include_string(test_mod, """ [1,2] .+ ([3,4] .< [5,6] .< [7,1]) """) == [2, 2] + + @test JuliaLowering.include_string(test_mod, """ + let + x = [0,0,0,0] + x[begin+1:end-1] .= [1,2] .+ [3,4] + x + end + """) == [0,4,6,0] end end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 362b71f7fa4f8..065d0b27d294f 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1109,16 +1109,15 @@ x.prop .= y 6 (return %₅) ######################################## -# TODO: In-place broadcast update with ref on left hand side -x[i] .= y +# In-place broadcast update with ref on left hand side +x[i,end] .= y #--------------------- -LoweringError: -x[i] .= y -└──┘ ── Lowering TODO: Need to call partially-expand-ref - -Detailed provenance: -(ref x i) -└─ (ref x i) - └─ @ :1 - +1 TestMod.x +2 TestMod.i +3 (call top.lastindex %₁ 2) +4 (call top.dotview %₁ %₂ %₃) +5 TestMod.y +6 (call top.broadcasted top.identity %₅) +7 (call top.materialize! 
%₄ %₆) +8 (return %₇) From d4ab6f3022c7c80b93cc2c83ffe6416ac3424f33 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 21:38:58 +1000 Subject: [PATCH 0943/1109] Move some test cases from assignments -> arrays --- JuliaLowering/test/arrays.jl | 23 ++++++ JuliaLowering/test/arrays_ir.jl | 105 ++++++++++++++++++++++++++ JuliaLowering/test/assignments.jl | 23 ------ JuliaLowering/test/assignments_ir.jl | 107 --------------------------- 4 files changed, 128 insertions(+), 130 deletions(-) diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl index 9699ad866bc52..f39e4a8002f24 100644 --- a/JuliaLowering/test/arrays.jl +++ b/JuliaLowering/test/arrays.jl @@ -80,6 +80,29 @@ end Int[1.0 ;;; 2.0 ;;; 3.0] """) ≅ [1 ;;; 2 ;;; 3] +# Lowering of ref to setindex +@test JuliaLowering.include_string(test_mod, """ +let + as = [0,0,0,0] + as[begin] = 1 + as[2] = 2 + as[end] = 4 + as +end +""") == [1, 2, 0, 4] + +@test JuliaLowering.include_string(test_mod, """ +let + as = zeros(Int, 2,3) + as[begin, end] = 1 + as[end, begin] = 2 + js = (2,) + as[js..., end] = 3 + as +end +""") == [0 0 1; + 2 0 3] + # getindex @test JuliaLowering.include_string(test_mod, """ let diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index ca31db9e8d845..c2fd227a08a38 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -348,3 +348,108 @@ LoweringError: a[i, j; w=1] # └───┘ ── unexpected semicolon in array expression +######################################## +# simple setindex! +a[i] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.i +4 (call top.setindex! %₂ %₁ %₃) +5 (return %₁) + +######################################## +# simple setindex! with begin +a[begin] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 (call top.firstindex %₁) +4 (call top.setindex! %₁ %₂ %₃) +5 (return %₂) + +######################################## +# simple setindex! 
with end +a[end] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 (call top.lastindex %₁) +4 (call top.setindex! %₁ %₂ %₃) +5 (return %₂) + +######################################## +# multidimensional setindex! with begin +a[i, begin] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 TestMod.i +4 (call top.firstindex %₁ 2) +5 (call top.setindex! %₁ %₂ %₃ %₄) +6 (return %₂) + +######################################## +# multidimensional setindex! with end +a[i, end] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 TestMod.i +4 (call top.lastindex %₁ 2) +5 (call top.setindex! %₁ %₂ %₃ %₄) +6 (return %₂) + +######################################## +# multidimensional setindex! with begin/end and splats +a[is..., end, js..., begin] = x +#--------------------- +1 TestMod.a +2 TestMod.is +3 (call top.length %₂) +4 (call top.+ 1 %₃) +5 (call top.lastindex %₁ %₄) +6 TestMod.js +7 (call top.length %₂) +8 (call top.length %₆) +9 (call top.+ 2 %₇ %₈) +10 (call top.firstindex %₁ %₉) +11 TestMod.x +12 (call core.tuple %₁ %₁₁) +13 (call core.tuple %₅) +14 (call core.tuple %₁₀) +15 (call core._apply_iterate top.iterate top.setindex! %₁₂ %₂ %₁₃ %₆ %₁₄) +16 (return %₁₁) + +######################################## +# setindex! with nontrivial array expression and begin/end +f()[end] = x +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.x +4 (call top.lastindex %₂) +5 (call top.setindex! %₂ %₃ %₄) +6 (return %₃) + +######################################## +# nested refs +b[a[begin]] = x +#--------------------- +1 TestMod.b +2 TestMod.x +3 TestMod.a +4 (call top.firstindex %₃) +5 (call top.getindex %₃ %₄) +6 (call top.setindex! %₁ %₂ %₅) +7 (return %₂) + +######################################## +# empty ref and setindex! +a[] = rhs +#--------------------- +1 TestMod.rhs +2 TestMod.a +3 (call top.setindex! 
%₂ %₁) +4 (return %₁) + diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 75728d94ae588..54ddcbab7257c 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -40,29 +40,6 @@ let end """) == (10,2) -# Lowering of ref -@test JuliaLowering.include_string(test_mod, """ -let - as = [0,0,0,0] - as[begin] = 1 - as[2] = 2 - as[end] = 4 - as -end -""") == [1, 2, 0, 4] - -@test JuliaLowering.include_string(test_mod, """ -let - as = zeros(Int, 2,3) - as[begin, end] = 1 - as[end, begin] = 2 - js = (2,) - as[js..., end] = 3 - as -end -""") == [0 0 1; - 2 0 3] - # Declarations @test JuliaLowering.include_string(test_mod, """ let diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 40c7b81b2e87d..964d498c8057a 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -147,113 +147,6 @@ end 9 (= slot₁/X %₈) 10 (return %₈) -######################################## -# simple setindex! -a[i] = x -#--------------------- -1 TestMod.x -2 TestMod.a -3 TestMod.i -4 (call top.setindex! %₂ %₁ %₃) -5 (return %₁) - -######################################## -# simple setindex! with begin -a[begin] = x -#--------------------- -1 TestMod.a -2 TestMod.x -3 (call top.firstindex %₁) -4 (call top.setindex! %₁ %₂ %₃) -5 (return %₂) - -######################################## -# simple setindex! with end -a[end] = x -#--------------------- -1 TestMod.a -2 TestMod.x -3 (call top.lastindex %₁) -4 (call top.setindex! %₁ %₂ %₃) -5 (return %₂) - -######################################## -# multidimensional setindex! with begin -a[i, begin] = x -#--------------------- -1 TestMod.a -2 TestMod.x -3 TestMod.i -4 (call top.firstindex %₁ 2) -5 (call top.setindex! %₁ %₂ %₃ %₄) -6 (return %₂) - -######################################## -# multidimensional setindex! 
with end -a[i, end] = x -#--------------------- -1 TestMod.a -2 TestMod.x -3 TestMod.i -4 (call top.lastindex %₁ 2) -5 (call top.setindex! %₁ %₂ %₃ %₄) -6 (return %₂) - -######################################## -# multidimensional setindex! with begin/end and splats -a[is..., end, js..., begin] = x -#--------------------- -1 TestMod.a -2 TestMod.is -3 (call top.length %₂) -4 (call top.+ 1 %₃) -5 (call top.lastindex %₁ %₄) -6 TestMod.js -7 (call top.length %₂) -8 (call top.length %₆) -9 (call top.+ 2 %₇ %₈) -10 (call top.firstindex %₁ %₉) -11 TestMod.x -12 (call core.tuple %₁ %₁₁) -13 (call core.tuple %₅) -14 (call core.tuple %₁₀) -15 (call core._apply_iterate top.iterate top.setindex! %₁₂ %₂ %₁₃ %₆ %₁₄) -16 (return %₁₁) - -######################################## -# setindex! with nontrivial array expression and begin/end -f()[end] = x -#--------------------- -1 TestMod.f -2 (call %₁) -3 TestMod.x -4 (call top.lastindex %₂) -5 (call top.setindex! %₂ %₃ %₄) -6 (return %₃) - -######################################## -# nested refs -b[a[begin]] = x -#--------------------- -1 TestMod.b -2 TestMod.x -3 TestMod.a -4 (call top.firstindex %₃) -5 (call top.getindex %₃ %₄) -6 (call top.setindex! %₁ %₂ %₅) -7 (return %₂) - -######################################## -# empty ref and setindex! -let - a[] = rhs -end -#--------------------- -1 TestMod.rhs -2 TestMod.a -3 (call top.setindex! 
%₂ %₁) -4 (return %₁) - ######################################## # Error: Invalid lhs in `=` a.(b) = rhs From 96b4345729b9e171777a0144e2677e0e5d564763 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 22:36:26 +1000 Subject: [PATCH 0944/1109] Desugaring of `do` syntax --- JuliaLowering/src/ast.jl | 18 +++++++++++-- JuliaLowering/src/desugaring.jl | 39 +++++++++++++++------------- JuliaLowering/src/macro_expansion.jl | 6 +++++ JuliaLowering/test/closures.jl | 9 +++++++ JuliaLowering/test/closures_ir.jl | 38 +++++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 20 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 24a6643f54cfc..794fc56c72c03 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -545,12 +545,26 @@ function is_function_def(ex) return k == K"function" || k == K"->" end +function find_parameters_ind(exs) + i = length(exs) + while i >= 1 + k = kind(exs[i]) + if k == K"parameters" + return i + elseif k != K"do" + break + end + i -= 1 + end + return 0 +end + function has_parameters(ex::SyntaxTree) - numchildren(ex) >= 1 && kind(ex[end]) == K"parameters" + find_parameters_ind(children(ex)) != 0 end function has_parameters(args::AbstractVector) - length(args) >= 1 && kind(args[end]) == K"parameters" + find_parameters_ind(args) != 0 end function any_assignment(exs) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 9e12395c2320f..99bf57199033d 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -72,18 +72,16 @@ function is_effect_free(ex) end function check_no_parameters(ex::SyntaxTree, msg) - if numchildren(ex) >= 1 - pars = ex[end] - if kind(pars) == K"parameters" - throw(LoweringError(pars, msg)) - end + i = find_parameters_ind(children(ex)) + if i > 0 + throw(LoweringError(ex[i], msg)) end end function check_no_assignment(exs) - assign_pos = findfirst(kind(e) == K"=" for e in exs) - if !isnothing(assign_pos) - 
throw(LoweringError(exs[assign_pos], "misplaced assignment statement in `[ ... ]`")) + i = findfirst(kind(e) == K"=" for e in exs) + if !isnothing(i) + throw(LoweringError(exs[i], "misplaced assignment statement in `[ ... ]`")) end end @@ -2068,11 +2066,11 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] end -function expand_arrow_arglist(ctx, arglist) +function expand_arrow_arglist(ctx, arglist, arrowname) k = kind(arglist) if k == K"where" @ast ctx arglist [K"where" - expand_arrow_arglist(ctx, arglist[1]) + expand_arrow_arglist(ctx, arglist[1], arrowname) argslist[2] ] else @@ -2092,12 +2090,22 @@ function expand_arrow_arglist(ctx, arglist) ] end @ast ctx arglist [K"call" - "->"::K"Placeholder" + arrowname::K"Placeholder" children(arglist)... ] end end +function expand_arrow(ctx, ex) + @chk numchildren(ex) == 2 + expand_forms_2(ctx, + @ast ctx ex [K"function" + expand_arrow_arglist(ctx, ex[1], string(kind(ex))) + ex[2] + ] + ) +end + #------------------------------------------------------------------------------- # Expand macro definitions @@ -3144,13 +3152,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) sig = expand_forms_2(ctx, ex[2], ex) elseif k == K"for" expand_forms_2(ctx, expand_for(ctx, ex)) - elseif k == K"->" - expand_forms_2(ctx, - @ast ctx ex [K"function" - expand_arrow_arglist(ctx, ex[1]) - ex[2] - ] - ) + elseif k == K"->" || k == K"do" + expand_forms_2(ctx, expand_arrow(ctx, ex)) elseif k == K"function" expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) elseif k == K"macro" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 2fc846637492a..6a9fc9ce0d1d5 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -252,6 +252,12 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) farg = ex[1] append!(args, ex[2:end]) end + if !isempty(args) + if kind(args[end]) == K"do" + # move 
do block into first argument location + pushfirst!(args, pop!(args)) + end + end if length(args) == 2 && is_same_identifier_like(farg, "^") && kind(args[2]) == K"Integer" # Do literal-pow expansion here as it's later used in both call and # dotcall expansion. diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index c26a691e54b1f..12c070f8f6b4f 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -98,6 +98,15 @@ begin end """) == 5 +# Do block syntax +@test JuliaLowering.include_string(test_mod, """ +begin + local y = 2 + call_it(3) do x + x + y + end +end +""") == 5 # Attempt to reference capture which is not assigned @test_throws UndefVarError(:x, :local) JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 9c8853cf05c93..1213efe469131 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -411,6 +411,44 @@ end 3 (return %₂) 9 (return %₃) +######################################## +# `do` blocks +f(x; a=1) do y + y + 2 +end +#--------------------- +1 TestMod.f +2 (call core.tuple :a) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1) +5 (call %₃ %₄) +6 --- thunk + 1 (global TestMod.#do##0) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#do##0 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#do##0) + 8 (= TestMod.#do##0 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +7 TestMod.#do##0 +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 1 =#))))) +11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read) slot₂/y] + 1 TestMod.+ + 2 (call %₁ slot₂/y 2) + 3 (return %₂) +12 TestMod.#do##0 +13 (new %₁₂) +14 TestMod.x +15 (call core.kwcall %₅ %₁ %₁₃ %₁₄) +16 (return %₁₅) + ######################################## # Error: Attempt to add methods to a function argument function f(g) From de554340cd3f6cf1286aeefaaae9d7910d7e1411 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 19 Jan 2025 22:47:17 +1000 Subject: [PATCH 0945/1109] Add desugaring error for misplaced `@ atomic` --- JuliaLowering/src/desugaring.jl | 4 +++- JuliaLowering/test/misc_ir.jl | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 99bf57199033d..170ba58accffc 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -3101,7 +3101,9 @@ expanded to a function call `getproperty(a, :b)`. 
""" function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) k = kind(ex) - if k == K"call" + if k == K"atomic" + throw(LoweringError(ex, "unimplemented or unsupported atomic declaration")) + elseif k == K"call" expand_call(ctx, ex) elseif k == K"dotcall" || ((k == K"&&" || k == K"||") && is_dotted(ex)) expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 9c724da2642c1..27b631059fdfd 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -246,3 +246,15 @@ LoweringError: LoweringError: #= line 1 =# - expected `numchildren(ex) >= 2` +######################################## +# Error: @atomic in wrong position +let + @atomic x +end +#--------------------- +LoweringError: +let + @atomic x +# └───────┘ ── unimplemented or unsupported atomic declaration +end + From fe3a0f2980e6dc4ab797be79efd40b1a2a54c17b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 20 Jan 2025 00:07:17 +1000 Subject: [PATCH 0946/1109] Lowering of `GC.@ preserve` --- JuliaLowering/src/eval.jl | 2 ++ JuliaLowering/src/kinds.jl | 3 +++ JuliaLowering/src/linear_ir.jl | 11 +++++++++-- JuliaLowering/src/runtime.jl | 23 +++++++++++++++++++++++ JuliaLowering/src/scope_analysis.jl | 2 -- JuliaLowering/test/demo.jl | 4 ++-- JuliaLowering/test/misc.jl | 9 +++++++++ JuliaLowering/test/misc_ir.jl | 29 +++++++++++++++++++++++++++++ JuliaLowering/test/runtests.jl | 4 +++- JuliaLowering/test/utils.jl | 6 ++++-- 10 files changed, 84 insertions(+), 9 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index e2541150e40f9..70669586f30bb 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -279,6 +279,8 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"isdefined" ? :isdefined : k == K"pop_exception" ? :pop_exception : k == K"captured_local" ? :captured_local : + k == K"gc_preserve_begin" ? 
:gc_preserve_begin : + k == K"gc_preserve_end" ? :gc_preserve_end : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index c0e3976f1534d..60a9a7f0c4813 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -9,6 +9,9 @@ function _register_kinds() "BEGIN_EXTENSION_KINDS" # atomic fields or accesses (see `@atomic`) "atomic" + # Temporary rooting of identifiers (GC.@preserve) + "gc_preserve_begin" + "gc_preserve_end" # A literal Julia value of any kind, as might be inserted into the # AST during macro expansion "Value" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 0ee1a3bb8287a..c58d28e6a58f6 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -144,7 +144,7 @@ function is_valid_ir_rvalue(ctx, lhs, rhs) is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref - kind(rhs) in KSet"new call foreigncall") + kind(rhs) in KSet"new call foreigncall gc_preserve_begin") end # evaluate the arguments of a call, creating temporary locations as needed @@ -751,6 +751,13 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else emit(ctx, lam) end + elseif k == K"gc_preserve_begin" + makenode(ctx, ex, k, compile_args(ctx, children(ex))) + elseif k == K"gc_preserve_end" + if needs_value + throw(LoweringError(ex, "misplaced label in value position")) + end + emit(ctx, ex) elseif k == K"_while" end_label = make_label(ctx, ex) top_label = emit_label(ctx, ex) @@ -1017,7 +1024,7 @@ function compile_lambda(outer_ctx, ex) @assert info.kind == :static_parameter slot_rewrites[id] = i end - # @info "" @ast ctx ex [K"block" ctx.code] + # @info "" @ast ctx ex [K"block" ctx.code...] 
code = renumber_body(ctx, ctx.code, slot_rewrites) @ast ctx ex [K"code_info"(is_toplevel_thunk=ex.is_toplevel_thunk, slots=slots) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 6a91d641773f0..3768ecd8ab821 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -301,6 +301,29 @@ function Base.var"@nospecialize"(__context__::MacroContext, ex) _apply_nospecialize(__context__, ex) end +function Base.GC.var"@preserve"(__context__::MacroContext, exs...) + idents = exs[1:end-1] + for e in idents + if kind(e) != K"Identifier" + throw(MacroExpansionError(e, "Preserved variable must be a symbol")) + end + end + @ast __context__ __context__.macrocall [K"block" + [K"=" + "s"::K"Identifier" + [K"gc_preserve_begin" + idents... + ] + ] + [K"=" + "r"::K"Identifier" + exs[end] + ] + [K"gc_preserve_end" "s"::K"Identifier"] + "r"::K"Identifier" + ] +end + function Base.var"@atomic"(__context__::MacroContext, ex) @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration") @ast __context__ __context__.macrocall [K"atomic" ex] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 698df95f77809..a5f1cad838906 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -515,8 +515,6 @@ function _resolve_scopes(ctx, ex::SyntaxTree) end push!(stmts, locals_dict) makenode(ctx, ex, K"block", stmts) - else - throw(LoweringError(ex, "Unknown syntax extension")) end elseif k == K"assert" etype = extension_type(ex) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 9bf5b57e3731a..6553620b50f20 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -755,8 +755,8 @@ end # """ src = """ -function f_slotflags(x, y, f, z) - f() + x + y +GC.@preserve a b begin + f(a,b) end """ diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 8a21d89defbc4..e27dc35353ff4 100644 --- 
a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -11,4 +11,13 @@ end # Placeholders @test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 +# GC.@preserve +@test JuliaLowering.include_string(test_mod, """ +let x = [1,2] + GC.@preserve x begin + x + end +end +""") == [1,2] + end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 27b631059fdfd..498cbca2fc235 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -258,3 +258,32 @@ let # └───────┘ ── unimplemented or unsupported atomic declaration end +######################################## +# GC.@preserve support +GC.@preserve a b begin + f(a,b) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 (= slot₂/s (gc_preserve_begin %₁ %₂)) +4 TestMod.f +5 TestMod.a +6 TestMod.b +7 (= slot₁/r (call %₄ %₅ %₆)) +8 (gc_preserve_end slot₂/s) +9 slot₁/r +10 (return %₉) + +######################################## +# Error: GC.@preserve bad args +GC.@preserve a b g() begin + body +end +#--------------------- +MacroExpansionError while expanding (. 
GC @preserve) in module Main.TestMod: +GC.@preserve a b g() begin +# └─┘ ── Preserved variable must be a symbol + body +end + diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 0c8d0a2608b36..4ecfb5453ab24 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -9,12 +9,14 @@ include("utils.jl") include("ir_tests.jl") include("arrays.jl") + include("assignments.jl") include("branching.jl") + include("closures.jl") include("decls.jl") + include("destructuring.jl") include("desugaring.jl") include("exceptions.jl") include("functions.jl") - include("closures.jl") include("import.jl") include("loops.jl") include("macros.jl") diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 25d8aad4d4ed0..bdb3940a7e7ee 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -17,7 +17,7 @@ using JuliaLowering: Kind, SourceRef, SyntaxTree, NodeId, makenode, makeleaf, setattr!, sethead!, is_leaf, numchildren, children, - @ast, flattened_provenance, showprov, LoweringError, + @ast, flattened_provenance, showprov, LoweringError, MacroExpansionError, syntax_graph, Bindings, ScopeLayer function _ast_test_graph() @@ -155,7 +155,7 @@ end function format_ir_for_test(mod, description, input, expect_error=false, is_todo=false) ex = parsestmt(SyntaxTree, input) try - if kind(ex) == K"macrocall" && ex[1].name_val == "@ast_" + if kind(ex) == K"macrocall" && kind(ex[1]) == K"MacroName" && ex[1].name_val == "@ast_" # Total hack, until @ast_ can be implemented in terms of new-style # macros. 
ex = JuliaLowering.eval(mod, Expr(ex)) @@ -171,6 +171,8 @@ function format_ir_for_test(mod, description, input, expect_error=false, is_todo rethrow() elseif expect_error && (exc isa LoweringError) return sprint(io->Base.showerror(io, exc, show_detail=false)) + elseif expect_error && (exc isa MacroExpansionError) + return sprint(io->Base.showerror(io, exc)) elseif is_todo return sprint(io->Base.showerror(io, exc)) else From 71f6cf6d55c9ec8dbc173e203fdb498f5b72f5a7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 21 Jan 2025 16:44:40 +1000 Subject: [PATCH 0947/1109] Desugaring of `ccall()` syntax --- JuliaLowering/src/desugaring.jl | 163 ++++++++++++++++++++++++++++-- JuliaLowering/src/eval.jl | 5 +- JuliaLowering/src/linear_ir.jl | 45 ++++++++- JuliaLowering/test/ccall_demo.jl | 1 + JuliaLowering/test/demo.jl | 4 +- JuliaLowering/test/misc.jl | 3 + JuliaLowering/test/misc_ir.jl | 164 +++++++++++++++++++++++++++++++ 7 files changed, 366 insertions(+), 19 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 170ba58accffc..c33bdd6f3412b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -592,38 +592,38 @@ end # `idxs` - list of indices # returns the expanded indices. Any statements that need to execute first are # added to ctx.stmts. -function process_indices(ctx::StatementListCtx, arr, idxs) +function process_indices(sctx::StatementListCtx, arr, idxs) has_splats = any(kind(i) == K"..." for i in idxs) - idxs_out = SyntaxList(ctx) - splats = SyntaxList(ctx) + idxs_out = SyntaxList(sctx) + splats = SyntaxList(sctx) for (n, idx0) in enumerate(idxs) is_splat = kind(idx0) == K"..." - val = replace_beginend(ctx, is_splat ? idx0[1] : idx0, + val = replace_beginend(sctx, is_splat ? idx0[1] : idx0, arr, n, splats, n == length(idxs)) # TODO: kwarg? - idx = !has_splats || is_simple_atom(ctx, val) ? val : emit_assign_tmp(ctx, val) + idx = !has_splats || is_simple_atom(sctx, val) ? 
val : emit_assign_tmp(sctx, val) if is_splat push!(splats, idx) end - push!(idxs_out, is_splat ? @ast(ctx, idx0, [K"..." idx]) : idx) + push!(idxs_out, is_splat ? @ast(sctx, idx0, [K"..." idx]) : idx) end return idxs_out end -# Expand things like `f()[i,end]`, add to `ctx.stmts` (temporaries for +# Expand things like `f()[i,end]`, add to `sctx.stmts` (temporaries for # computing indices) and return # * `arr` - The array (may be a temporary ssa value) # * `idxs` - List of indices -function expand_ref_components(ctx::StatementListCtx, ex) +function expand_ref_components(sctx::StatementListCtx, ex) check_no_parameters(ex, "unexpected semicolon in array expression") @assert kind(ex) == K"ref" @chk numchildren(ex) >= 1 arr = ex[1] idxs = ex[2:end] if any(contains_identifier(e, "begin", "end") for e in idxs) - arr = emit_assign_tmp(ctx, arr) + arr = emit_assign_tmp(sctx, arr) end - new_idxs = process_indices(ctx, arr, idxs) + new_idxs = process_indices(sctx, arr, idxs) return (arr, new_idxs) end @@ -1283,6 +1283,146 @@ function expand_kw_call(ctx, srcref, farg, args, kws) ] end +function expand_ccall(ctx, ex) + @assert kind(ex) == K"call" && is_same_identifier_like(ex[1], "ccall") + if numchildren(ex) < 4 + throw(LoweringError(ex, "too few arguments to ccall")) + end + cfunc_name = ex[2] + # Detect calling convention if present. + # + # Note `@ccall` also emits `Expr(:cconv, convention, nreq)`, but this is a + # somewhat undocumented performance workaround. Instead we should just make + # sure @ccall can emit foreigncall directly and efficiently. 
+ known_conventions = ("cdecl", "stdcall", "fastcall", "thiscall", "llvmcall") + cconv = if any(is_same_identifier_like(ex[3], id) for id in known_conventions) + ex[3] + end + if isnothing(cconv) + rt_idx = 3 + else + rt_idx = 4 + if numchildren(ex) < 5 + throw(LoweringError(ex, "too few arguments to ccall with calling convention specified")) + end + end + return_type = ex[rt_idx] + arg_type_tuple = ex[rt_idx+1] + args = ex[rt_idx+2:end] + if kind(arg_type_tuple) != K"tuple" + msg = "ccall argument types must be a tuple; try `(T,)`" + if kind(return_type) == K"tuple" + throw(LoweringError(return_type, msg*" and check if you specified a correct return type")) + else + throw(LoweringError(arg_type_tuple, msg)) + end + end + arg_types = children(arg_type_tuple) + vararg_type = nothing + if length(arg_types) >= 1 + va = arg_types[end] + if kind(va) == K"..." + @chk numchildren(va) == 1 + # Ok: vararg function + vararg_type = va + end + end + # todo: use multi-range errors here + if length(args) < length(arg_types) + throw(LoweringError(ex, "Too few arguments in ccall compared to argument types")) + elseif length(args) > length(arg_types) && isnothing(vararg_type) + throw(LoweringError(ex, "More arguments than types in ccall")) + end + if isnothing(vararg_type) + num_required_args = 0 + else + num_required_args = length(arg_types) - 1 + if num_required_args < 1 + throw(LoweringError(vararg_type, "C ABI prohibits vararg without one required argument")) + end + end + sctx = with_stmts(ctx) + expanded_types = SyntaxList(ctx) + for (i, argt) in enumerate(arg_types) + if kind(argt) == K"..." + if i == length(arg_types) + argt = argt[1] + else + throw(LoweringError(argt, "only the trailing ccall argument type should have `...`")) + end + end + if is_same_identifier_like(argt, "Any") + # Special rule: Any becomes core.Any regardless of the module + # scope, and don't need GC roots. 
+ argt = @ast ctx argt "Any"::K"core" + end + push!(expanded_types, expand_forms_2(ctx, argt)) + end + # + # An improvement might be wrap the use of types in cconvert in a special + # K"global_scope" expression which modifies the scope resolution. This + # would at least make the rules self consistent if not pretty. + # + # One small improvement we make here is to emit temporaries for all the + # types used during expansion so at least we don't have their side effects + # more than once. + types_for_conv = SyntaxList(ctx) + for argt in expanded_types + push!(types_for_conv, emit_assign_tmp(sctx, argt)) + end + gc_roots = SyntaxList(ctx) + unsafe_args = SyntaxList(ctx) + for (i,arg) in enumerate(args) + if i > length(expanded_types) + raw_argt = expanded_types[end] + push!(expanded_types, raw_argt) + argt = types_for_conv[end] + else + raw_argt = expanded_types[i] + argt = types_for_conv[i] + end + exarg = expand_forms_2(ctx, arg) + if kind(raw_argt) == K"core" && raw_argt.name_val == "Any" + push!(unsafe_args, exarg) + else + cconverted_arg = emit_assign_tmp(sctx, + @ast ctx argt [K"call" + "cconvert"::K"top" + argt + exarg + ] + ) + push!(gc_roots, cconverted_arg) + push!(unsafe_args, + @ast ctx argt [K"call" + "unsafe_convert"::K"top" + argt + cconverted_arg + ] + ) + end + end + @ast ctx ex [K"block" + sctx.stmts... + [K"foreigncall" + expand_forms_2(ctx, cfunc_name) + expand_forms_2(ctx, return_type) + [K"call" + "svec"::K"core" + expanded_types... + ] + num_required_args::K"Integer" + if isnothing(cconv) + "ccall"::K"Symbol" + else + cconv=>K"Symbol" + end + unsafe_args... + gc_roots... 
# GC roots + ] + ] +end + # Wrap unsplatted arguments in `tuple`: # `[a, b, xs..., c]` -> `[(a, b), xs, (c,)]` function _wrap_unsplatted_args(ctx, call_ex, args) @@ -1343,6 +1483,9 @@ end function expand_call(ctx, ex) farg = ex[1] + if is_same_identifier_like(farg, "ccall") + return expand_ccall(ctx, ex) + end args = copy(ex[2:end]) kws = remove_kw_args!(ctx, args) if !isnothing(kws) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 70669586f30bb..00c0340da1811 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -277,10 +277,11 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"const" ? :const : k == K"leave" ? :leave : k == K"isdefined" ? :isdefined : - k == K"pop_exception" ? :pop_exception : - k == K"captured_local" ? :captured_local : + k == K"pop_exception" ? :pop_exception : + k == K"captured_local" ? :captured_local : k == K"gc_preserve_begin" ? :gc_preserve_begin : k == K"gc_preserve_end" ? :gc_preserve_end : + k == K"foreigncall" ? 
:foreigncall : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index c58d28e6a58f6..ef44bc6e6540a 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -144,7 +144,13 @@ function is_valid_ir_rvalue(ctx, lhs, rhs) is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref - kind(rhs) in KSet"new call foreigncall gc_preserve_begin") + kind(rhs) in KSet"new call foreigncall gc_preserve_begin foreigncall") +end + +function contains_nonglobal_binding(ctx, ex) + contains_unquoted(ex) do e + kind(e) == K"BindingId" && lookup_binding(ctx, e).kind !== :global + end end # evaluate the arguments of a call, creating temporary locations as needed @@ -573,9 +579,40 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end - elseif k == K"call" || k == K"new" || k == K"splatnew" - # TODO k ∈ foreigncall cfunction new_opaque_closure cglobal - args = compile_args(ctx, children(ex)) + elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" + # TODO k ∈ cfunction new_opaque_closure cglobal + args = if k == K"foreigncall" + args_ = SyntaxList(ctx) + # todo: is is_leaf correct here? flisp uses `atom?` + func = ex[1] + if kind(func) == K"call" && kind(func[1]) == K"core" && func[1].name_val == "tuple" + # Tuples like core.tuple(:funcname, mylib_name) are allowed, + # but may only reference globals. + if contains_nonglobal_binding(ctx, func) + throw(LoweringError(func, "ccall function name and library expression cannot reference local variables")) + end + append!(args_, compile_args(ctx, ex[1:1])) + elseif is_leaf(func) + append!(args_, compile_args(ctx, ex[1:1])) + else + push!(args_, func) + end + # 2nd to 5th arguments of foreigncall are special. 
They must be + # left in place but cannot reference locals. + if contains_nonglobal_binding(ctx, ex[2]) + throw(LoweringError(ex[2], "ccall return type cannot reference local variables")) + end + for argt in children(ex[3]) + if contains_nonglobal_binding(ctx, argt) + throw(LoweringError(argt, "ccall argument types cannot reference local variables")) + end + end + append!(args_, ex[2:5]) + append!(args_, compile_args(ctx, ex[6:end])) + args_ + else + compile_args(ctx, children(ex)) + end callex = makenode(ctx, ex, k, args) if in_tail_pos emit_return(ctx, ex, callex) diff --git a/JuliaLowering/test/ccall_demo.jl b/JuliaLowering/test/ccall_demo.jl index 266b161178587..0d7b784c377eb 100644 --- a/JuliaLowering/test/ccall_demo.jl +++ b/JuliaLowering/test/ccall_demo.jl @@ -110,6 +110,7 @@ function ccall_macro_lower(ex, convention, func, rettype, types, args, num_varar func rettype ast":(Core.svec($(types...)))" + # Is this num_varargs correct? It seems wrong? num_varargs::K"Integer" convention::K"Symbol" cargs... 
diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 6553620b50f20..1402b67e7ba7b 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -755,9 +755,7 @@ end # """ src = """ -GC.@preserve a b begin - f(a,b) -end +ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index e27dc35353ff4..61288f23eb8ff 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -20,4 +20,7 @@ let x = [1,2] end """) == [1,2] +# ccall +@test ccall(:strlen, Csize_t, (Cstring,), "asdfg") == 5 + end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 498cbca2fc235..62a7785366410 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -287,3 +287,167 @@ GC.@preserve a b g() begin body end +######################################## +# basic ccall +ccall(:strlen, Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 (call top.unsafe_convert %₁ %₂) +4 (foreigncall :strlen TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₃ %₂) +5 (return %₄) + +######################################## +# ccall with library name as a global var +ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 TestMod.libc +4 (call core.tuple :strlen %₃) +5 (call top.unsafe_convert %₁ %₂) +6 (foreigncall %₄ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₅ %₂) +7 (return %₆) + +######################################## +# ccall with a calling convention +ccall(:foo, stdcall, Csize_t, ()) +#--------------------- +1 (foreigncall :foo TestMod.Csize_t (call core.svec) 0 :stdcall) +2 (return %₁) + +######################################## +# ccall with Any args become core.Any and don't need conversion or GC roots +ccall(:foo, 
stdcall, Csize_t, (Any,), x) +#--------------------- +1 core.Any +2 TestMod.x +3 (foreigncall :foo TestMod.Csize_t (call core.svec core.Any) 0 :stdcall %₂) +4 (return %₃) + +######################################## +# ccall with variable as function name (must eval to a pointer) +ccall(ptr, Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 TestMod.ptr +4 (call top.unsafe_convert %₁ %₂) +5 (foreigncall %₃ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₄ %₂) +6 (return %₅) + +######################################## +# ccall with varargs +ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") +#--------------------- +1 TestMod.Cstring +2 TestMod.Cstring +3 (call top.cconvert %₁ "%s = %s\n") +4 (call top.cconvert %₂ "2 + 2") +5 (call top.cconvert %₂ "5") +6 (call top.unsafe_convert %₁ %₃) +7 (call top.unsafe_convert %₂ %₄) +8 (call top.unsafe_convert %₂ %₅) +9 (foreigncall :printf TestMod.Cint (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) +10 (return %₉) + +######################################## +# Error: ccall with too few arguments +ccall(:foo, Csize_t) +#--------------------- +LoweringError: +ccall(:foo, Csize_t) +└──────────────────┘ ── too few arguments to ccall + +######################################## +# Error: ccall with calling conv and too few arguments +ccall(:foo, thiscall, Csize_t) +#--------------------- +LoweringError: +ccall(:foo, thiscall, Csize_t) +└────────────────────────────┘ ── too few arguments to ccall with calling convention specified + +######################################## +# Error: ccall without tuple for argument types +ccall(:foo, Csize_t, Cstring) +#--------------------- +LoweringError: +ccall(:foo, Csize_t, Cstring) +# └─────┘ ── ccall argument types must be a tuple; try `(T,)` + +######################################## +# Error: ccall without tuple for argument types +ccall(:foo, (Csize_t,), 
"arg") +#--------------------- +LoweringError: +ccall(:foo, (Csize_t,), "arg") +# └────────┘ ── ccall argument types must be a tuple; try `(T,)` and check if you specified a correct return type + +######################################## +# Error: ccall with library name which is a local variable +let libc = "libc" + ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let libc = "libc" + ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +# └─────────────┘ ── ccall function name and library expression cannot reference local variables +end + +######################################## +# Error: ccall with return type which is a local variable +let Csize_t = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let Csize_t = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +# └─────┘ ── ccall return type cannot reference local variables +end + +######################################## +# Error: ccall with argument type which is a local variable +let Cstring = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let Cstring = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +# └─────┘ ── ccall argument types cannot reference local variables +end + +######################################## +# Error: ccall with too few arguments +ccall(:strlen, Csize_t, (Cstring,)) +#--------------------- +LoweringError: +ccall(:strlen, Csize_t, (Cstring,)) +└─────────────────────────────────┘ ── Too few arguments in ccall compared to argument types + +######################################## +# Error: ccall with too many arguments +ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") +#--------------------- +LoweringError: +ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") +└──────────────────────────────────────────────────┘ ── More arguments than types in ccall + +######################################## +# Error: ccall varargs with too few args 
+ccall(:foo, Csize_t, (Cstring...,), "asdfg") +#--------------------- +LoweringError: +ccall(:foo, Csize_t, (Cstring...,), "asdfg") +# └────────┘ ── C ABI prohibits vararg without one required argument + +######################################## +# Error: ccall with multiple varargs +ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") +#--------------------- +LoweringError: +ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") +# └────────┘ ── only the trailing ccall argument type should have `...` + From 40514a9bae3537daa6c8481c8aa61d6722f68a45 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Jan 2025 14:39:54 +1000 Subject: [PATCH 0948/1109] Desugaring of updating assignment ops as in `x += y` Depends on https://github.com/JuliaLang/JuliaSyntax.jl/pull/530 --- JuliaLowering/src/desugaring.jl | 67 ++++++++++++++ JuliaLowering/test/assignments.jl | 35 ++++++++ JuliaLowering/test/assignments_ir.jl | 125 +++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index c33bdd6f3412b..abc87ccf1726a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -48,6 +48,12 @@ function contains_identifier(ex::SyntaxTree, idents...) end end +function contains_ssa_binding(ctx, ex) + contains_unquoted(ex) do e + kind(e) == K"BindingId" && lookup_binding(ctx, e).is_ssa + end +end + # Return true if `f(e)` is true for any unquoted child of `ex`, recursively. 
function contains_unquoted(f::Function, ex::SyntaxTree) if f(ex) @@ -1076,6 +1082,65 @@ function expand_assignment(ctx, ex) end end +function expand_update_operator(ctx, ex) + k = kind(ex) + dotted = is_dotted(ex) + + @chk numchildren(ex) == 3 + lhs = ex[1] + op = ex[2] + rhs = ex[3] + + stmts = SyntaxList(ctx) + + declT = nothing + if kind(lhs) == K"::" + # eg `a[i]::T += 1` + declT = lhs[2] + decl_lhs = lhs + lhs = lhs[1] + end + + if kind(lhs) == K"ref" + # eg `a[end] = rhs` + sctx = with_stmts(ctx, stmts) + (arr, idxs) = expand_ref_components(sctx, lhs) + lhs = @ast ctx lhs [K"ref" arr idxs...] + end + + lhs = remove_argument_side_effects(ctx, stmts, lhs) + + if dotted + if !(kind(lhs) == K"ref" || (kind(lhs) == K"." && numchildren(lhs) == 2)) + # `f() .+= rhs` + lhs = emit_assign_tmp(stmts, ctx, lhs) + end + else + if kind(lhs) == K"tuple" && contains_ssa_binding(ctx, lhs) + # If remove_argument_side_effects needed to replace an expression + # with an ssavalue, then it can't be updated by assignment + # (JuliaLang/julia#30062) + throw(LoweringError(lhs, "invalid multiple assignment location")) + end + end + + @ast ctx ex [K"block" + stmts... + [K"="(syntax_flags=(dotted ? JuliaSyntax.DOTOP_FLAG : nothing)) + lhs + [(dotted ? 
K"dotcall" : K"call") + op + if isnothing(declT) + lhs + else + [K"::"(decl_lhs) lhs declT] + end + rhs + ] + ] + ] +end + #------------------------------------------------------------------------------- # Expand logical conditional statements @@ -3279,6 +3344,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_forms_2(ctx, ex[1]) expand_forms_2(ctx, ex[2]) ] + elseif k == K"op=" + expand_forms_2(ctx, expand_update_operator(ctx, ex)) elseif k == K"=" if is_dotted(ex) expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 54ddcbab7257c..0f7fa488c18d9 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -49,5 +49,40 @@ let end """) === 10 +# Updating assignments +@test JuliaLowering.include_string(test_mod, """ +let x = "hi" + x *= " ho" + x +end +""") == "hi ho" + +@test JuliaLowering.include_string(test_mod, """ +let x = [1,3] + x .-= [0,1] + x +end +""") == [1,2] + +@test JuliaLowering.include_string(test_mod, """ +let x = [1 2; 3 4] + x[begin, 1:end] .-= 1 + x +end +""") == [0 1 ; 3 4] + +# Test that side effects of computing indices in left hand side only occur +# once. 
+@test JuliaLowering.include_string(test_mod, """ +let + x = [1, 2] + n_calls = 0 + the_index() = (n_calls = n_calls + 1; 1) + x[the_index()] += 1 + x[the_index()]::Int += 1 + x[the_index():end] .+= 1 + n_calls +end +""") == 3 end diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 964d498c8057a..0959f819d3169 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -219,3 +219,128 @@ LoweringError: 1 = rhs ╙ ── invalid assignment location +######################################## +# Basic updating assignment +begin + local x + x += y +end +#--------------------- +1 TestMod.+ +2 slot₁/x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (= slot₁/x %₄) +6 (return %₄) + +######################################## +# Broadcasted updating assignment +begin + local x + x .+= y +end +#--------------------- +1 (newvar slot₁/x) +2 slot₁/x +3 TestMod.+ +4 TestMod.y +5 (call top.broadcasted %₃ %₂ %₄) +6 (call top.materialize! %₂ %₅) +7 (return %₆) + +######################################## +# Broadcasted updating assignment with general left hand side permitted +f() .+= y +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.+ +4 TestMod.y +5 (call top.broadcasted %₃ %₂ %₄) +6 (call top.materialize! %₂ %₅) +7 (return %₆) + +######################################## +# Updating assignment with basic ref as left hand side +x[i] += y +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.i +4 (call top.getindex %₂ %₃) +5 TestMod.y +6 (call %₁ %₄ %₅) +7 TestMod.x +8 TestMod.i +9 (call top.setindex! %₇ %₆ %₈) +10 (return %₆) + +######################################## +# Updating assignment with complex ref as left hand side +g()[f(), end] += y +#--------------------- +1 TestMod.g +2 (call %₁) +3 TestMod.f +4 (call %₃) +5 (call top.lastindex %₂ 2) +6 TestMod.+ +7 (call top.getindex %₂ %₄ %₅) +8 TestMod.y +9 (call %₆ %₇ %₈) +10 (call top.setindex! 
%₂ %₉ %₄ %₅) +11 (return %₉) + +######################################## +# Updating assignment with type assert on left hand side +begin + local x + x::T += y +end +#--------------------- +1 TestMod.+ +2 slot₁/x +3 TestMod.T +4 (call core.typeassert %₂ %₃) +5 TestMod.y +6 (call %₁ %₄ %₅) +7 (= slot₁/x %₆) +8 (return %₆) + +######################################## +# Updating assignment with ref and type assert on left hand side +begin + local x + x[f()]::T += y +end +#--------------------- +1 (newvar slot₁/x) +2 TestMod.f +3 (call %₂) +4 TestMod.+ +5 slot₁/x +6 (call top.getindex %₅ %₃) +7 TestMod.T +8 (call core.typeassert %₆ %₇) +9 TestMod.y +10 (call %₄ %₈ %₉) +11 slot₁/x +12 (call top.setindex! %₁₁ %₁₀ %₃) +13 (return %₁₀) + +######################################## +# Error: Updating assignment with invalid left hand side +f() += y +#--------------------- +LoweringError: +f() += y +└─┘ ── invalid assignment location + +######################################## +# Error: Updating assignment with invalid tuple destructuring on left hand side +(if false end, b) += 2 +#--------------------- +LoweringError: +(if false end, b) += 2 +└───────────────┘ ── invalid multiple assignment location + From b6f5904025a2e27bc9509000243fe2bae68a6aa3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Jan 2025 14:45:25 +1000 Subject: [PATCH 0949/1109] Desugaring of juxtaposition --- JuliaLowering/src/macro_expansion.jl | 8 ++++++++ JuliaLowering/test/misc.jl | 10 +++++++++- JuliaLowering/test/misc_ir.jl | 9 +++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 6a9fc9ce0d1d5..6e0d98035794e 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -211,6 +211,14 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) # Strip "container" nodes @chk numchildren(ex) == 1 expand_forms_1(ctx, ex[1]) + elseif k == K"juxtapose" + 
layerid = get(ex, :scope_layer, ctx.current_layer.id) + @chk numchildren(ex) == 2 + @ast ctx ex [K"call" + "*"::K"Identifier"(scope_layer=layerid) + expand_forms_1(ctx, ex[1]) + expand_forms_1(ctx, ex[2]) + ] elseif k == K"quote" @chk numchildren(ex) == 1 # TODO: Upstream should set a general flag for detecting parenthesized diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 61288f23eb8ff..b1e894263a04b 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -20,7 +20,15 @@ let x = [1,2] end """) == [1,2] +@test JuliaLowering.include_string(test_mod, """ +let x=11 + 20x +end +""") == 220 + # ccall -@test ccall(:strlen, Csize_t, (Cstring,), "asdfg") == 5 +@test JuliaLowering.include_string(test_mod, """ +ccall(:strlen, Csize_t, (Cstring,), "asdfg") +""") == 5 end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 62a7785366410..fad1ff8f44108 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -287,6 +287,15 @@ GC.@preserve a b g() begin body end +######################################## +# Juxtaposition +20x +#--------------------- +1 TestMod.* +2 TestMod.x +3 (call %₁ 20 %₂) +4 (return %₃) + ######################################## # basic ccall ccall(:strlen, Csize_t, (Cstring,), "asdfg") From 5d6df34d8627edfb48a2d062d3ca377b3bb444a0 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Jan 2025 14:45:25 +1000 Subject: [PATCH 0950/1109] Additional test for macro scope of * in juxtaposition --- JuliaLowering/test/misc_ir.jl | 23 +++++++++++++++++++++++ JuliaLowering/test/utils.jl | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index fad1ff8f44108..d5d8383b31dee 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -1,3 +1,10 @@ +module JuxtTest + macro emit_juxt() + :(10x) + end +end + 
+#******************************************************************************* ######################################## # Getproperty syntax x.a @@ -296,6 +303,22 @@ end 3 (call %₁ 20 %₂) 4 (return %₃) +######################################## +# Juxtaposition - check the juxtapose multiply is resolved to `JuxtTest.*` when +# emitted by the macro in the JuxtTest module. +# +# This is consistent with Julia's existing system but it's not entirely clear +# this is good - perhaps we should resolve to Base.* instead? Resolving to the +# module-local version makes it exactly equivalent to `*`. But one might argue +# this is confusing because the symbol `*` appears nowhere in the user's source +# code. +JuxtTest.@emit_juxt +#--------------------- +1 TestMod.JuxtTest.* +2 TestMod.JuxtTest.x +3 (call %₁ 10 %₂) +4 (return %₃) + ######################################## # basic ccall ccall(:strlen, Csize_t, (Cstring,), "asdfg") diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index bdb3940a7e7ee..66634cc52fb52 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -147,7 +147,7 @@ end function setup_ir_test_module(preamble) test_mod = Module(:TestMod) - Base.include_string(test_mod, preamble) + JuliaLowering.include_string(test_mod, preamble) Base.eval(test_mod, :(const var"@ast_" = $(var"@ast_"))) test_mod end From a069880c9b70452b9e0ff0122213dd2f395960b3 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Jan 2025 16:08:35 +1000 Subject: [PATCH 0951/1109] Expansion of implicit where parameters `X{<:T}` --- JuliaLowering/src/desugaring.jl | 52 ++++++++++++++++++++----- JuliaLowering/test/typedefs.jl | 6 +++ JuliaLowering/test/typedefs_ir.jl | 65 +++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index abc87ccf1726a..cc06e6c347c6a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl 
@@ -84,10 +84,10 @@ function check_no_parameters(ex::SyntaxTree, msg) end end -function check_no_assignment(exs) +function check_no_assignment(exs, msg="misplaced assignment statement in `[ ... ]`") i = findfirst(kind(e) == K"=" for e in exs) if !isnothing(i) - throw(LoweringError(exs[i], "misplaced assignment statement in `[ ... ]`")) + throw(LoweringError(exs[i], msg)) end end @@ -3135,6 +3135,45 @@ function expand_wheres(ctx, ex) body end +# Match implicit where parameters for `Foo{<:Bar}` ==> `Foo{T} where T<:Bar` +function expand_curly(ctx, ex) + @assert kind(ex) == K"curly" + check_no_parameters(ex, "unexpected semicolon in type parameter list") + check_no_assignment(children(ex), "misplace assignment in type parameter list") + + stmts = SyntaxList(ctx) + type_args = SyntaxList(ctx) + implicit_typevars = SyntaxList(ctx) + + i = 1 + for e in children(ex) + k = kind(e) + if (k == K"<:" || k == K">:") && numchildren(e) == 1 + # `X{<:A}` and `X{>:A}` + name = @ast ctx e "#T$i"::K"Placeholder" + i += 1 + typevar = k == K"<:" ? + bounds_to_TypeVar(ctx, e, (name, nothing, e[1])) : + bounds_to_TypeVar(ctx, e, (name, e[1], nothing)) + arg = emit_assign_tmp(stmts, ctx, typevar) + push!(implicit_typevars, arg) + else + arg = e + end + push!(type_args, arg) + end + + type = @ast ctx ex [K"call" "apply_type"::K"core" type_args...] + if !isempty(implicit_typevars) + type = @ast ctx ex [K"block" + stmts... 
+ [K"where" type [K"_typevars" implicit_typevars...]] + ] + end + + return type +end + #------------------------------------------------------------------------------- # Expand import / using / export @@ -3446,14 +3485,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] ) elseif k == K"curly" - check_no_parameters(ex, "unexpected semicolon in type parameter list") - for c in children(ex) - if kind(c) == K"=" - throw(LoweringError(c, "misplace assignment in type parameter list")) - end - end - # TODO: implicit where parameters like T{A<:B} - expand_forms_2(ctx, @ast ctx ex [K"call" "apply_type"::K"core" children(ex)...]) + expand_forms_2(ctx, expand_curly(ctx, ex)) elseif k == K"toplevel" # The toplevel form can't be lowered here - it needs to just be quoted # and passed through to a call to eval. diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index 1842e442ad054..5b689522caf82 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -2,6 +2,12 @@ test_mod = Module(:TestMod) +Base.eval(test_mod, :(struct XX{S,T,U,W} end)) + +@test JuliaLowering.include_string(test_mod, """ +XX{Int, <:Integer, Float64, >:AbstractChar} +""") == (test_mod.XX{Int, T, Float64, S} where {T <: Integer, S >: AbstractChar}) + @test JuliaLowering.include_string(test_mod, """ abstract type A end """) === nothing diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index a907917362dca..91695d4f392f2 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -118,6 +118,71 @@ LoweringError: A where Y >: f() # └─┘ ── expected type name +######################################## +# Simple type application +X{A,B,C} +#--------------------- +1 TestMod.X +2 TestMod.A +3 TestMod.B +4 TestMod.C +5 (call core.apply_type %₁ %₂ %₃ %₄) +6 (return %₅) + +######################################## +# Type with implicit where param upper bound +X{<:A} 
+#--------------------- +1 TestMod.A +2 (call core.TypeVar :#T1 %₁) +3 TestMod.X +4 (call core.apply_type %₃ %₂) +5 (call core.UnionAll %₂ %₄) +6 (return %₅) + +######################################## +# Type with implicit where param lower bound +X{>:A} +#--------------------- +1 TestMod.A +2 (call core.TypeVar :#T1 %₁ core.Any) +3 TestMod.X +4 (call core.apply_type %₃ %₂) +5 (call core.UnionAll %₂ %₄) +6 (return %₅) + +######################################## +# Type with several implicit where params +X{S, <:A, T, >:B} +#--------------------- +1 TestMod.A +2 (call core.TypeVar :#T1 %₁) +3 TestMod.B +4 (call core.TypeVar :#T2 %₃ core.Any) +5 TestMod.X +6 TestMod.S +7 TestMod.T +8 (call core.apply_type %₅ %₆ %₂ %₇ %₄) +9 (call core.UnionAll %₄ %₈) +10 (call core.UnionAll %₂ %₉) +11 (return %₁₀) + +######################################## +# Error: parameters in type application +X{S, T; W} +#--------------------- +LoweringError: +X{S, T; W} +# └─┘ ── unexpected semicolon in type parameter list + +######################################## +# Error: assignment in type application +X{S, T=w} +#--------------------- +LoweringError: +X{S, T=w} +# └──┘ ── misplace assignment in type parameter list + ######################################## # Simple abstract type definition abstract type A end From 2ffb20720f39c70991ceb382c814213d593e7b76 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Jan 2025 16:09:20 +1000 Subject: [PATCH 0952/1109] AST: Use a single kind `K"op="` for updating assignments (JuliaLang/JuliaSyntax.jl#530) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make all updating assignment operators like `+=` be represented with a single `K"op="` head, with the operator itself in infix position. 
For example, `x += 1` is now parsed as [op=] x :: Identifier + :: Identifier y :: Identifier This greatly reduces the number of distinct forms here from a rather big list (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) and makes the operator itself appear in the AST as kind `K"Identifier"`, as it should. It also makes it possible to add further unicode updating operators while keeping the AST stable. The need for this was highlighted when working on JuliaLowering. When using `K"+="` as a head, one needs to look up the appropriate operator from the list of updating operators or use string munging on the Kind itself. This is quite awkward especially as it needs special rules for inferring the macro scope of the `+` identifier. In addition, having a single head for this form means update operator semantics only need to be dealt with in one place. --- JuliaSyntax/docs/src/reference.md | 1 + JuliaSyntax/src/expr.jl | 10 ++++++++ JuliaSyntax/src/kinds.jl | 17 +------------ JuliaSyntax/src/parse_stream.jl | 9 ++++--- JuliaSyntax/src/parser.jl | 36 +++++++++++++++++++++------ JuliaSyntax/src/tokenize.jl | 41 ++++++++++++++++--------------- JuliaSyntax/test/expr.jl | 10 ++++++++ JuliaSyntax/test/parser.jl | 24 ++++++++++++------ JuliaSyntax/test/tokenize.jl | 24 +++++++++--------- 9 files changed, 105 insertions(+), 67 deletions(-) diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index aeb1b44a59a19..be6ff90acf8b9 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -80,6 +80,7 @@ class of tokenization errors and lets the parser deal with them. * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) * Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. 
Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a nested `(iteration (in a as) (in b bs))` rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. * Short form functions like `f(x) = x + 1` are represented with the `function` head rather than the `=` head. In this case the `SHORT_FORM_FUNCTION_FLAG` flag is set to allow the surface syntactic form to be easily distinguished from long form functions. +* All kinds of updating assignment operators like `+=` are represented with a single `K"op="` head, with the operator itself in infix position. For example, `x += 1` is `(op= x + 1)`, where the plus token is of kind `K"Identifer"`. This greatly reduces the number of distinct forms here from a rather big list (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) and makes the operator itself appear in the AST as kind `K"Identifier"`, as it should. It also makes it possible to add further unicode updating operators while keeping the AST stable. ## More detail on tree differences diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index f7832f1e99d16..638e0b7569ac8 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -232,6 +232,16 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if k == K"?" 
headsym = :if + elseif k == K"op=" && length(args) == 3 + lhs = args[1] + op = args[2] + rhs = args[3] + headstr = string(args[2], '=') + if is_dotted(head) + headstr = '.'*headstr + end + headsym = Symbol(headstr) + args = Any[lhs, rhs] elseif k == K"macrocall" if length(args) >= 2 a2 = args[2] diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index dafc91deb0f45..21328c1a93ada 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -293,23 +293,8 @@ register_kinds!(JuliaSyntax, 0, [ "BEGIN_ASSIGNMENTS" "BEGIN_SYNTACTIC_ASSIGNMENTS" "=" - "+=" - "-=" # Also used for "−=" - "*=" - "/=" - "//=" - "|=" - "^=" - "÷=" - "%=" - "<<=" - ">>=" - ">>>=" - "\\=" - "&=" + "op=" # Updating assignment operator ( $= %= &= *= += -= //= /= <<= >>= >>>= \= ^= |= ÷= ⊻= ) ":=" - "\$=" - "⊻=" "END_SYNTACTIC_ASSIGNMENTS" "~" "≔" diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 42bedc49f52b7..33e029c6188d2 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -871,8 +871,9 @@ end Bump the next token, splitting it into several pieces Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. -The number of input bytes of the last spec is taken from the remaining bytes of -the input token, with the associated `nbyte` ignored. +If all `nbyte` are positive, the sum must equal the token length. If one +`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of +all `nbyte` must equal zero. This is a hack which helps resolves the occasional lexing ambiguity. For example @@ -887,12 +888,14 @@ function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} tok = stream.lookahead[stream.lookahead_index] stream.lookahead_index += 1 b = _next_byte(stream) + toklen = tok.next_byte - b for (i, (nbyte, k, f)) in enumerate(split_spec) h = SyntaxHead(k, f) - b = (i == length(split_spec)) ? tok.next_byte : b + nbyte + b += nbyte < 0 ? 
(toklen + nbyte) : nbyte orig_k = k == K"." ? K"." : kind(tok) push!(stream.tokens, SyntaxToken(h, orig_k, false, b)) end + @assert tok.next_byte == b stream.peek_count = 0 return position(stream) end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index cbe6985669d0a..0b66547f2fe94 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -340,7 +340,7 @@ function bump_dotsplit(ps, flags=EMPTY_FLAGS; bump_trivia(ps) mark = position(ps) k = remap_kind != K"None" ? remap_kind : kind(t) - pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (0, k, flags)) + pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (-1, k, flags)) if emit_dot_node pos = emit(ps, mark, K".") end @@ -626,7 +626,22 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # a += b ==> (+= a b) # a .= b ==> (.= a b) is_short_form_func = k == K"=" && !is_dotted(t) && was_eventually_call(ps) - bump(ps, TRIVIA_FLAG) + if k == K"op=" + # x += y ==> (op= x + y) + # x .+= y ==> (.op= x + y) + bump_trivia(ps) + if is_dotted(t) + bump_split(ps, (1, K".", TRIVIA_FLAG), + (-2, K"Identifier", EMPTY_FLAGS), # op + (1, K"=", TRIVIA_FLAG)) + else + bump_split(ps, + (-1, K"Identifier", EMPTY_FLAGS), # op + (1, K"=", TRIVIA_FLAG)) + end + else + bump(ps, TRIVIA_FLAG) + end bump_trivia(ps) # Syntax Edition TODO: We'd like to call `down` here when # is_short_form_func is true, to prevent `f() = 1 = 2` from parsing. @@ -1843,7 +1858,7 @@ function parse_resword(ps::ParseState) # let x::1 ; end ==> (let (block (::-i x 1)) (block)) # let x ; end ==> (let (block x) (block)) # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) - # let x+=1 ; end ==> (let (block (+= x 1)) (block)) + # let x+=1 ; end ==> (let (block (op= x + 1)) (block)) parse_comma_separated(ps, parse_eq_star) end emit(ps, m, K"block") @@ -2571,7 +2586,7 @@ function parse_import_path(ps::ParseState) # Modules with operator symbol names # import .⋆ ==> (import (importpath . 
⋆)) bump_trivia(ps) - bump_split(ps, (1,K".",EMPTY_FLAGS), (1,peek(ps),EMPTY_FLAGS)) + bump_split(ps, (1,K".",EMPTY_FLAGS), (-1,peek(ps),EMPTY_FLAGS)) else # import @x ==> (import (importpath @x)) # import $A ==> (import (importpath ($ A))) @@ -2599,7 +2614,12 @@ function parse_import_path(ps::ParseState) warning="space between dots in import path") end bump_trivia(ps) - bump_split(ps, (1,K".",TRIVIA_FLAG), (1,k,EMPTY_FLAGS)) + m = position(ps) + bump_split(ps, (1,K".",TRIVIA_FLAG), (-1,k,EMPTY_FLAGS)) + if is_syntactic_operator(k) + # import A.= ==> (import (importpath A (error =))) + emit(ps, m, K"error", error="syntactic operators not allowed in import") + end elseif k == K"..." # Import the .. operator # import A... ==> (import (importpath A ..)) @@ -3550,13 +3570,13 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump_dotsplit(ps, emit_dot_node=true, remap_kind= is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier") if check_identifiers && !is_valid_identifier(leading_kind) - # += ==> (error +=) + # += ==> (error (op= +)) # ? ==> (error ?) - # .+= ==> (error (. +=)) + # .+= ==> (error (. (op= +))) emit(ps, mark, K"error", error="invalid identifier") else # Quoted syntactic operators allowed - # :+= ==> (quote-: +=) + # :+= ==> (quote-: (op= +)) end elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index af78bee42c3ff..b1e2325b0914c 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -93,6 +93,7 @@ function _nondot_symbolic_operator_kinds() K"isa" K"in" K".'" + K"op=" ]) end @@ -527,14 +528,14 @@ function _next_token(l::Lexer, c) elseif c == '-' return lex_minus(l); elseif c == '−' # \minus '−' treated as hyphen '-' - return emit(l, accept(l, '=') ? K"-=" : K"-") + return emit(l, accept(l, '=') ? 
K"op=" : K"-") elseif c == '`' return lex_backtick(l); elseif is_identifier_start_char(c) return lex_identifier(l, c) elseif isdigit(c) return lex_digit(l, K"Integer") - elseif (k = get(_unicode_ops, c, K"error")) != K"error" + elseif (k = get(_unicode_ops, c, K"None")) != K"None" return emit(l, k) else emit(l, @@ -797,12 +798,12 @@ function lex_greater(l::Lexer) if accept(l, '>') if accept(l, '>') if accept(l, '=') - return emit(l, K">>>=") + return emit(l, K"op=") else # >>>?, ? not a = return emit(l, K">>>") end elseif accept(l, '=') - return emit(l, K">>=") + return emit(l, K"op=") else return emit(l, K">>") end @@ -819,7 +820,7 @@ end function lex_less(l::Lexer) if accept(l, '<') if accept(l, '=') - return emit(l, K"<<=") + return emit(l, K"op=") else # '<') return emit(l, K"|>") elseif accept(l, '|') @@ -910,7 +911,7 @@ function lex_plus(l::Lexer) if accept(l, '+') return emit(l, K"++") elseif accept(l, '=') - return emit(l, K"+=") + return emit(l, K"op=") end return emit(l, K"+") end @@ -925,7 +926,7 @@ function lex_minus(l::Lexer) elseif !l.dotop && accept(l, '>') return emit(l, K"->") elseif accept(l, '=') - return emit(l, K"-=") + return emit(l, K"op=") end return emit(l, K"-") end @@ -934,35 +935,35 @@ function lex_star(l::Lexer) if accept(l, '*') return emit(l, K"Error**") # "**" is an invalid operator use ^ elseif accept(l, '=') - return emit(l, K"*=") + return emit(l, K"op=") end return emit(l, K"*") end function lex_circumflex(l::Lexer) if accept(l, '=') - return emit(l, K"^=") + return emit(l, K"op=") end return emit(l, K"^") end function lex_division(l::Lexer) if accept(l, '=') - return emit(l, K"÷=") + return emit(l, K"op=") end return emit(l, K"÷") end function lex_dollar(l::Lexer) if accept(l, '=') - return emit(l, K"$=") + return emit(l, K"op=") end return emit(l, K"$") end function lex_xor(l::Lexer) if accept(l, '=') - return emit(l, K"⊻=") + return emit(l, K"op=") end return emit(l, K"⊻") end @@ -1110,7 +1111,7 @@ function lex_amper(l::Lexer) 
if accept(l, '&') return emit(l, K"&&") elseif accept(l, '=') - return emit(l, K"&=") + return emit(l, K"op=") else return emit(l, K"&") end @@ -1148,12 +1149,12 @@ end function lex_forwardslash(l::Lexer) if accept(l, '/') if accept(l, '=') - return emit(l, K"//=") + return emit(l, K"op=") else return emit(l, K"//") end elseif accept(l, '=') - return emit(l, K"/=") + return emit(l, K"op=") else return emit(l, K"/") end @@ -1161,7 +1162,7 @@ end function lex_backslash(l::Lexer) if accept(l, '=') - return emit(l, K"\=") + return emit(l, K"op=") end return emit(l, K"\\") end @@ -1193,7 +1194,7 @@ function lex_dot(l::Lexer) elseif pc == '−' l.dotop = true readchar(l) - return emit(l, accept(l, '=') ? K"-=" : K"-") + return emit(l, accept(l, '=') ? K"op=" : K"-") elseif pc =='*' l.dotop = true readchar(l) @@ -1222,7 +1223,7 @@ function lex_dot(l::Lexer) l.dotop = true readchar(l) if accept(l, '=') - return emit(l, K"&=") + return emit(l, K"op=") else if accept(l, '&') return emit(l, K"&&") diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index eb998229eb5de..200e87649e198 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -501,6 +501,16 @@ @test parsestmt("./x", ignore_errors=true) == Expr(:call, Expr(:error, Expr(:., :/)), :x) end + @testset "syntactic update-assignment operators" begin + @test parsestmt("x += y") == Expr(:(+=), :x, :y) + @test parsestmt("x .+= y") == Expr(:(.+=), :x, :y) + @test parsestmt(":+=") == QuoteNode(Symbol("+=")) + @test parsestmt(":(+=)") == QuoteNode(Symbol("+=")) + @test parsestmt(":.+=") == QuoteNode(Symbol(".+=")) + @test parsestmt(":(.+=)") == QuoteNode(Symbol(".+=")) + @test parsestmt("x \u2212= y") == Expr(:(-=), :x, :y) + end + @testset "let" begin @test parsestmt("let x=1\n end") == Expr(:let, Expr(:(=), :x, 1), Expr(:block, LineNumberNode(2))) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a747e1c7e871b..9eb7caa25d175 100644 --- a/JuliaSyntax/test/parser.jl +++ 
b/JuliaSyntax/test/parser.jl @@ -62,8 +62,8 @@ tests = [ # parse_assignment "a = b" => "(= a b)" "a .= b" => "(.= a b)" - "a += b" => "(+= a b)" - "a .+= b" => "(.+= a b)" + "a += b" => "(op= a + b)" + "a .+= b" => "(.op= a + b)" "a, b = c, d" => "(= (tuple a b) (tuple c d))" "x, = xs" => "(= (tuple x) xs)" "[a ~b]" => "(hcat a (call-pre ~ b))" @@ -497,7 +497,7 @@ tests = [ "let x ; end" => "(let (block x) (block))" "let x::1 ; end" => "(let (block (::-i x 1)) (block))" "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" - "let x+=1 ; end" => "(let (block (+= x 1)) (block))" + "let x+=1 ; end" => "(let (block (op= x + 1)) (block))" "let ; end" => "(let (block) (block))" "let ; body end" => "(let (block) (block body))" "let\na\nb\nend" => "(let (block) (block a b))" @@ -576,7 +576,7 @@ tests = [ "const x = 1" => "(const (= x 1))" "const x .= 1" => "(error (const (.= x 1)))" "global x ~ 1" => "(global (call-i x ~ 1))" - "global x += 1" => "(global (+= x 1))" + "global x += 1" => "(global (op= x + 1))" "const x" => "(error (const x))" "global const x" => "(global (error (const x)))" "const global x" => "(error (const (global x)))" @@ -715,6 +715,7 @@ tests = [ "using :A" => "(using (importpath (error (quote-: A))))" "using A: :b" => "(using (: (importpath A) (importpath (error (quote-: b)))))" "using A: b.:c" => "(using (: (importpath A) (importpath b (quote-: c))))" + # Syntactic operators not allowed in import ], JuliaSyntax.parse_iteration_specs => [ "i = rhs" => "(iteration (in i rhs))" @@ -832,6 +833,7 @@ tests = [ "≕" => "≕" # Quoted syntactic operators allowed ":+=" => "(quote-: +=)" + ":.+=" => "(quote-: (. +=))" ":.=" => "(quote-: (. =))" ":.&&" => "(quote-: (. 
&&))" # Special symbols quoted @@ -1023,7 +1025,7 @@ tests = [ JuliaSyntax.parse_stmts => with_version.(v"1.11", [ "function f(public)\n public + 3\nend" => "(function (call f public) (block (call-i public + 3)))" "public A, B" => "(public A B)" - "if true \n public *= 4 \n end" => "(if true (block (*= public 4)))" + "if true \n public *= 4 \n end" => "(if true (block (op= public * 4)))" "module Mod\n public A, B \n end" => "(module Mod (block (public A B)))" "module Mod2\n a = 3; b = 6; public a, b\n end" => "(module Mod2 (block (= a 3) (= b 6) (public a b)))" "a = 3; b = 6; public a, b" => "(toplevel-; (= a 3) (= b 6) (public a b))" @@ -1141,6 +1143,12 @@ parsestmt_with_kind_tests = [ ":(<:)" => "(quote-: (parens <:::<:))" ":(&&)" => "(quote-: (parens &&::&&))" ":(=)" => "(quote-: (parens =::=))" + "a := b" => "(:= a::Identifier b::Identifier)" + "a += b" => "(op= a::Identifier +::Identifier b::Identifier)" + "a .+= b" => "(.op= a::Identifier +::Identifier b::Identifier)" + "a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)" + ":+=" => "(quote-: +=::op=)" + ":.+=" => "(quote-: (. 
+=::op=))" ] @testset "parser `Kind` remapping" begin @@ -1174,10 +1182,10 @@ end # · and · normalize to ⋅ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" - # − normalizes to - + # − ('\u2212') normalizes to - ('\u002d') @test parse_to_sexpr_str(JuliaSyntax.parse_expr, "a \u2212 b") == "(call-i a - b)" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u2212= b") == "(-= a b)" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.-= a b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u2212= b") == "(op= a - b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.op= a - b)" end @testset "Unbalanced bidirectional unicode" begin diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index e2d069daa1a7c..2a2309bce6db9 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -175,14 +175,15 @@ end end @testset "test added operators" begin - @test tok("1+=2", 2).kind == K"+=" - @test tok("1-=2", 2).kind == K"-=" + @test tok("1+=2", 2).kind == K"op=" + @test tok("1-=2", 2).kind == K"op=" + @test tok("1*=2", 2).kind == K"op=" + @test tok("1^=2", 2).kind == K"op=" + @test tok("1÷=2", 2).kind == K"op=" + @test tok("1\\=2", 2).kind == K"op=" + @test tok("1\$=2", 2).kind == K"op=" + @test tok("1⊻=2", 2).kind == K"op=" @test tok("1:=2", 2).kind == K":=" - @test tok("1*=2", 2).kind == K"*=" - @test tok("1^=2", 2).kind == K"^=" - @test tok("1÷=2", 2).kind == K"÷=" - @test tok("1\\=2", 2).kind == K"\=" - @test tok("1\$=2", 2).kind == K"$=" @test tok("1-->2", 2).kind == K"-->" @test tok("1<--2", 2).kind == K"<--" @test tok("1<-->2", 2).kind == K"<-->" @@ -342,10 +343,6 @@ end @test length(collect(tokenize("x)"))) == 3 end -@testset "xor_eq" begin - @test tok("1 ⊻= 2", 3).kind==K"⊻=" -end - @testset "lex binary" begin @test tok("0b0101").kind==K"BinInt" end @@ -824,6 +821,9 
@@ for opkind in Tokenize._nondot_symbolic_operator_kinds() tokens = collect(tokenize(str)) exop = expr.head == :call ? expr.args[1] : expr.head #println(str) + if Symbol(Tokenize.untokenize(tokens[arity == 1 ? 1 : 3], str)) != exop + @info "" arity str exop + end @test Symbol(Tokenize.untokenize(tokens[arity == 1 ? 1 : 3], str)) == exop else break @@ -842,7 +842,7 @@ end # https://github.com/JuliaLang/julia/pull/40948 @test tok("−").kind == K"-" - @test tok("−=").kind == K"-=" + @test tok("−=").kind == K"op=" @test tok(".−").dotop end From bd6c2a12caa65175c0397143506d5d4beccc96c9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Jan 2025 16:54:44 +1000 Subject: [PATCH 0953/1109] Desugaring of bare `<:`, `>:`, `-->`, `...`, `&`, `$` forms --- JuliaLowering/src/desugaring.jl | 14 ++++++++ JuliaLowering/src/macro_expansion.jl | 5 +++ JuliaLowering/test/misc_ir.jl | 54 ++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index cc06e6c347c6a..f76e1c4b9593c 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1579,6 +1579,9 @@ function expand_dot(ctx, ex) if numchildren(ex) == 1 # eg, `f = .+` + # Upstream TODO: Remove the (. +) representation and replace with use + # of DOTOP_FLAG? This way, `K"."` will be exclusively used for + # getproperty. @ast ctx ex [K"call" "BroadcastFunction"::K"top" ex[1] @@ -3383,6 +3386,11 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_forms_2(ctx, ex[1]) expand_forms_2(ctx, ex[2]) ] + elseif k == K"<:" || k == K">:" || k == K"-->" + expand_forms_2(ctx, @ast ctx ex [K"call" + adopt_scope(string(k)::K"Identifier", ex) + children(ex)... 
+ ]) elseif k == K"op=" expand_forms_2(ctx, expand_update_operator(ctx, ex)) elseif k == K"=" @@ -3534,6 +3542,12 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"inert" ex + elseif k == K"&" + throw(LoweringError(ex, "invalid syntax")) + elseif k == K"$" + throw(LoweringError(ex, "`\$` expression outside string or quote")) + elseif k == K"..." + throw(LoweringError(ex, "`...` expression outside call")) elseif is_leaf(ex) ex else diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 6e0d98035794e..3ceda2151ff3f 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -297,6 +297,11 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) end elseif is_leaf(ex) ex + elseif k == K"<:" || k == K">:" || k == K"-->" + # TODO: Should every form get layerid systematically? Or only the ones + # which expand_forms_2 needs? + layerid = get(ex, :scope_layer, ctx.current_layer.id) + mapchildren(e->expand_forms_1(ctx,e), ctx, ex; scope_layer=layerid) else mapchildren(e->expand_forms_1(ctx,e), ctx, ex) end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index d5d8383b31dee..4576b2cc5d76b 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -29,6 +29,36 @@ x."b" 2 (call top.BroadcastFunction %₁) 3 (return %₂) +######################################## +# <: as a function call +x <: y +#--------------------- +1 TestMod.<: +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# >: as a function call +x >: y +#--------------------- +1 TestMod.>: +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# --> as a function call +x --> y +#--------------------- +1 TestMod.--> +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + ######################################## # Error: Wrong 
number of children in `.` @ast_ [K"." "x"::K"Identifier" "a"::K"Identifier" 3::K"Integer"] @@ -483,3 +513,27 @@ LoweringError: ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") # └────────┘ ── only the trailing ccall argument type should have `...` +######################################## +# Error: unary & syntax +&x +#--------------------- +LoweringError: +&x +└┘ ── invalid syntax + +######################################## +# Error: $ outside quote/string +$x +#--------------------- +LoweringError: +$x +└┘ ── `$` expression outside string or quote block + +######################################## +# Error: splat outside call +x... +#--------------------- +LoweringError: +x... +└──┘ ── `...` expression outside call + From e4b977051091ee02503acefdb2e53d65b8f7fb51 Mon Sep 17 00:00:00 2001 From: Laine Taffin Altman Date: Wed, 22 Jan 2025 18:37:17 -0800 Subject: [PATCH 0954/1109] =?UTF-8?q?Add=20U+1F8B2=20=F0=9F=A2=B2=20as=20a?= =?UTF-8?q?n=20operator=20(JuliaLang/JuliaSyntax.jl#525)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The character U+1F8B2 🢲 (RIGHTWARDS ARROW WITH LOWER HOOK) is new is Unicode 16; it is of a kind (no pun intended) with the longstanding characters U+21A9 ↩ (LEFTWARDS ARROW WITH HOOK) and U+21AA ↪ (RIGHTWARDS ARROW WITH HOOK), both of which are already supported as operators in Julia. It was added to Unicode as part of the Symbols for Legacy Computing effort, wherein it was sourced from Smalltalk character sets in the 1970s—so it has a very long history of being used in programming languages. 
--------- Co-authored-by: Claire Foster --- JuliaSyntax/src/kinds.jl | 1 + JuliaSyntax/src/tokenize.jl | 1 + JuliaSyntax/test/tokenize.jl | 3 +++ 3 files changed, 5 insertions(+) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 21328c1a93ada..c7d27e3597a6b 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -464,6 +464,7 @@ register_kinds!(JuliaSyntax, 0, [ "↶" "↺" "↻" + "🢲" "END_ARROW" # Level 4 diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index b1e2325b0914c..6eb76954dfe0c 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -22,6 +22,7 @@ end function is_identifier_start_char(c::Char) c == EOF_CHAR && return false isvalid(c) || return false + c == '🢲' && return false # First divergence from Base.is_id_start_char return Base.is_id_start_char(c) end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 2a2309bce6db9..478bed577cf7d 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -922,6 +922,9 @@ end end allops = split(join(ops, " "), " ") @test all(s->Base.isoperator(Symbol(s)) == is_operator(first(collect(tokenize(s))).kind), allops) + + # "\U1f8b2" added in Julia 1.12 + @test is_operator(first(collect(tokenize("🢲")))) end const all_kws = Set([ From 1d2d851be336505d6a48659ff8d2ef7a1cf3ddfc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 23 Jan 2025 21:15:30 +1000 Subject: [PATCH 0955/1109] Bump version to 1.0.0 (JuliaLang/JuliaSyntax.jl#531) --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 92f233e6036f8..49a07d642b27d 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "1.0.0-DEV" +version = "1.0.0" [compat] Serialization = "1.0" From 
23818bc83191c81c635dfd9496942771ed7a4d03 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 24 Jan 2025 19:03:37 +1000 Subject: [PATCH 0956/1109] Desugaring of generator, comprehension, typed_comprehension --- JuliaLowering/src/desugaring.jl | 157 ++++++++++++++ JuliaLowering/test/closures_ir.jl | 10 + JuliaLowering/test/generators.jl | 52 +++++ JuliaLowering/test/generators_ir.jl | 318 ++++++++++++++++++++++++++++ JuliaLowering/test/runtests.jl | 1 + JuliaLowering/test/utils.jl | 16 +- 6 files changed, 546 insertions(+), 8 deletions(-) create mode 100644 JuliaLowering/test/generators.jl create mode 100644 JuliaLowering/test/generators_ir.jl diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index f76e1c4b9593c..26ea38087c722 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -732,6 +732,145 @@ function expand_fuse_broadcast(ctx, ex) end end +#------------------------------------------------------------------------------- +# Expansion of generators and comprehensions + +# Return any subexpression which is a 'return` statement, not including any +# inside quoted sections or method bodies. 
+function find_return(ex::SyntaxTree) + if kind(ex) == K"return" + return ex + elseif !is_leaf(ex) && !(kind(ex) in KSet"quote inert meta function ->") + for e in children(ex) + r = find_return(e) + if !isnothing(r) + return r + end + end + else + return nothing + end +end + +# Return true for nested tuples of the same identifiers +function similar_tuples_or_identifiers(a, b) + if kind(a) == K"tuple" && kind(b) == K"tuple" + return numchildren(a) == numchildren(b) && + all( ((x,y),)->similar_tuples_or_identifiers(x,y), + zip(children(a), children(b))) + else + is_same_identifier_like(a,b) + end +end + +# Return the anonymous function taking an iterated value, for use with the +# first agument to `Base.Generator` +function func_for_generator(ctx, body, iter_value_destructuring) + if similar_tuples_or_identifiers(iter_value_destructuring, body) + # Use Base.identity for generators which are filters such as + # `(x for x in xs if f(x))`. This avoids creating a new type. + @ast ctx body "identity"::K"top" + else + @ast ctx body [K"->" + [K"tuple" + iter_value_destructuring + ] + [K"block" + body + ] + ] + end +end + +function expand_generator(ctx, ex) + @chk numchildren(ex) >= 2 + body = ex[1] + body_ret = find_return(body) + if !isnothing(body_ret) + throw(LoweringError(body_ret, "`return` not allowed inside comprehension or generator")) + end + if numchildren(ex) > 2 + # Uniquify outer vars by NameKey + outervars_by_key = Dict{NameKey,typeof(ex)}() + for iterspecs in ex[2:end-1] + for iterspec in children(iterspecs) + lhs = iterspec[1] + foreach_lhs_var(lhs) do var + @assert kind(var) == K"Identifier" # Todo: K"BindingId"? + outervars_by_key[NameKey(var)] = var + end + end + end + outervar_assignments = SyntaxList(ctx) + for (k,v) in sort(collect(pairs(outervars_by_key)), by=first) + push!(outervar_assignments, @ast ctx v [K"=" v v]) + end + body = @ast ctx ex [K"let" + [K"block" + outervar_assignments... 
+ ] + [K"block" + body + ] + ] + end + for iterspecs_ind in numchildren(ex):-1:2 + iterspecs = ex[iterspecs_ind] + filter_test = nothing + if kind(iterspecs) == K"filter" + filter_test = iterspecs[2] + iterspecs = iterspecs[1] + end + if kind(iterspecs) != K"iteration" + throw(LoweringError("""Expected `K"iteration"` iteration specification in generator""")) + end + iter_ranges = SyntaxList(ctx) + iter_lhss = SyntaxList(ctx) + for iterspec in children(iterspecs) + @chk kind(iterspec) == K"in" + @chk numchildren(iterspec) == 2 + push!(iter_lhss, iterspec[1]) + push!(iter_ranges, iterspec[2]) + end + iter_value_destructuring = if numchildren(iterspecs) == 1 + iterspecs[1][1] + else + iter_lhss = SyntaxList(ctx) + for iterspec in children(iterspecs) + push!(iter_lhss, iterspec[1]) + end + @ast ctx iterspecs [K"tuple" iter_lhss...] + end + iter = if length(iter_ranges) > 1 + @ast ctx iterspecs [K"call" + "product"::K"top" + iter_ranges... + ] + else + iter_ranges[1] + end + if !isnothing(filter_test) + iter = @ast ctx ex [K"call" + "Filter"::K"top" + func_for_generator(ctx, filter_test, iter_value_destructuring) + iter + ] + end + body = @ast ctx ex [K"call" + "Generator"::K"top" + func_for_generator(ctx, body, iter_value_destructuring) + iter + ] + if iterspecs_ind < numchildren(ex) + body = @ast ctx ex [K"call" + "Flatten"::K"top" + body + ] + end + end + body +end + #------------------------------------------------------------------------------- # Expansion of array concatenation notation `[a b ; c d]` etc @@ -3411,6 +3550,24 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) sig = expand_forms_2(ctx, ex[2], ex) elseif k == K"for" expand_forms_2(ctx, expand_for(ctx, ex)) + elseif k == K"comprehension" + @chk numchildren(ex) == 1 + @chk kind(ex[1]) == K"generator" + @ast ctx ex [K"call" + "collect"::K"top" + expand_forms_2(ctx, ex[1]) + ] + elseif k == K"typed_comprehension" + @chk numchildren(ex) == 2 + @chk kind(ex[2]) == K"generator" 
+ # TODO: Hack for early lowering of selected typed_comprehension + @ast ctx ex [K"call" + "collect"::K"top" + expand_forms_2(ctx, ex[1]) + expand_forms_2(ctx, ex[2]) + ] + elseif k == K"generator" + expand_forms_2(ctx, expand_generator(ctx, ex)) elseif k == K"->" || k == K"do" expand_forms_2(ctx, expand_arrow(ctx, ex)) elseif k == K"function" diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 1213efe469131..978d70ab1f7f6 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -311,6 +311,16 @@ end 14 TestMod.f 15 (return %₁₄) +######################################## +# Nested captures of arguments +function f(x) + function g(y) + function h(z) + (x,y,z) + end + end +end + ######################################## # Global method capturing local variables begin diff --git a/JuliaLowering/test/generators.jl b/JuliaLowering/test/generators.jl new file mode 100644 index 0000000000000..1a9cb539b4d3b --- /dev/null +++ b/JuliaLowering/test/generators.jl @@ -0,0 +1,52 @@ +@testset "Generators" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +collect(x^2 for x in 1:3) +""") == [1,4,9] + +@test JuliaLowering.include_string(test_mod, """ +collect(x for x in 1:5 if isodd(x)) +""") == [1,3,5] + +@test JuliaLowering.include_string(test_mod, """ +collect((y,x) for (x,y) in zip(1:3, 2:4) if y != 3) +""") == [(2,1), (4,3)] + +# product iterator +@test JuliaLowering.include_string(test_mod, """ +collect((x,y) for x in 1:3, y in 1:2) +""") == [(1,1) (1,2) + (2,1) (2,2) + (3,1) (3,2)] + +# flattened iterator +@test JuliaLowering.include_string(test_mod, """ +collect((x,y,z) for x in 1:3, y in 4:5 for z in 6:7) +""") == [ + (1,4,6) + (1,4,7) + (2,4,6) + (2,4,7) + (3,4,6) + (3,4,7) + (1,5,6) + (1,5,7) + (2,5,6) + (2,5,7) + (3,5,6) + (3,5,7) +] + +# Duplicate iteration variables - body sees only innermost +@test JuliaLowering.include_string(test_mod, """ +collect(x for x in 1:3 for x 
in 1:2) +""") == [1, 2, 1, 2, 1, 2] + +# Outer iteration variables are protected from mutation +@test JuliaLowering.include_string(test_mod, """ +collect((z=y; y=100; z) for y in 1:3 for x in 1:2) +""") == [1, 1, 2, 2, 3, 3] + +end diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl new file mode 100644 index 0000000000000..62bf7d862e986 --- /dev/null +++ b/JuliaLowering/test/generators_ir.jl @@ -0,0 +1,318 @@ +######################################## +# Simple 1D generator +(x+1 for x in xs) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##0) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##0 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##0) + 8 (= TestMod.#->##0 %₅) + 9 (call core.svec) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##0 +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/x] + 1 TestMod.+ + 2 (call %₁ slot₂/x 1) + 3 (return %₂) +7 TestMod.#->##0 +8 (new %₇) +9 TestMod.xs +10 (call top.Generator %₈ %₉) +11 (return %₁₀) + +######################################## +# Product iteration +(x+y for x in xs, y in ys) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##1) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##1 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##1) + 8 (= TestMod.#->##1 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##1 +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/iterstate slot₄/x slot₅/y] + 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (= slot₃/iterstate (call core.getfield %₁ 2)) + 4 slot₃/iterstate + 5 (call top.indexed_iterate slot₂/destructured_arg_1 2 %₄) + 6 (= slot₅/y (call core.getfield %₅ 1)) + 7 TestMod.+ + 8 slot₄/x + 9 slot₅/y + 10 (call %₇ %₈ %₉) + 11 (return %₁₀) +7 TestMod.#->##1 +8 (new %₇) +9 TestMod.xs +10 TestMod.ys +11 (call top.product %₉ %₁₀) +12 (call top.Generator %₈ %₁₁) +13 (return %₁₂) + +######################################## +# Use `identity` as the Generator function when possible eg in filters +((x,y) for (x,y) in iter if f(x)) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##2) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##2 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##2) + 8 (= TestMod.#->##2 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##2 +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/iterstate slot₄/x slot₅/y(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (= slot₃/iterstate (call core.getfield %₁ 2)) + 4 slot₃/iterstate + 5 (call top.indexed_iterate slot₂/destructured_arg_1 2 %₄) + 6 (= slot₅/y (call core.getfield %₅ 1)) + 7 TestMod.f + 8 slot₄/x + 9 (call %₇ %₈) + 10 (return %₉) +7 TestMod.#->##2 +8 (new %₇) +9 TestMod.iter +10 (call top.Filter %₈ %₉) +11 (call top.Generator top.identity %₁₀) +12 (return %₁₁) + +######################################## +# Use of placeholders in iteration vars +(1 for _ in xs) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##3) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##3 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##3) + 8 (= TestMod.#->##3 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##3 +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 (return 1) +7 TestMod.#->##3 +8 (new %₇) +9 TestMod.xs +10 (call top.Generator %₈ %₉) +11 (return %₁₀) + +######################################## +# Error: Use of placeholders in body +(_ for _ in xs) +#--------------------- +LoweringError: +(_ for _ in xs) +#╙ ── all-underscore identifiers are write-only and their values cannot be used in expressions + +######################################## +# 1D generator with destructuring +(body for (x,_,y) in iter) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##5) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##5 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##5) + 8 (= TestMod.#->##5 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##5 +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/iterstate slot₄/x(!read) slot₅/y(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (= slot₃/iterstate (call core.getfield %₁ 2)) + 4 slot₃/iterstate + 5 (call top.indexed_iterate slot₂/destructured_arg_1 2 %₄) + 6 (call core.getfield %₅ 1) + 7 (= slot₃/iterstate (call core.getfield %₅ 2)) + 8 slot₃/iterstate + 9 (call top.indexed_iterate slot₂/destructured_arg_1 3 %₈) + 10 (= slot₅/y (call core.getfield %₉ 1)) + 11 TestMod.body + 12 (return %₁₁) +7 TestMod.#->##5 +8 (new %₇) +9 TestMod.iter +10 (call top.Generator %₈ %₉) +11 (return %₁₀) + +######################################## +# return permitted in quoted syntax in generator +(:(return x) for _ in iter) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##6) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##6 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##6) + 8 (= TestMod.#->##6 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#->##6 +3 (call core.svec %₂ core.Any) +4 (call core.svec) +5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 (call JuliaLowering.interpolate_ast (inert (return x))) + 2 (return %₁) +7 TestMod.#->##6 +8 (new %₇) +9 TestMod.iter +10 (call top.Generator %₈ %₉) +11 (return %₁₀) + +######################################## +# Error: `return` not permitted in generator body +((return x) + y for x in iter) +#--------------------- +LoweringError: +((return x) + y for x in iter) +# └──────┘ ── `return` not allowed inside comprehension or generator + +######################################## +# FIXME - error in nested closure conversion: Triply nested generator +((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) +#--------------------- +LoweringError: +((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) +# ╙ ── Found unexpected binding of kind argument + +Detailed provenance: +#₁₃/x +└─ x + └─ x + └─ @ :1 + + +######################################## +# Nested case with duplicate iteration variables +(x for x in 1:3 for x in 1:2) +#--------------------- +1 --- thunk + 1 (global TestMod.#->##8) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->##8 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->##8) + 8 (= TestMod.#->##8 %₅) + 9 (call core.svec) + 10 (call core._typebody! %₅ %₉) + 11 (return core.nothing) +2 --- thunk + 1 (global TestMod.#->#->##1) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#->#->##1 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#->#->##1) + 8 (= TestMod.#->#->##1 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +3 TestMod.#->#->##1 +4 (call core.svec %₃ core.Any) +5 (call core.svec) +6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/x slot₃/x] + 1 slot₂/x + 2 (= slot₃/x %₁) + 3 slot₃/x + 4 (return %₃) +8 TestMod.#->##8 +9 (call core.svec %₈ core.Any) +10 (call core.svec) +11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 TestMod.#->#->##1 + 2 (new %₁) + 3 TestMod.: + 4 (call %₃ 1 2) + 5 (call top.Generator %₂ %₄) + 6 (return %₅) +13 TestMod.#->##8 +14 (new %₁₃) +15 TestMod.: +16 (call %₁₅ 1 3) +17 (call top.Generator %₁₄ %₁₆) +18 (call top.Flatten %₁₇) +19 (return %₁₈) + +######################################## +# Comprehension lowers to generator with collect +[x for x in xs] +#--------------------- +1 TestMod.xs +2 (call top.Generator top.identity %₁) +3 (call top.collect %₂) +4 (return %₃) + +######################################## +# Typed comprehension lowers to generator with collect +T[x for x in xs] +#--------------------- +1 TestMod.T +2 TestMod.xs +3 (call top.Generator top.identity %₂) +4 (call top.collect %₁ %₃) +5 (return %₄) + diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 4ecfb5453ab24..f8a76bae4d55a 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -17,6 +17,7 @@ include("utils.jl") include("desugaring.jl") include("exceptions.jl") include("functions.jl") + include("generators.jl") include("import.jl") include("loops.jl") include("macros.jl") diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 66634cc52fb52..d49229b26c399 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -125,8 +125,8 @@ function match_ir_test_case(case_str) length(inout) == 1 ? 
(inout[1], "") : error("Too many sections in IR test case") expect_error = startswith(description, "Error") - is_todo = startswith(description, "TODO") - (; expect_error=expect_error, is_todo=is_todo, + is_broken = startswith(description, "FIXME") + (; expect_error=expect_error, is_broken=is_broken, description=strip(description), input=strip(input), output=strip(output)) end @@ -152,7 +152,7 @@ function setup_ir_test_module(preamble) test_mod end -function format_ir_for_test(mod, description, input, expect_error=false, is_todo=false) +function format_ir_for_test(mod, description, input, expect_error=false, is_broken=false) ex = parsestmt(SyntaxTree, input) try if kind(ex) == K"macrocall" && kind(ex[1]) == K"MacroName" && ex[1].name_val == "@ast_" @@ -173,7 +173,7 @@ function format_ir_for_test(mod, description, input, expect_error=false, is_todo return sprint(io->Base.showerror(io, exc, show_detail=false)) elseif expect_error && (exc isa MacroExpansionError) return sprint(io->Base.showerror(io, exc)) - elseif is_todo + elseif is_broken return sprint(io->Base.showerror(io, exc)) else throw("Error in test case \"$description\"") @@ -184,8 +184,8 @@ end function test_ir_cases(filename::AbstractString) preamble, cases = read_ir_test_cases(filename) test_mod = setup_ir_test_module(preamble) - for (expect_error, is_todo, description, input, ref) in cases - if is_todo + for (expect_error, is_broken, description, input, ref) in cases + if is_broken continue end output = format_ir_for_test(test_mod, description, input, expect_error) @@ -213,9 +213,9 @@ function refresh_ir_test_cases(filename, pattern=nothing) println(io, preamble, "\n") println(io, "#*******************************************************************************") end - for (expect_error, is_todo, description, input, ref) in cases + for (expect_error, is_broken, description, input, ref) in cases if isnothing(pattern) || occursin(pattern, description) - ir = format_ir_for_test(test_mod, description, input, 
expect_error, is_todo) + ir = format_ir_for_test(test_mod, description, input, expect_error, is_broken) if rstrip(ir) != ref @info "Refreshing test case $(repr(description)) in $filename" end From 6acc425b7b6898884f77363735ecab1c876496d9 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 25 Jan 2025 08:48:02 +1000 Subject: [PATCH 0957/1109] typed_comprehension -> for loop lowering for simple cases We inherit this hack from the flisp code, though it's unclear how much it's needed in the current compiler (needs measurement somehow). --- JuliaLowering/src/desugaring.jl | 80 +++++++++++++++++++++++++---- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/generators.jl | 6 +++ JuliaLowering/test/generators_ir.jl | 59 ++++++++++++++++++--- 4 files changed, 129 insertions(+), 18 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 26ea38087c722..e63369a5bfbbb 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -752,6 +752,13 @@ function find_return(ex::SyntaxTree) end end +function check_no_return(ex) + r = find_return(ex) + if !isnothing(r) + throw(LoweringError(r, "`return` not allowed inside comprehension or generator")) + end +end + # Return true for nested tuples of the same identifiers function similar_tuples_or_identifiers(a, b) if kind(a) == K"tuple" && kind(b) == K"tuple" @@ -785,10 +792,7 @@ end function expand_generator(ctx, ex) @chk numchildren(ex) >= 2 body = ex[1] - body_ret = find_return(body) - if !isnothing(body_ret) - throw(LoweringError(body_ret, "`return` not allowed inside comprehension or generator")) - end + check_no_return(body) if numchildren(ex) > 2 # Uniquify outer vars by NameKey outervars_by_key = Dict{NameKey,typeof(ex)}() @@ -871,6 +875,57 @@ function expand_generator(ctx, ex) body end +function expand_comprehension_to_loops(ctx, ex) + @assert kind(ex) == K"typed_comprehension" + element_type = ex[1] + gen = ex[2] + @assert kind(gen) == K"generator" + body 
= gen[1] + check_no_return(body) + # TODO: check_no_break_continue + iterspecs = gen[2] + @assert kind(iterspecs) == K"iteration" + new_iterspecs = SyntaxList(ctx) + iters = SyntaxList(ctx) + iter_defs = SyntaxList(ctx) + for iterspec in children(iterspecs) + iter = emit_assign_tmp(iter_defs, ctx, iterspec[2], "iter") + push!(iters, iter) + push!(new_iterspecs, @ast ctx iterspec [K"in" iterspec[1] iter]) + end + # Lower to nested for loops + # layer = new_scope_layer(ctx) + idx = new_local_binding(ctx, iterspecs, "idx") + @ast ctx ex [K"block" + iter_defs... + full_iter := if length(iters) == 1 + iters[1] + else + [K"call" + "product"::K"top" + iters... + ] + end + iter_size := [K"call" "IteratorSize"::K"top" full_iter] + size_unknown := [K"call" "isa"::K"core" iter_size "SizeUnknown"::K"top"] + result := [K"call" "_array_for"::K"top" element_type full_iter iter_size] + [K"=" idx [K"call" "first"::K"top" [K"call" "LinearIndices"::K"top" result]]] + [K"for" [K"iteration" Iterators.reverse(new_iterspecs)...] 
+ [K"block" + val := body + # TODO: inbounds setindex + [K"if" size_unknown + [K"call" "push!"::K"top" result val] + [K"call" "setindex!"::K"top" result val idx] + ] + #[K"call" "println"::K"top" [K"call" "typeof"::K"core" idx]] + [K"=" idx [K"call" "add_int"::K"top" idx 1::K"Integer"]] + ] + ] + result + ] +end + #------------------------------------------------------------------------------- # Expansion of array concatenation notation `[a b ; c d]` etc @@ -3560,12 +3615,17 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"typed_comprehension" @chk numchildren(ex) == 2 @chk kind(ex[2]) == K"generator" - # TODO: Hack for early lowering of selected typed_comprehension - @ast ctx ex [K"call" - "collect"::K"top" - expand_forms_2(ctx, ex[1]) - expand_forms_2(ctx, ex[2]) - ] + if numchildren(ex[2]) == 2 && kind(ex[2][2]) == K"iteration" + # Hack to lower simple typed comprehensions to loops very early, + # greatly reducing the number of functions and load on the compiler + expand_forms_2(ctx, expand_comprehension_to_loops(ctx, ex)) + else + @ast ctx ex [K"call" + "collect"::K"top" + expand_forms_2(ctx, ex[1]) + expand_forms_2(ctx, ex[2]) + ] + end elseif k == K"generator" expand_forms_2(ctx, expand_generator(ctx, ex)) elseif k == K"->" || k == K"do" diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 1402b67e7ba7b..39371a25b80cb 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -755,7 +755,7 @@ end # """ src = """ -ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") +Tuple[(x,y) for x in 1:2, y in 1:3] """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") diff --git a/JuliaLowering/test/generators.jl b/JuliaLowering/test/generators.jl index 1a9cb539b4d3b..9688066fc1cf6 100644 --- a/JuliaLowering/test/generators.jl +++ b/JuliaLowering/test/generators.jl @@ -49,4 +49,10 @@ collect(x for x in 1:3 for x in 1:2) collect((z=y; y=100; z) for y in 1:3 for x in 1:2) 
""") == [1, 1, 2, 2, 3, 3] +# Simple typed comprehension lowered to for loops +@test JuliaLowering.include_string(test_mod, """ +Tuple{Int,Int}[(x,y) for x in 1:2, y in 1:3] +""") == [(1,1) (1,2) (1,3) + (2,1) (2,2) (2,3)] + end diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index 62bf7d862e986..47b6e0784bb6d 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -307,12 +307,57 @@ Detailed provenance: 4 (return %₃) ######################################## -# Typed comprehension lowers to generator with collect -T[x for x in xs] +# Simple typed comprehension lowers to for loop +T[(x,y) for x in xs, y in ys] #--------------------- -1 TestMod.T -2 TestMod.xs -3 (call top.Generator top.identity %₂) -4 (call top.collect %₁ %₃) -5 (return %₄) +1 TestMod.xs +2 TestMod.ys +3 (call top.product %₁ %₂) +4 (call top.IteratorSize %₃) +5 (call core.isa %₄ top.SizeUnknown) +6 TestMod.T +7 (call top._array_for %₆ %₃ %₄) +8 (call top.LinearIndices %₇) +9 (= slot₁/idx (call top.first %₈)) +10 (= slot₃/next (call top.iterate %₂)) +11 slot₃/next +12 (call core.=== %₁₁ core.nothing) +13 (call top.not_int %₁₂) +14 (gotoifnot %₁₃ label₅₀) +15 slot₃/next +16 (= slot₄/y (call core.getfield %₁₅ 1)) +17 (call core.getfield %₁₅ 2) +18 (= slot₂/next (call top.iterate %₁)) +19 slot₂/next +20 (call core.=== %₁₉ core.nothing) +21 (call top.not_int %₂₀) +22 (gotoifnot %₂₁ label₄₄) +23 slot₄/y +24 (= slot₆/y %₂₃) +25 slot₂/next +26 (= slot₅/x (call core.getfield %₂₅ 1)) +27 (call core.getfield %₂₅ 2) +28 slot₅/x +29 slot₆/y +30 (call core.tuple %₂₈ %₂₉) +31 (gotoifnot %₅ label₃₄) +32 (call top.push! %₇ %₃₀) +33 (goto label₃₆) +34 slot₁/idx +35 (call top.setindex! 
%₇ %₃₀ %₃₄) +36 slot₁/idx +37 (= slot₁/idx (call top.add_int %₃₆ 1)) +38 (= slot₂/next (call top.iterate %₁ %₂₇)) +39 slot₂/next +40 (call core.=== %₃₉ core.nothing) +41 (call top.not_int %₄₀) +42 (gotoifnot %₄₁ label₄₄) +43 (goto label₂₃) +44 (= slot₃/next (call top.iterate %₂ %₁₇)) +45 slot₃/next +46 (call core.=== %₄₅ core.nothing) +47 (call top.not_int %₄₆) +48 (gotoifnot %₄₇ label₅₀) +49 (goto label₁₅) +50 (return %₇) From 69f17d1efcc4340941bea8ba636d3b7f27560ec8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 27 Jan 2025 16:49:11 +1000 Subject: [PATCH 0958/1109] Add `K"SourceLocation"` for richer source location literals Previously, we converted to LineNumberNode early in cases where the line information needs to be emitted into the IR (eg, functionloc in method signatures). However, this is lossy. `K"SourceLocation"` is a "source provenance literal" - it doesn't hold any data itself, but is merely a marker from which the source location can later be extracted from the provenance tree. Also factor out function argument matching of expand_function_def so it can be called separately for opaque closure desugaring. 
--- JuliaLowering/src/ast.jl | 2 +- JuliaLowering/src/desugaring.jl | 134 +++---- JuliaLowering/src/eval.jl | 2 + JuliaLowering/src/kinds.jl | 3 + JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/syntax_graph.jl | 1 + JuliaLowering/test/assignments_ir.jl | 11 +- JuliaLowering/test/closures_ir.jl | 197 +++++----- JuliaLowering/test/decls_ir.jl | 9 +- JuliaLowering/test/functions_ir.jl | 514 +++++++++++++++------------ JuliaLowering/test/generators_ir.jl | 196 +++++----- JuliaLowering/test/macros_ir.jl | 18 +- JuliaLowering/test/scopes_ir.jl | 18 +- JuliaLowering/test/typedefs_ir.jl | 380 ++++++++++---------- 14 files changed, 798 insertions(+), 689 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 794fc56c72c03..3a88fec2dbe7a 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -146,7 +146,7 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) makeleaf(graph, srcref, k; id=value, kws...) elseif k == K"symbolic_label" makeleaf(graph, srcref, k; name_val=value, kws...) - elseif k == K"TOMBSTONE" + elseif k == K"TOMBSTONE" || k == K"SourceLocation" makeleaf(graph, srcref, k; kws...) else val = k == K"Integer" ? convert(Int, value) : diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index e63369a5bfbbb..e55a167484925 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2063,45 +2063,60 @@ end #------------------------------------------------------------------------------- # Expansion of function definitions -function match_function_arg(full_ex) - name = nothing - type = nothing - default = nothing - is_slurp = false - ex = full_ex - while true - k = kind(ex) - if k == K"Identifier" || k == K"Placeholder" || k == K"tuple" - name = ex - break - elseif k == K"::" - @chk numchildren(ex) in (1,2) - if numchildren(ex) == 1 - type = ex[1] - else - name = ex[1] - type = ex[2] - end - break - elseif k == K"..." 
- @chk !is_slurp (full_ex,"nested `...` in function argument") - @chk numchildren(ex) == 1 - is_slurp = true - ex = ex[1] - elseif k == K"=" - if !isnothing(default) - throw(full_ex, "multiple defaults provided with `=` in function argument") - end - default = ex[2] - ex = ex[1] +function expand_function_arg(ctx, body_stmts, arg, is_last_arg) + ex = arg + + if kind(ex) == K"=" + default = ex[2] + ex = ex[1] + else + default = nothing + end + + if kind(ex) == K"..." + if !is_last_arg + throw(LoweringError(arg, "`...` may only be used for the last function argument")) + end + @chk numchildren(ex) == 1 + slurp_ex = ex + ex = ex[1] + else + slurp_ex = nothing + end + + if kind(ex) == K"::" + @chk numchildren(ex) in (1,2) + if numchildren(ex) == 1 + type = ex[1] + ex = @ast ctx ex "_"::K"Placeholder" else - throw(LoweringError(ex, "Invalid function argument")) + type = ex[2] + ex = ex[1] end + else + type = @ast ctx ex "Any"::K"core" + end + if !isnothing(slurp_ex) + type = @ast ctx slurp_ex [K"curly" "Vararg"::K"core" type] end - return (name=name, - type=type, - default=default, - is_slurp=is_slurp) + + k = kind(ex) + if k == K"tuple" + # Argument destructuring + is_nospecialize = getmeta(arg, :nospecialize, false) + name = new_local_binding(ctx, ex, "destructured_arg"; + kind=:argument, is_nospecialize=is_nospecialize) + push!(body_stmts, @ast ctx ex [ + K"local"(meta=CompileHints(:is_destructured_arg, true)) + [K"=" ex name] + ]) + elseif k == K"Identifier" || k == K"Placeholder" + name = ex + else + throw(LoweringError(ex, "Invalid function argument")) + end + + return (name, type, default, !isnothing(slurp_ex)) end # Expand `where` clause(s) of a function into (typevar_names, typevar_stmts) where @@ -2138,7 +2153,7 @@ function method_def_expr(ctx, srcref, callex, method_table, "svec" ::K"core" typevar_names... 
] - QuoteNode(source_location(LineNumberNode, callex))::K"Value" + ::K"SourceLocation"(callex) ] [K"method" method_table @@ -2350,30 +2365,19 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= first_default = 0 arg_defaults = SyntaxList(ctx) for (i,arg) in enumerate(args) - info = match_function_arg(arg) - aname = !isnothing(info.name) ? info.name : @ast ctx arg "_"::K"Placeholder" - if kind(aname) == K"tuple" - # Argument destructuring - is_nospecialize = getmeta(arg, :nospecialize, false) - n = new_local_binding(ctx, aname, "destructured_arg_$i"; - kind=:argument, is_nospecialize=is_nospecialize) - push!(body_stmts, @ast ctx aname [ - K"local"(meta=CompileHints(:is_destructured_arg, true)) - [K"=" aname n] - ]) - aname = n - end + (aname, atype, default, is_slurp) = expand_function_arg(ctx, body_stmts, arg, + i == length(args)) push!(arg_names, aname) - atype = !isnothing(info.type) ? info.type : @ast ctx arg "Any"::K"core" - if info.is_slurp - if i != length(args) - throw(LoweringError(arg, "`...` may only be used for the last function argument")) - end - atype = @ast ctx arg [K"curly" "Vararg"::K"core" atype] - end - if isnothing(info.default) - if !isempty(arg_defaults) && !info.is_slurp + # TODO: Ideally, ensure side effects of evaluating arg_types only + # happen once - we should create an ssavar if there's any following + # defaults. (flisp lowering doesn't ensure this either). Beware if + # fixing this that optional_positional_defs! depends on filtering the + # *symbolic* representation of arg_types. + push!(arg_types, atype) + + if isnothing(default) + if !isempty(arg_defaults) && !is_slurp # TODO: Referring to multiple pieces of syntax in one error message is necessary. # TODO: Poison ASTs with error nodes and continue rather than immediately throwing. 
# @@ -2391,14 +2395,8 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if isempty(arg_defaults) first_default = i end - push!(arg_defaults, info.default) + push!(arg_defaults, default) end - # TODO: Ideally, ensure side effects of evaluating arg_types only - # happen once - we should create an ssavar if there's any following - # defaults. (flisp lowering doesn't ensure this either). Beware if - # fixing this that optional_positional_defs! depends on filtering the - # *symbolic* representation of arg_types. - push!(arg_types, atype) end if !isnothing(return_type) @@ -2471,6 +2469,8 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] end +#------------------------------------------------------------------------------- +# Anon function syntax function expand_arrow_arglist(ctx, arglist, arrowname) k = kind(arglist) if k == K"where" @@ -3288,7 +3288,7 @@ function expand_struct_def(ctx, ex, docs) bind_docs!::K"Value" struct_name isnothing(docs) ? nothing_(ctx, ex) : docs[1] - QuoteNode(source_location(LineNumberNode, ex))::K"Value" + ::K"SourceLocation"(ex) [K"=" "field_docs"::K"Identifier" [K"call" "svec"::K"core" field_docs...] diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 00c0340da1811..327025702a4a0 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -218,6 +218,8 @@ function to_lowered_expr(mod, ex, ssa_offset=0) # TODO: Should we even have plain identifiers at this point or should # they all effectively be resolved into GlobalRef earlier? 
Symbol(ex.name_val) + elseif k == K"SourceLocation" + QuoteNode(source_location(LineNumberNode, ex)) elseif k == K"Symbol" QuoteNode(Symbol(ex.name_val)) elseif k == K"slot" diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 60a9a7f0c4813..f3c160578c4a9 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -60,6 +60,9 @@ function _register_kinds() "top" "core" "lambda" + # "A source location literal" - a node which exists only to record + # a sourceref + "SourceLocation" # [K"function_decl" name] # Declare a zero-method generic function with global `name` or # creates a closure object and assigns it to the local `name`. diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index ef44bc6e6540a..e04b875404598 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -563,7 +563,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || - k == K"Placeholder" + k == K"Placeholder" || k == K"SourceLocation" # TODO: other kinds: copyast $ globalref thismodule cdecl stdcall fastcall thiscall llvmcall if needs_value && k == K"Placeholder" # TODO: ensure outterref, globalref work here diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 552a7a36e8872..7568310a15257 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -434,6 +434,7 @@ function _value_string(ex) k == K"static_parameter" ? "static_parameter" : k == K"symbolic_label" ? "label:$(ex.name_val)" : k == K"symbolic_goto" ? "goto:$(ex.name_val)" : + k == K"SourceLocation" ? 
"SourceLocation:$(JuliaSyntax.filename(ex)):$(join(source_location(ex), ':'))" : repr(get(ex, :value, nothing)) id = get(ex, :var_id, nothing) if isnothing(id) diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 0959f819d3169..80a419ce5d8f7 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -35,15 +35,16 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::3:9 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/c(!read)] 1 TestMod.d 2 (= slot₂/c %₁) 3 (return %₁) -8 TestMod.b -9 (= slot₁/a %₈) -10 (return %₈) +9 TestMod.b +10 (= slot₁/a %₉) +11 (return %₉) ######################################## # a.b = ... => setproperty! assignment diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 978d70ab1f7f6..dc16fcb40da9e 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -24,8 +24,9 @@ end 3 TestMod.#f##0 4 (call core.svec %₃ core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::3:14 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)] 1 TestMod.+ 2 (call core.getfield slot₁/#self# :x) @@ -37,14 +38,14 @@ end 8 (call core.getfield %₂ :contents) 9 (call %₁ %₈ slot₂/y) 10 (return %₉) -8 1 -9 slot₂/x -10 (call core.setfield! %₉ :contents %₈) -11 TestMod.#f##0 -12 slot₂/x -13 (= slot₁/f (new %₁₁ %₁₂)) -14 slot₁/f -15 (return %₁₄) +9 1 +10 slot₂/x +11 (call core.setfield! 
%₁₀ :contents %₉) +12 TestMod.#f##0 +13 slot₂/x +14 (= slot₁/f (new %₁₂ %₁₃)) +15 slot₁/f +16 (return %₁₅) ######################################## # Closure which sets the value of a captured variable @@ -71,21 +72,22 @@ end 3 TestMod.#f##1 4 (call core.svec %₃ core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 3 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::3:14 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 2 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! %₂ :contents %₁) 4 (return %₁) -8 1 -9 slot₂/x -10 (call core.setfield! %₉ :contents %₈) -11 TestMod.#f##1 -12 slot₂/x -13 (= slot₁/f (new %₁₁ %₁₂)) -14 slot₁/f -15 (return %₁₄) +9 1 +10 slot₂/x +11 (call core.setfield! %₁₀ :contents %₉) +12 TestMod.#f##1 +13 slot₂/x +14 (= slot₁/f (new %₁₂ %₁₃)) +15 slot₁/f +16 (return %₁₅) ######################################## # Function where arguments are captured into a closure and assigned @@ -113,19 +115,21 @@ end 3 TestMod.#f#g##0 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::2:14 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read)] 1 10 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! 
%₂ :contents %₁) 4 (return %₁) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/x slot₃/g(called) slot₄/x(!read)] 1 (= slot₂/x (call core.Box slot₂/x)) 2 TestMod.#f#g##0 @@ -141,8 +145,8 @@ end 12 slot₄/x 13 (call core.getfield %₇ :contents) 14 (return %₁₃) -14 TestMod.f -15 (return %₁₄) +16 TestMod.f +17 (return %₁₆) ######################################## # Closure where a local `x` is captured but not boxed @@ -170,18 +174,20 @@ end 3 TestMod.#f#g##1 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::2:14 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 (call core.getfield slot₁/#self# :x) 2 (= slot₂/y %₁) 3 (return %₁) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/z(!read)] 1 TestMod.#f#g##1 2 (call core.typeof slot₂/x) @@ -191,8 +197,8 @@ end 6 slot₂/x 7 (= slot₄/z %₆) 8 (return %₆) -14 TestMod.f -15 (return %₁₄) +16 TestMod.f +17 (return %₁₆) ######################################## # Closure where a static parameter of an outer function is captured @@ -219,22 +225,24 @@ end 3 TestMod.#f#g##2 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) 
-7 --- method core.nothing %₆ +6 SourceLocation::2:14 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read)] 1 TestMod.use 2 (call core.getfield slot₁/#self# :T) 3 (call %₁ %₂) 4 (return %₃) -8 (= slot₁/T (call core.TypeVar :T)) -9 TestMod.f -10 (call core.Typeof %₉) -11 slot₁/T -12 (call core.svec %₁₀ %₁₁) -13 slot₁/T -14 (call core.svec %₁₃) -15 (call core.svec %₁₂ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) -16 --- method core.nothing %₁₅ +9 (= slot₁/T (call core.TypeVar :T)) +10 TestMod.f +11 (call core.Typeof %₁₀) +12 slot₁/T +13 (call core.svec %₁₁ %₁₂) +14 slot₁/T +15 (call core.svec %₁₄) +16 SourceLocation::1:10 +17 (call core.svec %₁₃ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/g] 1 TestMod.#f#g##2 2 static_parameter₁ @@ -244,8 +252,8 @@ end 6 (= slot₃/g (new %₄ %₅)) 7 slot₃/g 8 (return %₇) -17 TestMod.f -18 (return %₁₇) +19 TestMod.f +20 (return %₁₉) ######################################## # Closure captures with `isdefined` @@ -278,8 +286,9 @@ end 3 TestMod.#f#g##3 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 2 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::2:14 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/z] 1 (= slot₂/z 3) 2 (call core.getfield slot₁/#self# :y) @@ -287,12 +296,13 @@ end 4 (isdefined slot₂/z) 5 (call core.tuple true %₃ %₄) 6 (return %₅) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/y] 1 (= slot₄/y (call core.Box)) 2 TestMod.#f#g##3 @@ -308,11 +318,11 @@ end 12 (call core.isdefined %₁₁ :contents) 
13 (call core.tuple %₁₂ true) 14 (return %₁₃) -14 TestMod.f -15 (return %₁₄) +16 TestMod.f +17 (return %₁₆) ######################################## -# Nested captures of arguments +# FIXME: Nested captures of arguments function f(x) function g(y) function h(z) @@ -320,6 +330,19 @@ function f(x) end end end +#--------------------- +LoweringError: +function f(x) +# ╙ ── Found unexpected binding of kind argument + function g(y) + function h(z) + +Detailed provenance: +#₈/x +└─ x + └─ x + └─ @ :1 + ######################################## # Global method capturing local variables @@ -339,8 +362,9 @@ end 7 (call core.Typeof %₆) 8 (call core.svec %₇) 9 (call core.svec) -10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 3 =#))))) -11 --- code_info +10 SourceLocation::3:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- code_info slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 TestMod.+ 2 (captured_local 1) @@ -354,12 +378,12 @@ end 10 (captured_local 1) 11 (call core.setfield! %₁₀ :contents %₉) 12 (return %₉) -12 slot₁/x -13 (call core.svec %₁₂) -14 (call JuliaLowering.replace_captured_locals! %₁₁ %₁₃) -15 --- method core.nothing %₁₀ %₁₄ -16 TestMod.f -17 (return %₁₆) +13 slot₁/x +14 (call core.svec %₁₃) +15 (call JuliaLowering.replace_captured_locals! 
%₁₂ %₁₄) +16 --- method core.nothing %₁₁ %₁₅ +17 TestMod.f +18 (return %₁₇) ######################################## # Anonymous function syntax with -> @@ -382,13 +406,14 @@ x -> x*x 4 TestMod.#->##0 5 (call core.svec %₄ core.Any) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:1 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) -9 (return %₃) +10 (return %₃) ######################################## # Anonymous function syntax with `function` @@ -413,13 +438,14 @@ end 4 TestMod.##anon###0 5 (call core.svec %₄ core.Any) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) -9 (return %₃) +10 (return %₃) ######################################## # `do` blocks @@ -447,17 +473,18 @@ end 7 TestMod.#do##0 8 (call core.svec %₇ core.Any) 9 (call core.svec) -10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 1 =#))))) -11 --- method core.nothing %₁₀ +10 SourceLocation::1:13 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/y] 1 TestMod.+ 2 (call %₁ slot₂/y 2) 3 (return %₂) -12 TestMod.#do##0 -13 (new %₁₂) -14 TestMod.x -15 (call core.kwcall %₅ %₁ %₁₃ %₁₄) -16 (return %₁₅) +13 TestMod.#do##0 +14 (new %₁₃) +15 TestMod.x +16 (call core.kwcall %₅ %₁ %₁₄ %₁₅) +17 (return %₁₆) ######################################## # Error: Attempt to add methods to a function argument diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index ec2e3f21c76e7..c284ff77edc07 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -128,8 +128,9 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ 
core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read) slot₄/tmp(!read)] 1 1 2 (= slot₃/tmp %₁) @@ -161,8 +162,8 @@ end 28 (= slot₂/x %₂₇) 29 slot₂/x 30 (return %₂₉) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Error: global type decls only allowed at top level diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 065d0b27d294f..6bdd46dac3c05 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -9,14 +9,15 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ core.Any core.Any core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/_(!read) slot₄/y] 1 TestMod.+ 2 (call %₁ slot₂/x slot₄/y) 3 (return %₂) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Functions with argument types only, no name @@ -30,13 +31,14 @@ end 4 TestMod.T 5 (call core.svec %₃ %₄ core.Any) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] 1 slot₃/x 2 (return %₁) -9 TestMod.f -10 (return %₉) +10 TestMod.f +11 (return %₁₀) ######################################## # Functions argument types @@ -50,13 +52,14 @@ end 4 TestMod.T 5 (call core.svec %₃ core.Any %₄) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ 
slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] 1 TestMod.body 2 (return %₁) -9 TestMod.f -10 (return %₉) +10 TestMod.f +11 (return %₁₀) ######################################## # Functions with slurp of Any @@ -70,13 +73,14 @@ end 4 (call core.apply_type core.Vararg core.Any) 5 (call core.svec %₃ core.Any %₄) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] 1 TestMod.body 2 (return %₁) -9 TestMod.f -10 (return %₉) +10 TestMod.f +11 (return %₁₀) ######################################## # Functions with slurp of T @@ -91,13 +95,14 @@ end 5 (call core.apply_type core.Vararg %₄) 6 (call core.svec %₃ core.Any %₅) 7 (call core.svec) -8 (call core.svec %₆ %₇ :($(QuoteNode(:(#= line 1 =#))))) -9 --- method core.nothing %₈ +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] 1 TestMod.body 2 (return %₁) -10 TestMod.f -11 (return %₁₀) +11 TestMod.f +12 (return %₁₁) ######################################## # Error: Function with slurp not in last position arg @@ -131,16 +136,17 @@ end 12 slot₃/V 13 slot₁/T 14 (call core.svec %₁₁ %₁₂ %₁₃) -15 (call core.svec %₁₀ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) -16 --- method core.nothing %₁₅ +15 SourceLocation::1:10 +16 (call core.svec %₁₀ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/_(!read) slot₄/_(!read)] 1 static_parameter₃ 2 static_parameter₁ 3 static_parameter₂ 4 (call core.tuple %₁ %₂ %₃) 5 (return %₄) -17 TestMod.f -18 (return %₁₇) +18 TestMod.f +19 (return %₁₈) ######################################## # Static parameter with bounds and used with apply_type in argument @@ -160,13 +166,14 @@ end 10 (call core.svec %₆ %₉) 11 slot₁/T 12 (call core.svec %₁₁) -13 (call core.svec %₁₀ %₁₂ 
:($(QuoteNode(:(#= line 1 =#))))) -14 --- method core.nothing %₁₃ +13 SourceLocation::1:10 +14 (call core.svec %₁₀ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 static_parameter₁ 2 (return %₁) -15 TestMod.f -16 (return %₁₅) +16 TestMod.f +17 (return %₁₆) ######################################## # Return types @@ -182,8 +189,9 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read)] 1 TestMod.Int 2 (gotoifnot slot₂/x label₃) @@ -197,8 +205,8 @@ end 10 (= slot₃/tmp (call core.typeassert %₉ %₁)) 11 slot₃/tmp 12 (return %₁₁) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Callable type @@ -209,12 +217,13 @@ end 1 TestMod.T 2 (call core.svec %₁ core.Any) 3 (call core.svec) -4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 1 =#))))) -5 --- method core.nothing %₄ +4 SourceLocation::1:10 +5 (call core.svec %₂ %₃ %₄) +6 --- method core.nothing %₅ slots: [slot₁/#self#(!read) slot₂/x] 1 slot₂/x 2 (return %₁) -6 (return core.nothing) +7 (return core.nothing) ######################################## # Callable type with instance @@ -225,12 +234,13 @@ end 1 TestMod.T 2 (call core.svec %₁ core.Any) 3 (call core.svec) -4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 1 =#))))) -5 --- method core.nothing %₄ +4 SourceLocation::1:10 +5 (call core.svec %₂ %₃ %₄) +6 --- method core.nothing %₅ slots: [slot₁/y slot₂/x] 1 (call core.tuple slot₁/y slot₂/x) 2 (return %₁) -6 (return core.nothing) +7 (return core.nothing) ######################################## # `where` params used in callable object type @@ -245,12 +255,13 @@ end 5 (call core.svec %₄) 6 slot₁/T 7 (call core.svec %₆) -8 (call core.svec %₅ %₇ :($(QuoteNode(:(#= line 1 =#))))) -9 --- method 
core.nothing %₈ +8 SourceLocation::1:10 +9 (call core.svec %₅ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/x(!read)] 1 static_parameter₁ 2 (return %₁) -10 (return core.nothing) +11 (return core.nothing) ######################################## # Function with module ref in name @@ -262,11 +273,12 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read)] 1 (return core.nothing) -8 (return core.nothing) +9 (return core.nothing) ######################################## # Error: Invalid function name ccall @@ -378,36 +390,39 @@ end 4 TestMod.T 5 (call core.svec %₃ %₄) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 TestMod.T -12 TestMod.S -13 (call core.svec %₁₀ %₁₁ %₁₂) -14 (call core.svec) -15 (call core.svec %₁₃ %₁₄ :($(QuoteNode(:(#= line 1 =#))))) -16 --- method core.nothing %₁₅ +10 TestMod.f +11 (call core.Typeof %₁₀) +12 TestMod.T +13 TestMod.S +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -17 TestMod.f -18 (call core.Typeof %₁₇) -19 TestMod.T -20 TestMod.S -21 TestMod.U -22 (call core.svec %₁₈ %₁₉ %₂₀ %₂₁) -23 (call core.svec) -24 (call core.svec %₂₂ %₂₃ :($(QuoteNode(:(#= line 1 =#))))) -25 --- method core.nothing %₂₄ +19 TestMod.f +20 (call core.Typeof %₁₉) +21 TestMod.T +22 TestMod.S +23 TestMod.U +24 (call core.svec %₂₀ %₂₁ %₂₂ %₂₃) +25 (call core.svec) +26 SourceLocation::1:10 
+27 (call core.svec %₂₄ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z(!read)] 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) -26 TestMod.f -27 (return %₂₆) +29 TestMod.f +30 (return %₂₉) ######################################## # Default positional args which depend on other args @@ -420,31 +435,34 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x slot₂/x) 2 (return %₁) -14 TestMod.f -15 (call core.Typeof %₁₄) -16 (call core.svec %₁₅ core.Any core.Any) -17 (call core.svec) -18 (call core.svec %₁₆ %₁₇ :($(QuoteNode(:(#= line 1 =#))))) -19 --- method core.nothing %₁₈ +16 TestMod.f +17 (call core.Typeof %₁₆) +18 (call core.svec %₁₇ core.Any core.Any) +19 (call core.svec) +20 SourceLocation::1:10 +21 (call core.svec %₁₈ %₁₉ %₂₀) +22 --- method core.nothing %₂₁ slots: [slot₁/#self#(!read) slot₂/x slot₃/y] 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) -20 TestMod.f -21 (return %₂₀) +23 TestMod.f +24 (return %₂₃) ######################################## # Default positional args with missing arg names (implicit placeholders) @@ -458,33 +476,36 @@ end 4 TestMod.Int 5 (call core.svec %₃ %₄) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call 
core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called) slot₂/_] 1 (call slot₁/#self# slot₂/_ 1 2) 2 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 TestMod.Int -12 (call core.svec %₁₀ %₁₁ core.Any) -13 (call core.svec) -14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) -15 --- method core.nothing %₁₄ +10 TestMod.f +11 (call core.Typeof %₁₀) +12 TestMod.Int +13 (call core.svec %₁₁ %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(called) slot₂/_ slot₃/y] 1 (call slot₁/#self# slot₂/_ slot₃/y 2) 2 (return %₁) -16 TestMod.f -17 (call core.Typeof %₁₆) -18 TestMod.Int -19 (call core.svec %₁₇ %₁₈ core.Any core.Any) -20 (call core.svec) -21 (call core.svec %₁₉ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) -22 --- method core.nothing %₂₁ +18 TestMod.f +19 (call core.Typeof %₁₈) +20 TestMod.Int +21 (call core.svec %₁₉ %₂₀ core.Any core.Any) +22 (call core.svec) +23 SourceLocation::1:10 +24 (call core.svec %₂₁ %₂₂ %₂₃) +25 --- method core.nothing %₂₄ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/y slot₄/z] 1 (call core.tuple slot₃/y slot₄/z) 2 (return %₁) -23 TestMod.f -24 (return %₂₃) +26 TestMod.f +27 (return %₂₆) ######################################## # Default positional args with placeholders @@ -498,23 +519,25 @@ end 4 TestMod.Int 5 (call core.svec %₃ %₄) 6 (call core.svec) -7 (call core.svec %₅ %₆ :($(QuoteNode(:(#= line 1 =#))))) -8 --- method core.nothing %₇ +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called) slot₂/_] 1 (call slot₁/#self# slot₂/_ 1) 2 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 TestMod.Int -12 (call core.svec %₁₀ %₁₁ core.Any) -13 (call core.svec) -14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) -15 --- method core.nothing %₁₄ +10 TestMod.f +11 (call core.Typeof %₁₀) +12 TestMod.Int +13 (call core.svec %₁₁ %₁₂ core.Any) +14 (call 
core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] 1 slot₃/x 2 (return %₁) -16 TestMod.f -17 (return %₁₆) +18 TestMod.f +19 (return %₁₈) ######################################## # Positional args with defaults and `where` clauses @@ -534,41 +557,44 @@ end 10 (call core.svec %₈ %₉) 11 slot₂/T 12 (call core.svec %₁₁) -13 (call core.svec %₁₀ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) -14 --- method core.nothing %₁₃ +13 SourceLocation::1:10 +14 (call core.svec %₁₀ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) -15 TestMod.f -16 (call core.Typeof %₁₅) -17 slot₂/T -18 slot₁/S -19 (call core.svec %₁₆ %₁₇ %₁₈) -20 slot₂/T -21 slot₁/S -22 (call core.svec %₂₀ %₂₁) -23 (call core.svec %₁₉ %₂₂ :($(QuoteNode(:(#= line 1 =#))))) -24 --- method core.nothing %₂₃ +16 TestMod.f +17 (call core.Typeof %₁₆) +18 slot₂/T +19 slot₁/S +20 (call core.svec %₁₇ %₁₈ %₁₉) +21 slot₂/T +22 slot₁/S +23 (call core.svec %₂₁ %₂₂) +24 SourceLocation::1:10 +25 (call core.svec %₂₀ %₂₃ %₂₄) +26 --- method core.nothing %₂₅ slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -25 TestMod.f -26 (call core.Typeof %₂₅) -27 slot₂/T -28 slot₁/S -29 slot₃/U -30 (call core.svec %₂₆ %₂₇ %₂₈ %₂₉) -31 slot₂/T -32 slot₁/S -33 slot₃/U -34 (call core.svec %₃₁ %₃₂ %₃₃) -35 (call core.svec %₃₀ %₃₄ :($(QuoteNode(:(#= line 1 =#))))) -36 --- method core.nothing %₃₅ +27 TestMod.f +28 (call core.Typeof %₂₇) +29 slot₂/T +30 slot₁/S +31 slot₃/U +32 (call core.svec %₂₈ %₂₉ %₃₀ %₃₁) +33 slot₂/T +34 slot₁/S +35 slot₃/U +36 (call core.svec %₃₃ %₃₄ %₃₅) +37 SourceLocation::1:10 +38 (call core.svec %₃₂ %₃₆ %₃₇) +39 --- method core.nothing %₃₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] 1 (call core.tuple slot₂/x slot₃/y slot₄/z) 2 (return %₁) -37 TestMod.f -38 (return %₃₇) +40 TestMod.f +41 (return %₄₀) 
######################################## # Positional args and type parameters with transitive dependencies @@ -589,43 +615,46 @@ end 9 (call core.Typeof %₈) 10 (call core.svec %₉ core.Any) 11 (call core.svec) -12 (call core.svec %₁₀ %₁₁ :($(QuoteNode(:(#= line 1 =#))))) -13 --- method core.nothing %₁₂ +12 SourceLocation::1:10 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ slots: [slot₁/#self#(called) slot₂/x] 1 (call top.vect 1) 2 (call slot₁/#self# slot₂/x %₁ 2) 3 (return %₂) -14 TestMod.f -15 (call core.Typeof %₁₄) -16 slot₁/S -17 (call core.svec %₁₅ core.Any %₁₆) -18 slot₂/T -19 slot₁/S -20 (call core.svec %₁₈ %₁₉) -21 (call core.svec %₁₇ %₂₀ :($(QuoteNode(:(#= line 1 =#))))) -22 --- method core.nothing %₂₁ +15 TestMod.f +16 (call core.Typeof %₁₅) +17 slot₁/S +18 (call core.svec %₁₆ core.Any %₁₇) +19 slot₂/T +20 slot₁/S +21 (call core.svec %₁₉ %₂₀) +22 SourceLocation::1:10 +23 (call core.svec %₁₈ %₂₁ %₂₂) +24 --- method core.nothing %₂₃ slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -23 TestMod.f -24 (call core.Typeof %₂₃) -25 slot₁/S -26 slot₃/U -27 (call core.svec %₂₄ core.Any %₂₅ %₂₆) -28 slot₂/T -29 slot₁/S -30 slot₃/U -31 (call core.svec %₂₈ %₂₉ %₃₀) -32 (call core.svec %₂₇ %₃₁ :($(QuoteNode(:(#= line 1 =#))))) -33 --- method core.nothing %₃₂ +25 TestMod.f +26 (call core.Typeof %₂₅) +27 slot₁/S +28 slot₃/U +29 (call core.svec %₂₆ core.Any %₂₇ %₂₈) +30 slot₂/T +31 slot₁/S +32 slot₃/U +33 (call core.svec %₃₀ %₃₁ %₃₂) +34 SourceLocation::1:10 +35 (call core.svec %₂₉ %₃₃ %₃₄) +36 --- method core.nothing %₃₅ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] 1 static_parameter₁ 2 static_parameter₂ 3 static_parameter₃ 4 (call core.tuple slot₂/x slot₃/y slot₄/z %₁ %₂ %₃) 5 (return %₄) -34 TestMod.f -35 (return %₃₄) +37 TestMod.f +38 (return %₃₇) ######################################## # Default positional args are allowed before trailing slurp with no default @@ -638,23 +667,25 @@ end 3 
(call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.apply_type core.Vararg core.Any) -11 (call core.svec %₉ core.Any %₁₀) -12 (call core.svec) -13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) -14 --- method core.nothing %₁₃ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.apply_type core.Vararg core.Any) +12 (call core.svec %₁₀ core.Any %₁₁) +13 (call core.svec) +14 SourceLocation::1:10 +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys] 1 slot₃/ys 2 (return %₁) -15 TestMod.f -16 (return %₁₅) +17 TestMod.f +18 (return %₁₇) ######################################## # Error: Default positional args after a slurp @@ -679,23 +710,25 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.apply_type core.Vararg core.Any) -11 (call core.svec %₉ %₁₀) -12 (call core.svec) -13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) -14 --- method core.nothing %₁₃ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.apply_type core.Vararg core.Any) +12 (call core.svec %₁₀ %₁₁) +13 (call core.svec) +14 SourceLocation::1:10 +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ slots: [slot₁/#self#(!read) slot₂/xs] 1 slot₂/xs 2 (return %₁) -15 TestMod.f -16 (return %₁₅) +17 TestMod.f +18 (return %₁₇) ######################################## # Positional arg with slurp and splatted 
default value @@ -708,24 +741,26 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#] 1 (call core.tuple 1 2) 2 (call core._apply_iterate top.iterate slot₁/#self# %₁) 3 (return %₂) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.apply_type core.Vararg core.Any) -11 (call core.svec %₉ %₁₀) -12 (call core.svec) -13 (call core.svec %₁₁ %₁₂ :($(QuoteNode(:(#= line 1 =#))))) -14 --- method core.nothing %₁₃ +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.apply_type core.Vararg core.Any) +12 (call core.svec %₁₀ %₁₁) +13 (call core.svec) +14 SourceLocation::1:10 +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ slots: [slot₁/#self#(!read) slot₂/xs] 1 slot₂/xs 2 (return %₁) -15 TestMod.f -16 (return %₁₅) +17 TestMod.f +18 (return %₁₇) ######################################## # Trivial function argument destructuring @@ -737,18 +772,19 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ core.Any core.Any core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ - slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/destructured_arg_2 slot₄/w(!read) slot₅/iterstate slot₆/y(!read) slot₇/z(!read)] - 1 (call top.indexed_iterate slot₃/destructured_arg_2 1) +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/destructured_arg slot₄/w(!read) slot₅/iterstate slot₆/y(!read) slot₇/z(!read)] + 1 (call top.indexed_iterate slot₃/destructured_arg 1) 2 (= slot₆/y (call core.getfield %₁ 1)) 3 (= slot₅/iterstate (call core.getfield %₁ 2)) 4 slot₅/iterstate - 5 (call top.indexed_iterate slot₃/destructured_arg_2 2 %₄) + 5 (call top.indexed_iterate slot₃/destructured_arg 2 %₄) 6 (= slot₇/z (call core.getfield %₅ 1)) 7 (return 
core.nothing) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Function argument destructuring combined with splats, types and and defaults @@ -760,26 +796,28 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(called)] 1 TestMod.rhs 2 (call slot₁/#self# %₁) 3 (return %₂) -8 TestMod.f -9 (call core.Typeof %₈) -10 TestMod.T -11 (call core.apply_type core.Vararg %₁₀) -12 (call core.svec %₉ %₁₁) -13 (call core.svec) -14 (call core.svec %₁₂ %₁₃ :($(QuoteNode(:(#= line 1 =#))))) -15 --- method core.nothing %₁₄ - slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/x(!read)] - 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) +9 TestMod.f +10 (call core.Typeof %₉) +11 TestMod.T +12 (call core.apply_type core.Vararg %₁₁) +13 (call core.svec %₁₀ %₁₂) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/x(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₃/x (call core.getfield %₁ 1)) 3 (return core.nothing) -16 TestMod.f -17 (return %₁₆) +18 TestMod.f +19 (return %₁₈) ######################################## # Duplicate destructured placeholders ok @@ -791,16 +829,17 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ core.Any core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ - slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/destructured_arg_2] - 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/destructured_arg] + 1 (call top.indexed_iterate 
slot₂/destructured_arg 1) 2 (call core.getfield %₁ 1) - 3 (call top.indexed_iterate slot₃/destructured_arg_2 1) + 3 (call top.indexed_iterate slot₃/destructured_arg 1) 4 (call core.getfield %₃ 1) 5 (return core.nothing) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Slot flags @@ -813,15 +852,16 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ core.Any core.Any core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/x(nospecialize,!read) slot₃/g(called) slot₄/y] 1 TestMod.+ 2 (call slot₃/g) 3 (call %₁ %₂ slot₄/y) 4 (return %₃) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Binding docs to functions @@ -836,14 +876,15 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 4 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::4:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read)] 1 (return core.nothing) -8 TestMod.f -9 (call JuliaLowering.bind_docs! %₈ "some docs\n" %₆) -10 TestMod.f -11 (return %₁₀) +9 TestMod.f +10 (call JuliaLowering.bind_docs! %₉ "some docs\n" %₇) +11 TestMod.f +12 (return %₁₁) ######################################## # Binding docs to callable type @@ -856,13 +897,14 @@ end 1 TestMod.T 2 (call core.svec %₁) 3 (call core.svec) -4 (call core.svec %₂ %₃ :($(QuoteNode(:(#= line 4 =#))))) -5 --- method core.nothing %₄ +4 SourceLocation::4:10 +5 (call core.svec %₂ %₃ %₄) +6 --- method core.nothing %₅ slots: [slot₁/x(!read)] 1 (return core.nothing) -6 TestMod.T -7 (call JuliaLowering.bind_docs! %₆ "some docs\n" %₄) -8 (return core.nothing) +7 TestMod.T +8 (call JuliaLowering.bind_docs! 
%₇ "some docs\n" %₅) +9 (return core.nothing) ######################################## # Error: infix call without enough arguments diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index 47b6e0784bb6d..685310ad4d832 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -17,17 +17,18 @@ 2 TestMod.#->##0 3 (call core.svec %₂ core.Any) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +5 SourceLocation::1:2 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.+ 2 (call %₁ slot₂/x 1) 3 (return %₂) -7 TestMod.#->##0 -8 (new %₇) -9 TestMod.xs -10 (call top.Generator %₈ %₉) -11 (return %₁₀) +8 TestMod.#->##0 +9 (new %₈) +10 TestMod.xs +11 (call top.Generator %₉ %₁₀) +12 (return %₁₁) ######################################## # Product iteration @@ -48,27 +49,28 @@ 2 TestMod.#->##1 3 (call core.svec %₂ core.Any) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ - slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/iterstate slot₄/x slot₅/y] - 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) +5 SourceLocation::1:2 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) 3 (= slot₃/iterstate (call core.getfield %₁ 2)) 4 slot₃/iterstate - 5 (call top.indexed_iterate slot₂/destructured_arg_1 2 %₄) + 5 (call top.indexed_iterate slot₂/destructured_arg 2 %₄) 6 (= slot₅/y (call core.getfield %₅ 1)) 7 TestMod.+ 8 slot₄/x 9 slot₅/y 10 (call %₇ %₈ %₉) 11 (return %₁₀) -7 TestMod.#->##1 -8 (new %₇) -9 TestMod.xs -10 TestMod.ys -11 (call top.product %₉ %₁₀) -12 (call top.Generator %₈ %₁₁) -13 (return %₁₂) +8 TestMod.#->##1 +9 (new %₈) +10 TestMod.xs 
+11 TestMod.ys +12 (call top.product %₁₀ %₁₁) +13 (call top.Generator %₉ %₁₂) +14 (return %₁₃) ######################################## # Use `identity` as the Generator function when possible eg in filters @@ -89,25 +91,26 @@ 2 TestMod.#->##2 3 (call core.svec %₂ core.Any) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ - slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/iterstate slot₄/x slot₅/y(!read)] - 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) +5 SourceLocation::1:29 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) 3 (= slot₃/iterstate (call core.getfield %₁ 2)) 4 slot₃/iterstate - 5 (call top.indexed_iterate slot₂/destructured_arg_1 2 %₄) + 5 (call top.indexed_iterate slot₂/destructured_arg 2 %₄) 6 (= slot₅/y (call core.getfield %₅ 1)) 7 TestMod.f 8 slot₄/x 9 (call %₇ %₈) 10 (return %₉) -7 TestMod.#->##2 -8 (new %₇) -9 TestMod.iter -10 (call top.Filter %₈ %₉) -11 (call top.Generator top.identity %₁₀) -12 (return %₁₁) +8 TestMod.#->##2 +9 (new %₈) +10 TestMod.iter +11 (call top.Filter %₉ %₁₀) +12 (call top.Generator top.identity %₁₁) +13 (return %₁₂) ######################################## # Use of placeholders in iteration vars @@ -128,15 +131,16 @@ 2 TestMod.#->##3 3 (call core.svec %₂ core.Any) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +5 SourceLocation::1:2 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 (return 1) -7 TestMod.#->##3 -8 (new %₇) -9 TestMod.xs -10 (call top.Generator %₈ %₉) -11 (return %₁₀) +8 TestMod.#->##3 +9 (new %₈) +10 TestMod.xs +11 (call top.Generator %₉ %₁₀) +12 (return %₁₁) ######################################## # Error: Use of 
placeholders in body @@ -165,26 +169,27 @@ LoweringError: 2 TestMod.#->##5 3 (call core.svec %₂ core.Any) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ - slots: [slot₁/#self#(!read) slot₂/destructured_arg_1 slot₃/iterstate slot₄/x(!read) slot₅/y(!read)] - 1 (call top.indexed_iterate slot₂/destructured_arg_1 1) +5 SourceLocation::1:2 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x(!read) slot₅/y(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) 3 (= slot₃/iterstate (call core.getfield %₁ 2)) 4 slot₃/iterstate - 5 (call top.indexed_iterate slot₂/destructured_arg_1 2 %₄) + 5 (call top.indexed_iterate slot₂/destructured_arg 2 %₄) 6 (call core.getfield %₅ 1) 7 (= slot₃/iterstate (call core.getfield %₅ 2)) 8 slot₃/iterstate - 9 (call top.indexed_iterate slot₂/destructured_arg_1 3 %₈) + 9 (call top.indexed_iterate slot₂/destructured_arg 3 %₈) 10 (= slot₅/y (call core.getfield %₉ 1)) 11 TestMod.body 12 (return %₁₁) -7 TestMod.#->##5 -8 (new %₇) -9 TestMod.iter -10 (call top.Generator %₈ %₉) -11 (return %₁₀) +8 TestMod.#->##5 +9 (new %₈) +10 TestMod.iter +11 (call top.Generator %₉ %₁₀) +12 (return %₁₁) ######################################## # return permitted in quoted syntax in generator @@ -205,16 +210,17 @@ LoweringError: 2 TestMod.#->##6 3 (call core.svec %₂ core.Any) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 1 =#))))) -6 --- method core.nothing %₅ +5 SourceLocation::1:4 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 (call JuliaLowering.interpolate_ast (inert (return x))) 2 (return %₁) -7 TestMod.#->##6 -8 (new %₇) -9 TestMod.iter -10 (call top.Generator %₈ %₉) -11 (return %₁₀) +8 TestMod.#->##6 +9 (new %₈) +10 TestMod.iter +11 (call top.Generator %₉ %₁₀) +12 (return %₁₁) 
######################################## # Error: `return` not permitted in generator body @@ -224,78 +230,65 @@ LoweringError: ((return x) + y for x in iter) # └──────┘ ── `return` not allowed inside comprehension or generator -######################################## -# FIXME - error in nested closure conversion: Triply nested generator -((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) -#--------------------- -LoweringError: -((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) -# ╙ ── Found unexpected binding of kind argument - -Detailed provenance: -#₁₃/x -└─ x - └─ x - └─ @ :1 - - ######################################## # Nested case with duplicate iteration variables (x for x in 1:3 for x in 1:2) #--------------------- 1 --- thunk - 1 (global TestMod.#->##8) + 1 (global TestMod.#->##7) 2 (call core.svec) 3 (call core.svec) 4 (call core.svec) - 5 (call core._structtype TestMod :#->##8 %₂ %₃ %₄ false 0) + 5 (call core._structtype TestMod :#->##7 %₂ %₃ %₄ false 0) 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##8) - 8 (= TestMod.#->##8 %₅) + 7 (const TestMod.#->##7) + 8 (= TestMod.#->##7 %₅) 9 (call core.svec) 10 (call core._typebody! %₅ %₉) 11 (return core.nothing) 2 --- thunk - 1 (global TestMod.#->#->##1) + 1 (global TestMod.#->#->##0) 2 (call core.svec) 3 (call core.svec) 4 (call core.svec) - 5 (call core._structtype TestMod :#->#->##1 %₂ %₃ %₄ false 0) + 5 (call core._structtype TestMod :#->#->##0 %₂ %₃ %₄ false 0) 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->#->##1) - 8 (= TestMod.#->#->##1 %₅) + 7 (const TestMod.#->#->##0) + 8 (= TestMod.#->#->##0 %₅) 9 (call core.svec) 10 (call core._typebody! 
%₅ %₉) 11 (return core.nothing) -3 TestMod.#->#->##1 +3 TestMod.#->#->##0 4 (call core.svec %₃ core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:2 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/x] 1 slot₂/x 2 (= slot₃/x %₁) 3 slot₃/x 4 (return %₃) -8 TestMod.#->##8 -9 (call core.svec %₈ core.Any) -10 (call core.svec) -11 (call core.svec %₉ %₁₀ :($(QuoteNode(:(#= line 1 =#))))) -12 --- method core.nothing %₁₁ +9 TestMod.#->##7 +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 SourceLocation::1:2 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ slots: [slot₁/#self#(!read) slot₂/x(!read)] - 1 TestMod.#->#->##1 + 1 TestMod.#->#->##0 2 (new %₁) 3 TestMod.: 4 (call %₃ 1 2) 5 (call top.Generator %₂ %₄) 6 (return %₅) -13 TestMod.#->##8 -14 (new %₁₃) -15 TestMod.: -16 (call %₁₅ 1 3) -17 (call top.Generator %₁₄ %₁₆) -18 (call top.Flatten %₁₇) -19 (return %₁₈) +15 TestMod.#->##7 +16 (new %₁₅) +17 TestMod.: +18 (call %₁₇ 1 3) +19 (call top.Generator %₁₆ %₁₈) +20 (call top.Flatten %₁₉) +21 (return %₂₀) ######################################## # Comprehension lowers to generator with collect @@ -361,3 +354,18 @@ T[(x,y) for x in xs, y in ys] 49 (goto label₁₅) 50 (return %₇) +######################################## +# FIXME - error in nested closure conversion: Triply nested generator +((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) +#--------------------- +LoweringError: +((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) +# ╙ ── Found unexpected binding of kind argument + +Detailed provenance: +#₁₃/x +└─ x + └─ x + └─ @ :1 + + diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index c98facff21ed2..94e03e4fe8fce 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -11,14 +11,15 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ JuliaLowering.MacroContext 
core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:7 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/ex] 1 (call core.tuple slot₃/ex) 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) 3 (return %₂) -8 TestMod.@add_one -9 (return %₈) +9 TestMod.@add_one +10 (return %₉) ######################################## # Macro using `__context__` @@ -31,14 +32,15 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ JuliaLowering.MacroContext core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:7 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/__context__ slot₃/ex(!read) slot₄/ctx(!read)] 1 slot₂/__context__ 2 (= slot₄/ctx %₁) 3 (return %₁) -8 TestMod.@foo -9 (return %₈) +9 TestMod.@foo +10 (return %₉) ######################################## # Error: Macro with kw args diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index e7e6700da3430..ed745f86081ea 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -31,13 +31,14 @@ end 7 (call core.Typeof %₆) 8 (call core.svec %₇ core.Any) 9 (call core.svec) -10 (call core.svec %₈ %₉ :($(QuoteNode(:(#= line 3 =#))))) -11 --- method core.nothing %₁₀ +10 SourceLocation::3:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 (call core.tuple false true true) 2 (return %₁) -12 TestMod.f -13 (return %₁₂) +13 TestMod.f +14 (return %₁₃) ######################################## # @islocal with global @@ -78,16 +79,17 @@ end 3 (call core.Typeof %₂) 4 (call core.svec %₃ core.Any) 5 (call core.svec) -6 (call core.svec %₄ %₅ :($(QuoteNode(:(#= line 1 =#))))) -7 --- method core.nothing %₆ +6 SourceLocation::1:10 +7 (call 
core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/z] 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (gotoifnot true label₅) 4 (call top.setindex! %₂ slot₂/z :z) 5 (return %₂) -8 TestMod.f -9 (return %₈) +9 TestMod.f +10 (return %₉) ######################################## # Error: Duplicate function argument names diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 91695d4f392f2..c3b76fb0809ff 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -385,13 +385,14 @@ end 26 (call core.apply_type core.Type %₂₅) 27 (call core.svec %₂₆) 28 (call core.svec) -29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) -30 --- method core.nothing %₂₉ +29 SourceLocation::1:1 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁) 3 (return %₂) -31 (return core.nothing) +32 (return core.nothing) ######################################## # Basic struct @@ -429,13 +430,14 @@ end 26 TestMod.T 27 (call core.=== core.Any %₂₆) 28 (gotoifnot %₂₇ label₃₀) -29 (goto label₃₆) +29 (goto label₃₇) 30 TestMod.X 31 (call core.apply_type core.Type %₃₀) 32 (call core.svec %₃₁ core.Any core.Any core.Any) 33 (call core.svec) -34 (call core.svec %₃₂ %₃₃ :($(QuoteNode(:(#= line 1 =#))))) -35 --- method core.nothing %₃₄ +34 SourceLocation::1:1 +35 (call core.svec %₃₂ %₃₃ %₃₄) +36 --- method core.nothing %₃₅ slots: [slot₁/#ctor-self# slot₂/a slot₃/b slot₄/c slot₅/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 2) 2 slot₃/b @@ -449,18 +451,19 @@ end 10 slot₅/tmp 11 (new slot₁/#ctor-self# slot₂/a %₁₀ slot₄/c) 12 (return %₁₁) -36 TestMod.X -37 (call core.apply_type core.Type %₃₆) -38 TestMod.T -39 (call core.svec %₃₇ core.Any %₃₈ core.Any) -40 (call core.svec) -41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 1 =#))))) -42 --- method core.nothing %₄₁ +37 TestMod.X +38 (call core.apply_type core.Type %₃₇) +39 
TestMod.T +40 (call core.svec %₃₈ core.Any %₃₉ core.Any) +41 (call core.svec) +42 SourceLocation::1:1 +43 (call core.svec %₄₀ %₄₁ %₄₂) +44 --- method core.nothing %₄₃ slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b slot₄/c) 3 (return %₂) -43 (return core.nothing) +45 (return core.nothing) ######################################## # Struct with supertype and type params @@ -519,12 +522,13 @@ end 50 (call core.UnionAll %₄₂ %₄₉) 51 (call core.svec %₅₀) 52 (call core.svec) -53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 1 =#))))) -54 --- method core.nothing %₅₃ +53 SourceLocation::1:1 +54 (call core.svec %₅₁ %₅₂ %₅₃) +55 --- method core.nothing %₅₄ slots: [slot₁/#ctor-self#] 1 (new slot₁/#ctor-self#) 2 (return %₁) -55 (return core.nothing) +56 (return core.nothing) ######################################## # Struct with const and atomic fields @@ -562,13 +566,14 @@ end 26 (call core.apply_type core.Type %₂₅) 27 (call core.svec %₂₆ core.Any core.Any core.Any) 28 (call core.svec) -29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 1 =#))))) -30 --- method core.nothing %₂₉ +29 SourceLocation::1:1 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b slot₄/c) 3 (return %₂) -31 (return core.nothing) +32 (return core.nothing) ######################################## # Documented struct @@ -610,21 +615,23 @@ end 26 (call core.apply_type core.Type %₂₅) 27 (call core.svec %₂₆ core.Any core.Any) 28 (call core.svec) -29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 4 =#))))) -30 --- method core.nothing %₂₉ +29 SourceLocation::4:1 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#self#(!read) slot₂/a slot₃/b] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b) 3 (return %₂) -31 JuliaLowering.bind_docs! 
-32 (call core.tuple :field_docs) -33 (call core.apply_type core.NamedTuple %₃₂) -34 (call core.svec 1 "field a docs" 2 "field b docs") -35 (call core.tuple %₃₄) -36 (call %₃₃ %₃₅) -37 TestMod.X -38 (call core.kwcall %₃₆ %₃₁ %₃₇ "X docs\n" :($(QuoteNode(:(#= line 4 =#))))) -39 (return core.nothing) +32 JuliaLowering.bind_docs! +33 (call core.tuple :field_docs) +34 (call core.apply_type core.NamedTuple %₃₃) +35 (call core.svec 1 "field a docs" 2 "field b docs") +36 (call core.tuple %₃₅) +37 (call %₃₄ %₃₆) +38 TestMod.X +39 SourceLocation::4:1 +40 (call core.kwcall %₃₇ %₃₂ %₃₈ "X docs\n" %₃₉) +41 (return core.nothing) ######################################## # Struct with outer constructor @@ -672,8 +679,9 @@ end 38 (call core.UnionAll %₃₃ %₃₇) 39 (call core.svec %₃₈ core.Any) 40 (call core.svec) -41 (call core.svec %₃₉ %₄₀ :($(QuoteNode(:(#= line 1 =#))))) -42 --- method core.nothing %₄₁ +41 SourceLocation::1:1 +42 (call core.svec %₃₉ %₄₀ %₄₁) +43 --- method core.nothing %₄₂ slots: [slot₁/#ctor-self# slot₂/x slot₃/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 1) 2 slot₂/x @@ -687,21 +695,22 @@ end 10 slot₃/tmp 11 (new slot₁/#ctor-self# %₁₀) 12 (return %₁₁) -43 TestMod.X -44 (call core.apply_type core.Type %₄₃) -45 slot₁/U -46 (call core.svec %₄₄ %₄₅) -47 slot₁/U -48 (call core.svec %₄₇) -49 (call core.svec %₄₆ %₄₈ :($(QuoteNode(:(#= line 1 =#))))) -50 --- method core.nothing %₄₉ +44 TestMod.X +45 (call core.apply_type core.Type %₄₄) +46 slot₁/U +47 (call core.svec %₄₅ %₄₆) +48 slot₁/U +49 (call core.svec %₄₈) +50 SourceLocation::1:1 +51 (call core.svec %₄₇ %₄₉ %₅₀) +52 --- method core.nothing %₅₁ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.X 2 static_parameter₁ 3 (call core.apply_type %₁ %₂) 4 (new %₃ slot₂/x) 5 (return %₄) -51 (return core.nothing) +53 (return core.nothing) ######################################## # Struct with outer constructor where one typevar is constrained by the other @@ -765,8 +774,9 @@ end 53 (call core.UnionAll %₄₅ %₅₂) 54 (call 
core.svec %₅₃ core.Any) 55 (call core.svec) -56 (call core.svec %₅₄ %₅₅ :($(QuoteNode(:(#= line 1 =#))))) -57 --- method core.nothing %₅₆ +56 SourceLocation::1:1 +57 (call core.svec %₅₄ %₅₅ %₅₆) +58 --- method core.nothing %₅₇ slots: [slot₁/#ctor-self# slot₂/v slot₃/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 1) 2 slot₂/v @@ -780,17 +790,18 @@ end 10 slot₃/tmp 11 (new slot₁/#ctor-self# %₁₀) 12 (return %₁₁) -58 TestMod.X -59 (call core.apply_type core.Type %₅₈) -60 TestMod.Vector -61 slot₂/S -62 (call core.apply_type %₆₀ %₆₁) -63 (call core.svec %₅₉ %₆₂) -64 slot₃/T -65 slot₂/S -66 (call core.svec %₆₄ %₆₅) -67 (call core.svec %₆₃ %₆₆ :($(QuoteNode(:(#= line 1 =#))))) -68 --- method core.nothing %₆₇ +59 TestMod.X +60 (call core.apply_type core.Type %₅₉) +61 TestMod.Vector +62 slot₂/S +63 (call core.apply_type %₆₁ %₆₂) +64 (call core.svec %₆₀ %₆₃) +65 slot₃/T +66 slot₂/S +67 (call core.svec %₆₅ %₆₆) +68 SourceLocation::1:1 +69 (call core.svec %₆₄ %₆₇ %₆₈) +70 --- method core.nothing %₆₉ slots: [slot₁/#self#(!read) slot₂/v] 1 TestMod.X 2 static_parameter₁ @@ -798,7 +809,7 @@ end 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ slot₂/v) 6 (return %₅) -69 (return core.nothing) +71 (return core.nothing) ######################################## # User defined inner constructors and helper functions for structs without type params @@ -829,56 +840,59 @@ end 2 TestMod.#f##0 3 (call core.svec %₂) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 3 =#))))) -6 --- method core.nothing %₅ +5 SourceLocation::3:5 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -7 (newvar slot₂/f) -8 (global TestMod.X) -9 (const TestMod.X) -10 (call core.svec) -11 (call core.svec :x) -12 (call core.svec) -13 (call core._structtype TestMod :X %₁₀ %₁₁ %₁₂ false 1) -14 (= slot₁/X %₁₃) -15 (call core._setsuper! 
%₁₃ core.Any) -16 (isdefined TestMod.X) -17 (gotoifnot %₁₆ label₂₇) -18 TestMod.X -19 (call core._equiv_typedef %₁₈ %₁₃) -20 (gotoifnot %₁₉ label₂₄) -21 TestMod.X -22 (= slot₁/X %₂₁) -23 (goto label₂₆) -24 slot₁/X -25 (= TestMod.X %₂₄) -26 (goto label₂₉) -27 slot₁/X -28 (= TestMod.X %₂₇) -29 slot₁/X -30 (call core.svec core.Any) -31 (call core._typebody! %₂₉ %₃₀) -32 TestMod.#f##0 -33 (= slot₂/f (new %₃₂)) -34 slot₂/f -35 TestMod.X -36 (call core.apply_type core.Type %₃₅) -37 (call core.svec %₃₆ core.Any) -38 (call core.svec) -39 (call core.svec %₃₇ %₃₈ :($(QuoteNode(:(#= line 5 =#))))) -40 --- method core.nothing %₃₉ +8 (newvar slot₂/f) +9 (global TestMod.X) +10 (const TestMod.X) +11 (call core.svec) +12 (call core.svec :x) +13 (call core.svec) +14 (call core._structtype TestMod :X %₁₁ %₁₂ %₁₃ false 1) +15 (= slot₁/X %₁₄) +16 (call core._setsuper! %₁₄ core.Any) +17 (isdefined TestMod.X) +18 (gotoifnot %₁₇ label₂₈) +19 TestMod.X +20 (call core._equiv_typedef %₁₉ %₁₄) +21 (gotoifnot %₂₀ label₂₅) +22 TestMod.X +23 (= slot₁/X %₂₂) +24 (goto label₂₇) +25 slot₁/X +26 (= TestMod.X %₂₅) +27 (goto label₃₀) +28 slot₁/X +29 (= TestMod.X %₂₈) +30 slot₁/X +31 (call core.svec core.Any) +32 (call core._typebody! 
%₃₀ %₃₁) +33 TestMod.#f##0 +34 (= slot₂/f (new %₃₃)) +35 slot₂/f +36 TestMod.X +37 (call core.apply_type core.Type %₃₆) +38 (call core.svec %₃₇ core.Any) +39 (call core.svec) +40 SourceLocation::5:5 +41 (call core.svec %₃₈ %₃₉ %₄₀) +42 --- method core.nothing %₄₁ slots: [slot₁/#ctor-self# slot₂/x] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/x) 3 (return %₂) -41 TestMod.X -42 (call core.apply_type core.Type %₄₁) -43 (call core.svec %₄₂ core.Any core.Any) -44 (call core.svec) -45 (call core.svec %₄₃ %₄₄ :($(QuoteNode(:(#= line 6 =#))))) -46 --- method core.nothing %₄₅ +43 TestMod.X +44 (call core.apply_type core.Type %₄₃) +45 (call core.svec %₄₄ core.Any core.Any) +46 (call core.svec) +47 SourceLocation::6:5 +48 (call core.svec %₄₅ %₄₆ %₄₇) +49 --- method core.nothing %₄₈ slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)] 1 TestMod.ReallyXIPromise 2 slot₁/#ctor-self# @@ -894,20 +908,21 @@ end 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) 13 slot₄/tmp 14 (return %₁₃) -47 TestMod.X -48 (call core.apply_type core.Type %₄₇) -49 (call core.svec %₄₈ core.Any core.Any core.Any) -50 (call core.svec) -51 (call core.svec %₄₉ %₅₀ :($(QuoteNode(:(#= line 10 =#))))) -52 --- method core.nothing %₅₁ +50 TestMod.X +51 (call core.apply_type core.Type %₅₀) +52 (call core.svec %₅₁ core.Any core.Any core.Any) +53 (call core.svec) +54 SourceLocation::10:5 +55 (call core.svec %₅₂ %₅₃ %₅₄) +56 --- method core.nothing %₅₅ slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/a) 3 (return %₂) -53 TestMod.X -54 (call core.apply_type core.Type %₅₃) -55 (call JuliaLowering.bind_docs! %₅₄ "Docs for X constructor\n" %₅₁) -56 (return core.nothing) +57 TestMod.X +58 (call core.apply_type core.Type %₅₇) +59 (call JuliaLowering.bind_docs! 
%₅₈ "Docs for X constructor\n" %₅₅) +60 (return core.nothing) ######################################## # User defined inner constructors and helper functions for structs with type params @@ -933,8 +948,9 @@ end 2 TestMod.#f##1 3 (call core.svec %₂) 4 (call core.svec) -5 (call core.svec %₃ %₄ :($(QuoteNode(:(#= line 5 =#))))) -6 --- method core.nothing %₅ +5 SourceLocation::5:5 +6 (call core.svec %₃ %₄ %₅) +7 --- method core.nothing %₆ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 TestMod.A @@ -942,79 +958,81 @@ end 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ 1) 6 (return %₅) -7 (newvar slot₅/f) -8 (global TestMod.X) -9 (const TestMod.X) -10 (= slot₂/S (call core.TypeVar :S)) -11 (= slot₃/T (call core.TypeVar :T)) -12 slot₂/S -13 slot₃/T -14 (call core.svec %₁₂ %₁₃) -15 (call core.svec :x) -16 (call core.svec) -17 (call core._structtype TestMod :X %₁₄ %₁₅ %₁₆ false 1) -18 (= slot₄/X %₁₇) -19 (call core._setsuper! %₁₇ core.Any) -20 (isdefined TestMod.X) -21 (gotoifnot %₂₀ label₄₁) -22 TestMod.X -23 (call core._equiv_typedef %₂₂ %₁₇) -24 (gotoifnot %₂₃ label₃₈) -25 TestMod.X -26 (= slot₄/X %₂₅) -27 TestMod.X -28 (call top.getproperty %₂₇ :body) +8 (newvar slot₅/f) +9 (global TestMod.X) +10 (const TestMod.X) +11 (= slot₂/S (call core.TypeVar :S)) +12 (= slot₃/T (call core.TypeVar :T)) +13 slot₂/S +14 slot₃/T +15 (call core.svec %₁₃ %₁₄) +16 (call core.svec :x) +17 (call core.svec) +18 (call core._structtype TestMod :X %₁₅ %₁₆ %₁₇ false 1) +19 (= slot₄/X %₁₈) +20 (call core._setsuper! 
%₁₈ core.Any) +21 (isdefined TestMod.X) +22 (gotoifnot %₂₁ label₄₂) +23 TestMod.X +24 (call core._equiv_typedef %₂₃ %₁₈) +25 (gotoifnot %₂₄ label₃₉) +26 TestMod.X +27 (= slot₄/X %₂₆) +28 TestMod.X 29 (call top.getproperty %₂₈ :body) -30 (call top.getproperty %₂₉ :parameters) -31 (call top.indexed_iterate %₃₀ 1) -32 (= slot₂/S (call core.getfield %₃₁ 1)) -33 (= slot₁/iterstate (call core.getfield %₃₁ 2)) -34 slot₁/iterstate -35 (call top.indexed_iterate %₃₀ 2 %₃₄) -36 (= slot₃/T (call core.getfield %₃₅ 1)) -37 (goto label₄₀) -38 slot₄/X -39 (= TestMod.X %₃₈) -40 (goto label₄₃) -41 slot₄/X -42 (= TestMod.X %₄₁) -43 slot₄/X -44 (call core.svec core.Any) -45 (call core._typebody! %₄₃ %₄₄) -46 TestMod.X -47 TestMod.A -48 TestMod.B -49 (call core.apply_type %₄₆ %₄₇ %₄₈) -50 (call core.apply_type core.Type %₄₉) -51 (call core.svec %₅₀) -52 (call core.svec) -53 (call core.svec %₅₁ %₅₂ :($(QuoteNode(:(#= line 3 =#))))) -54 --- method core.nothing %₅₃ +30 (call top.getproperty %₂₉ :body) +31 (call top.getproperty %₃₀ :parameters) +32 (call top.indexed_iterate %₃₁ 1) +33 (= slot₂/S (call core.getfield %₃₂ 1)) +34 (= slot₁/iterstate (call core.getfield %₃₂ 2)) +35 slot₁/iterstate +36 (call top.indexed_iterate %₃₁ 2 %₃₅) +37 (= slot₃/T (call core.getfield %₃₆ 1)) +38 (goto label₄₁) +39 slot₄/X +40 (= TestMod.X %₃₉) +41 (goto label₄₄) +42 slot₄/X +43 (= TestMod.X %₄₂) +44 slot₄/X +45 (call core.svec core.Any) +46 (call core._typebody! 
%₄₄ %₄₅) +47 TestMod.X +48 TestMod.A +49 TestMod.B +50 (call core.apply_type %₄₇ %₄₈ %₄₉) +51 (call core.apply_type core.Type %₅₀) +52 (call core.svec %₅₁) +53 (call core.svec) +54 SourceLocation::3:5 +55 (call core.svec %₅₂ %₅₃ %₅₄) +56 --- method core.nothing %₅₅ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -55 (= slot₆/U (call core.TypeVar :U)) -56 (= slot₇/V (call core.TypeVar :V)) -57 TestMod.X -58 slot₆/U -59 slot₇/V -60 (call core.apply_type %₅₇ %₅₈ %₅₉) -61 (call core.apply_type core.Type %₆₀) -62 (call core.svec %₆₁) -63 slot₆/U -64 slot₇/V -65 (call core.svec %₆₃ %₆₄) -66 (call core.svec %₆₂ %₆₅ :($(QuoteNode(:(#= line 4 =#))))) -67 --- method core.nothing %₆₆ +57 (= slot₆/U (call core.TypeVar :U)) +58 (= slot₇/V (call core.TypeVar :V)) +59 TestMod.X +60 slot₆/U +61 slot₇/V +62 (call core.apply_type %₅₉ %₆₀ %₆₁) +63 (call core.apply_type core.Type %₆₂) +64 (call core.svec %₆₃) +65 slot₆/U +66 slot₇/V +67 (call core.svec %₆₅ %₆₆) +68 SourceLocation::4:5 +69 (call core.svec %₆₄ %₆₇ %₆₈) +70 --- method core.nothing %₆₉ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -68 TestMod.#f##1 -69 (= slot₅/f (new %₆₈)) -70 slot₅/f -71 (return core.nothing) +71 TestMod.#f##1 +72 (= slot₅/f (new %₇₁)) +73 slot₅/f +74 (return core.nothing) ######################################## # new() calls with splats; `Any` fields @@ -1052,14 +1070,15 @@ end 26 (call core.apply_type core.Type %₂₅) 27 (call core.svec %₂₆ core.Any) 28 (call core.svec) -29 (call core.svec %₂₇ %₂₈ :($(QuoteNode(:(#= line 4 =#))))) -30 --- method core.nothing %₂₉ +29 SourceLocation::4:5 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#ctor-self# slot₂/xs] 1 slot₁/#ctor-self# 2 (call core._apply_iterate top.iterate core.tuple slot₂/xs) 3 (splatnew %₁ %₂) 4 (return %₃) -31 (return core.nothing) +32 (return core.nothing) ######################################## # new() calls with splats; typed fields @@ -1110,8 +1129,9 @@ 
end 39 (call core.svec %₃₈ core.Any) 40 slot₃/T 41 (call core.svec %₄₀) -42 (call core.svec %₃₉ %₄₁ :($(QuoteNode(:(#= line 4 =#))))) -43 --- method core.nothing %₄₂ +42 SourceLocation::4:5 +43 (call core.svec %₃₉ %₄₁ %₄₂) +44 --- method core.nothing %₄₃ slots: [slot₁/#ctor-self# slot₂/xs slot₃/tmp slot₄/tmp] 1 (call core._apply_iterate top.iterate core.tuple slot₂/xs) 2 (call core.nfields %₁) @@ -1144,7 +1164,7 @@ end 29 slot₄/tmp 30 (new %₁₁ %₂₀ %₂₉) 31 (return %₃₀) -44 (return core.nothing) +45 (return core.nothing) ######################################## # Error: new doesn't accept keywords From ac1beb3e94fbdaf6b8379533b3d6c7bffa187ccf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 27 Jan 2025 18:13:31 +1000 Subject: [PATCH 0959/1109] Lowering of opaque_closure --- JuliaLowering/src/ast.jl | 7 +- JuliaLowering/src/closure_conversion.jl | 99 +++++++++++++++++-------- JuliaLowering/src/desugaring.jl | 59 +++++++++++++++ JuliaLowering/src/eval.jl | 8 +- JuliaLowering/src/kinds.jl | 8 ++ JuliaLowering/src/linear_ir.jl | 35 +++++---- JuliaLowering/src/runtime.jl | 11 +++ JuliaLowering/src/scope_analysis.jl | 7 ++ JuliaLowering/src/utils.jl | 8 ++ JuliaLowering/test/closures.jl | 15 ++++ JuliaLowering/test/closures_ir.jl | 57 ++++++++++++++ 11 files changed, 269 insertions(+), 45 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 3a88fec2dbe7a..4bd1d4ea3cbb4 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -576,11 +576,14 @@ function is_valid_modref(ex) (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) end +function is_core_nothing(ex) + kind(ex) == K"core" && ex.name_val == "nothing" +end + function is_simple_atom(ctx, ex) k = kind(ex) # TODO thismodule - is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || - (k == K"core" && ex.name_val == "nothing") + is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || is_core_nothing(ex) end function decl_var(ex) diff 
--git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 65aa4de35ebf2..e21787eb76428 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -1,11 +1,11 @@ struct ClosureInfo{GraphType} # Global name of the type of the closure type_name::SyntaxTree{GraphType} - # Names of fields as K"Symbol" nodes, in order - field_syms::SyntaxList{GraphType} + # Names of fields for use with getfield, in order + field_names::SyntaxList{GraphType} # Map from the original BindingId of closed-over vars to the index of the # associated field in the closure type. - field_name_inds::Dict{IdTag,Int} + field_inds::Dict{IdTag,Int} end struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext @@ -35,7 +35,7 @@ end function captured_var_access(ctx, ex) cap_rewrite = ctx.capture_rewriting if cap_rewrite isa ClosureInfo - field_sym = cap_rewrite.field_syms[cap_rewrite.field_name_inds[ex.var_id]] + field_sym = cap_rewrite.field_names[cap_rewrite.field_inds[ex.var_id]] @ast ctx ex [K"call" "getfield"::K"core" binding_ex(ctx, current_lambda_bindings(ctx).self) @@ -197,7 +197,7 @@ function convert_assignment(ctx, ex) end # Compute fields for a closure type, one field for each captured variable. -function closure_type_fields(ctx, srcref, closure_binds) +function closure_type_fields(ctx, srcref, closure_binds, is_opaque) capture_ids = Vector{IdTag}() for lambda_bindings in closure_binds.lambdas for (id, lbinfo) in lambda_bindings.bindings @@ -208,33 +208,45 @@ function closure_type_fields(ctx, srcref, closure_binds) end # sort here to avoid depending on undefined Dict iteration order. capture_ids = sort!(unique(capture_ids)) - field_names = Dict{String,IdTag}() - for (i, id) in enumerate(capture_ids) - binfo = lookup_binding(ctx, id) - # We name each field of the closure after the variable which was closed - # over, for clarity. 
Adding a suffix can be necessary when collisions - # occur due to macro expansion and generated bindings - name0 = binfo.name - name = name0 - i = 1 - while haskey(field_names, name) - name = "$name0#$i" - i += 1 + + field_syms = SyntaxList(ctx) + if is_opaque + field_orig_bindings = capture_ids + # For opaque closures we don't try to generate sensible names for the + # fields as there's no closure type to generate. + for (i,id) in enumerate(field_orig_bindings) + push!(field_syms, @ast ctx srcref i::K"Integer") + end + else + field_names = Dict{String,IdTag}() + for id in capture_ids + binfo = lookup_binding(ctx, id) + # We name each field of the closure after the variable which was closed + # over, for clarity. Adding a suffix can be necessary when collisions + # occur due to macro expansion and generated bindings + name0 = binfo.name + name = name0 + i = 1 + while haskey(field_names, name) + name = "$name0#$i" + i += 1 + end + field_names[name] = id + end + field_orig_bindings = Vector{IdTag}() + for (name,id) in sort!(collect(field_names)) + push!(field_syms, @ast ctx srcref name::K"Symbol") + push!(field_orig_bindings, id) end - field_names[name] = id end - field_syms = SyntaxList(ctx) - field_orig_bindings = Vector{IdTag}() - field_name_inds = Dict{IdTag,Int}() + field_inds = Dict{IdTag,Int}() field_is_box = Vector{Bool}() - for (name,id) in sort!(collect(field_names)) - push!(field_syms, @ast ctx srcref name::K"Symbol") - push!(field_orig_bindings, id) + for (i,id) in enumerate(field_orig_bindings) push!(field_is_box, is_boxed(ctx, id)) - field_name_inds[id] = lastindex(field_syms) + field_inds[id] = i end - return field_syms, field_orig_bindings, field_name_inds, field_is_box + return field_syms, field_orig_bindings, field_inds, field_is_box end function closure_name(mod, name_stack) @@ -389,13 +401,13 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) needs_def = isnothing(closure_info) if needs_def closure_binds = 
ctx.closure_bindings[func_name_id] - field_syms, field_orig_bindings, field_name_inds, field_is_box = - closure_type_fields(ctx, ex, closure_binds) + field_syms, field_orig_bindings, field_inds, field_is_box = + closure_type_fields(ctx, ex, closure_binds, false) name_str = closure_name(ctx.mod, closure_binds.name_stack) closure_type_def, closure_type_ = type_for_closure(ctx, ex, name_str, field_syms, field_is_box) push!(ctx.toplevel_stmts, closure_type_def) - closure_info = ClosureInfo(closure_type_, field_syms, field_name_inds) + closure_info = ClosureInfo(closure_type_, field_syms, field_inds) ctx.closure_infos[func_name_id] = closure_info type_params = SyntaxList(ctx) init_closure_args = SyntaxList(ctx) @@ -464,6 +476,35 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ::K"TOMBSTONE" ] end + elseif k == K"_opaque_closure" + closure_binds = ctx.closure_bindings[ex[1].var_id] + field_syms, field_orig_bindings, field_inds, field_is_box = + closure_type_fields(ctx, ex, closure_binds, true) + + capture_rewrites = ClosureInfo(ex #=unused=#, field_syms, field_inds) + + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, + ctx.toplevel_stmts, ctx.closure_infos) + + init_closure_args = SyntaxList(ctx) + for id in field_orig_bindings + push!(init_closure_args, binding_ex(ctx, id)) + end + @ast ctx ex [K"new_opaque_closure" + ex[2] # arg type tuple + ex[3] # return_lower_bound + ex[4] # return_upper_bound + ex[5] # allow_partial + [K"opaque_closure_method" + "nothing"::K"core" + ex[6] # nargs + ex[7] # is_va + ex[8] # functionloc + closure_convert_lambda(ctx2, ex[9]) + ] + init_closure_args... 
+ ] else mapchildren(e->_convert_closures(ctx, e), ctx, ex) end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index e55a167484925..8e78a84433e46 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2511,6 +2511,63 @@ function expand_arrow(ctx, ex) ) end +function expand_opaque_closure(ctx, ex) + arg_types_spec = ex[1] + return_lower_bound = ex[2] + return_upper_bound = ex[3] + allow_partial = ex[4] + func_expr = ex[5] + @chk kind(func_expr) == K"->" + @chk numchildren(func_expr) == 2 + args = func_expr[1] + @chk kind(args) == K"tuple" + check_no_parameters(ex, args) + + arg_names = SyntaxList(ctx) + arg_types = SyntaxList(ctx) + push!(arg_names, new_local_binding(ctx, args, "#self#"; kind=:argument)) + body_stmts = SyntaxList(ctx) + is_va = false + for (i, arg) in enumerate(children(args)) + (aname, atype, default, is_slurp) = expand_function_arg(ctx, body_stmts, arg, + i == numchildren(args)) + is_va |= is_slurp + push!(arg_names, aname) + push!(arg_types, atype) + if !isnothing(default) + throw(LoweringError(default, "Default positional arguments cannot be used in an opaque closure")) + end + end + + nargs = length(arg_names) - 1 # ignoring #self# + + @ast ctx ex [K"_opaque_closure" + ssavar(ctx, ex, "opaque_closure_id") # only a placeholder. Must be :local + if is_core_nothing(arg_types_spec) + [K"curly" + "Tuple"::K"core" + arg_types... + ] + else + arg_types_spec + end + is_core_nothing(return_lower_bound) ? [K"curly" "Union"::K"core"] : return_lower_bound + is_core_nothing(return_upper_bound) ? "Any"::K"core" : return_upper_bound + allow_partial + nargs::K"Integer" + is_va::K"Bool" + ::K"SourceLocation"(func_expr) + [K"lambda"(func_expr, is_toplevel_thunk=false) + [K"block" arg_names...] + [K"block"] + [K"block" + body_stmts... 
+ func_expr[2] + ] + ] + ] +end + #------------------------------------------------------------------------------- # Expand macro definitions @@ -3741,6 +3798,8 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) "typed_hcat"::K"top" expand_forms_2(ctx, children(ex))... ] + elseif k == K"opaque_closure" + expand_forms_2(ctx, expand_opaque_closure(ctx, ex)) elseif k == K"vcat" || k == K"typed_vcat" expand_forms_2(ctx, expand_vcat(ctx, ex)) elseif k == K"ncat" || k == K"typed_ncat" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 327025702a4a0..db76d7377cc60 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -264,13 +264,18 @@ function to_lowered_expr(mod, ex, ssa_offset=0) Expr(:method, c1, cs[2:end]...) elseif k == K"newvar" Core.NewvarNode(to_lowered_expr(mod, ex[1], ssa_offset)) + elseif k == K"new_opaque_closure" + args = map(e->to_lowered_expr(mod, e, ssa_offset), children(ex)) + # TODO: put allow_partial back in once we update to the latest julia + splice!(args, 4) # allow_partial + Expr(:new_opaque_closure, args...) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # # call invoke static_parameter `=` method struct_type abstract_type # primitive_type global const new splatnew isdefined # enter leave pop_exception inbounds boundscheck loopinfo copyast meta - # foreigncall new_opaque_closure lambda + # lambda head = k == K"call" ? :call : k == K"new" ? :new : k == K"splatnew" ? :splatnew : @@ -284,6 +289,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"gc_preserve_begin" ? :gc_preserve_begin : k == K"gc_preserve_end" ? :gc_preserve_end : k == K"foreigncall" ? :foreigncall : + k == K"opaque_closure_method" ? 
:opaque_closure_method : nothing if isnothing(head) TODO(ex, "Unhandled form for kind $k") diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index f3c160578c4a9..94c5c943b9512 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -26,6 +26,8 @@ function _register_kinds() "loopinfo" # Call into foreign code. Emitted by `@ccall` "foreigncall" + # Special form emitted by `Base.Experimental.@opaque` + "opaque_closure" # Test whether a variable is defined "isdefined" # named labels for `@label` and `@goto` @@ -73,6 +75,8 @@ function _register_kinds() # [K"method_defs" name block] # The code in `block` defines methods for generic function `name` "method_defs" + # The code in `block` defines methods for generic function `name` + "_opaque_closure" # The enclosed statements must be executed at top level "toplevel_butfirst" "const_if_global" @@ -114,6 +118,10 @@ function _register_kinds() # Result of lowering a `K"lambda"` after bindings have been # converted to slot/globalref/SSAValue. 
"code_info" + # Internal initializer for opaque closures + "new_opaque_closure" + # Wrapper for the lambda of around opaque closure methods + "opaque_closure_method" "END_IR_KINDS" ]) end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index e04b875404598..7dcea91e5d273 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -143,8 +143,8 @@ function is_valid_ir_rvalue(ctx, lhs, rhs) return is_ssa(ctx, lhs) || is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && - # FIXME: add: splatnew isdefined invoke cfunction gc_preserve_begin copyast new_opaque_closure globalref - kind(rhs) in KSet"new call foreigncall gc_preserve_begin foreigncall") + # FIXME: add: invoke cfunction gc_preserve_begin copyast + kind(rhs) in KSet"new splatnew isdefined call foreigncall gc_preserve_begin foreigncall new_opaque_closure") end function contains_nonglobal_binding(ctx, ex) @@ -579,10 +579,11 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end - elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" - # TODO k ∈ cfunction new_opaque_closure cglobal - args = if k == K"foreigncall" - args_ = SyntaxList(ctx) + elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" || + k == K"new_opaque_closure" + # TODO k ∈ cfunction cglobal + if k == K"foreigncall" + args = SyntaxList(ctx) # todo: is is_leaf correct here? 
flisp uses `atom?` func = ex[1] if kind(func) == K"call" && kind(func[1]) == K"core" && func[1].name_val == "tuple" @@ -591,11 +592,11 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if contains_nonglobal_binding(ctx, func) throw(LoweringError(func, "ccall function name and library expression cannot reference local variables")) end - append!(args_, compile_args(ctx, ex[1:1])) + append!(args, compile_args(ctx, ex[1:1])) elseif is_leaf(func) - append!(args_, compile_args(ctx, ex[1:1])) + append!(args, compile_args(ctx, ex[1:1])) else - push!(args_, func) + push!(args, func) end # 2nd to 5th arguments of foreigncall are special. They must be # left in place but cannot reference locals. @@ -607,11 +608,11 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) throw(LoweringError(argt, "ccall argument types cannot reference local variables")) end end - append!(args_, ex[2:5]) - append!(args_, compile_args(ctx, ex[6:end])) - args_ + append!(args, ex[2:5]) + append!(args, compile_args(ctx, ex[6:end])) + args else - compile_args(ctx, children(ex)) + args = compile_args(ctx, children(ex)) end callex = makenode(ctx, ex, k, args) if in_tail_pos @@ -779,6 +780,14 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) @assert !needs_value && !in_tail_pos nothing end + elseif k == K"opaque_closure_method" + @ast ctx ex [K"opaque_closure_method" + ex[1] + ex[2] + ex[3] + ex[4] + compile_lambda(ctx, ex[5]) + ] elseif k == K"lambda" lam = compile_lambda(ctx, ex) if in_tail_pos diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 3768ecd8ab821..ff74ea72574e4 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -359,6 +359,17 @@ function var"@islocal"(__context__::MacroContext, ex) ] end +function Base.Experimental.var"@opaque"(__context__::MacroContext, ex) + @chk kind(ex) == K"->" + @ast __context__ __context__.macrocall [K"opaque_closure" + "nothing"::K"core" + 
"nothing"::K"core" + "nothing"::K"core" + true::K"Bool" + ex + ] +end + """ A non-interpolating quoted expression. diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index a5f1cad838906..73ed495336dc1 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -658,6 +658,13 @@ function analyze_variables!(ctx, ex) push!(ctx.method_def_stack, ex[1]) analyze_variables!(ctx, ex[2]) pop!(ctx.method_def_stack) + elseif k == K"_opaque_closure" + push!(ctx.method_def_stack, ex[1]) + analyze_variables!(ctx, ex[2]) + analyze_variables!(ctx, ex[3]) + analyze_variables!(ctx, ex[4]) + analyze_variables!(ctx, ex[9]) + pop!(ctx.method_def_stack) elseif k == K"lambda" lambda_bindings = ex.lambda_bindings if !ex.is_toplevel_thunk diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 7e9ee3e47b580..b027131cb5fdd 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -109,6 +109,14 @@ function print_ir(io::IO, ex, indent="") else println(io, " ", string(e[3])) end + elseif kind(e) == K"opaque_closure_method" + @assert numchildren(e) == 5 + print(io, indent, lno, " --- opaque_closure_method ") + for i=1:4 + print(io, " ", e[i]) + end + println(io) + print_ir(io, e[5], indent*added_indent) elseif kind(e) == K"code_info" println(io, indent, lno, " --- ", e.is_toplevel_thunk ? 
"thunk" : "code_info") print_ir(io, e, indent*added_indent) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 12c070f8f6b4f..87fa40c12577e 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -119,5 +119,20 @@ let end """) +# Opaque closure +@test JuliaLowering.include_string(test_mod, """ +let y = 1 + oc = Base.Experimental.@opaque x->2x + y + oc(3) +end +""") == 7 + +# Opaque closure with `...` +@test JuliaLowering.include_string(test_mod, """ +let + oc = Base.Experimental.@opaque (xs...)->xs + oc(3,4,5) +end +""") == (3,4,5) end diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index dc16fcb40da9e..eb988496b6e92 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -514,3 +514,60 @@ function f() where {g} end end +######################################## +# Opaque closure +let y = 1 + Base.Experimental.@opaque (x, z::T)->2x + y - z +end +#--------------------- +1 1 +2 (= slot₁/y (call core.Box)) +3 slot₁/y +4 (call core.setfield! 
%₃ :contents %₁) +5 TestMod.T +6 (call core.apply_type core.Tuple core.Any %₅) +7 (call core.apply_type core.Union) +8 --- opaque_closure_method core.nothing 2 false SourceLocation::2:31 + slots: [slot₁/#self#(!read) slot₂/x slot₃/z slot₄/y(!read)] + 1 TestMod.- + 2 TestMod.+ + 3 TestMod.* + 4 (call %₃ 2 slot₂/x) + 5 (call core.getfield slot₁/#self# 1) + 6 (call core.isdefined %₅ :contents) + 7 (gotoifnot %₆ label₉) + 8 (goto label₁₁) + 9 (newvar slot₄/y) + 10 slot₄/y + 11 (call core.getfield %₅ :contents) + 12 (call %₂ %₄ %₁₁) + 13 (call %₁ %₁₂ slot₃/z) + 14 (return %₁₃) +9 slot₁/y +10 (new_opaque_closure %₆ %₇ core.Any true %₈ %₉) +11 (return %₁₀) + +######################################## +# Opaque closure with `...` +let + Base.Experimental.@opaque (x, ys...)->ys +end +#--------------------- +1 (call core.apply_type core.Vararg core.Any) +2 (call core.apply_type core.Tuple core.Any %₁) +3 (call core.apply_type core.Union) +4 --- opaque_closure_method core.nothing 2 true SourceLocation::2:31 + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys] + 1 slot₃/ys + 2 (return %₁) +5 (new_opaque_closure %₂ %₃ core.Any true %₄) +6 (return %₅) + +######################################## +# Error: Opaque closure with default args +Base.Experimental.@opaque (x=1)->2x +#--------------------- +LoweringError: +Base.Experimental.@opaque (x=1)->2x +# ╙ ── Default positional arguments cannot be used in an opaque closure + From d41ca392cdb6abbef8373fec5bff1a206d63dcaf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 11:56:33 +1000 Subject: [PATCH 0960/1109] Move fancy function call syntax tests into function_calls_ir.jl --- JuliaLowering/test/function_calls_ir.jl | 519 ++++++++++++++++++++++++ JuliaLowering/test/functions_ir.jl | 315 -------------- JuliaLowering/test/misc_ir.jl | 194 --------- 3 files changed, 519 insertions(+), 509 deletions(-) create mode 100644 JuliaLowering/test/function_calls_ir.jl diff --git a/JuliaLowering/test/function_calls_ir.jl 
b/JuliaLowering/test/function_calls_ir.jl new file mode 100644 index 0000000000000..4c71124d07bc6 --- /dev/null +++ b/JuliaLowering/test/function_calls_ir.jl @@ -0,0 +1,519 @@ +######################################## +# Simple call +f(x, y) +#--------------------- +1 TestMod.f +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# Keyword calls +f(x; a=1, b=2) +#--------------------- +1 TestMod.f +2 (call core.tuple :a :b) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1 2) +5 (call %₃ %₄) +6 TestMod.x +7 (call core.kwcall %₅ %₁ %₆) +8 (return %₇) + +######################################## +# Keyword call with only splats for kws +f(; ks1..., ks2...) +#--------------------- +1 TestMod.f +2 (call core.NamedTuple) +3 TestMod.ks1 +4 (call top.merge %₂ %₃) +5 TestMod.ks2 +6 (call top.merge %₄ %₅) +7 (call top.isempty %₆) +8 (gotoifnot %₇ label₁₁) +9 (call %₁) +10 (return %₉) +11 (call core.kwcall %₆ %₁) +12 (return %₁₁) + +######################################## +# Error: Call with repeated keywords +f(x; a=1, a=2) +#--------------------- +LoweringError: +f(x; a=1, a=2) +# ╙ ── Repeated keyword argument name + +######################################## +# literal_pow lowering +x^42 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call core.apply_type top.Val 42) +4 (call %₃) +5 (call top.literal_pow %₁ %₂ %₄) +6 (return %₅) + +######################################## +# almost but not quite literal_pow lowering :) +x^42.0 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call %₁ %₂ 42.0) +4 (return %₃) + +######################################## +# Error: infix call without enough arguments +@ast_ [K"call"(syntax_flags=JuliaSyntax.INFIX_FLAG) + "x"::K"Identifier" +] +#--------------------- +LoweringError: +#= line 1 =# - Postfix/infix operators must have at least two positional arguments + +######################################## +# Error: postfix call without enough arguments +@ast_ 
[K"call"(syntax_flags=JuliaSyntax.POSTFIX_OP_FLAG) + "x"::K"Identifier" +] +#--------------------- +LoweringError: +#= line 1 =# - Postfix/infix operators must have at least two positional arguments + +######################################## +# Error: Call with no function name +@ast_ [K"call"] +#--------------------- +LoweringError: +#= line 1 =# - Call expressions must have a function name + +######################################## +# Simple broadcast +x .* y .+ f.(z) +#--------------------- +1 TestMod.+ +2 TestMod.* +3 TestMod.x +4 TestMod.y +5 (call top.broadcasted %₂ %₃ %₄) +6 TestMod.f +7 TestMod.z +8 (call top.broadcasted %₆ %₇) +9 (call top.broadcasted %₁ %₅ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Broadcast with unary function calls +.+x +#--------------------- +1 TestMod.+ +2 TestMod.x +3 (call top.broadcasted %₁ %₂) +4 (call top.materialize %₃) +5 (return %₄) + +######################################## +# Broadcast with short circuit operators +x .&& y .|| z +#--------------------- +1 TestMod.x +2 TestMod.y +3 (call top.broadcasted top.andand %₁ %₂) +4 TestMod.z +5 (call top.broadcasted top.oror %₃ %₄) +6 (call top.materialize %₅) +7 (return %₆) + +######################################## +# Scalar comparison chain +x < y < z +#--------------------- +1 TestMod.< +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (gotoifnot %₄ label₁₁) +6 TestMod.< +7 TestMod.y +8 TestMod.z +9 (call %₆ %₇ %₈) +10 (return %₉) +11 (return false) + +######################################## +# Broadcasted comparison chain +x .< y .< z +#--------------------- +1 TestMod.< +2 TestMod.x +3 TestMod.y +4 (call top.broadcasted %₁ %₂ %₃) +5 TestMod.< +6 TestMod.y +7 TestMod.z +8 (call top.broadcasted %₅ %₆ %₇) +9 (call top.broadcasted top.& %₄ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Mixed scalar / broadcasted comparison chain +a < b < c .< d .< e +#--------------------- 
+1 TestMod.< +2 TestMod.a +3 TestMod.b +4 (call %₁ %₂ %₃) +5 (gotoifnot %₄ label₁₁) +6 TestMod.< +7 TestMod.b +8 TestMod.c +9 (= slot₁/if_val (call %₆ %₇ %₈)) +10 (goto label₁₂) +11 (= slot₁/if_val false) +12 slot₁/if_val +13 TestMod.< +14 TestMod.c +15 TestMod.d +16 (call top.broadcasted %₁₃ %₁₄ %₁₅) +17 (call top.broadcasted top.& %₁₂ %₁₆) +18 TestMod.< +19 TestMod.d +20 TestMod.e +21 (call top.broadcasted %₁₈ %₁₉ %₂₀) +22 (call top.broadcasted top.& %₁₇ %₂₁) +23 (call top.materialize %₂₂) +24 (return %₂₃) + +######################################## +# Mixed scalar / broadcasted comparison chain +a .< b .< c < d < e +#--------------------- +1 TestMod.< +2 TestMod.a +3 TestMod.b +4 (call top.broadcasted %₁ %₂ %₃) +5 TestMod.< +6 TestMod.b +7 TestMod.c +8 (call top.broadcasted %₅ %₆ %₇) +9 (call top.broadcasted top.& %₄ %₈) +10 TestMod.< +11 TestMod.c +12 TestMod.d +13 (call %₁₀ %₁₁ %₁₂) +14 (gotoifnot %₁₃ label₂₀) +15 TestMod.< +16 TestMod.d +17 TestMod.e +18 (= slot₁/if_val (call %₁₅ %₁₆ %₁₇)) +19 (goto label₂₁) +20 (= slot₁/if_val false) +21 slot₁/if_val +22 (call top.broadcasted top.& %₉ %₂₁) +23 (call top.materialize %₂₂) +24 (return %₂₃) + +######################################## +# Comparison chain fused with other broadcasting +x .+ (a .< b .< c) +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.< +4 TestMod.a +5 TestMod.b +6 (call top.broadcasted %₃ %₄ %₅) +7 TestMod.< +8 TestMod.b +9 TestMod.c +10 (call top.broadcasted %₇ %₈ %₉) +11 (call top.broadcasted top.& %₆ %₁₀) +12 (call top.broadcasted %₁ %₂ %₁₁) +13 (call top.materialize %₁₂) +14 (return %₁₃) + +######################################## +# Broadcast with literal_pow +x.^3 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call core.apply_type top.Val 3) +4 (call %₃) +5 (call top.broadcasted top.literal_pow %₁ %₂ %₄) +6 (call top.materialize %₅) +7 (return %₆) + +######################################## +# Broadcast with keywords +f.(x, y, z = 1; w = 2) +#--------------------- +1 
top.broadcasted_kwsyntax +2 (call core.tuple :z :w) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1 2) +5 (call %₃ %₄) +6 TestMod.f +7 TestMod.x +8 TestMod.y +9 (call core.kwcall %₅ %₁ %₆ %₇ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Broadcast with unary dot syntax +(.+)(x,y) +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.y +4 (call top.broadcasted %₁ %₂ %₃) +5 (call top.materialize %₄) +6 (return %₅) + +######################################## +# Trivial in-place broadcast update +x .= y +#--------------------- +1 TestMod.x +2 TestMod.y +3 (call top.broadcasted top.identity %₂) +4 (call top.materialize! %₁ %₃) +5 (return %₄) + +######################################## +# Fused in-place broadcast update +x .= y .+ z +#--------------------- +1 TestMod.x +2 TestMod.+ +3 TestMod.y +4 TestMod.z +5 (call top.broadcasted %₂ %₃ %₄) +6 (call top.materialize! %₁ %₅) +7 (return %₆) + +######################################## +# In-place broadcast update with property assignment on left hand side +x.prop .= y +#--------------------- +1 TestMod.x +2 (call top.dotgetproperty %₁ :prop) +3 TestMod.y +4 (call top.broadcasted top.identity %₃) +5 (call top.materialize! %₂ %₄) +6 (return %₅) + +######################################## +# In-place broadcast update with ref on left hand side +x[i,end] .= y +#--------------------- +1 TestMod.x +2 TestMod.i +3 (call top.lastindex %₁ 2) +4 (call top.dotview %₁ %₂ %₃) +5 TestMod.y +6 (call top.broadcasted top.identity %₅) +7 (call top.materialize! 
%₄ %₆) +8 (return %₇) + +######################################## +# <: as a function call +x <: y +#--------------------- +1 TestMod.<: +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# >: as a function call +x >: y +#--------------------- +1 TestMod.>: +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# --> as a function call +x --> y +#--------------------- +1 TestMod.--> +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# basic ccall +ccall(:strlen, Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 (call top.unsafe_convert %₁ %₂) +4 (foreigncall :strlen TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₃ %₂) +5 (return %₄) + +######################################## +# ccall with library name as a global var +ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 TestMod.libc +4 (call core.tuple :strlen %₃) +5 (call top.unsafe_convert %₁ %₂) +6 (foreigncall %₄ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₅ %₂) +7 (return %₆) + +######################################## +# ccall with a calling convention +ccall(:foo, stdcall, Csize_t, ()) +#--------------------- +1 (foreigncall :foo TestMod.Csize_t (call core.svec) 0 :stdcall) +2 (return %₁) + +######################################## +# ccall with Any args become core.Any and don't need conversion or GC roots +ccall(:foo, stdcall, Csize_t, (Any,), x) +#--------------------- +1 core.Any +2 TestMod.x +3 (foreigncall :foo TestMod.Csize_t (call core.svec core.Any) 0 :stdcall %₂) +4 (return %₃) + +######################################## +# ccall with variable as function name (must eval to a pointer) +ccall(ptr, Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call 
top.cconvert %₁ "asdfg") +3 TestMod.ptr +4 (call top.unsafe_convert %₁ %₂) +5 (foreigncall %₃ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₄ %₂) +6 (return %₅) + +######################################## +# ccall with varargs +ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") +#--------------------- +1 TestMod.Cstring +2 TestMod.Cstring +3 (call top.cconvert %₁ "%s = %s\n") +4 (call top.cconvert %₂ "2 + 2") +5 (call top.cconvert %₂ "5") +6 (call top.unsafe_convert %₁ %₃) +7 (call top.unsafe_convert %₂ %₄) +8 (call top.unsafe_convert %₂ %₅) +9 (foreigncall :printf TestMod.Cint (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) +10 (return %₉) + +######################################## +# Error: ccall with too few arguments +ccall(:foo, Csize_t) +#--------------------- +LoweringError: +ccall(:foo, Csize_t) +└──────────────────┘ ── too few arguments to ccall + +######################################## +# Error: ccall with calling conv and too few arguments +ccall(:foo, thiscall, Csize_t) +#--------------------- +LoweringError: +ccall(:foo, thiscall, Csize_t) +└────────────────────────────┘ ── too few arguments to ccall with calling convention specified + +######################################## +# Error: ccall without tuple for argument types +ccall(:foo, Csize_t, Cstring) +#--------------------- +LoweringError: +ccall(:foo, Csize_t, Cstring) +# └─────┘ ── ccall argument types must be a tuple; try `(T,)` + +######################################## +# Error: ccall without tuple for argument types +ccall(:foo, (Csize_t,), "arg") +#--------------------- +LoweringError: +ccall(:foo, (Csize_t,), "arg") +# └────────┘ ── ccall argument types must be a tuple; try `(T,)` and check if you specified a correct return type + +######################################## +# Error: ccall with library name which is a local variable +let libc = "libc" + ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +end 
+#--------------------- +LoweringError: +let libc = "libc" + ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +# └─────────────┘ ── ccall function name and library expression cannot reference local variables +end + +######################################## +# Error: ccall with return type which is a local variable +let Csize_t = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let Csize_t = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +# └─────┘ ── ccall return type cannot reference local variables +end + +######################################## +# Error: ccall with argument type which is a local variable +let Cstring = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let Cstring = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +# └─────┘ ── ccall argument types cannot reference local variables +end + +######################################## +# Error: ccall with too few arguments +ccall(:strlen, Csize_t, (Cstring,)) +#--------------------- +LoweringError: +ccall(:strlen, Csize_t, (Cstring,)) +└─────────────────────────────────┘ ── Too few arguments in ccall compared to argument types + +######################################## +# Error: ccall with too many arguments +ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") +#--------------------- +LoweringError: +ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") +└──────────────────────────────────────────────────┘ ── More arguments than types in ccall + +######################################## +# Error: ccall varargs with too few args +ccall(:foo, Csize_t, (Cstring...,), "asdfg") +#--------------------- +LoweringError: +ccall(:foo, Csize_t, (Cstring...,), "asdfg") +# └────────┘ ── C ABI prohibits vararg without one required argument + +######################################## +# Error: ccall with multiple varargs +ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") +#--------------------- 
+LoweringError: +ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") +# └────────┘ ── only the trailing ccall argument type should have `...` + diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 6bdd46dac3c05..eafbae3256b7d 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -320,64 +320,6 @@ function f[](x,y) # └─┘ ── Invalid function name end -######################################## -# Keyword calls -f(x; a=1, b=2) -#--------------------- -1 TestMod.f -2 (call core.tuple :a :b) -3 (call core.apply_type core.NamedTuple %₂) -4 (call core.tuple 1 2) -5 (call %₃ %₄) -6 TestMod.x -7 (call core.kwcall %₅ %₁ %₆) -8 (return %₇) - -######################################## -# Keyword call with only splats for kws -f(; ks1..., ks2...) -#--------------------- -1 TestMod.f -2 (call core.NamedTuple) -3 TestMod.ks1 -4 (call top.merge %₂ %₃) -5 TestMod.ks2 -6 (call top.merge %₄ %₅) -7 (call top.isempty %₆) -8 (gotoifnot %₇ label₁₁) -9 (call %₁) -10 (return %₉) -11 (call core.kwcall %₆ %₁) -12 (return %₁₁) - -######################################## -# Error: Call with repeated keywords -f(x; a=1, a=2) -#--------------------- -LoweringError: -f(x; a=1, a=2) -# ╙ ── Repeated keyword argument name - -######################################## -# literal_pow lowering -x^42 -#--------------------- -1 TestMod.^ -2 TestMod.x -3 (call core.apply_type top.Val 42) -4 (call %₃) -5 (call top.literal_pow %₁ %₂ %₄) -6 (return %₅) - -######################################## -# almost but not quite literal_pow lowering :) -x^42.0 -#--------------------- -1 TestMod.^ -2 TestMod.x -3 (call %₁ %₂ 42.0) -4 (return %₃) - ######################################## # Simple positional args with defaults function f(x::T, y::S=1, z::U=2) @@ -906,260 +848,3 @@ end 8 (call JuliaLowering.bind_docs! 
%₇ "some docs\n" %₅) 9 (return core.nothing) -######################################## -# Error: infix call without enough arguments -@ast_ [K"call"(syntax_flags=JuliaSyntax.INFIX_FLAG) - "x"::K"Identifier" -] -#--------------------- -LoweringError: -#= line 1 =# - Postfix/infix operators must have at least two positional arguments - -######################################## -# Error: postfix call without enough arguments -@ast_ [K"call"(syntax_flags=JuliaSyntax.POSTFIX_OP_FLAG) - "x"::K"Identifier" -] -#--------------------- -LoweringError: -#= line 1 =# - Postfix/infix operators must have at least two positional arguments - -######################################## -# Error: Call with no function name -@ast_ [K"call"] -#--------------------- -LoweringError: -#= line 1 =# - Call expressions must have a function name - -######################################## -# Simple broadcast -x .* y .+ f.(z) -#--------------------- -1 TestMod.+ -2 TestMod.* -3 TestMod.x -4 TestMod.y -5 (call top.broadcasted %₂ %₃ %₄) -6 TestMod.f -7 TestMod.z -8 (call top.broadcasted %₆ %₇) -9 (call top.broadcasted %₁ %₅ %₈) -10 (call top.materialize %₉) -11 (return %₁₀) - -######################################## -# Broadcast with unary function calls -.+x -#--------------------- -1 TestMod.+ -2 TestMod.x -3 (call top.broadcasted %₁ %₂) -4 (call top.materialize %₃) -5 (return %₄) - -######################################## -# Broadcast with short circuit operators -x .&& y .|| z -#--------------------- -1 TestMod.x -2 TestMod.y -3 (call top.broadcasted top.andand %₁ %₂) -4 TestMod.z -5 (call top.broadcasted top.oror %₃ %₄) -6 (call top.materialize %₅) -7 (return %₆) - -######################################## -# Scalar comparison chain -x < y < z -#--------------------- -1 TestMod.< -2 TestMod.x -3 TestMod.y -4 (call %₁ %₂ %₃) -5 (gotoifnot %₄ label₁₁) -6 TestMod.< -7 TestMod.y -8 TestMod.z -9 (call %₆ %₇ %₈) -10 (return %₉) -11 (return false) - -######################################## -# 
Broadcasted comparison chain -x .< y .< z -#--------------------- -1 TestMod.< -2 TestMod.x -3 TestMod.y -4 (call top.broadcasted %₁ %₂ %₃) -5 TestMod.< -6 TestMod.y -7 TestMod.z -8 (call top.broadcasted %₅ %₆ %₇) -9 (call top.broadcasted top.& %₄ %₈) -10 (call top.materialize %₉) -11 (return %₁₀) - -######################################## -# Mixed scalar / broadcasted comparison chain -a < b < c .< d .< e -#--------------------- -1 TestMod.< -2 TestMod.a -3 TestMod.b -4 (call %₁ %₂ %₃) -5 (gotoifnot %₄ label₁₁) -6 TestMod.< -7 TestMod.b -8 TestMod.c -9 (= slot₁/if_val (call %₆ %₇ %₈)) -10 (goto label₁₂) -11 (= slot₁/if_val false) -12 slot₁/if_val -13 TestMod.< -14 TestMod.c -15 TestMod.d -16 (call top.broadcasted %₁₃ %₁₄ %₁₅) -17 (call top.broadcasted top.& %₁₂ %₁₆) -18 TestMod.< -19 TestMod.d -20 TestMod.e -21 (call top.broadcasted %₁₈ %₁₉ %₂₀) -22 (call top.broadcasted top.& %₁₇ %₂₁) -23 (call top.materialize %₂₂) -24 (return %₂₃) - -######################################## -# Mixed scalar / broadcasted comparison chain -a .< b .< c < d < e -#--------------------- -1 TestMod.< -2 TestMod.a -3 TestMod.b -4 (call top.broadcasted %₁ %₂ %₃) -5 TestMod.< -6 TestMod.b -7 TestMod.c -8 (call top.broadcasted %₅ %₆ %₇) -9 (call top.broadcasted top.& %₄ %₈) -10 TestMod.< -11 TestMod.c -12 TestMod.d -13 (call %₁₀ %₁₁ %₁₂) -14 (gotoifnot %₁₃ label₂₀) -15 TestMod.< -16 TestMod.d -17 TestMod.e -18 (= slot₁/if_val (call %₁₅ %₁₆ %₁₇)) -19 (goto label₂₁) -20 (= slot₁/if_val false) -21 slot₁/if_val -22 (call top.broadcasted top.& %₉ %₂₁) -23 (call top.materialize %₂₂) -24 (return %₂₃) - -######################################## -# Comparison chain fused with other broadcasting -x .+ (a .< b .< c) -#--------------------- -1 TestMod.+ -2 TestMod.x -3 TestMod.< -4 TestMod.a -5 TestMod.b -6 (call top.broadcasted %₃ %₄ %₅) -7 TestMod.< -8 TestMod.b -9 TestMod.c -10 (call top.broadcasted %₇ %₈ %₉) -11 (call top.broadcasted top.& %₆ %₁₀) -12 (call top.broadcasted %₁ %₂ %₁₁) -13 (call 
top.materialize %₁₂) -14 (return %₁₃) - -######################################## -# Broadcast with literal_pow -x.^3 -#--------------------- -1 TestMod.^ -2 TestMod.x -3 (call core.apply_type top.Val 3) -4 (call %₃) -5 (call top.broadcasted top.literal_pow %₁ %₂ %₄) -6 (call top.materialize %₅) -7 (return %₆) - -######################################## -# Broadcast with keywords -f.(x, y, z = 1; w = 2) -#--------------------- -1 top.broadcasted_kwsyntax -2 (call core.tuple :z :w) -3 (call core.apply_type core.NamedTuple %₂) -4 (call core.tuple 1 2) -5 (call %₃ %₄) -6 TestMod.f -7 TestMod.x -8 TestMod.y -9 (call core.kwcall %₅ %₁ %₆ %₇ %₈) -10 (call top.materialize %₉) -11 (return %₁₀) - -######################################## -# Broadcast with unary dot syntax -(.+)(x,y) -#--------------------- -1 TestMod.+ -2 TestMod.x -3 TestMod.y -4 (call top.broadcasted %₁ %₂ %₃) -5 (call top.materialize %₄) -6 (return %₅) - -######################################## -# Trivial in-place broadcast update -x .= y -#--------------------- -1 TestMod.x -2 TestMod.y -3 (call top.broadcasted top.identity %₂) -4 (call top.materialize! %₁ %₃) -5 (return %₄) - -######################################## -# Fused in-place broadcast update -x .= y .+ z -#--------------------- -1 TestMod.x -2 TestMod.+ -3 TestMod.y -4 TestMod.z -5 (call top.broadcasted %₂ %₃ %₄) -6 (call top.materialize! %₁ %₅) -7 (return %₆) - -######################################## -# In-place broadcast update with property assignment on left hand side -x.prop .= y -#--------------------- -1 TestMod.x -2 (call top.dotgetproperty %₁ :prop) -3 TestMod.y -4 (call top.broadcasted top.identity %₃) -5 (call top.materialize! 
%₂ %₄) -6 (return %₅) - -######################################## -# In-place broadcast update with ref on left hand side -x[i,end] .= y -#--------------------- -1 TestMod.x -2 TestMod.i -3 (call top.lastindex %₁ 2) -4 (call top.dotview %₁ %₂ %₃) -5 TestMod.y -6 (call top.broadcasted top.identity %₅) -7 (call top.materialize! %₄ %₆) -8 (return %₇) - diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 4576b2cc5d76b..e6bd2589b7bc1 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -29,36 +29,6 @@ x."b" 2 (call top.BroadcastFunction %₁) 3 (return %₂) -######################################## -# <: as a function call -x <: y -#--------------------- -1 TestMod.<: -2 TestMod.x -3 TestMod.y -4 (call %₁ %₂ %₃) -5 (return %₄) - -######################################## -# >: as a function call -x >: y -#--------------------- -1 TestMod.>: -2 TestMod.x -3 TestMod.y -4 (call %₁ %₂ %₃) -5 (return %₄) - -######################################## -# --> as a function call -x --> y -#--------------------- -1 TestMod.--> -2 TestMod.x -3 TestMod.y -4 (call %₁ %₂ %₃) -5 (return %₄) - ######################################## # Error: Wrong number of children in `.` @ast_ [K"." 
"x"::K"Identifier" "a"::K"Identifier" 3::K"Integer"] @@ -349,170 +319,6 @@ JuxtTest.@emit_juxt 3 (call %₁ 10 %₂) 4 (return %₃) -######################################## -# basic ccall -ccall(:strlen, Csize_t, (Cstring,), "asdfg") -#--------------------- -1 TestMod.Cstring -2 (call top.cconvert %₁ "asdfg") -3 (call top.unsafe_convert %₁ %₂) -4 (foreigncall :strlen TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₃ %₂) -5 (return %₄) - -######################################## -# ccall with library name as a global var -ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") -#--------------------- -1 TestMod.Cstring -2 (call top.cconvert %₁ "asdfg") -3 TestMod.libc -4 (call core.tuple :strlen %₃) -5 (call top.unsafe_convert %₁ %₂) -6 (foreigncall %₄ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₅ %₂) -7 (return %₆) - -######################################## -# ccall with a calling convention -ccall(:foo, stdcall, Csize_t, ()) -#--------------------- -1 (foreigncall :foo TestMod.Csize_t (call core.svec) 0 :stdcall) -2 (return %₁) - -######################################## -# ccall with Any args become core.Any and don't need conversion or GC roots -ccall(:foo, stdcall, Csize_t, (Any,), x) -#--------------------- -1 core.Any -2 TestMod.x -3 (foreigncall :foo TestMod.Csize_t (call core.svec core.Any) 0 :stdcall %₂) -4 (return %₃) - -######################################## -# ccall with variable as function name (must eval to a pointer) -ccall(ptr, Csize_t, (Cstring,), "asdfg") -#--------------------- -1 TestMod.Cstring -2 (call top.cconvert %₁ "asdfg") -3 TestMod.ptr -4 (call top.unsafe_convert %₁ %₂) -5 (foreigncall %₃ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₄ %₂) -6 (return %₅) - -######################################## -# ccall with varargs -ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") -#--------------------- -1 TestMod.Cstring -2 TestMod.Cstring -3 (call top.cconvert %₁ "%s = %s\n") -4 (call 
top.cconvert %₂ "2 + 2") -5 (call top.cconvert %₂ "5") -6 (call top.unsafe_convert %₁ %₃) -7 (call top.unsafe_convert %₂ %₄) -8 (call top.unsafe_convert %₂ %₅) -9 (foreigncall :printf TestMod.Cint (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) -10 (return %₉) - -######################################## -# Error: ccall with too few arguments -ccall(:foo, Csize_t) -#--------------------- -LoweringError: -ccall(:foo, Csize_t) -└──────────────────┘ ── too few arguments to ccall - -######################################## -# Error: ccall with calling conv and too few arguments -ccall(:foo, thiscall, Csize_t) -#--------------------- -LoweringError: -ccall(:foo, thiscall, Csize_t) -└────────────────────────────┘ ── too few arguments to ccall with calling convention specified - -######################################## -# Error: ccall without tuple for argument types -ccall(:foo, Csize_t, Cstring) -#--------------------- -LoweringError: -ccall(:foo, Csize_t, Cstring) -# └─────┘ ── ccall argument types must be a tuple; try `(T,)` - -######################################## -# Error: ccall without tuple for argument types -ccall(:foo, (Csize_t,), "arg") -#--------------------- -LoweringError: -ccall(:foo, (Csize_t,), "arg") -# └────────┘ ── ccall argument types must be a tuple; try `(T,)` and check if you specified a correct return type - -######################################## -# Error: ccall with library name which is a local variable -let libc = "libc" - ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") -end -#--------------------- -LoweringError: -let libc = "libc" - ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") -# └─────────────┘ ── ccall function name and library expression cannot reference local variables -end - -######################################## -# Error: ccall with return type which is a local variable -let Csize_t = 1 - ccall(:strlen, Csize_t, (Cstring,), "asdfg") -end -#--------------------- 
-LoweringError: -let Csize_t = 1 - ccall(:strlen, Csize_t, (Cstring,), "asdfg") -# └─────┘ ── ccall return type cannot reference local variables -end - -######################################## -# Error: ccall with argument type which is a local variable -let Cstring = 1 - ccall(:strlen, Csize_t, (Cstring,), "asdfg") -end -#--------------------- -LoweringError: -let Cstring = 1 - ccall(:strlen, Csize_t, (Cstring,), "asdfg") -# └─────┘ ── ccall argument types cannot reference local variables -end - -######################################## -# Error: ccall with too few arguments -ccall(:strlen, Csize_t, (Cstring,)) -#--------------------- -LoweringError: -ccall(:strlen, Csize_t, (Cstring,)) -└─────────────────────────────────┘ ── Too few arguments in ccall compared to argument types - -######################################## -# Error: ccall with too many arguments -ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") -#--------------------- -LoweringError: -ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") -└──────────────────────────────────────────────────┘ ── More arguments than types in ccall - -######################################## -# Error: ccall varargs with too few args -ccall(:foo, Csize_t, (Cstring...,), "asdfg") -#--------------------- -LoweringError: -ccall(:foo, Csize_t, (Cstring...,), "asdfg") -# └────────┘ ── C ABI prohibits vararg without one required argument - -######################################## -# Error: ccall with multiple varargs -ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") -#--------------------- -LoweringError: -ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") -# └────────┘ ── only the trailing ccall argument type should have `...` - ######################################## # Error: unary & syntax &x From a2938f76c87b4157434bf075cec2e9656579a72e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 12:21:58 +1000 Subject: [PATCH 0961/1109] Add notes on keyword function def desugaring 
--- JuliaLowering/README.md | 99 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 4a7f5552abdab..bf1f63dda44aa 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -298,6 +298,100 @@ passes. See `kinds.jl` for a list of these internal forms. This pass is implemented in `desugaring.jl`. It's quite large because Julia has many special syntax features. +### Desugaring of function definitions + +Desugaring of function definitions is particularly complex because of the cross +product of features which need to work together consistently: + +* Positional arguments (with and without defaults, with and without types) +* Keyword arguments (with and without defaults, with and without types) +* Type parameters with `where` syntax +* Argument slurping syntax with `...` +* Fancy arguments (argument destructuring) + +The combination of positional arguments with defaults and keyword arguments is +particularly complex. Here's an example. 
Suppose we're given the function +definition + +```julia +function f(a::A=a_default, b::B=b_default; x::X=x_default,y::Y=y_default) + body +end +``` + +This generates +* One method of `f` for each number of positional arguments which can be + called when `f` is called without keyword args +* One overload of `Core.kwcall(kws, ::typeof(f), ...)` for each number of + positional arguments (when called with a nonzero number of keyword args; the + tuple `kws` being constructed by the caller) +* One internal method for the body of the function (we can call it `f_kw` + though it will be named something like `#f#18`) + +First, partially expanding the kw definitions this roughly looks like + +```julia +function f_kw(x::X, y::X, f_self::typeof(f), a::A, b::B) + body +end + +function f(a::A=a_default, b::B=b_default) + f_kw(x_default, y_default, var"#self#", a, b) +end + +function Core.kwcall(kws::NamedTuple, self::typeof(f), a::A=a_default, b::B=b_default) + if Core.isdefined(kws, :x) + x_tmp = Core.getfield(kws, :x) + if x_tmp isa X + nothing + else + Core.throw($(Expr(:new, Core.TypeError, Symbol("keyword argument"), :x, X, x_tmp))) + end + x = x_tmp + else + x = 1 + end + if Core.isdefined(kws, :y) + y_tmp = Core.getfield(kws, :y) + if y_tmp isa Y + nothing + else + Core.throw($(Expr(:new, Core.TypeError, Symbol("keyword argument"), :y, Y, y_tmp))) + end + y = y_tmp + else + y = 2 + end + f_kw(x, y, self, a, b) +end +``` + +We can then pass this to function expansion for default arguments which expands +each of the above into three more methods. 
For example, for the first +definition we conceptually expand `f(a::A=a_default, b::B=b_default)` into the +methods + +```julia +# The body +function f(a::A, b::B) + f_kw(x_default, y_default, var"#self#", a, b) +end + +# And two methods for the different numbers of default args +function f(a::A) + var"#self#"(a, b_default) +end + +function f() + var"#self#"(a_default, b_default) +end +``` + +In total, this expands a single "function definition" into seven methods. + +Note that the above is only a sketch! There's more fiddly details when `where` +syntax comes in + ## Pass 3: Scope analysis / binding resolution This pass replaces variables with bindings of kind `K"BindingId"`, @@ -707,10 +801,13 @@ odd mixture of imperative and declarative lowered code. ## Bugs in Julia's lowering -List of bugs which should be fixed upstream in flisp implementation +Subset of bugs which exist in upstream in flisp implementation, but which are fixed here * `f()[begin]` has the side effect `f()` twice. * `a[(begin=1; a=2)]` gives a weird error * `function A.ccall() ; end` allows `ccall` as a name but it's not allowed without the `A.` +* `a .< b .< c` expands to `(a .< b) .& (b .< c)` where the scope of the `&` is + the expansion module but should be `top.&` to avoid scope-dependence + (especially in the presence of macros) ## Notes on Racket's hygiene From aeb89b16196a4db80b2d0e2d0405db5852dfce50 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 12:25:33 +1000 Subject: [PATCH 0962/1109] Fix bug with zero-method closure lowering --- JuliaLowering/src/desugaring.jl | 6 +++++- JuliaLowering/src/scope_analysis.jl | 33 +++++++++++++++++++---------- JuliaLowering/test/closures_ir.jl | 25 ++++++++++++++++++++++ JuliaLowering/test/functions_ir.jl | 9 ++++++++ 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 8e78a84433e46..833360593ec5f 100644 --- a/JuliaLowering/src/desugaring.jl +++ 
b/JuliaLowering/src/desugaring.jl @@ -2269,7 +2269,10 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if !is_valid_func_name(name) throw(LoweringError(name, "Invalid function name")) end - return @ast ctx ex [K"method" name=>K"Symbol"] + return @ast ctx ex [K"block" + [K"function_decl" name] + name + ] end typevar_names = SyntaxList(ctx) @@ -2305,6 +2308,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= else throw(LoweringError(name, "Bad function definition")) end + # Fixup for `new` constructor sigs if necessary callex = rewrite_call(callex) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 73ed495336dc1..738caa0be7f10 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -590,6 +590,21 @@ function current_lambda_bindings(ctx::VariableAnalysisContext) ctx.lambda_bindings end +function init_closure_bindings!(ctx, fname) + func_name_id = fname.var_id + @assert lookup_binding(ctx, func_name_id).kind === :local + get!(ctx.closure_bindings, func_name_id) do + name_stack = Vector{String}() + for parentname in ctx.method_def_stack + if kind(parentname) == K"BindingId" + push!(name_stack, lookup_binding(ctx, parentname).name) + end + end + push!(name_stack, lookup_binding(ctx, func_name_id).name) + ClosureBindings(name_stack) + end +end + # Update ctx.bindings and ctx.lambda_bindings metadata based on binding usage function analyze_variables!(ctx, ex) k = kind(ex) @@ -618,6 +633,9 @@ function analyze_variables!(ctx, ex) if kind(name) == K"BindingId" && lookup_binding(ctx, name).kind == :argument throw(LoweringError(name, "Cannot add method to a function argument")) end + if lookup_binding(ctx, name.var_id).kind === :local + init_closure_bindings!(ctx, name) + end update_binding!(ctx, name, add_assigned=1) if has_lambda_binding(ctx, name) update_lambda_binding!(ctx, name, is_assigned=true) @@ -659,7 +677,9 @@ function 
analyze_variables!(ctx, ex) analyze_variables!(ctx, ex[2]) pop!(ctx.method_def_stack) elseif k == K"_opaque_closure" - push!(ctx.method_def_stack, ex[1]) + name = ex[1] + init_closure_bindings!(ctx, name) + push!(ctx.method_def_stack, name) analyze_variables!(ctx, ex[2]) analyze_variables!(ctx, ex[3]) analyze_variables!(ctx, ex[4]) @@ -673,16 +693,7 @@ function analyze_variables!(ctx, ex) if kind(func_name) == K"BindingId" func_name_id = func_name.var_id if lookup_binding(ctx, func_name_id).kind === :local - cbinds = get!(ctx.closure_bindings, func_name_id) do - name_stack = Vector{String}() - for fname in ctx.method_def_stack - if kind(fname) == K"BindingId" - push!(name_stack, lookup_binding(ctx, fname).name) - end - end - ClosureBindings(name_stack) - end - push!(cbinds.lambdas, lambda_bindings) + push!(ctx.closure_bindings[func_name_id].lambdas, lambda_bindings) end end end diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index eb988496b6e92..59502396ddaee 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -47,6 +47,31 @@ end 15 slot₁/f 16 (return %₁₅) +######################################## +# Closure declaration with no methods +begin + local no_method_f + function no_method_f + end +end +#--------------------- +1 --- thunk + 1 (global TestMod.#no_method_f##0) + 2 (call core.svec) + 3 (call core.svec) + 4 (call core.svec) + 5 (call core._structtype TestMod :#no_method_f##0 %₂ %₃ %₄ false 0) + 6 (call core._setsuper! %₅ core.Function) + 7 (const TestMod.#no_method_f##0) + 8 (= TestMod.#no_method_f##0 %₅) + 9 (call core.svec) + 10 (call core._typebody! 
%₅ %₉) + 11 (return core.nothing) +2 TestMod.#no_method_f##0 +3 (= slot₁/no_method_f (new %₂)) +4 slot₁/no_method_f +5 (return %₄) + ######################################## # Closure which sets the value of a captured variable let diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index eafbae3256b7d..781f1c621e828 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1,3 +1,12 @@ +######################################## +# Function declaration with no methods +function f +end +#--------------------- +1 (method TestMod.f) +2 TestMod.f +3 (return %₂) + ######################################## # Functions with placeholder arg function f(x, _, y) From fbe0c5e146894095c9b4c7f613f9792a3eeb3dfe Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 12:43:21 +1000 Subject: [PATCH 0963/1109] Move typevar initialization outside method_defs block This should help with kw function desugaring where we want to define methods for Core.kwfunc and the body. --- JuliaLowering/src/desugaring.jl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 833360593ec5f..110f716867e57 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2453,14 +2453,16 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= [K"function_decl"(bare_func_name) bare_func_name] end [K"scope_block"(scope_type=:hard) - [K"method_defs" - isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name - [K"block" - typevar_stmts... - if !isnothing(method_table_val) - [K"=" method_table method_table_val] - end - method_stmts... + [K"block" + typevar_stmts... + [K"method_defs" + isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name + [K"block" + if !isnothing(method_table_val) + [K"=" method_table method_table_val] + end + method_stmts... 
+ ] ] ] ] From 9d6825bfbfe335edcd1011702e98d49bc5e3ba70 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 14:06:31 +1000 Subject: [PATCH 0964/1109] Clean up some args in function expansion helpers Also fix redundant return value conversion in default argument shim functions - only the body needs to do the conversion. --- JuliaLowering/src/desugaring.jl | 20 +++--- JuliaLowering/test/functions.jl | 106 ++++++++++++++++------------- JuliaLowering/test/functions_ir.jl | 39 +++++++++++ 3 files changed, 108 insertions(+), 57 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 110f716867e57..d0920b6d18d75 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2139,11 +2139,11 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) end end -function method_def_expr(ctx, srcref, callex, method_table, - docs, typevar_names, arg_names, arg_types, ret_var, body) +function method_def_expr(ctx, srcref, callex_srcref, method_table, + typevar_names, arg_names, arg_types, ret_var, body) @ast ctx srcref [K"block" # metadata contains svec(types, sparms, location) - method_metadata := [K"call"(callex) + method_metadata := [K"call"(callex_srcref) "svec" ::K"core" [K"call" "svec" ::K"core" @@ -2153,7 +2153,7 @@ function method_def_expr(ctx, srcref, callex, method_table, "svec" ::K"core" typevar_names... 
] - ::K"SourceLocation"(callex) + ::K"SourceLocation"(callex_srcref) ] [K"method" method_table @@ -2209,7 +2209,7 @@ end function optional_positional_defs!(ctx, method_stmts, srcref, callex, method_table, typevar_names, typevar_stmts, arg_names, arg_types, first_default, - arg_defaults, ret_var) + arg_defaults) # Replace placeholder arguments with variables - we need to pass them to # the inner method for dispatch even when unused in the inner method body def_arg_names = map(arg_names) do arg @@ -2241,9 +2241,9 @@ function optional_positional_defs!(ctx, method_stmts, srcref, callex, typevar_names, typevar_stmts) # TODO: Ensure we preserve @nospecialize metadata in args push!(method_stmts, - method_def_expr(ctx, srcref, callex, method_table, nothing, + method_def_expr(ctx, srcref, callex, method_table, trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types, - ret_var, body)) + nothing, body)) end end @@ -2429,13 +2429,13 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= first_default += 1 # Offset for self argument optional_positional_defs!(ctx, method_stmts, ex, callex, method_table, typevar_names, typevar_stmts, - arg_names, arg_types, first_default, arg_defaults, ret_var) + arg_names, arg_types, first_default, arg_defaults) end # The method with all non-default arguments push!(method_stmts, - method_def_expr(ctx, ex, callex, method_table, docs, - typevar_names, arg_names, arg_types, ret_var, body)) + method_def_expr(ctx, ex, callex, method_table, typevar_names, arg_names, + arg_types, ret_var, body)) if !isnothing(docs) method_stmts[end] = @ast ctx docs [K"block" method_metadata := method_stmts[end] diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index bd80aae675e4d..0619c75a203c5 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -134,71 +134,83 @@ begin end """) -# Default positional arguments -@test JuliaLowering.include_string(test_mod, """ -begin - 
function f_def_simple(x=1, y=2, z=x) - (x,y,z) +@testset "Default positional arguments" begin + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_simple(x=1, y=2, z=x) + (x,y,z) + end + + (f_def_simple(), f_def_simple(10), f_def_simple(10,20), f_def_simple(10,20,30)) end + """) == ((1,2,1), (10,2,10), (10,20,10), (10,20,30)) - (f_def_simple(), f_def_simple(10), f_def_simple(10,20), f_def_simple(10,20,30)) -end -""") == ((1,2,1), (10,2,10), (10,20,10), (10,20,30)) + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_placeholders(::T=1, _::S=1.0) where {T,S} + (T,S) + end -@test JuliaLowering.include_string(test_mod, """ -begin - function f_def_placeholders(::T=1, _::S=1.0) where {T,S} - (T,S) + (f_def_placeholders(), f_def_placeholders(1.0), f_def_placeholders(1.0, 1)) end + """) == ((Int,Float64), (Float64,Float64), (Float64,Int)) - (f_def_placeholders(), f_def_placeholders(1.0), f_def_placeholders(1.0, 1)) -end -""") == ((Int,Float64), (Float64,Float64), (Float64,Int)) + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_typevars(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} + (x, y, z, T, S, U) + end -@test JuliaLowering.include_string(test_mod, """ -begin - function f_def_typevars(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} - (x, y, z, T, S, U) + (f_def_typevars(1), f_def_typevars(1,[1.0]), f_def_typevars(1,[1.0],-1.0)) end + """) == ((1, [1], 2, Int, Vector{Int}, Int), + (1, [1.0], 2, Float64, Vector{Float64}, Int), + (1, [1.0], -1.0, Float64, Vector{Float64}, Float64)) - (f_def_typevars(1), f_def_typevars(1,[1.0]), f_def_typevars(1,[1.0],-1.0)) -end -""") == ((1, [1], 2, Int, Vector{Int}, Int), - (1, [1.0], 2, Float64, Vector{Float64}, Int), - (1, [1.0], -1.0, Float64, Vector{Float64}, Float64)) + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_slurp(x=1, ys...) 
+ (x, ys) + end -@test JuliaLowering.include_string(test_mod, """ -begin - function f_def_slurp(x=1, ys...) - (x, ys) + (f_def_slurp(), f_def_slurp(2), f_def_slurp(2,3)) end + """) == ((1, ()), + (2, ()), + (2, (3,))) - (f_def_slurp(), f_def_slurp(2), f_def_slurp(2,3)) -end -""") == ((1, ()), - (2, ()), - (2, (3,))) + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_ret_type(x=1.0)::Int + x + end -@test JuliaLowering.include_string(test_mod, """ -begin - function f_def_slurp_splat(ys...=(1,2)...) - ys + (f_def_ret_type(), f_def_ret_type(10.0)) end + """) === (1,10) - (f_def_slurp_splat(), f_def_slurp_splat(10,20)) -end -""") == ((1,2), - (10,20)) + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_slurp_splat(ys...=(1,2)...) + ys + end -@test JuliaLowering.include_string(test_mod, """ -begin - function f_destructure(x, (y,z)::Tuple{Int,Int}, (w,)...=(4,)...) - (x,y,z,w) + (f_def_slurp_splat(), f_def_slurp_splat(10,20)) + end + """) == ((1,2), + (10,20)) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_destructure(x, (y,z)::Tuple{Int,Int}, (w,)...=(4,)...) 
+ (x,y,z,w) + end + + f_def_destructure(1, (2,3)) end + """) == (1,2,3,4) - f_destructure(1, (2,3)) end -""") == (1,2,3,4) @testset "Slot flags" begin diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 781f1c621e828..883d5bfd26f38 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -770,6 +770,45 @@ end 18 TestMod.f 19 (return %₁₈) +######################################## +# Function argument destructuring combined with splats, types and and defaults +function f(x=default_x)::T +end +#--------------------- +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(called)] + 1 TestMod.default_x + 2 (call slot₁/#self# %₁) + 3 (return %₂) +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/tmp(!read)] + 1 TestMod.T + 2 (= slot₃/tmp core.nothing) + 3 slot₃/tmp + 4 (call core.isa %₃ %₁) + 5 (gotoifnot %₄ label₇) + 6 (goto label₁₀) + 7 slot₃/tmp + 8 (call top.convert %₁ %₇) + 9 (= slot₃/tmp (call core.typeassert %₈ %₁)) + 10 slot₃/tmp + 11 (return %₁₀) +16 TestMod.f +17 (return %₁₆) + ######################################## # Duplicate destructured placeholders ok function f((_,), (_,)) From 1e5e83ec4489787a1c21b2d25c6137b1472658a8 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 18:48:08 +1000 Subject: [PATCH 0965/1109] Desugaring of functions definitions with keywords This is a work in progress. 
Some things still to do include: * `where` / static parameters, and how those interact with keyword argument types and defaults * keyword arguments with defaults which depend on other keyword arguments * keyword arguments where the defaults have assignments (ugh!!) * slurping of keywords --- JuliaLowering/README.md | 6 + JuliaLowering/src/ast.jl | 8 +- JuliaLowering/src/closure_conversion.jl | 15 +- JuliaLowering/src/desugaring.jl | 217 ++++++++++++++++++++++-- JuliaLowering/src/runtime.jl | 19 +++ JuliaLowering/test/functions.jl | 39 +++++ JuliaLowering/test/functions_ir.jl | 153 +++++++++++++++++ 7 files changed, 430 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index bf1f63dda44aa..e666104e43bfb 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -362,6 +362,12 @@ function Core.kwcall(kws::NamedTuple, self::typeof(f), a::A=a_default, b::B=b_de else y = 2 end + if Base.isempty(Base.diff_names(Base.keys(kws), (:x, :y))) + nothing + else + # Else unsupported kws + Base.kwerr(kws, self, a, b) + end f_kw(x, y, self, a, b) end ``` diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 4bd1d4ea3cbb4..b00bd422dc74a 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -580,6 +580,10 @@ function is_core_nothing(ex) kind(ex) == K"core" && ex.name_val == "nothing" end +function is_core_Any(ex) + kind(ex) == K"core" && ex.name_val == "Any" +end + function is_simple_atom(ctx, ex) k = kind(ex) # TODO thismodule @@ -603,8 +607,8 @@ function to_symbol(ctx, ex) @ast ctx ex ex=>K"Symbol" end -function new_scope_layer(ctx, mod_ref::Module=ctx.mod) - new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, true) +function new_scope_layer(ctx, mod_ref::Module=ctx.mod; is_macro_expansion=true) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, is_macro_expansion) push!(ctx.scope_layers, new_layer) new_layer.id end diff --git a/JuliaLowering/src/closure_conversion.jl 
b/JuliaLowering/src/closure_conversion.jl index e21787eb76428..e259366541f56 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -249,18 +249,6 @@ function closure_type_fields(ctx, srcref, closure_binds, is_opaque) return field_syms, field_orig_bindings, field_inds, field_is_box end -function closure_name(mod, name_stack) - basename = "#$(join(name_stack, "#"))##" - i = 0 - while true - name = "$basename$i" - if reserve_module_binding(mod, Symbol(name)) - return name - end - i += 1 - end -end - # Return a thunk which creates a new type for a closure with `field_syms` named # fields. The new type will be named `name_str` which must be an unassigned # name in the module. @@ -403,7 +391,8 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) closure_binds = ctx.closure_bindings[func_name_id] field_syms, field_orig_bindings, field_inds, field_is_box = closure_type_fields(ctx, ex, closure_binds, false) - name_str = closure_name(ctx.mod, closure_binds.name_stack) + name_str = reserve_module_binding_i(ctx.mod, + "#$(join(closure_binds.name_stack, "#"))##") closure_type_def, closure_type_ = type_for_closure(ctx, ex, name_str, field_syms, field_is_box) push!(ctx.toplevel_stmts, closure_type_def) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index d0920b6d18d75..c74846510f6dc 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -894,7 +894,6 @@ function expand_comprehension_to_loops(ctx, ex) push!(new_iterspecs, @ast ctx iterspec [K"in" iterspec[1] iter]) end # Lower to nested for loops - # layer = new_scope_layer(ctx) idx = new_local_binding(ctx, iterspecs, "idx") @ast ctx ex [K"block" iter_defs... 
@@ -1641,7 +1640,7 @@ function expand_ccall(ctx, ex) argt = types_for_conv[i] end exarg = expand_forms_2(ctx, arg) - if kind(raw_argt) == K"core" && raw_argt.name_val == "Any" + if is_core_Any(raw_argt) push!(unsafe_args, exarg) else cconverted_arg = emit_assign_tmp(sctx, @@ -2102,6 +2101,9 @@ function expand_function_arg(ctx, body_stmts, arg, is_last_arg) k = kind(ex) if k == K"tuple" + if isnothing(body_stmts) + throw(LoweringError(ex, "Invalid keyword name")) + end # Argument destructuring is_nospecialize = getmeta(arg, :nospecialize, false) name = new_local_binding(ctx, ex, "destructured_arg"; @@ -2140,7 +2142,7 @@ function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) end function method_def_expr(ctx, srcref, callex_srcref, method_table, - typevar_names, arg_names, arg_types, ret_var, body) + typevar_names, arg_names, arg_types, body, ret_var=nothing) @ast ctx srcref [K"block" # metadata contains svec(types, sparms, location) method_metadata := [K"call"(callex_srcref) @@ -2243,8 +2245,161 @@ function optional_positional_defs!(ctx, method_stmts, srcref, callex, push!(method_stmts, method_def_expr(ctx, srcref, callex, method_table, trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types, - nothing, body)) + body)) + end +end + +function keyword_function_defs(ctx, srcref, callex_srcref, name_str, + typevar_names, typevar_stmts, arg_names, + arg_types, first_default, arg_defaults, keywords, body, ret_var) + mangled_name = let n = isnothing(name_str) ? "_" : name_str + n = string(startswith(n, '#') ? "" : "#", n, "#") + reserve_module_binding_i(ctx.mod, n) + end + # TODO: Is the layer correct here? Which module should be the parent module + # of this body function? 
+ layer = new_scope_layer(ctx; is_macro_expansion=false) + body_func_name = adopt_scope(@ast(ctx, callex_srcref, mangled_name::K"Identifier"), layer) + + kwcall_arg_names = SyntaxList(ctx) + kwcall_arg_types = SyntaxList(ctx) + kwcall_body_stmts = SyntaxList(ctx) + + push!(kwcall_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument)) + push!(kwcall_arg_types, + @ast ctx callex_srcref [K"call" + "typeof"::K"core" + "kwcall"::K"core" + ] + ) + kws_arg = new_local_binding(ctx, keywords, "kws"; kind=:argument) + push!(kwcall_arg_names, kws_arg) + push!(kwcall_arg_types, @ast ctx keywords "NamedTuple"::K"core") + + body_arg_names = SyntaxList(ctx) + body_arg_types = SyntaxList(ctx) + push!(body_arg_names, new_local_binding(ctx, body_func_name, "#self#"; kind=:argument)) + push!(body_arg_types, @ast ctx body_func_name [K"function_type" body_func_name]) + + kw_defaults = SyntaxList(ctx) + kw_name_syms = SyntaxList(ctx) + kw_val_vars = SyntaxList(ctx) + for (i,arg) in enumerate(children(keywords)) + (aname, atype, default, is_slurp) = + expand_function_arg(ctx, nothing, arg, i == numchildren(keywords)) + name_sym = @ast ctx aname aname=>K"Symbol" + @assert !is_slurp # TODO + if isnothing(default) + default = @ast ctx arg [K"call" + "throw"::K"core" + [K"call" + "UndefKeywordError"::K"core" + name_sym + ] + ] + end + kw_var = ssavar(ctx, arg, "kw_var") # <- TODO: Use `aname` here, if necessary + push!(kw_val_vars, kw_var) + push!(kwcall_body_stmts, @ast ctx arg [K"=" + kw_var + [K"if" + [K"call" "isdefined"::K"core" kws_arg name_sym] + [K"block" + kwval := [K"call" "getfield"::K"core" kws_arg name_sym] + # TODO: if the "declared" type of a KW arg includes something + # from keyword-sparams then don't assert it here, since those + # static parameters don't have values yet. instead, the type + # will be picked up when the underlying method is called. 
+ if !is_core_Any(atype) + [K"if" [K"call" "isa"::K"core" kwval atype] + "nothing"::K"core" + [K"call" + "throw"::K"core" + [K"new" "TypeError"::K"core" + "keyword argument"::K"Symbol" + name_sym + atype + kwval + ] + ] + ] + end + kwval + ] + default + ] + ]) + + push!(kw_defaults, default) + push!(kw_name_syms, name_sym) + push!(body_arg_names, aname) + push!(body_arg_types, atype) + end + append!(body_arg_names, arg_names) + append!(body_arg_types, arg_types) + + first_default += length(kwcall_arg_names) + append!(kwcall_arg_names, arg_names) + append!(kwcall_arg_types, arg_types) + + kwcall_mtable = @ast(ctx, srcref, "nothing"::K"core") + + kwcall_defs = SyntaxList(ctx) + if !isempty(arg_defaults) + optional_positional_defs!(ctx, kwcall_defs, srcref, callex_srcref, + kwcall_mtable, typevar_names, typevar_stmts, + kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults) end + + kwcall_body = @ast ctx keywords [K"block" + kwcall_body_stmts... + [K"if" + [K"call" + "isempty"::K"top" + [K"call" + "diff_names"::K"top" + [K"call" "keys"::K"top" kws_arg] + [K"tuple" kw_name_syms...] + ] + ] + "nothing"::K"core" + if true + [K"call" # Report unsupported kws + "kwerr"::K"top" + kws_arg + arg_names... + ] + else + # TODO: kw slurping + end + ] + [K"call" + body_func_name + kw_val_vars... + arg_names... + ] + ] + + push!(kwcall_defs, + method_def_expr(ctx, srcref, callex_srcref, kwcall_mtable, + typevar_names, kwcall_arg_names, kwcall_arg_types, kwcall_body)) + + kw_func_method_defs = @ast ctx srcref [K"block" + [K"method_defs" + body_func_name + [K"block" + method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core", + typevar_names, body_arg_names, body_arg_types, body, ret_var) + ] + ] + [K"method_defs" + "nothing"::K"core" + [K"block" + kwcall_defs... 
+ ] + ] + ] + body_func_name, kw_func_method_defs, kw_defaults end # Check valid identifier/function names @@ -2321,6 +2476,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= # the function name. name = callex[1] bare_func_name = nothing + name_str = nothing doc_obj = nothing self_name = nothing if kind(name) == K"::" @@ -2339,6 +2495,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if kind(name) == K"Placeholder" # Anonymous function. In this case we may use an ssavar for the # closure's value. + name_str = name.name_val name = ssavar(ctx, name, name.name_val) bare_func_name = name elseif !is_valid_func_name(name) @@ -2346,16 +2503,27 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= elseif is_identifier_like(name) # Add methods to a global `Function` object, or local closure # type function f() ... + name_str = name.name_val bare_func_name = name else # Add methods to an existing Function # function A.B.f() ... + if kind(name) == K"." && kind(name[2]) == K"Symbol" + name_str = name[2].name_val + end end doc_obj = name # todo: can closures be documented? self_type = @ast ctx name [K"function_type" name] end # Add self argument if isnothing(self_name) + # TODO: #self# should be symbolic rather than a binding for the cases + # where it's reused in `optional_positional_defs!` because it's + # probably unsafe to reuse bindings for multiple different methods in + # the presence of closure captures or other global binding properties. + # + # This is reminiscent of the need to renumber SSA vars in certain cases + # in the flisp implementation. 
self_name = new_local_binding(ctx, name, "#self#"; kind=:argument) end @@ -2365,6 +2533,14 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= push!(arg_names, self_name) push!(arg_types, self_type) args = callex[2:end] + keywords = nothing + if !isempty(args) && kind(args[end]) == K"parameters" + keywords = args[end] + args = args[1:end-1] + if numchildren(keywords) == 0 + keywords = nothing + end + end body_stmts = SyntaxList(ctx) first_default = 0 arg_defaults = SyntaxList(ctx) @@ -2397,7 +2573,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end else if isempty(arg_defaults) - first_default = i + first_default = i + 1 # Offset for self argument end push!(arg_defaults, default) end @@ -2419,6 +2595,22 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] end + if isnothing(keywords) + body_func_name, kw_func_method_defs = (nothing, nothing) + else + body_func_name, kw_func_method_defs, kw_defaults = + keyword_function_defs(ctx, ex, callex, name_str, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, arg_defaults, + keywords, body, ret_var) + # The non-kw function dispatches to the body method + body = @ast ctx ex [K"call" body_func_name + kw_defaults... + arg_names... + ] + # ret_var is used only in the body method + ret_var = nothing + end + method_table_val = nothing # TODO: method overlays method_table = isnothing(method_table_val) ? 
@ast(ctx, callex, "nothing"::K"core") : @@ -2426,7 +2618,6 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= method_stmts = SyntaxList(ctx) if !isempty(arg_defaults) - first_default += 1 # Offset for self argument optional_positional_defs!(ctx, method_stmts, ex, callex, method_table, typevar_names, typevar_stmts, arg_names, arg_types, first_default, arg_defaults) @@ -2435,7 +2626,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= # The method with all non-default arguments push!(method_stmts, method_def_expr(ctx, ex, callex, method_table, typevar_names, arg_names, - arg_types, ret_var, body)) + arg_types, body, ret_var)) if !isnothing(docs) method_stmts[end] = @ast ctx docs [K"block" method_metadata := method_stmts[end] @@ -2452,9 +2643,13 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if !isnothing(bare_func_name) [K"function_decl"(bare_func_name) bare_func_name] end + if !isnothing(body_func_name) + [K"function_decl"(body_func_name) body_func_name] + end [K"scope_block"(scope_type=:hard) [K"block" typevar_stmts... + kw_func_method_defs [K"method_defs" isnothing(bare_func_name) ? 
"nothing"::K"core" : bare_func_name [K"block" @@ -2840,7 +3035,7 @@ end # generate call to `convert()` for `(call new ...)` expressions function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, val) - if kind(field_type) == K"core" && field_type.name_val == "Any" + if is_core_Any(field_type) return val end # kt = kind(field_type) @@ -2872,9 +3067,7 @@ function default_inner_constructors(ctx, srcref, global_struct_name, ] ] end - maybe_non_Any_field_types = filter(field_types) do ft - !(kind(ft) == K"core" && ft.name_val == "Any") - end + maybe_non_Any_field_types = filter(!is_core_Any, field_types) converting_ctor = if !isempty(typevar_names) || !isempty(maybe_non_Any_field_types) # Definition which takes `Any` for all arguments and uses # `Base.convert()` to convert those to the exact field type. Only @@ -3113,7 +3306,7 @@ function _rewrite_ctor_new_calls(ctx, ex, struct_name, global_struct_name, ctor_ ] ] else - fields_all_Any = all(kind(ft) == K"core" && ft.name_val == "Any" for ft in field_types) + fields_all_Any = all(is_core_Any, field_types) if fields_all_Any @ast ctx ex [K"block" struct_type := full_struct_type diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index ff74ea72574e4..cae563b4aea87 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -269,6 +269,25 @@ function reserve_module_binding(mod, name) end end +# Reserve a global binding named "$basename#$i" in module `mod` for the +# smallest `i` starting at `0`. +# +# TODO: Remove the use of this where possible. Currently this is used within +# lowering to create unique global names for keyword function bodies and +# closure types as an alternative to current-julia-module-counter. However, we +# should defer the it to eval-time to make lowering itself completely +# non-mutating. 
+function reserve_module_binding_i(mod, basename) + i = 0 + while true + name = "$basename$i" + if reserve_module_binding(mod, Symbol(name)) + return name + end + i += 1 + end +end + #------------------------------------------------------------------------------- # The following are versions of macros from Base which act as "standard syntax # extensions" with special semantics known to lowering. diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 0619c75a203c5..51bffb6937489 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -236,6 +236,45 @@ end end +@testset "Keyword functions" begin + JuliaLowering.include_string(test_mod, """ + function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) + (a, b, x, y) + end + """) + + @test test_mod.f_kw_simple() === (1, 1.0, 'a', true) + @test test_mod.f_kw_simple(x='b') === (1, 1.0, 'b', true) + @test test_mod.f_kw_simple(y=false) === (1, 1.0, 'a', false) + @test test_mod.f_kw_simple(x='b', y=false) === (1, 1.0, 'b', false) + + @test test_mod.f_kw_simple(20) === (20, 1.0, 'a', true) + @test test_mod.f_kw_simple(20; x='b') === (20, 1.0, 'b', true) + @test test_mod.f_kw_simple(20; y=false) === (20, 1.0, 'a', false) + @test test_mod.f_kw_simple(20; x='b', y=false) === (20, 1.0, 'b', false) + + @test test_mod.f_kw_simple(20, 2.0) === (20, 2.0, 'a', true) + @test test_mod.f_kw_simple(20, 2.0; x='b') === (20, 2.0, 'b', true) + @test test_mod.f_kw_simple(20, 2.0; y=false) === (20, 2.0, 'a', false) + @test test_mod.f_kw_simple(20, 2.0; x='b', y=false) === (20, 2.0, 'b', false) + + # Bad defaults throw a type error + @test_throws(TypeError(Symbol("keyword argument"), :x, Char, 100), + test_mod.f_kw_simple(x=100)) + @test_throws(TypeError(Symbol("keyword argument"), :y, Bool, 100), + test_mod.f_kw_simple(y=100)) + + # Keywords which aren't present throw an error + try + test_mod.f_kw_simple(20; not_present=100) + @test false + catch exc + @test exc isa 
MethodError + @test exc.f == Core.kwcall + @test exc.args == ((; not_present=100), test_mod.f_kw_simple, 20, 1.0) + end +end + @testset "Broadcast" begin @test JuliaLowering.include_string(test_mod, """ let x = [1,2], y = [3,4], z = [5,6] diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 883d5bfd26f38..c4c26e219c4d4 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -896,3 +896,156 @@ end 8 (call JuliaLowering.bind_docs! %₇ "some docs\n" %₅) 9 (return core.nothing) +######################################## +# Keyword function with defaults. +# Order of methods +# 1. #f_kw_simple#0(x, y, ::typeof(f_kw_simple), a, b) (body) +# 2. Core.kwcall(kws, ::typeof(f_kw_simple)) +# 3. Core.kwcall(kws, ::typeof(f_kw_simple), a) +# 4. Core.kwcall(kws, ::typeof(f_kw_simple), a, b) (kwcall body) +# 5. f_kw_simple() +# 6. f_kw_simple(a) +# 7. f_kw_simple(a, b) +function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) + (a, b, x, y) +end +#--------------------- +1 (method TestMod.f_kw_simple) +2 (method TestMod.#f_kw_simple#0) +3 TestMod.#f_kw_simple#0 +4 (call core.Typeof %₃) +5 TestMod.Char +6 TestMod.Bool +7 TestMod.f_kw_simple +8 (call core.Typeof %₇) +9 TestMod.Int +10 TestMod.Float64 +11 (call core.svec %₄ %₅ %₆ %₈ %₉ %₁₀) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/#self#(!read) slot₅/a slot₆/b] + 1 (call core.tuple slot₅/a slot₆/b slot₂/x slot₃/y) + 2 (return %₁) +16 (call core.typeof core.kwcall) +17 TestMod.f_kw_simple +18 (call core.Typeof %₁₇) +19 (call core.svec %₁₆ core.NamedTuple %₁₈) +20 (call core.svec) +21 SourceLocation::1:10 +22 (call core.svec %₁₉ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ + slots: [slot₁/#self#(called) slot₂/kws slot₃/#self#] + 1 (call slot₁/#self# slot₂/kws slot₃/#self# 1 1.0) + 2 (return %₁) +24 (call core.typeof core.kwcall) +25 
TestMod.f_kw_simple +26 (call core.Typeof %₂₅) +27 TestMod.Int +28 (call core.svec %₂₄ core.NamedTuple %₂₆ %₂₇) +29 (call core.svec) +30 SourceLocation::1:10 +31 (call core.svec %₂₈ %₂₉ %₃₀) +32 --- method core.nothing %₃₁ + slots: [slot₁/#self#(called) slot₂/kws slot₃/#self# slot₄/a] + 1 (call slot₁/#self# slot₂/kws slot₃/#self# slot₄/a 1.0) + 2 (return %₁) +33 (call core.typeof core.kwcall) +34 TestMod.f_kw_simple +35 (call core.Typeof %₃₄) +36 TestMod.Int +37 TestMod.Float64 +38 (call core.svec %₃₃ core.NamedTuple %₃₅ %₃₆ %₃₇) +39 (call core.svec) +40 SourceLocation::1:10 +41 (call core.svec %₃₈ %₃₉ %₄₀) +42 --- method core.nothing %₄₁ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/if_val(!read) slot₇/if_val(!read)] + 1 (call core.isdefined slot₂/kws :x) + 2 (gotoifnot %₁ label₁₃) + 3 (call core.getfield slot₂/kws :x) + 4 TestMod.Char + 5 (call core.isa %₃ %₄) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₁) + 8 TestMod.Char + 9 (new core.TypeError :keyword argument :x %₈ %₃) + 10 (call core.throw %₉) + 11 (= slot₆/if_val %₃) + 12 (goto label₁₄) + 13 (= slot₆/if_val 'a') + 14 slot₆/if_val + 15 (call core.isdefined slot₂/kws :y) + 16 (gotoifnot %₁₅ label₂₇) + 17 (call core.getfield slot₂/kws :y) + 18 TestMod.Bool + 19 (call core.isa %₁₇ %₁₈) + 20 (gotoifnot %₁₉ label₂₂) + 21 (goto label₂₅) + 22 TestMod.Bool + 23 (new core.TypeError :keyword argument :y %₂₂ %₁₇) + 24 (call core.throw %₂₃) + 25 (= slot₇/if_val %₁₇) + 26 (goto label₂₈) + 27 (= slot₇/if_val true) + 28 slot₇/if_val + 29 (call top.keys slot₂/kws) + 30 (call core.tuple :x :y) + 31 (call top.diff_names %₂₉ %₃₀) + 32 (call top.isempty %₃₁) + 33 (gotoifnot %₃₂ label₃₅) + 34 (goto label₃₆) + 35 (call top.kwerr slot₂/kws slot₃/#self# slot₄/a slot₅/b) + 36 TestMod.#f_kw_simple#0 + 37 (call %₃₆ %₁₄ %₂₈ slot₃/#self# slot₄/a slot₅/b) + 38 (return %₃₇) +43 TestMod.f_kw_simple +44 (call core.Typeof %₄₃) +45 (call core.svec %₄₄) +46 (call core.svec) +47 SourceLocation::1:10 +48 (call 
core.svec %₄₅ %₄₆ %₄₇) +49 --- method core.nothing %₄₈ + slots: [slot₁/#self#(called)] + 1 (call slot₁/#self# 1 1.0) + 2 (return %₁) +50 TestMod.f_kw_simple +51 (call core.Typeof %₅₀) +52 TestMod.Int +53 (call core.svec %₅₁ %₅₂) +54 (call core.svec) +55 SourceLocation::1:10 +56 (call core.svec %₅₃ %₅₄ %₅₅) +57 --- method core.nothing %₅₆ + slots: [slot₁/#self#(called) slot₂/a] + 1 (call slot₁/#self# slot₂/a 1.0) + 2 (return %₁) +58 TestMod.f_kw_simple +59 (call core.Typeof %₅₈) +60 TestMod.Int +61 TestMod.Float64 +62 (call core.svec %₅₉ %₆₀ %₆₁) +63 (call core.svec) +64 SourceLocation::1:10 +65 (call core.svec %₆₂ %₆₃ %₆₄) +66 --- method core.nothing %₆₅ + slots: [slot₁/#self# slot₂/a slot₃/b] + 1 TestMod.#f_kw_simple#0 + 2 (call %₁ 'a' true slot₁/#self# slot₂/a slot₃/b) + 3 (return %₂) +67 TestMod.f_kw_simple +68 (return %₆₇) + +######################################## +# Error: argument unpacking in keywords +function f_invalid_kw(; (x,y)=10) + (x, y) +end +#--------------------- +LoweringError: +function f_invalid_kw(; (x,y)=10) +# └───┘ ── Invalid keyword name + (x, y) +end + From 32cd53d12ca5e8dba1611c92611c67c278353d2d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 28 Jan 2025 22:17:01 +1000 Subject: [PATCH 0966/1109] Test throwing of UndefKeywordError --- JuliaLowering/src/desugaring.jl | 3 +-- JuliaLowering/test/functions.jl | 11 ++++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index c74846510f6dc..65feceefd14fd 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2253,8 +2253,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names, typevar_stmts, arg_names, arg_types, first_default, arg_defaults, keywords, body, ret_var) mangled_name = let n = isnothing(name_str) ? "_" : name_str - n = string(startswith(n, '#') ? 
"" : "#", n, "#") - reserve_module_binding_i(ctx.mod, n) + reserve_module_binding_i(ctx.mod, string(startswith(n, '#') ? "" : "#", n, "#")) end # TODO: Is the layer correct here? Which module should be the parent module # of this body function? diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 51bffb6937489..f558c5b9a5aa4 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -258,7 +258,7 @@ end @test test_mod.f_kw_simple(20, 2.0; y=false) === (20, 2.0, 'a', false) @test test_mod.f_kw_simple(20, 2.0; x='b', y=false) === (20, 2.0, 'b', false) - # Bad defaults throw a type error + # Bad types for keyword args throw a type error @test_throws(TypeError(Symbol("keyword argument"), :x, Char, 100), test_mod.f_kw_simple(x=100)) @test_throws(TypeError(Symbol("keyword argument"), :y, Bool, 100), @@ -273,6 +273,15 @@ end @test exc.f == Core.kwcall @test exc.args == ((; not_present=100), test_mod.f_kw_simple, 20, 1.0) end + + # Throwing of UndefKeywordError + JuliaLowering.include_string(test_mod, """ + function f_kw_no_default(; x) + x + end + """) + @test test_mod.f_kw_no_default(x = 10) == 10 + @test_throws UndefKeywordError(:x) test_mod.f_kw_no_default() == 10 end @testset "Broadcast" begin From e2868f7354129e3deceb42aea1ef8e761900171c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Jan 2025 12:30:12 +1000 Subject: [PATCH 0967/1109] Fix mutually recursive closures + cleanups Ensure the assignment generated by `K"function_decl"` is closure converted. This fixes closures with keyword arguments which have a mutually recursive pattern between the separate body closure and the closure which is named by the user. Also various cleanups to simplify the generated IR: * Add a runtime function `eval_closure_type()` to simplify the top level IR for closure types. Generating closure types is fairly simple given the list of field names including which fields are boxed. 
* Mark the return of `K"function"` as unnecessary so it can be removed by the linearization pass if not in value position. --- JuliaLowering/src/closure_conversion.jl | 60 +-- JuliaLowering/src/desugaring.jl | 15 +- JuliaLowering/src/runtime.jl | 24 + JuliaLowering/test/closures.jl | 14 + JuliaLowering/test/closures_ir.jl | 597 ++++++++++++++---------- JuliaLowering/test/demo.jl | 9 +- JuliaLowering/test/functions.jl | 11 + JuliaLowering/test/functions_ir.jl | 4 +- JuliaLowering/test/generators_ir.jl | 294 +++++------- JuliaLowering/test/typedefs_ir.jl | 286 ++++++------ 10 files changed, 665 insertions(+), 649 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index e259366541f56..77e3a4a603d46 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -257,50 +257,13 @@ function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_sym # need to be serialized there during precompile. mod = ctx.mod type_binding = new_global_binding(ctx, srcref, name_str, mod) - typevar_stmts = SyntaxList(ctx) - type_params = SyntaxList(ctx) - field_types = SyntaxList(ctx) - for (name, isbox) in zip(field_syms, field_is_box) - if !isbox - typevar_name = "$(name.name_val)_type" - tv = ssavar(ctx, name) - push!(typevar_stmts, @ast ctx name [K"=" tv [K"call" "TypeVar"::K"core" typevar_name::K"Symbol"]]) - push!(type_params, tv) - push!(field_types, tv) - else - push!(field_types, @ast ctx name "Box"::K"core") - end - end - type_ex = @ast ctx srcref [K"lambda"(is_toplevel_thunk=true, lambda_bindings=LambdaBindings()) - [K"block"] - [K"block"] - [K"block" - [K"global" type_binding] - typevar_stmts... - closure_type := [K"call" - "_structtype"::K"core" - mod::K"Value" - name_str::K"Symbol" - [K"call" "svec"::K"core" type_params...] - [K"call" - "svec"::K"core" - field_syms... 
- ] - [K"call" "svec"::K"core"] - false::K"Bool" - length(field_syms)::K"Integer" - ] - [K"call" "_setsuper!"::K"core" closure_type "Function"::K"core"] - # TODO: Need K"const_decl" or whatever when we upgrade to the latest Julia. - [K"const" type_binding] - [K"=" type_binding closure_type] - [K"call" - "_typebody!"::K"core" - closure_type - [K"call" "svec"::K"core" field_types...] - ] - "nothing"::K"core" - ] + type_ex = @ast ctx srcref [K"call" + #"_call_latest"::K"core" + eval_closure_type::K"Value" + ctx.mod::K"Value" + name_str::K"Symbol" + [K"call" "svec"::K"core" field_syms...] + [K"call" "svec"::K"core" [f::K"Bool" for f in field_is_box]...] ] type_ex, type_binding end @@ -417,12 +380,11 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) else [K"call" "apply_type"::K"core" closure_type_ type_params...] end - [K"=" func_name - [K"new" - closure_type - init_closure_args... - ] + closure_val := [K"new" + closure_type + init_closure_args... ] + convert_assignment(ctx, [K"=" func_name closure_val]) ::K"TOMBSTONE" ] else diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 65feceefd14fd..efab67e9c3854 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2639,12 +2639,12 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end @ast ctx ex [K"block" - if !isnothing(bare_func_name) - [K"function_decl"(bare_func_name) bare_func_name] - end if !isnothing(body_func_name) [K"function_decl"(body_func_name) body_func_name] end + if !isnothing(bare_func_name) + [K"function_decl"(bare_func_name) bare_func_name] + end [K"scope_block"(scope_type=:hard) [K"block" typevar_stmts... @@ -2660,12 +2660,9 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] ] ] - if !isnothing(bare_func_name) - # K"function_decl" ensures this name is defined - bare_func_name - else - "nothing"::K"core" - end + [K"unnecessary" + isnothing(bare_func_name) ? 
"nothing"::K"core" : bare_func_name + ] ] end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index cae563b4aea87..8a4cd02bce2ae 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -98,6 +98,30 @@ function interpolate_ast(ex, values...) end end +function eval_closure_type(mod, closure_type_name, field_names, field_is_box) + type_params = Core.TypeVar[] + field_types = [] + for (name, isbox) in zip(field_names, field_is_box) + if !isbox + T = Core.TypeVar(Symbol(name, "_type")) + push!(type_params, T) + push!(field_types, T) + else + push!(field_types, Core.Box) + end + end + type = Core._structtype(mod, closure_type_name, + Core.svec(type_params...), + Core.svec(field_names...), + Core.svec(), + false, + length(field_names)) + Core._setsuper!(type, Core.Function) + Base.eval(mod, :(const $closure_type_name = $type)) + Core._typebody!(type, Core.svec(field_types...)) + type +end + # Interpolate captured local variables into the CodeInfo for a global method function replace_captured_locals!(codeinfo, locals) for (i, ex) in enumerate(codeinfo.code) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 87fa40c12577e..7134b9c043977 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -70,6 +70,20 @@ begin end """) == (true, false, (true, true, true, false)) +# Mutually recursive closures (closure capturing a closure) +@test JuliaLowering.include_string(test_mod, """ +let + function recursive_a(n) + here = (:a, n) + n <= 0 ? 
here : (here, recursive_b(n-1)) + end + function recursive_b(n) + ((:b, n), recursive_a(n-1)) + end + recursive_a(2) +end +""") == ((:a, 2), ((:b, 1), (:a, 0))) + # Global method capturing local variables JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 59502396ddaee..a0ea8fe9ed0e0 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -9,24 +9,15 @@ let end #--------------------- 1 (= slot₂/x (call core.Box)) -2 --- thunk - 1 (global TestMod.#f##0) - 2 (call core.svec) - 3 (call core.svec :x) - 4 (call core.svec) - 5 (call core._structtype TestMod :#f##0 %₂ %₃ %₄ false 1) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#f##0) - 8 (= TestMod.#f##0 %₅) - 9 (call core.svec core.Box) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -3 TestMod.#f##0 -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 SourceLocation::3:14 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 (call core.svec :x) +3 (call core.svec true) +4 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂ %₃) +5 TestMod.#f##0 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::3:14 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)] 1 TestMod.+ 2 (call core.getfield slot₁/#self# :x) @@ -38,14 +29,15 @@ end 8 (call core.getfield %₂ :contents) 9 (call %₁ %₈ slot₂/y) 10 (return %₉) -9 1 -10 slot₂/x -11 (call core.setfield! %₁₀ :contents %₉) -12 TestMod.#f##0 -13 slot₂/x -14 (= slot₁/f (new %₁₂ %₁₃)) -15 slot₁/f -16 (return %₁₅) +11 1 +12 slot₂/x +13 (call core.setfield! 
%₁₂ :contents %₁₁) +14 TestMod.#f##0 +15 slot₂/x +16 (new %₁₄ %₁₅) +17 (= slot₁/f %₁₆) +18 slot₁/f +19 (return %₁₈) ######################################## # Closure declaration with no methods @@ -55,22 +47,14 @@ begin end end #--------------------- -1 --- thunk - 1 (global TestMod.#no_method_f##0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#no_method_f##0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#no_method_f##0) - 8 (= TestMod.#no_method_f##0 %₅) - 9 (call core.svec) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -2 TestMod.#no_method_f##0 -3 (= slot₁/no_method_f (new %₂)) -4 slot₁/no_method_f -5 (return %₄) +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#no_method_f##0 %₁ %₂) +4 TestMod.#no_method_f##0 +5 (new %₄) +6 (= slot₁/no_method_f %₅) +7 slot₁/no_method_f +8 (return %₇) ######################################## # Closure which sets the value of a captured variable @@ -82,37 +66,29 @@ let end #--------------------- 1 (= slot₂/x (call core.Box)) -2 --- thunk - 1 (global TestMod.#f##1) - 2 (call core.svec) - 3 (call core.svec :x) - 4 (call core.svec) - 5 (call core._structtype TestMod :#f##1 %₂ %₃ %₄ false 1) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#f##1) - 8 (= TestMod.#f##1 %₅) - 9 (call core.svec core.Box) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -3 TestMod.#f##1 -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 SourceLocation::3:14 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 (call core.svec :x) +3 (call core.svec true) +4 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₂ %₃) +5 TestMod.#f##1 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::3:14 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 2 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! 
%₂ :contents %₁) 4 (return %₁) -9 1 -10 slot₂/x -11 (call core.setfield! %₁₀ :contents %₉) -12 TestMod.#f##1 -13 slot₂/x -14 (= slot₁/f (new %₁₂ %₁₃)) -15 slot₁/f -16 (return %₁₅) +11 1 +12 slot₂/x +13 (call core.setfield! %₁₂ :contents %₁₁) +14 TestMod.#f##1 +15 slot₂/x +16 (new %₁₄ %₁₅) +17 (= slot₁/f %₁₆) +18 slot₁/f +19 (return %₁₈) ######################################## # Function where arguments are captured into a closure and assigned @@ -125,41 +101,32 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 --- thunk - 1 (global TestMod.#f#g##0) - 2 (call core.svec) - 3 (call core.svec :x) - 4 (call core.svec) - 5 (call core._structtype TestMod :#f#g##0 %₂ %₃ %₄ false 1) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#f#g##0) - 8 (= TestMod.#f#g##0 %₅) - 9 (call core.svec core.Box) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -3 TestMod.#f#g##0 -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::2:14 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 (call core.svec :x) +3 (call core.svec true) +4 (call JuliaLowering.eval_closure_type TestMod :#f#g##0 %₂ %₃) +5 TestMod.#f#g##0 +6 (call core.svec %₅) +7 (call core.svec) +8 SourceLocation::2:14 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read)] 1 10 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! 
%₂ :contents %₁) 4 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.svec %₁₀ core.Any) -12 (call core.svec) -13 SourceLocation::1:10 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/x slot₃/g(called) slot₄/x(!read)] 1 (= slot₂/x (call core.Box slot₂/x)) 2 TestMod.#f#g##0 - 3 (= slot₃/g (new %₂ slot₂/x)) - 4 slot₃/g + 3 (new %₂ slot₂/x) + 4 (= slot₃/g %₃) 5 slot₃/g 6 (call %₅) 7 slot₂/x @@ -170,8 +137,8 @@ end 12 slot₄/x 13 (call core.getfield %₇ :contents) 14 (return %₁₃) -16 TestMod.f -17 (return %₁₆) +18 TestMod.f +19 (return %₁₈) ######################################## # Closure where a local `x` is captured but not boxed @@ -183,47 +150,37 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 --- thunk - 1 (global TestMod.#f#g##1) - 2 (call core.TypeVar :x_type) - 3 (call core.svec %₂) - 4 (call core.svec :x) - 5 (call core.svec) - 6 (call core._structtype TestMod :#f#g##1 %₃ %₄ %₅ false 1) - 7 (call core._setsuper! %₆ core.Function) - 8 (const TestMod.#f#g##1) - 9 (= TestMod.#f#g##1 %₆) - 10 (call core.svec %₂) - 11 (call core._typebody! 
%₆ %₁₀) - 12 (return core.nothing) -3 TestMod.#f#g##1 -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::2:14 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 (call core.svec :x) +3 (call core.svec false) +4 (call JuliaLowering.eval_closure_type TestMod :#f#g##1 %₂ %₃) +5 TestMod.#f#g##1 +6 (call core.svec %₅) +7 (call core.svec) +8 SourceLocation::2:14 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 (call core.getfield slot₁/#self# :x) 2 (= slot₂/y %₁) 3 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.svec %₁₀ core.Any) -12 (call core.svec) -13 SourceLocation::1:10 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/z(!read)] 1 TestMod.#f#g##1 2 (call core.typeof slot₂/x) 3 (call core.apply_type %₁ %₂) - 4 (= slot₃/g (new %₃ slot₂/x)) - 5 slot₃/g + 4 (new %₃ slot₂/x) + 5 (= slot₃/g %₄) 6 slot₂/x 7 (= slot₄/z %₆) 8 (return %₆) -16 TestMod.f -17 (return %₁₆) +18 TestMod.f +19 (return %₁₈) ######################################## # Closure where a static parameter of an outer function is captured @@ -234,51 +191,42 @@ function f(::T) where T end #--------------------- 1 (method TestMod.f) -2 --- thunk - 1 (global TestMod.#f#g##2) - 2 (call core.TypeVar :T_type) - 3 (call core.svec %₂) - 4 (call core.svec :T) - 5 (call core.svec) - 6 (call core._structtype TestMod :#f#g##2 %₃ %₄ %₅ false 1) - 7 (call core._setsuper! %₆ core.Function) - 8 (const TestMod.#f#g##2) - 9 (= TestMod.#f#g##2 %₆) - 10 (call core.svec %₂) - 11 (call core._typebody! 
%₆ %₁₀) - 12 (return core.nothing) -3 TestMod.#f#g##2 -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::2:14 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 (call core.svec :T) +3 (call core.svec false) +4 (call JuliaLowering.eval_closure_type TestMod :#f#g##2 %₂ %₃) +5 TestMod.#f#g##2 +6 (call core.svec %₅) +7 (call core.svec) +8 SourceLocation::2:14 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read)] 1 TestMod.use 2 (call core.getfield slot₁/#self# :T) 3 (call %₁ %₂) 4 (return %₃) -9 (= slot₁/T (call core.TypeVar :T)) -10 TestMod.f -11 (call core.Typeof %₁₀) -12 slot₁/T -13 (call core.svec %₁₁ %₁₂) +11 (= slot₁/T (call core.TypeVar :T)) +12 TestMod.f +13 (call core.Typeof %₁₂) 14 slot₁/T -15 (call core.svec %₁₄) -16 SourceLocation::1:10 -17 (call core.svec %₁₃ %₁₅ %₁₆) -18 --- method core.nothing %₁₇ +15 (call core.svec %₁₃ %₁₄) +16 slot₁/T +17 (call core.svec %₁₆) +18 SourceLocation::1:10 +19 (call core.svec %₁₅ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/g] 1 TestMod.#f#g##2 2 static_parameter₁ 3 (call core.typeof %₂) 4 (call core.apply_type %₁ %₃) 5 static_parameter₁ - 6 (= slot₃/g (new %₄ %₅)) - 7 slot₃/g - 8 (return %₇) -19 TestMod.f -20 (return %₁₉) + 6 (new %₄ %₅) + 7 (= slot₃/g %₆) + 8 slot₃/g + 9 (return %₈) +21 TestMod.f +22 (return %₂₁) ######################################## # Closure captures with `isdefined` @@ -295,25 +243,15 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 --- thunk - 1 (global TestMod.#f#g##3) - 2 (call core.TypeVar :x_type) - 3 (call core.svec %₂) - 4 (call core.svec :x :y) - 5 (call core.svec) - 6 (call core._structtype TestMod :#f#g##3 %₃ %₄ %₅ false 2) - 7 (call core._setsuper! %₆ core.Function) - 8 (const TestMod.#f#g##3) - 9 (= TestMod.#f#g##3 %₆) - 10 (call core.svec %₂ core.Box) - 11 (call core._typebody! 
%₆ %₁₀) - 12 (return core.nothing) -3 TestMod.#f#g##3 -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::2:14 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 (call core.svec :x :y) +3 (call core.svec false true) +4 (call JuliaLowering.eval_closure_type TestMod :#f#g##3 %₂ %₃) +5 TestMod.#f#g##3 +6 (call core.svec %₅) +7 (call core.svec) +8 SourceLocation::2:14 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/z] 1 (= slot₂/z 3) 2 (call core.getfield slot₁/#self# :y) @@ -321,21 +259,21 @@ end 4 (isdefined slot₂/z) 5 (call core.tuple true %₃ %₄) 6 (return %₅) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.svec %₁₀ core.Any) -12 (call core.svec) -13 SourceLocation::1:10 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/y] 1 (= slot₄/y (call core.Box)) 2 TestMod.#f#g##3 3 (call core.typeof slot₂/x) 4 (call core.apply_type %₂ %₃) 5 slot₄/y - 6 (= slot₃/g (new %₄ slot₂/x %₅)) - 7 slot₃/g + 6 (new %₄ slot₂/x %₅) + 7 (= slot₃/g %₆) 8 2 9 slot₄/y 10 (call core.setfield! %₉ :contents %₈) @@ -343,8 +281,8 @@ end 12 (call core.isdefined %₁₁ :contents) 13 (call core.tuple %₁₂ true) 14 (return %₁₃) -16 TestMod.f -17 (return %₁₆) +18 TestMod.f +19 (return %₁₈) ######################################## # FIXME: Nested captures of arguments @@ -414,31 +352,22 @@ end # Anonymous function syntax with -> x -> x*x #--------------------- -1 --- thunk - 1 (global TestMod.#->##0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##0) - 8 (= TestMod.#->##0 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##0 -3 (new %₂) +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂) 4 TestMod.#->##0 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:1 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +5 (new %₄) +6 TestMod.#->##0 +7 (call core.svec %₆ core.Any) +8 (call core.svec) +9 SourceLocation::1:1 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) -10 (return %₃) +12 (return %₅) ######################################## # Anonymous function syntax with `function` @@ -446,31 +375,22 @@ function (x) x*x end #--------------------- -1 --- thunk - 1 (global TestMod.##anon###0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :##anon###0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.##anon###0) - 8 (= TestMod.##anon###0 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.##anon###0 -3 (new %₂) +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :##anon###0 %₁ %₂) 4 TestMod.##anon###0 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +5 (new %₄) +6 TestMod.##anon###0 +7 (call core.svec %₆ core.Any) +8 (call core.svec) +9 SourceLocation::1:10 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) -10 (return %₃) +12 (return %₅) ######################################## # `do` blocks @@ -483,33 +403,24 @@ end 3 (call core.apply_type core.NamedTuple %₂) 4 (call core.tuple 1) 5 (call %₃ %₄) -6 --- thunk - 1 (global TestMod.#do##0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#do##0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#do##0) - 8 (= TestMod.#do##0 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -7 TestMod.#do##0 -8 (call core.svec %₇ core.Any) -9 (call core.svec) -10 SourceLocation::1:13 -11 (call core.svec %₈ %₉ %₁₀) -12 --- method core.nothing %₁₁ +6 (call core.svec) +7 (call core.svec) +8 (call JuliaLowering.eval_closure_type TestMod :#do##0 %₆ %₇) +9 TestMod.#do##0 +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 SourceLocation::1:13 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ slots: [slot₁/#self#(!read) slot₂/y] 1 TestMod.+ 2 (call %₁ slot₂/y 2) 3 (return %₂) -13 TestMod.#do##0 -14 (new %₁₃) -15 TestMod.x -16 (call core.kwcall %₅ %₁ %₁₄ %₁₅) -17 (return %₁₆) +15 TestMod.#do##0 +16 (new %₁₅) +17 TestMod.x +18 (call core.kwcall %₅ %₁ %₁₆ %₁₇) +19 (return %₁₈) ######################################## # Error: Attempt to add methods to a function argument @@ -596,3 +507,183 @@ LoweringError: Base.Experimental.@opaque (x=1)->2x # ╙ ── Default positional arguments cannot be used in an opaque closure +######################################## +# Mutually recursive closures +let + function recursive_a() + recursive_b() + end + function recursive_b() + recursive_a() + end +end +#--------------------- +1 (= slot₁/recursive_a (call core.Box)) +2 (= slot₂/recursive_b (call core.Box)) +3 (call core.svec :recursive_b) +4 (call core.svec true) +5 (call JuliaLowering.eval_closure_type TestMod :#recursive_a##0 %₃ %₄) +6 TestMod.#recursive_a##0 +7 (call core.svec %₆) +8 (call core.svec) +9 SourceLocation::2:14 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read) slot₂/recursive_b(!read)] + 1 (call core.getfield slot₁/#self# :recursive_b) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/recursive_b) + 6 slot₂/recursive_b + 7 (call core.getfield %₁ :contents) + 8 (call %₇) + 9 (return %₈) +12 (call core.svec :recursive_a) +13 (call core.svec true) +14 (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₂ 
%₁₃) +15 TestMod.#recursive_b##0 +16 (call core.svec %₁₅) +17 (call core.svec) +18 SourceLocation::5:14 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/recursive_a(!read)] + 1 (call core.getfield slot₁/#self# :recursive_a) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/recursive_a) + 6 slot₂/recursive_a + 7 (call core.getfield %₁ :contents) + 8 (call %₇) + 9 (return %₈) +21 TestMod.#recursive_a##0 +22 slot₂/recursive_b +23 (new %₂₁ %₂₂) +24 slot₁/recursive_a +25 (call core.setfield! %₂₄ :contents %₂₃) +26 TestMod.#recursive_b##0 +27 slot₁/recursive_a +28 (new %₂₆ %₂₇) +29 slot₂/recursive_b +30 (call core.setfield! %₂₉ :contents %₂₈) +31 slot₂/recursive_b +32 (call core.isdefined %₃₁ :contents) +33 (gotoifnot %₃₂ label₃₅) +34 (goto label₃₇) +35 (newvar slot₄/recursive_b) +36 slot₄/recursive_b +37 (call core.getfield %₃₁ :contents) +38 (return %₃₇) + +######################################## +# Closure with keywords +let y = y_init + function f_kw_closure(; x::X=x_default) + x + y + end +end +#--------------------- +1 TestMod.y_init +2 (call core.svec :y) +3 (call core.svec true) +4 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₂ %₃) +5 (call core.svec :#f_kw_closure#0) +6 (call core.svec true) +7 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₅ %₆) +8 TestMod.##f_kw_closure#0##0 +9 TestMod.X +10 TestMod.#f_kw_closure##0 +11 (call core.svec %₈ %₉ %₁₀) +12 (call core.svec) +13 SourceLocation::2:14 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/x slot₃/#self#(!read) slot₄/y(!read)] + 1 TestMod.+ + 2 (call core.getfield slot₁/#self# :y) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₄/y) + 7 slot₄/y + 8 (call core.getfield %₂ :contents) + 9 (call %₁ slot₂/x %₈) + 10 (return %₉) +16 TestMod.#f_kw_closure##0 +17 (call 
core.svec %₁₆) +18 (call core.svec) +19 SourceLocation::2:14 +20 (call core.svec %₁₇ %₁₈ %₁₉) +21 --- method core.nothing %₂₀ + slots: [slot₁/#self# slot₂/#f_kw_closure#0(!read)] + 1 (call core.getfield slot₁/#self# :#f_kw_closure#0) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/#f_kw_closure#0) + 6 slot₂/#f_kw_closure#0 + 7 (call core.getfield %₁ :contents) + 8 TestMod.x_default + 9 (call %₇ %₈ slot₁/#self#) + 10 (return %₉) +22 (= slot₁/y (call core.Box)) +23 (= slot₂/#f_kw_closure#0 (call core.Box)) +24 slot₁/y +25 (call core.setfield! %₂₄ :contents %₁) +26 TestMod.##f_kw_closure#0##0 +27 slot₁/y +28 (new %₂₆ %₂₇) +29 slot₂/#f_kw_closure#0 +30 (call core.setfield! %₂₉ :contents %₂₈) +31 TestMod.#f_kw_closure##0 +32 slot₂/#f_kw_closure#0 +33 (new %₃₁ %₃₂) +34 (= slot₃/f_kw_closure %₃₃) +35 (call core.typeof core.kwcall) +36 TestMod.#f_kw_closure##0 +37 (call core.svec %₃₅ core.NamedTuple %₃₆) +38 (call core.svec) +39 SourceLocation::2:14 +40 (call core.svec %₃₇ %₃₈ %₃₉) +41 --- code_info + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/#f_kw_closure#0(!read) slot₅/if_val(!read)] + 1 (call core.isdefined slot₂/kws :x) + 2 (gotoifnot %₁ label₁₃) + 3 (call core.getfield slot₂/kws :x) + 4 TestMod.X + 5 (call core.isa %₃ %₄) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₁) + 8 TestMod.X + 9 (new core.TypeError :keyword argument :x %₈ %₃) + 10 (call core.throw %₉) + 11 (= slot₅/if_val %₃) + 12 (goto label₁₅) + 13 TestMod.x_default + 14 (= slot₅/if_val %₁₃) + 15 slot₅/if_val + 16 (call top.keys slot₂/kws) + 17 (call core.tuple :x) + 18 (call top.diff_names %₁₆ %₁₇) + 19 (call top.isempty %₁₈) + 20 (gotoifnot %₁₉ label₂₂) + 21 (goto label₂₃) + 22 (call top.kwerr slot₂/kws slot₃/#self#) + 23 (captured_local 1) + 24 (call core.isdefined %₂₃ :contents) + 25 (gotoifnot %₂₄ label₂₇) + 26 (goto label₂₉) + 27 (newvar slot₄/#f_kw_closure#0) + 28 slot₄/#f_kw_closure#0 + 29 (call core.getfield %₂₃ :contents) + 30 (call 
%₂₉ %₁₅ slot₃/#self#) + 31 (return %₃₀) +42 slot₂/#f_kw_closure#0 +43 (call core.svec %₄₂) +44 (call JuliaLowering.replace_captured_locals! %₄₁ %₄₃) +45 --- method core.nothing %₄₀ %₄₄ +46 slot₃/f_kw_closure +47 (return %₄₆) + diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 39371a25b80cb..739c693c52425 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -755,7 +755,14 @@ end # """ src = """ -Tuple[(x,y) for x in 1:2, y in 1:3] +let + function recursive_a() + recursive_b() + end + function recursive_b() + recursive_a() + end +end """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index f558c5b9a5aa4..29ddd599a289d 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -282,6 +282,17 @@ end """) @test test_mod.f_kw_no_default(x = 10) == 10 @test_throws UndefKeywordError(:x) test_mod.f_kw_no_default() == 10 + + # Closure with keywords + cl = JuliaLowering.include_string(test_mod, """ + let y = 1 + function f_kw_closure(; x=10) + x + y + end + end + """) + @test cl() == 11 + @test cl(x = 20) == 21 end @testset "Broadcast" begin diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index c4c26e219c4d4..46c41c52d42a1 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -910,8 +910,8 @@ function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) (a, b, x, y) end #--------------------- -1 (method TestMod.f_kw_simple) -2 (method TestMod.#f_kw_simple#0) +1 (method TestMod.#f_kw_simple#0) +2 (method TestMod.f_kw_simple) 3 TestMod.#f_kw_simple#0 4 (call core.Typeof %₃) 5 TestMod.Char diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index 685310ad4d832..91265248595ea 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -2,56 +2,38 @@ # Simple 1D generator 
(x+1 for x in xs) #--------------------- -1 --- thunk - 1 (global TestMod.#->##0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##0) - 8 (= TestMod.#->##0 %₅) - 9 (call core.svec) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##0 -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 SourceLocation::1:2 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂) +4 TestMod.#->##0 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:2 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.+ 2 (call %₁ slot₂/x 1) 3 (return %₂) -8 TestMod.#->##0 -9 (new %₈) -10 TestMod.xs -11 (call top.Generator %₉ %₁₀) -12 (return %₁₁) +10 TestMod.#->##0 +11 (new %₁₀) +12 TestMod.xs +13 (call top.Generator %₁₁ %₁₂) +14 (return %₁₃) ######################################## # Product iteration (x+y for x in xs, y in ys) #--------------------- -1 --- thunk - 1 (global TestMod.#->##1) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##1 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##1) - 8 (= TestMod.#->##1 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##1 -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 SourceLocation::1:2 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##1 %₁ %₂) +4 TestMod.#->##1 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:2 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) @@ -64,36 +46,27 @@ 9 slot₅/y 10 (call %₇ %₈ %₉) 11 (return %₁₀) -8 TestMod.#->##1 -9 (new %₈) -10 TestMod.xs -11 TestMod.ys -12 (call top.product %₁₀ %₁₁) -13 (call top.Generator %₉ %₁₂) -14 (return %₁₃) +10 TestMod.#->##1 +11 (new %₁₀) +12 TestMod.xs +13 TestMod.ys +14 (call top.product %₁₂ %₁₃) +15 (call top.Generator %₁₁ %₁₄) +16 (return %₁₅) ######################################## # Use `identity` as the Generator function when possible eg in filters ((x,y) for (x,y) in iter if f(x)) #--------------------- -1 --- thunk - 1 (global TestMod.#->##2) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##2 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##2) - 8 (= TestMod.#->##2 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##2 -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 SourceLocation::1:29 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##2 %₁ %₂) +4 TestMod.#->##2 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:29 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y(!read)] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) @@ -105,42 +78,33 @@ 8 slot₄/x 9 (call %₇ %₈) 10 (return %₉) -8 TestMod.#->##2 -9 (new %₈) -10 TestMod.iter -11 (call top.Filter %₉ %₁₀) -12 (call top.Generator top.identity %₁₁) -13 (return %₁₂) +10 TestMod.#->##2 +11 (new %₁₀) +12 TestMod.iter +13 (call top.Filter %₁₁ %₁₂) +14 (call top.Generator top.identity %₁₃) +15 (return %₁₄) ######################################## # Use of placeholders in iteration vars (1 for _ in xs) #--------------------- -1 --- thunk - 1 (global TestMod.#->##3) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##3 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##3) - 8 (= TestMod.#->##3 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##3 -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 SourceLocation::1:2 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##3 %₁ %₂) +4 TestMod.#->##3 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:2 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 (return 1) -8 TestMod.#->##3 -9 (new %₈) -10 TestMod.xs -11 (call top.Generator %₉ %₁₀) -12 (return %₁₁) +10 TestMod.#->##3 +11 (new %₁₀) +12 TestMod.xs +13 (call top.Generator %₁₁ %₁₂) +14 (return %₁₃) ######################################## # Error: Use of placeholders in body @@ -154,24 +118,15 @@ LoweringError: # 1D generator with destructuring (body for (x,_,y) in iter) #--------------------- -1 --- thunk - 1 (global TestMod.#->##5) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##5 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##5) - 8 (= TestMod.#->##5 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##5 -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 SourceLocation::1:2 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##5 %₁ %₂) +4 TestMod.#->##5 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:2 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x(!read) slot₅/y(!read)] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) @@ -185,42 +140,33 @@ LoweringError: 10 (= slot₅/y (call core.getfield %₉ 1)) 11 TestMod.body 12 (return %₁₁) -8 TestMod.#->##5 -9 (new %₈) -10 TestMod.iter -11 (call top.Generator %₉ %₁₀) -12 (return %₁₁) +10 TestMod.#->##5 +11 (new %₁₀) +12 TestMod.iter +13 (call top.Generator %₁₁ %₁₂) +14 (return %₁₃) ######################################## # return permitted in quoted syntax in generator (:(return x) for _ in iter) #--------------------- -1 --- thunk - 1 (global TestMod.#->##6) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##6 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##6) - 8 (= TestMod.#->##6 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -2 TestMod.#->##6 -3 (call core.svec %₂ core.Any) -4 (call core.svec) -5 SourceLocation::1:4 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##6 %₁ %₂) +4 TestMod.#->##6 +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:4 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 (call JuliaLowering.interpolate_ast (inert (return x))) 2 (return %₁) -8 TestMod.#->##6 -9 (new %₈) -10 TestMod.iter -11 (call top.Generator %₉ %₁₀) -12 (return %₁₁) +10 TestMod.#->##6 +11 (new %₁₀) +12 TestMod.iter +13 (call top.Generator %₁₁ %₁₂) +14 (return %₁₃) ######################################## # Error: `return` not permitted in generator body @@ -234,47 +180,29 @@ LoweringError: # Nested case with duplicate iteration variables (x for x in 1:3 for x in 1:2) #--------------------- -1 --- thunk - 1 (global TestMod.#->##7) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->##7 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->##7) - 8 (= TestMod.#->##7 %₅) - 9 (call core.svec) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -2 --- thunk - 1 (global TestMod.#->#->##0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#->#->##0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#->#->##0) - 8 (= TestMod.#->#->##0 %₅) - 9 (call core.svec) - 10 (call core._typebody! 
%₅ %₉) - 11 (return core.nothing) -3 TestMod.#->#->##0 -4 (call core.svec %₃ core.Any) +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##7 %₁ %₂) +4 (call core.svec) 5 (call core.svec) -6 SourceLocation::1:2 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +6 (call JuliaLowering.eval_closure_type TestMod :#->#->##0 %₄ %₅) +7 TestMod.#->#->##0 +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 SourceLocation::1:2 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/x slot₃/x] 1 slot₂/x 2 (= slot₃/x %₁) 3 slot₃/x 4 (return %₃) -9 TestMod.#->##7 -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 SourceLocation::1:2 -13 (call core.svec %₁₀ %₁₁ %₁₂) -14 --- method core.nothing %₁₃ +13 TestMod.#->##7 +14 (call core.svec %₁₃ core.Any) +15 (call core.svec) +16 SourceLocation::1:2 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 TestMod.#->#->##0 2 (new %₁) @@ -282,13 +210,13 @@ LoweringError: 4 (call %₃ 1 2) 5 (call top.Generator %₂ %₄) 6 (return %₅) -15 TestMod.#->##7 -16 (new %₁₅) -17 TestMod.: -18 (call %₁₇ 1 3) -19 (call top.Generator %₁₆ %₁₈) -20 (call top.Flatten %₁₉) -21 (return %₂₀) +19 TestMod.#->##7 +20 (new %₁₉) +21 TestMod.: +22 (call %₂₁ 1 3) +23 (call top.Generator %₂₀ %₂₂) +24 (call top.Flatten %₂₃) +25 (return %₂₄) ######################################## # Comprehension lowers to generator with collect diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index c3b76fb0809ff..bd134e6292112 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -825,74 +825,65 @@ struct X X(a,b,c) = new(a) end #--------------------- -1 --- thunk - 1 (global TestMod.#f##0) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#f##0 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! 
%₅ core.Function) - 7 (const TestMod.#f##0) - 8 (= TestMod.#f##0 %₅) - 9 (call core.svec) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -2 TestMod.#f##0 -3 (call core.svec %₂) -4 (call core.svec) -5 SourceLocation::3:5 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂) +4 TestMod.#f##0 +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::3:5 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -8 (newvar slot₂/f) -9 (global TestMod.X) -10 (const TestMod.X) -11 (call core.svec) -12 (call core.svec :x) +10 (newvar slot₂/f) +11 (global TestMod.X) +12 (const TestMod.X) 13 (call core.svec) -14 (call core._structtype TestMod :X %₁₁ %₁₂ %₁₃ false 1) -15 (= slot₁/X %₁₄) -16 (call core._setsuper! %₁₄ core.Any) -17 (isdefined TestMod.X) -18 (gotoifnot %₁₇ label₂₈) -19 TestMod.X -20 (call core._equiv_typedef %₁₉ %₁₄) -21 (gotoifnot %₂₀ label₂₅) -22 TestMod.X -23 (= slot₁/X %₂₂) -24 (goto label₂₇) -25 slot₁/X -26 (= TestMod.X %₂₅) -27 (goto label₃₀) -28 slot₁/X -29 (= TestMod.X %₂₈) +14 (call core.svec :x) +15 (call core.svec) +16 (call core._structtype TestMod :X %₁₃ %₁₄ %₁₅ false 1) +17 (= slot₁/X %₁₆) +18 (call core._setsuper! %₁₆ core.Any) +19 (isdefined TestMod.X) +20 (gotoifnot %₁₉ label₃₀) +21 TestMod.X +22 (call core._equiv_typedef %₂₁ %₁₆) +23 (gotoifnot %₂₂ label₂₇) +24 TestMod.X +25 (= slot₁/X %₂₄) +26 (goto label₂₉) +27 slot₁/X +28 (= TestMod.X %₂₇) +29 (goto label₃₂) 30 slot₁/X -31 (call core.svec core.Any) -32 (call core._typebody! 
%₃₀ %₃₁) -33 TestMod.#f##0 -34 (= slot₂/f (new %₃₃)) -35 slot₂/f -36 TestMod.X -37 (call core.apply_type core.Type %₃₆) -38 (call core.svec %₃₇ core.Any) -39 (call core.svec) -40 SourceLocation::5:5 -41 (call core.svec %₃₈ %₃₉ %₄₀) -42 --- method core.nothing %₄₁ +31 (= TestMod.X %₃₀) +32 slot₁/X +33 (call core.svec core.Any) +34 (call core._typebody! %₃₂ %₃₃) +35 TestMod.#f##0 +36 (new %₃₅) +37 (= slot₂/f %₃₆) +38 TestMod.X +39 (call core.apply_type core.Type %₃₈) +40 (call core.svec %₃₉ core.Any) +41 (call core.svec) +42 SourceLocation::5:5 +43 (call core.svec %₄₀ %₄₁ %₄₂) +44 --- method core.nothing %₄₃ slots: [slot₁/#ctor-self# slot₂/x] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/x) 3 (return %₂) -43 TestMod.X -44 (call core.apply_type core.Type %₄₃) -45 (call core.svec %₄₄ core.Any core.Any) -46 (call core.svec) -47 SourceLocation::6:5 -48 (call core.svec %₄₅ %₄₆ %₄₇) -49 --- method core.nothing %₄₈ +45 TestMod.X +46 (call core.apply_type core.Type %₄₅) +47 (call core.svec %₄₆ core.Any core.Any) +48 (call core.svec) +49 SourceLocation::6:5 +50 (call core.svec %₄₇ %₄₈ %₄₉) +51 --- method core.nothing %₅₀ slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)] 1 TestMod.ReallyXIPromise 2 slot₁/#ctor-self# @@ -908,21 +899,21 @@ end 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) 13 slot₄/tmp 14 (return %₁₃) -50 TestMod.X -51 (call core.apply_type core.Type %₅₀) -52 (call core.svec %₅₁ core.Any core.Any core.Any) -53 (call core.svec) -54 SourceLocation::10:5 -55 (call core.svec %₅₂ %₅₃ %₅₄) -56 --- method core.nothing %₅₅ +52 TestMod.X +53 (call core.apply_type core.Type %₅₂) +54 (call core.svec %₅₃ core.Any core.Any core.Any) +55 (call core.svec) +56 SourceLocation::10:5 +57 (call core.svec %₅₄ %₅₅ %₅₆) +58 --- method core.nothing %₅₇ slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/a) 3 (return %₂) -57 TestMod.X -58 (call core.apply_type core.Type %₅₇) -59 (call JuliaLowering.bind_docs! 
%₅₈ "Docs for X constructor\n" %₅₅) -60 (return core.nothing) +59 TestMod.X +60 (call core.apply_type core.Type %₅₉) +61 (call JuliaLowering.bind_docs! %₆₀ "Docs for X constructor\n" %₅₇) +62 (return core.nothing) ######################################## # User defined inner constructors and helper functions for structs with type params @@ -933,24 +924,15 @@ struct X{S,T} f() = new{A,B}(1) end #--------------------- -1 --- thunk - 1 (global TestMod.#f##1) - 2 (call core.svec) - 3 (call core.svec) - 4 (call core.svec) - 5 (call core._structtype TestMod :#f##1 %₂ %₃ %₄ false 0) - 6 (call core._setsuper! %₅ core.Function) - 7 (const TestMod.#f##1) - 8 (= TestMod.#f##1 %₅) - 9 (call core.svec) - 10 (call core._typebody! %₅ %₉) - 11 (return core.nothing) -2 TestMod.#f##1 -3 (call core.svec %₂) -4 (call core.svec) -5 SourceLocation::5:5 -6 (call core.svec %₃ %₄ %₅) -7 --- method core.nothing %₆ +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₁ %₂) +4 TestMod.#f##1 +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::5:5 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 TestMod.A @@ -958,81 +940,81 @@ end 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ 1) 6 (return %₅) -8 (newvar slot₅/f) -9 (global TestMod.X) -10 (const TestMod.X) -11 (= slot₂/S (call core.TypeVar :S)) -12 (= slot₃/T (call core.TypeVar :T)) -13 slot₂/S -14 slot₃/T -15 (call core.svec %₁₃ %₁₄) -16 (call core.svec :x) -17 (call core.svec) -18 (call core._structtype TestMod :X %₁₅ %₁₆ %₁₇ false 1) -19 (= slot₄/X %₁₈) -20 (call core._setsuper! 
%₁₈ core.Any) -21 (isdefined TestMod.X) -22 (gotoifnot %₂₁ label₄₂) -23 TestMod.X -24 (call core._equiv_typedef %₂₃ %₁₈) -25 (gotoifnot %₂₄ label₃₉) -26 TestMod.X -27 (= slot₄/X %₂₆) +10 (newvar slot₅/f) +11 (global TestMod.X) +12 (const TestMod.X) +13 (= slot₂/S (call core.TypeVar :S)) +14 (= slot₃/T (call core.TypeVar :T)) +15 slot₂/S +16 slot₃/T +17 (call core.svec %₁₅ %₁₆) +18 (call core.svec :x) +19 (call core.svec) +20 (call core._structtype TestMod :X %₁₇ %₁₈ %₁₉ false 1) +21 (= slot₄/X %₂₀) +22 (call core._setsuper! %₂₀ core.Any) +23 (isdefined TestMod.X) +24 (gotoifnot %₂₃ label₄₄) +25 TestMod.X +26 (call core._equiv_typedef %₂₅ %₂₀) +27 (gotoifnot %₂₆ label₄₁) 28 TestMod.X -29 (call top.getproperty %₂₈ :body) -30 (call top.getproperty %₂₉ :body) -31 (call top.getproperty %₃₀ :parameters) -32 (call top.indexed_iterate %₃₁ 1) -33 (= slot₂/S (call core.getfield %₃₂ 1)) -34 (= slot₁/iterstate (call core.getfield %₃₂ 2)) -35 slot₁/iterstate -36 (call top.indexed_iterate %₃₁ 2 %₃₅) -37 (= slot₃/T (call core.getfield %₃₆ 1)) -38 (goto label₄₁) -39 slot₄/X -40 (= TestMod.X %₃₉) -41 (goto label₄₄) -42 slot₄/X -43 (= TestMod.X %₄₂) +29 (= slot₄/X %₂₈) +30 TestMod.X +31 (call top.getproperty %₃₀ :body) +32 (call top.getproperty %₃₁ :body) +33 (call top.getproperty %₃₂ :parameters) +34 (call top.indexed_iterate %₃₃ 1) +35 (= slot₂/S (call core.getfield %₃₄ 1)) +36 (= slot₁/iterstate (call core.getfield %₃₄ 2)) +37 slot₁/iterstate +38 (call top.indexed_iterate %₃₃ 2 %₃₇) +39 (= slot₃/T (call core.getfield %₃₈ 1)) +40 (goto label₄₃) +41 slot₄/X +42 (= TestMod.X %₄₁) +43 (goto label₄₆) 44 slot₄/X -45 (call core.svec core.Any) -46 (call core._typebody! 
%₄₄ %₄₅) -47 TestMod.X -48 TestMod.A -49 TestMod.B -50 (call core.apply_type %₄₇ %₄₈ %₄₉) -51 (call core.apply_type core.Type %₅₀) -52 (call core.svec %₅₁) -53 (call core.svec) -54 SourceLocation::3:5 -55 (call core.svec %₅₂ %₅₃ %₅₄) -56 --- method core.nothing %₅₅ +45 (= TestMod.X %₄₄) +46 slot₄/X +47 (call core.svec core.Any) +48 (call core._typebody! %₄₆ %₄₇) +49 TestMod.X +50 TestMod.A +51 TestMod.B +52 (call core.apply_type %₄₉ %₅₀ %₅₁) +53 (call core.apply_type core.Type %₅₂) +54 (call core.svec %₅₃) +55 (call core.svec) +56 SourceLocation::3:5 +57 (call core.svec %₅₄ %₅₅ %₅₆) +58 --- method core.nothing %₅₇ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -57 (= slot₆/U (call core.TypeVar :U)) -58 (= slot₇/V (call core.TypeVar :V)) -59 TestMod.X -60 slot₆/U -61 slot₇/V -62 (call core.apply_type %₅₉ %₆₀ %₆₁) -63 (call core.apply_type core.Type %₆₂) -64 (call core.svec %₆₃) -65 slot₆/U -66 slot₇/V -67 (call core.svec %₆₅ %₆₆) -68 SourceLocation::4:5 -69 (call core.svec %₆₄ %₆₇ %₆₈) -70 --- method core.nothing %₆₉ +59 (= slot₆/U (call core.TypeVar :U)) +60 (= slot₇/V (call core.TypeVar :V)) +61 TestMod.X +62 slot₆/U +63 slot₇/V +64 (call core.apply_type %₆₁ %₆₂ %₆₃) +65 (call core.apply_type core.Type %₆₄) +66 (call core.svec %₆₅) +67 slot₆/U +68 slot₇/V +69 (call core.svec %₆₇ %₆₈) +70 SourceLocation::4:5 +71 (call core.svec %₆₆ %₆₉ %₇₀) +72 --- method core.nothing %₇₁ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -71 TestMod.#f##1 -72 (= slot₅/f (new %₇₁)) -73 slot₅/f -74 (return core.nothing) +73 TestMod.#f##1 +74 (new %₇₃) +75 (= slot₅/f %₇₄) +76 (return core.nothing) ######################################## # new() calls with splats; `Any` fields From fde7c4d55c81f3c50e6de831222188caac660143 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Jan 2025 12:44:58 +1000 Subject: [PATCH 0968/1109] Rename `K"unnecessary"` -> `K"removable"` For clarity in analogy to the terminology of 
`Base.@assume_effects :removable` --- JuliaLowering/src/desugaring.jl | 16 ++++++++-------- JuliaLowering/src/kinds.jl | 6 +++++- JuliaLowering/src/linear_ir.jl | 4 +--- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index efab67e9c3854..2e23bc8b21b70 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -184,7 +184,7 @@ function tuple_to_assignments(ctx, ex) @ast ctx ex [K"block" stmts... end_stmts... - [K"unnecessary" [K"tuple" elements...]] + [K"removable" [K"tuple" elements...]] ] end @@ -378,7 +378,7 @@ function expand_property_destruct(ctx, ex) ] ])) end - push!(stmts, @ast ctx rhs1 [K"unnecessary" rhs1]) + push!(stmts, @ast ctx rhs1 [K"removable" rhs1]) makenode(ctx, ex, K"block", stmts) end @@ -419,7 +419,7 @@ function expand_tuple_destruct(ctx, ex) emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) end _destructure(ctx, ex, stmts, lhs, rhs1) - push!(stmts, @ast ctx rhs1 [K"unnecessary" rhs1]) + push!(stmts, @ast ctx rhs1 [K"removable" rhs1]) makenode(ctx, ex, K"block", stmts) end @@ -647,7 +647,7 @@ function expand_setindex(ctx, ex) rhs idxs... ]) - [K"unnecessary" rhs] + [K"removable" rhs] ] end @@ -1200,7 +1200,7 @@ function expand_assignment(ctx, ex) expand_forms_2(ctx, @ast ctx ex [K"block" stmts... - [K"unnecessary" rr] + [K"removable" rr] ] ) elseif is_identifier_like(lhs) @@ -1224,7 +1224,7 @@ function expand_assignment(ctx, ex) @ast ctx ex [K"block" stmts... 
[K"call" "setproperty!"::K"top" a b rhs] - [K"unnecessary" rhs] + [K"removable" rhs] ] elseif kl == K"tuple" if has_parameters(lhs) @@ -2167,7 +2167,7 @@ function method_def_expr(ctx, srcref, callex_srcref, method_table, ret_var # might be `nothing` and hence removed ] ] - [K"unnecessary" method_metadata] + [K"removable" method_metadata] ] end @@ -2660,7 +2660,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] ] ] - [K"unnecessary" + [K"removable" isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name ] ] diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 94c5c943b9512..3b688f523752f 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -84,7 +84,11 @@ function _register_kinds() "label" "trycatchelse" "tryfinally" - "unnecessary" + # The contained block of code causes no side effects and can be + # removed by a later lowering pass if its value isn't used. + # (That is, it's removable in the same sense as + # `@assume_effects :removable`.) + "removable" "decl" # [K"captured_local" index] # A local variable captured into a global method. Contains the diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 7dcea91e5d273..14c8cbe17f7c9 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -698,9 +698,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) elseif k == K"return" compile(ctx, ex[1], true, true) nothing - elseif k == K"unnecessary" - # `unnecessary` marks expressions generated by lowering that - # do not need to be evaluated if their value is unused. 
+ elseif k == K"removable" if needs_value compile(ctx, ex[1], needs_value, in_tail_pos) else From fb93f6740e2d356775e93bbcef4322b663d44517 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Jan 2025 14:32:08 +1000 Subject: [PATCH 0969/1109] Desugaring of keyword argument slurping --- JuliaLowering/src/ast.jl | 4 + JuliaLowering/src/desugaring.jl | 99 +++++++++++------- JuliaLowering/test/functions.jl | 16 +++ JuliaLowering/test/functions_ir.jl | 156 +++++++++++++++++++++++++++-- 4 files changed, 230 insertions(+), 45 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index b00bd422dc74a..5e9544bd7d3a0 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -77,6 +77,10 @@ function _node_id(graph::SyntaxGraph, ex) # Fallback to give a comprehensible error message for use with the @ast macro error("Attempt to use `$(repr(ex))` of type `$(typeof(ex))` as an AST node. Try annotating with `::K\"your_intended_kind\"?`") end +function _node_id(graph::SyntaxGraph, ex::AbstractVector{<:SyntaxTree}) + # Fallback to give a comprehensible error message for use with the @ast macro + error("Attempt to use vector as an AST node. Did you mean to splat this? (content: `$(repr(ex))`)") +end _node_ids(graph::SyntaxGraph) = () _node_ids(graph::SyntaxGraph, ::Nothing, cs...) = _node_ids(graph, cs...) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 2e23bc8b21b70..51932e18433b5 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2062,7 +2062,7 @@ end #------------------------------------------------------------------------------- # Expansion of function definitions -function expand_function_arg(ctx, body_stmts, arg, is_last_arg) +function expand_function_arg(ctx, body_stmts, arg, is_last_arg, is_kw) ex = arg if kind(ex) == K"=" @@ -2074,7 +2074,8 @@ function expand_function_arg(ctx, body_stmts, arg, is_last_arg) if kind(ex) == K"..." 
if !is_last_arg - throw(LoweringError(arg, "`...` may only be used for the last function argument")) + typmsg = is_kw ? "keyword" : "positional" + throw(LoweringError(arg, "`...` may only be used for the last $typmsg argument")) end @chk numchildren(ex) == 1 slurp_ex = ex @@ -2092,6 +2093,9 @@ function expand_function_arg(ctx, body_stmts, arg, is_last_arg) type = ex[2] ex = ex[1] end + if is_kw && !isnothing(slurp_ex) + throw(LoweringError(slurp_ex, "keyword argument with `...` may not be given a type")) + end else type = @ast ctx ex "Any"::K"core" end @@ -2100,10 +2104,7 @@ function expand_function_arg(ctx, body_stmts, arg, is_last_arg) end k = kind(ex) - if k == K"tuple" - if isnothing(body_stmts) - throw(LoweringError(ex, "Invalid keyword name")) - end + if k == K"tuple" && !is_kw # Argument destructuring is_nospecialize = getmeta(arg, :nospecialize, false) name = new_local_binding(ctx, ex, "destructured_arg"; @@ -2115,7 +2116,7 @@ function expand_function_arg(ctx, body_stmts, arg, is_last_arg) elseif k == K"Identifier" || k == K"Placeholder" name = ex else - throw(LoweringError(ex, "Invalid function argument")) + throw(LoweringError(ex, is_kw ? "Invalid keyword name" : "Invalid function argument")) end return (name, type, default, !isnothing(slurp_ex)) @@ -2249,6 +2250,7 @@ function optional_positional_defs!(ctx, method_stmts, srcref, callex, end end +# Generate body function and `Core.kwcall` overloads for functions taking keywords. 
function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names, typevar_stmts, arg_names, arg_types, first_default, arg_defaults, keywords, body, ret_var) @@ -2283,11 +2285,23 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kw_defaults = SyntaxList(ctx) kw_name_syms = SyntaxList(ctx) kw_val_vars = SyntaxList(ctx) + has_kw_slurp = false for (i,arg) in enumerate(children(keywords)) (aname, atype, default, is_slurp) = - expand_function_arg(ctx, nothing, arg, i == numchildren(keywords)) + expand_function_arg(ctx, nothing, arg, i == numchildren(keywords), true) name_sym = @ast ctx aname aname=>K"Symbol" - @assert !is_slurp # TODO + push!(body_arg_names, aname) + + if is_slurp + if !isnothing(default) + throw(LoweringError(arg, "keyword argument with `...` cannot have a default value")) + end + has_kw_slurp = true + push!(body_arg_types, @ast ctx arg [K"call" "pairs"::K"top" "NamedTuple"::K"core"]) + continue + end + + push!(body_arg_types, atype) if isnothing(default) default = @ast ctx arg [K"call" "throw"::K"core" @@ -2299,6 +2313,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, end kw_var = ssavar(ctx, arg, "kw_var") # <- TODO: Use `aname` here, if necessary push!(kw_val_vars, kw_var) + # Extract the value and check the type of each expected keyword argument push!(kwcall_body_stmts, @ast ctx arg [K"=" kw_var [K"if" @@ -2331,8 +2346,6 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, push!(kw_defaults, default) push!(kw_name_syms, name_sym) - push!(body_arg_names, aname) - push!(body_arg_types, atype) end append!(body_arg_names, arg_names) append!(body_arg_types, arg_types) @@ -2350,31 +2363,53 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults) end + # The "main kwcall overload" unpacks keywords and checks their consistency + # before dispatching to the user's code in the body method. 
kwcall_body = @ast ctx keywords [K"block" + # Unpack kws kwcall_body_stmts... - [K"if" - [K"call" - "isempty"::K"top" + if has_kw_slurp + # Slurp remaining keywords into last arg + remaining_kws := [K"call" + "pairs"::K"top" + if isempty(kw_name_syms) + kws_arg + else + [K"call" + "structdiff"::K"top" + kws_arg + [K"curly" + "NamedTuple"::K"core" + [K"tuple" kw_name_syms...] + ] + ] + end + ] + else + # Check that there's no unexpected keywords + [K"if" [K"call" - "diff_names"::K"top" - [K"call" "keys"::K"top" kws_arg] - [K"tuple" kw_name_syms...] + "isempty"::K"top" + [K"call" + "diff_names"::K"top" + [K"call" "keys"::K"top" kws_arg] + [K"tuple" kw_name_syms...] + ] ] - ] - "nothing"::K"core" - if true - [K"call" # Report unsupported kws + "nothing"::K"core" + [K"call" "kwerr"::K"top" kws_arg arg_names... ] - else - # TODO: kw slurping - end - ] + ] + end [K"call" body_func_name kw_val_vars... + if has_kw_slurp + remaining_kws + end arg_names... ] ] @@ -2541,11 +2576,11 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end end body_stmts = SyntaxList(ctx) - first_default = 0 + first_default = 0 # index into arg_names/arg_types arg_defaults = SyntaxList(ctx) for (i,arg) in enumerate(args) (aname, atype, default, is_slurp) = expand_function_arg(ctx, body_stmts, arg, - i == length(args)) + i == length(args), false) push!(arg_names, aname) # TODO: Ideally, ensure side effects of evaluating arg_types only @@ -2568,7 +2603,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= # one with a [default value]($(first(arg_defaults))) # """ # - throw(LoweringError(args[first_default], "optional positional arguments must occur at end")) + throw(LoweringError(args[first_default-1], "optional positional arguments must occur at end")) end else if isempty(arg_defaults) @@ -2727,7 +2762,7 @@ function expand_opaque_closure(ctx, ex) is_va = false for (i, arg) in enumerate(children(args)) (aname, atype, default, 
is_slurp) = expand_function_arg(ctx, body_stmts, arg, - i == numchildren(args)) + i == numchildren(args), false) is_va |= is_slurp push!(arg_names, aname) push!(arg_types, atype) @@ -3150,12 +3185,6 @@ function default_outer_constructor(ctx, srcref, global_struct_name, ] end -function _new_call(ctx, ex, typevar_names, field_names, field_types) - if has_keywords(ex) - throw(LoweringError("")) - end -end - function _is_new_call(ex) kind(ex) == K"call" && ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") || diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 29ddd599a289d..b81b230880b07 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -274,6 +274,22 @@ end @test exc.args == ((; not_present=100), test_mod.f_kw_simple, 20, 1.0) end + # Slurping of keyword args + JuliaLowering.include_string(test_mod, """ + function f_kw_slurp_all(; kws...) + kws + end + """) + @test values(test_mod.f_kw_slurp_all(x = 1, y = 2)) === (x=1, y=2) + + # Slurping of keyword args + JuliaLowering.include_string(test_mod, """ + function f_kw_slurp_some(; x=1, y=2, kws...) 
+ kws + end + """) + @test values(test_mod.f_kw_slurp_some(z=3, x = 1, y = 2, w=4)) === (z=3, w=4) + # Throwing of UndefKeywordError JuliaLowering.include_string(test_mod, """ function f_kw_no_default(; x) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 46c41c52d42a1..c24b5c5ba0c4f 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -121,7 +121,7 @@ end #--------------------- LoweringError: function f(xs..., y) -# └───┘ ── `...` may only be used for the last function argument +# └───┘ ── `...` may only be used for the last positional argument body end @@ -639,14 +639,14 @@ end 18 (return %₁₇) ######################################## -# Error: Default positional args after a slurp -function f(x=1, ys..., z=2) +# Error: Default positional args before non-default arg +function f(x=1, ys, z=2) ys end #--------------------- LoweringError: -function f(x=1, ys..., z=2) -# └────┘ ── `...` may only be used for the last function argument +function f(x=1, ys, z=2) +# └─┘ ── optional positional arguments must occur at end ys end @@ -1037,15 +1037,151 @@ end 67 TestMod.f_kw_simple 68 (return %₆₇) +######################################## +# Keyword slurping - simple forwarding of all kws +function f_kw_slurp_simple(; kws...) 
+ kws +end +#--------------------- +1 (method TestMod.#f_kw_slurp_simple#0) +2 (method TestMod.f_kw_slurp_simple) +3 TestMod.#f_kw_slurp_simple#0 +4 (call core.Typeof %₃) +5 (call top.pairs core.NamedTuple) +6 TestMod.f_kw_slurp_simple +7 (call core.Typeof %₆) +8 (call core.svec %₄ %₅ %₇) +9 (call core.svec) +10 SourceLocation::1:10 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self#(!read)] + 1 slot₂/kws + 2 (return %₁) +13 (call core.typeof core.kwcall) +14 TestMod.f_kw_slurp_simple +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₃ core.NamedTuple %₁₅) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self#] + 1 (call top.pairs slot₂/kws) + 2 TestMod.#f_kw_slurp_simple#0 + 3 (call %₂ %₁ slot₃/#self#) + 4 (return %₃) +21 TestMod.f_kw_slurp_simple +22 (call core.Typeof %₂₁) +23 (call core.svec %₂₂) +24 (call core.svec) +25 SourceLocation::1:10 +26 (call core.svec %₂₃ %₂₄ %₂₅) +27 --- method core.nothing %₂₆ + slots: [slot₁/#self#] + 1 TestMod.#f_kw_slurp_simple#0 + 2 (call %₁ slot₁/#self#) + 3 (return %₂) +28 TestMod.f_kw_slurp_simple +29 (return %₂₈) + +######################################## +# Keyword slurping +function f_kw_slurp(; x=x_default, kws...) 
+ kws +end +#--------------------- +1 (method TestMod.#f_kw_slurp#0) +2 (method TestMod.f_kw_slurp) +3 TestMod.#f_kw_slurp#0 +4 (call core.Typeof %₃) +5 (call top.pairs core.NamedTuple) +6 TestMod.f_kw_slurp +7 (call core.Typeof %₆) +8 (call core.svec %₄ core.Any %₅ %₇) +9 (call core.svec) +10 SourceLocation::1:10 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/kws slot₄/#self#(!read)] + 1 slot₃/kws + 2 (return %₁) +13 (call core.typeof core.kwcall) +14 TestMod.f_kw_slurp +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₃ core.NamedTuple %₁₅) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/if_val(!read)] + 1 (call core.isdefined slot₂/kws :x) + 2 (gotoifnot %₁ label₆) + 3 (call core.getfield slot₂/kws :x) + 4 (= slot₄/if_val %₃) + 5 (goto label₈) + 6 TestMod.x_default + 7 (= slot₄/if_val %₆) + 8 slot₄/if_val + 9 (call core.tuple :x) + 10 (call core.apply_type core.NamedTuple %₉) + 11 (call top.structdiff slot₂/kws %₁₀) + 12 (call top.pairs %₁₁) + 13 TestMod.#f_kw_slurp#0 + 14 (call %₁₃ %₈ %₁₂ slot₃/#self#) + 15 (return %₁₄) +21 TestMod.f_kw_slurp +22 (call core.Typeof %₂₁) +23 (call core.svec %₂₂) +24 (call core.svec) +25 SourceLocation::1:10 +26 (call core.svec %₂₃ %₂₄ %₂₅) +27 --- method core.nothing %₂₆ + slots: [slot₁/#self#] + 1 TestMod.#f_kw_slurp#0 + 2 TestMod.x_default + 3 (call %₁ %₂ slot₁/#self#) + 4 (return %₃) +28 TestMod.f_kw_slurp +29 (return %₂₈) + ######################################## # Error: argument unpacking in keywords -function f_invalid_kw(; (x,y)=10) - (x, y) +function f_kw_destruct(; (x,y)=10) +end +#--------------------- +LoweringError: +function f_kw_destruct(; (x,y)=10) +# └───┘ ── Invalid keyword name +end + +######################################## +# Error: keyword slurping combined with a default +function f_kw_slurp_default(; kws...=def) +end 
+#--------------------- +LoweringError: +function f_kw_slurp_default(; kws...=def) +# └────────┘ ── keyword argument with `...` cannot have a default value +end + +######################################## +# Error: keyword slurping combined with type +function f_kw_slurp_type(; kws::T...) +end +#--------------------- +LoweringError: +function f_kw_slurp_type(; kws::T...) +# └───────┘ ── keyword argument with `...` may not be given a type +end + +######################################## +# Error: keyword slurping on non-final argument +function f_kw_slurp_not_last(; kws..., x=1) end #--------------------- LoweringError: -function f_invalid_kw(; (x,y)=10) -# └───┘ ── Invalid keyword name - (x, y) +function f_kw_slurp_not_last(; kws..., x=1) +# └────┘ ── `...` may only be used for the last keyword argument end From 47756de46e3a2fe7fed3668dab3ad61163981f4c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Jan 2025 15:23:25 +1000 Subject: [PATCH 0970/1109] Fix forwarding of slurped positional args in keyword-containing functions --- JuliaLowering/src/desugaring.jl | 35 ++++++++++++++++++++++----------- JuliaLowering/test/functions.jl | 9 +++++++++ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 51932e18433b5..adca8f12baf35 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2253,7 +2253,8 @@ end # Generate body function and `Core.kwcall` overloads for functions taking keywords. function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names, typevar_stmts, arg_names, - arg_types, first_default, arg_defaults, keywords, body, ret_var) + arg_types, has_slurp, first_default, arg_defaults, + keywords, body, ret_var) mangled_name = let n = isnothing(name_str) ? "_" : name_str reserve_module_binding_i(ctx.mod, string(startswith(n, '#') ? 
"" : "#", n, "#")) end @@ -2363,6 +2364,14 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults) end + positional_forwarding_args = if has_slurp + a = copy(arg_names) + a[end] = @ast ctx a[end] [K"..." a[end]] + a + else + arg_names + end + # The "main kwcall overload" unpacks keywords and checks their consistency # before dispatching to the user's code in the body method. kwcall_body = @ast ctx keywords [K"block" @@ -2400,7 +2409,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, [K"call" "kwerr"::K"top" kws_arg - arg_names... + positional_forwarding_args... ] ] end @@ -2410,7 +2419,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, if has_kw_slurp remaining_kws end - arg_names... + positional_forwarding_args... ] ] @@ -2433,7 +2442,13 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, ] ] ] - body_func_name, kw_func_method_defs, kw_defaults + + body_for_positional_args_only = @ast ctx srcref [K"call" body_func_name + kw_defaults... + positional_forwarding_args... 
+ ] + + body_func_name, kw_func_method_defs, body_for_positional_args_only end # Check valid identifier/function names @@ -2576,11 +2591,13 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end end body_stmts = SyntaxList(ctx) + has_slurp = false first_default = 0 # index into arg_names/arg_types arg_defaults = SyntaxList(ctx) for (i,arg) in enumerate(args) (aname, atype, default, is_slurp) = expand_function_arg(ctx, body_stmts, arg, i == length(args), false) + has_slurp |= is_slurp push!(arg_names, aname) # TODO: Ideally, ensure side effects of evaluating arg_types only @@ -2632,15 +2649,11 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if isnothing(keywords) body_func_name, kw_func_method_defs = (nothing, nothing) else - body_func_name, kw_func_method_defs, kw_defaults = + # Rewrite `body` here so that the positional-only versions dispatch there. + body_func_name, kw_func_method_defs, body = keyword_function_defs(ctx, ex, callex, name_str, typevar_names, typevar_stmts, - arg_names, arg_types, first_default, arg_defaults, + arg_names, arg_types, has_slurp, first_default, arg_defaults, keywords, body, ret_var) - # The non-kw function dispatches to the body method - body = @ast ctx ex [K"call" body_func_name - kw_defaults... - arg_names... 
- ] # ret_var is used only in the body method ret_var = nothing end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index b81b230880b07..7de920051e8e6 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -274,6 +274,15 @@ end @test exc.args == ((; not_present=100), test_mod.f_kw_simple, 20, 1.0) end + # Slurping of positional args with keywords + JuliaLowering.include_string(test_mod, """ + function f_pos_slurp_with_kws(z, args...; x=1,y=2) + args + end + """) + @test test_mod.f_pos_slurp_with_kws(3, 2, 1; x = 100) === (2,1) + @test test_mod.f_pos_slurp_with_kws(3, 2, 1) === (2,1) + # Slurping of keyword args JuliaLowering.include_string(test_mod, """ function f_kw_slurp_all(; kws...) From 73257f5dbbf64a7a33f1ba1b4f28bb1e6b7e986c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Jan 2025 16:15:13 +1000 Subject: [PATCH 0971/1109] Fix slurped keyword default when no kws specified Also reintroduce a performance hack used to reduce compile time with many keyword arguments. 
--- JuliaLowering/src/desugaring.jl | 45 +++++++++++++++++++----------- JuliaLowering/test/closures_ir.jl | 12 ++++---- JuliaLowering/test/functions.jl | 3 ++ JuliaLowering/test/functions_ir.jl | 34 ++++++++++++---------- 4 files changed, 57 insertions(+), 37 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index adca8f12baf35..5283ed871a812 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2265,7 +2265,6 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kwcall_arg_names = SyntaxList(ctx) kwcall_arg_types = SyntaxList(ctx) - kwcall_body_stmts = SyntaxList(ctx) push!(kwcall_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument)) push!(kwcall_arg_types, @@ -2283,10 +2282,11 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, push!(body_arg_names, new_local_binding(ctx, body_func_name, "#self#"; kind=:argument)) push!(body_arg_types, @ast ctx body_func_name [K"function_type" body_func_name]) + kw_values = SyntaxList(ctx) kw_defaults = SyntaxList(ctx) kw_name_syms = SyntaxList(ctx) - kw_val_vars = SyntaxList(ctx) has_kw_slurp = false + kwtmp = new_local_binding(ctx, keywords, "kwtmp") for (i,arg) in enumerate(children(keywords)) (aname, atype, default, is_slurp) = expand_function_arg(ctx, nothing, arg, i == numchildren(keywords), true) @@ -2299,10 +2299,12 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, end has_kw_slurp = true push!(body_arg_types, @ast ctx arg [K"call" "pairs"::K"top" "NamedTuple"::K"core"]) + push!(kw_defaults, @ast ctx arg [K"call" "pairs"::K"top" [K"call" "NamedTuple"::K"core"]]) continue + else + push!(body_arg_types, atype) end - push!(body_arg_types, atype) if isnothing(default) default = @ast ctx arg [K"call" "throw"::K"core" @@ -2312,11 +2314,10 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, ] ] end - kw_var = ssavar(ctx, arg, "kw_var") # <- TODO: Use 
`aname` here, if necessary - push!(kw_val_vars, kw_var) - # Extract the value and check the type of each expected keyword argument - push!(kwcall_body_stmts, @ast ctx arg [K"=" - kw_var + push!(kw_defaults, default) + + # Extract the keyword argument value and check the type + push!(kw_values, @ast ctx arg [K"block" [K"if" [K"call" "isdefined"::K"core" kws_arg name_sym] [K"block" @@ -2339,13 +2340,17 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, ] ] end - kwval + # Compiler performance hack: we reuse the kwtmp slot in all + # keyword if blocks rather than using the if block in value + # position. This cuts down on the number of slots required + # https://github.com/JuliaLang/julia/pull/44333 + [K"=" kwtmp kwval] ] - default + [K"=" kwtmp default] ] + kwtmp ]) - push!(kw_defaults, default) push!(kw_name_syms, name_sym) end append!(body_arg_names, arg_names) @@ -2357,9 +2362,9 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kwcall_mtable = @ast(ctx, srcref, "nothing"::K"core") - kwcall_defs = SyntaxList(ctx) + kwcall_method_defs = SyntaxList(ctx) if !isempty(arg_defaults) - optional_positional_defs!(ctx, kwcall_defs, srcref, callex_srcref, + optional_positional_defs!(ctx, kwcall_method_defs, srcref, callex_srcref, kwcall_mtable, typevar_names, typevar_stmts, kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults) end @@ -2372,11 +2377,18 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, arg_names end + kw_val_vars = SyntaxList(ctx) + kw_val_stmts = SyntaxList(ctx) + for val in kw_values + v = emit_assign_tmp(kw_val_stmts, ctx, val, "kwval") + push!(kw_val_vars, v) + end + # The "main kwcall overload" unpacks keywords and checks their consistency # before dispatching to the user's code in the body method. kwcall_body = @ast ctx keywords [K"block" # Unpack kws - kwcall_body_stmts... + kw_val_stmts... 
if has_kw_slurp # Slurp remaining keywords into last arg remaining_kws := [K"call" @@ -2423,7 +2435,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, ] ] - push!(kwcall_defs, + push!(kwcall_method_defs, method_def_expr(ctx, srcref, callex_srcref, kwcall_mtable, typevar_names, kwcall_arg_names, kwcall_arg_types, kwcall_body)) @@ -2438,11 +2450,12 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, [K"method_defs" "nothing"::K"core" [K"block" - kwcall_defs... + kwcall_method_defs... ] ] ] + # Body for call with no keywords body_for_positional_args_only = @ast ctx srcref [K"call" body_func_name kw_defaults... positional_forwarding_args... diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index a0ea8fe9ed0e0..85f1043a54c85 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -648,7 +648,7 @@ end 39 SourceLocation::2:14 40 (call core.svec %₃₇ %₃₈ %₃₉) 41 --- code_info - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/#f_kw_closure#0(!read) slot₅/if_val(!read)] + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/#f_kw_closure#0(!read)] 1 (call core.isdefined slot₂/kws :x) 2 (gotoifnot %₁ label₁₃) 3 (call core.getfield slot₂/kws :x) @@ -659,11 +659,11 @@ end 8 TestMod.X 9 (new core.TypeError :keyword argument :x %₈ %₃) 10 (call core.throw %₉) - 11 (= slot₅/if_val %₃) + 11 (= slot₄/kwtmp %₃) 12 (goto label₁₅) 13 TestMod.x_default - 14 (= slot₅/if_val %₁₃) - 15 slot₅/if_val + 14 (= slot₄/kwtmp %₁₃) + 15 slot₄/kwtmp 16 (call top.keys slot₂/kws) 17 (call core.tuple :x) 18 (call top.diff_names %₁₆ %₁₇) @@ -675,8 +675,8 @@ end 24 (call core.isdefined %₂₃ :contents) 25 (gotoifnot %₂₄ label₂₇) 26 (goto label₂₉) - 27 (newvar slot₄/#f_kw_closure#0) - 28 slot₄/#f_kw_closure#0 + 27 (newvar slot₅/#f_kw_closure#0) + 28 slot₅/#f_kw_closure#0 29 (call core.getfield %₂₃ :contents) 30 (call %₂₉ %₁₅ slot₃/#self#) 31 (return %₃₀) diff --git 
a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 7de920051e8e6..8ceb784caab58 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -290,6 +290,7 @@ end end """) @test values(test_mod.f_kw_slurp_all(x = 1, y = 2)) === (x=1, y=2) + @test values(test_mod.f_kw_slurp_all()) === (;) # Slurping of keyword args JuliaLowering.include_string(test_mod, """ @@ -298,6 +299,8 @@ end end """) @test values(test_mod.f_kw_slurp_some(z=3, x = 1, y = 2, w=4)) === (z=3, w=4) + @test values(test_mod.f_kw_slurp_some(x = 1)) === (;) + @test values(test_mod.f_kw_slurp_some()) === (;) # Throwing of UndefKeywordError JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index c24b5c5ba0c4f..1aab73df3b456 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -961,7 +961,7 @@ end 40 SourceLocation::1:10 41 (call core.svec %₃₈ %₃₉ %₄₀) 42 --- method core.nothing %₄₁ - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/if_val(!read) slot₇/if_val(!read)] + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/kwtmp] 1 (call core.isdefined slot₂/kws :x) 2 (gotoifnot %₁ label₁₃) 3 (call core.getfield slot₂/kws :x) @@ -972,10 +972,10 @@ end 8 TestMod.Char 9 (new core.TypeError :keyword argument :x %₈ %₃) 10 (call core.throw %₉) - 11 (= slot₆/if_val %₃) + 11 (= slot₆/kwtmp %₃) 12 (goto label₁₄) - 13 (= slot₆/if_val 'a') - 14 slot₆/if_val + 13 (= slot₆/kwtmp 'a') + 14 slot₆/kwtmp 15 (call core.isdefined slot₂/kws :y) 16 (gotoifnot %₁₅ label₂₇) 17 (call core.getfield slot₂/kws :y) @@ -986,10 +986,10 @@ end 22 TestMod.Bool 23 (new core.TypeError :keyword argument :y %₂₂ %₁₇) 24 (call core.throw %₂₃) - 25 (= slot₇/if_val %₁₇) + 25 (= slot₆/kwtmp %₁₇) 26 (goto label₂₈) - 27 (= slot₇/if_val true) - 28 slot₇/if_val + 27 (= slot₆/kwtmp true) + 28 slot₆/kwtmp 29 (call top.keys slot₂/kws) 30 (call 
core.tuple :x :y) 31 (call top.diff_names %₂₉ %₃₀) @@ -1080,8 +1080,10 @@ end 27 --- method core.nothing %₂₆ slots: [slot₁/#self#] 1 TestMod.#f_kw_slurp_simple#0 - 2 (call %₁ slot₁/#self#) - 3 (return %₂) + 2 (call core.NamedTuple) + 3 (call top.pairs %₂) + 4 (call %₁ %₃ slot₁/#self#) + 5 (return %₄) 28 TestMod.f_kw_slurp_simple 29 (return %₂₈) @@ -1114,15 +1116,15 @@ end 18 SourceLocation::1:10 19 (call core.svec %₁₆ %₁₇ %₁₈) 20 --- method core.nothing %₁₉ - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/if_val(!read)] + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp] 1 (call core.isdefined slot₂/kws :x) 2 (gotoifnot %₁ label₆) 3 (call core.getfield slot₂/kws :x) - 4 (= slot₄/if_val %₃) + 4 (= slot₄/kwtmp %₃) 5 (goto label₈) 6 TestMod.x_default - 7 (= slot₄/if_val %₆) - 8 slot₄/if_val + 7 (= slot₄/kwtmp %₆) + 8 slot₄/kwtmp 9 (call core.tuple :x) 10 (call core.apply_type core.NamedTuple %₉) 11 (call top.structdiff slot₂/kws %₁₀) @@ -1140,8 +1142,10 @@ end slots: [slot₁/#self#] 1 TestMod.#f_kw_slurp#0 2 TestMod.x_default - 3 (call %₁ %₂ slot₁/#self#) - 4 (return %₃) + 3 (call core.NamedTuple) + 4 (call top.pairs %₃) + 5 (call %₁ %₂ %₄ slot₁/#self#) + 6 (return %₅) 28 TestMod.f_kw_slurp 29 (return %₂₈) From aa27522422b27b6b7843adf83ad8b9b2d4972786 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Jan 2025 19:10:23 +1000 Subject: [PATCH 0972/1109] Support keyword argument defaults which depend on other keywords --- JuliaLowering/src/desugaring.jl | 68 +++++++++++++++++++++++++-------- JuliaLowering/test/functions.jl | 13 +++++++ 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 5283ed871a812..2d75c246c385b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2250,6 +2250,15 @@ function optional_positional_defs!(ctx, method_stmts, srcref, callex, end end +function scope_nest(ctx, names, values, body) + for (name, 
value) in Iterators.reverse(zip(names, values)) + body = @ast ctx name [K"let" [K"block" [K"=" name value]] + body + ] + end + body +end + # Generate body function and `Core.kwcall` overloads for functions taking keywords. function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names, typevar_stmts, arg_names, @@ -2284,12 +2293,14 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kw_values = SyntaxList(ctx) kw_defaults = SyntaxList(ctx) + kw_names = SyntaxList(ctx) kw_name_syms = SyntaxList(ctx) has_kw_slurp = false kwtmp = new_local_binding(ctx, keywords, "kwtmp") for (i,arg) in enumerate(children(keywords)) (aname, atype, default, is_slurp) = expand_function_arg(ctx, nothing, arg, i == numchildren(keywords), true) + push!(kw_names, aname) name_sym = @ast ctx aname aname=>K"Symbol" push!(body_arg_names, aname) @@ -2364,6 +2375,8 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, kwcall_method_defs = SyntaxList(ctx) if !isempty(arg_defaults) + # Construct kwcall overloads which forward default positional args on + # to the main kwcall overload. optional_positional_defs!(ctx, kwcall_method_defs, srcref, callex_srcref, kwcall_mtable, typevar_names, typevar_stmts, kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults) @@ -2377,18 +2390,26 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, arg_names end - kw_val_vars = SyntaxList(ctx) - kw_val_stmts = SyntaxList(ctx) - for val in kw_values - v = emit_assign_tmp(kw_val_stmts, ctx, val, "kwval") - push!(kw_val_vars, v) + #-------------------------------------------------- + # Construct the "main kwcall overload" which unpacks keywords and checks + # their consistency before dispatching to the user's code in the body + # method. 
+ defaults_depend_on_kw_names = any(val->contains_identifier(val, kw_names), kw_defaults) + defaults_have_assign = any(val->contains_unquoted(e->kind(e) == K"=", val), kw_defaults) + use_ssa_kw_temps = !defaults_depend_on_kw_names && !defaults_have_assign + + if use_ssa_kw_temps + kw_val_stmts = SyntaxList(ctx) + kw_val_vars = SyntaxList(ctx) + for val in kw_values + v = emit_assign_tmp(kw_val_stmts, ctx, val, "kwval") + push!(kw_val_vars, v) + end + else + kw_val_vars = kw_names end - # The "main kwcall overload" unpacks keywords and checks their consistency - # before dispatching to the user's code in the body method. - kwcall_body = @ast ctx keywords [K"block" - # Unpack kws - kw_val_stmts... + kwcall_body_tail = @ast ctx keywords [K"block" if has_kw_slurp # Slurp remaining keywords into last arg remaining_kws := [K"call" @@ -2434,7 +2455,14 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, positional_forwarding_args... ] ] - + kwcall_body = if use_ssa_kw_temps + @ast ctx keywords [K"block" + kw_val_stmts... + kwcall_body_tail + ] + else + scope_nest(ctx, kw_names, kw_values, kwcall_body_tail) + end push!(kwcall_method_defs, method_def_expr(ctx, srcref, callex_srcref, kwcall_mtable, typevar_names, kwcall_arg_names, kwcall_arg_types, kwcall_body)) @@ -2455,11 +2483,21 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, ] ] + #-------------------------------------------------- # Body for call with no keywords - body_for_positional_args_only = @ast ctx srcref [K"call" body_func_name - kw_defaults... - positional_forwarding_args... - ] + body_for_positional_args_only = if defaults_depend_on_kw_names + scope_nest(ctx, kw_names, kw_defaults, + @ast ctx srcref [K"call" body_func_name + kw_names... + positional_forwarding_args... + ] + ) + else + @ast ctx srcref [K"call" body_func_name + kw_defaults... + positional_forwarding_args... 
+ ] + end body_func_name, kw_func_method_defs, body_for_positional_args_only end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 8ceb784caab58..9836b4cde8193 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -302,6 +302,19 @@ end @test values(test_mod.f_kw_slurp_some(x = 1)) === (;) @test values(test_mod.f_kw_slurp_some()) === (;) + # Keyword defaults which depend on other keywords. + JuliaLowering.include_string(test_mod, """ + begin + aaa = :outer + function f_kw_default_dependencies(; x=1, y=x, bbb=aaa, aaa=:aaa_kw, ccc=aaa) + (x, y, bbb, aaa, ccc) + end + end + """) + @test values(test_mod.f_kw_default_dependencies()) === (1, 1, :outer, :aaa_kw, :aaa_kw) + @test values(test_mod.f_kw_default_dependencies(x = 10)) === (10, 10, :outer, :aaa_kw, :aaa_kw) + @test values(test_mod.f_kw_default_dependencies(x = 10, aaa=:blah)) === (10, 10, :outer, :blah, :blah) + # Throwing of UndefKeywordError JuliaLowering.include_string(test_mod, """ function f_kw_no_default(; x) From 45b0678e2e82bdadd360158cf059f102a0fe7e30 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 30 Jan 2025 08:06:47 +1000 Subject: [PATCH 0973/1109] Add slots for keyword names for reflection --- JuliaLowering/src/desugaring.jl | 5 + JuliaLowering/test/functions_ir.jl | 141 +++++++++++++++-------------- 2 files changed, 78 insertions(+), 68 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 2d75c246c385b..d5a97ecefe474 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2400,6 +2400,11 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, if use_ssa_kw_temps kw_val_stmts = SyntaxList(ctx) + for n in kw_names + # If not using slots for the keyword argument values, still declare + # them for reflection purposes. 
+ push!(kw_val_stmts, @ast ctx n [K"local" n]) + end kw_val_vars = SyntaxList(ctx) for val in kw_values v = emit_assign_tmp(kw_val_stmts, ctx, val, "kwval") diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 1aab73df3b456..1031fcb6b299b 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -961,45 +961,47 @@ end 40 SourceLocation::1:10 41 (call core.svec %₃₈ %₃₉ %₄₀) 42 --- method core.nothing %₄₁ - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/kwtmp] - 1 (call core.isdefined slot₂/kws :x) - 2 (gotoifnot %₁ label₁₃) - 3 (call core.getfield slot₂/kws :x) - 4 TestMod.Char - 5 (call core.isa %₃ %₄) - 6 (gotoifnot %₅ label₈) - 7 (goto label₁₁) - 8 TestMod.Char - 9 (new core.TypeError :keyword argument :x %₈ %₃) - 10 (call core.throw %₉) - 11 (= slot₆/kwtmp %₃) - 12 (goto label₁₄) - 13 (= slot₆/kwtmp 'a') - 14 slot₆/kwtmp - 15 (call core.isdefined slot₂/kws :y) - 16 (gotoifnot %₁₅ label₂₇) - 17 (call core.getfield slot₂/kws :y) - 18 TestMod.Bool - 19 (call core.isa %₁₇ %₁₈) - 20 (gotoifnot %₁₉ label₂₂) - 21 (goto label₂₅) - 22 TestMod.Bool - 23 (new core.TypeError :keyword argument :y %₂₂ %₁₇) - 24 (call core.throw %₂₃) - 25 (= slot₆/kwtmp %₁₇) - 26 (goto label₂₈) - 27 (= slot₆/kwtmp true) - 28 slot₆/kwtmp - 29 (call top.keys slot₂/kws) - 30 (call core.tuple :x :y) - 31 (call top.diff_names %₂₉ %₃₀) - 32 (call top.isempty %₃₁) - 33 (gotoifnot %₃₂ label₃₅) - 34 (goto label₃₆) - 35 (call top.kwerr slot₂/kws slot₃/#self# slot₄/a slot₅/b) - 36 TestMod.#f_kw_simple#0 - 37 (call %₃₆ %₁₄ %₂₈ slot₃/#self# slot₄/a slot₅/b) - 38 (return %₃₇) + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/kwtmp slot₇/x(!read) slot₈/y(!read)] + 1 (newvar slot₇/x) + 2 (newvar slot₈/y) + 3 (call core.isdefined slot₂/kws :x) + 4 (gotoifnot %₃ label₁₅) + 5 (call core.getfield slot₂/kws :x) + 6 TestMod.Char + 7 (call core.isa %₅ %₆) + 8 (gotoifnot %₇ label₁₀) + 9 (goto 
label₁₃) + 10 TestMod.Char + 11 (new core.TypeError :keyword argument :x %₁₀ %₅) + 12 (call core.throw %₁₁) + 13 (= slot₆/kwtmp %₅) + 14 (goto label₁₆) + 15 (= slot₆/kwtmp 'a') + 16 slot₆/kwtmp + 17 (call core.isdefined slot₂/kws :y) + 18 (gotoifnot %₁₇ label₂₉) + 19 (call core.getfield slot₂/kws :y) + 20 TestMod.Bool + 21 (call core.isa %₁₉ %₂₀) + 22 (gotoifnot %₂₁ label₂₄) + 23 (goto label₂₇) + 24 TestMod.Bool + 25 (new core.TypeError :keyword argument :y %₂₄ %₁₉) + 26 (call core.throw %₂₅) + 27 (= slot₆/kwtmp %₁₉) + 28 (goto label₃₀) + 29 (= slot₆/kwtmp true) + 30 slot₆/kwtmp + 31 (call top.keys slot₂/kws) + 32 (call core.tuple :x :y) + 33 (call top.diff_names %₃₁ %₃₂) + 34 (call top.isempty %₃₃) + 35 (gotoifnot %₃₄ label₃₇) + 36 (goto label₃₈) + 37 (call top.kwerr slot₂/kws slot₃/#self# slot₄/a slot₅/b) + 38 TestMod.#f_kw_simple#0 + 39 (call %₃₈ %₁₆ %₃₀ slot₃/#self# slot₄/a slot₅/b) + 40 (return %₃₉) 43 TestMod.f_kw_simple 44 (call core.Typeof %₄₃) 45 (call core.svec %₄₄) @@ -1039,8 +1041,8 @@ end ######################################## # Keyword slurping - simple forwarding of all kws -function f_kw_slurp_simple(; kws...) - kws +function f_kw_slurp_simple(; all_kws...) 
+ all_kws end #--------------------- 1 (method TestMod.#f_kw_slurp_simple#0) @@ -1055,8 +1057,8 @@ end 10 SourceLocation::1:10 11 (call core.svec %₈ %₉ %₁₀) 12 --- method core.nothing %₁₁ - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self#(!read)] - 1 slot₂/kws + slots: [slot₁/#self#(!read) slot₂/all_kws slot₃/#self#(!read)] + 1 slot₂/all_kws 2 (return %₁) 13 (call core.typeof core.kwcall) 14 TestMod.f_kw_slurp_simple @@ -1066,11 +1068,12 @@ end 18 SourceLocation::1:10 19 (call core.svec %₁₆ %₁₇ %₁₈) 20 --- method core.nothing %₁₉ - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self#] - 1 (call top.pairs slot₂/kws) - 2 TestMod.#f_kw_slurp_simple#0 - 3 (call %₂ %₁ slot₃/#self#) - 4 (return %₃) + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/all_kws(!read)] + 1 (newvar slot₄/all_kws) + 2 (call top.pairs slot₂/kws) + 3 TestMod.#f_kw_slurp_simple#0 + 4 (call %₃ %₂ slot₃/#self#) + 5 (return %₄) 21 TestMod.f_kw_slurp_simple 22 (call core.Typeof %₂₁) 23 (call core.svec %₂₂) @@ -1089,8 +1092,8 @@ end ######################################## # Keyword slurping -function f_kw_slurp(; x=x_default, kws...) - kws +function f_kw_slurp(; x=x_default, non_x_kws...) 
+ all_kws end #--------------------- 1 (method TestMod.#f_kw_slurp#0) @@ -1105,8 +1108,8 @@ end 10 SourceLocation::1:10 11 (call core.svec %₈ %₉ %₁₀) 12 --- method core.nothing %₁₁ - slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/kws slot₄/#self#(!read)] - 1 slot₃/kws + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/non_x_kws(!read) slot₄/#self#(!read)] + 1 TestMod.all_kws 2 (return %₁) 13 (call core.typeof core.kwcall) 14 TestMod.f_kw_slurp @@ -1116,22 +1119,24 @@ end 18 SourceLocation::1:10 19 (call core.svec %₁₆ %₁₇ %₁₈) 20 --- method core.nothing %₁₉ - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp] - 1 (call core.isdefined slot₂/kws :x) - 2 (gotoifnot %₁ label₆) - 3 (call core.getfield slot₂/kws :x) - 4 (= slot₄/kwtmp %₃) - 5 (goto label₈) - 6 TestMod.x_default - 7 (= slot₄/kwtmp %₆) - 8 slot₄/kwtmp - 9 (call core.tuple :x) - 10 (call core.apply_type core.NamedTuple %₉) - 11 (call top.structdiff slot₂/kws %₁₀) - 12 (call top.pairs %₁₁) - 13 TestMod.#f_kw_slurp#0 - 14 (call %₁₃ %₈ %₁₂ slot₃/#self#) - 15 (return %₁₄) + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/non_x_kws(!read) slot₆/x(!read)] + 1 (newvar slot₅/non_x_kws) + 2 (newvar slot₆/x) + 3 (call core.isdefined slot₂/kws :x) + 4 (gotoifnot %₃ label₈) + 5 (call core.getfield slot₂/kws :x) + 6 (= slot₄/kwtmp %₅) + 7 (goto label₁₀) + 8 TestMod.x_default + 9 (= slot₄/kwtmp %₈) + 10 slot₄/kwtmp + 11 (call core.tuple :x) + 12 (call core.apply_type core.NamedTuple %₁₁) + 13 (call top.structdiff slot₂/kws %₁₂) + 14 (call top.pairs %₁₃) + 15 TestMod.#f_kw_slurp#0 + 16 (call %₁₅ %₁₀ %₁₄ slot₃/#self#) + 17 (return %₁₆) 21 TestMod.f_kw_slurp 22 (call core.Typeof %₂₁) 23 (call core.svec %₂₂) From 32aec86a11bd3b4cab8d2a93ac465a75d9454029 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 30 Jan 2025 15:16:29 +1000 Subject: [PATCH 0974/1109] Fix up test forgotten from previous commit --- JuliaLowering/test/closures_ir.jl | 65 ++++++++++++++++--------------- 1 file 
changed, 33 insertions(+), 32 deletions(-) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 85f1043a54c85..c187a7ba0a9d2 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -648,38 +648,39 @@ end 39 SourceLocation::2:14 40 (call core.svec %₃₇ %₃₈ %₃₉) 41 --- code_info - slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/#f_kw_closure#0(!read)] - 1 (call core.isdefined slot₂/kws :x) - 2 (gotoifnot %₁ label₁₃) - 3 (call core.getfield slot₂/kws :x) - 4 TestMod.X - 5 (call core.isa %₃ %₄) - 6 (gotoifnot %₅ label₈) - 7 (goto label₁₁) - 8 TestMod.X - 9 (new core.TypeError :keyword argument :x %₈ %₃) - 10 (call core.throw %₉) - 11 (= slot₄/kwtmp %₃) - 12 (goto label₁₅) - 13 TestMod.x_default - 14 (= slot₄/kwtmp %₁₃) - 15 slot₄/kwtmp - 16 (call top.keys slot₂/kws) - 17 (call core.tuple :x) - 18 (call top.diff_names %₁₆ %₁₇) - 19 (call top.isempty %₁₈) - 20 (gotoifnot %₁₉ label₂₂) - 21 (goto label₂₃) - 22 (call top.kwerr slot₂/kws slot₃/#self#) - 23 (captured_local 1) - 24 (call core.isdefined %₂₃ :contents) - 25 (gotoifnot %₂₄ label₂₇) - 26 (goto label₂₉) - 27 (newvar slot₅/#f_kw_closure#0) - 28 slot₅/#f_kw_closure#0 - 29 (call core.getfield %₂₃ :contents) - 30 (call %₂₉ %₁₅ slot₃/#self#) - 31 (return %₃₀) + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/x(!read) slot₆/#f_kw_closure#0(!read)] + 1 (newvar slot₅/x) + 2 (call core.isdefined slot₂/kws :x) + 3 (gotoifnot %₂ label₁₄) + 4 (call core.getfield slot₂/kws :x) + 5 TestMod.X + 6 (call core.isa %₄ %₅) + 7 (gotoifnot %₆ label₉) + 8 (goto label₁₂) + 9 TestMod.X + 10 (new core.TypeError :keyword argument :x %₉ %₄) + 11 (call core.throw %₁₀) + 12 (= slot₄/kwtmp %₄) + 13 (goto label₁₆) + 14 TestMod.x_default + 15 (= slot₄/kwtmp %₁₄) + 16 slot₄/kwtmp + 17 (call top.keys slot₂/kws) + 18 (call core.tuple :x) + 19 (call top.diff_names %₁₇ %₁₈) + 20 (call top.isempty %₁₉) + 21 (gotoifnot %₂₀ label₂₃) + 22 (goto 
label₂₄) + 23 (call top.kwerr slot₂/kws slot₃/#self#) + 24 (captured_local 1) + 25 (call core.isdefined %₂₄ :contents) + 26 (gotoifnot %₂₅ label₂₈) + 27 (goto label₃₀) + 28 (newvar slot₆/#f_kw_closure#0) + 29 slot₆/#f_kw_closure#0 + 30 (call core.getfield %₂₄ :contents) + 31 (call %₃₀ %₁₆ slot₃/#self#) + 32 (return %₃₁) 42 slot₂/#f_kw_closure#0 43 (call core.svec %₄₂) 44 (call JuliaLowering.replace_captured_locals! %₄₁ %₄₃) From e1679b01b843310fa64521d9ea1d7e055aad9122 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 30 Jan 2025 15:30:42 +1000 Subject: [PATCH 0975/1109] Desugaring of keyword arg types which depend on `where` typevars This change completes the implementation of functions with keyword arguments. --- JuliaLowering/src/desugaring.jl | 112 +++++++++++++++------ JuliaLowering/src/syntax_graph.jl | 8 ++ JuliaLowering/src/utils.jl | 2 +- JuliaLowering/test/closures_ir.jl | 4 +- JuliaLowering/test/functions.jl | 23 +++++ JuliaLowering/test/functions_ir.jl | 152 +++++++++++++++++++++++++++++ JuliaLowering/test/scopes_ir.jl | 26 ++--- 7 files changed, 283 insertions(+), 44 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index d5a97ecefe474..a02907be60a0d 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2125,18 +2125,23 @@ end # Expand `where` clause(s) of a function into (typevar_names, typevar_stmts) where # - `typevar_names` are the names of the type's type parameters # - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter -# name in `typevar_names`, to be emitted prior to uses of `typevar_names`. -# There is exactly one statement from each typevar. -function _split_wheres!(ctx, typevar_names, typevar_stmts, ex) +# name in `typevar_names`, with exactly one per `typevar_name`. Some of these +# may already have been emitted. +# - `new_typevar_stmts` is the list of statements which needs to to be emitted +# prior to uses of `typevar_names`. 
+function _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, ex) if kind(ex) == K"where" && numchildren(ex) == 2 vars_kind = kind(ex[2]) if vars_kind == K"_typevars" - append!(typevar_names, children(ex[2])) + append!(typevar_names, children(ex[2][1])) + append!(typevar_stmts, children(ex[2][2])) else params = vars_kind == K"braces" ? ex[2][1:end] : ex[2:2] - expand_typevars!(ctx, typevar_names, typevar_stmts, params) + n_existing = length(new_typevar_stmts) + expand_typevars!(ctx, typevar_names, new_typevar_stmts, params) + append!(typevar_stmts, view(new_typevar_stmts, n_existing+1:length(new_typevar_stmts))) end - _split_wheres!(ctx, typevar_names, typevar_stmts, ex[1]) + _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, ex[1]) else ex end @@ -2172,11 +2177,19 @@ function method_def_expr(ctx, srcref, callex_srcref, method_table, ] end -function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) +# Select static parameters which are used in function arguments `arg_types`, or +# transitively used. +# +# The transitive usage check probably doesn't guarentee that the types are +# inferrable during dispatch as they may only be part of the bounds of another +# type. Thus we might get false positives here but we shouldn't get false +# negatives. +function select_used_typevars(arg_types, typevar_names, typevar_stmts) n_typevars = length(typevar_names) @assert n_typevars == length(typevar_stmts) # Filter typevar names down to those which are directly used in the arg list - typevar_used = [contains_identifier(tn, arg_types) for tn in typevar_names] + typevar_used = Bool[any(contains_identifier(argtype, tn) for argtype in arg_types) + for tn in typevar_names] # _Or_ used transitively via other typevars. The following code # computes this by incrementally coloring the graph of dependencies # between type vars. 
@@ -2196,6 +2209,24 @@ function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) end end end + typevar_used +end + +function check_all_typevars_used(arg_types, typevar_names, typevar_stmts) + selected = select_used_typevars(arg_types, typevar_names, typevar_stmts) + unused_typevar = findfirst(s->!s, selected) + if !isnothing(unused_typevar) + # Type variables which may be statically determined to be unused in + # any function argument and therefore can't be inferred during + # dispatch. + throw(LoweringError(typevar_names[unused_typevar], + "Method definition declares type variable but does not use it in the type of any function parameter")) + end +end + +# Return `typevar_names` which are used directly or indirectly in `arg_types`. +function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) + typevar_used = select_used_typevars(arg_types, typevar_names, typevar_stmts) trimmed_typevar_names = SyntaxList(ctx) for (used,tn) in zip(typevar_used, typevar_names) if used @@ -2291,6 +2322,9 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, push!(body_arg_names, new_local_binding(ctx, body_func_name, "#self#"; kind=:argument)) push!(body_arg_types, @ast ctx body_func_name [K"function_type" body_func_name]) + non_positional_typevars = typevar_names[map(!, + select_used_typevars(arg_types, typevar_names, typevar_stmts))] + kw_values = SyntaxList(ctx) kw_defaults = SyntaxList(ctx) kw_names = SyntaxList(ctx) @@ -2333,11 +2367,19 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, [K"call" "isdefined"::K"core" kws_arg name_sym] [K"block" kwval := [K"call" "getfield"::K"core" kws_arg name_sym] - # TODO: if the "declared" type of a KW arg includes something - # from keyword-sparams then don't assert it here, since those - # static parameters don't have values yet. instead, the type - # will be picked up when the underlying method is called. 
- if !is_core_Any(atype) + if is_core_Any(atype) || contains_identifier(atype, non_positional_typevars) + # <- Do nothing in this branch because `atype` includes + # something from the typevars and those static + # parameters don't have values yet. Instead, the type + # will be picked up when the body method is called and + # result in a MethodError during dispatch rather than + # the `TypeError` below. + # + # In principle we could probably construct the + # appropriate UnionAll here in some simple cases but + # the fully general case probably requires simulating + # the runtime's dispatch machinery. + else [K"if" [K"call" "isa"::K"core" kwval atype] "nothing"::K"core" [K"call" @@ -2468,14 +2510,19 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, else scope_nest(ctx, kw_names, kw_values, kwcall_body_tail) end + main_kwcall_typevars = trim_used_typevars(ctx, kwcall_arg_types, typevar_names, typevar_stmts) push!(kwcall_method_defs, method_def_expr(ctx, srcref, callex_srcref, kwcall_mtable, - typevar_names, kwcall_arg_names, kwcall_arg_types, kwcall_body)) + main_kwcall_typevars, kwcall_arg_names, kwcall_arg_types, kwcall_body)) + + # Check kws of body method + check_all_typevars_used(body_arg_types, typevar_names, typevar_stmts) kw_func_method_defs = @ast ctx srcref [K"block" [K"method_defs" body_func_name [K"block" + # TODO: nkw method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core", typevar_names, body_arg_names, body_arg_types, body, ret_var) ] @@ -2537,6 +2584,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= typevar_names = SyntaxList(ctx) typevar_stmts = SyntaxList(ctx) + new_typevar_stmts = SyntaxList(ctx) if kind(name) == K"where" # `where` vars end up in two places # 1. Argument types - the `T` in `x::T` becomes a `TypeVar` parameter in @@ -2545,7 +2593,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= # 2. 
In the method body - either explicitly or implicitly via the method # return type or default arguments - where `T` turns up as the *name* of # a special slot of kind ":static_parameter" - name = _split_wheres!(ctx, typevar_names, typevar_stmts, name) + name = _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, name) end return_type = nothing @@ -2704,12 +2752,20 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= if isnothing(keywords) body_func_name, kw_func_method_defs = (nothing, nothing) + # NB: This check seems good as it statically catches any useless + # typevars which can't be inferred. However it wasn't previously an + # error so we might need to reduce it to a warning? + check_all_typevars_used(arg_types, typevar_names, typevar_stmts) + main_typevar_names = typevar_names else # Rewrite `body` here so that the positional-only versions dispatch there. body_func_name, kw_func_method_defs, body = keyword_function_defs(ctx, ex, callex, name_str, typevar_names, typevar_stmts, arg_names, arg_types, has_slurp, first_default, arg_defaults, keywords, body, ret_var) + # The main function (but without keywords) needs its typevars trimmed, + # as some of them may be for the keywords only. + main_typevar_names = trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) # ret_var is used only in the body method ret_var = nothing end @@ -2728,7 +2784,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= # The method with all non-default arguments push!(method_stmts, - method_def_expr(ctx, ex, callex, method_table, typevar_names, arg_names, + method_def_expr(ctx, ex, callex, method_table, main_typevar_names, arg_names, arg_types, body, ret_var)) if !isnothing(docs) method_stmts[end] = @ast ctx docs [K"block" @@ -2751,7 +2807,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end [K"scope_block"(scope_type=:hard) [K"block" - typevar_stmts... 
+ new_typevar_stmts... kw_func_method_defs [K"method_defs" isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name @@ -3151,7 +3207,7 @@ function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, v end function default_inner_constructors(ctx, srcref, global_struct_name, - typevar_names, field_names, field_types) + typevar_names, typevar_stmts, field_names, field_types) # TODO: Consider using srcref = @HERE ? exact_ctor = if isempty(typevar_names) # Definition with exact types for all arguments @@ -3188,7 +3244,7 @@ function default_inner_constructors(ctx, srcref, global_struct_name, typevar_names... ] ] - [K"_typevars" typevar_names...] + [K"_typevars" [K"block" typevar_names...] [K"block" typevar_stmts...]] ] end ] @@ -3236,7 +3292,7 @@ end # end # function default_outer_constructor(ctx, srcref, global_struct_name, - typevar_names, field_names, field_types) + typevar_names, typevar_stmts, field_names, field_types) @ast ctx srcref [K"function" [K"where" [K"call" @@ -3248,7 +3304,7 @@ function default_outer_constructor(ctx, srcref, global_struct_name, [K"::" [K"curly" "Type"::K"core" global_struct_name]] [[K"::" n t] for (n,t) in zip(field_names, field_types)]... ] - [K"_typevars" typevar_names...] + [K"_typevars" [K"block" typevar_names...] [K"block" typevar_stmts...]] ] [K"new" [K"curly" global_struct_name typevar_names...] field_names...] 
] @@ -3617,7 +3673,7 @@ function expand_struct_def(ctx, ex, docs) # Default constructors if isempty(inner_defs) default_inner_constructors(ctx, ex, global_struct_name, - typevar_names, field_names_2, field_types) + typevar_names, typevar_stmts, field_names_2, field_types) else map!(inner_defs, inner_defs) do def rewrite_new_calls(ctx, def, struct_name, global_struct_name, @@ -3627,7 +3683,7 @@ function expand_struct_def(ctx, ex, docs) end if need_outer_constructor default_outer_constructor(ctx, ex, global_struct_name, - typevar_names, field_names_2, field_types) + typevar_names, typevar_stmts, field_names_2, field_types) end ] ] @@ -3672,7 +3728,7 @@ function expand_wheres(ctx, ex) elseif kind(rhs) == K"_typevars" # Eg, `S{X,Y} where {X, Y}` but with X and Y # already allocated `TypeVar`s - for r in reverse(children(rhs)) + for r in reverse(children(rhs[1])) body = @ast ctx ex [K"call" "UnionAll"::K"core" r body] end else @@ -3688,7 +3744,7 @@ function expand_curly(ctx, ex) check_no_parameters(ex, "unexpected semicolon in type parameter list") check_no_assignment(children(ex), "misplace assignment in type parameter list") - stmts = SyntaxList(ctx) + typevar_stmts = SyntaxList(ctx) type_args = SyntaxList(ctx) implicit_typevars = SyntaxList(ctx) @@ -3702,7 +3758,7 @@ function expand_curly(ctx, ex) typevar = k == K"<:" ? bounds_to_TypeVar(ctx, e, (name, nothing, e[1])) : bounds_to_TypeVar(ctx, e, (name, e[1], nothing)) - arg = emit_assign_tmp(stmts, ctx, typevar) + arg = emit_assign_tmp(typevar_stmts, ctx, typevar) push!(implicit_typevars, arg) else arg = e @@ -3713,8 +3769,8 @@ function expand_curly(ctx, ex) type = @ast ctx ex [K"call" "apply_type"::K"core" type_args...] if !isempty(implicit_typevars) type = @ast ctx ex [K"block" - stmts... - [K"where" type [K"_typevars" implicit_typevars...]] + typevar_stmts... + [K"where" type [K"_typevars" [K"block" implicit_typevars...] 
[K"block" typevar_stmts...]]] ] end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 7568310a15257..bada8ca1b8e53 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -675,6 +675,14 @@ function Base.pushfirst!(v::SyntaxList, ex::SyntaxTree) pushfirst!(v.ids, ex._id) end +function Base.similar(v::SyntaxList, size::Tuple=Base.size(v.ids)) + SyntaxList(v.graph, zeros(NodeId, size)) +end + +function Base.isassigned(v::SyntaxList, i::Integer) + v.ids[i] > 0 +end + function Base.append!(v::SyntaxList, exs) for e in exs push!(v, e) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index b027131cb5fdd..d26c6d41f4a59 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -39,7 +39,7 @@ function _show_provtree(io::IO, prov, indent) printstyled(io, "@ $fn:$line\n", color=:light_black) end -function showprov(io::IO, exs::Vector) +function showprov(io::IO, exs::AbstractVector) for (i,ex) in enumerate(Iterators.reverse(exs)) sr = sourceref(ex) if i > 1 diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index c187a7ba0a9d2..0f1d1ebb74538 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -438,13 +438,13 @@ end ######################################## # Error: Static parameter clashing with closure name -function f() where {g} +function f(::g) where {g} function g() end end #--------------------- LoweringError: -function f() where {g} +function f(::g) where {g} function g() # ╙ ── local variable name `g` conflicts with a static parameter end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 9836b4cde8193..e0a7d3ef0771e 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -315,6 +315,29 @@ end @test values(test_mod.f_kw_default_dependencies(x = 10)) === (10, 10, :outer, :aaa_kw, :aaa_kw) @test 
values(test_mod.f_kw_default_dependencies(x = 10, aaa=:blah)) === (10, 10, :outer, :blah, :blah) + # Keywords with static parameters + JuliaLowering.include_string(test_mod, """ + function f_kw_sparams(x::X, y::Y; a::A, b::B) where {X,Y,A,B} + (X,Y,A,B) + end + """) + @test values(test_mod.f_kw_sparams(1, 1.0; a="a", b='b')) === (Int, Float64, String, Char) + + # Keywords with static parameters, where some keyword types can be inferred + # based on the positional parameters and others cannot. + JuliaLowering.include_string(test_mod, """ + function f_kw_type_errors(x::X; a::F, b::X) where {X<:Integer,F<:AbstractFloat} + (X,F) + end + """) + @test values(test_mod.f_kw_type_errors(1; a=1.0, b=10)) === (Int, Float64) + # The following is a keyword TypeError because we can infer `X` based on + # the positional parameters and use that to check the type of `b`. + @test_throws TypeError values(test_mod.f_kw_type_errors(1; a=1.0, b="str")) + # The following is only a method error as we can't infer `F` prior to + # dispatching to the body function. 
+ @test_throws MethodError values(test_mod.f_kw_type_errors(1; a="str", b=10)) + # Throwing of UndefKeywordError JuliaLowering.include_string(test_mod, """ function f_kw_no_default(; x) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 1031fcb6b299b..0fd9f837d226c 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -184,6 +184,49 @@ end 16 TestMod.f 17 (return %₁₆) +######################################## +# Static parameter which is used only in the bounds of another static parameter +# See https://github.com/JuliaLang/julia/issues/49275 +function f(x, y::S) where {T, S<:AbstractVector{T}} + (T,S) +end +#--------------------- +1 (method TestMod.f) +2 (= slot₂/T (call core.TypeVar :T)) +3 TestMod.AbstractVector +4 slot₂/T +5 (call core.apply_type %₃ %₄) +6 (= slot₁/S (call core.TypeVar :S %₅)) +7 TestMod.f +8 (call core.Typeof %₇) +9 slot₁/S +10 (call core.svec %₈ core.Any %₉) +11 slot₂/T +12 slot₁/S +13 (call core.svec %₁₁ %₁₂) +14 SourceLocation::1:10 +15 (call core.svec %₁₀ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] + 1 static_parameter₁ + 2 static_parameter₂ + 3 (call core.tuple %₁ %₂) + 4 (return %₃) +17 TestMod.f +18 (return %₁₇) + +######################################## +# Error: Static parameter which is unused +function f(::T) where {T,S} + (T,S) +end +#--------------------- +LoweringError: +function f(::T) where {T,S} +# ╙ ── Method definition declares type variable but does not use it in the type of any function parameter + (T,S) +end + ######################################## # Return types function f(x)::Int @@ -1154,6 +1197,115 @@ end 28 TestMod.f_kw_slurp 29 (return %₂₈) +######################################## +# Static parameters used in keywords, with and without the static parameter +# being present in positional argument types. 
+# +# Here the wrong type for `b` will get a `TypeError` but `A` will need to rely +# on a MethodError. +function f_kw_sparams(x::X; a::A=a_def, b::X=b_def) where {X,A} + (X,A) +end +#--------------------- +1 (method TestMod.#f_kw_sparams#0) +2 (method TestMod.f_kw_sparams) +3 (= slot₂/X (call core.TypeVar :X)) +4 (= slot₁/A (call core.TypeVar :A)) +5 TestMod.#f_kw_sparams#0 +6 (call core.Typeof %₅) +7 slot₁/A +8 slot₂/X +9 TestMod.f_kw_sparams +10 (call core.Typeof %₉) +11 slot₂/X +12 (call core.svec %₆ %₇ %₈ %₁₀ %₁₁) +13 slot₂/X +14 slot₁/A +15 (call core.svec %₁₃ %₁₄) +16 SourceLocation::1:10 +17 (call core.svec %₁₂ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/a(!read) slot₃/b(!read) slot₄/#self#(!read) slot₅/x(!read)] + 1 static_parameter₁ + 2 static_parameter₂ + 3 (call core.tuple %₁ %₂) + 4 (return %₃) +19 (call core.typeof core.kwcall) +20 TestMod.f_kw_sparams +21 (call core.Typeof %₂₀) +22 slot₂/X +23 (call core.svec %₁₉ core.NamedTuple %₂₁ %₂₂) +24 slot₂/X +25 (call core.svec %₂₄) +26 SourceLocation::1:10 +27 (call core.svec %₂₃ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/x slot₅/kwtmp slot₆/a(!read) slot₇/b(!read)] + 1 (newvar slot₆/a) + 2 (newvar slot₇/b) + 3 (call core.isdefined slot₂/kws :a) + 4 (gotoifnot %₃ label₈) + 5 (call core.getfield slot₂/kws :a) + 6 (= slot₅/kwtmp %₅) + 7 (goto label₁₀) + 8 TestMod.a_def + 9 (= slot₅/kwtmp %₈) + 10 slot₅/kwtmp + 11 (call core.isdefined slot₂/kws :b) + 12 (gotoifnot %₁₁ label₂₃) + 13 (call core.getfield slot₂/kws :b) + 14 static_parameter₁ + 15 (call core.isa %₁₃ %₁₄) + 16 (gotoifnot %₁₅ label₁₈) + 17 (goto label₂₁) + 18 static_parameter₁ + 19 (new core.TypeError :keyword argument :b %₁₈ %₁₃) + 20 (call core.throw %₁₉) + 21 (= slot₅/kwtmp %₁₃) + 22 (goto label₂₅) + 23 TestMod.b_def + 24 (= slot₅/kwtmp %₂₃) + 25 slot₅/kwtmp + 26 (call top.keys slot₂/kws) + 27 (call core.tuple :a :b) + 28 (call top.diff_names %₂₆ %₂₇) + 29 
(call top.isempty %₂₈) + 30 (gotoifnot %₂₉ label₃₂) + 31 (goto label₃₃) + 32 (call top.kwerr slot₂/kws slot₃/#self# slot₄/x) + 33 TestMod.#f_kw_sparams#0 + 34 (call %₃₃ %₁₀ %₂₅ slot₃/#self# slot₄/x) + 35 (return %₃₄) +29 TestMod.f_kw_sparams +30 (call core.Typeof %₂₉) +31 slot₂/X +32 (call core.svec %₃₀ %₃₁) +33 slot₂/X +34 (call core.svec %₃₃) +35 SourceLocation::1:10 +36 (call core.svec %₃₂ %₃₄ %₃₅) +37 --- method core.nothing %₃₆ + slots: [slot₁/#self# slot₂/x] + 1 TestMod.#f_kw_sparams#0 + 2 TestMod.a_def + 3 TestMod.b_def + 4 (call %₁ %₂ %₃ slot₁/#self# slot₂/x) + 5 (return %₄) +38 TestMod.f_kw_sparams +39 (return %₃₈) + +######################################## +# Error: Static parameter which is unused in keyword body arg types +function f_kw_sparams(x::X; a::A) where {X,Y,A} + (X,A) +end +#--------------------- +LoweringError: +function f_kw_sparams(x::X; a::A) where {X,Y,A} +# ╙ ── Method definition declares type variable but does not use it in the type of any function parameter + (X,A) +end + ######################################## # Error: argument unpacking in keywords function f_kw_destruct(; (x,y)=10) diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index ed745f86081ea..5a8a5cec9e3f3 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -113,12 +113,12 @@ end ######################################## # Error: Static parameter name not unique -function f() where T where T +function f(::T) where T where T end #--------------------- LoweringError: -function f() where T where T -# ╙ ── function static parameter name not unique +function f(::T) where T where T +# ╙ ── function static parameter name not unique end ######################################## @@ -194,38 +194,38 @@ end ######################################## # Error: Conflicting static parameter and local -function f() where T +function f(::T) where T local T end #--------------------- LoweringError: -function f() where T +function 
f(::T) where T local T # └─────┘ ── local variable name `T` conflicts with a static parameter end ######################################## # Error: Conflicting static parameter and global -function f() where T +function f(::T) where T global T end #--------------------- LoweringError: -function f() where T +function f(::T) where T global T # └──────┘ ── global variable name `T` conflicts with a static parameter end ######################################## # Error: Conflicting static parameter and local in nested scope -function f() where T +function f(::T) where T let local T end end #--------------------- LoweringError: -function f() where T +function f(::T) where T let local T # └─────┘ ── local variable name `T` conflicts with a static parameter @@ -234,14 +234,14 @@ end ######################################## # Error: Conflicting static parameter and global in nested scope -function f() where T +function f(::T) where T let global T end end #--------------------- LoweringError: -function f() where T +function f(::T) where T let global T # └──────┘ ── global variable name `T` conflicts with a static parameter @@ -250,14 +250,14 @@ end ######################################## # Error: Conflicting static parameter and implicit local -function f() where T +function f(::T) where T let T = rhs end end #--------------------- LoweringError: -function f() where T +function f(::T) where T let T = rhs # ╙ ── local variable name `T` conflicts with a static parameter From e5dd569e2517790814c0076074f4a81c17e70eaf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 30 Jan 2025 22:26:48 +1000 Subject: [PATCH 0976/1109] Desugaring of remaining `let` syntax forms `let` was done very early in prototyping so only understood a very basic left hand side. Fix this to now support the remaining fancy syntax of type declarations, tuple unpacking and function definitions. 
There's a couple of variations in the lowering here vs the flisp lowering which I think are bug fixes: let f() = y local y # <- f does not capture this `y` end let x::T = rhs local T # x is of type `T` from outside the `let`, not this one. end Also some cleanup * Remove local_def in favour of using `local` and a separate `always_defined` intermediate form * Some cleanup in linearization. --- JuliaLowering/src/ast.jl | 35 ++++++++++ JuliaLowering/src/desugaring.jl | 82 +++++++++++++++++----- JuliaLowering/src/kinds.jl | 11 ++- JuliaLowering/src/linear_ir.jl | 28 ++++---- JuliaLowering/src/scope_analysis.jl | 4 +- JuliaLowering/test/branching.jl | 12 ---- JuliaLowering/test/scopes.jl | 38 +++++++++- JuliaLowering/test/scopes_ir.jl | 105 ++++++++++++++++++++++++++++ 8 files changed, 268 insertions(+), 47 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 5e9544bd7d3a0..9de4164135fff 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -594,10 +594,45 @@ function is_simple_atom(ctx, ex) is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || is_core_nothing(ex) end +function is_identifier_like(ex) + k = kind(ex) + k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" +end + function decl_var(ex) kind(ex) == K"::" ? ex[1] : ex end +# Given the signature of a `function`, return the symbol that will ultimately +# be assigned to in local/global scope, if any. +function assigned_function_name(ex) + while kind(ex) == K"where" + # f() where T + ex = ex[1] + end + if kind(ex) == K"::" && numchildren(ex) == 2 + # f()::T + ex = ex[1] + end + if kind(ex) != K"call" + throw(LoweringError(ex, "Expected call syntax in function signature")) + end + ex = ex[1] + if kind(ex) == K"curly" + # f{T}() + ex = ex[1] + end + if kind(ex) == K"::" || kind(ex) == K"." 
+ # (obj::CallableType)(args) + # A.b.c(args) + nothing + elseif is_identifier_like(ex) + ex + else + throw(LoweringError(ex, "Unexpected name in function signature")) + end +end + # Remove empty parameters block, eg, in the arg list of `f(x, y;)` function remove_empty_parameters(args) i = length(args) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a02907be60a0d..7948374dcf762 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -20,11 +20,6 @@ end #------------------------------------------------------------------------------- -function is_identifier_like(ex) - k = kind(ex) - k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" -end - # Return true when `x` and `y` are "the same identifier", but also works with # bindings (and hence ssa vars). See also `is_identifier_like()` function is_same_identifier_like(ex::SyntaxTree, y::SyntaxTree) @@ -1394,22 +1389,71 @@ function expand_let(ctx, ex) elseif kb == K"=" && numchildren(binding) == 2 lhs = binding[1] rhs = binding[2] - if is_sym_decl(lhs) + kl = kind(lhs) + if kl == K"Identifier" || kl == K"BindingId" blk = @ast ctx binding [K"block" tmp := rhs [K"scope_block"(ex, scope_type=scope_type) - # TODO: Use single child for scope_block? - [K"local_def"(lhs) lhs] # TODO: Use K"local" with attr? 
- [K"="(rhs) - decl_var(lhs) - tmp - ] + [K"local"(lhs) lhs] + [K"always_defined" lhs] + [K"="(binding) lhs tmp] + blk + ] + ] + elseif kl == K"::" + var = lhs[1] + if !(kind(var) in KSet"Identifier BindingId") + throw(LoweringError(var, "Invalid assignment location in let syntax")) + end + blk = @ast ctx binding [K"block" + tmp := rhs + type := lhs[2] + [K"scope_block"(ex, scope_type=scope_type) + [K"local"(lhs) [K"::" var type]] + [K"always_defined" var] + [K"="(binding) var tmp] + blk + ] + ] + elseif kind(lhs) == K"tuple" + lhs_locals = SyntaxList(ctx) + foreach_lhs_var(lhs) do var + push!(lhs_locals, @ast ctx var [K"local" var]) + push!(lhs_locals, @ast ctx var [K"always_defined" var]) + end + blk = @ast ctx binding [K"block" + tmp := rhs + [K"scope_block"(ex, scope_type=scope_type) + lhs_locals... + [K"="(binding) lhs tmp] blk ] ] else - TODO("Functions and multiple assignment") + throw(LoweringError(lhs, "Invalid assignment location in let syntax")) end + elseif kind(binding) == K"function" + sig = binding[1] + func_name = assigned_function_name(sig) + if isnothing(func_name) + # Some valid function syntaxes define methods on existing types and + # don't really make sense with let: + # let A.f() = 1 ... end + # let (obj::Callable)() = 1 ... end + throw(LoweringError(sig, "Function signature does not define a local function name")) + end + blk = @ast ctx binding [K"block" + [K"scope_block"(ex, scope_type=scope_type) + [K"local"(func_name) func_name] + [K"always_defined" func_name] + binding + [K"scope_block"(ex, scope_type=scope_type) + # The inside of the block is isolated from the closure, + # which itself can only capture values from the outside. 
+ blk + ] + ] + ] else throw(LoweringError(binding, "Invalid binding in let")) continue @@ -1801,10 +1845,14 @@ end function foreach_lhs_var(f::Function, ex) k = kind(ex) - if k == K"Identifier" + if k == K"Identifier" || k == K"BindingId" f(ex) elseif k == K"Placeholder" # Ignored + elseif k == K"tuple" + for e in children(ex) + foreach_lhs_var(f, e) + end else TODO(ex, "LHS vars") end @@ -3091,7 +3139,8 @@ function expand_abstract_or_primitive_type(ctx, ex) @ast ctx ex [K"block" [K"scope_block"(scope_type=:hard) [K"block" - [K"local_def" name] + [K"local" name] + [K"always_defined" name] typevar_stmts... [K"=" newtype_var @@ -3624,7 +3673,8 @@ function expand_struct_def(ctx, ex, docs) [K"block" [K"global" global_struct_name] [K"const" global_struct_name] - [K"local_def" struct_name] + [K"local" struct_name] + [K"always_defined" struct_name] typevar_stmts... [K"=" newtype_var diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 3b688f523752f..93a2a2fb0145e 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -21,6 +21,7 @@ function _register_kinds() "meta" # TODO: Use `meta` for inbounds and loopinfo etc? "inbounds" + "boundscheck" "inline" "noinline" "loopinfo" @@ -30,6 +31,11 @@ function _register_kinds() "opaque_closure" # Test whether a variable is defined "isdefined" + # [K"throw_undef_if_not" var cond] + # This form is used internally in Core.Compiler but might be + # emitted by packages such as Diffractor. In principle it needs to + # be passed through lowering in a similar way to `isdefined` + "throw_undef_if_not" # named labels for `@label` and `@goto` "symbolic_label" # Goto named label @@ -53,8 +59,11 @@ function _register_kinds() # Various heads harvested from flisp lowering. # (TODO: May or may not need all these - assess later) "break_block" + # Like block, but introduces a lexical scope; used during scope resolution. "scope_block" - "local_def" # TODO: Replace with K"local" plus BindingFlags attribute? 
+ # [K"always_defined" x] is an assertion that variable `x` is assigned before use + # ('local-def in flisp implementation is K"local" plus K"always_defined" + "always_defined" "_while" "_do_while" "_typevars" # used for supplying already-allocated `TypeVar`s to `where` diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 14c8cbe17f7c9..3b9ba96413b53 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -268,17 +268,18 @@ function _actually_return(ctx, ex) end function emit_return(ctx, srcref, ex) + # todo: Mark implicit returns if isnothing(ex) return elseif isempty(ctx.handler_token_stack) _actually_return(ctx, ex) return end - # FIXME: What's this !is_ssa(ctx, ex) here about? + # TODO: What's this !is_ssa(ctx, ex) here about? x = if is_simple_atom(ctx, ex) && !(is_ssa(ctx, ex) && !isempty(ctx.finally_handlers)) ex elseif !isempty(ctx.finally_handlers) - # TODO: Why does flisp lowering create a mutable variable here even + # todo: Why does flisp lowering create a mutable variable here even # though we don't mutate it? # tmp = ssavar(ctx, srcref, "returnval_via_finally") # <- can we use this? tmp = new_local_binding(ctx, srcref, "returnval_via_finally") @@ -293,8 +294,6 @@ function emit_return(ctx, srcref, ex) emit(ctx, @ast ctx srcref [K"leave" ctx.handler_token_stack...]) _actually_return(ctx, x) end - # Should we return `x` here? The flisp code does, but that doesn't seem - # useful as any returned value cannot be used? 
return nothing end @@ -563,12 +562,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || - k == K"Placeholder" || k == K"SourceLocation" - # TODO: other kinds: copyast $ globalref thismodule cdecl stdcall fastcall thiscall llvmcall - if needs_value && k == K"Placeholder" - # TODO: ensure outterref, globalref work here - throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) - end + k == K"SourceLocation" if in_tail_pos emit_return(ctx, ex) elseif needs_value @@ -579,6 +573,14 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end nothing end + elseif k == K"Placeholder" + if needs_value + throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) + end + nothing + elseif k == K"TOMBSTONE" + @chk !needs_value (ex,"TOMBSTONE encountered in value position") + nothing elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" || k == K"new_opaque_closure" # TODO k ∈ cfunction cglobal @@ -704,9 +706,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) else nothing end - elseif k == K"TOMBSTONE" - @chk !needs_value (ex,"TOMBSTONE encountered in value position") - nothing elseif k == K"if" || k == K"elseif" @chk numchildren(ex) <= 3 has_else = numchildren(ex) > 2 @@ -828,7 +827,8 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing - elseif k == K"isdefined" || k == K"captured_local" # TODO || k == K"throw_undef_if_not" (See upstream #53875) + elseif k == K"isdefined" || k == K"captured_local" || k == K"throw_undef_if_not" || + k == K"boundscheck" if in_tail_pos emit_return(ctx, ex) elseif needs_value diff --git a/JuliaLowering/src/scope_analysis.jl 
b/JuliaLowering/src/scope_analysis.jl index 738caa0be7f10..6c77659a2f7b6 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -36,7 +36,7 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, elseif is_leaf(ex) || is_quoted(k) || k in KSet"scope_block lambda module toplevel" return - elseif k == K"local" || k == K"local_def" + elseif k == K"local" if getmeta(ex, :is_destructured_arg, false) push!(destructured_args, ex[1]) else @@ -433,7 +433,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) end end ex_out - elseif k == K"local_def" + elseif k == K"always_defined" id = lookup_var(ctx, NameKey(ex[1])) update_binding!(ctx, id; is_always_defined=true) makeleaf(ctx, ex, K"TOMBSTONE") diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index b3ebcd3b39a5a..09996bd4c1e7c 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -9,18 +9,6 @@ Base.eval(test_mod, quote using JuliaSyntax end) -Base.eval(test_mod, quote - function var"@label"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" - @ast __context__ ex ex=>JuliaSyntax.K"symbolic_label" - end - - function var"@goto"(__context__::JuliaLowering.MacroContext, ex) - @chk kind(ex) == JuliaSyntax.K"Identifier" - @ast __context__ ex ex=>JuliaSyntax.K"symbolic_goto" - end -end) - #------------------------------------------------------------------------------- @testset "Tail position" begin diff --git a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl index 1ec037326ad0a..25228c6fde89e 100644 --- a/JuliaLowering/test/scopes.jl +++ b/JuliaLowering/test/scopes.jl @@ -17,13 +17,47 @@ end """) == (1, 2) JuliaLowering.include_string(test_mod, """ - x = 101 - y = 202 +x = 101 +y = 202 """) @test test_mod.x == 101 @test test_mod.y == 202 @test JuliaLowering.include_string(test_mod, "x + y") == 303 +@test JuliaLowering.include_string(test_mod, """ +begin + 
local x = 1 + local x = 2 + let (x,y) = (:x,:y) + (y,x) + end +end +""") === (:y,:x) + +# Types on left hand side of type decls refer to the outer scope +# (In the flisp implementation they refer to the inner scope, but this seems +# like a bug.) +@test JuliaLowering.include_string(test_mod, """ +let x::Int = 10.0 + local Int = Float64 + x +end +""") === 10 + +# Closures in let syntax can only capture values from the outside +# (In the flisp implementation it captures from inner scope, but this is +# inconsistent with let assignment where the rhs refers to the outer scope and +# thus seems like a bug.) +@test JuliaLowering.include_string(test_mod, """ +begin + local y = :outer_y + let f() = y + local y = :inner_y + f() + end +end +""") === :outer_y + # wrap expression in scope block of `scope_type` function wrapscope(ex, scope_type) g = JuliaLowering.ensure_attributes(ex._graph, scope_type=Symbol) diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 5a8a5cec9e3f3..9b4e69f012a98 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -2,6 +2,111 @@ using JuliaLowering: @islocal using Base: @locals #******************************************************************************* +######################################## +# let syntax with decl in binding list +let x::T = rhs + local T = 1 + T # <- This is a different `T` from the T in `x::T` +end +#--------------------- +1 TestMod.rhs +2 TestMod.T +3 (newvar slot₁/T) +4 (= slot₃/tmp %₁) +5 slot₃/tmp +6 (call core.isa %₅ %₂) +7 (gotoifnot %₆ label₉) +8 (goto label₁₂) +9 slot₃/tmp +10 (call top.convert %₂ %₉) +11 (= slot₃/tmp (call core.typeassert %₁₀ %₂)) +12 slot₃/tmp +13 (= slot₂/x %₁₂) +14 (= slot₁/T 1) +15 slot₁/T +16 (return %₁₅) + +######################################## +# let syntax with tuple on lhs +let (x,y) = rhs +end +#--------------------- +1 TestMod.rhs +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= 
slot₁/iterstate (call core.getfield %₂ 2)) +5 slot₁/iterstate +6 (call top.indexed_iterate %₁ 2 %₅) +7 (= slot₃/y (call core.getfield %₆ 1)) +8 (return core.nothing) + +######################################## +# Let syntax with the same name creates nested bindings +let x = f(x), x = g(x) +end +#--------------------- +1 TestMod.f +2 TestMod.x +3 (call %₁ %₂) +4 (= slot₁/x %₃) +5 TestMod.g +6 slot₁/x +7 (call %₅ %₆) +8 (= slot₂/x %₇) +9 (return core.nothing) + +######################################## +# let syntax with a function definition in the binding list creates a closure +let f() = body +end +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂) +4 TestMod.#f##0 +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:5 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read)] + 1 TestMod.body + 2 (return %₁) +10 TestMod.#f##0 +11 (new %₁₀) +12 (= slot₁/f %₁₁) +13 (return core.nothing) + +######################################## +# Error: Invalid `let` var with K"::" +let f[]::T = rhs +end +#--------------------- +LoweringError: +let f[]::T = rhs +# └─┘ ── Invalid assignment location in let syntax +end + +######################################## +# Error: Invalid `let` var +let f[] = rhs +end +#--------------------- +LoweringError: +let f[] = rhs +# └─┘ ── Invalid assignment location in let syntax +end + +######################################## +# Error: Invalid function def in `let` +let (obj::Callable)() = rhs +end +#--------------------- +LoweringError: +let (obj::Callable)() = rhs +# └───────────────┘ ── Function signature does not define a local function name +end + ######################################## # @islocal with locals and undefined vars let x = 1 From 4390927bddd63b043705b6243e65812874818c26 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 30 Jan 2025 23:27:35 +1000 Subject: [PATCH 0977/1109] Fix to allow named tuple 
unpacking in let syntax left hand side --- JuliaLowering/src/desugaring.jl | 13 ++++++----- JuliaLowering/test/loops.jl | 19 ++++++++++++--- JuliaLowering/test/loops_ir.jl | 41 +++++++++++++++++++++++++++++++++ JuliaLowering/test/scopes_ir.jl | 10 ++++++++ 4 files changed, 74 insertions(+), 9 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 7948374dcf762..2b2668e8c547c 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1843,19 +1843,21 @@ end #------------------------------------------------------------------------------- # Expand for loops +# Extract the variable names assigned to from a "fancy assignment left hand +# side" such as nested tuple destructuring. function foreach_lhs_var(f::Function, ex) k = kind(ex) if k == K"Identifier" || k == K"BindingId" f(ex) - elseif k == K"Placeholder" - # Ignored - elseif k == K"tuple" + elseif k == K"::" && numchildren(ex) == 2 + foreach_lhs_var(f, ex[1]) + elseif k == K"tuple" || k == K"parameters" for e in children(ex) foreach_lhs_var(f, e) end - else - TODO(ex, "LHS vars") end + # k == K"Placeholder" ignored, along with everything else - we assume + # validation is done elsewhere. 
end function expand_for(ctx, ex) @@ -1874,7 +1876,6 @@ function expand_for(ctx, ex) lhs = iterspec[1] if kind(lhs) != K"outer" foreach_lhs_var(lhs) do var - @chk kind(var) == K"Identifier" push!(copied_vars, @ast ctx var [K"=" var var]) end end diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index efa9a9b270b80..24af5ba1070e4 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -45,11 +45,8 @@ let end """) == [2,4] -# TODO: Test soft scope rules - end - @testset "for loops" begin test_mod = Module() @@ -144,6 +141,19 @@ let end """) == 2 +# Fancy for loop left hand side - unpacking and scoping +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 100 + j = 200 + for (i,j) in [('a', 'b'), (1,2)] + push!(a, (i,j)) + end + (a, i, j) +end +""") == ([('a', 'b'), (1,2)], 100, 200) + end @@ -162,6 +172,7 @@ end """) == [(1,3), (1,4), (2,3), (2,4)] @testset "break/continue" begin + @test JuliaLowering.include_string(test_mod, """ let a = [] @@ -198,6 +209,8 @@ let a end """) == [(1,2), (1,4), (2,2), (2,4)] + + end diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl index 6abaa48f90913..3ed96c386456a 100644 --- a/JuliaLowering/test/loops_ir.jl +++ b/JuliaLowering/test/loops_ir.jl @@ -72,6 +72,47 @@ end 16 (goto label₇) 17 (return core.nothing) +######################################## +# Syntax sugar for nested for loop +for x in xs, y in ys + x = 10 # Copy of x; does not overwrite x iteration var +end +#--------------------- +1 TestMod.xs +2 (= slot₂/next (call top.iterate %₁)) +3 slot₂/next +4 (call core.=== %₃ core.nothing) +5 (call top.not_int %₄) +6 (gotoifnot %₅ label₃₄) +7 slot₂/next +8 (= slot₃/x (call core.getfield %₇ 1)) +9 (call core.getfield %₇ 2) +10 TestMod.ys +11 (= slot₁/next (call top.iterate %₁₀)) +12 slot₁/next +13 (call core.=== %₁₂ core.nothing) +14 (call top.not_int %₁₃) +15 (gotoifnot %₁₄ label₂₈) +16 slot₃/x +17 (= slot₄/x %₁₆) +18 slot₁/next +19 (= slot₅/y (call 
core.getfield %₁₈ 1)) +20 (call core.getfield %₁₈ 2) +21 (= slot₄/x 10) +22 (= slot₁/next (call top.iterate %₁₀ %₂₀)) +23 slot₁/next +24 (call core.=== %₂₃ core.nothing) +25 (call top.not_int %₂₄) +26 (gotoifnot %₂₅ label₂₈) +27 (goto label₁₆) +28 (= slot₂/next (call top.iterate %₁ %₉)) +29 slot₂/next +30 (call core.=== %₂₉ core.nothing) +31 (call top.not_int %₃₀) +32 (gotoifnot %₃₁ label₃₄) +33 (goto label₇) +34 (return core.nothing) + ######################################## # Error: break outside for/while break diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 9b4e69f012a98..2c3d277fc3bec 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -40,6 +40,16 @@ end 7 (= slot₃/y (call core.getfield %₆ 1)) 8 (return core.nothing) +######################################## +# let syntax with named tuple on lhs creates locals for the unpacked vars +let (; x,y) = rhs +end +#--------------------- +1 TestMod.rhs +2 (= slot₁/x (call top.getproperty %₁ :x)) +3 (= slot₂/y (call top.getproperty %₁ :y)) +4 (return core.nothing) + ######################################## # Let syntax with the same name creates nested bindings let x = f(x), x = g(x) From 4c76d88a2ff5d139a5aceabf972b9a936aaeda37 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 31 Jan 2025 11:35:19 +1000 Subject: [PATCH 0978/1109] Add error for global methods inside function scope --- JuliaLowering/src/scope_analysis.jl | 16 ++++++++++++--- JuliaLowering/test/closures_ir.jl | 14 -------------- JuliaLowering/test/scopes_ir.jl | 30 +++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 6c77659a2f7b6..0be080bdeb9ca 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -539,6 +539,19 @@ function _resolve_scopes(ctx, ex::SyntaxTree) throw(LoweringError(ex, "Unknown syntax assertion")) end 
makeleaf(ctx, ex, K"TOMBSTONE") + elseif k == K"function_decl" + resolved = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + name = resolved[1] + if kind(name) == K"BindingId" + bk = lookup_binding(ctx, name).kind + if bk == :argument + throw(LoweringError(name, "Cannot add method to a function argument")) + elseif bk == :global && !ctx.scope_stack[end].in_toplevel_thunk + throw(LoweringError(name, + "Global method definition needs to be placed at the top level, or use `eval()`")) + end + end + resolved elseif k == K"const_if_global" id = _resolve_scopes(ctx, ex[1]) if lookup_binding(ctx, id).kind == :global @@ -630,9 +643,6 @@ function analyze_variables!(ctx, ex) analyze_variables!(ctx, ex[2]) elseif k == K"function_decl" name = ex[1] - if kind(name) == K"BindingId" && lookup_binding(ctx, name).kind == :argument - throw(LoweringError(name, "Cannot add method to a function argument")) - end if lookup_binding(ctx, name.var_id).kind === :local init_closure_bindings!(ctx, name) end diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 0f1d1ebb74538..b2ab36ca0afb4 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -422,20 +422,6 @@ end 18 (call core.kwcall %₅ %₁ %₁₆ %₁₇) 19 (return %₁₈) -######################################## -# Error: Attempt to add methods to a function argument -function f(g) - function g() - end -end -#--------------------- -LoweringError: -function f(g) - function g() -# ╙ ── Cannot add method to a function argument - end -end - ######################################## # Error: Static parameter clashing with closure name function f(::g) where {g} diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 2c3d277fc3bec..a39211e2f8a67 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -379,6 +379,36 @@ function f(::T) where T end end +######################################## +# Error: Attempt to add methods to a 
function argument +function f(g) + function g() + end +end +#--------------------- +LoweringError: +function f(g) + function g() +# ╙ ── Cannot add method to a function argument + end +end + +######################################## +# Error: Global method definition inside function scope +function f() + global global_method + function global_method() + end +end +#--------------------- +LoweringError: +function f() + global global_method + function global_method() +# └───────────┘ ── Global method definition needs to be placed at the top level, or use `eval()` + end +end + ######################################## # @isdefined with defined variables let x = 1 From d9ea78d3bd6d026d36620e96108b75042da70ebf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 31 Jan 2025 11:58:53 +1000 Subject: [PATCH 0979/1109] Desugaring of export/public --- JuliaLowering/src/desugaring.jl | 12 +++++++++++- JuliaLowering/src/runtime.jl | 6 ++++++ JuliaLowering/test/import.jl | 11 +++++++++++ JuliaLowering/test/import_ir.jl | 14 ++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 2b2668e8c547c..b85bab1947b7e 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -3908,6 +3908,16 @@ function expand_import(ctx, ex) ] end +# Expand `public` or `export` +function expand_public(ctx, ex) + @ast ctx ex [K"call" + module_public::K"Value" + ctx.mod::K"Value" + (kind(ex) == K"export")::K"Bool" + (e.name_val::K"String" for e in children(ex))... 
+ ] +end + #------------------------------------------------------------------------------- # Expand module definitions @@ -4148,7 +4158,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"import" || k == K"using" expand_import(ctx, ex) elseif k == K"export" || k == K"public" - TODO(ex) + expand_public(ctx, ex) elseif k == K"abstract" || k == K"primitive" expand_forms_2(ctx, expand_abstract_or_primitive_type(ctx, ex)) elseif k == K"struct" diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 8a4cd02bce2ae..6c1424945353f 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -193,6 +193,12 @@ function module_import(into_mod::Module, is_using::Bool, nothing end +function module_public(mod::Module, is_exported::Bool, identifiers...) + for ident in identifiers + @ccall jl_module_public(mod::Module, Symbol(ident)::Symbol, is_exported::Cint)::Cvoid + end +end + # Return the current exception. In JuliaLowering we use this rather than the # special form `K"the_exception"` to reduces the number of special forms. 
Base.@assume_effects :removable :nothrow function current_exception() diff --git a/JuliaLowering/test/import.jl b/JuliaLowering/test/import.jl index 5098c3aa308d6..20055174f2b8a 100644 --- a/JuliaLowering/test/import.jl +++ b/JuliaLowering/test/import.jl @@ -14,6 +14,17 @@ JuliaLowering.include_string(test_mod, """ @test test_mod.st2 === JuliaLowering.SyntaxTree @test test_mod.parsestmt === JuliaSyntax.parsestmt +JuliaLowering.include_string(test_mod, """ +x = 1 +y = 2 +export x +public y +""") +@test Base.isexported(test_mod, :x) +@test Base.ispublic(test_mod, :x) +@test Base.ispublic(test_mod, :y) +@test !Base.isexported(test_mod, :y) + C = JuliaLowering.include_string(test_mod, """ module C module D diff --git a/JuliaLowering/test/import_ir.jl b/JuliaLowering/test/import_ir.jl index 30bfc7f751162..ef95a431e7a98 100644 --- a/JuliaLowering/test/import_ir.jl +++ b/JuliaLowering/test/import_ir.jl @@ -45,3 +45,17 @@ function f() # └─────────┘ ── this syntax is only allowed in top level code end +######################################## +# Export +export a, b, c +#--------------------- +1 (call JuliaLowering.module_public TestMod true "a" "b" "c") +2 (return %₁) + +######################################## +# Public +public a, b, c +#--------------------- +1 (call JuliaLowering.module_public TestMod false "a" "b" "c") +2 (return %₁) + From 04ef4c1415d2536bdcacb2ba6ba25770c3e45a97 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 1 Feb 2025 11:35:11 +1000 Subject: [PATCH 0980/1109] Fix desugaring of `return` without argument flisp lowering deals with this in the parser by inserting the `nothing` there. JuliaSyntax avoids this in order to more faithfully represent the source, so we need to deal with it in lowering instead. 
--- JuliaLowering/src/desugaring.jl | 8 ++++++++ JuliaLowering/test/functions_ir.jl | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index b85bab1947b7e..059a1d3f1e391 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -4236,6 +4236,14 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) throw(LoweringError(ex, "`...` expression outside call")) elseif is_leaf(ex) ex + elseif k == K"return" + if numchildren(ex) == 0 + @ast ctx ex [K"return" "nothing"::K"core"] + elseif numchildren(ex) == 1 + mapchildren(e->expand_forms_2(ctx,e), ctx, ex) + else + throw(LoweringError(ex, "More than one argument to return")) + end else mapchildren(e->expand_forms_2(ctx,e), ctx, ex) end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 0fd9f837d226c..feb9bf491b0d6 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -896,6 +896,28 @@ end 9 TestMod.f 10 (return %₉) +######################################## +# Function return without arguments +function f() + return + after_return # <- distinguish output from implicit return +end +#--------------------- +1 (method TestMod.f) +2 TestMod.f +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read)] + 1 (return core.nothing) + 2 TestMod.after_return + 3 (return %₂) +9 TestMod.f +10 (return %₉) + ######################################## # Binding docs to functions """ From f8bb72bdbf3cd189f60af1ecfc85cab1da9a5234 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 1 Feb 2025 15:57:22 +1000 Subject: [PATCH 0981/1109] Desugaring and runtime support for generated functions I've opted to diverge from Base's lowering of `@ generated` code generators to add a semi-hidden `__context__` argument 
to the lowered code generator function in exact analogy to the context added to macros. This makes the code generator even more macro-like, and I've also reused the MacroExpansionContext. We need our own `GeneratedFunctionStub` here in order to construct the context, to call back into JuliaLowering's version of the lowering machinery in a precise way, and to propagate enhanced provenance information. This all turned out to be quite subtle, but JuliaLowering is able to almost fully integrate with the runtime without any changes to the runtime itself. Thus we can see the effort people have put into building abstractions for Cassette/etc has really paid off. --- JuliaLowering/README.md | 63 +++++++++++++ JuliaLowering/src/desugaring.jl | 127 ++++++++++++++++++++++++++- JuliaLowering/src/eval.jl | 13 +-- JuliaLowering/src/kinds.jl | 4 + JuliaLowering/src/linear_ir.jl | 30 ++++--- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/runtime.jl | 106 +++++++++++++++++++++- JuliaLowering/src/scope_analysis.jl | 21 +++-- JuliaLowering/test/demo.jl | 40 +++++++-- JuliaLowering/test/functions.jl | 33 +++++++ JuliaLowering/test/functions_ir.jl | 81 +++++++++++++++++ 11 files changed, 485 insertions(+), 35 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index e666104e43bfb..1f277257ed08e 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -398,6 +398,69 @@ In total, this expands a single "function definition" into seven methods. Note that the above is only a sketch! There's more fiddly details when `where` syntax comes in +### Desugaring of generated functions + +A brief description of how this works. Let's consider the generated function + +```julia +function gen(x::NTuple{N}, y) where {N,T} + shared = :shared + # Unnecessary use of @generated, but it shows what's going on. 
+ if @generated + quote + maybe_gen = ($x, $N) + end + else + maybe_gen = (typeof(x), N) + end + (shared, maybe_gen) +end +``` + +This is desugared into the following two function definitions. First, a code +generator which will generate code for the body of the function, given the +static parameters `N`, `T` and the positional arguments `x`, `y`. +(`var"#self#"::Type{typeof(gen)}` is also provided by the Julia runtime to +complete the full signature of `gen`, though the user won't normally use this.) + +```julia +function var"#gen@generator#0"(__context__::JuilaSyntax.MacroContext, N, T, var"#self#", x, y) + gen_stuff = quote + maybe_gen = ($x, $N) + end + quote + shared = :shared + $gen_stuff + (shared, maybe_gen) + end +end +``` + +Second, the non-generated version, using the `if @generated` else branches, and +containing mostly normal code. + +```julia +function gen(x::NTuple{N}, y) where {N,T} + $(Expr(:meta, :generated, + Expr(:call, JuliaLowering.GeneratedFunctionStub, + :var"#gen@generator#0", sourceref_of_gen, + :(Core.svec(:var"#self", :x, :y)) + :(Core.svec(:N, :T))))) + shared = :shared + maybe_gen = (typeof(x), N) + (shared, maybe_gen) +end +``` + +The one extra thing added here is the `Expr(:meta, :generated)` which is an +expression creating a callable wrapper for the user's generator, to be +evaluated at top level. This wrapper will then be invoked by the runtime +whenever the user calls `gen` with a new signature and it's expected that a +`CodeInfo` be returned from it. `JuliaLowering.GeneratedFunctionStub` differs +from `Core.GeneratedFunctionStub` in that it contains extra provenance +information (the `sourcref_of_gen`) and expects a `SyntaxTree` to be returned +by the user's generator code. 
+ ## Pass 3: Scope analysis / binding resolution This pass replaces variables with bindings of kind `K"BindingId"`, diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 059a1d3f1e391..5e1e5f09c36ed 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2213,7 +2213,7 @@ function method_def_expr(ctx, srcref, callex_srcref, method_table, ::K"SourceLocation"(callex_srcref) ] [K"method" - method_table + isnothing(method_table) ? "nothing"::K"core" : method_table method_metadata [K"lambda"(body, is_toplevel_thunk=false) [K"block" arg_names...] @@ -2285,6 +2285,117 @@ function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) return trimmed_typevar_names end +function is_if_generated(ex) + kind(ex) == K"if" && kind(ex[1]) == K"generated" +end + +# Return true if a function body contains a code generator from `@generated` in +# the form `[K"if" [K"generated"] ...]` +function is_generated(ex) + if is_if_generated(ex) + return true + elseif is_quoted(ex) || kind(ex) == K"function" + return false + else + return any(is_generated, children(ex)) + end +end + +function split_generated(ctx, ex, gen_part) + if is_leaf(ex) + ex + elseif is_if_generated(ex) + gen_part ? @ast(ctx, ex, [K"$" ex[2]]) : ex[3] + else + mapchildren(e->split_generated(ctx, e, gen_part), ctx, ex) + end +end + +# Split @generated function body into two parts: +# * The code generator +# * The non-generated function body +function expand_function_generator(ctx, srcref, callex_srcref, func_name, func_name_str, body, arg_names, typevar_names) + gen_body = if is_if_generated(body) + body[2] # Simple case - don't need interpolation when the whole body is generated + else + expand_quote(ctx, @ast ctx body [K"block" split_generated(ctx, body, true)]) + end + gen_name_str = reserve_module_binding_i(ctx.mod, + "#$(isnothing(func_name_str) ? 
"_" : func_name_str)@generator#") + gen_name = new_global_binding(ctx, body, gen_name_str, ctx.mod) + + # Set up the arguments for the code generator + gen_arg_names = SyntaxList(ctx) + gen_arg_types = SyntaxList(ctx) + # Self arg + push!(gen_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument)) + push!(gen_arg_types, @ast ctx callex_srcref [K"function_type" gen_name]) + # Macro expansion context arg + if kind(func_name) != K"Identifier" + TODO(func_name, "Which scope do we adopt for @generated generator `__context__` in this case?") + end + push!(gen_arg_names, adopt_scope(@ast(ctx, callex_srcref, "__context__"::K"Identifier"), func_name)) + push!(gen_arg_types, @ast(ctx, callex_srcref, MacroContext::K"Value")) + # Trailing arguments to the generator are provided by the Julia runtime. They are: + # static_parameters... parent_function arg_types... + first_trailing_arg = length(gen_arg_names) + 1 + append!(gen_arg_names, typevar_names) + append!(gen_arg_names, arg_names) + # Apply nospecialize to all arguments to prevent so much codegen and add + # Core.Any type for them + for i in first_trailing_arg:length(gen_arg_names) + gen_arg_names[i] = setmeta(gen_arg_names[i]; nospecialize=true) + push!(gen_arg_types, @ast ctx gen_arg_names[i] "Any"::K"core") + end + # Code generator definition + gen_func_method_defs = @ast ctx srcref [K"method_defs" + gen_name + method_def_expr(ctx, srcref, callex_srcref, nothing, SyntaxList(ctx), gen_arg_names, + gen_arg_types, gen_body, nothing) + ] + + # Extract non-generated body + nongen_body = @ast ctx body [K"block" + # The Julia runtime associates the code generator with the + # non-generated method by adding this meta to the body. This feels like + # a hack though since the generator ultimately gets attached to the + # method rather than the CodeInfo which we're putting it inside. 
+ [K"meta" + "generated"::K"Symbol" + # The following is code to be evaluated at top level and will wrap + # whatever code comes from the user's generator into an appropriate + # K"lambda" (+ K"with_static_parameters") suitable for lowering + # into a CodeInfo. + # + # todo: As isolated top-level code, we don't actually want to apply + # the normal scope rules of the surrounding function ... it should + # technically have scope resolved at top level. + [K"new" + GeneratedFunctionStub::K"Value" # Use stub type from JuliaLowering + gen_name + # Truncate provenance to just the source file range, as this + # will live permanently in the IR and we probably don't want + # the full provenance tree and intermediate expressions + # (TODO: More truncation. We certainly don't want to store the + # source file either.) + sourceref(srcref)::K"Value" + [K"call" + "svec"::K"core" + "#self#"::K"Symbol" + (n.name_val::K"Symbol"(n) for n in arg_names[2:end])... + ] + [K"call" + "svec"::K"core" + (n.name_val::K"Symbol"(n) for n in typevar_names)... 
+ ] + ] + ] + split_generated(ctx, body, false) + ] + + return gen_name, gen_func_method_defs, nongen_body +end + # Generate a method for every number of allowed optional arguments # For example for `f(x, y=1, z=2)` we generate two additional methods # f(x) = f(x, 1, 2) @@ -2799,6 +2910,14 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] end + gen_func_name = nothing + gen_func_method_defs = nothing + if is_generated(body) + gen_func_name, gen_func_method_defs, body = + expand_function_generator(ctx, ex, callex, name, name_str, body, arg_names, typevar_names) + + end + if isnothing(keywords) body_func_name, kw_func_method_defs = (nothing, nothing) # NB: This check seems good as it statically catches any useless @@ -2848,6 +2967,9 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end @ast ctx ex [K"block" + if !isnothing(gen_func_name) + [K"function_decl"(gen_func_name) gen_func_name] + end if !isnothing(body_func_name) [K"function_decl"(body_func_name) body_func_name] end @@ -2857,6 +2979,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= [K"scope_block"(scope_type=:hard) [K"block" new_typevar_stmts... + gen_func_method_defs kw_func_method_defs [K"method_defs" isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name @@ -3651,7 +3774,7 @@ function expand_struct_def(ctx, ex, docs) typevar_in_bounds = any(type_params[i+1:end]) do param # Check the bounds of subsequent type params (_,lb,ub) = analyze_typevar(ctx, param) - # TODO: flisp lowering tests `lb` here so we also do. But + # todo: flisp lowering tests `lb` here so we also do. But # in practice this doesn't seem to constrain `typevar_name` # and the generated constructor doesn't work? 
(!isnothing(ub) && contains_identifier(ub, typevar_name)) || diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index db76d7377cc60..a007985d4dd5f 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -230,12 +230,8 @@ function to_lowered_expr(mod, ex, ssa_offset=0) Core.SSAValue(ex.var_id + ssa_offset) elseif k == K"return" Core.ReturnNode(to_lowered_expr(mod, ex[1], ssa_offset)) - elseif is_quoted(k) - if k == K"inert" - ex[1] - else - TODO(ex, "Convert SyntaxTree to Expr") - end + elseif k == K"inert" + ex[1] elseif k == K"code_info" funcname = ex.is_toplevel_thunk ? "top-level scope" : @@ -269,6 +265,11 @@ function to_lowered_expr(mod, ex, ssa_offset=0) # TODO: put allow_partial back in once we update to the latest julia splice!(args, 4) # allow_partial Expr(:new_opaque_closure, args...) + elseif k == K"meta" + args = Any[to_lowered_expr(mod, e, ssa_offset) for e in children(ex)] + # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. + args[1] = args[1].value + Expr(:meta, args...) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 93a2a2fb0145e..68f20e80c20ad 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -9,6 +9,8 @@ function _register_kinds() "BEGIN_EXTENSION_KINDS" # atomic fields or accesses (see `@atomic`) "atomic" + # Flag for @generated parts of a functon + "generated" # Temporary rooting of identifiers (GC.@preserve) "gc_preserve_begin" "gc_preserve_end" @@ -46,6 +48,8 @@ function _register_kinds() # Catch-all for additional syntax extensions without the need to # extend `Kind`. Known extensions include: # locals, islocal + # The content of an assertion is not considered to be quoted, so + # use K"Symbol" or K"inert" inside where necessary. 
"extension" "END_EXTENSION_KINDS" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 3b9ba96413b53..191410e528af8 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -796,11 +796,22 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"gc_preserve_begin" makenode(ctx, ex, k, compile_args(ctx, children(ex))) - elseif k == K"gc_preserve_end" + elseif k == K"gc_preserve_end" || k == K"global" || k == K"const" if needs_value - throw(LoweringError(ex, "misplaced label in value position")) + throw(LoweringError(ex, "misplaced kind $k in value position")) end emit(ctx, ex) + nothing + elseif k == K"meta" + emit(ctx, ex) + if needs_value + val = @ast ctx ex "nothing"::K"core" + if in_tail_pos + emit_return(ctx, val) + else + val + end + end elseif k == K"_while" end_label = make_label(ctx, ex) top_label = emit_label(ctx, ex) @@ -821,12 +832,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if needs_value compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) end - elseif k == K"global" || k == K"const" - if needs_value - throw(LoweringError(ex, "misplaced declaration")) - end - emit(ctx, ex) - nothing elseif k == K"isdefined" || k == K"captured_local" || k == K"throw_undef_if_not" || k == K"boundscheck" if in_tail_pos @@ -957,7 +962,12 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) end end elseif k == K"meta" - TODO(ex, "_renumber $k") + # Somewhat-hack for Expr(:meta, :generated, gen) which has + # weird top-level semantics for `gen`, but we still need to translate + # the binding it contains to a globalref. 
+ mapchildren(ctx, ex) do e + _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) + end elseif is_literal(k) || is_quoted(k) ex elseif k == K"label" @@ -968,8 +978,6 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) mapchildren(ctx, ex) do e _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) end - # TODO: foreigncall error check: - # "ccall function name and library expression cannot reference local variables" end end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 3ceda2151ff3f..4f9fc9095fede 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -66,7 +66,7 @@ end #-------------------------------------------------- struct MacroContext <: AbstractLoweringContext graph::SyntaxGraph - macrocall::SyntaxTree + macrocall::Union{SyntaxTree,LineNumberNode,SourceRef} scope_layer::ScopeLayer end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 6c1424945353f..243b1e2ba2798 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -67,7 +67,7 @@ function interpolate_ast(ex, values...) # hacky though. # # Perhaps we should use a ScopedValue for this instead or get it from - # the macro __context__? Nothing feels great here. + # the macro __context__? None of the options feel great here. graph = nothing for vals in values for v in vals @@ -255,6 +255,91 @@ function bind_docs!(type::Type, docstr, lineno::LineNumberNode; field_docs=Core. Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), Union{}) end +# An alternative to Core.GeneratedFunctionStub which works on SyntaxTree rather +# than Expr. +struct GeneratedFunctionStub + gen + srcref + argnames::Core.SimpleVector + spnames::Core.SimpleVector +end + +# Call the `@generated` code generator function and wrap the results of the +# expression into a CodeInfo. 
+# +# `args` passed into stub by the Julia runtime are (parent_func, static_params..., arg_types...) +function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospecialize args...) + # Some of the lowering pipeline from lower() and the pass-specific setup is + # re-implemented here because generated functions are very much (but not + # entirely) like macro expansion. + # + # TODO: Reduce duplication where possible. + + mod = parentmodule(g.gen) + + # Attributes from parsing + graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String) + + # Attributes for macro expansion + graph = ensure_attributes(graph, + var_id=IdTag, + scope_layer=LayerId, + __macro_ctx__=Nothing, + meta=CompileHints, + # Additional attribute for resolve_scopes, for + # adding our custom lambda below + is_toplevel_thunk=Bool + ) + + # Macro expansion + layers = ScopeLayer[ScopeLayer(1, mod, false)] + ctx1 = MacroExpansionContext(graph, Bindings(), layers, layers[1]) + + # Run code generator - this acts like a macro expander and like a macro + # expander it gets a MacroContext. + mctx = MacroContext(syntax_graph(ctx1), g.srcref, layers[1]) + ex0 = g.gen(mctx, args...) + if ex0 isa SyntaxTree + if !is_compatible_graph(ctx1, ex0) + # If the macro has produced syntax outside the macro context, copy it over. + # TODO: Do we expect this always to happen? What is the API for access + # to the macro expansion context? 
+ ex0 = copy_ast(ctx1, ex0) + end + else + ex0 = @ast ctx ex expanded::K"Value" + end + # Expand any macros emitted by the generator + ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0)) + ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__), + ctx1.bindings, ctx1.scope_layers, ctx1.current_layer) + ex1 = reparent(ctx1, ex1) + + # Desugaring + ctx2, ex2 = expand_forms_2( ctx1, ex1) + + # Wrap expansion in a non-toplevel lambda and run scope resolution + ex2 = @ast ctx2 source [K"lambda"(is_toplevel_thunk=false) + [K"block" + (string(n)::K"Identifier" for n in g.argnames)... + ] + [K"block" + (string(n)::K"Identifier" for n in g.spnames)... + ] + ex2 + ] + ctx3, ex3 = resolve_scopes( ctx2, ex2) + + + # Rest of lowering + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir( ctx4, ex4) + ci = to_lowered_expr(mod, ex5) + @assert ci isa Core.CodeInfo + return ci +end + #------------------------------------------------------------------------------- # The following functions are used by lowering to inspect Julia's state. @@ -396,6 +481,25 @@ function Base.var"@isdefined"(__context__::MacroContext, ex) @ast __context__ __context__.macrocall [K"isdefined" ex] end +function Base.var"@generated"(__context__::MacroContext) + @ast __context__ __context__.macrocall [K"generated"] +end +function Base.var"@generated"(__context__::MacroContext, ex) + if kind(ex) != K"function" + throw(LoweringError(ex, "Expected a function argument to `@generated`")) + end + @ast __context__ __context__.macrocall [K"function" + ex[1] + [K"if" [K"generated"] + ex[2] + [K"block" + [K"meta" "generated_only"::K"Symbol"] + [K"return"] + ] + ] + ] +end + # The following `@islocal` and `@inert` are macros for special syntax known to # lowering which don't exist in Base but arguably should. 
# diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 0be080bdeb9ca..5e18d9dc154c8 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -56,7 +56,8 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, if kv == K"Identifier" _insert_if_not_present!(assignments, NameKey(v), v) elseif kv == K"BindingId" - if !lookup_binding(ctx, v).is_ssa + binfo = lookup_binding(ctx, v) + if !binfo.is_ssa && binfo.kind != :global TODO(v, "BindingId as function name") end else @@ -697,7 +698,7 @@ function analyze_variables!(ctx, ex) pop!(ctx.method_def_stack) elseif k == K"lambda" lambda_bindings = ex.lambda_bindings - if !ex.is_toplevel_thunk + if !ex.is_toplevel_thunk && !isempty(ctx.method_def_stack) # Record all lambdas for the same closure type in one place func_name = last(ctx.method_def_stack) if kind(func_name) == K"BindingId" @@ -718,12 +719,16 @@ function analyze_variables!(ctx, ex) end function resolve_scopes(ctx::ScopeResolutionContext, ex) - thunk = @ast ctx ex [K"lambda"(is_toplevel_thunk=true) - [K"block"] - [K"block"] - ex - ] - _resolve_scopes(ctx, thunk) + if kind(ex) != K"lambda" + # Wrap in a top level thunk if we're not already expanding a lambda. + # (Maybe this should be done elsewhere?) + ex = @ast ctx ex [K"lambda"(is_toplevel_thunk=true) + [K"block"] + [K"block"] + ex + ] + end + _resolve_scopes(ctx, ex) end """ diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 739c693c52425..4267e669b3418 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -754,14 +754,42 @@ end # end # """ -src = """ -let - function recursive_a() - recursive_b() +function gen_stuff(ctx, N, x) + JuliaLowering.@ast ctx ctx.macrocall [K"tuple" + (i::K"Integer" for i in 1:N)... 
+ ] +end + +src = raw""" +function gen(x::NTuple{N}) where {N} + nongen_stuff = :nongen + if @generated + quote + maybe_gen_stuff = ($N, $x) + end + else + maybe_gen_stuff = :nongen_2 end - function recursive_b() - recursive_a() + (nongen_stuff, maybe_gen_stuff) +end +""" + +src = raw""" +begin + function partially_gen(x::NTuple{N,T}) where {N,T} + shared = :shared_stuff + if @generated + quote + unshared = ($x, $N, $T) + end + else + # Uuuum. How do we test both sides of this branch?? + unshared = :nongen # (typeof(x), N, T) + end + (shared, unshared) end + + partially_gen((1,2,3,4,5)) end """ diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index e0a7d3ef0771e..4cb4221304c94 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -359,6 +359,39 @@ end @test cl(x = 20) == 21 end +@testset "Generated functions" begin + @test JuliaLowering.include_string(test_mod, raw""" + begin + @generated function f_gen(x::NTuple{N,T}) where {N,T} + quote + ($x, $N, $T) + end + end + + f_gen((1,2,3,4,5)) + end + """) == (NTuple{5,Int}, 5, Int) + + @test JuliaLowering.include_string(test_mod, raw""" + begin + function f_partially_gen(x::NTuple{N,T}) where {N,T} + shared = :shared_stuff + if @generated + quote + unshared = ($x, $N, $T) + end + else + # Uuuum. How do we actually test both sides of this branch??? 
+ unshared = :nongen # (typeof(x), N, T) + end + (shared, unshared) + end + + f_partially_gen((1,2,3,4,5)) + end + """) == (:shared_stuff, (NTuple{5,Int}, 5, Int)) +end + @testset "Broadcast" begin @test JuliaLowering.include_string(test_mod, """ let x = [1,2], y = [3,4], z = [5,6] diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index feb9bf491b0d6..818b52fd23e56 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1368,3 +1368,84 @@ function f_kw_slurp_not_last(; kws..., x=1) # └────┘ ── `...` may only be used for the last keyword argument end +######################################## +# Fully generated function +@generated function f_only_generated(x, y) + generator_code(x,y) +end +#--------------------- +1 (method TestMod.#f_only_generated@generator#0) +2 (method TestMod.f_only_generated) +3 TestMod.#f_only_generated@generator#0 +4 (call core.Typeof %₃) +5 (call core.svec %₄ JuliaLowering.MacroContext core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:21 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize) slot₅/y(nospecialize)] + 1 TestMod.generator_code + 2 (call %₁ slot₄/x slot₅/y) + 3 (return %₂) +10 TestMod.f_only_generated +11 (call core.Typeof %₁₀) +12 (call core.svec %₁₁ core.Any core.Any) +13 (call core.svec) +14 SourceLocation::1:21 +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68, 71]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0000), 0x00000046, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0000), 0x0000003b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000016, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000010, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000019, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000013, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])])) (call core.svec :#self# :x :y) (call core.svec))) + 2 (meta :generated_only) + 3 (return core.nothing) +17 TestMod.f_only_generated +18 (return %₁₇) + +######################################## +# Partially generated function with `if @generated` +function f_partially_generated(x, y) + nongen_stuff = bothgen(x, y) + if @generated + quote + maybe_gen_stuff = some_gen_stuff(x, y) + end + else + maybe_gen_stuff = some_nongen_stuff(x, y) + end + (nongen_stuff, maybe_gen_stuff) +end +#--------------------- +1 (method TestMod.#f_partially_generated@generator#0) +2 (method TestMod.f_partially_generated) +3 TestMod.#f_partially_generated@generator#0 +4 (call core.Typeof %₃) +5 (call core.svec %₄ JuliaLowering.MacroContext core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize,!read) 
slot₅/y(nospecialize,!read)] + 1 (call JuliaLowering.interpolate_ast (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))) + 2 (call core.tuple %₁) + 3 (call JuliaLowering.interpolate_ast (inert (block (block (= nongen_stuff (call bothgen x y)) ($ (block (call JuliaLowering.interpolate_ast (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))))) (tuple-p nongen_stuff maybe_gen_stuff)))) %₂) + 4 (return %₃) +10 TestMod.f_partially_generated +11 (call core.Typeof %₁₀) +12 (call core.svec %₁₁ core.Any core.Any) +13 (call core.svec) +14 SourceLocation::1:10 +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff] + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269, 272]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0000), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 
0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x000000e8, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x0000000d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 
0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0000), 0x0000009d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0000), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0000), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x00000026, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000037, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 
0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x0020), 0x0000001f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) + 2 TestMod.bothgen + 3 (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y)) + 4 TestMod.some_nongen_stuff + 5 (= slot₄/maybe_gen_stuff (call %₄ slot₂/x slot₃/y)) + 6 slot₅/nongen_stuff + 7 slot₄/maybe_gen_stuff + 8 (call core.tuple %₆ %₇) + 9 (return %₈) +17 TestMod.f_partially_generated +18 (return %₁₇) + From 025c2a8a4a1de348fd08eb0f343c1f4869d28922 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 1 Feb 2025 19:38:28 +1000 Subject: [PATCH 0982/1109] Add nkw meta to keyword function bodies --- JuliaLowering/src/desugaring.jl | 7 ++++++- JuliaLowering/test/functions_ir.jl | 24 ++++++++++++++---------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 5e1e5f09c36ed..f8276567c06c3 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2684,7 +2684,12 @@ 
function keyword_function_defs(ctx, srcref, callex_srcref, name_str, [K"block" # TODO: nkw method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core", - typevar_names, body_arg_names, body_arg_types, body, ret_var) + typevar_names, body_arg_names, body_arg_types, + [K"block" + [K"meta" "nkw"::K"Symbol" numchildren(keywords)::K"Integer"] + body + ], + ret_var) ] ] [K"method_defs" diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 818b52fd23e56..a4d1668445ab2 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -991,8 +991,9 @@ end 14 (call core.svec %₁₁ %₁₂ %₁₃) 15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/#self#(!read) slot₅/a slot₆/b] - 1 (call core.tuple slot₅/a slot₆/b slot₂/x slot₃/y) - 2 (return %₁) + 1 (meta :nkw 2) + 2 (call core.tuple slot₅/a slot₆/b slot₂/x slot₃/y) + 3 (return %₂) 16 (call core.typeof core.kwcall) 17 TestMod.f_kw_simple 18 (call core.Typeof %₁₇) @@ -1123,8 +1124,9 @@ end 11 (call core.svec %₈ %₉ %₁₀) 12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/all_kws slot₃/#self#(!read)] - 1 slot₂/all_kws - 2 (return %₁) + 1 (meta :nkw 1) + 2 slot₂/all_kws + 3 (return %₂) 13 (call core.typeof core.kwcall) 14 TestMod.f_kw_slurp_simple 15 (call core.Typeof %₁₄) @@ -1174,8 +1176,9 @@ end 11 (call core.svec %₈ %₉ %₁₀) 12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/non_x_kws(!read) slot₄/#self#(!read)] - 1 TestMod.all_kws - 2 (return %₁) + 1 (meta :nkw 2) + 2 TestMod.all_kws + 3 (return %₂) 13 (call core.typeof core.kwcall) 14 TestMod.f_kw_slurp 15 (call core.Typeof %₁₄) @@ -1248,10 +1251,11 @@ end 17 (call core.svec %₁₂ %₁₅ %₁₆) 18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/a(!read) slot₃/b(!read) slot₄/#self#(!read) slot₅/x(!read)] - 1 static_parameter₁ - 2 static_parameter₂ - 3 (call core.tuple %₁ %₂) - 4 (return %₃) + 1 (meta :nkw 2) + 2 static_parameter₁ + 3 
static_parameter₂ + 4 (call core.tuple %₂ %₃) + 5 (return %₄) 19 (call core.typeof core.kwcall) 20 TestMod.f_kw_sparams 21 (call core.Typeof %₂₀) From a3b309688ddff41965fd78c3c44413b28b5cf1f7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 3 Feb 2025 17:46:41 +1000 Subject: [PATCH 0983/1109] Fix missed meta nkw in tests --- JuliaLowering/test/closures_ir.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index b2ab36ca0afb4..d3079369d5183 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -587,16 +587,17 @@ end 14 (call core.svec %₁₁ %₁₂ %₁₃) 15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/x slot₃/#self#(!read) slot₄/y(!read)] - 1 TestMod.+ - 2 (call core.getfield slot₁/#self# :y) - 3 (call core.isdefined %₂ :contents) - 4 (gotoifnot %₃ label₆) - 5 (goto label₈) - 6 (newvar slot₄/y) - 7 slot₄/y - 8 (call core.getfield %₂ :contents) - 9 (call %₁ slot₂/x %₈) - 10 (return %₉) + 1 (meta :nkw 1) + 2 TestMod.+ + 3 (call core.getfield slot₁/#self# :y) + 4 (call core.isdefined %₃ :contents) + 5 (gotoifnot %₄ label₇) + 6 (goto label₉) + 7 (newvar slot₄/y) + 8 slot₄/y + 9 (call core.getfield %₃ :contents) + 10 (call %₂ slot₂/x %₉) + 11 (return %₁₀) 16 TestMod.#f_kw_closure##0 17 (call core.svec %₁₆) 18 (call core.svec) From 87d3421072a1c1b9339df0c6840a2f80a89f84e4 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 3 Feb 2025 18:15:36 +1000 Subject: [PATCH 0984/1109] Complete tuple destructuring with complex splatted left hand sides The flisp implementation has several arguable-bugs here which allow us to observe some assignments before all side effects of the right hand side have occurred. For example (x, y) = (1, undefined) assigns to `x` before throwing the `UndefVarError`. 
As another example, let f() = (x = 100) (x, y) = (1, f()) x end leaves `x` with the value of 100 in the existing implementation. Both these examples violate the principle that symbolic simpification should not be observable. As a fix, we now assign the right hand sides to temporary variables and do this even for normal identifiers on the right hand side, ensuring the normal left-to-right evaluation order for function arguments on the right hand side. --- JuliaLowering/src/desugaring.jl | 171 ++++++++++++++----------- JuliaLowering/src/eval.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/test/destructuring.jl | 95 +++++++++++++- JuliaLowering/test/destructuring_ir.jl | 168 ++++++++++++++++++------ 5 files changed, 319 insertions(+), 119 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index f8276567c06c3..42ecfaf31cd64 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -67,7 +67,7 @@ function is_effect_free(ex) k = kind(ex) # TODO: metas is_literal(k) || is_identifier_like(ex) || k == K"Symbol" || - k == K"inert" || k == K"top" || k == K"core" + k == K"inert" || k == K"top" || k == K"core" || k == K"Value" # flisp also includes `a.b` with simple `a`, but this seems like a bug # because this calls the user-defined getproperty? end @@ -90,96 +90,122 @@ end # Destructuring # Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the -# tuple. Includes support for slurping/splatting. +# tuple. Includes support for slurping/splatting. This function assumes that +# `_tuple_sides_match` returns true, so the following have already been +# checked: +# * There's max one `...` on the left hand side +# * There's max one `...` on the right hand side, in the last place, or +# matched with an lhs... in the last place. 
(required so that +# pairwise-matching terms from the right is valid) +# * Neither side has any key=val terms or parameter blocks # -# If lhss and rhss are the list of terms on each side, this function assumes -# the following have been checked: -# * There's only one `...` on the left hand side -# * Neither side has any key=val terms -# * _tuple_sides_match returns true +# Tuple elimination must act /as if/ the right hand side tuple was first +# constructed followed by destructuring. In particular, any side effects due to +# evaluating the individual terms in the right hand side tuple must happen in +# order. function tuple_to_assignments(ctx, ex) lhs = ex[1] rhs = ex[2] + + # Tuple elimination aims to turn assignments between tuples into lists of assignments. + # + # However, there's a complex interplay of side effects due to the + # individual assignments and these can be surprisingly complicated to + # model. For example `(x[i], y) = (f(), g)` can contain the following + # surprises: + # * `tmp = f()` calls `f` which might throw, or modify the bindings for + # `x` or `y`. + # * `x[i] = tmp` is lowered to `setindex!` which might throw or modify the + # bindings for `x` or `y`. + # * `g` might throw an `UndefVarError` + # + # Thus for correctness we introduce temporaries for all right hand sides + # with observable side effects and ensure they're evaluated in order. + n_lhs = numchildren(lhs) + n_rhs = numchildren(rhs) stmts = SyntaxList(ctx) - end_stmts = SyntaxList(ctx) - elements = SyntaxList(ctx) - assigned = SyntaxList(ctx) + rhs_tmps = SyntaxList(ctx) + for i in 1:n_rhs + rh = rhs[i] + r = if kind(rh) == K"..." 
+ rh[1] + else + rh + end + k = kind(r) + if is_literal(k) || k == K"Symbol" || k == K"inert" || k == K"top" || + k == K"core" || k == K"Value" + # Effect-free and nothrow right hand sides do not need a temporary + # (we require nothrow because the order of rhs terms is observable + # due to sequencing, thus identifiers are not allowed) + else + # Example rhs which need a temporary + # * `f()` - arbitrary side effects to any binding + # * `z` - might throw UndefVarError + tmp = emit_assign_tmp(stmts, ctx, r) + rh = kind(rh) == K"..." ? @ast(ctx, rh, [K"..." tmp]) : tmp + end + push!(rhs_tmps, rh) + end il = 0 ir = 0 - while il < numchildren(lhs) + while il < n_lhs il += 1 ir += 1 lh = lhs[il] if kind(lh) == K"..." - TODO(lhs, "... in tuple lhs") - n_lhs = numchildren(lhs) - n_rhs = numchildren(rhs) - if il == n_lhs - # Simple case: exactly one `...` at end of lhs. Examples: - # (x, ys...) = (a,b,c) - # (ys...) = () - rhs_tmp = emit_assign_tmp(stmts, ctx, - @ast(ctx, rhs, [K"tuple" rhs[ir:end]...]), - "rhs_tmp" - ) - push!(stmts, @ast ctx ex [K"=" lh[1] rhs_tmp]) - push!(elements, @ast ctx rhs_tmp [K"..." rhs_tmp]) - break - else - # Exactly one lhs `...` occurs in the middle somewhere, with a - # general rhs which has one `...` term or at least as many - # non-`...` terms. - # Examples: - # (x, ys..., z) = (a, b, c, d) - # (x, ys..., z) = (a, bs...) - # (xs..., y) = (a, bs...) - # in this case we pairwise-match arguments from the end - # backward, with rhs splats falling back to the general case. - jl = n_lhs + 1 - jr = n_rhs + 1 - while jl > il && jr > ir - if kind(lhs[jl-1]) == K"..." || kind(rhs[jr-1]) == K"..." - break - end - jl -= 1 - jr -= 1 + # Exactly one lhs `...` occurs in the middle somewhere, with a + # general rhs which has at least as many non-`...` terms or one + # `...` term at the end. + # Examples: + # (x, ys..., z) = (a, b, c, d) + # (x, ys..., z) = (a, bs...) + # (xs..., y) = (a, bs...) + # (xs...) 
= (a, b, c) + # in this case we can pairwise-match arguments from the end + # backward and emit a general tuple assignment for the middle. + jl = n_lhs + jr = n_rhs + while jl > il && jr > ir + if kind(lhs[jl]) == K"..." || kind(rhs_tmps[jr]) == K"..." + break end - rhs[jr] + jl -= 1 + jr -= 1 end - continue - end - rh = rhs[ir] # In other cases `rhs[ir]` must exist - if kind(rh) == K"..." - @assert ir == numchildren(rhs) # _tuple_sides_match ensures this - rh_tmp = emit_assign_tmp(stmts, ctx, rh[1]) - push!(end_stmts, @ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh_tmp]) - push!(elements, @ast ctx rh [K"..." rh_tmp]) - break + middle = emit_assign_tmp(stmts, ctx, + @ast(ctx, rhs, [K"tuple" rhs_tmps[ir:jr]...]), + "rhs_tmp" + ) + if il == jl + # (x, ys...) = (a,b,c) + # (x, ys...) = (a,bs...) + # (ys...) = () + push!(stmts, @ast ctx ex [K"=" lh[1] middle]) + else + # (x, ys..., z) = (a, b, c, d) + # (x, ys..., z) = (a, bs...) + # (xs..., y) = (a, bs...) + push!(stmts, @ast ctx ex [K"=" [K"tuple" lhs[il:jl]...] middle]) + end + # Continue with the remainder of the list of non-splat terms + il = jl + ir = jr else - if is_identifier_like(lh) && is_effect_free(rh) && - !any(contains_identifier(rhs[j], lh) for j in ir+1:lastindex(rhs)) - !any(contains_identifier(a, rh) for a in assigned) - # Overwrite `lh` directly if that won't cause conflicts with - # other symbols - push!(stmts, @ast ctx ex [K"=" lh rh]) - push!(assigned, lh) - push!(elements, rh) + rh = rhs_tmps[ir] + if kind(rh) == K"..." + push!(stmts, @ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh[1]]) + break else - # In other cases we need a temporary and we'll overwrite `lh` at the end. - tmp = ssavar(ctx, rh) - push!(stmts, @ast ctx ex [K"=" tmp rh]) - # `push!(assigned, lh)` is not required when we assign `lh` later. - push!(end_stmts, @ast ctx ex [K"=" lh tmp]) - push!(elements, tmp) + push!(stmts, @ast ctx ex [K"=" lh rh]) end end end @ast ctx ex [K"block" stmts... - end_stmts... 
- [K"removable" [K"tuple" elements...]] + [K"removable" [K"tuple" rhs_tmps...]] ] end @@ -354,12 +380,7 @@ function expand_property_destruct(ctx, ex) @assert kind(params) == K"parameters" rhs = ex[2] stmts = SyntaxList(ctx) - rhs1 = if is_ssa(ctx, rhs) || (is_identifier_like(rhs) && - !any(is_same_identifier_like(l, rhs) for l in children(params))) - rhs - else - emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) - end + rhs1 = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) for prop in children(params) propname = kind(prop) == K"Identifier" ? prop : kind(prop) == K"::" && kind(prop[1]) == K"Identifier" ? prop[1] : diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index a007985d4dd5f..fb303911d9d05 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -293,7 +293,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"opaque_closure_method" ? :opaque_closure_method : nothing if isnothing(head) - TODO(ex, "Unhandled form for kind $k") + throw(LoweringError(ex, "Unhandled form for kind $k")) end Expr(head, map(e->to_lowered_expr(mod, e, ssa_offset), children(ex))...) end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 5e18d9dc154c8..4f7203e9a88ad 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -58,7 +58,7 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, elseif kv == K"BindingId" binfo = lookup_binding(ctx, v) if !binfo.is_ssa && binfo.kind != :global - TODO(v, "BindingId as function name") + @assert false "allow local BindingId as function name?" 
end else @assert false diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl index 8e4ef25474993..4289952ec886b 100644 --- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -92,18 +92,61 @@ end end -@testset "Tuples on both sides" begin +@testset "Tuple elimination with tuples on both sides" begin + +# Simple case +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2 + (x,y) = (a,b) + (x,y) +end +""") == (1, 2) # lhs variable name in rhs @test JuliaLowering.include_string(test_mod, """ -let - x = 1 - y = 2 +let x = 1, y = 2 (x,y) = (y,x) (x,y) end """) == (2, 1) +# Slurps and splats + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2, c = 3 + (x, ys..., z) = (a, b, c) + (x, ys, z) +end +""") == (1, (2,), 3) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2, cs = (3,4) + (x, ys...) = (a, b, cs...) + (x, ys) +end +""") == (1, (2,3,4)) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, bs = (2,3), c = 4 + (x, ys...) = (a, bs..., c) + (x, ys) +end +""") == (1, (2,3,4)) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2, cs = (3,4) + (x, ys..., z) = (a, b, cs...) + (x, ys, z) +end +""") == (1, (2,3), 4) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1 + (x, ys...) = (a,) + (x, ys) +end +""") == (1, ()) + # dotted rhs in last place @test JuliaLowering.include_string(test_mod, """ let @@ -112,6 +155,7 @@ let (x,y,z) end """) == (1, 2, 3) + # in value position @test JuliaLowering.include_string(test_mod, """ let @@ -120,6 +164,49 @@ let end """) == (1, 2, 3) +# Side effects in the right hand tuple can affect the previous left hand side +# bindings, for example, `x`, below. In this case we need to ensure `f()` is +# called before `x` is assigned the value from the right hand side. +# (the flisp implementation fails this test.) 
+@test JuliaLowering.include_string(test_mod, """ +let + function f() + x=100 + 2 + end + (x,y) = (1,f()) + x,y +end +""") == (1,2) + +# `x` is not assigned and no side effect from `f()` happens when the right hand +# side throws an UndefVarError +@test JuliaLowering.include_string(test_mod, """ +let x=1, y=2, z=3, side_effect=false, a + exc = try + function f() + side_effect=true + end + (x,y,z) = (100, a, f()) + catch e + e + end + (x, y, z, side_effect, exc.var) +end +""") == (1, 2, 3, false, :a) + +# Require that rhs is evaluated before any assignments, thus `x` is not defined +# here because accessing `a` first throws an UndefVarError +@test JuliaLowering.include_string(test_mod, """ +let x, y, a + try + (x, y) = (1, a) + catch + end + @isdefined(x) +end +""") == false + end diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index 51d3ead88251d..65b016f90dce7 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -147,16 +147,29 @@ let end #--------------------- 1 TestMod.a -2 (= slot₁/x %₁) -3 TestMod.b -4 (= slot₂/y %₃) -5 TestMod.a -6 TestMod.b -7 (call core.tuple %₅ %₆) +2 TestMod.b +3 (= slot₁/x %₁) +4 (= slot₂/y %₂) +5 (call core.tuple %₁ %₂) +6 (return %₅) + +######################################## +# Destructuring with tuple elimination where variables are repeated +let + (x, y, z) = (y, a, x) +end +#--------------------- +1 slot₂/y +2 TestMod.a +3 slot₁/x +4 (= slot₁/x %₁) +5 (= slot₂/y %₂) +6 (= slot₃/z %₃) +7 (call core.tuple %₁ %₂ %₃) 8 (return %₇) ######################################## -# Destructuring with simple tuple elimination and non effect-free rhs +# Destructuring with simple tuple elimination and rhs with side effects let (x, y) = (f(), b) end @@ -164,28 +177,25 @@ end 1 TestMod.f 2 (call %₁) 3 TestMod.b -4 (= slot₂/y %₃) -5 (= slot₁/x %₂) -6 TestMod.b -7 (call core.tuple %₂ %₆) -8 (return %₇) +4 (= slot₁/x %₂) +5 (= slot₂/y %₃) +6 (call core.tuple 
%₂ %₃) +7 (return %₆) ######################################## -# Destructuring with tuple elimination where variables are repeated +# Destructuring with simple tuple elimination and lhs with side effects let - (x, y, z) = (y, a, x) + (x[10], y[20]) = (1,2) end #--------------------- -1 slot₂/y -2 TestMod.a -3 (= slot₂/y %₂) -4 slot₁/x -5 (= slot₃/z %₄) -6 (= slot₁/x %₁) -7 TestMod.a -8 slot₁/x -9 (call core.tuple %₁ %₇ %₈) -10 (return %₉) +1 1 +2 TestMod.x +3 (call top.setindex! %₂ %₁ 10) +4 2 +5 TestMod.y +6 (call top.setindex! %₅ %₄ 20) +7 (call core.tuple 1 2) +8 (return %₇) ######################################## # Destructuring with tuple elimination and trailing rhs ... @@ -194,15 +204,99 @@ let end #--------------------- 1 TestMod.a -2 (= slot₁/x %₁) -3 TestMod.rhs -4 (call top.indexed_iterate %₃ 1) +2 TestMod.rhs +3 (= slot₁/x %₁) +4 (call top.indexed_iterate %₂ 1) 5 (= slot₂/y (call core.getfield %₄ 1)) -6 TestMod.a -7 (call core.tuple %₆) -8 (call core._apply_iterate top.iterate core.tuple %₇ %₃) +6 (call core.tuple %₁) +7 (call core._apply_iterate top.iterate core.tuple %₆ %₂) +8 (return %₇) + +######################################## +# Destructuring with with non-trailing rhs `...` does not use tuple elimination +# (though we could do it for the `x = a` part here) +let + (x, y, z) = (a, rhs..., b) +end +#--------------------- +1 TestMod.a +2 (call core.tuple %₁) +3 TestMod.rhs +4 TestMod.b +5 (call core.tuple %₄) +6 (call core._apply_iterate top.iterate core.tuple %₂ %₃ %₅) +7 (call top.indexed_iterate %₆ 1) +8 (= slot₂/x (call core.getfield %₇ 1)) +9 (= slot₁/iterstate (call core.getfield %₇ 2)) +10 slot₁/iterstate +11 (call top.indexed_iterate %₆ 2 %₁₀) +12 (= slot₃/y (call core.getfield %₁₁ 1)) +13 (= slot₁/iterstate (call core.getfield %₁₁ 2)) +14 slot₁/iterstate +15 (call top.indexed_iterate %₆ 3 %₁₄) +16 (= slot₄/z (call core.getfield %₁₅ 1)) +17 (return %₆) + +######################################## +# Destructuring with tuple elimination and 
final ... on lhs +let + (x, ys...) = (a,b,c) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (= slot₁/x %₁) +5 (call core.tuple %₂ %₃) +6 (= slot₂/ys %₅) +7 (call core.tuple %₁ %₂ %₃) +8 (return %₇) + +######################################## +# Destructuring with tuple elimination, slurping, and completely effect free right hand sides +let + (x, ys...) = (1,2,3) +end +#--------------------- +1 (= slot₁/x 1) +2 (call core.tuple 2 3) +3 (= slot₂/ys %₂) +4 (call core.tuple 1 2 3) +5 (return %₄) + +######################################## +# Destructuring with tuple elimination and non-final ... on lhs +let + (x, ys..., z) = (a,b,c) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (= slot₁/x %₁) +5 (call core.tuple %₂) +6 (= slot₂/ys %₅) +7 (= slot₃/z %₃) +8 (call core.tuple %₁ %₂ %₃) 9 (return %₈) +######################################## +# Destructuring with tuple elimination but not in value position never creates +# the tuple +let + (x, ys...) 
= (a,b,c) + nothing +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (= slot₁/x %₁) +5 (call core.tuple %₂ %₃) +6 (= slot₂/ys %₅) +7 TestMod.nothing +8 (return %₇) + ######################################## # Property destructuring let @@ -211,10 +305,8 @@ end #--------------------- 1 TestMod.rhs 2 (= slot₁/x (call top.getproperty %₁ :x)) -3 TestMod.rhs -4 (= slot₂/y (call top.getproperty %₃ :y)) -5 TestMod.rhs -6 (return %₅) +3 (= slot₂/y (call top.getproperty %₁ :y)) +4 (return %₁) ######################################## # Property destructuring with colliding symbolic lhs/rhs @@ -231,13 +323,14 @@ end ######################################## # Property destructuring with nontrivial rhs let - (; x) = f() + (; x, y) = f() end #--------------------- 1 TestMod.f 2 (call %₁) 3 (= slot₁/x (call top.getproperty %₂ :x)) -4 (return %₂) +4 (= slot₂/y (call top.getproperty %₂ :y)) +5 (return %₂) ######################################## # Property destructuring with type decl @@ -261,8 +354,7 @@ end 14 (= slot₂/tmp (call core.typeassert %₁₂ %₁₃)) 15 slot₂/tmp 16 (= slot₁/x %₁₅) -17 TestMod.rhs -18 (return %₁₇) +17 (return %₂) ######################################## # Error: Property destructuring with frankentuple From ccf5b707fd7c2ca627a6492f56c0c8a0ef2169cb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Feb 2025 11:07:09 +1000 Subject: [PATCH 0985/1109] Cleanup runtime.jl and move syntax extension macros to their own file. This is pure code movement to groups code more logically and clearly and some edits to comments. There's no functional changes. 
--- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/runtime.jl | 223 ++++------------------------- JuliaLowering/src/syntax_macros.jl | 182 +++++++++++++++++++++++ 3 files changed, 213 insertions(+), 193 deletions(-) create mode 100644 JuliaLowering/src/syntax_macros.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index eb99309fe1e4b..19fd0c868dc6b 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -29,6 +29,7 @@ _include("scope_analysis.jl") _include("closure_conversion.jl") _include("linear_ir.jl") _include("runtime.jl") +_include("syntax_macros.jl") _include("eval.jl") diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 243b1e2ba2798..f694c54543719 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -1,10 +1,20 @@ -# Runtime support functionality. +# Runtime support for +# 1. Functions called by the code emitted from lowering +# 2. Introspecting Julia's state during lowering # -# Lowering generates code which uses these functions and types but it doesn't -# call them directly. -# -# These should probably move to `Core` at some point? +# These should probably all move to `Core` at some point. + +#------------------------------------------------------------------------------- +# Functions/types used by code emitted from lowering, but not called by it directly +# Return the current exception. In JuliaLowering we use this rather than the +# special form `K"the_exception"` to reduces the number of special forms. +Base.@assume_effects :removable :nothrow function current_exception() + @ccall jl_current_exception(current_task()::Any)::Any +end + +#-------------------------------------------------- +# Supporting functions for AST interpolation (`quote`) struct InterpolationContext{Graph} <: AbstractLoweringContext graph::Graph values::Tuple @@ -98,6 +108,8 @@ function interpolate_ast(ex, values...) 
end end +#-------------------------------------------------- +# Functions called by closure conversion function eval_closure_type(mod, closure_type_name, field_names, field_is_box) type_params = Core.TypeVar[] field_types = [] @@ -132,6 +144,9 @@ function replace_captured_locals!(codeinfo, locals) codeinfo end +#-------------------------------------------------- +# Functions which create modules or mutate their bindings + # Construct new bare module including only the "default names" # # using Core @@ -199,12 +214,8 @@ function module_public(mod::Module, is_exported::Bool, identifiers...) end end -# Return the current exception. In JuliaLowering we use this rather than the -# special form `K"the_exception"` to reduces the number of special forms. -Base.@assume_effects :removable :nothrow function current_exception() - @ccall jl_current_exception(current_task()::Any)::Any -end - +#-------------------------------------------------- +# Docsystem integration function _bind_func_docs!(f, docstr, method_metadata::Core.SimpleVector) mod = parentmodule(f) bind = Base.Docs.Binding(mod, nameof(f)) @@ -255,6 +266,9 @@ function bind_docs!(type::Type, docstr, lineno::LineNumberNode; field_docs=Core. Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), Union{}) end +#-------------------------------------------------- +# Runtime support infrastructure for `@generated` + # An alternative to Core.GeneratedFunctionStub which works on SyntaxTree rather # than Expr. struct GeneratedFunctionStub @@ -340,8 +354,9 @@ function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospec return ci end + #------------------------------------------------------------------------------- -# The following functions are used by lowering to inspect Julia's state. +# The following functions are called directly by lowering to inspect Julia's state. # Get the binding for `name` if one is already resolved in module `mod`. 
Note # that we cannot use `isdefined(::Module, ::Symbol)` here, because that causes @@ -389,9 +404,9 @@ end # # TODO: Remove the use of this where possible. Currently this is used within # lowering to create unique global names for keyword function bodies and -# closure types as an alternative to current-julia-module-counter. However, we -# should defer the it to eval-time to make lowering itself completely -# non-mutating. +# closure types as a more local alternative to current-julia-module-counter. +# However, we should ideally defer it to eval-time to make lowering itself +# completely non-mutating. function reserve_module_binding_i(mod, basename) i = 0 while true @@ -403,181 +418,3 @@ function reserve_module_binding_i(mod, basename) end end -#------------------------------------------------------------------------------- -# The following are versions of macros from Base which act as "standard syntax -# extensions" with special semantics known to lowering. -# -# In order to implement these here without getting into bootstrapping -# difficulties, we just write them as plain old macro-named functions and add -# the required __context__ argument ourselves. -# -# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @isdefined -# -# TODO: Eventually we should move these to proper `macro` definitions and use -# JuliaLowering.include() or something, then we'll be in the fun little -# world of bootstrapping but it shouldn't be too painful :) - -function _apply_nospecialize(ctx, ex) - k = kind(ex) - if k == K"Identifier" || k == K"Placeholder" || k == K"tuple" - setmeta(ex; nospecialize=true) - elseif k == K"..." 
|| k == K"::" || k == K"=" - if k == K"::" && numchildren(ex) == 1 - ex = @ast ctx ex [K"::" "_"::K"Placeholder" ex[1]] - end - mapchildren(c->_apply_nospecialize(ctx, c), ctx, ex, 1:1) - else - throw(LoweringError(ex, "Invalid function argument")) - end -end - -function Base.var"@nospecialize"(__context__::MacroContext, ex) - _apply_nospecialize(__context__, ex) -end - -function Base.GC.var"@preserve"(__context__::MacroContext, exs...) - idents = exs[1:end-1] - for e in idents - if kind(e) != K"Identifier" - throw(MacroExpansionError(e, "Preserved variable must be a symbol")) - end - end - @ast __context__ __context__.macrocall [K"block" - [K"=" - "s"::K"Identifier" - [K"gc_preserve_begin" - idents... - ] - ] - [K"=" - "r"::K"Identifier" - exs[end] - ] - [K"gc_preserve_end" "s"::K"Identifier"] - "r"::K"Identifier" - ] -end - -function Base.var"@atomic"(__context__::MacroContext, ex) - @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration") - @ast __context__ __context__.macrocall [K"atomic" ex] -end - -function Base.var"@label"(__context__::MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ ex ex=>K"symbolic_label" -end - -function Base.var"@goto"(__context__::MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ ex ex=>K"symbolic_goto" -end - -function Base.var"@locals"(__context__::MacroContext) - @ast __context__ __context__.macrocall [K"extension" "locals"::K"Symbol"] -end - -function Base.var"@isdefined"(__context__::MacroContext, ex) - @ast __context__ __context__.macrocall [K"isdefined" ex] -end - -function Base.var"@generated"(__context__::MacroContext) - @ast __context__ __context__.macrocall [K"generated"] -end -function Base.var"@generated"(__context__::MacroContext, ex) - if kind(ex) != K"function" - throw(LoweringError(ex, "Expected a function argument to `@generated`")) - end - @ast __context__ __context__.macrocall [K"function" - ex[1] - [K"if" [K"generated"] - ex[2] - 
[K"block" - [K"meta" "generated_only"::K"Symbol"] - [K"return"] - ] - ] - ] -end - -# The following `@islocal` and `@inert` are macros for special syntax known to -# lowering which don't exist in Base but arguably should. -# -# For now we have our own versions -function var"@islocal"(__context__::MacroContext, ex) - @chk kind(ex) == K"Identifier" - @ast __context__ __context__.macrocall [K"extension" - "islocal"::K"Symbol" - ex - ] -end - -function Base.Experimental.var"@opaque"(__context__::MacroContext, ex) - @chk kind(ex) == K"->" - @ast __context__ __context__.macrocall [K"opaque_closure" - "nothing"::K"core" - "nothing"::K"core" - "nothing"::K"core" - true::K"Bool" - ex - ] -end - -""" -A non-interpolating quoted expression. - -For example, - -```julia -@inert quote - \$x -end -``` - -does not take `x` from the surrounding scope - instead it leaves the -interpolation `\$x` intact as part of the expression tree. - -TODO: What is the correct way for `@inert` to work? ie which of the following -should work? - -```julia -@inert quote - body -end - -@inert begin - body -end - -@inert x - -@inert \$x -``` - -The especially tricky cases involve nested interpolation ... -```julia -quote - @inert \$x -end - -@inert quote - quote - \$x - end -end - -@inert quote - quote - \$\$x - end -end -``` - -etc. 
Needs careful thought - we should probably just copy what lisp does with -quote+quasiquote 😅 -""" -function var"@inert"(__context__::MacroContext, ex) - @chk kind(ex) == K"quote" - @ast __context__ __context__.macrocall [K"inert" ex] -end - diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl new file mode 100644 index 0000000000000..05fb5aacf9801 --- /dev/null +++ b/JuliaLowering/src/syntax_macros.jl @@ -0,0 +1,182 @@ +# The following are versions of macros from Base which act as "standard syntax +# extensions": +# +# * They emit syntactic forms with special `Kind`s and semantics known to +# lowering +# * There is no other Julia surface syntax for these `Kind`s. + +# In order to implement these here without getting into bootstrapping problems, +# we just write them as plain old macro-named functions and add the required +# __context__ argument ourselves. +# +# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @isdefined, @assume_effects +# +# TODO: Eventually move these to proper `macro` definitions and use +# `JuliaLowering.include()` or something. Then we'll be in the fun little world +# of bootstrapping but it shouldn't be too painful :) + +function _apply_nospecialize(ctx, ex) + k = kind(ex) + if k == K"Identifier" || k == K"Placeholder" || k == K"tuple" + setmeta(ex; nospecialize=true) + elseif k == K"..." 
|| k == K"::" || k == K"=" + if k == K"::" && numchildren(ex) == 1 + ex = @ast ctx ex [K"::" "_"::K"Placeholder" ex[1]] + end + mapchildren(c->_apply_nospecialize(ctx, c), ctx, ex, 1:1) + else + throw(LoweringError(ex, "Invalid function argument")) + end +end + +function Base.var"@nospecialize"(__context__::MacroContext, ex) + _apply_nospecialize(__context__, ex) +end + +function Base.var"@atomic"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration") + @ast __context__ __context__.macrocall [K"atomic" ex] +end + +function Base.var"@label"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex ex=>K"symbolic_label" +end + +function Base.var"@goto"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ ex ex=>K"symbolic_goto" +end + +function Base.var"@locals"(__context__::MacroContext) + @ast __context__ __context__.macrocall [K"extension" "locals"::K"Symbol"] +end + +function Base.var"@isdefined"(__context__::MacroContext, ex) + @ast __context__ __context__.macrocall [K"isdefined" ex] +end + +function Base.var"@generated"(__context__::MacroContext) + @ast __context__ __context__.macrocall [K"generated"] +end +function Base.var"@generated"(__context__::MacroContext, ex) + if kind(ex) != K"function" + throw(LoweringError(ex, "Expected a function argument to `@generated`")) + end + @ast __context__ __context__.macrocall [K"function" + ex[1] + [K"if" [K"generated"] + ex[2] + [K"block" + [K"meta" "generated_only"::K"Symbol"] + [K"return"] + ] + ] + ] +end + +function Base.GC.var"@preserve"(__context__::MacroContext, exs...) + idents = exs[1:end-1] + for e in idents + if kind(e) != K"Identifier" + throw(MacroExpansionError(e, "Preserved variable must be a symbol")) + end + end + @ast __context__ __context__.macrocall [K"block" + [K"=" + "s"::K"Identifier" + [K"gc_preserve_begin" + idents... 
+ ] + ] + [K"=" + "r"::K"Identifier" + exs[end] + ] + [K"gc_preserve_end" "s"::K"Identifier"] + "r"::K"Identifier" + ] +end + +function Base.Experimental.var"@opaque"(__context__::MacroContext, ex) + @chk kind(ex) == K"->" + @ast __context__ __context__.macrocall [K"opaque_closure" + "nothing"::K"core" + "nothing"::K"core" + "nothing"::K"core" + true::K"Bool" + ex + ] +end + +#-------------------------------------------------------------------------------- +# The following `@islocal` and `@inert` are macros for special syntax known to +# lowering which don't exist in Base but arguably should. +# +# For now we have our own versions +function var"@islocal"(__context__::MacroContext, ex) + @chk kind(ex) == K"Identifier" + @ast __context__ __context__.macrocall [K"extension" + "islocal"::K"Symbol" + ex + ] +end + +""" +A non-interpolating quoted expression. + +For example, + +```julia +@inert quote + \$x +end +``` + +does not take `x` from the surrounding scope - instead it leaves the +interpolation `\$x` intact as part of the expression tree. + +TODO: What is the correct way for `@inert` to work? ie which of the following +should work? + +```julia +@inert quote + body +end + +@inert begin + body +end + +@inert x + +@inert \$x +``` + +The especially tricky cases involve nested interpolation ... +```julia +quote + @inert \$x +end + +@inert quote + quote + \$x + end +end + +@inert quote + quote + \$\$x + end +end +``` + +etc. 
Needs careful thought - we should probably just copy what lisp does with +quote+quasiquote 😅 +""" +function var"@inert"(__context__::MacroContext, ex) + @chk kind(ex) == K"quote" + @ast __context__ __context__.macrocall [K"inert" ex] +end + From f73c974bb26bb99c90162413a7858978561b859e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Feb 2025 14:09:06 +1000 Subject: [PATCH 0986/1109] Front end macro and lowering of `cfunction` Quoting of the callable seems broken with respect to macro hygiene, but for now we follow the Julia reference implementation and defer fixing this to later. See also issue JuliaLang/JuliaLowering.jl#9. --- JuliaLowering/src/eval.jl | 1 + JuliaLowering/src/kinds.jl | 2 ++ JuliaLowering/src/linear_ir.jl | 39 +++++++++++++++---------- JuliaLowering/src/syntax_macros.jl | 41 ++++++++++++++++++++++++++ JuliaLowering/test/misc.jl | 17 ++++++++++- JuliaLowering/test/misc_ir.jl | 47 ++++++++++++++++++++++++++++++ 6 files changed, 131 insertions(+), 16 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index fb303911d9d05..beaa2e28b8ea4 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -290,6 +290,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"gc_preserve_begin" ? :gc_preserve_begin : k == K"gc_preserve_end" ? :gc_preserve_end : k == K"foreigncall" ? :foreigncall : + k == K"cfunction" ? :cfunction : k == K"opaque_closure_method" ? :opaque_closure_method : nothing if isnothing(head) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 68f20e80c20ad..467810c0cd2a6 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -29,6 +29,8 @@ function _register_kinds() "loopinfo" # Call into foreign code. 
Emitted by `@ccall` "foreigncall" + # Special form for constructing a function callable from C + "cfunction" # Special form emitted by `Base.Experimental.@opaque` "opaque_closure" # Test whether a variable is defined diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 191410e528af8..db286bcc7b663 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -143,14 +143,17 @@ function is_valid_ir_rvalue(ctx, lhs, rhs) return is_ssa(ctx, lhs) || is_valid_ir_argument(ctx, rhs) || (kind(lhs) == K"BindingId" && - # FIXME: add: invoke cfunction gc_preserve_begin copyast - kind(rhs) in KSet"new splatnew isdefined call foreigncall gc_preserve_begin foreigncall new_opaque_closure") + # FIXME: add: invoke ? + kind(rhs) in KSet"new splatnew cfunction isdefined call foreigncall gc_preserve_begin foreigncall new_opaque_closure") end -function contains_nonglobal_binding(ctx, ex) - contains_unquoted(ex) do e +function check_no_local_bindings(ctx, ex, msg) + contains_nonglobal_binding = contains_unquoted(ex) do e kind(e) == K"BindingId" && lookup_binding(ctx, e).kind !== :global end + if contains_nonglobal_binding + throw(LoweringError(ex, msg)) + end end # evaluate the arguments of a call, creating temporary locations as needed @@ -582,8 +585,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) @chk !needs_value (ex,"TOMBSTONE encountered in value position") nothing elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" || - k == K"new_opaque_closure" - # TODO k ∈ cfunction cglobal + k == K"new_opaque_closure" || k == K"cfunction" if k == K"foreigncall" args = SyntaxList(ctx) # todo: is is_leaf correct here? 
flisp uses `atom?` @@ -591,9 +593,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if kind(func) == K"call" && kind(func[1]) == K"core" && func[1].name_val == "tuple" # Tuples like core.tuple(:funcname, mylib_name) are allowed, # but may only reference globals. - if contains_nonglobal_binding(ctx, func) - throw(LoweringError(func, "ccall function name and library expression cannot reference local variables")) - end + check_no_local_bindings(ctx, func, "ccall function name and library expression cannot reference local variables") append!(args, compile_args(ctx, ex[1:1])) elseif is_leaf(func) append!(args, compile_args(ctx, ex[1:1])) @@ -602,18 +602,27 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end # 2nd to 5th arguments of foreigncall are special. They must be # left in place but cannot reference locals. - if contains_nonglobal_binding(ctx, ex[2]) - throw(LoweringError(ex[2], "ccall return type cannot reference local variables")) - end + check_no_local_bindings(ctx, ex[2], "ccall return type cannot reference local variables") for argt in children(ex[3]) - if contains_nonglobal_binding(ctx, argt) - throw(LoweringError(argt, "ccall argument types cannot reference local variables")) - end + check_no_local_bindings(ctx, argt, + "ccall argument types cannot reference local variables") end append!(args, ex[2:5]) append!(args, compile_args(ctx, ex[6:end])) args + elseif k == K"cfunction" + # Arguments of cfunction must be left in place except for argument + # 2 (fptr) + args = copy(children(ex)) + args[2] = only(compile_args(ctx, args[2:2])) + check_no_local_bindings(ctx, ex[3], + "cfunction return type cannot reference local variables") + for arg in children(ex[4]) + check_no_local_bindings(ctx, arg, + "cfunction argument cannot reference local variables") + end else + # TODO: cglobal args = compile_args(ctx, children(ex)) end callex = makenode(ctx, ex, k, args) diff --git a/JuliaLowering/src/syntax_macros.jl 
b/JuliaLowering/src/syntax_macros.jl index 05fb5aacf9801..5a18059d1a075 100644 --- a/JuliaLowering/src/syntax_macros.jl +++ b/JuliaLowering/src/syntax_macros.jl @@ -75,6 +75,47 @@ function Base.var"@generated"(__context__::MacroContext, ex) ] end +function Base.var"@cfunction"(__context__::MacroContext, callable, return_type, arg_types) + if kind(arg_types) != K"tuple" + throw(MacroExpansionError(arg_types, "@cfunction argument types must be a literal tuple")) + end + arg_types_svec = @ast __context__ arg_types [K"call" + "svec"::K"core" + children(arg_types)... + ] + if kind(callable) == K"$" + fptr = callable[1] + typ = Base.CFunction + else + # Kinda weird semantics here - without `$`, the callable is a top level + # expression which will be evaluated by `jl_resolve_globals_in_ir`, + # implicitly within the module where the `@cfunction` is expanded into. + # + # TODO: The existing flisp implementation is arguably broken because it + # ignores macro hygiene when `callable` is the result of a macro + # expansion within a different module. For now we've inherited this + # brokenness. + # + # Ideally we'd fix this by bringing the scoping rules for this + # expression back into lowering. One option may be to wrap the + # expression in a form which pushes it to top level - maybe as a whole + # separate top level thunk like closure lowering - then use the + # K"captured_local" mechanism to interpolate it back in. This scheme + # would make the complicated scope semantics explicit and let them be + # dealt with in the right place in the frontend rather than putting the + # rules into the runtime itself. + fptr = @ast __context__ callable QuoteNode(Expr(callable))::K"Value" + typ = Ptr{Cvoid} + end + @ast __context__ __context__.macrocall [K"cfunction" + typ::K"Value" + fptr + return_type + arg_types_svec + "ccall"::K"Symbol" + ] +end + function Base.GC.var"@preserve"(__context__::MacroContext, exs...) 
idents = exs[1:end-1] for e in idents diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index b1e894263a04b..90bc32cc45f48 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -17,7 +17,7 @@ let x = [1,2] GC.@preserve x begin x end -end +end """) == [1,2] @test JuliaLowering.include_string(test_mod, """ @@ -31,4 +31,19 @@ end ccall(:strlen, Csize_t, (Cstring,), "asdfg") """) == 5 +# cfunction +JuliaLowering.include_string(test_mod, """ +function f_ccallable(x, y) + x + y * 10 +end +""") +cf_int = JuliaLowering.include_string(test_mod, """ +@cfunction(f_ccallable, Int, (Int,Int)) +""") +@test @ccall($cf_int(2::Int, 3::Int)::Int) == 32 +cf_float = JuliaLowering.include_string(test_mod, """ +@cfunction(f_ccallable, Float64, (Float64,Float64)) +""") +@test @ccall($cf_float(2::Float64, 3::Float64)::Float64) == 32.0 + end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index e6bd2589b7bc1..d073cf97ffe54 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -319,6 +319,53 @@ JuxtTest.@emit_juxt 3 (call %₁ 10 %₂) 4 (return %₃) +######################################## +# @cfunction expansion with global generic function as function argument +@cfunction(callable, Int, (Int, Float64)) +#--------------------- +1 (cfunction Ptr{Nothing} :(:callable) TestMod.Int (call core.svec TestMod.Int TestMod.Float64) :ccall) +2 (return %₁) + +######################################## +# @cfunction expansion with closed-over callable argument +@cfunction($close_over, Int, (Int, Float64)) +#--------------------- +1 TestMod.close_over +2 (cfunction Base.CFunction %₁ TestMod.Int (call core.svec TestMod.Int TestMod.Float64) :ccall) +3 (return %₂) + +######################################## +# Error: Bad arg types to @cfunction +@cfunction(f, Int, NotATuple) +#--------------------- +MacroExpansionError while expanding @cfunction in module Main.TestMod: +@cfunction(f, Int, NotATuple) +# └───────┘ ── 
@cfunction argument types must be a literal tuple + +######################################## +# Error: Locals used in @cfunction return type +let T=Float64 + @cfunction(f, T, (Float64,)) +end +#--------------------- +LoweringError: +let T=Float64 + @cfunction(f, T, (Float64,)) +# ╙ ── cfunction return type cannot reference local variables +end + +######################################## +# Error: Locals used in @cfunction arg type +let T=Float64 + @cfunction(f, Float64, (Float64,T)) +end +#--------------------- +LoweringError: +let T=Float64 + @cfunction(f, Float64, (Float64,T)) +# ╙ ── cfunction argument cannot reference local variables +end + ######################################## # Error: unary & syntax &x From 52b8f59fd2dbc6a807ec6a33d1c0014369b649bb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 4 Feb 2025 15:23:22 +1000 Subject: [PATCH 0987/1109] Lowering of cglobal Special case lowering of the first (sym,lib) argument to cglobal. Also lower the identifiers `cglobal` and `ccall` to `K"core"` psuedo-refs very early, ensuring that cglobal and ccall can never be turned into normal bindings (and thus never be assigned to local or global variables). Also fixes handling of ccall/foreigncall (sym,lib) argument. 
--- JuliaLowering/src/ast.jl | 15 +++++- JuliaLowering/src/desugaring.jl | 14 ++--- JuliaLowering/src/eval.jl | 8 ++- JuliaLowering/src/linear_ir.jl | 32 ++++++----- JuliaLowering/src/macro_expansion.jl | 12 ++++- JuliaLowering/test/demo.jl | 4 ++ JuliaLowering/test/function_calls_ir.jl | 72 ++++++++++++++++++++++--- JuliaLowering/test/functions_ir.jl | 20 ------- 8 files changed, 126 insertions(+), 51 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 9de4164135fff..7188eba40b61c 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -517,6 +517,13 @@ end #------------------------------------------------------------------------------- # Predicates and accessors working on expression trees +# For historical reasons, `cglobal` and `ccall` are their own special +# quasi-identifier-like syntax but with special handling inside lowering which +# means they can't be used as normal identifiers. +function is_ccall_or_cglobal(name::AbstractString) + return name == "ccall" || name == "cglobal" +end + function is_quoted(ex) kind(ex) in KSet"Symbol quote top core globalref break inert meta inbounds inline noinline loopinfo" @@ -580,12 +587,16 @@ function is_valid_modref(ex) (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) end +function is_core_ref(ex, name) + kind(ex) == K"core" && ex.name_val == name +end + function is_core_nothing(ex) - kind(ex) == K"core" && ex.name_val == "nothing" + is_core_ref(ex, "nothing") end function is_core_Any(ex) - kind(ex) == K"core" && ex.name_val == "Any" + is_core_ref(ex, "Any") end function is_simple_atom(ctx, ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 42ecfaf31cd64..e04dfdda396db 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1607,7 +1607,7 @@ function expand_kw_call(ctx, srcref, farg, args, kws) end function expand_ccall(ctx, ex) - @assert kind(ex) == K"call" && is_same_identifier_like(ex[1], 
"ccall") + @assert kind(ex) == K"call" && is_core_ref(ex[1], "ccall") if numchildren(ex) < 4 throw(LoweringError(ex, "too few arguments to ccall")) end @@ -1806,7 +1806,7 @@ end function expand_call(ctx, ex) farg = ex[1] - if is_same_identifier_like(farg, "ccall") + if is_core_ref(farg, "ccall") return expand_ccall(ctx, ex) end args = copy(ex[2:end]) @@ -2741,7 +2741,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, end # Check valid identifier/function names -function is_valid_func_name(ex) +function is_invalid_func_name(ex) k = kind(ex) if k == K"Identifier" name = ex.name_val @@ -2749,9 +2749,9 @@ function is_valid_func_name(ex) # `function A.f(x,y) ...` name = ex[2].name_val else - return false + return true end - return name != "ccall" && name != "cglobal" + return is_ccall_or_cglobal(name) end function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity) @@ -2759,7 +2759,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= name = ex[1] if numchildren(ex) == 1 && is_identifier_like(name) # Function declaration with no methods - if !is_valid_func_name(name) + if is_invalid_func_name(name) throw(LoweringError(name, "Invalid function name")) end return @ast ctx ex [K"block" @@ -2837,7 +2837,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= name_str = name.name_val name = ssavar(ctx, name, name.name_val) bare_func_name = name - elseif !is_valid_func_name(name) + elseif is_invalid_func_name(name) throw(LoweringError(name, "Invalid function name")) elseif is_identifier_like(name) # Add methods to a global `Function` object, or local closure diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index beaa2e28b8ea4..eb4f28063e658 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -203,7 +203,13 @@ function to_lowered_expr(mod, ex, ssa_offset=0) if is_literal(k) ex.value elseif k == K"core" - GlobalRef(Core, 
Symbol(ex.name_val)) + name = ex.name_val + if name == "cglobal" + # cglobal isn't a true name within core - instead it's a builtin + :cglobal + else + GlobalRef(Core, Symbol(name)) + end elseif k == K"top" GlobalRef(Base, Symbol(ex.name_val)) elseif k == K"globalref" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index db286bcc7b663..0bb91f976969d 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -174,6 +174,19 @@ function compile_args(ctx, args) return args_out end +# Compile the (sym,lib) argument to ccall/cglobal +function compile_C_library_symbol(ctx, ex) + if kind(ex) == K"call" && kind(ex[1]) == K"core" && ex[1].name_val == "tuple" + # Tuples like core.tuple(:funcname, mylib_name) are allowed and are + # kept inline, but may only reference globals. + check_no_local_bindings(ctx, ex, + "function name and library expression cannot reference local variables") + ex + else + only(compile_args(ctx, (ex,))) + end +end + function emit(ctx::LinearIRContext, ex) push!(ctx.code, ex) return ex @@ -588,18 +601,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k == K"new_opaque_closure" || k == K"cfunction" if k == K"foreigncall" args = SyntaxList(ctx) - # todo: is is_leaf correct here? flisp uses `atom?` - func = ex[1] - if kind(func) == K"call" && kind(func[1]) == K"core" && func[1].name_val == "tuple" - # Tuples like core.tuple(:funcname, mylib_name) are allowed, - # but may only reference globals. - check_no_local_bindings(ctx, func, "ccall function name and library expression cannot reference local variables") - append!(args, compile_args(ctx, ex[1:1])) - elseif is_leaf(func) - append!(args, compile_args(ctx, ex[1:1])) - else - push!(args, func) - end + push!(args, compile_C_library_symbol(ctx, ex[1])) # 2nd to 5th arguments of foreigncall are special. They must be # left in place but cannot reference locals. 
check_no_local_bindings(ctx, ex[2], "ccall return type cannot reference local variables") @@ -621,8 +623,12 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) check_no_local_bindings(ctx, arg, "cfunction argument cannot reference local variables") end + elseif k == K"call" && is_core_ref(ex[1], "cglobal") + args = SyntaxList(ctx) + push!(args, ex[1]) + push!(args, compile_C_library_symbol(ctx, ex[2])) + append!(args, compile_args(ctx, ex[3:end])) else - # TODO: cglobal args = compile_args(ctx, children(ex)) end callex = makenode(ctx, ex, k, args) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 4f9fc9095fede..397cbd94166da 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -202,8 +202,16 @@ need to be dealt with before other lowering. """ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) k = kind(ex) - if k == K"Identifier" && all(==('_'), ex.name_val) - @ast ctx ex ex=>K"Placeholder" + if k == K"Identifier" + name_str = ex.name_val + if all(==('_'), name_str) + @ast ctx ex ex=>K"Placeholder" + elseif is_ccall_or_cglobal(name_str) + @ast ctx ex name_str::K"core" + else + layerid = get(ex, :scope_layer, ctx.current_layer.id) + makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) + end elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" layerid = get(ex, :scope_layer, ctx.current_layer.id) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 4267e669b3418..3aa83937ee6f4 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -793,6 +793,10 @@ begin end """ +src = """ +cglobal(:jl_uv_stdin, Ptr{Cvoid}) +""" + ex = parsestmt(SyntaxTree, src, filename="foo.jl") ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) diff --git a/JuliaLowering/test/function_calls_ir.jl 
b/JuliaLowering/test/function_calls_ir.jl index 4c71124d07bc6..7017902bff0fb 100644 --- a/JuliaLowering/test/function_calls_ir.jl +++ b/JuliaLowering/test/function_calls_ir.jl @@ -369,11 +369,9 @@ ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") #--------------------- 1 TestMod.Cstring 2 (call top.cconvert %₁ "asdfg") -3 TestMod.libc -4 (call core.tuple :strlen %₃) -5 (call top.unsafe_convert %₁ %₂) -6 (foreigncall %₄ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₅ %₂) -7 (return %₆) +3 (call top.unsafe_convert %₁ %₂) +4 (foreigncall (call core.tuple :strlen TestMod.libc) TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₃ %₂) +5 (return %₄) ######################################## # ccall with a calling convention @@ -458,7 +456,7 @@ end LoweringError: let libc = "libc" ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") -# └─────────────┘ ── ccall function name and library expression cannot reference local variables +# └─────────────┘ ── function name and library expression cannot reference local variables end ######################################## @@ -517,3 +515,65 @@ LoweringError: ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") # └────────┘ ── only the trailing ccall argument type should have `...` +######################################## +# cglobal special support for (sym, lib) tuple +cglobal((:sym, lib), Int) +#--------------------- +1 TestMod.Int +2 (call core.cglobal (call core.tuple :sym TestMod.lib) %₁) +3 (return %₂) + +######################################## +# cglobal - non-tuple expressions in first arg are lowered as normal +cglobal(f(), Int) +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.Int +4 (call core.cglobal %₂ %₃) +5 (return %₄) + +######################################## +# Error: assigning to `cglobal` +cglobal = 10 +#--------------------- +LoweringError: +cglobal = 10 +└─────┘ ── invalid assignment location + +######################################## +# Error: assigning to 
`ccall` +ccall = 10 +#--------------------- +LoweringError: +ccall = 10 +└───┘ ── invalid assignment location + +######################################## +# Error: assigning to `var"ccall"` +var"ccall" = 10 +#--------------------- +LoweringError: +var"ccall" = 10 +# └───┘ ── invalid assignment location + +######################################## +# Error: Invalid function name ccall +function ccall() +end +#--------------------- +LoweringError: +function ccall() +# └───┘ ── Invalid function name +end + +######################################## +# Error: Invalid function name ccall +function A.ccall() +end +#--------------------- +LoweringError: +function A.ccall() +# └─────┘ ── Invalid function name +end + diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index a4d1668445ab2..da9be80ad32cc 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -332,26 +332,6 @@ end 1 (return core.nothing) 9 (return core.nothing) -######################################## -# Error: Invalid function name ccall -function ccall() -end -#--------------------- -LoweringError: -function ccall() -# └───┘ ── Invalid function name -end - -######################################## -# Error: Invalid function name ccall -function A.ccall() -end -#--------------------- -LoweringError: -function A.ccall() -# └─────┘ ── Invalid function name -end - ######################################## # Error: Invalid dotop function name function (.+)(x,y) From 5336f88519a28dae8afce8f510bb8977a71fdef7 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 6 Feb 2025 11:50:25 +1000 Subject: [PATCH 0988/1109] Avoid adding tuple in parsing of `$(x) -> rhs` (JuliaLang/JuliaSyntax.jl#534) --- JuliaSyntax/src/parser.jl | 14 +++++++------- JuliaSyntax/test/parser.jl | 4 ++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index 0b66547f2fe94..d1a91478eb0ee 100644 --- 
a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -1461,7 +1461,7 @@ end # $a ==> ($ a) # # flisp: parse-unary-prefix -function parse_unary_prefix(ps::ParseState) +function parse_unary_prefix(ps::ParseState, has_unary_prefix=false) mark = position(ps) t = peek_token(ps) k = kind(t) @@ -1480,7 +1480,7 @@ function parse_unary_prefix(ps::ParseState) # $a ==> ($ a) # $$a ==> ($ ($ a)) # $&a ==> ($ (& a)) - parse_unary_prefix(ps) + parse_unary_prefix(ps, true) end # Only need PREFIX_OP_FLAG for :: f = k == K"::" ? PREFIX_OP_FLAG : EMPTY_FLAGS @@ -1488,7 +1488,7 @@ function parse_unary_prefix(ps::ParseState) end else # .&(x,y) ==> (call .& x y) - parse_atom(ps) + parse_atom(ps, true, has_unary_prefix) end end @@ -3066,7 +3066,7 @@ end # *very* overloaded! # # flisp: parse-paren / parse-paren- -function parse_paren(ps::ParseState, check_identifiers=true) +function parse_paren(ps::ParseState, check_identifiers=true, has_unary_prefix=false) ps = ParseState(ps, range_colon_enabled=true, space_sensitive=false, where_enabled=true, @@ -3100,7 +3100,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs is_tuple = had_commas || (had_splat && num_semis >= 1) || (initial_semi && (num_semis == 1 || num_subexprs > 0)) || - (peek(ps, 2) == K"->" && peek_behind(ps).kind != K"where") + (peek(ps, 2) == K"->" && (peek_behind(ps).kind != K"where" && !has_unary_prefix)) return (needs_parameters=is_tuple, is_tuple=is_tuple, is_block=num_semis > 0) @@ -3475,7 +3475,7 @@ end # the syntactic operators or closing tokens. 
# # flisp: parse-atom -function parse_atom(ps::ParseState, check_identifiers=true) +function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=false) bump_trivia(ps) mark = position(ps) leading_kind = peek(ps) @@ -3634,7 +3634,7 @@ function parse_atom(ps::ParseState, check_identifiers=true) bump(ps, remap_kind=K"Identifier") end elseif leading_kind == K"(" # parens or tuple - parse_paren(ps, check_identifiers) + parse_paren(ps, check_identifiers, has_unary_prefix) elseif leading_kind == K"[" # cat expression bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"]", ps.end_symbol) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 9eb7caa25d175..f208e24c3dabc 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -293,6 +293,10 @@ tests = [ "(a,b)->c" => "(-> (tuple-p a b) c)" "(a;b=1)->c" => "(-> (tuple-p a (parameters (= b 1))) c)" "x::T->c" => "(-> (tuple (::-i x T)) c)" + "\$a->b" => "(-> (tuple (\$ a)) b)" + "\$(a)->b" => "(-> (tuple (\$ (parens a))) b)" + # FIXME "&(a)->b" => "(-> (tuple-p (& (parens a))) b)" + # FIXME "::(a)->b" => "(-> (tuple-p (:: (parens a))) b)" # `where` combined with `->` still parses strangely. However: # * It's extra hard to add a tuple around the `x` in this syntax corner case. 
# * The user already needs to add additional, ugly, parens to get this From 2b9f99d85238e126bc0c529c5cc147d5ad113287 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 6 Feb 2025 11:50:56 +1000 Subject: [PATCH 0989/1109] Bump to version 1.0.1 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 49a07d642b27d..3b394076a96a6 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "1.0.0" +version = "1.0.1" [compat] Serialization = "1.0" From 44544df2a4d75aa8ad7cddc05db7e398325fbe1e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 6 Feb 2025 17:26:08 +1000 Subject: [PATCH 0990/1109] Ensure `TypeVar` definitions accompany method defs moved to top level The `K"method_defs"` needs to group a preamble+sets of methods for a function together so that typevars can be defined in the preamble and the whole thing moved to top level as one piece, if necessary, during closure conversion. Previous refactoring had broken that - reinstated it here. There's some repetition of the typevars here for certain cases like kwcall overloads vs the actual function overloads, vs the separate kw body function - we can't tell during desugaring whether a function will become a closure or not so it's possible we might need to run these TypeVar initializations twice. 
--- JuliaLowering/src/desugaring.jl | 107 +++++++++++++++-------------- JuliaLowering/test/closures_ir.jl | 24 +++---- JuliaLowering/test/functions_ir.jl | 70 ++++++++++--------- 3 files changed, 106 insertions(+), 95 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index e04dfdda396db..b45603e545a95 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2369,10 +2369,17 @@ function expand_function_generator(ctx, srcref, callex_srcref, func_name, func_n push!(gen_arg_types, @ast ctx gen_arg_names[i] "Any"::K"core") end # Code generator definition - gen_func_method_defs = @ast ctx srcref [K"method_defs" - gen_name - method_def_expr(ctx, srcref, callex_srcref, nothing, SyntaxList(ctx), gen_arg_names, - gen_arg_types, gen_body, nothing) + gen_func_method_defs = @ast ctx srcref [K"block" + [K"function_decl" gen_name] + [K"scope_block"(scope_type=:hard) + [K"method_defs" + gen_name + [K"block" + method_def_expr(ctx, srcref, callex_srcref, nothing, SyntaxList(ctx), + gen_arg_names, gen_arg_types, gen_body, nothing) + ] + ] + ] ] # Extract non-generated body @@ -2414,7 +2421,7 @@ function expand_function_generator(ctx, srcref, callex_srcref, func_name, func_n split_generated(ctx, body, false) ] - return gen_name, gen_func_method_defs, nongen_body + return gen_func_method_defs, nongen_body end # Generate a method for every number of allowed optional arguments @@ -2472,8 +2479,8 @@ function scope_nest(ctx, names, values, body) end # Generate body function and `Core.kwcall` overloads for functions taking keywords. -function keyword_function_defs(ctx, srcref, callex_srcref, name_str, - typevar_names, typevar_stmts, arg_names, +function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names, + typevar_stmts, new_typevar_stmts, arg_names, arg_types, has_slurp, first_default, arg_defaults, keywords, body, ret_var) mangled_name = let n = isnothing(name_str) ? 
"_" : name_str @@ -2700,23 +2707,29 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, check_all_typevars_used(body_arg_types, typevar_names, typevar_stmts) kw_func_method_defs = @ast ctx srcref [K"block" - [K"method_defs" - body_func_name - [K"block" - # TODO: nkw - method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core", - typevar_names, body_arg_names, body_arg_types, - [K"block" - [K"meta" "nkw"::K"Symbol" numchildren(keywords)::K"Integer"] - body - ], - ret_var) + [K"function_decl" body_func_name] + [K"scope_block"(scope_type=:hard) + [K"method_defs" + body_func_name + [K"block" + new_typevar_stmts... + method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core", + typevar_names, body_arg_names, body_arg_types, + [K"block" + [K"meta" "nkw"::K"Symbol" numchildren(keywords)::K"Integer"] + body + ], + ret_var) + ] ] ] - [K"method_defs" - "nothing"::K"core" - [K"block" - kwcall_method_defs... + [K"scope_block"(scope_type=:hard) + [K"method_defs" + "nothing"::K"core" + [K"block" + new_typevar_stmts... + kwcall_method_defs... + ] ] ] ] @@ -2737,7 +2750,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, ] end - body_func_name, kw_func_method_defs, body_for_positional_args_only + kw_func_method_defs, body_for_positional_args_only end # Check valid identifier/function names @@ -2936,27 +2949,27 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] end - gen_func_name = nothing gen_func_method_defs = nothing if is_generated(body) - gen_func_name, gen_func_method_defs, body = + gen_func_method_defs, body = expand_function_generator(ctx, ex, callex, name, name_str, body, arg_names, typevar_names) end if isnothing(keywords) - body_func_name, kw_func_method_defs = (nothing, nothing) - # NB: This check seems good as it statically catches any useless - # typevars which can't be inferred. However it wasn't previously an - # error so we might need to reduce it to a warning? 
+ kw_func_method_defs = nothing + # NB: The following check seems good as it statically catches any useless + # static parameters which can't be bound during method invocation. + # However it wasn't previously an error so we might need to reduce it + # to a warning? check_all_typevars_used(arg_types, typevar_names, typevar_stmts) main_typevar_names = typevar_names else # Rewrite `body` here so that the positional-only versions dispatch there. - body_func_name, kw_func_method_defs, body = + kw_func_method_defs, body = keyword_function_defs(ctx, ex, callex, name_str, typevar_names, typevar_stmts, - arg_names, arg_types, has_slurp, first_default, arg_defaults, - keywords, body, ret_var) + new_typevar_stmts, arg_names, arg_types, has_slurp, + first_default, arg_defaults, keywords, body, ret_var) # The main function (but without keywords) needs its typevars trimmed, # as some of them may be for the keywords only. main_typevar_names = trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) @@ -2993,28 +3006,22 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end @ast ctx ex [K"block" - if !isnothing(gen_func_name) - [K"function_decl"(gen_func_name) gen_func_name] - end - if !isnothing(body_func_name) - [K"function_decl"(body_func_name) body_func_name] - end if !isnothing(bare_func_name) + # Need the main function type created here before running any code + # in kw_func_method_defs [K"function_decl"(bare_func_name) bare_func_name] end + gen_func_method_defs + kw_func_method_defs [K"scope_block"(scope_type=:hard) - [K"block" - new_typevar_stmts... - gen_func_method_defs - kw_func_method_defs - [K"method_defs" - isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name - [K"block" - if !isnothing(method_table_val) - [K"=" method_table method_table_val] - end - method_stmts... - ] + [K"method_defs" + isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name + [K"block" + new_typevar_stmts... 
+ if !isnothing(method_table_val) + [K"=" method_table method_table_val] + end + method_stmts... ] ] ] diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index d3079369d5183..67ce53d199ea7 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -572,12 +572,12 @@ let y = y_init end #--------------------- 1 TestMod.y_init -2 (call core.svec :y) +2 (call core.svec :#f_kw_closure#0) 3 (call core.svec true) -4 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₂ %₃) -5 (call core.svec :#f_kw_closure#0) +4 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₂ %₃) +5 (call core.svec :y) 6 (call core.svec true) -7 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₅ %₆) +7 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₅ %₆) 8 TestMod.##f_kw_closure#0##0 9 TestMod.X 10 TestMod.#f_kw_closure##0 @@ -619,15 +619,15 @@ end 23 (= slot₂/#f_kw_closure#0 (call core.Box)) 24 slot₁/y 25 (call core.setfield! %₂₄ :contents %₁) -26 TestMod.##f_kw_closure#0##0 -27 slot₁/y +26 TestMod.#f_kw_closure##0 +27 slot₂/#f_kw_closure#0 28 (new %₂₆ %₂₇) -29 slot₂/#f_kw_closure#0 -30 (call core.setfield! %₂₉ :contents %₂₈) -31 TestMod.#f_kw_closure##0 -32 slot₂/#f_kw_closure#0 -33 (new %₃₁ %₃₂) -34 (= slot₃/f_kw_closure %₃₃) +29 (= slot₃/f_kw_closure %₂₈) +30 TestMod.##f_kw_closure#0##0 +31 slot₁/y +32 (new %₃₀ %₃₁) +33 slot₂/#f_kw_closure#0 +34 (call core.setfield! 
%₃₃ :contents %₃₂) 35 (call core.typeof core.kwcall) 36 TestMod.#f_kw_closure##0 37 (call core.svec %₃₅ core.NamedTuple %₃₆) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index da9be80ad32cc..9abb27e9ebce3 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -955,8 +955,8 @@ function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) (a, b, x, y) end #--------------------- -1 (method TestMod.#f_kw_simple#0) -2 (method TestMod.f_kw_simple) +1 (method TestMod.f_kw_simple) +2 (method TestMod.#f_kw_simple#0) 3 TestMod.#f_kw_simple#0 4 (call core.Typeof %₃) 5 TestMod.Char @@ -1091,8 +1091,8 @@ function f_kw_slurp_simple(; all_kws...) all_kws end #--------------------- -1 (method TestMod.#f_kw_slurp_simple#0) -2 (method TestMod.f_kw_slurp_simple) +1 (method TestMod.f_kw_slurp_simple) +2 (method TestMod.#f_kw_slurp_simple#0) 3 TestMod.#f_kw_slurp_simple#0 4 (call core.Typeof %₃) 5 (call top.pairs core.NamedTuple) @@ -1143,8 +1143,8 @@ function f_kw_slurp(; x=x_default, non_x_kws...) 
all_kws end #--------------------- -1 (method TestMod.#f_kw_slurp#0) -2 (method TestMod.f_kw_slurp) +1 (method TestMod.f_kw_slurp) +2 (method TestMod.#f_kw_slurp#0) 3 TestMod.#f_kw_slurp#0 4 (call core.Typeof %₃) 5 (call top.pairs core.NamedTuple) @@ -1212,8 +1212,8 @@ function f_kw_sparams(x::X; a::A=a_def, b::X=b_def) where {X,A} (X,A) end #--------------------- -1 (method TestMod.#f_kw_sparams#0) -2 (method TestMod.f_kw_sparams) +1 (method TestMod.f_kw_sparams) +2 (method TestMod.#f_kw_sparams#0) 3 (= slot₂/X (call core.TypeVar :X)) 4 (= slot₁/A (call core.TypeVar :A)) 5 TestMod.#f_kw_sparams#0 @@ -1236,16 +1236,18 @@ end 3 static_parameter₂ 4 (call core.tuple %₂ %₃) 5 (return %₄) -19 (call core.typeof core.kwcall) -20 TestMod.f_kw_sparams -21 (call core.Typeof %₂₀) -22 slot₂/X -23 (call core.svec %₁₉ core.NamedTuple %₂₁ %₂₂) -24 slot₂/X -25 (call core.svec %₂₄) -26 SourceLocation::1:10 -27 (call core.svec %₂₃ %₂₅ %₂₆) -28 --- method core.nothing %₂₇ +19 (= slot₄/X (call core.TypeVar :X)) +20 (= slot₃/A (call core.TypeVar :A)) +21 (call core.typeof core.kwcall) +22 TestMod.f_kw_sparams +23 (call core.Typeof %₂₂) +24 slot₄/X +25 (call core.svec %₂₁ core.NamedTuple %₂₃ %₂₄) +26 slot₄/X +27 (call core.svec %₂₆) +28 SourceLocation::1:10 +29 (call core.svec %₂₅ %₂₇ %₂₈) +30 --- method core.nothing %₂₉ slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/x slot₅/kwtmp slot₆/a(!read) slot₇/b(!read)] 1 (newvar slot₆/a) 2 (newvar slot₇/b) @@ -1282,23 +1284,25 @@ end 33 TestMod.#f_kw_sparams#0 34 (call %₃₃ %₁₀ %₂₅ slot₃/#self# slot₄/x) 35 (return %₃₄) -29 TestMod.f_kw_sparams -30 (call core.Typeof %₂₉) -31 slot₂/X -32 (call core.svec %₃₀ %₃₁) -33 slot₂/X -34 (call core.svec %₃₃) -35 SourceLocation::1:10 -36 (call core.svec %₃₂ %₃₄ %₃₅) -37 --- method core.nothing %₃₆ +31 (= slot₆/X (call core.TypeVar :X)) +32 (= slot₅/A (call core.TypeVar :A)) +33 TestMod.f_kw_sparams +34 (call core.Typeof %₃₃) +35 slot₆/X +36 (call core.svec %₃₄ %₃₅) +37 slot₆/X +38 (call core.svec 
%₃₇) +39 SourceLocation::1:10 +40 (call core.svec %₃₆ %₃₈ %₃₉) +41 --- method core.nothing %₄₀ slots: [slot₁/#self# slot₂/x] 1 TestMod.#f_kw_sparams#0 2 TestMod.a_def 3 TestMod.b_def 4 (call %₁ %₂ %₃ slot₁/#self# slot₂/x) 5 (return %₄) -38 TestMod.f_kw_sparams -39 (return %₃₈) +42 TestMod.f_kw_sparams +43 (return %₄₂) ######################################## # Error: Static parameter which is unused in keyword body arg types @@ -1358,8 +1362,8 @@ end generator_code(x,y) end #--------------------- -1 (method TestMod.#f_only_generated@generator#0) -2 (method TestMod.f_only_generated) +1 (method TestMod.f_only_generated) +2 (method TestMod.#f_only_generated@generator#0) 3 TestMod.#f_only_generated@generator#0 4 (call core.Typeof %₃) 5 (call core.svec %₄ JuliaLowering.MacroContext core.Any core.Any core.Any) @@ -1399,8 +1403,8 @@ function f_partially_generated(x, y) (nongen_stuff, maybe_gen_stuff) end #--------------------- -1 (method TestMod.#f_partially_generated@generator#0) -2 (method TestMod.f_partially_generated) +1 (method TestMod.f_partially_generated) +2 (method TestMod.#f_partially_generated@generator#0) 3 TestMod.#f_partially_generated@generator#0 4 (call core.Typeof %₃) 5 (call core.svec %₄ JuliaLowering.MacroContext core.Any core.Any core.Any) From 3cd4b14e491ab0b03eaace041bb02999463cc292 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 7 Feb 2025 14:08:00 +1000 Subject: [PATCH 0991/1109] Fix for toplevel-preserving statements in closure conversion Previously, closures were lifted to the outermost level of the toplevel thunk. Instead they should be kept inside any `if`, `try` and `block` top level statements, but lifted out of most other constructs. 
--- JuliaLowering/src/closure_conversion.jl | 80 +++++--- JuliaLowering/test/closures.jl | 18 +- JuliaLowering/test/closures_ir.jl | 235 ++++++++++++----------- JuliaLowering/test/scopes_ir.jl | 16 +- JuliaLowering/test/typedefs_ir.jl | 236 ++++++++++++------------ JuliaLowering/test/utils.jl | 2 +- 6 files changed, 325 insertions(+), 262 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 77e3a4a603d46..481698e169c92 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -15,6 +15,9 @@ struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext closure_bindings::Dict{IdTag,ClosureBindings} capture_rewriting::Union{Nothing,ClosureInfo{GraphType},SyntaxList{GraphType}} lambda_bindings::LambdaBindings + # True if we're in a section of code which preserves top-level sequencing + # such that closure types can be emitted inline with other code. + is_toplevel_seq_point::Bool toplevel_stmts::SyntaxList{GraphType} closure_infos::Dict{IdTag,ClosureInfo{GraphType}} end @@ -23,8 +26,8 @@ function ClosureConversionCtx(graph::GraphType, bindings::Bindings, mod::Module, closure_bindings::Dict{IdTag,ClosureBindings}, lambda_bindings::LambdaBindings) where {GraphType} ClosureConversionCtx{GraphType}( - graph, bindings, mod, closure_bindings, nothing, lambda_bindings, SyntaxList(graph), - Dict{IdTag,ClosureInfo{GraphType}}()) + graph, bindings, mod, closure_bindings, nothing, + lambda_bindings, false, SyntaxList(graph), Dict{IdTag,ClosureInfo{GraphType}}()) end function current_lambda_bindings(ctx::ClosureConversionCtx) @@ -288,6 +291,28 @@ function is_self_captured(ctx, x) !isnothing(lbinfo) && lbinfo.is_captured end +# Map the children of `ex` through _convert_closures, lifting any toplevel +# closure definition statements to occur before the other content of `ex`. 
+function map_cl_convert(ctx::ClosureConversionCtx, ex, toplevel_preserving) + if ctx.is_toplevel_seq_point && !toplevel_preserving + toplevel_stmts = SyntaxList(ctx) + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, ctx.capture_rewriting, ctx.lambda_bindings, + false, toplevel_stmts, ctx.closure_infos) + res = mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) + if isempty(toplevel_stmts) + res + else + @ast ctx ex [K"block" + toplevel_stmts... + res + ] + end + else + mapchildren(e->_convert_closures(ctx, e), ctx, ex) + end +end + function _convert_closures(ctx::ClosureConversionCtx, ex) k = kind(ex) if k == K"BindingId" @@ -358,7 +383,10 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) "#$(join(closure_binds.name_stack, "#"))##") closure_type_def, closure_type_ = type_for_closure(ctx, ex, name_str, field_syms, field_is_box) - push!(ctx.toplevel_stmts, closure_type_def) + if !ctx.is_toplevel_seq_point + push!(ctx.toplevel_stmts, closure_type_def) + closure_type_def = nothing + end closure_info = ClosureInfo(closure_type_, field_syms, field_inds) ctx.closure_infos[func_name_id] = closure_info type_params = SyntaxList(ctx) @@ -375,6 +403,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) end end @ast ctx ex [K"block" + closure_type_def closure_type := if isempty(type_params) closure_type_ else @@ -395,7 +424,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) # binding for `func_name` if it doesn't exist. @ast ctx ex [K"block" [K"method" func_name] - ::K"TOMBSTONE" + ::K"TOMBSTONE" # <- function_decl should not be used in value position ] end elseif k == K"function_type" @@ -410,17 +439,17 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind === :local cap_rewrite = is_closure ? 
ctx.closure_infos[name.var_id] : nothing ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, - ctx.toplevel_stmts, ctx.closure_infos) - body = _convert_closures(ctx2, ex[2]) + ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, + ctx.is_toplevel_seq_point, ctx.toplevel_stmts, ctx.closure_infos) + body = map_cl_convert(ctx2, ex[2], false) if is_closure - # Move methods to top level - # FIXME: Probably lots more work to do to make this correct - # Especially - # * Renumbering SSA vars - # * Ensuring that moved locals become slots in the top level thunk - push!(ctx.toplevel_stmts, body) - @ast ctx ex (::K"TOMBSTONE") + if ctx.is_toplevel_seq_point + body + else + # Move methods out to a top-level sequence point. + push!(ctx.toplevel_stmts, body) + @ast ctx ex (::K"TOMBSTONE") + end else @ast ctx ex [K"block" body @@ -435,8 +464,8 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) capture_rewrites = ClosureInfo(ex #=unused=#, field_syms, field_inds) ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, - ctx.toplevel_stmts, ctx.closure_infos) + ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, + false, ctx.toplevel_stmts, ctx.closure_infos) init_closure_args = SyntaxList(ctx) for id in field_orig_bindings @@ -457,17 +486,21 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) init_closure_args... ] else - mapchildren(e->_convert_closures(ctx, e), ctx, ex) + # A small number of kinds are toplevel-preserving in terms of closure + # closure definitions will be lifted out into `toplevel_stmts` if they + # occur inside `ex`. 
+ toplevel_seq_preserving = k == K"if" || k == K"elseif" || k == K"block" || + k == K"tryfinally" || k == K"trycatchelse" + map_cl_convert(ctx, ex, toplevel_seq_preserving) end end function closure_convert_lambda(ctx, ex) @assert kind(ex) == K"lambda" - body_stmts = SyntaxList(ctx) - toplevel_stmts = ex.is_toplevel_thunk ? body_stmts : ctx.toplevel_stmts lambda_bindings = ex.lambda_bindings interpolations = nothing if isnothing(ctx.capture_rewriting) + # Global method which may capture locals interpolations = SyntaxList(ctx) cap_rewrite = interpolations else @@ -475,13 +508,14 @@ function closure_convert_lambda(ctx, ex) end ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, cap_rewrite, lambda_bindings, - toplevel_stmts, ctx.closure_infos) + ex.is_toplevel_thunk, ctx.toplevel_stmts, ctx.closure_infos) lambda_children = SyntaxList(ctx) args = ex[1] push!(lambda_children, args) push!(lambda_children, ex[2]) # Add box initializations for arguments which are captured by an inner lambda + body_stmts = SyntaxList(ctx) for arg in children(args) kind(arg) != K"Placeholder" || continue if is_boxed(ctx, arg) @@ -491,8 +525,7 @@ function closure_convert_lambda(ctx, ex) ]) end end - # Convert body. Note that _convert_closures may call `push!(body_stmts, e)` - # internally for any expressions `e` which need to be moved to top level. + # Convert body. input_body_stmts = kind(ex[3]) != K"block" ? ex[3:3] : ex[3][1:end] for e in input_body_stmts push!(body_stmts, _convert_closures(ctx2, e)) @@ -538,5 +571,8 @@ function convert_closures(ctx::VariableAnalysisContext, ex) ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, ex.lambda_bindings) ex1 = closure_convert_lambda(ctx, ex) + if !isempty(ctx.toplevel_stmts) + throw(LoweringError(first(ctx.toplevel_stmts), "Top level code was found outside any top level context. 
`@generated` functions may not contain closures, including `do` syntax and generators/comprehension")) + end ctx, ex1 end diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 7134b9c043977..b7c62cc990067 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -1,5 +1,4 @@ - -@testset "Functions" begin +@testset "Closures" begin test_mod = Module() @@ -96,6 +95,21 @@ end @test test_mod.f_global_method_capturing_local() == 2 @test test_mod.f_global_method_capturing_local() == 3 +# Closure with multiple methods depending on local variables +f_closure_local_var_types = JuliaLowering.include_string(test_mod, """ +let T=Int, S=Float64 + function f_closure_local_var_types(::T) + 1 + end + function f_closure_local_var_types(::S) + 1.0 + end +end +""") +@test f_closure_local_var_types(2) == 1 +@test f_closure_local_var_types(2.0) == 1.0 +@test_throws MethodError f_closure_local_var_types("hi") + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 67ce53d199ea7..e7ae55ab5f6a9 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -9,15 +9,22 @@ let end #--------------------- 1 (= slot₂/x (call core.Box)) -2 (call core.svec :x) -3 (call core.svec true) -4 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂ %₃) -5 TestMod.#f##0 -6 (call core.svec %₅ core.Any) -7 (call core.svec) -8 SourceLocation::3:14 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 1 +3 slot₂/x +4 (call core.setfield! 
%₃ :contents %₂) +5 (call core.svec :x) +6 (call core.svec true) +7 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₅ %₆) +8 TestMod.#f##0 +9 slot₂/x +10 (new %₈ %₉) +11 (= slot₁/f %₁₀) +12 TestMod.#f##0 +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::3:14 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)] 1 TestMod.+ 2 (call core.getfield slot₁/#self# :x) @@ -29,13 +36,6 @@ end 8 (call core.getfield %₂ :contents) 9 (call %₁ %₈ slot₂/y) 10 (return %₉) -11 1 -12 slot₂/x -13 (call core.setfield! %₁₂ :contents %₁₁) -14 TestMod.#f##0 -15 slot₂/x -16 (new %₁₄ %₁₅) -17 (= slot₁/f %₁₆) 18 slot₁/f 19 (return %₁₈) @@ -66,27 +66,27 @@ let end #--------------------- 1 (= slot₂/x (call core.Box)) -2 (call core.svec :x) -3 (call core.svec true) -4 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₂ %₃) -5 TestMod.#f##1 -6 (call core.svec %₅ core.Any) -7 (call core.svec) -8 SourceLocation::3:14 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 1 +3 slot₂/x +4 (call core.setfield! %₃ :contents %₂) +5 (call core.svec :x) +6 (call core.svec true) +7 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₅ %₆) +8 TestMod.#f##1 +9 slot₂/x +10 (new %₈ %₉) +11 (= slot₁/f %₁₀) +12 TestMod.#f##1 +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::3:14 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 2 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! %₂ :contents %₁) 4 (return %₁) -11 1 -12 slot₂/x -13 (call core.setfield! 
%₁₂ :contents %₁₁) -14 TestMod.#f##1 -15 slot₂/x -16 (new %₁₄ %₁₅) -17 (= slot₁/f %₁₆) 18 slot₁/f 19 (return %₁₈) @@ -510,11 +510,16 @@ end 4 (call core.svec true) 5 (call JuliaLowering.eval_closure_type TestMod :#recursive_a##0 %₃ %₄) 6 TestMod.#recursive_a##0 -7 (call core.svec %₆) -8 (call core.svec) -9 SourceLocation::2:14 -10 (call core.svec %₇ %₈ %₉) -11 --- method core.nothing %₁₀ +7 slot₂/recursive_b +8 (new %₆ %₇) +9 slot₁/recursive_a +10 (call core.setfield! %₉ :contents %₈) +11 TestMod.#recursive_a##0 +12 (call core.svec %₁₁) +13 (call core.svec) +14 SourceLocation::2:14 +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ slots: [slot₁/#self#(!read) slot₂/recursive_b(!read)] 1 (call core.getfield slot₁/#self# :recursive_b) 2 (call core.isdefined %₁ :contents) @@ -525,15 +530,20 @@ end 7 (call core.getfield %₁ :contents) 8 (call %₇) 9 (return %₈) -12 (call core.svec :recursive_a) -13 (call core.svec true) -14 (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₂ %₁₃) -15 TestMod.#recursive_b##0 -16 (call core.svec %₁₅) -17 (call core.svec) -18 SourceLocation::5:14 -19 (call core.svec %₁₆ %₁₇ %₁₈) -20 --- method core.nothing %₁₉ +17 (call core.svec :recursive_a) +18 (call core.svec true) +19 (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₇ %₁₈) +20 TestMod.#recursive_b##0 +21 slot₁/recursive_a +22 (new %₂₀ %₂₁) +23 slot₂/recursive_b +24 (call core.setfield! %₂₃ :contents %₂₂) +25 TestMod.#recursive_b##0 +26 (call core.svec %₂₅) +27 (call core.svec) +28 SourceLocation::5:14 +29 (call core.svec %₂₆ %₂₇ %₂₈) +30 --- method core.nothing %₂₉ slots: [slot₁/#self#(!read) slot₂/recursive_a(!read)] 1 (call core.getfield slot₁/#self# :recursive_a) 2 (call core.isdefined %₁ :contents) @@ -544,16 +554,6 @@ end 7 (call core.getfield %₁ :contents) 8 (call %₇) 9 (return %₈) -21 TestMod.#recursive_a##0 -22 slot₂/recursive_b -23 (new %₂₁ %₂₂) -24 slot₁/recursive_a -25 (call core.setfield! 
%₂₄ :contents %₂₃) -26 TestMod.#recursive_b##0 -27 slot₁/recursive_a -28 (new %₂₆ %₂₇) -29 slot₂/recursive_b -30 (call core.setfield! %₂₉ :contents %₂₈) 31 slot₂/recursive_b 32 (call core.isdefined %₃₁ :contents) 33 (gotoifnot %₃₂ label₃₅) @@ -572,20 +572,33 @@ let y = y_init end #--------------------- 1 TestMod.y_init -2 (call core.svec :#f_kw_closure#0) -3 (call core.svec true) -4 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₂ %₃) -5 (call core.svec :y) -6 (call core.svec true) -7 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₅ %₆) -8 TestMod.##f_kw_closure#0##0 -9 TestMod.X -10 TestMod.#f_kw_closure##0 -11 (call core.svec %₈ %₉ %₁₀) -12 (call core.svec) -13 SourceLocation::2:14 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +2 (= slot₁/y (call core.Box)) +3 (= slot₂/#f_kw_closure#0 (call core.Box)) +4 slot₁/y +5 (call core.setfield! %₄ :contents %₁) +6 (call core.svec :#f_kw_closure#0) +7 (call core.svec true) +8 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₆ %₇) +9 TestMod.#f_kw_closure##0 +10 slot₂/#f_kw_closure#0 +11 (new %₉ %₁₀) +12 (= slot₃/f_kw_closure %₁₁) +13 (call core.svec :y) +14 (call core.svec true) +15 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₁₃ %₁₄) +16 TestMod.##f_kw_closure#0##0 +17 slot₁/y +18 (new %₁₆ %₁₇) +19 slot₂/#f_kw_closure#0 +20 (call core.setfield! 
%₁₉ :contents %₁₈) +21 TestMod.##f_kw_closure#0##0 +22 TestMod.X +23 TestMod.#f_kw_closure##0 +24 (call core.svec %₂₁ %₂₂ %₂₃) +25 (call core.svec) +26 SourceLocation::2:14 +27 (call core.svec %₂₄ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/#self#(!read) slot₄/y(!read)] 1 (meta :nkw 1) 2 TestMod.+ @@ -598,43 +611,13 @@ end 9 (call core.getfield %₃ :contents) 10 (call %₂ slot₂/x %₉) 11 (return %₁₀) -16 TestMod.#f_kw_closure##0 -17 (call core.svec %₁₆) -18 (call core.svec) -19 SourceLocation::2:14 -20 (call core.svec %₁₇ %₁₈ %₁₉) -21 --- method core.nothing %₂₀ - slots: [slot₁/#self# slot₂/#f_kw_closure#0(!read)] - 1 (call core.getfield slot₁/#self# :#f_kw_closure#0) - 2 (call core.isdefined %₁ :contents) - 3 (gotoifnot %₂ label₅) - 4 (goto label₇) - 5 (newvar slot₂/#f_kw_closure#0) - 6 slot₂/#f_kw_closure#0 - 7 (call core.getfield %₁ :contents) - 8 TestMod.x_default - 9 (call %₇ %₈ slot₁/#self#) - 10 (return %₉) -22 (= slot₁/y (call core.Box)) -23 (= slot₂/#f_kw_closure#0 (call core.Box)) -24 slot₁/y -25 (call core.setfield! %₂₄ :contents %₁) -26 TestMod.#f_kw_closure##0 -27 slot₂/#f_kw_closure#0 -28 (new %₂₆ %₂₇) -29 (= slot₃/f_kw_closure %₂₈) -30 TestMod.##f_kw_closure#0##0 -31 slot₁/y -32 (new %₃₀ %₃₁) -33 slot₂/#f_kw_closure#0 -34 (call core.setfield! 
%₃₃ :contents %₃₂) -35 (call core.typeof core.kwcall) -36 TestMod.#f_kw_closure##0 -37 (call core.svec %₃₅ core.NamedTuple %₃₆) -38 (call core.svec) -39 SourceLocation::2:14 -40 (call core.svec %₃₇ %₃₈ %₃₉) -41 --- code_info +29 (call core.typeof core.kwcall) +30 TestMod.#f_kw_closure##0 +31 (call core.svec %₂₉ core.NamedTuple %₃₀) +32 (call core.svec) +33 SourceLocation::2:14 +34 (call core.svec %₃₁ %₃₂ %₃₃) +35 --- code_info slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/x(!read) slot₆/#f_kw_closure#0(!read)] 1 (newvar slot₅/x) 2 (call core.isdefined slot₂/kws :x) @@ -668,10 +651,40 @@ end 30 (call core.getfield %₂₄ :contents) 31 (call %₃₀ %₁₆ slot₃/#self#) 32 (return %₃₁) -42 slot₂/#f_kw_closure#0 -43 (call core.svec %₄₂) -44 (call JuliaLowering.replace_captured_locals! %₄₁ %₄₃) -45 --- method core.nothing %₄₀ %₄₄ +36 slot₂/#f_kw_closure#0 +37 (call core.svec %₃₆) +38 (call JuliaLowering.replace_captured_locals! %₃₅ %₃₇) +39 --- method core.nothing %₃₄ %₃₈ +40 TestMod.#f_kw_closure##0 +41 (call core.svec %₄₀) +42 (call core.svec) +43 SourceLocation::2:14 +44 (call core.svec %₄₁ %₄₂ %₄₃) +45 --- method core.nothing %₄₄ + slots: [slot₁/#self# slot₂/#f_kw_closure#0(!read)] + 1 (call core.getfield slot₁/#self# :#f_kw_closure#0) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/#f_kw_closure#0) + 6 slot₂/#f_kw_closure#0 + 7 (call core.getfield %₁ :contents) + 8 TestMod.x_default + 9 (call %₇ %₈ slot₁/#self#) + 10 (return %₉) 46 slot₃/f_kw_closure 47 (return %₄₆) +######################################## +# Error: Closure outside any top level context +# (Should only happen in a user-visible way when lowering code emitted +# from a `@generated` function code generator.) +@ast_ [K"lambda"(is_toplevel_thunk=false) + [K"block"] + [K"block"] + [K"->" [K"tuple"] [K"block"]] +] +#--------------------- +LoweringError: +#= line 1 =# - Top level code was found outside any top level context. 
`@generated` functions may not contain closures, including `do` syntax and generators/comprehension + diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index a39211e2f8a67..c0013a3963775 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -74,17 +74,17 @@ end 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂) 4 TestMod.#f##0 -5 (call core.svec %₄) -6 (call core.svec) -7 SourceLocation::1:5 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +5 (new %₄) +6 (= slot₁/f %₅) +7 TestMod.#f##0 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::1:5 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read)] 1 TestMod.body 2 (return %₁) -10 TestMod.#f##0 -11 (new %₁₀) -12 (= slot₁/f %₁₁) 13 (return core.nothing) ######################################## diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index bd134e6292112..17190b259b10b 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -825,47 +825,47 @@ struct X X(a,b,c) = new(a) end #--------------------- -1 (call core.svec) -2 (call core.svec) -3 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂) -4 TestMod.#f##0 -5 (call core.svec %₄) +1 (newvar slot₂/f) +2 (global TestMod.X) +3 (const TestMod.X) +4 (call core.svec) +5 (call core.svec :x) 6 (call core.svec) -7 SourceLocation::3:5 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +7 (call core._structtype TestMod :X %₄ %₅ %₆ false 1) +8 (= slot₁/X %₇) +9 (call core._setsuper! %₇ core.Any) +10 (isdefined TestMod.X) +11 (gotoifnot %₁₀ label₂₁) +12 TestMod.X +13 (call core._equiv_typedef %₁₂ %₇) +14 (gotoifnot %₁₃ label₁₈) +15 TestMod.X +16 (= slot₁/X %₁₅) +17 (goto label₂₀) +18 slot₁/X +19 (= TestMod.X %₁₈) +20 (goto label₂₃) +21 slot₁/X +22 (= TestMod.X %₂₁) +23 slot₁/X +24 (call core.svec core.Any) +25 (call core._typebody! 
%₂₃ %₂₄) +26 (call core.svec) +27 (call core.svec) +28 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂₆ %₂₇) +29 TestMod.#f##0 +30 (new %₂₉) +31 (= slot₂/f %₃₀) +32 TestMod.#f##0 +33 (call core.svec %₃₂) +34 (call core.svec) +35 SourceLocation::3:5 +36 (call core.svec %₃₃ %₃₄ %₃₅) +37 --- method core.nothing %₃₆ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -10 (newvar slot₂/f) -11 (global TestMod.X) -12 (const TestMod.X) -13 (call core.svec) -14 (call core.svec :x) -15 (call core.svec) -16 (call core._structtype TestMod :X %₁₃ %₁₄ %₁₅ false 1) -17 (= slot₁/X %₁₆) -18 (call core._setsuper! %₁₆ core.Any) -19 (isdefined TestMod.X) -20 (gotoifnot %₁₉ label₃₀) -21 TestMod.X -22 (call core._equiv_typedef %₂₁ %₁₆) -23 (gotoifnot %₂₂ label₂₇) -24 TestMod.X -25 (= slot₁/X %₂₄) -26 (goto label₂₉) -27 slot₁/X -28 (= TestMod.X %₂₇) -29 (goto label₃₂) -30 slot₁/X -31 (= TestMod.X %₃₀) -32 slot₁/X -33 (call core.svec core.Any) -34 (call core._typebody! %₃₂ %₃₃) -35 TestMod.#f##0 -36 (new %₃₅) -37 (= slot₂/f %₃₆) 38 TestMod.X 39 (call core.apply_type core.Type %₃₈) 40 (call core.svec %₃₉ core.Any) @@ -924,96 +924,96 @@ struct X{S,T} f() = new{A,B}(1) end #--------------------- -1 (call core.svec) -2 (call core.svec) -3 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₁ %₂) -4 TestMod.#f##1 -5 (call core.svec %₄) -6 (call core.svec) -7 SourceLocation::5:5 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ - slots: [slot₁/#self#(!read)] - 1 TestMod.X - 2 TestMod.A - 3 TestMod.B - 4 (call core.apply_type %₁ %₂ %₃) - 5 (new %₄ 1) - 6 (return %₅) -10 (newvar slot₅/f) -11 (global TestMod.X) -12 (const TestMod.X) -13 (= slot₂/S (call core.TypeVar :S)) -14 (= slot₃/T (call core.TypeVar :T)) -15 slot₂/S -16 slot₃/T -17 (call core.svec %₁₅ %₁₆) -18 (call core.svec :x) -19 (call core.svec) -20 (call core._structtype TestMod :X %₁₇ %₁₈ %₁₉ false 1) -21 (= slot₄/X %₂₀) -22 (call core._setsuper! 
%₂₀ core.Any) -23 (isdefined TestMod.X) -24 (gotoifnot %₂₃ label₄₄) -25 TestMod.X -26 (call core._equiv_typedef %₂₅ %₂₀) -27 (gotoifnot %₂₆ label₄₁) -28 TestMod.X -29 (= slot₄/X %₂₈) -30 TestMod.X -31 (call top.getproperty %₃₀ :body) -32 (call top.getproperty %₃₁ :body) -33 (call top.getproperty %₃₂ :parameters) -34 (call top.indexed_iterate %₃₃ 1) -35 (= slot₂/S (call core.getfield %₃₄ 1)) -36 (= slot₁/iterstate (call core.getfield %₃₄ 2)) -37 slot₁/iterstate -38 (call top.indexed_iterate %₃₃ 2 %₃₇) -39 (= slot₃/T (call core.getfield %₃₈ 1)) -40 (goto label₄₃) -41 slot₄/X -42 (= TestMod.X %₄₁) -43 (goto label₄₆) -44 slot₄/X -45 (= TestMod.X %₄₄) -46 slot₄/X -47 (call core.svec core.Any) -48 (call core._typebody! %₄₆ %₄₇) -49 TestMod.X -50 TestMod.A -51 TestMod.B -52 (call core.apply_type %₄₉ %₅₀ %₅₁) -53 (call core.apply_type core.Type %₅₂) -54 (call core.svec %₅₃) -55 (call core.svec) -56 SourceLocation::3:5 -57 (call core.svec %₅₄ %₅₅ %₅₆) -58 --- method core.nothing %₅₇ +1 (newvar slot₅/f) +2 (global TestMod.X) +3 (const TestMod.X) +4 (= slot₂/S (call core.TypeVar :S)) +5 (= slot₃/T (call core.TypeVar :T)) +6 slot₂/S +7 slot₃/T +8 (call core.svec %₆ %₇) +9 (call core.svec :x) +10 (call core.svec) +11 (call core._structtype TestMod :X %₈ %₉ %₁₀ false 1) +12 (= slot₄/X %₁₁) +13 (call core._setsuper! 
%₁₁ core.Any) +14 (isdefined TestMod.X) +15 (gotoifnot %₁₄ label₃₅) +16 TestMod.X +17 (call core._equiv_typedef %₁₆ %₁₁) +18 (gotoifnot %₁₇ label₃₂) +19 TestMod.X +20 (= slot₄/X %₁₉) +21 TestMod.X +22 (call top.getproperty %₂₁ :body) +23 (call top.getproperty %₂₂ :body) +24 (call top.getproperty %₂₃ :parameters) +25 (call top.indexed_iterate %₂₄ 1) +26 (= slot₂/S (call core.getfield %₂₅ 1)) +27 (= slot₁/iterstate (call core.getfield %₂₅ 2)) +28 slot₁/iterstate +29 (call top.indexed_iterate %₂₄ 2 %₂₈) +30 (= slot₃/T (call core.getfield %₂₉ 1)) +31 (goto label₃₄) +32 slot₄/X +33 (= TestMod.X %₃₂) +34 (goto label₃₇) +35 slot₄/X +36 (= TestMod.X %₃₅) +37 slot₄/X +38 (call core.svec core.Any) +39 (call core._typebody! %₃₇ %₃₈) +40 TestMod.X +41 TestMod.A +42 TestMod.B +43 (call core.apply_type %₄₀ %₄₁ %₄₂) +44 (call core.apply_type core.Type %₄₃) +45 (call core.svec %₄₄) +46 (call core.svec) +47 SourceLocation::3:5 +48 (call core.svec %₄₅ %₄₆ %₄₇) +49 --- method core.nothing %₄₈ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -59 (= slot₆/U (call core.TypeVar :U)) -60 (= slot₇/V (call core.TypeVar :V)) -61 TestMod.X -62 slot₆/U -63 slot₇/V -64 (call core.apply_type %₆₁ %₆₂ %₆₃) -65 (call core.apply_type core.Type %₆₄) -66 (call core.svec %₆₅) -67 slot₆/U -68 slot₇/V -69 (call core.svec %₆₇ %₆₈) -70 SourceLocation::4:5 -71 (call core.svec %₆₆ %₆₉ %₇₀) -72 --- method core.nothing %₇₁ +50 (= slot₆/U (call core.TypeVar :U)) +51 (= slot₇/V (call core.TypeVar :V)) +52 TestMod.X +53 slot₆/U +54 slot₇/V +55 (call core.apply_type %₅₂ %₅₃ %₅₄) +56 (call core.apply_type core.Type %₅₅) +57 (call core.svec %₅₆) +58 slot₆/U +59 slot₇/V +60 (call core.svec %₅₈ %₅₉) +61 SourceLocation::4:5 +62 (call core.svec %₅₇ %₆₀ %₆₁) +63 --- method core.nothing %₆₂ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -73 TestMod.#f##1 -74 (new %₇₃) -75 (= slot₅/f %₇₄) +64 (call core.svec) +65 (call core.svec) +66 (call JuliaLowering.eval_closure_type 
TestMod :#f##1 %₆₄ %₆₅) +67 TestMod.#f##1 +68 (new %₆₇) +69 (= slot₅/f %₆₈) +70 TestMod.#f##1 +71 (call core.svec %₇₀) +72 (call core.svec) +73 SourceLocation::5:5 +74 (call core.svec %₇₁ %₇₂ %₇₃) +75 --- method core.nothing %₇₄ + slots: [slot₁/#self#(!read)] + 1 TestMod.X + 2 TestMod.A + 3 TestMod.B + 4 (call core.apply_type %₁ %₂ %₃) + 5 (new %₄ 1) + 6 (return %₅) 76 (return core.nothing) ######################################## diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index d49229b26c399..597de3bdad00f 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -25,7 +25,7 @@ function _ast_test_graph() ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=Union{SourceRef,NodeId,Tuple,LineNumberNode}, - var_id=Int, value=Any, name_val=String) + var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool) end function _source_node(graph, src) From d6b0c6ee01320ed2c0517bacd57596d94419cd85 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 7 Feb 2025 15:39:08 +1000 Subject: [PATCH 0992/1109] Ensure outer closures capture variables nested inside inner closures --- JuliaLowering/src/closure_conversion.jl | 7 ++- JuliaLowering/src/scope_analysis.jl | 14 ++++++ JuliaLowering/test/closures.jl | 16 ++++++ JuliaLowering/test/closures_ir.jl | 67 ++++++++++++++++++++----- JuliaLowering/test/generators.jl | 18 +++++++ JuliaLowering/test/generators_ir.jl | 15 ------ 6 files changed, 108 insertions(+), 29 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 481698e169c92..d93174581f2c7 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -391,8 +391,13 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ctx.closure_infos[func_name_id] = closure_info type_params = SyntaxList(ctx) init_closure_args = SyntaxList(ctx) - for (id,boxed) in zip(field_orig_bindings, field_is_box) + for (id, boxed) in 
zip(field_orig_bindings, field_is_box) field_val = binding_ex(ctx, id) + if is_self_captured(ctx, field_val) + # Access from outer closure if necessary but do not + # unbox to feed into the inner nested closure. + field_val = captured_var_access(ctx, field_val) + end push!(init_closure_args, field_val) if !boxed push!(type_params, @ast ctx ex [K"call" diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 4f7203e9a88ad..6899f7c520c1a 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -710,6 +710,20 @@ function analyze_variables!(ctx, ex) end ctx2 = VariableAnalysisContext(ctx.graph, ctx.bindings, ctx.mod, lambda_bindings, ctx.method_def_stack, ctx.closure_bindings) + # Add any captured bindings to the enclosing lambda, if necessary. + for (id,lbinfo) in pairs(lambda_bindings.bindings) + if lbinfo.is_captured + outer_lbinfo = lookup_lambda_binding(ctx.lambda_bindings, id) + if isnothing(outer_lbinfo) + # Inner lambda captures a variable. If it's not yet present + # in the outer lambda, the outer lambda must capture it as + # well so that the closure associated to the inner lambda + # can be initialized when `function_decl` is hit. + init_lambda_binding(ctx.lambda_bindings, id, is_captured=true, is_read=true) + end + end + end + # TODO: Types of any assigned captured vars will also be used and might be captured. foreach(e->analyze_variables!(ctx2, e), ex[3:end]) else diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index b7c62cc990067..031c20a2cd49b 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -110,6 +110,22 @@ end @test f_closure_local_var_types(2.0) == 1.0 @test_throws MethodError f_closure_local_var_types("hi") +# Multiply nested closures. In this case g_nest needs to capture `x` in order +# to construct an instance of `h_nest()` inside it. 
+@test JuliaLowering.include_string(test_mod, """ +begin + function f_nest(x) + function g_nest(y) + function h_nest(z) + (x,y,z) + end + end + end + + f_nest(1)(2)(3) +end +""") === (1,2,3) + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index e7ae55ab5f6a9..62e4aa9105289 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -285,7 +285,8 @@ end 19 (return %₁₈) ######################################## -# FIXME: Nested captures of arguments +# Nested captures - here `g` captures `x` because it is needed to initialize +# the closure `h` which captures both `x` and `y`. function f(x) function g(y) function h(z) @@ -294,18 +295,58 @@ function f(x) end end #--------------------- -LoweringError: -function f(x) -# ╙ ── Found unexpected binding of kind argument - function g(y) - function h(z) - -Detailed provenance: -#₈/x -└─ x - └─ x - └─ @ :1 - +1 (method TestMod.f) +2 (call core.svec :x) +3 (call core.svec false) +4 (call JuliaLowering.eval_closure_type TestMod :#f#g##4 %₂ %₃) +5 (call core.svec :x :y) +6 (call core.svec false false) +7 (call JuliaLowering.eval_closure_type TestMod :#f#g#h##0 %₅ %₆) +8 TestMod.#f#g#h##0 +9 (call core.svec %₈ core.Any) +10 (call core.svec) +11 SourceLocation::3:18 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(!read) slot₂/z] + 1 (call core.getfield slot₁/#self# :x) + 2 (call core.getfield slot₁/#self# :y) + 3 (call core.tuple %₁ %₂ slot₂/z) + 4 (return %₃) +14 TestMod.#f#g##4 +15 (call core.svec %₁₄ core.Any) +16 (call core.svec) +17 SourceLocation::2:14 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(!read) slot₂/y(!read) slot₃/h] + 1 TestMod.#f#g#h##0 + 2 (call core.getfield slot₁/#self# :x) + 3 (call core.typeof %₂) + 4 (call core.typeof slot₂/y) + 5 (call core.apply_type %₁ %₃ %₄) + 6 (call core.getfield 
slot₁/#self# :x) + 7 (new %₅ %₆ slot₂/y) + 8 (= slot₃/h %₇) + 9 slot₃/h + 10 (return %₉) +20 TestMod.f +21 (call core.Typeof %₂₀) +22 (call core.svec %₂₁ core.Any) +23 (call core.svec) +24 SourceLocation::1:10 +25 (call core.svec %₂₂ %₂₃ %₂₄) +26 --- method core.nothing %₂₅ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/g] + 1 TestMod.#f#g##4 + 2 (call core.typeof slot₂/x) + 3 (call core.apply_type %₁ %₂) + 4 (new %₃ slot₂/x) + 5 (= slot₃/g %₄) + 6 slot₃/g + 7 (return %₆) +27 TestMod.f +28 (return %₂₇) ######################################## # Global method capturing local variables diff --git a/JuliaLowering/test/generators.jl b/JuliaLowering/test/generators.jl index 9688066fc1cf6..7dce6236afe20 100644 --- a/JuliaLowering/test/generators.jl +++ b/JuliaLowering/test/generators.jl @@ -55,4 +55,22 @@ Tuple{Int,Int}[(x,y) for x in 1:2, y in 1:3] """) == [(1,1) (1,2) (1,3) (2,1) (2,2) (2,3)] +# Triply nested comprehension +@test JuliaLowering.include_string(test_mod, """ +[(x,y,z) for x in 1:3 for y in 4:5 for z in 6:7] +""") == [ + (1, 4, 6) + (1, 4, 7) + (1, 5, 6) + (1, 5, 7) + (2, 4, 6) + (2, 4, 7) + (2, 5, 6) + (2, 5, 7) + (3, 4, 6) + (3, 4, 7) + (3, 5, 6) + (3, 5, 7) +] + end diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index 91265248595ea..22b2a3ab01f85 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -282,18 +282,3 @@ T[(x,y) for x in xs, y in ys] 49 (goto label₁₅) 50 (return %₇) -######################################## -# FIXME - error in nested closure conversion: Triply nested generator -((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) -#--------------------- -LoweringError: -((x,y,z) for x in 1:3 for y in 4:5 for z in 6:7) -# ╙ ── Found unexpected binding of kind argument - -Detailed provenance: -#₁₃/x -└─ x - └─ x - └─ @ :1 - - From 399269ed5cc69f3325ab50e957d616b6be39b57e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 7 Feb 2025 16:04:48 +1000 Subject: 
[PATCH 0993/1109] Test infrastructure for limiting output to one method When testing the IR generated by closures or complex function desugaring there can be a lot of methods generated and it can be hard to review changes. Add a `method_filter` to test cases to limit the output to a method of interest. --- JuliaLowering/src/utils.jl | 47 +++++++++++++++++++-- JuliaLowering/test/closures_ir.jl | 70 +++++++------------------------ JuliaLowering/test/utils.jl | 53 ++++++++++++----------- 3 files changed, 87 insertions(+), 83 deletions(-) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index d26c6d41f4a59..ced8827ef044e 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -75,7 +75,46 @@ function subscript_str(i) "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") end -function print_ir(io::IO, ex, indent="") +function _deref_ssa(stmts, ex) + while kind(ex) == K"SSAValue" + ex = stmts[ex.var_id] + end + ex +end + +function _find_method_lambda(ex, name) + @assert kind(ex) == K"code_info" + # Heuristic search through outer thunk for the method in question. 
+ method_found = false + stmts = children(ex[1]) + for e in stmts + if kind(e) == K"method" && numchildren(e) >= 2 + sig = _deref_ssa(stmts, e[2]) + @assert kind(sig) == K"call" + arg_types = _deref_ssa(stmts, sig[2]) + @assert kind(arg_types) == K"call" + self_type = _deref_ssa(stmts, arg_types[2]) + if kind(self_type) == K"globalref" && occursin(name, self_type.name_val) + return e[3] + end + end + end +end + +function print_ir(io::IO, ex, method_filter=nothing) + @assert kind(ex) == K"code_info" + if !isnothing(method_filter) + filtered = _find_method_lambda(ex, method_filter) + if isnothing(filtered) + @warn "Method not found with method filter $method_filter" + else + ex = filtered + end + end + _print_ir(io, ex, "") +end + +function _print_ir(io::IO, ex, indent) added_indent = " " @assert (kind(ex) == K"lambda" || kind(ex) == K"code_info") && kind(ex[1]) == K"block" if !ex.is_toplevel_thunk && kind(ex) == K"code_info" @@ -105,7 +144,7 @@ function print_ir(io::IO, ex, indent="") print(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) if kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" println(io) - print_ir(io, e[3], indent*added_indent) + _print_ir(io, e[3], indent*added_indent) else println(io, " ", string(e[3])) end @@ -116,10 +155,10 @@ function print_ir(io::IO, ex, indent="") print(io, " ", e[i]) end println(io) - print_ir(io, e[5], indent*added_indent) + _print_ir(io, e[5], indent*added_indent) elseif kind(e) == K"code_info" println(io, indent, lno, " --- ", e.is_toplevel_thunk ? 
"thunk" : "code_info") - print_ir(io, e, indent*added_indent) + _print_ir(io, e, indent*added_indent) else code = string(e) println(io, indent, lno, " ", code) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 62e4aa9105289..953d9bf85f80b 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -287,66 +287,26 @@ end ######################################## # Nested captures - here `g` captures `x` because it is needed to initialize # the closure `h` which captures both `x` and `y`. -function f(x) - function g(y) - function h(z) +# [method_filter: #f_nest#g_nest##0] +function f_nest(x) + function g_nest(y) + function h_nest(z) (x,y,z) end end end #--------------------- -1 (method TestMod.f) -2 (call core.svec :x) -3 (call core.svec false) -4 (call JuliaLowering.eval_closure_type TestMod :#f#g##4 %₂ %₃) -5 (call core.svec :x :y) -6 (call core.svec false false) -7 (call JuliaLowering.eval_closure_type TestMod :#f#g#h##0 %₅ %₆) -8 TestMod.#f#g#h##0 -9 (call core.svec %₈ core.Any) -10 (call core.svec) -11 SourceLocation::3:18 -12 (call core.svec %₉ %₁₀ %₁₁) -13 --- method core.nothing %₁₂ - slots: [slot₁/#self#(!read) slot₂/z] - 1 (call core.getfield slot₁/#self# :x) - 2 (call core.getfield slot₁/#self# :y) - 3 (call core.tuple %₁ %₂ slot₂/z) - 4 (return %₃) -14 TestMod.#f#g##4 -15 (call core.svec %₁₄ core.Any) -16 (call core.svec) -17 SourceLocation::2:14 -18 (call core.svec %₁₅ %₁₆ %₁₇) -19 --- method core.nothing %₁₈ - slots: [slot₁/#self#(!read) slot₂/y(!read) slot₃/h] - 1 TestMod.#f#g#h##0 - 2 (call core.getfield slot₁/#self# :x) - 3 (call core.typeof %₂) - 4 (call core.typeof slot₂/y) - 5 (call core.apply_type %₁ %₃ %₄) - 6 (call core.getfield slot₁/#self# :x) - 7 (new %₅ %₆ slot₂/y) - 8 (= slot₃/h %₇) - 9 slot₃/h - 10 (return %₉) -20 TestMod.f -21 (call core.Typeof %₂₀) -22 (call core.svec %₂₁ core.Any) -23 (call core.svec) -24 SourceLocation::1:10 -25 (call core.svec %₂₂ %₂₃ %₂₄) -26 --- 
method core.nothing %₂₅ - slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/g] - 1 TestMod.#f#g##4 - 2 (call core.typeof slot₂/x) - 3 (call core.apply_type %₁ %₂) - 4 (new %₃ slot₂/x) - 5 (= slot₃/g %₄) - 6 slot₃/g - 7 (return %₆) -27 TestMod.f -28 (return %₂₇) +slots: [slot₁/#self#(!read) slot₂/y(!read) slot₃/h_nest] +1 TestMod.#f_nest#g_nest#h_nest##0 +2 (call core.getfield slot₁/#self# :x) +3 (call core.typeof %₂) +4 (call core.typeof slot₂/y) +5 (call core.apply_type %₁ %₃ %₄) +6 (call core.getfield slot₁/#self# :x) +7 (new %₅ %₆ slot₂/y) +8 (= slot₃/h_nest %₇) +9 slot₃/h_nest +10 (return %₉) ######################################## # Global method capturing local variables diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 597de3bdad00f..817a4de660b65 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -126,8 +126,13 @@ function match_ir_test_case(case_str) error("Too many sections in IR test case") expect_error = startswith(description, "Error") is_broken = startswith(description, "FIXME") + method_filter = begin + mf = match(r"\[method_filter: *(.*)\]", description) + isnothing(mf) ? 
nothing : strip(mf[1]) + end (; expect_error=expect_error, is_broken=is_broken, description=strip(description), + method_filter=method_filter, input=strip(input), output=strip(output)) end @@ -152,8 +157,8 @@ function setup_ir_test_module(preamble) test_mod end -function format_ir_for_test(mod, description, input, expect_error=false, is_broken=false) - ex = parsestmt(SyntaxTree, input) +function format_ir_for_test(mod, case) + ex = parsestmt(SyntaxTree, case.input) try if kind(ex) == K"macrocall" && kind(ex[1]) == K"MacroName" && ex[1].name_val == "@ast_" # Total hack, until @ast_ can be implemented in terms of new-style @@ -161,22 +166,22 @@ function format_ir_for_test(mod, description, input, expect_error=false, is_brok ex = JuliaLowering.eval(mod, Expr(ex)) end x = JuliaLowering.lower(mod, ex) - if expect_error - error("Expected a lowering error in test case \"$description\"") + if case.expect_error + error("Expected a lowering error in test case \"$(case.description)\"") end - ir = strip(sprint(JuliaLowering.print_ir, x)) + ir = strip(sprint(JuliaLowering.print_ir, x, case.method_filter)) return replace(ir, string(mod)=>"TestMod") catch exc if exc isa InterruptException rethrow() - elseif expect_error && (exc isa LoweringError) + elseif case.expect_error && (exc isa LoweringError) return sprint(io->Base.showerror(io, exc, show_detail=false)) - elseif expect_error && (exc isa MacroExpansionError) + elseif case.expect_error && (exc isa MacroExpansionError) return sprint(io->Base.showerror(io, exc)) - elseif is_broken + elseif case.is_broken return sprint(io->Base.showerror(io, exc)) else - throw("Error in test case \"$description\"") + throw("Error in test case \"$(case.description)\"") end end end @@ -184,17 +189,17 @@ end function test_ir_cases(filename::AbstractString) preamble, cases = read_ir_test_cases(filename) test_mod = setup_ir_test_module(preamble) - for (expect_error, is_broken, description, input, ref) in cases - if is_broken + for case in cases + if 
case.is_broken continue end - output = format_ir_for_test(test_mod, description, input, expect_error) - @testset "$description" begin - if output != ref + output = format_ir_for_test(test_mod, case) + @testset "$(case.description)" begin + if output != case.output # Do additional error dumping, as @test will not format errors in a nice way - @error "Test \"$description\" failed" output=Text(output) ref=Text(ref) + @error "Test \"$(case.description)\" failed" output=Text(output) ref=Text(case.output) end - @test output == ref + @test output == case.output end end end @@ -213,20 +218,20 @@ function refresh_ir_test_cases(filename, pattern=nothing) println(io, preamble, "\n") println(io, "#*******************************************************************************") end - for (expect_error, is_broken, description, input, ref) in cases - if isnothing(pattern) || occursin(pattern, description) - ir = format_ir_for_test(test_mod, description, input, expect_error, is_broken) - if rstrip(ir) != ref - @info "Refreshing test case $(repr(description)) in $filename" + for case in cases + if isnothing(pattern) || occursin(pattern, case.description) + ir = format_ir_for_test(test_mod, case) + if rstrip(ir) != case.output + @info "Refreshing test case $(repr(case.description)) in $filename" end else - ir = ref + ir = case.output end println(io, """ ######################################## - $(comment_description(description)) - $(strip(input)) + $(comment_description(case.description)) + $(strip(case.input)) #--------------------- $ir """ From ede0365a50d51592300213c016c6d5143bfe4644 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 7 Feb 2025 19:12:48 +1000 Subject: [PATCH 0994/1109] Ensure closures capture the types of typed variables --- JuliaLowering/src/closure_conversion.jl | 13 ++--- JuliaLowering/src/scope_analysis.jl | 45 +++++++++------ JuliaLowering/test/assignments_ir.jl | 26 ++++----- JuliaLowering/test/closures.jl | 56 +++++++++++++++++++ 
JuliaLowering/test/closures_ir.jl | 34 ++++++++++++ JuliaLowering/test/decls_ir.jl | 74 ++++++++++++------------- JuliaLowering/test/destructuring_ir.jl | 24 ++++---- 7 files changed, 179 insertions(+), 93 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index d93174581f2c7..211d6922e765c 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -90,29 +90,26 @@ end # global and for converting the return value of a function call to the declared # return type. function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) - # Require that the caller make `type` "simple", for now (can generalize - # later if necessary) - kt = kind(type) - @assert (kt == K"Identifier" || kt == K"BindingId" || is_literal(kt)) # Use a slot to permit union-splitting this in inference tmp = new_local_binding(ctx, srcref, "tmp", is_always_defined=true) @ast ctx srcref [K"block" + type_tmp := type # [K"=" type_ssa renumber_assigned_ssavalues(type)] [K"=" tmp ex] [K"if" - [K"call" "isa"::K"core" tmp type] + [K"call" "isa"::K"core" tmp type_tmp] "nothing"::K"core" [K"=" tmp if do_typeassert [K"call" "typeassert"::K"core" - [K"call" "convert"::K"top" type tmp] - type + [K"call" "convert"::K"top" type_tmp tmp] + type_tmp ] else - [K"call" "convert"::K"top" type tmp] + [K"call" "convert"::K"top" type_tmp tmp] end ] ] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 6899f7c520c1a..343b5c6b246fc 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -433,6 +433,14 @@ function _resolve_scopes(ctx, ex::SyntaxTree) throw(LoweringError(ex, "type declarations for global variables must be at top level, not inside a function")) end end + id = ex_out[1] + if kind(id) != K"Placeholder" + binfo = lookup_binding(ctx, id) + if !isnothing(binfo.type) + throw(LoweringError(ex, "multiple type declarations found for 
`$(binfo.name)`")) + end + update_binding!(ctx, id; type=ex_out[2]) + end ex_out elseif k == K"always_defined" id = lookup_var(ctx, NameKey(ex[1])) @@ -624,14 +632,22 @@ function analyze_variables!(ctx, ex) k = kind(ex) if k == K"BindingId" if has_lambda_binding(ctx, ex) - # FIXME: Move this after closure conversion so that we don't need + # TODO: Move this after closure conversion so that we don't need # to model the closure conversion transformations here. update_lambda_binding!(ctx, ex, is_read=true) + else + binfo = lookup_binding(ctx, ex.var_id) + if !binfo.is_ssa && binfo.kind != :global + # The type of typed locals is invisible in the previous pass, + # but is filled in here. + init_lambda_binding(ctx.lambda_bindings, ex.var_id, is_captured=true, is_read=true) + update_binding!(ctx, ex, is_captured=true) + end end elseif is_leaf(ex) || is_quoted(ex) return elseif k == K"local" || k == K"global" - # Uses of bindings which don't count as uses. + # Presence of BindingId within local/global is ignored. return elseif k == K"=" lhs = ex[1] @@ -640,6 +656,12 @@ function analyze_variables!(ctx, ex) if has_lambda_binding(ctx, lhs) update_lambda_binding!(ctx, lhs, is_assigned=true) end + lhs_binfo = lookup_binding(ctx, lhs) + if !isnothing(lhs_binfo.type) + # Assignments introduce a variable's type later during closure + # conversion, but we must model that explicitly here. 
+ analyze_variables!(ctx, lhs_binfo.type) + end end analyze_variables!(ctx, ex[2]) elseif k == K"function_decl" @@ -655,17 +677,6 @@ function analyze_variables!(ctx, ex) if kind(ex[1]) != K"BindingId" || lookup_binding(ctx, ex[1]).kind !== :local analyze_variables!(ctx, ex[1]) end - elseif k == K"decl" - @chk numchildren(ex) == 2 - id = ex[1] - if kind(id) != K"Placeholder" - binfo = lookup_binding(ctx, id) - if !isnothing(binfo.type) - throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) - end - update_binding!(ctx, id; type=ex[2]) - end - analyze_variables!(ctx, ex[2]) elseif k == K"const" id = ex[1] if lookup_binding(ctx, id).kind == :local @@ -677,7 +688,7 @@ function analyze_variables!(ctx, ex) if kind(name) == K"BindingId" id = name.var_id if has_lambda_binding(ctx, id) - # FIXME: Move this after closure conversion so that we don't need + # TODO: Move this after closure conversion so that we don't need # to model the closure conversion transformations. update_lambda_binding!(ctx, id, is_called=true) end @@ -710,9 +721,10 @@ function analyze_variables!(ctx, ex) end ctx2 = VariableAnalysisContext(ctx.graph, ctx.bindings, ctx.mod, lambda_bindings, ctx.method_def_stack, ctx.closure_bindings) - # Add any captured bindings to the enclosing lambda, if necessary. + foreach(e->analyze_variables!(ctx2, e), ex[3:end]) # body & return type for (id,lbinfo) in pairs(lambda_bindings.bindings) if lbinfo.is_captured + # Add any captured bindings to the enclosing lambda, if necessary. outer_lbinfo = lookup_lambda_binding(ctx.lambda_bindings, id) if isnothing(outer_lbinfo) # Inner lambda captures a variable. If it's not yet present @@ -723,9 +735,6 @@ function analyze_variables!(ctx, ex) end end end - - # TODO: Types of any assigned captured vars will also be used and might be captured. 
- foreach(e->analyze_variables!(ctx2, e), ex[3:end]) else foreach(e->analyze_variables!(ctx, e), children(ex)) end diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 80a419ce5d8f7..9a1393c22a3a8 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -81,21 +81,19 @@ end 1 (newvar slot₁/x) 2 TestMod.f 3 (call %₂) -4 (= slot₂/tmp %₃) -5 slot₂/tmp -6 TestMod.T -7 (call core.isa %₅ %₆) +4 TestMod.T +5 (= slot₂/tmp %₃) +6 slot₂/tmp +7 (call core.isa %₆ %₄) 8 (gotoifnot %₇ label₁₀) -9 (goto label₁₅) -10 TestMod.T -11 slot₂/tmp -12 (call top.convert %₁₀ %₁₁) -13 TestMod.T -14 (= slot₂/tmp (call core.typeassert %₁₂ %₁₃)) -15 slot₂/tmp -16 (= slot₁/x %₁₅) -17 slot₁/x -18 (return %₁₇) +9 (goto label₁₃) +10 slot₂/tmp +11 (call top.convert %₄ %₁₀) +12 (= slot₂/tmp (call core.typeassert %₁₁ %₄)) +13 slot₂/tmp +14 (= slot₁/x %₁₃) +15 slot₁/x +16 (return %₁₅) ######################################## # "complex lhs" of `::T` => type-assert, not decl diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 031c20a2cd49b..27a5afa610818 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -126,6 +126,62 @@ begin end """) === (1,2,3) +# Closure with return type must capture the return type +@test JuliaLowering.include_string(test_mod, """ +let T = Int + function f_captured_return_type()::T + 2.0 + end + f_captured_return_type() +end +""") === 2 + +# Capturing a typed local +@test JuliaLowering.include_string(test_mod, """ +let T = Int + x::T = 1.0 + function f_captured_typed_local() + x = 2.0 + end + f_captured_typed_local() + x +end +""") === 2 + +# Capturing a typed local where the type is a nontrivial expression +@test begin + res = JuliaLowering.include_string(test_mod, """ + let T = Int, V=Vector + x::V{T} = [1,2] + function f_captured_typed_local_composite() + x = [100.0, 200.0] + end + f_captured_typed_local_composite() + x + end + """) + 
res == [100, 200] && eltype(res) == Int +end + +# Evil case where we mutate `T` which is the type of `x`, such that x is +# eventually set to a Float64. +# +# Completely dynamic types for variables should be disallowed somehow?? For +# example, by emitting the expression computing the type of `x` alongside the +# newvar node. However, for now we verify that this potentially evil behavior +# is compatible with the existing implementation :) +@test JuliaLowering.include_string(test_mod, """ +let T = Int + x::T = 1.0 + function f_captured_mutating_typed_local() + x = 2 + end + T = Float64 + f_captured_mutating_typed_local() + x +end +""") === 2.0 + # Anon function syntax @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 953d9bf85f80b..6d6d13396cdab 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -676,6 +676,40 @@ end 46 slot₃/f_kw_closure 47 (return %₄₆) +######################################## +# Closure capturing a typed local must also capture the type expression +# [method_filter: #f_captured_typed_local##0] +let T=Blah + x::T = 1.0 + function f_captured_typed_local() + x = 2.0 + end + f_captured_typed_local() + x +end +#--------------------- +slots: [slot₁/#self#(!read) slot₂/T(!read) slot₃/tmp(!read)] +1 2.0 +2 (call core.getfield slot₁/#self# :x) +3 (call core.getfield slot₁/#self# :T) +4 (call core.isdefined %₃ :contents) +5 (gotoifnot %₄ label₇) +6 (goto label₉) +7 (newvar slot₂/T) +8 slot₂/T +9 (call core.getfield %₃ :contents) +10 (= slot₃/tmp %₁) +11 slot₃/tmp +12 (call core.isa %₁₁ %₉) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₈) +15 slot₃/tmp +16 (call top.convert %₉ %₁₅) +17 (= slot₃/tmp (call core.typeassert %₁₆ %₉)) +18 slot₃/tmp +19 (call core.setfield! 
%₂ :contents %₁₈) +20 (return %₁) + ######################################## # Error: Closure outside any top level context # (Should only happen in a user-visible way when lowering code emitted diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index c284ff77edc07..a8b9fd98f0c91 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -4,20 +4,18 @@ local x::T = 1 #--------------------- 1 (newvar slot₁/x) 2 1 -3 (= slot₂/tmp %₂) -4 slot₂/tmp -5 TestMod.T -6 (call core.isa %₄ %₅) +3 TestMod.T +4 (= slot₂/tmp %₂) +5 slot₂/tmp +6 (call core.isa %₅ %₃) 7 (gotoifnot %₆ label₉) -8 (goto label₁₄) -9 TestMod.T -10 slot₂/tmp -11 (call top.convert %₉ %₁₀) -12 TestMod.T -13 (= slot₂/tmp (call core.typeassert %₁₁ %₁₂)) -14 slot₂/tmp -15 (= slot₁/x %₁₄) -16 (return %₂) +8 (goto label₁₂) +9 slot₂/tmp +10 (call top.convert %₃ %₉) +11 (= slot₂/tmp (call core.typeassert %₁₀ %₃)) +12 slot₂/tmp +13 (= slot₁/x %₁₂) +14 (return %₂) ######################################## # const @@ -133,35 +131,31 @@ end 8 --- method core.nothing %₇ slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read) slot₄/tmp(!read)] 1 1 - 2 (= slot₃/tmp %₁) - 3 slot₃/tmp - 4 TestMod.Int - 5 (call core.isa %₃ %₄) + 2 TestMod.Int + 3 (= slot₃/tmp %₁) + 4 slot₃/tmp + 5 (call core.isa %₄ %₂) 6 (gotoifnot %₅ label₈) - 7 (goto label₁₃) - 8 TestMod.Int - 9 slot₃/tmp - 10 (call top.convert %₈ %₉) - 11 TestMod.Int - 12 (= slot₃/tmp (call core.typeassert %₁₀ %₁₁)) - 13 slot₃/tmp - 14 (= slot₂/x %₁₃) - 15 2.0 - 16 (= slot₄/tmp %₁₅) - 17 slot₄/tmp - 18 TestMod.Int - 19 (call core.isa %₁₇ %₁₈) - 20 (gotoifnot %₁₉ label₂₂) - 21 (goto label₂₇) - 22 TestMod.Int + 7 (goto label₁₁) + 8 slot₃/tmp + 9 (call top.convert %₂ %₈) + 10 (= slot₃/tmp (call core.typeassert %₉ %₂)) + 11 slot₃/tmp + 12 (= slot₂/x %₁₁) + 13 2.0 + 14 TestMod.Int + 15 (= slot₄/tmp %₁₃) + 16 slot₄/tmp + 17 (call core.isa %₁₆ %₁₄) + 18 (gotoifnot %₁₇ label₂₀) + 19 (goto label₂₃) + 20 slot₄/tmp + 21 (call 
top.convert %₁₄ %₂₀) + 22 (= slot₄/tmp (call core.typeassert %₂₁ %₁₄)) 23 slot₄/tmp - 24 (call top.convert %₂₂ %₂₃) - 25 TestMod.Int - 26 (= slot₄/tmp (call core.typeassert %₂₄ %₂₅)) - 27 slot₄/tmp - 28 (= slot₂/x %₂₇) - 29 slot₂/x - 30 (return %₂₉) + 24 (= slot₂/x %₂₃) + 25 slot₂/x + 26 (return %₂₅) 9 TestMod.f 10 (return %₉) diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index 65b016f90dce7..f6b28b9a3f66d 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -341,20 +341,18 @@ end 1 (newvar slot₁/x) 2 TestMod.rhs 3 (call top.getproperty %₂ :x) -4 (= slot₂/tmp %₃) -5 slot₂/tmp -6 TestMod.T -7 (call core.isa %₅ %₆) +4 TestMod.T +5 (= slot₂/tmp %₃) +6 slot₂/tmp +7 (call core.isa %₆ %₄) 8 (gotoifnot %₇ label₁₀) -9 (goto label₁₅) -10 TestMod.T -11 slot₂/tmp -12 (call top.convert %₁₀ %₁₁) -13 TestMod.T -14 (= slot₂/tmp (call core.typeassert %₁₂ %₁₃)) -15 slot₂/tmp -16 (= slot₁/x %₁₅) -17 (return %₂) +9 (goto label₁₃) +10 slot₂/tmp +11 (call top.convert %₄ %₁₀) +12 (= slot₂/tmp (call core.typeassert %₁₁ %₄)) +13 slot₂/tmp +14 (= slot₁/x %₁₃) +15 (return %₂) ######################################## # Error: Property destructuring with frankentuple From 298982220f1558c938cfd58d77b29f39765dff1c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 7 Feb 2025 20:45:10 +1000 Subject: [PATCH 0995/1109] Call inner helper functions in struct typedef tests These fixmes have been previously resolved by closure conversion. 
--- JuliaLowering/test/typedefs.jl | 4 +- JuliaLowering/test/typedefs_ir.jl | 90 +++++++++++++++++++------------ 2 files changed, 58 insertions(+), 36 deletions(-) diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index 5b689522caf82..d2f0594b24eac 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -136,7 +136,7 @@ struct S6 S6_f() = new(42) "some docs" - S6() = new(42) # FIXME: call S6_f() + S6() = S6_f() S6(x) = new(x) end """) === nothing @@ -164,7 +164,7 @@ struct S7{S,T} # Cases where new{...} is called S7() = new{Int,Int}(10.0, "y5") - S7{UInt8}() = new{UInt8,UInt8}(10.0, "y6") # FIXME: call S7_f() + S7{UInt8}() = S7_f() S7_f() = new{UInt8,UInt8}(10.0, "y6") end """) === nothing diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 17190b259b10b..14f0b1cc0f7f4 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -816,7 +816,7 @@ end struct X x f() = new(1) - #X() = f() # FIXME: this X() captures `f` (in flisp, as a Box :-/ ) + X() = f() # this X() captures `f` (in flisp, as a Box :-/ ) X(x) = new(x) X(y,z)::ReallyXIPromise = new(y+z) """ @@ -825,7 +825,7 @@ struct X X(a,b,c) = new(a) end #--------------------- -1 (newvar slot₂/f) +1 (= slot₂/f (call core.Box)) 2 (global TestMod.X) 3 (const TestMod.X) 4 (call core.svec) @@ -855,35 +855,57 @@ end 28 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂₆ %₂₇) 29 TestMod.#f##0 30 (new %₂₉) -31 (= slot₂/f %₃₀) -32 TestMod.#f##0 -33 (call core.svec %₃₂) -34 (call core.svec) -35 SourceLocation::3:5 -36 (call core.svec %₃₃ %₃₄ %₃₅) -37 --- method core.nothing %₃₆ +31 slot₂/f +32 (call core.setfield! 
%₃₁ :contents %₃₀) +33 TestMod.#f##0 +34 (call core.svec %₃₃) +35 (call core.svec) +36 SourceLocation::3:5 +37 (call core.svec %₃₄ %₃₅ %₃₆) +38 --- method core.nothing %₃₇ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -38 TestMod.X -39 (call core.apply_type core.Type %₃₈) -40 (call core.svec %₃₉ core.Any) -41 (call core.svec) -42 SourceLocation::5:5 -43 (call core.svec %₄₀ %₄₁ %₄₂) -44 --- method core.nothing %₄₃ +39 TestMod.X +40 (call core.apply_type core.Type %₃₉) +41 (call core.svec %₄₀) +42 (call core.svec) +43 SourceLocation::4:5 +44 (call core.svec %₄₁ %₄₂ %₄₃) +45 --- code_info + slots: [slot₁/#ctor-self#(!read) slot₂/f(!read)] + 1 (captured_local 1) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/f) + 6 slot₂/f + 7 (call core.getfield %₁ :contents) + 8 (call %₇) + 9 (return %₈) +46 slot₂/f +47 (call core.svec %₄₆) +48 (call JuliaLowering.replace_captured_locals! %₄₅ %₄₇) +49 --- method core.nothing %₄₄ %₄₈ +50 TestMod.X +51 (call core.apply_type core.Type %₅₀) +52 (call core.svec %₅₁ core.Any) +53 (call core.svec) +54 SourceLocation::5:5 +55 (call core.svec %₅₂ %₅₃ %₅₄) +56 --- method core.nothing %₅₅ slots: [slot₁/#ctor-self# slot₂/x] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/x) 3 (return %₂) -45 TestMod.X -46 (call core.apply_type core.Type %₄₅) -47 (call core.svec %₄₆ core.Any core.Any) -48 (call core.svec) -49 SourceLocation::6:5 -50 (call core.svec %₄₇ %₄₈ %₄₉) -51 --- method core.nothing %₅₀ +57 TestMod.X +58 (call core.apply_type core.Type %₅₇) +59 (call core.svec %₅₈ core.Any core.Any) +60 (call core.svec) +61 SourceLocation::6:5 +62 (call core.svec %₅₉ %₆₀ %₆₁) +63 --- method core.nothing %₆₂ slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)] 1 TestMod.ReallyXIPromise 2 slot₁/#ctor-self# @@ -899,21 +921,21 @@ end 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) 13 slot₄/tmp 14 (return %₁₃) -52 TestMod.X -53 (call core.apply_type core.Type %₅₂) -54 (call core.svec %₅₃ 
core.Any core.Any core.Any) -55 (call core.svec) -56 SourceLocation::10:5 -57 (call core.svec %₅₄ %₅₅ %₅₆) -58 --- method core.nothing %₅₇ +64 TestMod.X +65 (call core.apply_type core.Type %₆₄) +66 (call core.svec %₆₅ core.Any core.Any core.Any) +67 (call core.svec) +68 SourceLocation::10:5 +69 (call core.svec %₆₆ %₆₇ %₆₈) +70 --- method core.nothing %₆₉ slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/a) 3 (return %₂) -59 TestMod.X -60 (call core.apply_type core.Type %₅₉) -61 (call JuliaLowering.bind_docs! %₆₀ "Docs for X constructor\n" %₅₇) -62 (return core.nothing) +71 TestMod.X +72 (call core.apply_type core.Type %₇₁) +73 (call JuliaLowering.bind_docs! %₇₂ "Docs for X constructor\n" %₆₉) +74 (return core.nothing) ######################################## # User defined inner constructors and helper functions for structs with type params From ade28cb4e483a2b4593e121485ed1b06074d61ab Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 11 Feb 2025 15:36:17 +1000 Subject: [PATCH 0996/1109] Add PRINT_IR option to REPL mode --- JuliaLowering/test/repl_mode.jl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl index cf69659dfabd1..ade7e1a6bf98e 100644 --- a/JuliaLowering/test/repl_mode.jl +++ b/JuliaLowering/test/repl_mode.jl @@ -19,17 +19,19 @@ function is_incomplete(prompt_state) end end -function eval_ish(mod, ex, do_eval) +function eval_ish(mod, ex, do_eval, do_print_ir) k = kind(ex) if k == K"toplevel" x = nothing for e in children(ex) - x = eval_ish(mod, e, do_eval) + x = eval_ish(mod, e, do_eval, do_print_ir) end return x end linear_ir = JuliaLowering.lower(mod, ex) - JuliaLowering.print_ir(stdout, linear_ir) + if do_print_ir + JuliaLowering.print_ir(stdout, linear_ir) + end if do_eval println(stdout, "#----------------------") expr_form = JuliaLowering.to_lowered_expr(mod, linear_ir) @@ -37,22 +39,30 
@@ function eval_ish(mod, ex, do_eval) end end +PRINT_IR::Bool = true DO_EVAL::Bool = false -function opts(; do_eval=false) +function opts(; do_eval=false, print_ir=false) global DO_EVAL = do_eval + global PRINT_IR = print_ir end function handle_input(str) - global DO_EVAL + global DO_EVAL, PRINT_IR if str == "DO_EVAL" DO_EVAL = true return elseif str == "!DO_EVAL" DO_EVAL = false return + elseif str == "PRINT_IR" + PRINT_IR = true + return + elseif str == "!PRINT_IR" + PRINT_IR = false + return end ex = parseall(SyntaxTree, str; filename="REPL") - eval_ish(Main, ex, DO_EVAL) + eval_ish(Main, ex, DO_EVAL, PRINT_IR) end function init() @@ -69,3 +79,4 @@ function __init__() end end + From 3f2d6afab06662905bdfcad2754de74509e81234 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 13 Feb 2025 21:20:08 +1000 Subject: [PATCH 0997/1109] Make `Expr(:incomplete)` detection more robust to whitespace (JuliaLang/JuliaSyntax.jl#538) Rework incomplete expression detection so that trailing whitespace is always ignored, regardless of how the parser itself decides to attach it to other nodes of the tree. To do this, we walk back from the end of the parse stream and look for the byte offset of the last non-whitespace token. We then use that to determine whether the error node is "at the end of the parse". Improve testing by * Extracting the incomplete expressions which are part of the REPL stdlib tests and ensuring these match the incomplete tag generation of the flisp parser. Fix some divergences for `var""` syntax and invalid escape sequences in strings. * Ensuring that we test both `:statement` and `:all` level parsing - the REPL uses `:all` to allow parsing of multiple top level statements, so we need to test this. Also fix a minor bug where `enable_in_core!(false)` would result in the flisp parser being used, regardless of whether `VERSION` ships with JuliaSyntax enabled by default. Fixes JuliaLang/JuliaSyntax.jl#519. See also JuliaLang/JuliaSyntax.jl#518. 
--- JuliaSyntax/src/hooks.jl | 21 ++- JuliaSyntax/src/parse_stream.jl | 11 ++ JuliaSyntax/test/hooks.jl | 321 +++++++++++++++++++++++++++++++- 3 files changed, 339 insertions(+), 14 deletions(-) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index a392b61d7cfba..914c0c1bdd39d 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -35,15 +35,17 @@ end function _incomplete_tag(n::SyntaxNode, codelen) i,c = _first_error(n) if isnothing(c) || last_byte(c) < codelen || codelen == 0 - return :none - elseif first_byte(c) <= codelen - if kind(c) == K"ErrorEofMultiComment" && last_byte(c) == codelen + if kind(c) == K"ErrorEofMultiComment" # This is the one weird case where the token itself is an # incomplete error return :comment else return :none end + elseif first_byte(c) <= codelen && kind(c) != K"ErrorInvalidEscapeSequence" + # "ErrorInvalidEscapeSequence" may be incomplete, so we don't include it + # here as a hard error. + return :none end if kind(c) == K"error" && numchildren(c) > 0 for cc in children(c) @@ -56,7 +58,7 @@ function _incomplete_tag(n::SyntaxNode, codelen) return :other end kp = kind(c.parent) - if kp == K"string" + if kp == K"string" || kp == K"var" return :string elseif kp == K"cmdstring" return :cmd @@ -170,7 +172,6 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end end parse!(stream; rule=options) - pos_before_trivia = last_byte(stream) if options === :statement bump_trivia(stream; skip_newlines=false) if peek(stream) == K"NewlineWs" @@ -179,8 +180,9 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end if any_error(stream) + pos_before_comments = last_non_whitespace_byte(stream) tree = build_tree(SyntaxNode, stream, first_line=lineno, filename=filename) - tag = _incomplete_tag(tree, pos_before_trivia) + tag = _incomplete_tag(tree, pos_before_comments) if _has_v1_10_hooks exc = ParseError(stream, filename=filename, first_line=lineno, 
incomplete_tag=tag) @@ -245,6 +247,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # EXIT last_offset=$last_offset #-#-#- """) + flush(_debug_log[]) end # Rewrap result in an svec for use by the C code @@ -257,6 +260,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # $exc #-#-#- """) + flush(_debug_log[]) end @error("""JuliaSyntax parser failed — falling back to flisp! This is not your fault. Please submit a bug report to https://github.com/JuliaLang/JuliaSyntax.jl/issues""", @@ -284,6 +288,8 @@ else Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e end +_default_system_parser = _has_v1_6_hooks ? Core._parse : nothing + """ enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing]) @@ -313,7 +319,8 @@ function enable_in_core!(enable=true; freeze_world_age = true, world_age = freeze_world_age ? Base.get_world_counter() : typemax(UInt) _set_core_parse_hook(fix_world_age(core_parser_hook, world_age)) else - _set_core_parse_hook(Core.Compiler.fl_parse) + @assert !isnothing(_default_system_parser) + _set_core_parse_hook(_default_system_parser) end nothing end diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 33e029c6188d2..0c57c2a43f390 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -1257,6 +1257,17 @@ first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel last_byte(stream::ParseStream) = _next_byte(stream)-1 any_error(stream::ParseStream) = any_error(stream.diagnostics) +# Return last non-whitespace byte which was parsed +function last_non_whitespace_byte(stream::ParseStream) + for i = length(stream.tokens):-1:1 + tok = stream.tokens[i] + if !(kind(tok) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment") + return tok.next_byte - 1 + end + end + return first_byte(stream) - 1 +end + function Base.empty!(stream::ParseStream) t = last(stream.tokens) empty!(stream.tokens) 
diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index 61772ce00d106..c41d2dacf5482 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -117,11 +117,6 @@ end end @test Meta.parse(mystr) == :hi - JuliaSyntax.enable_in_core!(false) - end - - @testset "Expr(:incomplete)" begin - JuliaSyntax.enable_in_core!() err = Meta.parse("\"") @test Meta.isexpr(err, :incomplete) if JuliaSyntax._has_v1_10_hooks @@ -134,6 +129,10 @@ end @test err.args[1] isa String end + JuliaSyntax.enable_in_core!(false) + end + + @testset "Expr(:incomplete)" begin for (str, tag) in [ "\"" => :string "\"\$foo" => :string @@ -195,12 +194,320 @@ end "a b" => :none "()x" => :none "." => :none + + # Some error tokens which cannot be made complete by appending more characters + "1.e1." => :none + "\u200b" => :none + "x #=\xf5b\n=#" => :none + "₁" => :none + "0x1.0\n" => :none + "\"\$x෴\"" => :none + "10e1000" => :none + + # Multiline input with comments (#519) + "function f()\nbody #comment" => :block + "a = [\n1,\n2, #comment" => :other + + # Extended set of cases extracted from the REPL stdlib tests. + # There is some redundancy here, but we've mostly left these + # here because incomplete-detection is partly heuristic and + # it's good to have a wide variety of incomplete expressions. + # + # The "desired" incomplete tag here was generated from the + # flisp parser. + "Main.CompletionFoo." => :other + "Base.return_types(getin" => :other + "test7()." => :other + "(3,2)." => :other + "Base.print(\"lol" => :string + "run(`lol" => :cmd + "copy(A')." 
=> :other + "cd(\"path_to_an_empty_folder_should_not_complete_latex\\\\\\alpha" => :string + "\"C:\\\\ \\alpha" => :string + "cd(\"C:\\U" => :string + "max(" => :other + "!(" => :other + "!isnothing(" => :other + "!!isnothing(" => :other + "CompletionFoo.test(1, 1, " => :other + "CompletionFoo.test(CompletionFoo.array," => :other + "CompletionFoo.test(1,1,1," => :other + "CompletionFoo.test1(Int," => :other + "CompletionFoo.test1(Float64," => :other + "prevind(\"θ\",1," => :other + "(1, CompletionFoo.test2(\")\"," => :other + "(1, CompletionFoo.test2(')'," => :other + "(1, CompletionFoo.test2(`')'`," => :other + "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat," => :other + "CompletionFoo.test3([1.,2.], 1.," => :other + "CompletionFoo.test4(\"e\",r\" \"," => :other + "CompletionFoo.test5(broadcast((x,y)->x==y, push!(Base.split(\"\",' '),\"\",\"\"), \"\")," => :other + "CompletionFoo.test5(Bool[x==1 for x=1:4]," => :other + "CompletionFoo.test4(CompletionFoo.test_y_array[1]()[1], CompletionFoo.test_y_array[1]()[2], " => :other + "CompletionFoo.test4(\"\\\"\"," => :other + "convert(" => :other + "convert(" => :other + "CompletionFoo.test5(AbstractArray[Bool[]][1]," => :other + "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat)," => :other + "CompletionFoo.kwtest( " => :other + "CompletionFoo.kwtest(;" => :other + "CompletionFoo.kwtest(; x=1, " => :other + "CompletionFoo.kwtest(; kw=1, " => :other + "CompletionFoo.kwtest(x=1, " => :other + "CompletionFoo.kwtest(x=1; " => :other + "CompletionFoo.kwtest(x=kw=1, " => :other + "CompletionFoo.kwtest(; x=kw=1, " => :other + "CompletionFoo.kwtest2(1, x=1," => :other + "CompletionFoo.kwtest2(1; x=1, " => :other + "CompletionFoo.kwtest2(1, x=1; " => :other + "CompletionFoo.kwtest2(1, kw=1, " => :other + "CompletionFoo.kwtest2(1; kw=1, " => :other + "CompletionFoo.kwtest2(1, kw=1; " => :other + "CompletionFoo.kwtest2(y=3, 1, " => :other + "CompletionFoo.kwtest2(y=3, 1; " => :other + "CompletionFoo.kwtest2(kw=3, 
1, " => :other + "CompletionFoo.kwtest2(kw=3, 1; " => :other + "CompletionFoo.kwtest2(1; " => :other + "CompletionFoo.kwtest2(1, " => :other + "CompletionFoo.kwtest4(x23=18, x; " => :other + "CompletionFoo.kwtest4(x23=18, x, " => :other + "CompletionFoo.kwtest4(x23=18, " => :other + "CompletionFoo.kwtest5(3, somekwarg=6," => :other + "CompletionFoo.kwtest5(3, somekwarg=6, anything, " => :other + "CompletionFoo.?([1,2,3], 2.0" => :other + "CompletionFoo.?('c'" => :other + "CompletionFoo.?(false, \"a\", 3, " => :other + "CompletionFoo.?(false, \"a\", 3, " => :other + "CompletionFoo.?(\"a\", 3, " => :other + "CompletionFoo.?(; " => :other + "CompletionFoo.?(" => :other + "CompletionFoo.test10(z, Integer[]...," => :other + "CompletionFoo.test10(3, Integer[]...," => :other + "CompletionFoo.test10(3, 4," => :other + "CompletionFoo.test10(3, 4, 5," => :other + "CompletionFoo.test10(z, z, 0, " => :other + "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], " => :other + "CompletionFoo.test11(Integer[false][1], Integer[14][1], " => :other + "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6," => :other + "CompletionFoo.test11(3, 4," => :other + "CompletionFoo.test11(0x8, 5," => :other + "CompletionFoo.test11(0x8, 'c'," => :other + "CompletionFoo.test11('d', 3," => :other + "CompletionFoo.test!12(" => :other + "CompletionFoo.kwtest(; x=2, y=4; kw=3, " => :other + "CompletionFoo.kwtest(x=2; y=4; " => :other + "CompletionFoo.kwtest((x=y)=4, " => :other + "CompletionFoo.kwtest(; (x=y)=4, " => :other + "CompletionFoo.kwtest(; w...=16, " => :other + "CompletionFoo.kwtest(; 2, " => :other + "CompletionFoo.kwtest(; 2=3, " => :other + "CompletionFoo.kwtest3(im; (true ? length : length), " => :other + "CompletionFoo.kwtest.(x=2; y=4; " => :other + "CompletionFoo.kwtest.(; w...=16, " => :other + "(1+2im)." => :other + "((1+2im))." => :other + "CompletionFoo.test_y_array[1]." => :other + "CompletionFoo.named." 
=> :other + "#=\n\\alpha" => :comment + "#=\nmax" => :comment + "using " => :other + "(max" => :other + "@show \"/dev/nul" => :string + "@show \"/tm" => :string + "@show \"/dev/nul" => :string + "(Iter" => :other + "\"/tmp/jl_4sjOtz/tmpfoob" => :string + "\"~" => :string + "\"~user" => :string + "\"/tmp/jl_Mn9Rbz/selfsym" => :string + "\"~/ka8w5rsz" => :string + "\"foo~bar" => :string + "\"~/Zx6Wa0GkC" => :string + "\"~/Zx6Wa0GkC0" => :string + "\"~/Zx6Wa0GkC0/my_" => :string + "\"~/Zx6Wa0GkC0/my_file" => :string + "cd(\"folder_do_not_exist_77/file" => :string + "CompletionFoo.tuple." => :other + "CompletionFoo.test_dict[\"ab" => :string + "CompletionFoo.test_dict[\"abcd" => :string + "CompletionFoo.test_dict[ \"abcd" => :string + "CompletionFoo.test_dict[\"abcd" => :string + "CompletionFoo.test_dict[:b" => :other + "CompletionFoo.test_dict[:bar2" => :other + "CompletionFoo.test_dict[Ba" => :other + "CompletionFoo.test_dict[occ" => :other + "CompletionFoo.test_dict[`l" => :cmd + "CompletionFoo.test_dict[6" => :other + "CompletionFoo.test_dict[66" => :other + "CompletionFoo.test_dict[(" => :other + "CompletionFoo.test_dict[\"\\alp" => :string + "CompletionFoo.test_dict[\"\\alpha" => :string + "CompletionFoo.test_dict[\"α" => :string + "CompletionFoo.test_dict[:α" => :other + "CompletionFoo.test_dict[" => :other + "CompletionFoo.test_customdict[\"ab" => :string + "CompletionFoo.test_customdict[\"abcd" => :string + "CompletionFoo.test_customdict[ \"abcd" => :string + "CompletionFoo.test_customdict[\"abcd" => :string + "CompletionFoo.test_customdict[:b" => :other + "CompletionFoo.test_customdict[:bar2" => :other + "CompletionFoo.test_customdict[Ba" => :other + "CompletionFoo.test_customdict[occ" => :other + "CompletionFoo.test_customdict[`l" => :cmd + "CompletionFoo.test_customdict[6" => :other + "CompletionFoo.test_customdict[66" => :other + "CompletionFoo.test_customdict[(" => :other + "CompletionFoo.test_customdict[\"\\alp" => :string + 
"CompletionFoo.test_customdict[\"\\alpha" => :string + "CompletionFoo.test_customdict[\"α" => :string + "CompletionFoo.test_customdict[:α" => :other + "CompletionFoo.test_customdict[" => :other + "test_repl_comp_dict[\"ab" => :string + "test_repl_comp_dict[\"abcd" => :string + "test_repl_comp_dict[ \"abcd" => :string + "test_repl_comp_dict[\"abcd" => :string + "test_repl_comp_dict[:b" => :other + "test_repl_comp_dict[:bar2" => :other + "test_repl_comp_dict[Ba" => :other + "test_repl_comp_dict[occ" => :other + "test_repl_comp_dict[`l" => :cmd + "test_repl_comp_dict[6" => :other + "test_repl_comp_dict[66" => :other + "test_repl_comp_dict[(" => :other + "test_repl_comp_dict[\"\\alp" => :string + "test_repl_comp_dict[\"\\alpha" => :string + "test_repl_comp_dict[\"α" => :string + "test_repl_comp_dict[:α" => :other + "test_repl_comp_dict[" => :other + "test_repl_comp_customdict[\"ab" => :string + "test_repl_comp_customdict[\"abcd" => :string + "test_repl_comp_customdict[ \"abcd" => :string + "test_repl_comp_customdict[\"abcd" => :string + "test_repl_comp_customdict[:b" => :other + "test_repl_comp_customdict[:bar2" => :other + "test_repl_comp_customdict[Ba" => :other + "test_repl_comp_customdict[occ" => :other + "test_repl_comp_customdict[`l" => :cmd + "test_repl_comp_customdict[6" => :other + "test_repl_comp_customdict[66" => :other + "test_repl_comp_customdict[(" => :other + "test_repl_comp_customdict[\"\\alp" => :string + "test_repl_comp_customdict[\"\\alpha" => :string + "test_repl_comp_customdict[\"α" => :string + "test_repl_comp_customdict[:α" => :other + "test_repl_comp_customdict[" => :other + "CompletionFoo.kwtest3(a;foob" => :other + "CompletionFoo.kwtest3(a; le" => :other + "CompletionFoo.kwtest3.(a;\nlength" => :other + "CompletionFoo.kwtest3(a, length=4, l" => :other + "CompletionFoo.kwtest3(a; kwargs..., fo" => :other + "CompletionFoo.kwtest3(a; another!kwarg=0, le" => :other + "CompletionFoo.kwtest3(a; another!" 
=> :other + "CompletionFoo.kwtest3(a; another!kwarg=0, foob" => :other + "CompletionFoo.kwtest3(a; namedarg=0, foob" => :other + "kwtest3(blabla; unknown=4, namedar" => :other + "kwtest3(blabla; named" => :other + "kwtest3(blabla; named." => :other + "kwtest3(blabla; named..., another!" => :other + "kwtest3(blabla; named..., len" => :other + "kwtest3(1+3im; named" => :other + "kwtest3(1+3im; named." => :other + "CompletionFoo.kwtest4(a; x23=0, _" => :other + "CompletionFoo.kwtest4(a; xαβγ=1, _" => :other + "CompletionFoo.kwtest4.(a; xαβγ=1, _" => :other + "CompletionFoo.kwtest4(a; x23=0, x" => :other + "CompletionFoo.kwtest4.(a; x23=0, x" => :other + "CompletionFoo.kwtest4(a; _a1b=1, x" => :other + "CompletionFoo.kwtest5(3, 5; somek" => :other + "CompletionFoo.kwtest5(3, 5, somekwarg=4, somek" => :other + "CompletionFoo.kwtest5(3, 5, 7; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9, Any[]...; somek" => :other + "CompletionFoo.kwtest5(unknownsplat...; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9, somekwarg=4, somek" => :other + "CompletionFoo.kwtest5(String[]..., unknownsplat...; xy" => :other + "CompletionFoo.kwtest5('a', unknownsplat...; xy" => :other + "CompletionFoo.kwtest5('a', 3, String[]...; xy" => :other + "CompletionFoo.kwtest3(" => :other + "CompletionFoo.kwtest3(a;" => :other + "CompletionFoo.kwtest3(a; len2=" => :other + "CompletionFoo.kwtest3(a; len2=le" => :other + "CompletionFoo.kwtest3(a; len2=3 " => :other + "CompletionFoo.kwtest3(a; [le" => :other + "CompletionFoo.kwtest3([length; le" => :other + "CompletionFoo.kwtest3(a; (le" => :other + "CompletionFoo.kwtest3(a; foo(le" => :other + "CompletionFoo.kwtest3(a; (; le" => :other + "CompletionFoo.kwtest3(a; length, " => :other + "CompletionFoo.kwtest3(a; kwargs..., " => :other + ":(function foo(::Int) end).args[1].args[2]." 
=> :other + "log(log.(varfloat)," => :other + "Base.return_types(getin" => :other + "test(1,1, " => :other + "test.(1,1, " => :other + "prevind(\"θ\",1," => :other + "typeof(+)." => :other + "test_dict[\"ab" => :string + "CompletionFoo.x." => :other + "@noexist." => :other + "Main.@noexist." => :none # <- Invalid syntax which adding a suffix can't fix + "@Main.noexist." => :other + "@show." => :other + "@macroexpand." => :other + "CompletionFoo.@foobar()." => :other + "CompletionFoo.@foobar(4)." => :other + "foo(#=#==#=##==#).rs[1]." => :other + "foo().r." => :other + "foo(#=#=# =#= =#).r." => :other + "test_47594." => :other + "Issue36437(42)." => :other + "Some(Issue36437(42)).value." => :other + "some_issue36437.value." => :other + "some_issue36437.value.a, some_issue36437.value." => :other + "@show some_issue36437.value.a; some_issue36437.value." => :other + "()." => :other + "Ref(Issue36437(42))[]." => :other + "global_dict[:r]." => :other + "global_dict_nested[:g][:r]." => :other + "global_dict_nested[" => :other + "global_dict_nested[:g][" => :other + "pop!(global_xs)." => :other + "tcd1." => :other + "tcd1.x." => :other + "tcd1.x.v." => :other + "getkeyelem(mutable_const_prop)." => :other + "getkeyelem(mutable_const_prop).value." => :other + "var\"complicated " => :string + "WeirdNames().var\"oh " => :string + "WeirdNames().var\"" => :string + "\"abc\"." => :other + "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)." => :other + "union_somes(1, 1.0)." => :other + "union_some_ref(1, 1.0)." => :other + "Issue49892(fal" => :other + "-CompletionFoo.Test_y(3)." => :other + "99 ⨷⁻ᵨ⁷ CompletionFoo.type_test." => :other + "CompletionFoo.type_test + CompletionFoo.Test_y(2)." => :other + "(CompletionFoo.type_test + CompletionFoo.Test_y(2))." => :other + "CompletionFoo.type_test + CompletionFoo.unicode_αβγ." => :other + "(CompletionFoo.type_test + CompletionFoo.unicode_αβγ)." => :other + "using Base." 
=> :other + "@time(using .Iss" => :other + "using .Issue52922.Inner1." => :other + "Issue53126()." => :other + "using " => :other + "global xxx::Number = Base." => :other ] @testset "$(repr(str))" begin - @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag + # Test :statement parsing + ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :statement)[1] + @test Base.incomplete_tag(ex) == tag + # Test :all parsing - this is what the REPL uses to parse user input. + ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :all)[1] + @test ex.head == :toplevel + @test Base.incomplete_tag(ex.args[end]) == tag end end - JuliaSyntax.enable_in_core!(false) # Should not throw @test JuliaSyntax.core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr From e69654383e455dd55223aa15eead0420560e3595 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 13 Feb 2025 21:20:53 +1000 Subject: [PATCH 0998/1109] Bump version to 1.0.2 --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 3b394076a96a6..231dab6fea415 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "1.0.1" +version = "1.0.2" [compat] Serialization = "1.0" From 0db8b127acd09ec0eb6e5d6837c27eb5b2fe260b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 25 Feb 2025 15:48:33 +1000 Subject: [PATCH 0999/1109] Ensure module of macro method is used for AST scope This change allows macros to be extended from modules other than `parentmodule(mac)` and have the scope of the AST be that of the module the extension is written in. This matches Julia's current semantics for macro extensions though it's unclear how useful this actually is when different macro methods are distinguished based only on the number of arguments. 
--- JuliaLowering/src/macro_expansion.jl | 14 ++++++++++---- JuliaLowering/src/runtime.jl | 8 ++++++++ JuliaLowering/test/macros_ir.jl | 29 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 397cbd94166da..1e4ac7565ca2c 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -138,12 +138,15 @@ function expand_macro(ctx, ex) # a macro expansion. # In either case, we need to set any unset scope layers before passing the # arguments to the macro call. - macro_args = [set_scope_layer(ctx, e, ctx.current_layer.id, false) - for e in children(ex)[2:end]] mctx = MacroContext(ctx.graph, ex, ctx.current_layer) + macro_args = Any[mctx] + for i in 2:numchildren(ex) + push!(macro_args, set_scope_layer(ctx, ex[i], ctx.current_layer.id, false)) + end + macro_invocation_world = Base.get_world_counter() expanded = try # TODO: Allow invoking old-style macros for compat - invokelatest(macfunc, mctx, macro_args...) + invokelatest(macfunc, macro_args...) catch exc if exc isa MacroExpansionError # Add context to the error. 
@@ -162,7 +165,10 @@ function expand_macro(ctx, ex) expanded = copy_ast(ctx, expanded) end expanded = append_sourceref(ctx, expanded, ex) - new_layer = ScopeLayer(length(ctx.scope_layers)+1, parentmodule(macfunc), true) + # Module scope for the returned AST is the module where this particular + # method was defined (may be different from `parentmodule(macfunc)`) + mod_for_ast = lookup_method_instance(macfunc, macro_args, macro_invocation_world).def.module + new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, true) push!(ctx.scope_layers, new_layer) inner_ctx = MacroExpansionContext(ctx.graph, ctx.bindings, ctx.scope_layers, new_layer) expanded = expand_forms_1(inner_ctx, expanded) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index f694c54543719..a3d9c2ce79643 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -418,3 +418,11 @@ function reserve_module_binding_i(mod, basename) end end +function lookup_method_instance(func, args, world::Integer) + allargs = Vector{Any}(undef, length(args) + 1) + allargs[1] = func + allargs[2:end] = args + mi = @ccall jl_method_lookup(allargs::Ptr{Any}, length(allargs)::Csize_t, + world::Csize_t)::Ptr{Cvoid} + return mi == C_NULL ? 
nothing : unsafe_pointer_to_objref(mi) +end diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 94e03e4fe8fce..e1c6460cb1bb1 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -1,3 +1,21 @@ +module MacroMethods + macro some_macro() + quote + some_global + end + end + + module ExtraMacroMethods + using ..MacroMethods + macro MacroMethods.some_macro(ex) + quote + some_global + end + end + end +end + +#******************************************************************************* ######################################## # Simple macro macro add_one(ex) @@ -42,6 +60,17 @@ end 9 TestMod.@foo 10 (return %₉) +######################################## +# Scope for symbols emitted by macros is the module where the method was +# defined, thus two different modules in this case, even though `@some_macro` +# belongs to the MacroMethods module. +(MacroMethods.@some_macro(), MacroMethods.@some_macro(unused)) +#--------------------- +1 TestMod.MacroMethods.some_global +2 TestMod.MacroMethods.ExtraMacroMethods.some_global +3 (call core.tuple %₁ %₂) +4 (return %₃) + ######################################## # Error: Macro with kw args macro mmm(a; b=2) From a062a483174022ee7e690e49e48b964d5a4f0791 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 25 Mar 2025 11:10:36 -0500 Subject: [PATCH 1000/1109] Update cache version to unbreak CI --- JuliaSyntax/.github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml index dcdb5f35311c8..b6ceedb5e4f63 100644 --- a/JuliaSyntax/.github/workflows/CI.yml +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -92,7 +92,7 @@ jobs: with: version: 1.6 arch: x64 - - uses: actions/cache@v1 + - uses: actions/cache@v4 env: cache-name: cache-artifacts with: From 4ef115e7b780362045512c1534eb75c886ea20ad Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 26 Mar 2025 
16:38:21 +0100 Subject: [PATCH 1001/1109] also precompile for `SubString` (JuliaLang/JuliaSyntax.jl#542) --- JuliaSyntax/src/precompile.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index 922be3540dc07..5a80d92d6837c 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -6,6 +6,7 @@ let filename = joinpath(@__DIR__, "literal_parsing.jl") if _has_v1_6_hooks enable_in_core!() Meta.parse("1 + 2") + Meta.parse(SubString("1 + 2")) enable_in_core!(false) end end From dd283a17daaae0db9f279a85d7726a49234d2647 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 12 Apr 2025 14:19:24 +1000 Subject: [PATCH 1002/1109] Enhanced pretty printing of bindings --- JuliaLowering/src/bindings.jl | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/JuliaLowering/src/bindings.jl b/JuliaLowering/src/bindings.jl index e6fda3c23681d..f35a61c05d876 100644 --- a/JuliaLowering/src/bindings.jl +++ b/JuliaLowering/src/bindings.jl @@ -34,6 +34,44 @@ function BindingInfo(id::IdTag, name::AbstractString, kind::Symbol, node_id::Int is_internal, is_ambiguous_local, is_nospecialize) end +function Base.show(io::IO, binfo::BindingInfo) + print(io, "BindingInfo(", binfo.id, ", ", + repr(binfo.name), ", ", + repr(binfo.kind), ", ", + binfo.node_id) + if !isnothing(binfo.mod) + print(io, ", mod=", binfo.mod) + end + if !isnothing(binfo.type) + print(io, ", type=", binfo.type) + end + if binfo.n_assigned != 0 + print(io, ", n_assigned=", binfo.n_assigned) + end + if binfo.is_const + print(io, ", is_const=", binfo.is_const) + end + if binfo.is_ssa + print(io, ", is_ssa=", binfo.is_ssa) + end + if binfo.is_captured + print(io, ", is_captured=", binfo.is_captured) + end + if binfo.is_always_defined != binfo.is_ssa + print(io, ", is_always_defined=", binfo.is_always_defined) + end + if binfo.is_internal + print(io, ", is_internal=", binfo.is_internal) + end + if 
binfo.is_ambiguous_local + print(io, ", is_ambiguous_local=", binfo.is_ambiguous_local) + end + if binfo.is_nospecialize + print(io, ", is_nospecialize=", binfo.is_nospecialize) + end + print(io, ")") +end + """ Metadata about "entities" (variables, constants, etc) in the program. Each entity is associated to a unique integer id, the BindingId. A binding will be From d39ff84899d5e4ad7122ea739ba4a81b061972dc Mon Sep 17 00:00:00 2001 From: adienes <51664769+adienes@users.noreply.github.com> Date: Tue, 13 May 2025 17:17:17 -0400 Subject: [PATCH 1003/1109] simple_hash faster by lifting length(str) --- JuliaSyntax/src/tokenize.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 6eb76954dfe0c..e0d3770ea580c 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -1339,7 +1339,8 @@ end function simple_hash(str) ind = 1 h = UInt64(0) - while ind <= length(str) + L = length(str) + while ind <= L h = simple_hash(str[ind], h) ind = nextind(str, ind) end From 4f99905b8114aa0680a0ea44f16e5f488eb7702e Mon Sep 17 00:00:00 2001 From: adienes <51664769+adienes@users.noreply.github.com> Date: Wed, 14 May 2025 12:38:11 -0400 Subject: [PATCH 1004/1109] Update src/tokenize.jl Co-authored-by: Sebastian Pfitzner --- JuliaSyntax/src/tokenize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index e0d3770ea580c..0ea9be19fe250 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -1339,7 +1339,7 @@ end function simple_hash(str) ind = 1 h = UInt64(0) - L = length(str) + L = min(lastindex(str), MAX_KW_LENGTH) while ind <= L h = simple_hash(str[ind], h) ind = nextind(str, ind) From 60e2c2135a766b2d94345f25bfdadd1058d7edf9 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Sun, 18 May 2025 03:51:11 -0700 Subject: [PATCH 1005/1109] Make `source_location()` 
and `line_starts` consistent (JuliaLang/JuliaSyntax.jl#552) when the last source byte is a newline --- JuliaSyntax/src/source_files.jl | 11 +++++------ JuliaSyntax/test/source_files.jl | 13 +++++++++---- JuliaSyntax/test/syntax_tree.jl | 5 +++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 9c5ccf24a9293..5a7c6eba7e919 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -156,9 +156,6 @@ function SourceFile(code::AbstractString; filename=nothing, first_line=1, # The line is considered to start after the `\n` code[i] == '\n' && push!(line_starts, i+1) end - if isempty(code) || last(code) != '\n' - push!(line_starts, ncodeunits(code)+1) - end SourceFile(code, first_index-1, filename, first_line, line_starts) end @@ -168,8 +165,7 @@ end # Get line number of the given byte within the code function _source_line_index(source::SourceFile, byte_index) - lineidx = searchsortedlast(source.line_starts, byte_index - source.byte_offset) - return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1 + searchsortedlast(source.line_starts, byte_index - source.byte_offset) end _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 @@ -204,7 +200,10 @@ function source_line_range(source::SourceFile, byte_index::Integer; context_lines_before=0, context_lines_after=0) lineidx = _source_line_index(source, byte_index) fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] - lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1 + lline = lineidx + context_lines_after + lbyte = lline >= lastindex(source.line_starts) ? 
+ ncodeunits(source.code) : source.line_starts[lline + 1] - 1 + return (fbyte + source.byte_offset, lbyte + source.byte_offset) end diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl index 0e36b7fe31103..d518124f1e6e1 100644 --- a/JuliaSyntax/test/source_files.jl +++ b/JuliaSyntax/test/source_files.jl @@ -3,23 +3,28 @@ @test source_location(SourceFile("a"), 2) == (1,2) @test source_location(SourceFile("a\n"), 2) == (1,2) - @test source_location(SourceFile("a\n"), 3) == (1,3) + @test source_location(SourceFile("a\n"), 3) == (2,1) @test source_location(SourceFile("a\nb\n"), 2) == (1,2) @test source_location(SourceFile("a\nb\n"), 3) == (2,1) @test source_location(SourceFile("a\nb\n"), 4) == (2,2) - @test source_location(SourceFile("a\nb\n"), 5) == (2,3) + @test source_location(SourceFile("a\nb\n"), 5) == (3,1) + + @test source_location(SourceFile("\n\n"), 1) == (1,1) + @test source_location(SourceFile("\n\n"), 2) == (2,1) + @test source_location(SourceFile("\n\n"), 3) == (3,1) @test source_location(SourceFile("a"; first_line=7), 1) == (7,1) @test source_location(SourceFile("a"; first_line=7), 2) == (7,2) @test source_location(SourceFile("a\n"; first_line=7), 2) == (7,2) - @test source_location(SourceFile("a\n"; first_line=7), 3) == (7,3) + @test source_location(SourceFile("a\n"; first_line=7), 3) == (8,1) @test source_location(SourceFile("a\nb\n"; first_line=7), 2) == (7,2) @test source_location(SourceFile("a\nb\n"; first_line=7), 3) == (8,1) @test source_location(SourceFile("a\nb\n"; first_line=7), 4) == (8,2) - @test source_location(SourceFile("a\nb\n"; first_line=7), 5) == (8,3) + @test source_location(SourceFile("a\nb\n"; first_line=7), 5) == (9,1) + mktemp() do path, io write(io, "a\n") diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index c6b673c7245e6..2fac0d6baea20 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -37,6 +37,11 @@ e = try node.val = :q catch e e end 
@test occursin("immutable", e.msg) && occursin("SyntaxData", e.msg) + # Newline-terminated source + t = parsestmt(SyntaxNode, "a*b + c\n") + @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" + @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" + # copy t = parsestmt(SyntaxNode, "a*b + c") ct = copy(t) From ac77b0a692dbd5983b9797a6430b26b926210dc0 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Fri, 6 Jun 2025 19:24:18 -0400 Subject: [PATCH 1006/1109] add `activate!` function as alias to `enable_in_core!` (JuliaLang/JuliaSyntax.jl#547) --- JuliaSyntax/src/expr.jl | 4 ++-- JuliaSyntax/src/hooks.jl | 3 +++ JuliaSyntax/src/syntax_tree.jl | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 638e0b7569ac8..0a6f16a7e812f 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -140,7 +140,7 @@ function _string_to_Expr(args) # """\n a\n b""" ==> "a\nb" return only(args2) else - # This only happens when the kind is K"string" or when an error has occurred. + # This only happens when the kind is K"string" or when an error has occurred. return Expr(:string, args2...) end end @@ -159,7 +159,7 @@ function _fixup_Expr_children!(head, loc, args) arg = args[i] was_parens = @isexpr(arg, :parens) arg = _strip_parens(arg) - if @isexpr(arg, :(=)) && eq_to_kw_in_call && i > 1 + if @isexpr(arg, :(=)) && eq_to_kw_in_call && i > 1 arg = Expr(:kw, arg.args...) elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple h, a = arg.args[1]::Tuple{SyntaxHead,Any} diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 914c0c1bdd39d..1c31030fb4d00 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -290,6 +290,9 @@ end _default_system_parser = _has_v1_6_hooks ? 
Core._parse : nothing +# hook into InteractiveUtils.@activate +activate!(enable=true) = enable_in_core!(enable) + """ enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing]) diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index aa3d40091afd7..7f7776e7ce9b5 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -131,7 +131,8 @@ numchildren(node::TreeNode) = (isnothing(node.children) ? 0 : length(node.childr Base.getindex(node::AbstractSyntaxNode, i::Int) = children(node)[i] Base.getindex(node::AbstractSyntaxNode, rng::UnitRange) = view(children(node), rng) Base.firstindex(node::AbstractSyntaxNode) = 1 -Base.lastindex(node::AbstractSyntaxNode) = length(children(node)) +Base.length(node::AbstractSyntaxNode) = length(children(node)) +Base.lastindex(node::AbstractSyntaxNode) = length(node) function Base.setindex!(node::SN, x::SN, i::Int) where {SN<:AbstractSyntaxNode} children(node)[i] = x From 74301f15bbd8b4fc61690590b4f835392a30b8bf Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sat, 7 Jun 2025 16:46:26 +0900 Subject: [PATCH 1007/1109] optimize `hash` implementations (JuliaLang/JuliaSyntax.jl#559) This optimizes the `hash` implementation added in JuliaLang/JuliaSyntax.jl#452. Creating tuples with statically unknown types that may hold random values at runtime should be avoided for performance reasons. As a result, the allocation regression reported in JuliaLang/JuliaSyntax.jl#558 has been reduced to the previous level, although the time regression from `hash` calculation still remains. 
--- JuliaSyntax/src/green_tree.jl | 13 +++++++++++-- JuliaSyntax/src/source_files.jl | 3 ++- JuliaSyntax/src/syntax_tree.jl | 22 +++++++++++++++++++--- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_tree.jl index 27da7ec4bfb0f..4164529a51d71 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_tree.jl @@ -72,7 +72,17 @@ end Base.summary(node::GreenNode) = summary(node.head) -Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.children), h) +function Base.hash(node::GreenNode, h::UInt) + children = node.children + if children === nothing + h = hash(nothing, h) + else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)` + for child in children + h = hash(child, h) + end + end + hash(node.head, hash(node.span, h)) +end function Base.:(==)(n1::GreenNode, n2::GreenNode) n1.head == n2.head && n1.span == n2.span && n1.children == n2.children end @@ -129,4 +139,3 @@ function build_tree(::Type{GreenNode}, stream::ParseStream; kws...) 
GreenNode(h, span, collect(GreenNode{SyntaxHead}, cs)) end end - diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/source_files.jl index 5a7c6eba7e919..1058693f2a941 100644 --- a/JuliaSyntax/src/source_files.jl +++ b/JuliaSyntax/src/source_files.jl @@ -143,7 +143,8 @@ struct SourceFile line_starts::Vector{Int} end -Base.hash(s::SourceFile, h::UInt) = hash((s.code, s.byte_offset, s.filename, s.first_line, s.line_starts), h) +Base.hash(s::SourceFile, h::UInt) = + hash(s.code, hash(s.byte_offset, hash(s.filename, hash(s.first_line, hash(s.line_starts, h))))) function Base.:(==)(a::SourceFile, b::SourceFile) a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename && a.first_line == b.first_line && a.line_starts == b.line_starts diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index 7f7776e7ce9b5..edc864e0e939e 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -18,7 +18,18 @@ mutable struct TreeNode{NodeData} # ? prevent others from using this with Node end # Exclude parent from hash and equality checks. This means that subtrees can compare equal. 
-Base.hash(node::TreeNode, h::UInt) = hash((node.children, node.data), h) +function Base.hash(node::TreeNode, h::UInt) + h = hash(node.data, h) + children = node.children + if children === nothing + return hash(nothing, h) + else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)` + for child in children + h = hash(child, h) + end + return h + end +end function Base.:(==)(a::TreeNode{T}, b::TreeNode{T}) where T a.children == b.children && a.data == b.data end @@ -50,9 +61,14 @@ struct SyntaxData <: AbstractSyntaxData val::Any end -Base.hash(data::SyntaxData, h::UInt) = hash((data.source, data.raw, data.position, data.val), h) +Base.hash(data::SyntaxData, h::UInt) = + hash(data.source, hash(data.raw, hash(data.position, + # Avoid dynamic dispatch: + # This does not support custom `hash` implementation that may be defined for `typeof(data.val)`, + # However, such custom user types should not generally appear in the AST. + Core.invoke(hash, Tuple{Any,UInt}, data.val, h)))) function Base.:(==)(a::SyntaxData, b::SyntaxData) - a.source == b.source && a.raw == b.raw && a.position == b.position && a.val == b.val + a.source == b.source && a.raw == b.raw && a.position == b.position && a.val === b.val end """ From 0b489521fb12666d1316b9d60a487701fe1803fc Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 11 Jun 2025 20:32:22 -0400 Subject: [PATCH 1008/1109] parser_stream: Produce green tree traversal rather than token ranges (JuliaLang/JuliaSyntax.jl#560) ## Background I've written about 5 parsers that use the general red/tree green tree pattern. Now that we're using JuliaSyntax in base, I'd like to replace some of them by a version based on JuliaSyntax, so that I can avoid having to multiple copies of similar infrastructure. As a result, I'm taking a close look at some of the internals of JuliaSyntax. 
## Current Design One thing that I really like about JuliaSyntax is that the parser basically produces a flat output buffer (well two in the current design, after https://github.com/JuliaLang/JuliaSyntax.jl/pull/19). In essence, the output is a post-order depth-first traversal of the parse tree, each node annotated with the range of covered by this range. From there, it is possible to recover the parse tree without re-parsing by partitioning the token list according to the ranges of the non-terminal tokens. One particular application of this is to re-build a pointer-y green tree structure that stores relative by ranges and serves the same incremental parsing purpose as green tree representations in other system. The single-output-buffer design is a great innovation over the pointer-y system. It's much easier to handle and it also enforces important invariants by construction (or at least makes them easy to check). However, I think the whole post-parse tree construction logic is reducing the value of it significantly. In particular, green trees are supposed to be able to serve as compact, persistent representations of parse tree. However, here the compact, persistent representation (the output memory buffer) is not usable as a green tree. We do have the pointer-y `GreenNode` tree, but this has all the same downsides that the single buffer system was supposed to avoid. It uses explicit vectors in every node and even constructing it from the parser output allocates a nontrivial amount of memory to recover the tree structure. ## Proposed design This PR proposed to change the parser output to be directly usable as a green-tree in-situ by changing the post-order dfs traversal to instead produce (byte, node) spans (note that this is the same data as in the current `GreenNode`, except that the node span is implicit in the length of the vector and that here the children are implicit by the position in the output). 
This does essentially mean semantically reverting JuliaLang/JuliaSyntax.jl#19, but the representation proposed here is more compact than both main and the pre-JuliaLang/JuliaSyntax.jl#19 representation. In particular, the output is now a sequence of: ``` struct RawGreenNode head::SyntaxHead # Kind,flags byte_span::UInt32 # Number of bytes covered by this range # If NON_TERMINAL_FLAG is set, this is the total number of child nodes # Otherwise this is a terminal node (i.e. a token) and this is orig_kind node_span_or_orig_kind::UInt32 end ``` The structure is used for both terminals and non-terminals, with the iterpretation differing between them for the last field. This is marginally more compact than the current token list representation on current `main`, because we do not store the `next_byte` pointer (which would instead have to be recovered from the green tree using the usual `O(log n)` algorithm). However, because we store `node_span`, this data structure provides linear time traversal (in reverse order) over the children of the current ndoe. In particular, this means that the tree structure is manifest and does not require the allocation of temporary stacks to recover the tree structure. As a result, the output buffer can now be used as an efficient, persistent, green tree representation. I think the primary weird thing about this design is that the iteration over the children must happen in reverse order. The current GreenNode design has constant time access to all children. Of course, a lookup table for this can be computed in linear time with smaller memory than GreenNode design, but it's important to point out this limitation. That said, for transformation uses cases (e.g. to Expr or Syntax node), constant time access to the children is not really required (although the children are being produced backwards, which looks a little funny). 
That said, to avoid any disruption to downstream users, the `GreenNode` design itself is not changed to use this faster alternative. We can consider doing so in a later PR. ## Benchmark The motivation for this change is not performance, but rather representational cleanliness. That said, it's of course imperative that this not degrade performance. Fortunately, the benchmarks show that this is in fact marginally faster for `Expr` construction, largely because we get to avoid the additional memory allocation traffic from having the tree structure explicitly represented. Parse time itself is essentially unchanged (which is unsurprising, since we're primarily changing what's being put into the output - although the parser does a few lookback-style operations in a few places). Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com> --- JuliaSyntax/Project.toml | 2 - JuliaSyntax/docs/src/design.md | 38 +- JuliaSyntax/src/JuliaSyntax.jl | 5 +- JuliaSyntax/src/expr.jl | 466 +++++++------- .../src/{green_tree.jl => green_node.jl} | 64 +- JuliaSyntax/src/kinds.jl | 6 +- JuliaSyntax/src/parse_stream.jl | 596 +++++++++--------- JuliaSyntax/src/parser.jl | 2 + JuliaSyntax/src/parser_api.jl | 8 +- JuliaSyntax/src/syntax_tree.jl | 116 +++- JuliaSyntax/src/tokenize.jl | 6 +- JuliaSyntax/src/tree_cursors.jl | 166 +++++ JuliaSyntax/src/utils.jl | 9 + JuliaSyntax/test/expr.jl | 14 +- JuliaSyntax/test/green_node.jl | 2 +- JuliaSyntax/test/parse_stream.jl | 52 +- JuliaSyntax/test/parser.jl | 4 +- JuliaSyntax/test/syntax_tree.jl | 6 +- JuliaSyntax/test/test_utils.jl | 3 +- 19 files changed, 941 insertions(+), 624 deletions(-) rename JuliaSyntax/src/{green_tree.jl => green_node.jl} (67%) create mode 100644 JuliaSyntax/src/tree_cursors.jl diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 231dab6fea415..1abbf2f71e44b 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -7,8 +7,6 @@ version = "1.0.2" Serialization = "1.0" julia = "1.0" 
-[deps] - [extras] Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md index fb2a06c293835..968a0e11bf609 100644 --- a/JuliaSyntax/docs/src/design.md +++ b/JuliaSyntax/docs/src/design.md @@ -56,7 +56,7 @@ We use a hand-written lexer (a heavily modified version of The main parser innovation is the `ParseStream` interface which provides a stream-like I/O interface for writing the parser. The parser does not depend on or produce any concrete tree data structure as part of the parsing -phase but the output spans can be post-processed into various tree data +phase but the output nodes can be post-processed into various tree data structures as required. This is like the design of rust-analyzer though with a simpler implementation. @@ -64,35 +64,39 @@ Parsing proceeds by recursive descent; * The parser consumes a flat list of lexed tokens as *input* using `peek()` to examine tokens and `bump()` to consume them. -* The parser produces a flat list of text spans as *output* using `bump()` to - transfer tokens to the output and `position()`/`emit()` for nonterminal ranges. +* The parser produces a flat list of `RawGreenNode`s as *output* using `bump()` to + transfer tokens to the output and `position()`/`emit()` for nonterminal nodes. * Diagnostics are emitted as separate text spans * Whitespace and comments are automatically `bump()`ed and don't need to be handled explicitly. The exception is syntactically relevant newlines in space sensitive mode. * Parser modes are passed down the call tree using `ParseState`. -The output spans track the byte range, a syntax "kind" stored as an integer -tag, and some flags. The kind tag makes the spans a [sum -type](https://blog.waleedkhan.name/union-vs-sum-types/) but where the type is -tracked explicitly outside of Julia's type system. 
+The output nodes track the byte range, a syntax "kind" stored as an integer +tag, and some flags. Each node also stores either the number of child nodes +(for non-terminals) or the original token kind (for terminals). The kind tag +makes the nodes a [sum type](https://blog.waleedkhan.name/union-vs-sum-types/) +but where the type is tracked explicitly outside of Julia's type system. -For lossless parsing the output spans must cover the entire input text. Using +For lossless parsing the output nodes must cover the entire input text. Using `bump()`, `position()` and `emit()` in a natural way also ensures that: -* Spans are cleanly nested with children contained entirely within their parents -* Siblings spans are emitted in source order -* Parent spans are emitted after all their children. +* Nodes are cleanly nested with children contained entirely within their parents +* Sibling nodes are emitted in source order +* Parent nodes are emitted after all their children. -These properties make the output spans naturally isomorphic to a +These properties make the output nodes a post-order traversal of a ["green tree"](#raw-syntax-tree--green-tree) -in the terminology of C#'s Roslyn compiler. +in the terminology of C#'s Roslyn compiler, with the tree structure +implicit in the node spans. ### Tree construction -The `build_tree` function performs a depth-first traversal of the `ParseStream` -output spans allowing it to be assembled into a concrete tree data structure, -for example using the `GreenNode` data type. We further build on top of this to -define `build_tree` for the AST type `SyntaxNode` and for normal Julia `Expr`. +The `build_tree` function uses the implicit tree structure in the `ParseStream` +output to assemble concrete tree data structures. Since the output is already +a post-order traversal of `RawGreenNode`s with node spans encoding parent-child +relationships, tree construction is straightforward. 
We build on top of this to +define `build_tree` for various tree types including `GreenNode`, the AST type +`SyntaxNode`, and for normal Julia `Expr`. ### Error recovery diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 9afff8725f980..3c276984e8c06 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -73,7 +73,7 @@ export @K_str, kind export SyntaxNode -@_public GreenNode, +@_public GreenNode, RedTreeCursor, GreenTreeCursor, span # Helper utilities @@ -95,7 +95,8 @@ include("parser_api.jl") include("literal_parsing.jl") # Tree data structures -include("green_tree.jl") +include("tree_cursors.jl") +include("green_node.jl") include("syntax_tree.jl") include("expr.jl") diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 0a6f16a7e812f..dc802b4ed8653 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -28,7 +28,7 @@ macro isexpr(ex, head, nargs) length($(esc(ex)).args) == $(esc(nargs))) end -function _reorder_parameters!(args::Vector{Any}, params_pos) +function _reorder_parameters!(args::Vector{Any}, params_pos::Int) p = 0 for i = length(args):-1:1 ai = args[i] @@ -48,7 +48,7 @@ function _reorder_parameters!(args::Vector{Any}, params_pos) insert!(args, params_pos, pop!(args)) end -function _strip_parens(ex) +function _strip_parens(ex::Expr) while true if @isexpr(ex, :parens) if length(ex.args) == 1 @@ -63,37 +63,9 @@ function _strip_parens(ex) end end -# Get Julia value of leaf node as it would be represented in `Expr` form -function _expr_leaf_val(node::SyntaxNode) - node.val -end -function _leaf_to_Expr(source, txtbuf, txtbuf_offset, head, srcrange, node) - k = kind(head) - if k == K"MacroName" && view(source, srcrange) == "." - return Symbol("@__dot__") - elseif is_error(k) - return k == K"error" ? - Expr(:error) : - Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") - else - val = isnothing(node) ? 
- parse_julia_literal(txtbuf, head, srcrange .+ txtbuf_offset) : - _expr_leaf_val(node) - if val isa Union{Int128,UInt128,BigInt} - # Ignore the values of large integers and convert them back to - # symbolic/textural form for compatibility with the Expr - # representation of these. - str = replace(source[srcrange], '_'=>"") - macname = val isa Int128 ? Symbol("@int128_str") : - val isa UInt128 ? Symbol("@uint128_str") : - Symbol("@big_str") - return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) - else - return val - end - end -end +reverse_nontrivia_children(cursor::RedTreeCursor) = Iterators.filter(should_include_node, Iterators.reverse(cursor)) +reverse_nontrivia_children(cursor::SyntaxNode) = Iterators.filter(should_include_node, Iterators.reverse(children(cursor))) # Julia string literals in a `K"string"` node may be split into several chunks # interspersed with trivia in two situations: @@ -102,89 +74,110 @@ end # # This function concatenating adjacent string chunks together as done in the # reference parser. -function _string_to_Expr(args) +function _string_to_Expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) + ret = Expr(:string) args2 = Any[] i = 1 - while i <= length(args) - if args[i] isa String - if i < length(args) && args[i+1] isa String + it = reverse_nontrivia_children(cursor) + r = iterate(it) + while r !== nothing + (child, state) = r + ex = node_to_expr(child, source, txtbuf, txtbuf_offset) + if isa(ex, String) + # This branch combines consequent string chunks together. + # It's unrolled once to avoid unnecessary allocations. 
+ r = iterate(it, state) + if r === nothing + pushfirst!(ret.args, ex) + continue + end + (child, state) = r + ex2 = node_to_expr(child, source, txtbuf, txtbuf_offset) + if !isa(ex2, String) + pushfirst!(ret.args, ex) + ex = ex2 + # Fall through to process `ex` (!::String) + else + strings = String[ex2, ex] # Note: reversed order since we're iterating backwards + r = iterate(it, state) + while r !== nothing + (child, state) = r + ex = node_to_expr(child, source, txtbuf, txtbuf_offset) + isa(ex, String) || break + pushfirst!(strings, ex) + r = iterate(it, state) + end buf = IOBuffer() - while i <= length(args) && args[i] isa String - write(buf, args[i]::String) - i += 1 + for s in strings + write(buf, s) end - push!(args2, String(take!(buf))) - else - push!(args2, args[i]) - i += 1 + pushfirst!(ret.args, String(take!(buf))) + r === nothing && break + # Fall through to process `ex` (!::String) end - else - ex = args[i] - if @isexpr(ex, :parens, 1) - ex = _strip_parens(ex) - if ex isa String - # Wrap interpolated literal strings in (string) so we can - # distinguish them from the surrounding text (issue #38501) - # Ie, "$("str")" vs "str" - # https://github.com/JuliaLang/julia/pull/38692 - ex = Expr(:string, ex) - end + end + # ex not a string + if @isexpr(ex, :parens, 1) + ex = _strip_parens(ex) + if ex isa String + # Wrap interpolated literal strings in (string) so we can + # distinguish them from the surrounding text (issue #38501) + # Ie, "$("str")" vs "str" + # https://github.com/JuliaLang/julia/pull/38692 + ex = Expr(:string, ex) end - push!(args2, ex) - i += 1 end + @assert ex !== nothing + pushfirst!(ret.args, ex) + r = iterate(it, state) end - if length(args2) == 1 && args2[1] isa String + + if length(ret.args) == 1 && ret.args[1] isa String # If there's a single string remaining after joining, we unwrap # to give a string literal. 
# """\n a\n b""" ==> "a\nb" - return only(args2) + return only(ret.args) else # This only happens when the kind is K"string" or when an error has occurred. - return Expr(:string, args2...) + return ret end end # Shared fixups for Expr children in cases where the type of the parent node # affects the child layout. -function _fixup_Expr_children!(head, loc, args) +function fixup_Expr_child(head::SyntaxHead, @nospecialize(arg), first::Bool) + isa(arg, Expr) || return arg k = kind(head) eq_to_kw_in_call = ((k == K"call" || k == K"dotcall") && is_prefix_call(head)) || k == K"ref" eq_to_kw_in_params = k != K"vect" && k != K"curly" && k != K"braces" && k != K"ref" coalesce_dot = k in KSet"call dotcall curly" || - (k == K"quote" && flags(head) == COLON_QUOTE) - for i in 1:length(args) - arg = args[i] - was_parens = @isexpr(arg, :parens) - arg = _strip_parens(arg) - if @isexpr(arg, :(=)) && eq_to_kw_in_call && i > 1 - arg = Expr(:kw, arg.args...) - elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple - h, a = arg.args[1]::Tuple{SyntaxHead,Any} - arg = ((!was_parens && coalesce_dot && i == 1) || - (k == K"comparison" && iseven(i)) || - is_syntactic_operator(h)) ? - Symbol(".", a) : Expr(:., a) - elseif @isexpr(arg, :parameters) && eq_to_kw_in_params - pargs = arg.args - for j = 1:length(pargs) - pj = pargs[j] - if @isexpr(pj, :(=)) - pargs[j] = Expr(:kw, pj.args...) - end + (k == K"quote" && has_flags(head, COLON_QUOTE)) + was_parens = @isexpr(arg, :parens) + arg = _strip_parens(arg) + if @isexpr(arg, :(=)) && eq_to_kw_in_call && !first + arg = Expr(:kw, arg.args...) + elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple + # This undoes the "Hack" below" + h, a = arg.args[1]::Tuple{SyntaxHead,Any} + arg = ((!was_parens && coalesce_dot && first) || + is_syntactic_operator(h)) ? 
+ Symbol(".", a) : Expr(:., a) + elseif @isexpr(arg, :parameters) && eq_to_kw_in_params + pargs = arg.args + for j = 1:length(pargs) + pj = pargs[j] + if @isexpr(pj, :(=)) + pargs[j] = Expr(:kw, pj.args...) end - elseif k == K"let" && i == 1 && @isexpr(arg, :block) - filter!(a -> !(a isa LineNumberNode), arg.args) end - args[i] = arg end - return args + return arg end # Remove the `do` block from the final position in a function/macro call arg list -function _extract_do_lambda!(args) +function _extract_do_lambda!(args::Vector{Any}) if length(args) > 1 && Meta.isexpr(args[end], :do_lambda) do_ex = pop!(args)::Expr return Expr(:->, do_ex.args...) @@ -193,7 +186,7 @@ function _extract_do_lambda!(args) end end -function _append_iterspec!(args, ex) +function _append_iterspec!(args::Vector{Any}, @nospecialize(ex)) if @isexpr(ex, :iteration) for iter in ex.args::Vector{Any} push!(args, Expr(:(=), iter.args...)) @@ -204,48 +197,131 @@ function _append_iterspec!(args, ex) return args end +function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) + args = retexpr.args + firstchildhead = head(cursor) + firstchildrange::UnitRange{UInt32} = byte_range(cursor) + itr = reverse_nontrivia_children(cursor) + r = iterate(itr) + while r !== nothing + (child, state) = r + r = iterate(itr, state) + expr = node_to_expr(child, source, txtbuf, txtbuf_offset) + @assert expr !== nothing + firstchildhead = head(child) + firstchildrange = byte_range(child) + pushfirst!(args, fixup_Expr_child(head(cursor), expr, r === nothing)) + end + return (firstchildhead, firstchildrange) +end + # Convert internal node of the JuliaSyntax parse tree to an Expr -function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) - k = kind(head) - if (k == K"var" || k == K"char") && length(args) == 1 - # Ideally we'd like `@check length(args) == 1` as an invariant for all - # K"var" and 
K"char" nodes, but this discounts having embedded error - # nodes when ignore_errors=true is set. - return args[1] - elseif k == K"string" - return _string_to_Expr(args) +function node_to_expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32=UInt32(0)) + if !should_include_node(cursor) + return nothing + end + + nodehead = head(cursor) + k = kind(cursor) + srcrange::UnitRange{UInt32} = byte_range(cursor) + if is_leaf(cursor) + if k == K"MacroName" && view(source, srcrange) == "." + return Symbol("@__dot__") + elseif is_error(k) + return k == K"error" ? + Expr(:error) : + Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") + else + val = parse_julia_literal(txtbuf, head(cursor), srcrange .+ txtbuf_offset) + if val isa Union{Int128,UInt128,BigInt} + # Ignore the values of large integers and convert them back to + # symbolic/textural form for compatibility with the Expr + # representation of these. + str = replace(source[srcrange], '_'=>"") + macname = val isa Int128 ? Symbol("@int128_str") : + val isa UInt128 ? Symbol("@uint128_str") : + Symbol("@big_str") + return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + else + return val + end + end + end + + if k == K"string" + return _string_to_Expr(cursor, source, txtbuf, txtbuf_offset) end loc = source_location(LineNumberNode, source, first(srcrange)) - endloc = source_location(LineNumberNode, source, last(srcrange)) if k == K"cmdstring" - return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc, _string_to_Expr(args)) + return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc, + _string_to_Expr(cursor, source, txtbuf, txtbuf_offset)) end - _fixup_Expr_children!(head, loc, args) - - headstr = untokenize(head, include_flag_suff=false) + headstr = untokenize(nodehead, include_flag_suff=false) headsym = !isnothing(headstr) ? Symbol(headstr) : error("Can't untokenize head of kind $(k)") + retexpr = Expr(headsym) - if k == K"?" 
- headsym = :if + # Block gets special handling for extra line number nodes + if k == K"block" || (k == K"toplevel" && !has_flags(nodehead, TOPLEVEL_SEMICOLONS_FLAG)) + args = retexpr.args + for child in reverse_nontrivia_children(cursor) + expr = node_to_expr(child, source, txtbuf, txtbuf_offset) + @assert expr !== nothing + # K"block" does not have special first-child handling, so we do not need to keep track of that here + pushfirst!(args, fixup_Expr_child(head(cursor), expr, false)) + pushfirst!(args, source_location(LineNumberNode, source, first(byte_range(child)))) + end + isempty(args) && push!(args, loc) + if k == K"block" && has_flags(nodehead, PARENS_FLAG) + popfirst!(args) + end + return retexpr + end + + # Now recurse to parse all arguments + (firstchildhead, firstchildrange) = parseargs!(retexpr, loc, cursor, source, txtbuf, txtbuf_offset) + + return _node_to_expr(retexpr, loc, srcrange, + firstchildhead, firstchildrange, + nodehead, source) +end + +# Split out from the above for codesize reasons, to avoid specialization on multiple +# tree types. +@noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode, + srcrange::UnitRange{UInt32}, + firstchildhead::SyntaxHead, + firstchildrange::UnitRange{UInt32}, + nodehead::SyntaxHead, + source::SourceFile) + args = retexpr.args + k = kind(nodehead) + endloc = source_location(LineNumberNode, source, last(srcrange)) + if (k == K"var" || k == K"char") && length(retexpr.args) == 1 + # `var` and `char` nodes have a single argument which is the value. + # However, errors can add additional errors tokens which we represent + # as e.g. `Expr(:var, ..., Expr(:error))`. + return retexpr.args[1] + elseif k == K"?" 
+ retexpr.head = :if elseif k == K"op=" && length(args) == 3 lhs = args[1] op = args[2] rhs = args[3] headstr = string(args[2], '=') - if is_dotted(head) + if is_dotted(nodehead) headstr = '.'*headstr end - headsym = Symbol(headstr) - args = Any[lhs, rhs] + retexpr.head = Symbol(headstr) + retexpr.args = Any[lhs, rhs] elseif k == K"macrocall" if length(args) >= 2 a2 = args[2] - if @isexpr(a2, :macrocall) && kind(childheads[1]) == K"CmdMacroName" + if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName" # Fix up for custom cmd macros like foo`x` args[2] = a2.args[3] end @@ -254,54 +330,41 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, _reorder_parameters!(args, 2) insert!(args, 2, loc) if do_lambda isa Expr - return Expr(:do, Expr(headsym, args...), do_lambda) - end - elseif k == K"block" || (k == K"toplevel" && !has_flags(head, TOPLEVEL_SEMICOLONS_FLAG)) - if isempty(args) - push!(args, loc) - else - resize!(args, 2*length(args)) - for i = length(childranges):-1:1 - args[2*i] = args[i] - args[2*i-1] = source_location(LineNumberNode, source, first(childranges[i])) - end - end - if k == K"block" && has_flags(head, PARENS_FLAG) - popfirst!(args) + return Expr(:do, retexpr, do_lambda) end elseif k == K"doc" - headsym = :macrocall - args = [GlobalRef(Core, Symbol("@doc")), loc, args...] + retexpr.head = :macrocall + retexpr.args = [GlobalRef(Core, Symbol("@doc")), loc, args...] elseif k == K"dotcall" || k == K"call" # Julia's standard `Expr` ASTs have children stored in a canonical # order which is often not always source order. We permute the children # here as necessary to get the canonical order. 
- if is_infix_op_call(head) || is_postfix_op_call(head) + if is_infix_op_call(nodehead) || is_postfix_op_call(nodehead) args[2], args[1] = args[1], args[2] end # Lower (call x ') to special ' head - if is_postfix_op_call(head) && args[1] == Symbol("'") + if is_postfix_op_call(nodehead) && args[1] == Symbol("'") popfirst!(args) - headsym = Symbol("'") + retexpr.head = Symbol("'") end do_lambda = _extract_do_lambda!(args) # Move parameters blocks to args[2] _reorder_parameters!(args, 2) - if headsym === :dotcall + if retexpr.head === :dotcall funcname = args[1] - if is_prefix_call(head) - headsym = :. - args = Any[funcname, Expr(:tuple, args[2:end]...)] + if is_prefix_call(nodehead) + retexpr.head = :. + retexpr.args = Any[funcname, Expr(:tuple, args[2:end]...)] else # operator calls - headsym = :call + retexpr.head = :call if funcname isa Symbol args[1] = Symbol(:., funcname) end # else funcname could be an Expr(:error), just propagate it end end if do_lambda isa Expr - return Expr(:do, Expr(headsym, args...), do_lambda) + return Expr(:do, retexpr, do_lambda) end elseif k == K"." if length(args) == 2 @@ -312,7 +375,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, elseif length(args) == 1 # Hack: Here we preserve the head of the operator to determine whether # we need to coalesce it with the dot into a single symbol later on. - args[1] = (childheads[1], args[1]) + args[1] = (firstchildhead, args[1]) end elseif k == K"ref" || k == K"curly" # Move parameters blocks to args[2] @@ -335,11 +398,11 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, if @isexpr(a2, :braces) a2a = a2.args _reorder_parameters!(a2a, 2) - args = Any[args[1], a2a...] + retexpr.args = Any[args[1], a2a...] 
end end elseif k == K"catch" - if kind(childheads[1]) == K"Placeholder" + if kind(firstchildhead) == K"Placeholder" args[1] = false end elseif k == K"try" @@ -367,7 +430,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, @assert false "Illegal $a subclause in `try`" end end - args = Any[try_, catch_var, catch_] + empty!(args) + push!(args, try_, catch_var, catch_) if finally_ !== false || else_ !== false push!(args, finally_) if else_ !== false @@ -389,13 +453,13 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, return gen elseif k == K"filter" @assert length(args) == 2 - args = _append_iterspec!(Any[args[2]], args[1]) + retexpr.args = _append_iterspec!(Any[args[2]], args[1]) elseif k == K"nrow" || k == K"ncat" # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags - pushfirst!(args, numeric_flags(flags(head))) + pushfirst!(args, numeric_flags(flags(nodehead))) elseif k == K"typed_ncat" - insert!(args, 2, numeric_flags(flags(head))) + insert!(args, 2, numeric_flags(flags(nodehead))) elseif k == K"elseif" # Block for conditional's source location args[1] = Expr(:block, loc, args[1]) @@ -406,8 +470,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # compatibility. We should consider deleting this special case in # the future as a minor change. if length(a1.args) == 1 && - (!has_flags(childheads[1], PARENS_FLAG) || - !has_flags(childheads[1], TRAILING_COMMA_FLAG)) && + (!has_flags(firstchildhead, PARENS_FLAG) || + !has_flags(firstchildhead, TRAILING_COMMA_FLAG)) && !Meta.isexpr(a1.args[1], :parameters) # `(a) -> c` is parsed without tuple on lhs in Expr form args[1] = a1.args[1] @@ -419,7 +483,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, else a111 = only(a11.args) assgn = @isexpr(a111, :kw) ? Expr(:(=), a111.args...) 
: a111 - argloc = source_location(LineNumberNode, source, last(childranges[1])) + argloc = source_location(LineNumberNode, source, last(firstchildrange)) args[1] = Expr(:block, a1.args[2], argloc, assgn) end end @@ -433,12 +497,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end elseif k == K"function" if length(args) > 1 - if has_flags(head, SHORT_FORM_FUNCTION_FLAG) + if has_flags(nodehead, SHORT_FORM_FUNCTION_FLAG) a2 = args[2] if !@isexpr(a2, :block) args[2] = Expr(:block, a2) end - headsym = :(=) + retexpr.head = :(=) else a1 = args[1] if @isexpr(a1, :tuple) @@ -451,31 +515,36 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end end end - pushfirst!((args[2]::Expr).args, loc) + arg2 = args[2] + # Only push if this is an Expr - could be an ErrorVal + isa(arg2, Expr) && pushfirst!(arg2.args, loc) end elseif k == K"macro" if length(args) > 1 pushfirst!((args[2]::Expr).args, loc) end elseif k == K"module" - pushfirst!(args, !has_flags(head, BARE_MODULE_FLAG)) + pushfirst!(args, !has_flags(nodehead, BARE_MODULE_FLAG)) pushfirst!((args[3]::Expr).args, loc) elseif k == K"inert" return QuoteNode(only(args)) - elseif k == K"quote" && length(args) == 1 - a1 = only(args) - if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool) - # Flisp parser does an optimization here: simple values are stored - # as inert QuoteNode rather than in `Expr(:quote)` quasiquote - return QuoteNode(a1) + elseif k == K"quote" + if length(args) == 1 + a1 = only(args) + if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool) + # Flisp parser does an optimization here: simple values are stored + # as inert QuoteNode rather than in `Expr(:quote)` quasiquote + return QuoteNode(a1) + end end elseif k == K"do" # Temporary head which is picked up by _extract_do_lambda - headsym = :do_lambda + retexpr.head = :do_lambda elseif k == K"let" a1 = args[1] if @isexpr(a1, :block) a1a = (args[1]::Expr).args + filter!(a -> !(a isa 
LineNumberNode), a1a) # Ugly logic to strip the Expr(:block) in certain cases for compatibility if length(a1a) == 1 a = a1a[1] @@ -489,17 +558,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, a1 = args[1] if @isexpr(a1, :const) # Normalize `local const` to `const local` - args[1] = Expr(headsym, (a1::Expr).args...) - headsym = :const + args[1] = Expr(retexpr.head, (a1::Expr).args...) + retexpr.head = :const elseif @isexpr(a1, :tuple) # Normalize `global (x, y)` to `global x, y` - args = a1.args + retexpr.args = a1.args end end elseif k == K"return" && isempty(args) push!(args, nothing) elseif k == K"juxtapose" - headsym = :call + retexpr.head = :call pushfirst!(args, :*) elseif k == K"struct" @assert args[2].head == :block @@ -515,9 +584,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end end args[2] = fields - pushfirst!(args, has_flags(head, MUTABLE_FLAG)) + pushfirst!(args, has_flags(nodehead, MUTABLE_FLAG)) elseif k == K"importpath" - headsym = :. + retexpr.head = :. for i = 1:length(args) ai = args[i] if ai isa QuoteNode @@ -529,72 +598,41 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, elseif k == K"wrapper" # This should only happen for errors wrapped next to what should have # been single statements or atoms - represent these as blocks. - headsym = :block + retexpr.head = :block + elseif k == K"comparison" + for i = 2:2:length(args) + arg = args[i] + if @isexpr(arg, :., 1) + args[i] = Symbol(".", arg.args[1]) + end + end end - return Expr(headsym, args...) -end - - -# Stack entry for build_tree Expr conversion. -# We'd use `Tuple{UnitRange{Int},SyntaxHead,Any}` instead, but that's an -# abstract type due to the `Any` and tuple covariance which destroys -# performance. 
-struct _BuildExprStackEntry - srcrange::UnitRange{Int} - head::SyntaxHead - ex::Any + return retexpr end function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...) source = SourceFile(stream, filename=filename, first_line=first_line) txtbuf = unsafe_textbuf(stream) - args = Any[] - childranges = UnitRange{Int}[] - childheads = SyntaxHead[] - entry = build_tree(_BuildExprStackEntry, stream; kws...) do head, srcrange, nodechildren - if is_trivia(head) && !is_error(head) - return nothing - end - k = kind(head) - if isnothing(nodechildren) - ex = _leaf_to_Expr(source, txtbuf, 0, head, srcrange, nothing) - else - resize!(childranges, length(nodechildren)) - resize!(childheads, length(nodechildren)) - resize!(args, length(nodechildren)) - for (i,c) in enumerate(nodechildren) - childranges[i] = c.srcrange - childheads[i] = c.head - args[i] = c.ex - end - ex = _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) - end - return _BuildExprStackEntry(srcrange, head, ex) - end - loc = source_location(LineNumberNode, source, first(entry.srcrange)) - only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex])) -end - -function _to_expr(node) - file = sourcefile(node) - if is_leaf(node) - txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) - return _leaf_to_Expr(file, txtbuf, txtbuf_offset, head(node), byte_range(node), node) + cursor = RedTreeCursor(stream) + wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS) + if has_toplevel_siblings(cursor) + entry = Expr(:block) + for child in + Iterators.filter(should_include_node, reverse_toplevel_siblings(cursor)) + pushfirst!(entry.args, fixup_Expr_child(wrapper_head, node_to_expr(child, source, txtbuf), false)) + end + length(entry.args) == 1 && (entry = only(entry.args)) + else + entry = fixup_Expr_child(wrapper_head, node_to_expr(cursor, source, txtbuf), false) end - cs = children(node) - args = Any[_to_expr(c) for c in cs] - 
_internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args) -end - -function to_expr(node) - ex = _to_expr(node) - loc = source_location(LineNumberNode, node) - only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex])) + return entry end function Base.Expr(node::SyntaxNode) - to_expr(node) + source = sourcefile(node) + txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(source)) + wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS) + return fixup_Expr_child(wrapper_head, node_to_expr(node, source, txtbuf, UInt32(txtbuf_offset)), false) end - diff --git a/JuliaSyntax/src/green_tree.jl b/JuliaSyntax/src/green_node.jl similarity index 67% rename from JuliaSyntax/src/green_tree.jl rename to JuliaSyntax/src/green_node.jl index 4164529a51d71..61bdbb01c5f92 100644 --- a/JuliaSyntax/src/green_tree.jl +++ b/JuliaSyntax/src/green_node.jl @@ -1,24 +1,10 @@ """ - GreenNode(head, span) - GreenNode(head, children...) + struct GreenNode -A "green tree" is a lossless syntax tree which overlays all the source text. -The most basic properties of a green tree are that: - -* Nodes cover a contiguous span of bytes in the text -* Sibling nodes are ordered in the same order as the text - -As implementation choices, we choose that: - -* Nodes are immutable and don't know their parents or absolute position, so can - be cached and reused -* Nodes are homogeneously typed at the language level so they can be stored - concretely, with the `head` defining the node type. Normally this would - include a "syntax kind" enumeration, but it can also include flags and record - information the parser knew about the layout of the child nodes. -* For simplicity and uniformity, leaf nodes cover a single token in the source. - This is like rust-analyzer, but different from Roslyn where leaves can - include syntax trivia. +An explicit pointer-y representation of the green tree produced by the parser. 
+See [`RawGreenNode`](@ref) for documentation on working with the implicit green +tree directly. However, this representation is useful for introspection as it +provides O(1) access to the children (as well as forward iteration). """ struct GreenNode{Head} head::Head @@ -46,7 +32,7 @@ span(node::GreenNode) = node.span Base.getindex(node::GreenNode, i::Int) = children(node)[i] Base.getindex(node::GreenNode, rng::UnitRange) = view(children(node), rng) Base.firstindex(node::GreenNode) = 1 -Base.lastindex(node::GreenNode) = length(children(node)) +Base.lastindex(node::GreenNode) = children(node) === nothing ? 0 : length(children(node)) """ Get absolute position and span of the child of `node` at the given tree `path`. @@ -132,10 +118,38 @@ function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractStr _show_green_node(io, node, "", 1, str, show_trivia) end -function build_tree(::Type{GreenNode}, stream::ParseStream; kws...) - build_tree(GreenNode{SyntaxHead}, stream; kws...) do h, srcrange, cs - span = length(srcrange) - isnothing(cs) ? GreenNode(h, span) : - GreenNode(h, span, collect(GreenNode{SyntaxHead}, cs)) +function GreenNode(cursor::GreenTreeCursor) + chead = head(cursor) + T = typeof(chead) + if is_leaf(cursor) + return GreenNode{T}(head(cursor), span(cursor), nothing) + else + children = GreenNode{T}[] + for child in reverse(cursor) + pushfirst!(children, GreenNode(child)) + end + return GreenNode{T}(head(cursor), span(cursor), children) + end +end + +function build_tree(T::Type{GreenNode}, stream::ParseStream; kws...) + cursor = GreenTreeCursor(stream) + if has_toplevel_siblings(cursor) + # There are multiple toplevel nodes, e.g. because we're using this + # to test a partial parse. 
Wrap everything in K"wrapper" + all_processed = 0 + local cs + for child in reverse_toplevel_siblings(cursor) + c = GreenNode(child) + if !@isdefined(cs) + cs = GreenNode{SyntaxHead}[c] + else + pushfirst!(cs, c) + end + end + @assert length(cs) != 1 + return GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), stream.next_byte-1, cs) + else + return GreenNode(cursor) end end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index c7d27e3597a6b..9d8999c7dd13d 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -27,7 +27,7 @@ primitive type Kind 16 end const _kind_str_to_int = Dict{String,UInt16}() const _kind_int_to_str = Dict{UInt16,String}() const _kind_modules = Dict{Int,Union{Symbol,Module}}( - 0=>:JuliaSyntax, + 0=>nameof(@__MODULE__), 1=>:JuliaLowering, 2=>:JuliaSyntaxFormatter ) @@ -49,7 +49,7 @@ function Kind(s::AbstractString) Kind(i) end -Base.string(x::Kind) = _kind_int_to_str[reinterpret(UInt16, x)] +Base.string(x::Kind) = get(_kind_int_to_str, reinterpret(UInt16, x), "") Base.print(io::IO, x::Kind) = print(io, string(x)) Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y) @@ -127,7 +127,7 @@ end """ register_kinds!(mod, module_id, names) -Register custom `Kind`s with the given `names`, belonging to a module `mod`. +Register custom `Kind`s with the given `names`, belonging to a module `mod`. `names` is an array of arbitrary strings. 
In order for kinds to be represented by a small number of bits, some nontrivial diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/parse_stream.jl index 0c57c2a43f390..1000fdaa123aa 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/parse_stream.jl @@ -71,6 +71,11 @@ Set for K"module" when it's not bare (`module`, not `baremodule`) """ const BARE_MODULE_FLAG = RawFlags(1<<5) +""" +Set for nodes that are non-terminals +""" +const NON_TERMINAL_FLAG = RawFlags(1<<7) + # Flags holding the dimension of an nrow or other UInt8 not held in the source # TODO: Given this is only used for nrow/ncat, we could actually use all the flags? const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) @@ -282,25 +287,105 @@ preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace #------------------------------------------------------------------------------- """ -Range in the source text which will become a node in the tree. Can be either a -token (leaf node of the tree) or an interior node, depending on how the -start_mark compares to previous nodes. -""" -struct TaggedRange - head::SyntaxHead # Kind,flags - # The following field is used for one of two things: - # - For leaf nodes it's an index in the tokens array - # - For non-leaf nodes it points to the index of the first child - first_token::UInt32 - last_token::UInt32 + RawGreenNode(head::SyntaxHead, byte_span::UInt32, orig_kind::Kind) # Terminal + RawGreenNode(head::SyntaxHead, byte_span::UInt32, nchildren::UInt32) # Non-terminal + +A "green tree" is a lossless syntax tree which overlays all the source text. 
+The most basic properties of a green tree are that: + +* Nodes cover a contiguous span of bytes in the text +* Sibling nodes are ordered in the same order as the text + +As implementation choices, we choose that: + +* Nodes are immutable and don't know their parents or absolute position, so can + be cached and reused +* Nodes are homogeneously typed at the language level so they can be stored + concretely, with the `head` defining the node type. Normally this would + include a "syntax kind" enumeration, but it can also include flags and record + information the parser knew about the layout of the child nodes. +* For simplicity and uniformity, leaf nodes cover a single token in the source. + This is like rust-analyzer, but different from Roslyn where leaves can + include syntax trivia. +* The parser produces a single buffer of `RawGreenNode` which encodes the tree. + There are higher level accessors, which make working with this tree easier. +""" +struct RawGreenNode + head::SyntaxHead # Kind,flags + byte_span::UInt32 # Number of bytes covered by this range + # If NON_TERMINAL_FLAG is set, this is the total number of child nodes + # Otherwise this is a terminal node (i.e. 
a token) and this is orig_kind + node_span_or_orig_kind::UInt32 + + # Constructor for terminal nodes (tokens) + function RawGreenNode(head::SyntaxHead, byte_span::Integer, orig_kind::Kind) + @assert (flags(head) & NON_TERMINAL_FLAG) == 0 + new(head, UInt32(byte_span), UInt32(reinterpret(UInt16, orig_kind))) + end + + # Constructor for non-terminal nodes - automatically sets NON_TERMINAL_FLAG + function RawGreenNode(head::SyntaxHead, byte_span::Integer, node_span::Integer) + h = SyntaxHead(kind(head), flags(head) | NON_TERMINAL_FLAG) + new(h, UInt32(byte_span), UInt32(node_span)) + end + + global reset_node + function reset_node(node::RawGreenNode, kind, flags) + new(_reset_node_head(node, kind, flags), + getfield(node, :byte_span), + getfield(node, :node_span_or_orig_kind)) + end end -head(range::TaggedRange) = range.head +function _reset_node_head(node, k, f) + if !isnothing(f) + f = RawFlags(f) + @assert (f & NON_TERMINAL_FLAG) == 0 + f |= flags(node) & NON_TERMINAL_FLAG + else + f = flags(node) + end + h = SyntaxHead(isnothing(k) ? 
kind(node) : k, f) +end + +Base.summary(node::RawGreenNode) = summary(node.head) +function Base.show(io::IO, node::RawGreenNode) + print(io, summary(node), " (", node.byte_span, " bytes,") + if is_terminal(node) + print(io, " orig_kind=", node.orig_kind, ")") + else + print(io, " ", node.node_span, " children)") + end +end + +function Base.getproperty(rgn::RawGreenNode, name::Symbol) + if name === :node_span + has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) || return UInt32(0) # Leaf nodes have no children + return getfield(rgn, :node_span_or_orig_kind) + elseif name === :orig_kind + has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) && error("Cannot access orig_kind for non-terminal node") + return Kind(getfield(rgn, :node_span_or_orig_kind)) + end + getfield(rgn, name) +end + +head(range::RawGreenNode) = range.head + +# Helper functions for unified output +is_terminal(node::RawGreenNode) = !has_flags(node.head, NON_TERMINAL_FLAG) +is_non_terminal(node::RawGreenNode) = has_flags(node.head, NON_TERMINAL_FLAG) #------------------------------------------------------------------------------- struct ParseStreamPosition - token_index::UInt32 # Index of last token in output - range_index::UInt32 + """ + The current position in the byte stream, i.e. the byte at `byte_index` is + the first byte of the next token to be parsed. + """ + byte_index::UInt32 + """ + The total number of nodes (terminal + non-terminal) in the output so far. + """ + node_index::UInt32 end const NO_POSITION = ParseStreamPosition(0, 0) @@ -349,10 +434,9 @@ mutable struct ParseStream lookahead_index::Int # Pool of stream positions for use as working space in parsing position_pool::Vector{Vector{ParseStreamPosition}} - # Buffer of finalized tokens - tokens::Vector{SyntaxToken} - # Parser output as an ordered sequence of ranges, parent nodes after children. 
- ranges::Vector{TaggedRange} + output::Vector{RawGreenNode} + # Current byte position in the output (the next byte to be written) + next_byte::Int # Parsing diagnostics (errors/warnings etc) diagnostics::Vector{Diagnostic} # Counter for number of peek()s we've done without making progress via a bump() @@ -372,17 +456,16 @@ mutable struct ParseStream # numbers. This means we're inexact for old dev versions but that seems # like an acceptable tradeoff. ver = (version.major, version.minor) - # Initial sentinel token containing the first byte of the first real token. - sentinel = SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), - K"TOMBSTONE", false, next_byte) + # Initial sentinel node (covering all ignored bytes before the first token) + sentinel = RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), next_byte-1, K"TOMBSTONE") new(text_buf, text_root, lexer, Vector{SyntaxToken}(), 1, Vector{Vector{ParseStreamPosition}}(), - SyntaxToken[sentinel], - Vector{TaggedRange}(), + RawGreenNode[sentinel], + next_byte, # Initialize next_byte from the parameter Vector{Diagnostic}(), 0, ver) @@ -427,7 +510,7 @@ function ParseStream(io::IO; version=VERSION) end function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) - println(io, "ParseStream at position $(_next_byte(stream))") + println(io, "ParseStream at position $(stream.next_byte)") end function show_diagnostics(io::IO, stream::ParseStream) @@ -448,19 +531,11 @@ function release_positions(stream, positions) end #------------------------------------------------------------------------------- -# Return true when a token was emitted last at stream position `pos` +# Return true when a terminal (token) was emitted last at stream position `pos` function token_is_last(stream, pos) - return pos.range_index == 0 || - pos.token_index > stream.ranges[pos.range_index].last_token -end - -# Compute the first byte of a token at given index `i` -function token_first_byte(stream, i) - stream.tokens[i-1].next_byte -end - 
-function token_last_byte(stream::ParseStream, i) - stream.tokens[i].next_byte - 1 + # In the unified structure, check if the node at pos is a terminal + return pos.node_index > 0 && pos.node_index <= length(stream.output) && + is_terminal(stream.output[pos.node_index]) end function lookahead_token_first_byte(stream, i) @@ -507,7 +582,7 @@ end # Return the index of the next byte of the input function _next_byte(stream) - last(stream.tokens).next_byte + stream.next_byte end # Find the index of the next nontrivia token @@ -571,7 +646,7 @@ end @noinline function _parser_stuck_error(stream) # Optimization: emit unlikely errors in a separate function - error("The parser seems stuck at byte $(_next_byte(stream))") + error("The parser seems stuck at byte $(stream.next_byte)") end """ @@ -644,18 +719,19 @@ Retroactively inspecting or modifying the parser's output can be confusing, so using this function should be avoided where possible. """ function peek_behind(stream::ParseStream, pos::ParseStreamPosition) - if token_is_last(stream, pos) && pos.token_index > 0 - t = stream.tokens[pos.token_index] - return (kind=kind(t), - flags=flags(t), - orig_kind=t.orig_kind, - is_leaf=true) - elseif !isempty(stream.ranges) && pos.range_index > 0 - r = stream.ranges[pos.range_index] - return (kind=kind(r), - flags=flags(r), - orig_kind=K"None", - is_leaf=false) + if pos.node_index > 0 && pos.node_index <= length(stream.output) + node = stream.output[pos.node_index] + if is_terminal(node) + return (kind=kind(node), + flags=flags(node), + orig_kind=node.orig_kind, + is_leaf=true) + else + return (kind=kind(node), + flags=flags(node), + orig_kind=K"None", + is_leaf=false) + end else return (kind=K"None", flags=EMPTY_FLAGS, @@ -664,70 +740,57 @@ function peek_behind(stream::ParseStream, pos::ParseStreamPosition) end end +""" + first_child_position(stream::ParseStream, pos::ParseStreamPosition) + +Find the first non-trivia child of this node (in the GreenTree/RedTree sense) and return +its 
position. +""" function first_child_position(stream::ParseStream, pos::ParseStreamPosition) - ranges = stream.ranges - @assert pos.range_index > 0 - parent = ranges[pos.range_index] - # Find the first nontrivia range which is a child of this range but not a - # child of the child - c = 0 - for i = pos.range_index-1:-1:1 - if ranges[i].first_token < parent.first_token - break - end - if (c == 0 || ranges[i].first_token < ranges[c].first_token) && !is_trivia(ranges[i]) - c = i - end + output = stream.output + @assert pos.node_index > 0 + cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-UInt32(1)) + candidate = nothing + for child in reverse(cursor) + is_trivia(child) && continue + candidate = child end - # Find first nontrivia token - t = 0 - for i = parent.first_token:parent.last_token - if !is_trivia(stream.tokens[i]) - t = i - break + candidate !== nothing && return ParseStreamPosition(candidate.byte_end+UInt32(1), candidate.green.position) + + # No children found - return the first non-trivia *token* (even if it + # is the child of a non-terminal trivia node (e.g. an error)). + byte_end = pos.byte_index + for i in pos.node_index-1:-1:(pos.node_index - treesize(cursor)) + node = output[i] + if is_terminal(node) + if !is_trivia(node) + return ParseStreamPosition(byte_end, i) + end + byte_end -= node.byte_span end end - if c == 0 || (t != 0 && ranges[c].first_token > t) - # Return leaf node at `t` - return ParseStreamPosition(t, 0) - else - # Return interior node at `c` - return ParseStreamPosition(ranges[c].last_token, c) - end + # Still none found. 
Return a sentinel value + return ParseStreamPosition(0, 0) end -function last_child_position(stream::ParseStream, pos::ParseStreamPosition) - ranges = stream.ranges - @assert pos.range_index > 0 - parent = ranges[pos.range_index] - # Find the last nontrivia range which is a child of this range - c = 0 - if pos.range_index > 1 - i = pos.range_index-1 - if ranges[i].first_token >= parent.first_token - # Valid child of current range - c = i - end - end - - # Find last nontrivia token - t = 0 - for i = parent.last_token:-1:parent.first_token - if !is_trivia(stream.tokens[i]) - t = i - break - end - end +""" + first_child_position(stream::ParseStream, pos::ParseStreamPosition) - if c == 0 || (t != 0 && ranges[c].last_token < t) - # Return leaf node at `t` - return ParseStreamPosition(t, 0) - else - # Return interior node at `c` - return ParseStreamPosition(ranges[c].last_token, c) + Find the last non-trivia child of this node (in the GreenTree/RedTree sense) and + return its position (i.e. the position as if that child had been the last thing parsed). 
+""" +function last_child_position(stream::ParseStream, pos::ParseStreamPosition) + output = stream.output + @assert pos.node_index > 0 + cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-1) + candidate = nothing + for child in reverse(cursor) + is_trivia(child) && continue + return ParseStreamPosition(child.byte_end+UInt32(1), child.green.position) end + return ParseStreamPosition(0, 0) end # Get last position in stream "of interest", skipping @@ -736,24 +799,34 @@ end # * whitespace (if skip_trivia=true) function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true, skip_parens::Bool=true) - token_index = lastindex(stream.tokens) - range_index = lastindex(stream.ranges) + # Work backwards through the output + node_idx = length(stream.output) + byte_idx = stream.next_byte + + # Skip parens nodes if requested if skip_parens - while range_index >= firstindex(stream.ranges) && - kind(stream.ranges[range_index]) == K"parens" - range_index -= 1 + while node_idx > 0 + node = stream.output[node_idx] + if is_non_terminal(node) && kind(node) == K"parens" + node_idx -= 1 + else + break + end end end - last_token_in_nonterminal = range_index == 0 ? 0 : - stream.ranges[range_index].last_token - while token_index > last_token_in_nonterminal - t = stream.tokens[token_index] - if kind(t) != K"TOMBSTONE" && (!skip_trivia || !is_trivia(t)) + + # Skip trivia if requested + while node_idx > 0 + node = stream.output[node_idx] + if kind(node) == K"TOMBSTONE" || (skip_trivia && is_trivia(node)) + node_idx -= 1 + byte_idx -= node.byte_span + else break end - token_index -= 1 end - return ParseStreamPosition(token_index, range_index) + + return ParseStreamPosition(byte_idx, node_idx) end function peek_behind(stream::ParseStream; kws...) 
@@ -767,7 +840,7 @@ end # Bump up until the `n`th token # flags and remap_kind are applied to any non-trivia tokens -function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None") +function _bump_until_n(stream::ParseStream, n::Integer, new_flags, remap_kind=K"None") if n < stream.lookahead_index return end @@ -777,13 +850,28 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None if k == K"EndMarker" break end - f = flags | (@__MODULE__).flags(tok) + f = new_flags | flags(tok) is_trivia = is_whitespace(k) is_trivia && (f |= TRIVIA_FLAG) outk = (is_trivia || remap_kind == K"None") ? k : remap_kind h = SyntaxHead(outk, f) - push!(stream.tokens, - SyntaxToken(h, kind(tok), tok.preceding_whitespace, tok.next_byte)) + + # Calculate byte span for this token + if i == stream.lookahead_index + # First token in this batch - calculate span from current stream position + prev_byte = stream.next_byte + else + # Subsequent tokens - use previous token's next_byte + prev_byte = stream.lookahead[i-1].next_byte + end + byte_span = Int(tok.next_byte) - Int(prev_byte) + + # Create terminal RawGreenNode + node = RawGreenNode(h, byte_span, kind(tok)) + push!(stream.output, node) + + # Update next_byte + stream.next_byte += byte_span end stream.lookahead_index = n + 1 # Defuse the time bomb @@ -838,9 +926,12 @@ example, `2x` means `2*x` via the juxtaposition rules. """ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; error=nothing) - b = _next_byte(stream) + b = stream.next_byte h = SyntaxHead(kind, flags) - push!(stream.tokens, SyntaxToken(h, (@__MODULE__).kind(h), false, b)) + # Zero-width token + node = RawGreenNode(h, 0, kind) + push!(stream.output, node) + # No need to update next_byte for zero-width token if !isnothing(error) emit_diagnostic(stream, b:b-1, error=error) end @@ -858,8 +949,14 @@ whitespace if necessary with bump_trivia. 
function bump_glue(stream::ParseStream, kind, flags) i = stream.lookahead_index h = SyntaxHead(kind, flags) - push!(stream.tokens, SyntaxToken(h, kind, false, - stream.lookahead[i+1].next_byte)) + # Calculate byte span for glued tokens + start_byte = stream.next_byte + end_byte = stream.lookahead[i+1].next_byte + byte_span = end_byte - start_byte + + node = RawGreenNode(h, byte_span, kind) + push!(stream.output, node) + stream.next_byte += byte_span stream.lookahead_index += 2 stream.peek_count = 0 return position(stream) @@ -887,24 +984,23 @@ simpler one which only splits preceding dots? function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} tok = stream.lookahead[stream.lookahead_index] stream.lookahead_index += 1 - b = _next_byte(stream) - toklen = tok.next_byte - b + start_b = _next_byte(stream) + toklen = tok.next_byte - start_b + prev_b = start_b for (i, (nbyte, k, f)) in enumerate(split_spec) h = SyntaxHead(k, f) - b += nbyte < 0 ? (toklen + nbyte) : nbyte + actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte orig_k = k == K"." ? K"." : kind(tok) - push!(stream.tokens, SyntaxToken(h, orig_k, false, b)) + node = RawGreenNode(h, actual_nbyte, orig_k) + push!(stream.output, node) + prev_b += actual_nbyte + stream.next_byte += actual_nbyte end - @assert tok.next_byte == b + @assert tok.next_byte == prev_b stream.peek_count = 0 return position(stream) end -function _reset_node_head(x, k, f) - h = SyntaxHead(isnothing(k) ? kind(x) : k, - isnothing(f) ? flags(x) : f) -end - """ Reset kind or flags of an existing node in the output stream @@ -915,17 +1011,8 @@ in those cases. 
""" function reset_node!(stream::ParseStream, pos::ParseStreamPosition; kind=nothing, flags=nothing) - if token_is_last(stream, pos) - t = stream.tokens[pos.token_index] - stream.tokens[pos.token_index] = - SyntaxToken(_reset_node_head(t, kind, flags), - t.orig_kind, t.preceding_whitespace, t.next_byte) - else - r = stream.ranges[pos.range_index] - stream.ranges[pos.range_index] = - TaggedRange(_reset_node_head(r, kind, flags), - r.first_token, r.last_token) - end + node = stream.output[pos.node_index] + stream.output[pos.node_index] = reset_node(node, kind, flags) end """ @@ -937,45 +1024,57 @@ Hack alert! This is used only for managing the complicated rules related to dedenting triple quoted strings. """ function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes) - i = pos.token_index - t1 = stream.tokens[i] - t2 = stream.tokens[i+1] + i = pos.node_index + t1 = stream.output[i] + t2 = stream.output[i+1] + @assert is_terminal(t1) && is_terminal(t2) - t1_next_byte = t1.next_byte + numbytes - stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind, - t1.preceding_whitespace, t1_next_byte) + stream.output[i] = RawGreenNode(t1.head, t1.byte_span + numbytes, + t1.orig_kind) - t2_is_empty = t1_next_byte == t2.next_byte + t2_is_empty = t2.byte_span == numbytes head2 = t2_is_empty ? 
SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head - stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind, - t2.preceding_whitespace, t2.next_byte) + stream.output[i+1] = RawGreenNode(head2, t2.byte_span - numbytes, + t2.orig_kind) return t2_is_empty end # Get position of last item emitted into the output stream function Base.position(stream::ParseStream) - ParseStreamPosition(lastindex(stream.tokens), lastindex(stream.ranges)) + byte_idx = stream.next_byte + node_idx = length(stream.output) + + ParseStreamPosition(byte_idx, node_idx) end """ emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing) -Emit a new text span into the output which covers source bytes from `mark` to +Emit a new non-terminal node into the output which covers source bytes from `mark` to the end of the most recent token which was `bump()`'ed. The starting `mark` -should be a previous return value of `position()`. +should be a previous return value of `position()`. The emitted node will have +its `node_span` set to the number of nodes emitted since `mark`. """ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, flags::RawFlags = EMPTY_FLAGS; error=nothing) - first_token = mark.token_index + 1 - range = TaggedRange(SyntaxHead(kind, flags), first_token, length(stream.tokens)) + # Calculate byte span from mark position to current + mark_byte = mark.byte_index + current_byte = stream.next_byte + byte_span = current_byte - mark_byte + + # Calculate node span (number of children, exclusive of the node itself) + node_span = length(stream.output) - mark.node_index + + # Create non-terminal RawGreenNode + node = RawGreenNode(SyntaxHead(kind, flags), byte_span, node_span) + if !isnothing(error) - # The first child must be a leaf, otherwise ranges would be improperly - # nested. 
- fbyte = token_first_byte(stream, first_token) - lbyte = token_last_byte(stream, lastindex(stream.tokens)) - emit_diagnostic(stream, fbyte:lbyte, error=error) + emit_diagnostic(stream, mark_byte:current_byte-1, error=error) end - push!(stream.ranges, range) + + push!(stream.output, node) + # Note: emit() for non-terminals doesn't advance next_byte + # because it's a range over already-emitted tokens return position(stream) end @@ -1008,25 +1107,21 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; trim_whitespace=true, kws...) - i = mark.token_index - j = lastindex(stream.tokens) + # Find the byte range from mark to current position + start_byte = mark.byte_index + end_byte = stream.next_byte - 1 + if trim_whitespace - while i < j && is_whitespace(stream.tokens[j]) - j -= 1 - end - while i+1 < j && is_whitespace(stream.tokens[i+1]) - i += 1 - end + # TODO: Implement whitespace trimming for unified output + # This would require scanning the output array end - byterange = stream.tokens[i].next_byte:stream.tokens[j].next_byte-1 - emit_diagnostic(stream, byterange; kws...) + + emit_diagnostic(stream, start_byte:end_byte; kws...) end function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, end_mark::ParseStreamPosition; kws...) - fbyte = stream.tokens[mark.token_index].next_byte - lbyte = stream.tokens[end_mark.token_index].next_byte-1 - emit_diagnostic(stream, fbyte:lbyte; kws...) + emit_diagnostic(stream, mark.byte_index:end_mark.byte_index-1; kws...) 
end function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, @@ -1039,15 +1134,21 @@ end function validate_tokens(stream::ParseStream) txtbuf = unsafe_textbuf(stream) - toks = stream.tokens charbuf = IOBuffer() - for i = 2:length(toks) - t = toks[i] - k = kind(t) - fbyte = toks[i-1].next_byte - nbyte = t.next_byte + + # Process terminal nodes in the output + fbyte = stream.output[1].byte_span+1 # Start after sentinel + for i = 2:length(stream.output) + node = stream.output[i] + if !is_terminal(node) || kind(node) == K"TOMBSTONE" + continue + end + + k = kind(node) + nbyte = fbyte + node.byte_span tokrange = fbyte:nbyte-1 error_kind = K"None" + if k in KSet"Integer BinInt OctInt HexInt" # The following shouldn't be able to error... # parse_int_literal @@ -1090,7 +1191,7 @@ function validate_tokens(stream::ParseStream) error="character literal contains multiple characters") end end - elseif k == K"String" && !has_flags(t, RAW_STRING_FLAG) + elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG) had_error = unescape_julia_string(devnull, txtbuf, fbyte, nbyte, stream.diagnostics) if had_error @@ -1108,11 +1209,14 @@ function validate_tokens(stream::ParseStream) end emit_diagnostic(stream, tokrange, error=msg) end + if error_kind != K"None" - toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS), - t.orig_kind, t.preceding_whitespace, - t.next_byte) + # Update the node with new error kind + stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS), + node.byte_span, node.orig_kind) end + + fbyte = nbyte end sort!(stream.diagnostics, by=first_byte) end @@ -1121,89 +1225,6 @@ end # API for extracting results from ParseStream -""" - build_tree(make_node::Function, ::Type{StackEntry}, stream::ParseStream; kws...) - -Construct a tree from a ParseStream using depth-first traversal. 
`make_node` -must have the signature - - make_node(head::SyntaxHead, span::Integer, children) - -where `children` is either `nothing` for leaf nodes or an iterable of the -children of type `StackEntry` for internal nodes. `StackEntry` may be a node -type, but also may include other information required during building the tree. - -If the ParseStream has multiple nodes at the top level, `K"wrapper"` is used to -wrap them in a single node. - -The tree here is constructed depth-first in postorder. -""" -function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; - kws...) where NodeType - stack = Vector{NamedTuple{(:first_token,:node),Tuple{Int,NodeType}}}() - - tokens = stream.tokens - ranges = stream.ranges - i = firstindex(tokens) - j = firstindex(ranges) - while true - last_token = j <= lastindex(ranges) ? - ranges[j].last_token : lastindex(tokens) - # Process tokens to leaf nodes for all tokens used by the next internal node - while i <= last_token - t = tokens[i] - if kind(t) == K"TOMBSTONE" - i += 1 - continue # Ignore removed tokens - end - srcrange = (stream.tokens[i-1].next_byte: - stream.tokens[i].next_byte - 1) - h = head(t) - node = make_node(h, srcrange, nothing) - if !isnothing(node) - push!(stack, (first_token=i, node=node)) - end - i += 1 - end - if j > lastindex(ranges) - break - end - # Process internal nodes which end at the current position - while j <= lastindex(ranges) - r = ranges[j] - if r.last_token != last_token - break - end - if kind(r) == K"TOMBSTONE" - j += 1 - continue - end - # Collect children from the stack for this internal node - k = length(stack) + 1 - while k > 1 && r.first_token <= stack[k-1].first_token - k -= 1 - end - srcrange = (stream.tokens[r.first_token-1].next_byte: - stream.tokens[r.last_token].next_byte - 1) - children = (stack[n].node for n = k:length(stack)) - node = make_node(head(r), srcrange, children) - resize!(stack, k-1) - if !isnothing(node) - push!(stack, (first_token=r.first_token, 
node=node)) - end - j += 1 - end - end - if length(stack) == 1 - return only(stack).node - else - srcrange = (stream.tokens[1].next_byte: - stream.tokens[end].next_byte - 1) - children = (x.node for x in stack) - return make_node(SyntaxHead(K"wrapper", EMPTY_FLAGS), srcrange, children) - end -end - function sourcetext(stream::ParseStream; steal_textbuf=false) Base.depwarn("Use of `sourcetext(::ParseStream)` is deprecated. Use `SourceFile(stream)` instead", :sourcetext) root = stream.text_root @@ -1253,27 +1274,34 @@ Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`. """ unsafe_textbuf(stream) = stream.textbuf -first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel token -last_byte(stream::ParseStream) = _next_byte(stream)-1 +first_byte(stream::ParseStream) = first(stream.output).byte_span + 1 # After sentinel +last_byte(stream::ParseStream) = stream.next_byte - 1 any_error(stream::ParseStream) = any_error(stream.diagnostics) # Return last non-whitespace byte which was parsed function last_non_whitespace_byte(stream::ParseStream) - for i = length(stream.tokens):-1:1 - tok = stream.tokens[i] - if !(kind(tok) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment") - return tok.next_byte - 1 + byte_pos = stream.next_byte + for i = length(stream.output):-1:1 + node = stream.output[i] + if is_terminal(node) + if !(kind(node) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment") + return byte_pos - 1 + end + byte_pos -= node.byte_span end end return first_byte(stream) - 1 end function Base.empty!(stream::ParseStream) - t = last(stream.tokens) - empty!(stream.tokens) - # Restore sentinel token - push!(stream.tokens, SyntaxToken(SyntaxHead(K"TOMBSTONE",EMPTY_FLAGS), - K"TOMBSTONE", t.preceding_whitespace, - t.next_byte)) - empty!(stream.ranges) + # Keep only the sentinel + if !isempty(stream.output) && kind(stream.output[1]) == K"TOMBSTONE" + resize!(stream.output, 1) + else + empty!(stream.output) + # Restore 
sentinel node + push!(stream.output, RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), 0, K"TOMBSTONE")) + end + # Reset next_byte to initial position + stream.next_byte = 1 end diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/parser.jl index d1a91478eb0ee..d593fe0b64e81 100644 --- a/JuliaSyntax/src/parser.jl +++ b/JuliaSyntax/src/parser.jl @@ -340,6 +340,8 @@ function bump_dotsplit(ps, flags=EMPTY_FLAGS; bump_trivia(ps) mark = position(ps) k = remap_kind != K"None" ? remap_kind : kind(t) + # Split the dotted operator into . and the operator + # First split emits the . token (1 byte) at position mark.node_index+1 pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (-1, k, flags)) if emit_dot_node pos = emit(ps, mark, K".") diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/parser_api.jl index 7931ef31d3d2f..a3e2162bc985b 100644 --- a/JuliaSyntax/src/parser_api.jl +++ b/JuliaSyntax/src/parser_api.jl @@ -190,13 +190,15 @@ emitted as `K"Identifier"` (the default) or as `K"+"`. 
function tokenize(text; operators_as_identifiers=true) ps = ParseStream(text) parse!(ps, rule=:all) - ts = ps.tokens + ts = ps.output output_tokens = Token[] + byte_start::UInt32 = ps.output[1].byte_span + 1 for i = 2:length(ts) - if kind(ts[i]) == K"TOMBSTONE" + if kind(ts[i]) == K"TOMBSTONE" || is_non_terminal(ts[i]) continue end - r = ts[i-1].next_byte:ts[i].next_byte-1 + r = byte_start:(byte_start+ts[i].byte_span - 1) + byte_start = last(r) + 1 k = kind(ts[i]) if k == K"Identifier" && !operators_as_identifiers orig_k = ts[i].orig_kind diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/syntax_tree.jl index edc864e0e939e..71b1be82e413f 100644 --- a/JuliaSyntax/src/syntax_tree.jl +++ b/JuliaSyntax/src/syntax_tree.jl @@ -57,18 +57,28 @@ const AbstractSyntaxNode = TreeNode{<:AbstractSyntaxData} struct SyntaxData <: AbstractSyntaxData source::SourceFile raw::GreenNode{SyntaxHead} - position::Int + byte_end::UInt32 val::Any end +function Base.getproperty(data::SyntaxData, name::Symbol) + if name === :position + # Previous versions of JuliaSyntax had `position::Int`. + # Allow access for compatibility. It was renamed (with changed) semantics + # to `byte_end::UInt32` to match the rest of the code base, which identified + # nodes, by their last byte. + return Int(getfield(data, :byte_end) - getfield(data, :raw).span + UInt32(1)) + end + return getfield(data, name) +end Base.hash(data::SyntaxData, h::UInt) = - hash(data.source, hash(data.raw, hash(data.position, + hash(data.source, hash(data.raw, hash(data.byte_end, # Avoid dynamic dispatch: # This does not support custom `hash` implementation that may be defined for `typeof(data.val)`, # However, such custom user types should not generally appear in the AST. 
Core.invoke(hash, Tuple{Any,UInt}, data.val, h)))) function Base.:(==)(a::SyntaxData, b::SyntaxData) - a.source == b.source && a.raw == b.raw && a.position == b.position && a.val === b.val + a.source == b.source && a.raw == b.raw && a.byte_end == b.byte_end && a.val === b.val end """ @@ -80,41 +90,56 @@ text by calling one of the parser API functions such as [`parseall`](@ref) """ const SyntaxNode = TreeNode{SyntaxData} -function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; - keep_parens=false, position::Integer=1) +function SyntaxNode(source::SourceFile, cursor::RedTreeCursor; + keep_parens=false) + # Build the full GreenNode tree once upfront (including trivia) + green = GreenNode(cursor.green) + GC.@preserve source begin raw_offset, txtbuf = _unsafe_wrap_substring(source.code) offset = raw_offset - source.byte_offset - _to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens) + _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens) end end +function SyntaxNode(source::SourceFile, cursor::RedTreeCursor, green::GreenNode{SyntaxHead}; + keep_parens=false) + GC.@preserve source begin + raw_offset, txtbuf = _unsafe_wrap_substring(source.code) + offset = raw_offset - source.byte_offset + _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens) + end +end + +should_include_node(child) = !is_trivia(child) || is_error(child) + function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, - raw::GreenNode{SyntaxHead}, - position::Int, keep_parens::Bool) - if is_leaf(raw) + cursor::RedTreeCursor, green::GreenNode{SyntaxHead}, keep_parens::Bool) + if is_leaf(cursor) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. 
- valrange = position:position + span(raw) - 1 - val = parse_julia_literal(txtbuf, head(raw), valrange .+ offset) - return SyntaxNode(nothing, nothing, SyntaxData(source, raw, position, val)) + valrange = byte_range(cursor) + val = parse_julia_literal(txtbuf, head(cursor), valrange .+ offset) + return SyntaxNode(nothing, nothing, SyntaxData(source, green, cursor.byte_end, val)) else cs = SyntaxNode[] - pos = position - for (i,rawchild) in enumerate(children(raw)) - # FIXME: Allowing trivia is_error nodes here corrupts the tree layout. - if !is_trivia(rawchild) || is_error(rawchild) - push!(cs, _to_SyntaxNode(source, txtbuf, offset, rawchild, pos, keep_parens)) + green_children = children(green) + + # We need to match up the filtered SyntaxNode children with the unfiltered GreenNode children + # Both cursor and green children need to be traversed in the same order + # Since cursor iterates in reverse, we need to match from the end of green_children + green_idx = green_children === nothing ? 0 : length(green_children) + + for (i, child_cursor) in enumerate(reverse(cursor)) + if should_include_node(child_cursor) + pushfirst!(cs, _to_SyntaxNode(source, txtbuf, offset, child_cursor, green[end-i+1], keep_parens)) end - pos += Int(rawchild.span) end - if !keep_parens && kind(raw) == K"parens" && length(cs) == 1 - return cs[1] - end - if kind(raw) == K"wrapper" && length(cs) == 1 + + if !keep_parens && kind(cursor) == K"parens" && length(cs) == 1 return cs[1] end - node = SyntaxNode(nothing, cs, SyntaxData(source, raw, position, nothing)) + node = SyntaxNode(nothing, cs, SyntaxData(source, green, cursor.byte_end, nothing)) for c in cs c.parent = node end @@ -162,9 +187,12 @@ structure. 
""" head(node::AbstractSyntaxNode) = head(node.raw) -span(node::AbstractSyntaxNode) = span(node.raw) +span(node::AbstractSyntaxNode) = node.raw.span -byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) - 1) +byte_range(node::AbstractSyntaxNode) = (node.byte_end - span(node) + 1):node.byte_end + +first_byte(node::AbstractSyntaxNode) = first(byte_range(node)) +last_byte(node::AbstractSyntaxNode) = last(byte_range(node)) sourcefile(node::AbstractSyntaxNode) = node.source @@ -271,13 +299,45 @@ function Base.copy(node::TreeNode) end # shallow-copy the data -Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.position, data.val) +Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.byte_end, data.val) function build_tree(::Type{SyntaxNode}, stream::ParseStream; filename=nothing, first_line=1, keep_parens=false, kws...) - green_tree = build_tree(GreenNode, stream; kws...) source = SourceFile(stream, filename=filename, first_line=first_line) - SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens) + cursor = RedTreeCursor(stream) + if has_toplevel_siblings(cursor) + # There are multiple toplevel nodes, e.g. because we're using this + # to test a partial parse. 
Wrap everything in K"wrapper" + + # First build the full green tree for all children (including trivia) + green_children = GreenNode{SyntaxHead}[] + for child in reverse_toplevel_siblings(cursor) + pushfirst!(green_children, GreenNode(child.green)) + end + + # Create a wrapper GreenNode with children + green = GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), + stream.next_byte-1, green_children) + + # Now build SyntaxNodes, iterating through cursors and green nodes together + cs = SyntaxNode[] + for (i, child) in enumerate(reverse_toplevel_siblings(cursor)) + if should_include_node(child) + pushfirst!(cs, SyntaxNode(source, child, green[end-i+1], keep_parens=keep_parens)) + end + end + + length(cs) == 1 && return only(cs) + + node = SyntaxNode(nothing, cs, SyntaxData(source, green, + stream.next_byte-1, nothing)) + for c in cs + c.parent = node + end + return node + else + return SyntaxNode(source, cursor, keep_parens=keep_parens) + end end @deprecate haschildren(x) !is_leaf(x) false diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/tokenize.jl index 0ea9be19fe250..761455dd84adc 100644 --- a/JuliaSyntax/src/tokenize.jl +++ b/JuliaSyntax/src/tokenize.jl @@ -2,7 +2,7 @@ module Tokenize export tokenize, untokenize -using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str +using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str, @callsite_inline import ..JuliaSyntax: kind, is_literal, is_contextual_keyword, is_word_operator @@ -1303,14 +1303,14 @@ function lex_identifier(l::Lexer, c) @inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1] break end - elseif Unicode.isgraphemebreak!(graphemestate, c, pc) + elseif @callsite_inline Unicode.isgraphemebreak!(graphemestate, c, pc) if (pc == '!' 
&& ppc == '=') || !is_identifier_char(pc) break end elseif pc in ('\u200c','\u200d') # ZWNJ/ZWJ control characters # ZWJ/ZWNJ only within grapheme sequences, not at end graphemestate_peek[] = graphemestate[] - if Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc) + if @callsite_inline Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc) break end end diff --git a/JuliaSyntax/src/tree_cursors.jl b/JuliaSyntax/src/tree_cursors.jl new file mode 100644 index 0000000000000..3f65b6ce4fb1a --- /dev/null +++ b/JuliaSyntax/src/tree_cursors.jl @@ -0,0 +1,166 @@ +using Base.Iterators: Reverse + +""" + prev_sibling_assumed(cursor::GreenTreeCursor)::Union{Nothing, GreenTreeCursor} + prev_sibling_assumed(cursor::RedTreeCursor)::Union{Nothing, RedTreeCursor} + +Gives the previous sibling of the current node, but makes the assumption that +there is one or that we are at the top level. +Without knowing the parent, we cannot otherwise know which the last sibling is, +unless we are at the top level in which case `nothing` is returned. +""" +function prev_sibling_assumed end + +""" + GreenTreeCursor + +Represents a cursors into a ParseStream output buffer that makes it easy to +work with the green tree representation. 
+""" +struct GreenTreeCursor + parser_output::Vector{RawGreenNode} + position::UInt32 +end +GreenTreeCursor(stream::ParseStream) = GreenTreeCursor(stream.output, length(stream.output)) +this(node::GreenTreeCursor) = node.parser_output[node.position] + +const SENTINEL_INDEX = UInt32(1) +function prev_sibling_assumed(cursor::GreenTreeCursor) + next_idx = cursor.position - this(cursor).node_span - UInt32(1) + next_idx == SENTINEL_INDEX && return nothing + GreenTreeCursor(cursor.parser_output, next_idx) +end + +# Debug printing +function Base.show(io::IO, node::GreenTreeCursor) + print(io, Base.summary(this(node)), " @", node.position) +end + +# Reverse iterator interface +Base.reverse(node::GreenTreeCursor) = Base.Iterators.Reverse(node) +Base.IteratorSize(::Type{Reverse{GreenTreeCursor}}) = Base.SizeUnknown() +@inline function Base.iterate(node::Reverse{GreenTreeCursor}, + (next_idx, final)::NTuple{2, UInt32} = + (node.itr.position-UInt32(1), node.itr.position - this(node.itr).node_span - UInt32(1)))::Union{Nothing, Tuple{GreenTreeCursor, NTuple{2, UInt32}}} + node = node.itr + while true + next_idx == final && return nothing + next_node = GreenTreeCursor(node.parser_output, next_idx) + nrgn = this(next_node) + if getfield(nrgn, :head).kind == K"TOMBSTONE" + # TOMBSTONED nodes are counted as part of the size of the tree, but + # do not contribute either byte ranges or children. + next_idx -= UInt32(1) + continue + end + # Inlined prev_sibling_assumed + new_next_idx = next_idx - nrgn.node_span - UInt32(1) + return (next_node, (new_next_idx, final)) + end +end + +# Accessors / predicates +is_leaf(node::GreenTreeCursor) = !is_non_terminal(this(node)) +head(node::GreenTreeCursor) = this(node).head +treesize(node::GreenTreeCursor) = this(node).node_span +is_non_terminal(node::GreenTreeCursor) = is_non_terminal(this(node)) + +""" + span(node) + +Get the number of bytes this node covers in the source text. 
+""" +span(node::GreenTreeCursor) = this(node).byte_span + +""" + RedTreeCursor + +Wraps a `GreenTreeCursor` to keep track of the absolute position of the node +in the original source text. +""" +struct RedTreeCursor + green::GreenTreeCursor + # The last byte that is still part of the node + byte_end::UInt32 +end +RedTreeCursor(stream::ParseStream) = RedTreeCursor( + GreenTreeCursor(stream), stream.next_byte - UInt32(1)) + +function prev_sibling_assumed(cursor::RedTreeCursor) + prevgreen = prev_sibling_assumed(cursor.green) + if prevgreen === nothing + return nothing + end + return RedTreeCursor(prevgreen, cursor.byte_end - span(cursor)) +end + + +Base.reverse(node::RedTreeCursor) = Base.Iterators.Reverse(node) +Base.IteratorSize(::Type{Reverse{RedTreeCursor}}) = Base.SizeUnknown() +@inline function Base.iterate(node::Reverse{RedTreeCursor})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}} + r = iterate(Reverse(node.itr.green)) + return _iterate_red_cursor(r, node.itr.byte_end) +end + +@inline function Base.iterate(node::Reverse{RedTreeCursor}, state::NTuple{3, UInt32})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}} + r = iterate(Reverse(node.itr.green), Base.tail(state)) + return _iterate_red_cursor(r, first(state)) +end + +@inline function _iterate_red_cursor(r, byte_end) + r === nothing && return nothing + next_node, next_idx = r + return RedTreeCursor(next_node, byte_end), + (byte_end - span(next_node), next_idx...) 
+end + +is_leaf(node::RedTreeCursor) = is_leaf(node.green) +head(node::RedTreeCursor) = head(node.green) +span(node::RedTreeCursor) = span(node.green) +byte_range(node::RedTreeCursor) = (node.byte_end - span(node.green) + UInt32(1)):node.byte_end +treesize(node::RedTreeCursor) = treesize(node.green) +is_non_terminal(node::RedTreeCursor) = is_non_terminal(node.green) + +function Base.show(io::IO, node::RedTreeCursor) + print(io, node.green, " [", byte_range(node), "]") +end + +has_toplevel_siblings(cursor::GreenTreeCursor) = + treesize(cursor)+1 != length(cursor.parser_output)-1 +has_toplevel_siblings(cursor::RedTreeCursor) = + has_toplevel_siblings(cursor.green) +struct TopLevelSiblingIterator{C} + cursor::C +end + +function reverse_toplevel_siblings(cursor::RedTreeCursor) + @assert cursor.green.position == length(cursor.green.parser_output) + TopLevelSiblingIterator(cursor) +end + +function reverse_toplevel_siblings(cursor::GreenTreeCursor) + @assert cursor.position == length(cursor.parser_output) + TopLevelSiblingIterator(cursor) +end + +function Base.iterate(tsi::TopLevelSiblingIterator) + return (tsi.cursor, tsi.cursor) +end +function Base.iterate(cursor::TopLevelSiblingIterator{C}, last::C) where {C} + this = prev_sibling_assumed(last) + this === nothing && return nothing + return (this, this) +end + +# HACK: Force inlining of `filter` for our cursors to avoid significant perf +# degradation. +@inline function Base.iterate(f::Iterators.Filter{<:Any, Iterators.Reverse{T}}, state...) where {T<:Union{RedTreeCursor, GreenTreeCursor}} + y = iterate(f.itr, state...) 
+ while y !== nothing + if f.flt(y[1]) + return y + end + y = iterate(f.itr, y[2]) + end + nothing +end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl index 3f95c48572f53..c21c251eb688a 100644 --- a/JuliaSyntax/src/utils.jl +++ b/JuliaSyntax/src/utils.jl @@ -14,6 +14,15 @@ if VERSION < v"1.5" import Base.peek end +@static if VERSION < v"1.8" + macro callsite_inline(call) + esc(call) + end +else + const var"@callsite_inline" = var"@inline" +end + + _unsafe_wrap_substring(s) = (s.offset, unsafe_wrap(Vector{UInt8}, s.string)) #-------------------------------------------------- diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 200e87649e198..7651347cf853f 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -379,7 +379,7 @@ Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) @test parsestmt("f(a=1; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) - @test parsestmt("f(a; b; c)") == + @test parsestmt("f(a; b; c)") == Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) @test parsestmt("+(a=1,)") == Expr(:call, :+, Expr(:kw, :a, 1)) @@ -389,11 +389,11 @@ # Operator calls: = is not :kw @test parsestmt("(x=1) != 2") == Expr(:call, :!=, Expr(:(=), :x, 1), 2) - @test parsestmt("+(a=1)") == + @test parsestmt("+(a=1)") == Expr(:call, :+, Expr(:(=), :a, 1)) - @test parsestmt("(a=1)'") == + @test parsestmt("(a=1)'") == Expr(Symbol("'"), Expr(:(=), :a, 1)) - @test parsestmt("(a=1)'ᵀ") == + @test parsestmt("(a=1)'ᵀ") == Expr(:call, Symbol("'ᵀ"), Expr(:(=), :a, 1)) # Dotcall @@ -611,8 +611,8 @@ Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) @test parsestmt("(x for a in as, b in bs for c in cs, d in ds)") == - Expr(:flatten, - Expr(:generator, + Expr(:flatten, + Expr(:generator, Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)), Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) @test parsestmt("(x for a in as for b in bs if z)") == @@ -782,7 
+782,7 @@ @test parsestmt("global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1)) @test parsestmt("global x += 1") == Expr(:global, Expr(:+=, :x, 1)) - # Parsing of global/local with + # Parsing of global/local with @test parsestmt("global (x,y)") == Expr(:global, :x, :y) @test parsestmt("local (x,y)") == Expr(:local, :x, :y) end diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl index 42d20f5217e1f..727c717885e2b 100644 --- a/JuliaSyntax/test/green_node.jl +++ b/JuliaSyntax/test/green_node.jl @@ -3,7 +3,7 @@ @test span(t) == 6 @test !is_leaf(t) - @test head(t) == SyntaxHead(K"call", 0x0008) + @test head(t) == SyntaxHead(K"call", 0x0088) @test span.(children(t)) == [2,1,1,1,1] @test head.(children(t)) == [ SyntaxHead(K"Identifier", 0x0000) diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index f5148f2746623..0eca59b794e3f 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -20,7 +20,6 @@ using JuliaSyntax: ParseStream, yy end """ - st = ParseStream(code) p1 = position(st) @@ -73,8 +72,6 @@ using JuliaSyntax: ParseStream, @test peek(st) == K"NewlineWs" bump(st, TRIVIA_FLAG) emit(st, p1, K"toplevel") - - @test build_tree(GreenNode, st) isa JuliaSyntax.GreenNode end @testset "ParseStream constructors" begin @@ -106,47 +103,48 @@ end end @testset "ParseStream tree traversal" begin - # NB: ParseStreamPosition.token_index includes an initial sentinel token so - # indices here are one more than "might be expected". + # NB: ParseStreamPosition.node_index includes an initial sentinel token so + # indices here are one more than "might be expected". Additionally, note that + # the byte index points to the first byte after the token. 
st = parse_sexpr("((a b) c)") child1_pos = first_child_position(st, position(st)) - @test child1_pos == ParseStreamPosition(7, 1) - @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 0) - @test last_child_position(st, position(st)) == ParseStreamPosition(9, 0) - @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 0) + @test child1_pos == ParseStreamPosition(7, 8) + @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 4) + @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10) + @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 6) st = parse_sexpr("( (a b) c)") child1_pos = first_child_position(st, position(st)) - @test child1_pos == ParseStreamPosition(8, 1) - @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 0) - @test last_child_position(st, position(st)) == ParseStreamPosition(10, 0) - @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 0) + @test child1_pos == ParseStreamPosition(8, 9) + @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 5) + @test last_child_position(st, position(st)) == ParseStreamPosition(10, 11) + @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 7) st = parse_sexpr("(a (b c))") - @test first_child_position(st, position(st)) == ParseStreamPosition(3, 0) + @test first_child_position(st, position(st)) == ParseStreamPosition(3, 3) child2_pos = last_child_position(st, position(st)) - @test child2_pos == ParseStreamPosition(9, 1) - @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 0) - @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 0) + @test child2_pos == ParseStreamPosition(9, 10) + @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 6) + @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 8) st = parse_sexpr("( a (b c))") - @test first_child_position(st, position(st)) == ParseStreamPosition(4, 0) + @test 
first_child_position(st, position(st)) == ParseStreamPosition(4, 4) child2_pos = last_child_position(st, position(st)) - @test child2_pos == ParseStreamPosition(10, 1) - @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 0) - @test last_child_position(st, child2_pos) == ParseStreamPosition(9, 0) + @test child2_pos == ParseStreamPosition(10, 11) + @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 7) + @test last_child_position(st, child2_pos) == ParseStreamPosition(9, 9) st = parse_sexpr("a (b c)") - @test first_child_position(st, position(st)) == ParseStreamPosition(5, 0) - @test last_child_position(st, position(st)) == ParseStreamPosition(7, 0) + @test first_child_position(st, position(st)) == ParseStreamPosition(5, 5) + @test last_child_position(st, position(st)) == ParseStreamPosition(7, 7) st = parse_sexpr("(a) (b c)") - @test first_child_position(st, position(st)) == ParseStreamPosition(7, 0) - @test last_child_position(st, position(st)) == ParseStreamPosition(9, 0) + @test first_child_position(st, position(st)) == ParseStreamPosition(7, 8) + @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10) st = parse_sexpr("(() ())") - @test first_child_position(st, position(st)) == ParseStreamPosition(4, 1) - @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2) + @test first_child_position(st, position(st)) == ParseStreamPosition(4, 5) + @test last_child_position(st, position(st)) == ParseStreamPosition(7, 9) end @testset "SubString{GenericString} (issue #505)" begin diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f208e24c3dabc..f0ff0f51cd438 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -5,9 +5,7 @@ function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", show_kws stream = ParseStream(code, version=v) production(ParseState(stream)) JuliaSyntax.validate_tokens(stream) - t = build_tree(GreenNode, stream) - source = 
SourceFile(code) - s = SyntaxNode(source, t, keep_parens=true) + s = build_tree(SyntaxNode, stream, keep_parens=true) return sprint(io->show(io, MIME("text/x.sexpression"), s; show_kws...)) end diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl index 2fac0d6baea20..3e2361ca56b2f 100644 --- a/JuliaSyntax/test/syntax_tree.jl +++ b/JuliaSyntax/test/syntax_tree.jl @@ -28,7 +28,6 @@ "(call-i (call-i a::Identifier *::Identifier b::Identifier) +::Identifier c::Identifier)" @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" - @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" # Pass-through field access node = t[1][1] @@ -40,7 +39,6 @@ # Newline-terminated source t = parsestmt(SyntaxNode, "a*b + c\n") @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" - @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" # copy t = parsestmt(SyntaxNode, "a*b + c") @@ -58,8 +56,8 @@ # SyntaxNode with offsets t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) - @test t.position == 13 - @test t[1].position == 19 + @test first(byte_range(t)) == 13 + @test first(byte_range(t[1])) == 19 @test t[1].val == :b # Unicode character ranges diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index 7553bf1c09604..dae16cc03d1a4 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -18,6 +18,7 @@ using .JuliaSyntax: @K_str, # Nodes GreenNode, + RedTreeCursor, SyntaxNode, ErrorVal, # Node inspection @@ -131,7 +132,7 @@ function exprs_roughly_equal(fl_ex, ex) args = ex.head in (:block, :quote, :toplevel) ? 
filter(x->!(x isa LineNumberNode), ex.args) : ex.args - if (fl_ex.head == :block && ex.head == :tuple && + if (fl_ex.head == :block && ex.head == :tuple && length(fl_args) == 2 && length(args) == 2 && Meta.isexpr(args[1], :parameters, 1) && exprs_roughly_equal(fl_args[2], args[1].args[1]) && From 7c60f2f613574fb6698005eb5b19882de5a65dc1 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 11 Jun 2025 22:32:25 -0400 Subject: [PATCH 1009/1109] kinds: rm unused kind K"inert" (JuliaLang/JuliaSyntax.jl#561) `inert` is mostly used in lowering. As far as I can tell, there is precisely one place in the flisp parser that produces it: https://github.com/JuliaLang/julia/blob/d6b3669621ceb18aea693d8544b2c38870d289ad/src/julia-parser.scm#L1259 However, we do not insert and `inert` there in JuliaSyntax, instead preferring to automatically quote the rhs of a `.` head in expr conversion: https://github.com/JuliaLang/JuliaSyntax.jl/blob/74301f15bbd8b4fc61690590b4f835392a30b8bf/src/expr.jl#L310 As a result, (and as code coverage complains), the syntax head is unused and should be removed here. However, it should probably then be added back in JuliaLowering. 
--- JuliaSyntax/src/expr.jl | 2 -- JuliaSyntax/src/kinds.jl | 1 - 2 files changed, 3 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index dc802b4ed8653..0b6c2880a6698 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -526,8 +526,6 @@ end elseif k == K"module" pushfirst!(args, !has_flags(nodehead, BARE_MODULE_FLAG)) pushfirst!((args[3]::Expr).args, loc) - elseif k == K"inert" - return QuoteNode(only(args)) elseif k == K"quote" if length(args) == 1 a1 = only(args) diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/kinds.jl index 9d8999c7dd13d..e95b257bdb378 100644 --- a/JuliaSyntax/src/kinds.jl +++ b/JuliaSyntax/src/kinds.jl @@ -1015,7 +1015,6 @@ register_kinds!(JuliaSyntax, 0, [ "dotcall" "comparison" "curly" - "inert" # QuoteNode; not quasiquote "juxtapose" # Numeric juxtaposition like 2x "string" # A string interior node (possibly containing interpolations) "cmdstring" # A cmd string node (containing delimiters plus string) From 04ed94ad78ebc29495d005ece92d12dedf94b05b Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 11 Jun 2025 23:14:53 -0400 Subject: [PATCH 1010/1109] Use implicit tree for error search (JuliaLang/JuliaSyntax.jl#562) This builds on top of JuliaLang/JuliaSyntax.jl#560 and replaces the use of `SyntaxNode` in hooks.jl by the new lower-level cursor APIs. This avoid allocating two completely separate representations of the syntax tree. As a result, the end-to-end parse time for error-containing code is between 1.5x (if the error is the first token) and 2x (if the error is the last token) faster than current master. However, the main motivation here is just to reduce coupling between the Expr-producing and SyntaxNode producing parts of the code. 
--- JuliaSyntax/src/expr.jl | 7 ++- JuliaSyntax/src/hooks.jl | 83 +++++++++++++++++++++------------ JuliaSyntax/src/tree_cursors.jl | 9 ++++ 3 files changed, 68 insertions(+), 31 deletions(-) diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/expr.jl index 0b6c2880a6698..7387127174f72 100644 --- a/JuliaSyntax/src/expr.jl +++ b/JuliaSyntax/src/expr.jl @@ -609,9 +609,12 @@ end return retexpr end -function build_tree(::Type{Expr}, stream::ParseStream; - filename=nothing, first_line=1, kws...) +function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...) source = SourceFile(stream, filename=filename, first_line=first_line) + return build_tree(Expr, stream, source) +end + +function build_tree(::Type{Expr}, stream::ParseStream, source::SourceFile) txtbuf = unsafe_textbuf(stream) cursor = RedTreeCursor(stream) wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS) diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/hooks.jl index 1c31030fb4d00..2d1e4df852c18 100644 --- a/JuliaSyntax/src/hooks.jl +++ b/JuliaSyntax/src/hooks.jl @@ -4,25 +4,51 @@ const _has_v1_6_hooks = VERSION >= v"1.6" const _has_v1_10_hooks = isdefined(Core, :_setparser!) +struct ErrorSpec + child_idx::Int + node::RedTreeCursor + parent_kind::Kind +end + +function first_error_cursor(stream::ParseStream) + output = stream.output + for i = 2:length(output) + is_error(output[i]) && return GreenTreeCursor(output, i) + end +end + # Find the first error in a SyntaxNode tree, returning the index of the error # within its parent and the node itself. 
-function _first_error(t::SyntaxNode) - if is_error(t) - return 0,t +function first_tree_error(c::RedTreeCursor, error_cursor::GreenTreeCursor) + @assert !is_leaf(c) && !is_error(c) + first_child = first_error = nothing + it = reverse_nontrivia_children(c) + r = iterate(it) + local child + while r !== nothing + (child, state) = r + r = iterate(it, state) + (error_cursor in child || error_cursor == child.green) || continue + is_error(child) && break + return first_tree_error(child, error_cursor) end - if !is_leaf(t) - for (i,c) in enumerate(children(t)) - if is_error(c) - return i,c - else - x = _first_error(c) - if x != (0,nothing) - return x - end - end - end + i = 1 # count node index + while r !== nothing + i += 1 + (_, state) = r + r = iterate(it, state) + end + return ErrorSpec(i, child, kind(c)) +end + +function first_tree_error(stream::ParseStream) + c = RedTreeCursor(stream) + err = first_error_cursor(stream) + for c in reverse_toplevel_siblings(c) + is_error(c) && return ErrorSpec(0, c, K"wrapper") + is_leaf(c) && continue + return first_tree_error(c, err) end - return 0,nothing end # Classify an incomplete expression, returning a Symbol compatible with @@ -32,8 +58,10 @@ end # next if the incomplete stream was to continue. (Though this is just rough. In # practice several categories are combined for the purposes of the REPL - # perhaps we can/should do something more precise in the future.) -function _incomplete_tag(n::SyntaxNode, codelen) - i,c = _first_error(n) +function _incomplete_tag(theerror::ErrorSpec, codelen) + i = theerror.child_idx + c = theerror.node + kp = theerror.parent_kind if isnothing(c) || last_byte(c) < codelen || codelen == 0 if kind(c) == K"ErrorEofMultiComment" # This is the one weird case where the token itself is an @@ -47,18 +75,16 @@ function _incomplete_tag(n::SyntaxNode, codelen) # here as a hard error. 
return :none end - if kind(c) == K"error" && numchildren(c) > 0 - for cc in children(c) + if kind(c) == K"error" && is_non_terminal(c) + for cc in reverse_nontrivia_children(c) if kind(cc) == K"error" return :other end end end - if isnothing(c.parent) + if kp == K"wrapper" return :other - end - kp = kind(c.parent) - if kp == K"string" || kp == K"var" + elseif kp == K"string" || kp == K"var" return :string elseif kp == K"cmdstring" return :cmd @@ -181,8 +207,8 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti if any_error(stream) pos_before_comments = last_non_whitespace_byte(stream) - tree = build_tree(SyntaxNode, stream, first_line=lineno, filename=filename) - tag = _incomplete_tag(tree, pos_before_comments) + errspec = first_tree_error(stream) + tag = _incomplete_tag(errspec, pos_before_comments) if _has_v1_10_hooks exc = ParseError(stream, filename=filename, first_line=lineno, incomplete_tag=tag) @@ -211,15 +237,15 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # * truncates the top level expression arg list before that error # * includes the last line number # * appends the error message - topex = Expr(tree) + source = SourceFile(stream, filename=filename, first_line=lineno) + topex = build_tree(Expr, stream, source) @assert topex.head == :toplevel i = findfirst(_has_nested_error, topex.args) if i > 1 && topex.args[i-1] isa LineNumberNode i -= 1 end resize!(topex.args, i-1) - _,errort = _first_error(tree) - push!(topex.args, LineNumberNode(source_line(errort), filename)) + push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename)) push!(topex.args, error_ex) topex else @@ -402,4 +428,3 @@ end # Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases. fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...) fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...) 
- diff --git a/JuliaSyntax/src/tree_cursors.jl b/JuliaSyntax/src/tree_cursors.jl index 3f65b6ce4fb1a..75a5c0e44008f 100644 --- a/JuliaSyntax/src/tree_cursors.jl +++ b/JuliaSyntax/src/tree_cursors.jl @@ -31,6 +31,12 @@ function prev_sibling_assumed(cursor::GreenTreeCursor) GreenTreeCursor(cursor.parser_output, next_idx) end +function Base.in(child::GreenTreeCursor, parent::GreenTreeCursor) + @assert child.parser_output === parent.parser_output + child.position < parent.position || return false + return child.position >= parent.position - this(parent).node_span +end + # Debug printing function Base.show(io::IO, node::GreenTreeCursor) print(io, Base.summary(this(node)), " @", node.position) @@ -164,3 +170,6 @@ end end nothing end + +Base.in(child::GreenTreeCursor, parent::RedTreeCursor) = + in(child, parent.green) From fd98fc6ceb8150d2db2510589327e9e215e12c07 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 12 Jun 2025 21:12:21 -0400 Subject: [PATCH 1011/1109] organization: Split into logically distinct subpieces (JuliaLang/JuliaSyntax.jl#563) A I mentioned in JuliaLang/JuliaSyntax.jl#560, and as contemplated in JuliaLang/JuliaSyntax.jl#536, I'd like to try re-using JuliaParser infrastructure to replace parsers I've written for some other languages. This takes the first step to do so by moving various files into directories depending on whether they are language-dependent or not. Right now there is still some coupling and of course, there are no actual abstractions between these pieces. The idea would be to intrduce those over time. For now, if we put in this refactoring, the way to use this would be to copy the appropriate pieces (at least `core/`) into your downstream parser and then rewrite it to those APIs. I'm planning to do that with a parser or two to see if I hit any big API issues and see what it would take to actually make the re-use happen. 
- core: Core functionality for parsing - julia: Core functionality for parsing *julia* - integration: Integration code to use as the parser for base - porcelain: Other syntax tree types for external users of the package The `integration` and `porcelain` components should not depend on each other. Otherwise it's layered as expected. This is just the reorganization. Additional work is required to actually spearate the abstractions. --- JuliaSyntax/src/JuliaSyntax.jl | 27 +- JuliaSyntax/src/{ => core}/diagnostics.jl | 0 JuliaSyntax/src/{ => core}/parse_stream.jl | 314 ----------------- JuliaSyntax/src/{ => core}/source_files.jl | 0 JuliaSyntax/src/{ => core}/tree_cursors.jl | 0 JuliaSyntax/src/{ => integration}/expr.jl | 0 JuliaSyntax/src/{ => integration}/hooks.jl | 0 JuliaSyntax/src/julia/julia_parse_stream.jl | 315 ++++++++++++++++++ JuliaSyntax/src/{ => julia}/kinds.jl | 0 .../src/{ => julia}/literal_parsing.jl | 0 JuliaSyntax/src/{ => julia}/parser.jl | 0 JuliaSyntax/src/{ => julia}/parser_api.jl | 0 JuliaSyntax/src/{ => julia}/tokenize.jl | 0 JuliaSyntax/src/{ => porcelain}/green_node.jl | 0 .../src/{ => porcelain}/syntax_tree.jl | 0 JuliaSyntax/src/precompile.jl | 2 +- 16 files changed, 330 insertions(+), 328 deletions(-) rename JuliaSyntax/src/{ => core}/diagnostics.jl (100%) rename JuliaSyntax/src/{ => core}/parse_stream.jl (77%) rename JuliaSyntax/src/{ => core}/source_files.jl (100%) rename JuliaSyntax/src/{ => core}/tree_cursors.jl (100%) rename JuliaSyntax/src/{ => integration}/expr.jl (100%) rename JuliaSyntax/src/{ => integration}/hooks.jl (100%) create mode 100644 JuliaSyntax/src/julia/julia_parse_stream.jl rename JuliaSyntax/src/{ => julia}/kinds.jl (100%) rename JuliaSyntax/src/{ => julia}/literal_parsing.jl (100%) rename JuliaSyntax/src/{ => julia}/parser.jl (100%) rename JuliaSyntax/src/{ => julia}/parser_api.jl (100%) rename JuliaSyntax/src/{ => julia}/tokenize.jl (100%) rename JuliaSyntax/src/{ => porcelain}/green_node.jl (100%) rename 
JuliaSyntax/src/{ => porcelain}/syntax_tree.jl (100%) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl index 3c276984e8c06..da5861c0d5b62 100644 --- a/JuliaSyntax/src/JuliaSyntax.jl +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -79,29 +79,30 @@ export SyntaxNode # Helper utilities include("utils.jl") -include("kinds.jl") +include("julia/kinds.jl") # Lexing uses a significantly modified version of Tokenize.jl -include("tokenize.jl") +include("julia/tokenize.jl") # Source and diagnostics -include("source_files.jl") -include("diagnostics.jl") +include("core/source_files.jl") +include("core/diagnostics.jl") # Parsing -include("parse_stream.jl") -include("parser.jl") -include("parser_api.jl") -include("literal_parsing.jl") +include("core/parse_stream.jl") +include("core/tree_cursors.jl") +include("julia/julia_parse_stream.jl") +include("julia/parser.jl") +include("julia/parser_api.jl") +include("julia/literal_parsing.jl") # Tree data structures -include("tree_cursors.jl") -include("green_node.jl") -include("syntax_tree.jl") -include("expr.jl") +include("porcelain/green_node.jl") +include("porcelain/syntax_tree.jl") +include("integration/expr.jl") # Hooks to integrate the parser with Base -include("hooks.jl") +include("integration/hooks.jl") include("precompile.jl") end diff --git a/JuliaSyntax/src/diagnostics.jl b/JuliaSyntax/src/core/diagnostics.jl similarity index 100% rename from JuliaSyntax/src/diagnostics.jl rename to JuliaSyntax/src/core/diagnostics.jl diff --git a/JuliaSyntax/src/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl similarity index 77% rename from JuliaSyntax/src/parse_stream.jl rename to JuliaSyntax/src/core/parse_stream.jl index 1000fdaa123aa..fd66b2b44b8ae 100644 --- a/JuliaSyntax/src/parse_stream.jl +++ b/JuliaSyntax/src/core/parse_stream.jl @@ -9,93 +9,11 @@ const EMPTY_FLAGS = RawFlags(0) # Set for tokens or ranges which are syntax trivia after parsing const TRIVIA_FLAG = RawFlags(1<<0) -# Token flags - may be set 
for operator kinded tokens -# Operator is dotted -const DOTOP_FLAG = RawFlags(1<<1) -# Operator has a suffix -const SUFFIXED_FLAG = RawFlags(1<<2) - -# Set for K"call", K"dotcall" or any syntactic operator heads -# Distinguish various syntaxes which are mapped to K"call" -const PREFIX_CALL_FLAG = RawFlags(0<<3) -const INFIX_FLAG = RawFlags(1<<3) -const PREFIX_OP_FLAG = RawFlags(2<<3) -const POSTFIX_OP_FLAG = RawFlags(3<<3) - -# The following flags are quite head-specific and may overlap - -""" -Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ``` -""" -const TRIPLE_STRING_FLAG = RawFlags(1<<5) - -""" -Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping -""" -const RAW_STRING_FLAG = RawFlags(1<<6) - -""" -Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses -""" -const PARENS_FLAG = RawFlags(1<<5) - -""" -Set for various delimited constructs when they contains a trailing comma. For -example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where -this applies are: `tuple call dotcall macrocall vect curly braces <: >:`. 
-""" -const TRAILING_COMMA_FLAG = RawFlags(1<<6) - -""" -Set for K"quote" for the short form `:x` as opposed to long form `quote x end` -""" -const COLON_QUOTE = RawFlags(1<<5) - -""" -Set for K"toplevel" which is delimited by parentheses -""" -const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) - -""" -Set for K"function" in short form definitions such as `f() = 1` -""" -const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5) - -""" -Set for K"struct" when mutable -""" -const MUTABLE_FLAG = RawFlags(1<<5) - -""" -Set for K"module" when it's not bare (`module`, not `baremodule`) -""" -const BARE_MODULE_FLAG = RawFlags(1<<5) - """ Set for nodes that are non-terminals """ const NON_TERMINAL_FLAG = RawFlags(1<<7) -# Flags holding the dimension of an nrow or other UInt8 not held in the source -# TODO: Given this is only used for nrow/ncat, we could actually use all the flags? -const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) - -function set_numeric_flags(n::Integer) - f = RawFlags((n << 8) & NUMERIC_FLAGS) - if numeric_flags(f) != n - error("Numeric flags unable to hold large integer $n") - end - f -end - -function call_type_flags(f::RawFlags) - f & 0b11000 -end - -function numeric_flags(f::RawFlags) - Int((f >> 8) % UInt8) -end - function remove_flags(n::RawFlags, fs...) RawFlags(n & ~(RawFlags((|)(fs...)))) end @@ -138,47 +56,6 @@ function Base.summary(head::SyntaxHead) untokenize(head, unique=false, include_flag_suff=false) end -function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) - str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) : - untokenize(kind(head); unique=unique))::String - if is_dotted(head) - str = "."*str - end - if include_flag_suff - # Ignore DOTOP_FLAG - it's represented above with . 
prefix - is_trivia(head) && (str = str*"-t") - is_infix_op_call(head) && (str = str*"-i") - is_prefix_op_call(head) && (str = str*"-pre") - is_postfix_op_call(head) && (str = str*"-post") - - k = kind(head) - if k in KSet"string cmdstring Identifier" - has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") - has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") - elseif k in KSet"tuple block macrocall" - has_flags(head, PARENS_FLAG) && (str = str*"-p") - elseif k == K"quote" - has_flags(head, COLON_QUOTE) && (str = str*"-:") - elseif k == K"toplevel" - has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") - elseif k == K"function" - has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") - elseif k == K"struct" - has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") - elseif k == K"module" - has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") - end - if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && - has_flags(head, TRAILING_COMMA_FLAG) - str *= "-," - end - is_suffixed(head) && (str = str*"-suf") - n = numeric_flags(head) - n != 0 && (str = str*"-"*string(n)) - end - str -end - #------------------------------------------------------------------------------- # Generic interface for types `T` which have kind and flags. Either: # 1. Define kind(::T) and flags(::T), or @@ -200,65 +77,6 @@ invisible to the parser (eg, whitespace) or implied by the structure of the AST """ is_trivia(x) = has_flags(x, TRIVIA_FLAG) -""" - is_prefix_call(x) - -Return true for normal prefix function call syntax such as the `f` call node -parsed from `f(x)`. -""" -is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG - -""" - is_infix_op_call(x) - -Return true for infix operator calls such as the `+` call node parsed from -`x + y`. -""" -is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG - -""" - is_prefix_op_call(x) - -Return true for prefix operator calls such as the `+` call node parsed from `+x`. 
-""" -is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG - -""" - is_postfix_op_call(x) - -Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`. -""" -is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG - -""" - is_dotted(x) - -Return true for dotted syntax tokens -""" -is_dotted(x) = has_flags(x, DOTOP_FLAG) - -""" - is_suffixed(x) - -Return true for operators which have suffixes, such as `+₁` -""" -is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) - -""" - is_decorated(x) - -Return true for operators which are decorated with a dot or suffix. -""" -is_decorated(x) = is_dotted(x) || is_suffixed(x) - -""" - numeric_flags(x) - -Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"` -and `K"ncat"`, for now. -""" -numeric_flags(x) = numeric_flags(flags(x)) - #------------------------------------------------------------------------------- """ `SyntaxToken` is a token covering a contiguous byte range in the input text. @@ -280,7 +98,6 @@ function Base.show(io::IO, tok::SyntaxToken) end head(tok::SyntaxToken) = tok.head -flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS) preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace @@ -962,45 +779,6 @@ function bump_glue(stream::ParseStream, kind, flags) return position(stream) end -""" - bump_split(stream, token_spec1, [token_spec2 ...]) - -Bump the next token, splitting it into several pieces - -Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. -If all `nbyte` are positive, the sum must equal the token length. If one -`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of -all `nbyte` must equal zero. - -This is a hack which helps resolves the occasional lexing ambiguity. For -example -* Whether .+ should be a single token or the composite (. +) which is used for - standalone operators. -* Whether ... is splatting (most of the time) or three . 
tokens in import paths - -TODO: Are these the only cases? Can we replace this general utility with a -simpler one which only splits preceding dots? -""" -function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} - tok = stream.lookahead[stream.lookahead_index] - stream.lookahead_index += 1 - start_b = _next_byte(stream) - toklen = tok.next_byte - start_b - prev_b = start_b - for (i, (nbyte, k, f)) in enumerate(split_spec) - h = SyntaxHead(k, f) - actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte - orig_k = k == K"." ? K"." : kind(tok) - node = RawGreenNode(h, actual_nbyte, orig_k) - push!(stream.output, node) - prev_b += actual_nbyte - stream.next_byte += actual_nbyte - end - @assert tok.next_byte == prev_b - stream.peek_count = 0 - return position(stream) -end - """ Reset kind or flags of an existing node in the output stream @@ -1129,98 +907,6 @@ function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...)) end -#------------------------------------------------------------------------------- -# ParseStream Post-processing - -function validate_tokens(stream::ParseStream) - txtbuf = unsafe_textbuf(stream) - charbuf = IOBuffer() - - # Process terminal nodes in the output - fbyte = stream.output[1].byte_span+1 # Start after sentinel - for i = 2:length(stream.output) - node = stream.output[i] - if !is_terminal(node) || kind(node) == K"TOMBSTONE" - continue - end - - k = kind(node) - nbyte = fbyte + node.byte_span - tokrange = fbyte:nbyte-1 - error_kind = K"None" - - if k in KSet"Integer BinInt OctInt HexInt" - # The following shouldn't be able to error... 
- # parse_int_literal - # parse_uint_literal - elseif k == K"Float" || k == K"Float32" - underflow0 = false - if k == K"Float" - x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte) - # jl_strtod_c can return "underflow" even for valid cases such - # as `5e-324` where the source is an exact representation of - # `x`. So only warn when underflowing to zero. - underflow0 = code === :underflow && x == 0 - else - x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte) - underflow0 = code === :underflow && x == 0 - end - if code === :ok - # pass - elseif code === :overflow - emit_diagnostic(stream, tokrange, - error="overflow in floating point literal") - error_kind = K"ErrorNumericOverflow" - elseif underflow0 - emit_diagnostic(stream, tokrange, - warning="underflow to zero in floating point literal") - end - elseif k == K"Char" - @assert fbyte < nbyte # Already handled in the parser - truncate(charbuf, 0) - had_error = unescape_julia_string(charbuf, txtbuf, fbyte, - nbyte, stream.diagnostics) - if had_error - error_kind = K"ErrorInvalidEscapeSequence" - else - seek(charbuf,0) - read(charbuf, Char) - if !eof(charbuf) - error_kind = K"ErrorOverLongCharacter" - emit_diagnostic(stream, tokrange, - error="character literal contains multiple characters") - end - end - elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG) - had_error = unescape_julia_string(devnull, txtbuf, fbyte, - nbyte, stream.diagnostics) - if had_error - error_kind = K"ErrorInvalidEscapeSequence" - end - elseif is_error(k) && k != K"error" - # Emit messages for non-generic token errors - tokstr = String(txtbuf[tokrange]) - msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart" - "$(_token_error_descriptions[k]) $(repr(tokstr[1]))" - elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting" - "$(_token_error_descriptions[k]) $(repr(tokstr))" - else - _token_error_descriptions[k] - end - emit_diagnostic(stream, tokrange, error=msg) - end - - if error_kind != 
K"None" - # Update the node with new error kind - stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS), - node.byte_span, node.orig_kind) - end - - fbyte = nbyte - end - sort!(stream.diagnostics, by=first_byte) -end - # Tree construction from the list of text ranges held by ParseStream # API for extracting results from ParseStream diff --git a/JuliaSyntax/src/source_files.jl b/JuliaSyntax/src/core/source_files.jl similarity index 100% rename from JuliaSyntax/src/source_files.jl rename to JuliaSyntax/src/core/source_files.jl diff --git a/JuliaSyntax/src/tree_cursors.jl b/JuliaSyntax/src/core/tree_cursors.jl similarity index 100% rename from JuliaSyntax/src/tree_cursors.jl rename to JuliaSyntax/src/core/tree_cursors.jl diff --git a/JuliaSyntax/src/expr.jl b/JuliaSyntax/src/integration/expr.jl similarity index 100% rename from JuliaSyntax/src/expr.jl rename to JuliaSyntax/src/integration/expr.jl diff --git a/JuliaSyntax/src/hooks.jl b/JuliaSyntax/src/integration/hooks.jl similarity index 100% rename from JuliaSyntax/src/hooks.jl rename to JuliaSyntax/src/integration/hooks.jl diff --git a/JuliaSyntax/src/julia/julia_parse_stream.jl b/JuliaSyntax/src/julia/julia_parse_stream.jl new file mode 100644 index 0000000000000..aab8a5472a331 --- /dev/null +++ b/JuliaSyntax/src/julia/julia_parse_stream.jl @@ -0,0 +1,315 @@ +# Token flags - may be set for operator kinded tokens +# Operator is dotted +const DOTOP_FLAG = RawFlags(1<<1) +# Operator has a suffix +const SUFFIXED_FLAG = RawFlags(1<<2) + +# Set for K"call", K"dotcall" or any syntactic operator heads +# Distinguish various syntaxes which are mapped to K"call" +const PREFIX_CALL_FLAG = RawFlags(0<<3) +const INFIX_FLAG = RawFlags(1<<3) +const PREFIX_OP_FLAG = RawFlags(2<<3) +const POSTFIX_OP_FLAG = RawFlags(3<<3) + +# The following flags are quite head-specific and may overlap + +""" +Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ``` +""" +const TRIPLE_STRING_FLAG = RawFlags(1<<5) 
+ +""" +Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping +""" +const RAW_STRING_FLAG = RawFlags(1<<6) + +""" +Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses +""" +const PARENS_FLAG = RawFlags(1<<5) + +""" +Set for various delimited constructs when they contains a trailing comma. For +example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where +this applies are: `tuple call dotcall macrocall vect curly braces <: >:`. +""" +const TRAILING_COMMA_FLAG = RawFlags(1<<6) + +""" +Set for K"quote" for the short form `:x` as opposed to long form `quote x end` +""" +const COLON_QUOTE = RawFlags(1<<5) + +""" +Set for K"toplevel" which is delimited by parentheses +""" +const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) + +""" +Set for K"function" in short form definitions such as `f() = 1` +""" +const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5) + +""" +Set for K"struct" when mutable +""" +const MUTABLE_FLAG = RawFlags(1<<5) + +""" +Set for K"module" when it's not bare (`module`, not `baremodule`) +""" +const BARE_MODULE_FLAG = RawFlags(1<<5) + +# Flags holding the dimension of an nrow or other UInt8 not held in the source +# TODO: Given this is only used for nrow/ncat, we could actually use all the flags? +const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8) + +function set_numeric_flags(n::Integer) + f = RawFlags((n << 8) & NUMERIC_FLAGS) + if numeric_flags(f) != n + error("Numeric flags unable to hold large integer $n") + end + f +end + +function call_type_flags(f::RawFlags) + f & 0b11000 +end + +function numeric_flags(f::RawFlags) + Int((f >> 8) % UInt8) +end + +flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS) + +""" + is_prefix_call(x) + +Return true for normal prefix function call syntax such as the `f` call node +parsed from `f(x)`. 
+""" +is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG + +""" + is_infix_op_call(x) + +Return true for infix operator calls such as the `+` call node parsed from +`x + y`. +""" +is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG + +""" + is_prefix_op_call(x) + +Return true for prefix operator calls such as the `+` call node parsed from `+x`. +""" +is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG + +""" + is_postfix_op_call(x) + +Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`. +""" +is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG + +""" + is_dotted(x) + +Return true for dotted syntax tokens +""" +is_dotted(x) = has_flags(x, DOTOP_FLAG) + +""" + is_suffixed(x) + +Return true for operators which have suffixes, such as `+₁` +""" +is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) + +""" + is_decorated(x) + +Return true for operators which are decorated with a dot or suffix. +""" +is_decorated(x) = is_dotted(x) || is_suffixed(x) + +""" + numeric_flags(x) + +Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"` +and `K"ncat"`, for now. +""" +numeric_flags(x) = numeric_flags(flags(x)) + +function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) + str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) : + untokenize(kind(head); unique=unique))::String + if is_dotted(head) + str = "."*str + end + if include_flag_suff + # Ignore DOTOP_FLAG - it's represented above with . 
prefix + is_trivia(head) && (str = str*"-t") + is_infix_op_call(head) && (str = str*"-i") + is_prefix_op_call(head) && (str = str*"-pre") + is_postfix_op_call(head) && (str = str*"-post") + + k = kind(head) + if k in KSet"string cmdstring Identifier" + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") + elseif k in KSet"tuple block macrocall" + has_flags(head, PARENS_FLAG) && (str = str*"-p") + elseif k == K"quote" + has_flags(head, COLON_QUOTE) && (str = str*"-:") + elseif k == K"toplevel" + has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") + elseif k == K"function" + has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") + elseif k == K"struct" + has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") + elseif k == K"module" + has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") + end + if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && + has_flags(head, TRAILING_COMMA_FLAG) + str *= "-," + end + is_suffixed(head) && (str = str*"-suf") + n = numeric_flags(head) + n != 0 && (str = str*"-"*string(n)) + end + str +end + + +#------------------------------------------------------------------------------- +# ParseStream Post-processing + +function validate_tokens(stream::ParseStream) + txtbuf = unsafe_textbuf(stream) + charbuf = IOBuffer() + + # Process terminal nodes in the output + fbyte = stream.output[1].byte_span+1 # Start after sentinel + for i = 2:length(stream.output) + node = stream.output[i] + if !is_terminal(node) || kind(node) == K"TOMBSTONE" + continue + end + + k = kind(node) + nbyte = fbyte + node.byte_span + tokrange = fbyte:nbyte-1 + error_kind = K"None" + + if k in KSet"Integer BinInt OctInt HexInt" + # The following shouldn't be able to error... 
+ # parse_int_literal + # parse_uint_literal + elseif k == K"Float" || k == K"Float32" + underflow0 = false + if k == K"Float" + x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte) + # jl_strtod_c can return "underflow" even for valid cases such + # as `5e-324` where the source is an exact representation of + # `x`. So only warn when underflowing to zero. + underflow0 = code === :underflow && x == 0 + else + x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte) + underflow0 = code === :underflow && x == 0 + end + if code === :ok + # pass + elseif code === :overflow + emit_diagnostic(stream, tokrange, + error="overflow in floating point literal") + error_kind = K"ErrorNumericOverflow" + elseif underflow0 + emit_diagnostic(stream, tokrange, + warning="underflow to zero in floating point literal") + end + elseif k == K"Char" + @assert fbyte < nbyte # Already handled in the parser + truncate(charbuf, 0) + had_error = unescape_julia_string(charbuf, txtbuf, fbyte, + nbyte, stream.diagnostics) + if had_error + error_kind = K"ErrorInvalidEscapeSequence" + else + seek(charbuf,0) + read(charbuf, Char) + if !eof(charbuf) + error_kind = K"ErrorOverLongCharacter" + emit_diagnostic(stream, tokrange, + error="character literal contains multiple characters") + end + end + elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG) + had_error = unescape_julia_string(devnull, txtbuf, fbyte, + nbyte, stream.diagnostics) + if had_error + error_kind = K"ErrorInvalidEscapeSequence" + end + elseif is_error(k) && k != K"error" + # Emit messages for non-generic token errors + tokstr = String(txtbuf[tokrange]) + msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart" + "$(_token_error_descriptions[k]) $(repr(tokstr[1]))" + elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting" + "$(_token_error_descriptions[k]) $(repr(tokstr))" + else + _token_error_descriptions[k] + end + emit_diagnostic(stream, tokrange, error=msg) + end + + if error_kind != 
K"None" + # Update the node with new error kind + stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS), + node.byte_span, node.orig_kind) + end + + fbyte = nbyte + end + sort!(stream.diagnostics, by=first_byte) +end + +""" + bump_split(stream, token_spec1, [token_spec2 ...]) + +Bump the next token, splitting it into several pieces + +Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. +If all `nbyte` are positive, the sum must equal the token length. If one +`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of +all `nbyte` must equal zero. + +This is a hack which helps resolves the occasional lexing ambiguity. For +example +* Whether .+ should be a single token or the composite (. +) which is used for + standalone operators. +* Whether ... is splatting (most of the time) or three . tokens in import paths + +TODO: Are these the only cases? Can we replace this general utility with a +simpler one which only splits preceding dots? +""" +function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} + tok = stream.lookahead[stream.lookahead_index] + stream.lookahead_index += 1 + start_b = _next_byte(stream) + toklen = tok.next_byte - start_b + prev_b = start_b + for (i, (nbyte, k, f)) in enumerate(split_spec) + h = SyntaxHead(k, f) + actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte + orig_k = k == K"." ? K"." 
: kind(tok) + node = RawGreenNode(h, actual_nbyte, orig_k) + push!(stream.output, node) + prev_b += actual_nbyte + stream.next_byte += actual_nbyte + end + @assert tok.next_byte == prev_b + stream.peek_count = 0 + return position(stream) +end diff --git a/JuliaSyntax/src/kinds.jl b/JuliaSyntax/src/julia/kinds.jl similarity index 100% rename from JuliaSyntax/src/kinds.jl rename to JuliaSyntax/src/julia/kinds.jl diff --git a/JuliaSyntax/src/literal_parsing.jl b/JuliaSyntax/src/julia/literal_parsing.jl similarity index 100% rename from JuliaSyntax/src/literal_parsing.jl rename to JuliaSyntax/src/julia/literal_parsing.jl diff --git a/JuliaSyntax/src/parser.jl b/JuliaSyntax/src/julia/parser.jl similarity index 100% rename from JuliaSyntax/src/parser.jl rename to JuliaSyntax/src/julia/parser.jl diff --git a/JuliaSyntax/src/parser_api.jl b/JuliaSyntax/src/julia/parser_api.jl similarity index 100% rename from JuliaSyntax/src/parser_api.jl rename to JuliaSyntax/src/julia/parser_api.jl diff --git a/JuliaSyntax/src/tokenize.jl b/JuliaSyntax/src/julia/tokenize.jl similarity index 100% rename from JuliaSyntax/src/tokenize.jl rename to JuliaSyntax/src/julia/tokenize.jl diff --git a/JuliaSyntax/src/green_node.jl b/JuliaSyntax/src/porcelain/green_node.jl similarity index 100% rename from JuliaSyntax/src/green_node.jl rename to JuliaSyntax/src/porcelain/green_node.jl diff --git a/JuliaSyntax/src/syntax_tree.jl b/JuliaSyntax/src/porcelain/syntax_tree.jl similarity index 100% rename from JuliaSyntax/src/syntax_tree.jl rename to JuliaSyntax/src/porcelain/syntax_tree.jl diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl index 5a80d92d6837c..9fb71c74d7132 100644 --- a/JuliaSyntax/src/precompile.jl +++ b/JuliaSyntax/src/precompile.jl @@ -1,5 +1,5 @@ # Just parse some file as a precompile workload -let filename = joinpath(@__DIR__, "literal_parsing.jl") +let filename = joinpath(@__DIR__, "julia/literal_parsing.jl") text = read(filename, String) parseall(Expr, text) 
parseall(SyntaxNode, text) From 590da3a483c0b8f6e5612d7f7d81f55b9c89bee7 Mon Sep 17 00:00:00 2001 From: Sebastian Pfitzner Date: Tue, 24 Jun 2025 02:02:22 +0200 Subject: [PATCH 1012/1109] fix: skip zero-width error tokens in last_non_whitespace_byte (JuliaLang/JuliaSyntax.jl#566) to ensure that `let x = 1 # comment` is treated as an incomplete expression --- JuliaSyntax/src/core/parse_stream.jl | 5 +++-- JuliaSyntax/test/hooks.jl | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/core/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl index fd66b2b44b8ae..d82a601f4029a 100644 --- a/JuliaSyntax/src/core/parse_stream.jl +++ b/JuliaSyntax/src/core/parse_stream.jl @@ -970,10 +970,11 @@ function last_non_whitespace_byte(stream::ParseStream) for i = length(stream.output):-1:1 node = stream.output[i] if is_terminal(node) - if !(kind(node) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment") + if kind(node) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment" || kind(node) == K"error" && node.byte_span == 0 + byte_pos -= node.byte_span + else return byte_pos - 1 end - byte_pos -= node.byte_span end end return first_byte(stream) - 1 diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl index c41d2dacf5482..333344d7c50a1 100644 --- a/JuliaSyntax/test/hooks.jl +++ b/JuliaSyntax/test/hooks.jl @@ -497,6 +497,7 @@ end "Issue53126()." => :other "using " => :other "global xxx::Number = Base." => :other + "let x = 1 # comment" => :other ] @testset "$(repr(str))" begin # Test :statement parsing From 4de06c23fbd334bdbbc6b0cc946fbba1e8c5c410 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 1 Jul 2025 20:43:40 -0400 Subject: [PATCH 1013/1109] Remove DOTOP_FLAG from tokenizer and parser (JuliaLang/JuliaSyntax.jl#568) This implements the first of a series of AST format changes described in JuliaLang/JuliaSyntax.jl#567. In particular, this removes the DOTOP_FLAG. 
Currently, we already do not emit the DOTOP_FLAG on terminals, they always get split into one dot token and one identifier token (although we did set it on the intermediate tokens that came out of the lexer). The only four kinds for which DOTOP_FLAG was ever set in the final AST were `=`, `op=`, `&&` and `||`. This introduces separate head kinds for each of these (similar to how there are already separate head calls for `dotcall` and `dot). Otherwise the AST structure should be unchanged. --- JuliaSyntax/LICENSE.md | 6 +- JuliaSyntax/src/core/parse_stream.jl | 1 - JuliaSyntax/src/integration/expr.jl | 10 +- JuliaSyntax/src/julia/julia_parse_stream.jl | 47 ++-- JuliaSyntax/src/julia/kinds.jl | 4 + JuliaSyntax/src/julia/parser.jl | 277 ++++++++++---------- JuliaSyntax/src/julia/tokenize.jl | 98 +------ JuliaSyntax/test/diagnostics.jl | 2 +- JuliaSyntax/test/parser.jl | 12 +- JuliaSyntax/test/tokenize.jl | 54 ++-- 10 files changed, 238 insertions(+), 273 deletions(-) diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md index bfb0e81bccb04..88fc63f3a342a 100644 --- a/JuliaSyntax/LICENSE.md +++ b/JuliaSyntax/LICENSE.md @@ -1,17 +1,17 @@ The JuliaSyntax.jl package is licensed under the MIT "Expat" License: > Copyright (c) 2021 Julia Computing and contributors -> +> > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal > in the Software without restriction, including without limitation the rights > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > copies of the Software, and to permit persons to whom the Software is > furnished to do so, subject to the following conditions: -> +> > The above copyright notice and this permission notice shall be included in all > copies or substantial portions of the Software. 
-> +> > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/JuliaSyntax/src/core/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl index d82a601f4029a..da4d70ccf1086 100644 --- a/JuliaSyntax/src/core/parse_stream.jl +++ b/JuliaSyntax/src/core/parse_stream.jl @@ -376,7 +376,6 @@ function _buffer_lookahead_tokens(lexer, lookahead) was_whitespace = is_whitespace(k) had_whitespace |= was_whitespace f = EMPTY_FLAGS - raw.dotop && (f |= DOTOP_FLAG) raw.suffix && (f |= SUFFIXED_FLAG) push!(lookahead, SyntaxToken(SyntaxHead(k, f), k, had_whitespace, raw.endbyte + 2)) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl index 7387127174f72..ff9b955c8fe26 100644 --- a/JuliaSyntax/src/integration/expr.jl +++ b/JuliaSyntax/src/integration/expr.jl @@ -313,9 +313,13 @@ end op = args[2] rhs = args[3] headstr = string(args[2], '=') - if is_dotted(nodehead) - headstr = '.'*headstr - end + retexpr.head = Symbol(headstr) + retexpr.args = Any[lhs, rhs] + elseif k == K".op=" && length(args) == 3 + lhs = args[1] + op = args[2] + rhs = args[3] + headstr = '.' 
* string(args[2], '=') retexpr.head = Symbol(headstr) retexpr.args = Any[lhs, rhs] elseif k == K"macrocall" diff --git a/JuliaSyntax/src/julia/julia_parse_stream.jl b/JuliaSyntax/src/julia/julia_parse_stream.jl index aab8a5472a331..fc3eac28f3783 100644 --- a/JuliaSyntax/src/julia/julia_parse_stream.jl +++ b/JuliaSyntax/src/julia/julia_parse_stream.jl @@ -1,6 +1,4 @@ # Token flags - may be set for operator kinded tokens -# Operator is dotted -const DOTOP_FLAG = RawFlags(1<<1) # Operator has a suffix const SUFFIXED_FLAG = RawFlags(1<<2) @@ -112,12 +110,6 @@ Return true for postfix operator calls such as the `'ᵀ` call node parsed from """ is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG -""" - is_dotted(x) - -Return true for dotted syntax tokens -""" -is_dotted(x) = has_flags(x, DOTOP_FLAG) """ is_suffixed(x) @@ -126,12 +118,6 @@ Return true for operators which have suffixes, such as `+₁` """ is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) -""" - is_decorated(x) - -Return true for operators which are decorated with a dot or suffix. -""" -is_decorated(x) = is_dotted(x) || is_suffixed(x) """ numeric_flags(x) @@ -144,11 +130,7 @@ numeric_flags(x) = numeric_flags(flags(x)) function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) : untokenize(kind(head); unique=unique))::String - if is_dotted(head) - str = "."*str - end if include_flag_suff - # Ignore DOTOP_FLAG - it's represented above with . prefix is_trivia(head) && (str = str*"-t") is_infix_op_call(head) && (str = str*"-i") is_prefix_op_call(head) && (str = str*"-pre") @@ -313,3 +295,32 @@ function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} stream.peek_count = 0 return position(stream) end + +function peek_dotted_op_token(ps, allow_whitespace=false) + # Peek the next token, but if it is a dot, peek the next one as well + t = peek_token(ps) + isdotted = kind(t) == K"." 
+ if isdotted + t2 = peek_token(ps, 2) + if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2)) + isdotted = false + else + t = t2 + end + end + return (isdotted, t) +end + +function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") + if isdot + if emit_dot_node + dotmark = position(ps) + bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + else + bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + end + end + pos = bump(ps, flags, remap_kind=remap_kind) + isdot && emit_dot_node && (pos = emit(ps, dotmark, K".")) + return pos +end diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl index e95b257bdb378..8a20a2b82f42e 100644 --- a/JuliaSyntax/src/julia/kinds.jl +++ b/JuliaSyntax/src/julia/kinds.jl @@ -293,7 +293,9 @@ register_kinds!(JuliaSyntax, 0, [ "BEGIN_ASSIGNMENTS" "BEGIN_SYNTACTIC_ASSIGNMENTS" "=" + ".=" "op=" # Updating assignment operator ( $= %= &= *= += -= //= /= <<= >>= >>>= \= ^= |= ÷= ⊻= ) + ".op=" ":=" "END_SYNTACTIC_ASSIGNMENTS" "~" @@ -470,11 +472,13 @@ register_kinds!(JuliaSyntax, 0, [ # Level 4 "BEGIN_LAZYOR" "||" + ".||" "END_LAZYOR" # Level 5 "BEGIN_LAZYAND" "&&" + ".&&" "END_LAZYAND" # Level 6 diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl index d593fe0b64e81..a1a0a96b09077 100644 --- a/JuliaSyntax/src/julia/parser.jl +++ b/JuliaSyntax/src/julia/parser.jl @@ -222,7 +222,7 @@ end # All these take either a raw kind or a token. 
function is_plain_equals(t) - kind(t) == K"=" && !is_decorated(t) + kind(t) == K"=" && !is_suffixed(t) end function is_closing_token(ps::ParseState, k) @@ -278,25 +278,25 @@ function is_syntactic_unary_op(k) kind(k) in KSet"$ & ::" end -function is_type_operator(t) - kind(t) in KSet"<: >:" && !is_dotted(t) +function is_type_operator(t, isdot) + kind(t) in KSet"<: >:" && !isdot end -function is_unary_op(t) +function is_unary_op(t, isdot) k = kind(t) !is_suffixed(t) && ( - (k in KSet"<: >:" && !is_dotted(t)) || + (k in KSet"<: >:" && !isdot) || k in KSet"+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓" # dotop allowed ) end # Operators that are both unary and binary -function is_both_unary_and_binary(t) +function is_both_unary_and_binary(t, isdot) k = kind(t) # Preventing is_suffixed here makes this consistent with the flisp parser. # But is this by design or happenstance? !is_suffixed(t) && ( - k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !is_dotted(t)) + k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !isdot) ) end @@ -333,28 +333,6 @@ function was_eventually_call(ps::ParseState) end end -function bump_dotsplit(ps, flags=EMPTY_FLAGS; - emit_dot_node::Bool=false, remap_kind::Kind=K"None") - t = peek_token(ps) - if is_dotted(t) - bump_trivia(ps) - mark = position(ps) - k = remap_kind != K"None" ? remap_kind : kind(t) - # Split the dotted operator into . and the operator - # First split emits the . 
token (1 byte) at position mark.node_index+1 - pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (-1, k, flags)) - if emit_dot_node - pos = emit(ps, mark, K".") - end - else - if remap_kind != K"None" - pos = bump(ps, remap_kind=remap_kind) - else - pos = bump(ps) - end - end - return pos -end #------------------------------------------------------------------------------- # Parser @@ -374,11 +352,13 @@ end function parse_LtoR(ps::ParseState, down, is_op) mark = position(ps) down(ps) - while is_op(peek(ps)) - t = peek_token(ps) - bump_dotsplit(ps, remap_kind=K"Identifier") + while true + isdot, tk = peek_dotted_op_token(ps) + is_op(tk) || break + isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + bump(ps, remap_kind=K"Identifier") down(ps) - emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end end @@ -389,11 +369,11 @@ end function parse_RtoL(ps::ParseState, down, is_op, self) mark = position(ps) down(ps) - t = peek_token(ps) - if is_op(kind(t)) - bump_dotsplit(ps, remap_kind=K"Identifier") + isdot, tk = peek_dotted_op_token(ps) + if is_op(tk) + bump_dotted(ps, isdot, remap_kind=K"Identifier") self(ps) - emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end end @@ -601,7 +581,7 @@ function parse_assignment(ps::ParseState, down) end function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {T} # where => specialize on `down` - t = peek_token(ps) + isdot, t = peek_dotted_op_token(ps) k = kind(t) if !is_prec_assignment(k) return @@ -618,38 +598,33 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # a .~ b ==> (dotcall-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) # [a~b] ==> (vect (call-i a ~ b)) - bump_dotsplit(ps, remap_kind=K"Identifier") + bump_dotted(ps, isdot, remap_kind=K"Identifier") bump_trivia(ps) parse_assignment(ps, down) - emit(ps, mark, is_dotted(t) ? 
K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) else # f() = 1 ==> (function-= (call f) 1) # f() .= 1 ==> (.= (call f) 1) # a += b ==> (+= a b) # a .= b ==> (.= a b) - is_short_form_func = k == K"=" && !is_dotted(t) && was_eventually_call(ps) + is_short_form_func = k == K"=" && !isdot && was_eventually_call(ps) if k == K"op=" # x += y ==> (op= x + y) # x .+= y ==> (.op= x + y) bump_trivia(ps) - if is_dotted(t) - bump_split(ps, (1, K".", TRIVIA_FLAG), - (-2, K"Identifier", EMPTY_FLAGS), # op - (1, K"=", TRIVIA_FLAG)) - else - bump_split(ps, - (-1, K"Identifier", EMPTY_FLAGS), # op - (1, K"=", TRIVIA_FLAG)) - end + isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + bump_split(ps, + (-1, K"Identifier", EMPTY_FLAGS), # op + (1, K"=", TRIVIA_FLAG)) else - bump(ps, TRIVIA_FLAG) + bump_dotted(ps, isdot, TRIVIA_FLAG) end bump_trivia(ps) # Syntax Edition TODO: We'd like to call `down` here when # is_short_form_func is true, to prevent `f() = 1 = 2` from parsing. parse_assignment(ps, down) emit(ps, mark, - is_short_form_func ? K"function" : k, + is_short_form_func ? K"function" : (isdot ? dotted(k) : k), is_short_form_func ? SHORT_FORM_FUNCTION_FLAG : flags(t)) end end @@ -755,10 +730,10 @@ end function parse_arrow(ps::ParseState) mark = position(ps) parse_or(ps) - t = peek_token(ps) + isdot, t = peek_dotted_op_token(ps) k = kind(t) if is_prec_arrow(k) - if kind(t) == K"-->" && !is_decorated(t) + if kind(t) == K"-->" && !isdot && !is_suffixed(t) # x --> y ==> (--> x y) # The only syntactic arrow bump(ps, TRIVIA_FLAG) parse_arrow(ps) @@ -768,24 +743,38 @@ function parse_arrow(ps::ParseState) # x <--> y ==> (call-i x <--> y) # x .--> y ==> (dotcall-i x --> y) # x -->₁ y ==> (call-i x -->₁ y) - bump_dotsplit(ps, remap_kind=K"Identifier") + bump_dotted(ps, isdot, remap_kind=K"Identifier") parse_arrow(ps) - emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) end end end +function dotted(k) + if k == K"||" + return K".||" + elseif k == K"&&" + return K".&&" + elseif k == K"=" + return K".=" + elseif k == K"op=" + return K".op=" + else + error("Unexpected dotted operator: $k") + end +end + # Like parse_RtoL, but specialized for the version test of dotted operators. function parse_lazy_cond(ps::ParseState, down, is_op, self) mark = position(ps) down(ps) - t = peek_token(ps) + (isdot, t) = peek_dotted_op_token(ps) k = kind(t) if is_op(k) - bump(ps, TRIVIA_FLAG) + bump_dotted(ps, isdot, TRIVIA_FLAG) self(ps) - emit(ps, mark, k, flags(t)) - if is_dotted(t) + emit(ps, mark, isdot ? dotted(k) : k, flags(t)) + if isdot min_supported_version(v"1.7", ps, mark, "dotted operators `.||` and `.&&`") end end @@ -826,15 +815,15 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) n_comparisons = 0 op_pos = NO_POSITION op_dotted = false - initial_tok = peek_token(ps) - while (t = peek_token(ps); is_prec_comparison(t)) + (initial_dot, initial_tok) = peek_dotted_op_token(ps) + while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t)) n_comparisons += 1 - op_dotted = is_dotted(t) - op_pos = bump_dotsplit(ps, emit_dot_node=true, remap_kind=K"Identifier") + op_dotted = isdot + op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier") parse_pipe_lt(ps) end if n_comparisons == 1 - if is_type_operator(initial_tok) + if is_type_operator(initial_tok, initial_dot) # Type comparisons are syntactic # x <: y ==> (<: x y) # x >: y ==> (>: x y) @@ -845,10 +834,10 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) # x < y ==> (call-i x < y) # x .< y ==> (dotcall-i x < y) if op_dotted - # x .<: y ==> (dotcall-i x <: y) + # Reset the extra (non-terminal) K"." (e.g. in `(. <)`) node to just `. <` reset_node!(ps, op_pos, kind=K"TOMBSTONE", flags=TRIVIA_FLAG) end - emit(ps, mark, is_dotted(initial_tok) ? 
K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, op_dotted ? K"dotcall" : K"call", INFIX_FLAG) end elseif n_comparisons > 1 # Comparison chains @@ -884,15 +873,15 @@ end function parse_range(ps::ParseState) mark = position(ps) parse_invalid_ops(ps) - initial_tok = peek_token(ps) + (initial_dot, initial_tok) = peek_dotted_op_token(ps) initial_kind = kind(initial_tok) if initial_kind != K":" && is_prec_colon(initial_kind) # a..b ==> (call-i a .. b) # a … b ==> (call-i a … b) # a .… b ==> (dotcall-i a … b) - bump_dotsplit(ps, remap_kind=K"Identifier") + bump_dotted(ps, initial_dot, remap_kind=K"Identifier") parse_invalid_ops(ps) - emit(ps, mark, is_dotted(initial_tok) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled # a ? b : c:d ==> (? a b (call-i c : d)) n_colons = 0 @@ -974,11 +963,11 @@ end function parse_invalid_ops(ps::ParseState) mark = position(ps) parse_expr(ps) - while (t = peek_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**") + while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**") bump_trivia(ps) - bump_dotsplit(ps) + bump_dotted(ps, isdot) parse_expr(ps) - emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) end end @@ -1004,9 +993,9 @@ end function parse_with_chains(ps::ParseState, down, is_op, chain_ops) mark = position(ps) down(ps) - while (t = peek_token(ps); is_op(kind(t))) + while ((isdot, t) = peek_dotted_op_token(ps); is_op(kind(t))) if ps.space_sensitive && preceding_whitespace(t) && - is_both_unary_and_binary(t) && + is_both_unary_and_binary(t, isdot) && !preceding_whitespace(peek_token(ps, 2)) # The following is two elements of a hcat # [x +y] ==> (hcat x (call-pre + y)) @@ -1017,16 +1006,16 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) # [x+y + z] ==> (vect (call-i x + y z)) break end - bump_dotsplit(ps, remap_kind=K"Identifier") + bump_dotted(ps, isdot, remap_kind=K"Identifier") down(ps) - if kind(t) in chain_ops && !is_decorated(t) + if kind(t) in chain_ops && !is_suffixed(t) && !isdot # a + b + c ==> (call-i a + b c) # a + b .+ c ==> (dotcall-i (call-i a + b) + c) parse_chain(ps, down, kind(t)) end # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c) # a .+ b .+ c ==> (dotcall-i (dotcall-i a + b) + c) - emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) end end @@ -1034,9 +1023,13 @@ end # # flisp: parse-chain function parse_chain(ps::ParseState, down, op_kind) - while (t = peek_token(ps); kind(t) == op_kind && !is_decorated(t)) + while true + isdot, t = peek_dotted_op_token(ps) + if kind(t) != op_kind || is_suffixed(t) || isdot + break + end if ps.space_sensitive && preceding_whitespace(t) && - is_both_unary_and_binary(t) && + is_both_unary_and_binary(t, false) && !preceding_whitespace(peek_token(ps, 2)) # [x +y] ==> (hcat x (call-pre + y)) break @@ -1063,7 +1056,7 @@ end # flisp: parse-unary-subtype function parse_unary_subtype(ps::ParseState) t = peek_token(ps) - if is_type_operator(t) + if is_type_operator(t, false) k2 = peek(ps, 2) if is_closing_token(ps, k2) || k2 in KSet"NewlineWs =" # return operator by itself @@ -1198,13 +1191,13 @@ end function parse_unary(ps::ParseState) mark = position(ps) bump_trivia(ps) - op_t = peek_token(ps) + (op_dotted, op_t) = peek_dotted_op_token(ps) op_k = kind(op_t) if ( !is_operator(op_k) || is_word_operator(op_k) || (op_k in KSet": ' .'") || - (is_syntactic_unary_op(op_k) && !is_dotted(op_t)) || + (is_syntactic_unary_op(op_k) && !op_dotted) || is_syntactic_operator(op_k) ) # `op_t` is not an initial operator @@ -1214,9 +1207,9 @@ function parse_unary(ps::ParseState) parse_factor(ps) return end - t2 = peek_token(ps, 2) + t2 = peek_token(ps, 2+op_dotted) k2 = kind(t2) - if op_k in KSet"- +" && !is_decorated(op_t) + if op_k in KSet"- +" && !is_suffixed(op_t) && !op_dotted if !preceding_whitespace(t2) && (k2 in KSet"Integer Float Float32" || (op_k == K"+" && k2 in KSet"BinInt HexInt OctInt")) @@ -1251,7 +1244,7 @@ function parse_unary(ps::ParseState) # .+ ==> (. +) # .& ==> (. 
&) parse_atom(ps) - elseif k2 == K"{" || (!is_unary_op(op_t) && k2 == K"(") + elseif k2 == K"{" || (!is_unary_op(op_t, op_dotted) && k2 == K"(") # Call with type parameters or non-unary prefix call # +{T}(x::T) ==> (call (curly + T) (:: x T)) # *(x) ==> (call * x) @@ -1265,7 +1258,7 @@ function parse_unary(ps::ParseState) # # (The flisp parser only considers commas before `;` and thus gets this # last case wrong) - op_pos = bump_dotsplit(ps, emit_dot_node=true, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier") space_before_paren = preceding_whitespace(t2) if space_before_paren @@ -1309,7 +1302,7 @@ function parse_unary(ps::ParseState) # Prefix calls have higher precedence than ^ # +(a,b)^2 ==> (call-i (call + a b) ^ 2) # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) - if is_type_operator(op_t) + if is_type_operator(op_t, op_dotted) # <:(a,) ==> (<: a) emit(ps, mark, op_k, opts.delim_flags) reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k) @@ -1335,13 +1328,14 @@ function parse_unary(ps::ParseState) # +(a)(x,y)^2 ==> (call-pre + (call-i (call (parens a) x y) ^ 2)) parse_call_chain(ps, mark_before_paren) parse_factor_with_initial_ex(ps, mark_before_paren) - if is_type_operator(op_t) + if is_type_operator(op_t, op_dotted) # <:(a) ==> (<:-pre (parens a)) emit(ps, mark, op_k, PREFIX_OP_FLAG) reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k) else - if is_dotted(op_t) + if op_dotted emit(ps, mark, K"dotcall", PREFIX_OP_FLAG) + # Reset the extra (non-terminal) K"." (e.g. in `(. +)`) node to just `. 
+` reset_node!(ps, op_pos, kind=K"TOMBSTONE") else emit(ps, mark, K"call", PREFIX_OP_FLAG) @@ -1349,7 +1343,7 @@ function parse_unary(ps::ParseState) end end else - if is_unary_op(op_t) + if is_unary_op(op_t, op_dotted) # Normal unary calls # +x ==> (call-pre + x) # √x ==> (call-pre √ x) @@ -1358,20 +1352,20 @@ function parse_unary(ps::ParseState) # -0x1 ==> (call-pre - 0x01) # - 2 ==> (call-pre - 2) # .-2 ==> (dotcall-pre - 2) - op_pos = bump_dotsplit(ps, EMPTY_FLAGS, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier") else # /x ==> (call-pre (error /) x) # +₁ x ==> (call-pre (error +₁) x) # .<: x ==> (dotcall-pre (error (. <:)) x) - bump_dotsplit(ps, EMPTY_FLAGS, emit_dot_node=true, remap_kind=K"Identifier") + bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier") op_pos = emit(ps, mark, K"error", error="not a unary operator") end parse_unary(ps) - if is_type_operator(op_t) + if is_type_operator(op_t, op_dotted) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) emit(ps, mark, op_k, PREFIX_OP_FLAG) else - emit(ps, mark, is_dotted(op_t) ? K"dotcall" : K"call", PREFIX_OP_FLAG) + emit(ps, mark, op_dotted ? K"dotcall" : K"call", PREFIX_OP_FLAG) end end end @@ -1393,10 +1387,10 @@ end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) parse_decl_with_initial_ex(ps, mark) - if (t = peek_token(ps); is_prec_power(kind(t))) - bump_dotsplit(ps, remap_kind=K"Identifier") + if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t))) + bump_dotted(ps, isdot, remap_kind=K"Identifier") parse_factor_after(ps) - emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) end end @@ -1465,9 +1459,9 @@ end # flisp: parse-unary-prefix function parse_unary_prefix(ps::ParseState, has_unary_prefix=false) mark = position(ps) - t = peek_token(ps) + (isdot, t) = peek_dotted_op_token(ps) k = kind(t) - if is_syntactic_unary_op(k) && !is_dotted(t) + if is_syntactic_unary_op(k) && !isdot k2 = peek(ps, 2) if k in KSet"& $" && (is_closing_token(ps, k2) || k2 == K"NewlineWs") # &) ==> & @@ -1631,6 +1625,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) check_ncat_compat(ps, mark, ckind) end elseif k == K"." + # Check if this is a dotted operator, not field access + k2 = peek(ps, 2) + if is_operator(k2) && !is_word_operator(k2) && k2 != K":" && k2 != K"$" && k2 != K"'" && k2 != K"?" + # This is a dotted operator like .=, .+, etc., not field access + # Let the appropriate parser handle it + break + end # x .y ==> (. x (error-t) y) bump_disallowed_space(ps) emark = position(ps) @@ -1696,8 +1697,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = (m, position(ps)) emit(ps, mark, K".") elseif k == K"'" - # f.' => f (error-t (. ')) - bump_dotsplit(ps, remap_kind=K"Identifier") + # f.' => (dotcall-post f (error ')) + bump(ps, remap_kind=K"Identifier") # bump ' # TODO: Reclaim dotted postfix operators :-) emit(ps, emark, K"error", error="the .' 
operator for transpose is discontinued") @@ -2104,7 +2105,7 @@ end function parse_global_local_const_vars(ps) mark = position(ps) n_commas = parse_comma(ps, false) - t = peek_token(ps) + (isdot, t) = peek_dotted_op_token(ps) if is_prec_assignment(t) if n_commas >= 1 # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) @@ -2117,7 +2118,7 @@ function parse_global_local_const_vars(ps) else # global x,y ==> (global x y) end - return kind(t) == K"=" && !is_dotted(t) + return kind(t) == K"=" && !isdot end # Parse function and macro definitions @@ -2565,6 +2566,8 @@ function parse_import_path(ps::ParseState) # import ....A ==> (import (importpath . . . . A)) # Dots with spaces are allowed (a misfeature?) # import . .A ==> (import (importpath . . A)) + # Modules with operator symbol names + # import .⋆ ==> (import (importpath . ⋆)) first_dot = true while true t = peek_token(ps) @@ -2584,16 +2587,9 @@ function parse_import_path(ps::ParseState) end first_dot = false end - if is_dotted(peek_token(ps)) - # Modules with operator symbol names - # import .⋆ ==> (import (importpath . 
⋆)) - bump_trivia(ps) - bump_split(ps, (1,K".",EMPTY_FLAGS), (-1,peek(ps),EMPTY_FLAGS)) - else - # import @x ==> (import (importpath @x)) - # import $A ==> (import (importpath ($ A))) - parse_atsym(ps, false) - end + # import @x ==> (import (importpath @x)) + # import $A ==> (import (importpath ($ A))) + parse_atsym(ps, false) while true t = peek_token(ps) k = kind(t) @@ -2601,27 +2597,21 @@ function parse_import_path(ps::ParseState) # import A.B ==> (import (importpath A B)) # import $A.@x ==> (import (importpath ($ A) @x)) # import A.B.C ==> (import (importpath A B C)) - bump_disallowed_space(ps) - bump(ps, TRIVIA_FLAG) - parse_atsym(ps) - elseif is_dotted(t) - # Resolve tokenization ambiguity: In imports, dots are part of the - # path, not operators - # import A.== ==> (import (importpath A ==)) # import A.⋆.f ==> (import (importpath A ⋆ f)) - if preceding_whitespace(t) - # Whitespace in import path allowed but discouraged - # import A .== ==> (import (importpath A ==)) - emit_diagnostic(ps, whitespace=true, - warning="space between dots in import path") - end - bump_trivia(ps) - m = position(ps) - bump_split(ps, (1,K".",TRIVIA_FLAG), (-1,k,EMPTY_FLAGS)) - if is_syntactic_operator(k) - # import A.= ==> (import (importpath A (error =))) - emit(ps, m, K"error", error="syntactic operators not allowed in import") + next_tok = peek_token(ps, 2) + if is_operator(kind(next_tok)) + if preceding_whitespace(t) + # Whitespace in import path allowed but discouraged + # import A .== ==> (import (importpath A ==)) + emit_diagnostic(ps, whitespace=true, + warning="space between dots in import path") + end + bump_trivia(ps) + else + bump_disallowed_space(ps) end + bump(ps, TRIVIA_FLAG) + parse_atsym(ps) elseif k == K"..." # Import the .. operator # import A... 
==> (import (importpath A ..)) @@ -3077,7 +3067,8 @@ function parse_paren(ps::ParseState, check_identifiers=true, has_unary_prefix=fa @check peek(ps) == K"(" bump(ps, TRIVIA_FLAG) # K"(" after_paren_mark = position(ps) - k = peek(ps) + (isdot, tok) = peek_dotted_op_token(ps) + k = kind(tok) if k == K")" # () ==> (tuple-p) bump(ps, TRIVIA_FLAG) @@ -3480,9 +3471,17 @@ end function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=false) bump_trivia(ps) mark = position(ps) - leading_kind = peek(ps) + (leading_dot, leading_tok) = peek_dotted_op_token(ps) + leading_kind = kind(leading_tok) # todo: Reorder to put most likely tokens first? - if is_error(leading_kind) + if leading_dot + is_operator(leading_kind) && @goto is_operator + bump(ps, remap_kind=K"Identifier") + if check_identifiers + # . ==> (error .) + emit(ps, mark, K"error", error="invalid identifier") + end + elseif is_error(leading_kind) # Errors for bad tokens are emitted in validate_tokens() rather than # here. bump(ps) @@ -3555,7 +3554,7 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal # a[:(end)] ==> (ref a (quote-: (error-t end))) parse_atom(ParseState(ps, end_symbol=false), false) emit(ps, mark, K"quote", COLON_QUOTE) - elseif check_identifiers && leading_kind == K"=" && is_plain_equals(peek_token(ps)) + elseif check_identifiers && leading_kind == K"=" && is_plain_equals(peek_token(ps)) && !leading_dot # = ==> (error =) bump(ps, error="unexpected `=`") elseif leading_kind == K"Identifier" @@ -3566,10 +3565,10 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal # where=1 ==> (= where 1) bump(ps, remap_kind=K"Identifier") elseif is_operator(leading_kind) +@label is_operator # + ==> + # .+ ==> (. +) - # .= ==> (. =) - bump_dotsplit(ps, emit_dot_node=true, remap_kind= + bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind= is_syntactic_operator(leading_kind) ? 
leading_kind : K"Identifier") if check_identifiers && !is_valid_identifier(leading_kind) # += ==> (error (op= +)) diff --git a/JuliaSyntax/src/julia/tokenize.jl b/JuliaSyntax/src/julia/tokenize.jl index 761455dd84adc..24c96870e88cd 100644 --- a/JuliaSyntax/src/julia/tokenize.jl +++ b/JuliaSyntax/src/julia/tokenize.jl @@ -197,13 +197,12 @@ struct RawToken # Offsets into a string or buffer startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer - dotop::Bool suffix::Bool end function RawToken(kind::Kind, startbyte::Int, endbyte::Int) - RawToken(kind, startbyte, endbyte, false, false) + RawToken(kind, startbyte, endbyte, false) end -RawToken() = RawToken(K"error", 0, 0, false, false) +RawToken() = RawToken(K"error", 0, 0, false) const EMPTY_TOKEN = RawToken() @@ -254,7 +253,6 @@ mutable struct Lexer{IO_t <: IO} string_states::Vector{StringState} chars::Tuple{Char,Char,Char,Char} charspos::Tuple{Int,Int,Int,Int} - dotop::Bool end function Lexer(io::IO) @@ -283,7 +281,7 @@ function Lexer(io::IO) end Lexer(io, position(io), K"error", Vector{StringState}(), - (c1,c2,c3,c4), (p1,p2,p3,p4), false) + (c1,c2,c3,c4), (p1,p2,p3,p4)) end Lexer(str::AbstractString) = Lexer(IOBuffer(str)) @@ -438,9 +436,8 @@ function emit(l::Lexer, kind::Kind, maybe_op=true) end end - tok = RawToken(kind, startpos(l), position(l) - 1, l.dotop, suffix) + tok = RawToken(kind, startpos(l), position(l) - 1, suffix) - l.dotop = false l.last_token = kind return tok end @@ -924,7 +921,7 @@ function lex_minus(l::Lexer) else return emit(l, K"ErrorInvalidOperator") # "--" is an invalid operator end - elseif !l.dotop && accept(l, '>') + elseif l.last_token != K"." 
&& accept(l, '>') return emit(l, K"->") elseif accept(l, '=') return emit(l, K"op=") @@ -1184,87 +1181,16 @@ function lex_dot(l::Lexer) return lex_digit(l, K"Float") else pc, dpc = dpeekchar(l) - if pc == '+' - l.dotop = true - readchar(l) - return lex_plus(l) - elseif pc =='-' - l.dotop = true - readchar(l) - return lex_minus(l) - elseif pc == '−' - l.dotop = true - readchar(l) - return emit(l, accept(l, '=') ? K"op=" : K"-") - elseif pc =='*' - l.dotop = true - readchar(l) - return lex_star(l) - elseif pc =='/' - l.dotop = true - readchar(l) - return lex_forwardslash(l) - elseif pc =='\\' - l.dotop = true - readchar(l) - return lex_backslash(l) - elseif pc =='^' - l.dotop = true - readchar(l) - return lex_circumflex(l) - elseif pc =='<' - l.dotop = true - readchar(l) - return lex_less(l) - elseif pc =='>' - l.dotop = true - readchar(l) - return lex_greater(l) - elseif pc =='&' - l.dotop = true - readchar(l) - if accept(l, '=') - return emit(l, K"op=") - else - if accept(l, '&') - return emit(l, K"&&") - end - return emit(l, K"&") - end - elseif pc =='%' - l.dotop = true - readchar(l) - return lex_percent(l) - elseif pc == '=' && dpc != '>' - l.dotop = true - readchar(l) - return lex_equal(l) - elseif pc == '|' - l.dotop = true - readchar(l) - if accept(l, '|') - return emit(l, K"||") - end - return lex_bar(l) - elseif pc == '!' && dpc == '=' - l.dotop = true - readchar(l) - return lex_exclaim(l) - elseif pc == '⊻' - l.dotop = true - readchar(l) - return lex_xor(l) + # When we see a dot followed by an operator, we want to emit just the dot + # and let the next token be the operator + if is_operator_start_char(pc) || (pc == '!' 
&& dpc == '=') + return emit(l, K".") elseif pc == '÷' - l.dotop = true - readchar(l) - return lex_division(l) + return emit(l, K".") elseif pc == '=' && dpc == '>' - l.dotop = true - readchar(l) - return lex_equal(l) + return emit(l, K".") elseif is_dottable_operator_start_char(pc) - l.dotop = true - return _next_token(l, readchar(l)) + return emit(l, K".") end return emit(l, K".") end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 8371ede20b4d1..07e66abf27022 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -78,7 +78,7 @@ end @test diagnostic("f(x, y #=hi=#\ng(z)") == Diagnostic(7, 6, :error, "Expected `)` or `,`") @test diagnostic("(x, y \nz") == Diagnostic(6, 5, :error, "Expected `)` or `,`") @test diagnostic("function f(x, y \nz end") == Diagnostic(16, 15, :error, "Expected `)` or `,`") - + @test diagnostic("sin. (1)") == Diagnostic(5, 5, :error, "whitespace is not allowed here") @test diagnostic("x [i]") == diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f0ff0f51cd438..3e4b801203200 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -9,17 +9,17 @@ function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", show_kws return sprint(io->show(io, MIME("text/x.sexpression"), s; show_kws...)) end -function test_parse(production, input, output) +function test_parse(production, input, expected) if !(input isa AbstractString) opts, input = input else opts = NamedTuple() end parsed = parse_to_sexpr_str(production, input; opts...) - if output isa Regex # Could be AbstractPattern, but that type was added in Julia 1.6. - @test match(output, parsed) !== nothing + if expected isa Regex # Could be AbstractPattern, but that type was added in Julia 1.6. 
+ @test match(expected, parsed) !== nothing else - @test parsed == output + @test parsed == expected end end @@ -119,6 +119,7 @@ tests = [ "x < y" => "(call-i x < y)" "x .< y" => "(dotcall-i x < y)" "x .<: y" => "(dotcall-i x <: y)" + ":. == :." => "(call-i (quote-: .) == (quote-: .))" # Comparison chains "x < y < z" => "(comparison x < y < z)" "x == y < z" => "(comparison x == y < z)" @@ -235,6 +236,7 @@ tests = [ "+(a,)" => "(call-, + a)" ".+(a,)" => "(call-, (. +) a)" "(.+)(a)" => "(call (parens (. +)) a)" + "(.~(a))" => "(parens (dotcall-pre ~ (parens a)))" "+(a=1,)" => "(call-, + (= a 1))" "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" @@ -442,6 +444,8 @@ tests = [ # Field/property syntax "f.x.y" => "(. (. f x) y)" "x .y" => "(. x (error-t) y)" + "x.?" => "(. x ?)" + "x.in" => "(. x in)" # Adjoint "f'" => "(call-post f ')" "f'ᵀ" => "(call-post f 'ᵀ)" diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 478bed577cf7d..5089152065c71 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -718,20 +718,20 @@ end @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] # Dotted operators and other dotted suffixes - @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", ".+"=>K"+", "1"=>K"Integer"] + @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"] @test toks("1234.0+1") == ["1234.0"=>K"Float", "+"=>K"+", "1"=>K"Integer"] - @test toks("1234.0 .+1") == ["1234.0"=>K"Float", " "=>K"Whitespace", ".+"=>K"+", "1"=>K"Integer"] + @test toks("1234.0 .+1") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"] @test toks("1234 .f(a)") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] @test toks("1234.0 .f(a)") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] - @test 
toks("1f0./1") == ["1f0"=>K"Float32", "./"=>K"/", "1"=>K"Integer"] + @test toks("1f0./1") == ["1f0"=>K"Float32", "."=>K".", "/"=>K"/", "1"=>K"Integer"] # Dotted operators after numeric constants are ok - @test toks("1e1.⫪") == ["1e1"=>K"Float", ".⫪"=>K"⫪"] - @test toks("1.1.⫪") == ["1.1"=>K"Float", ".⫪"=>K"⫪"] - @test toks("1e1.−") == ["1e1"=>K"Float", ".−"=>K"-"] - @test toks("1.1.−") == ["1.1"=>K"Float", ".−"=>K"-"] + @test toks("1e1.⫪") == ["1e1"=>K"Float", "."=>K".", "⫪"=>K"⫪"] + @test toks("1.1.⫪") == ["1.1"=>K"Float", "."=>K".", "⫪"=>K"⫪"] + @test toks("1e1.−") == ["1e1"=>K"Float", "."=>K".", "−"=>K"-"] + @test toks("1.1.−") == ["1.1"=>K"Float", "."=>K".", "−"=>K"-"] # Non-dottable operators are not ok @test toks("1e1.\$") == ["1e1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"] @test toks("1.1.\$") == ["1.1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"] @@ -821,10 +821,28 @@ for opkind in Tokenize._nondot_symbolic_operator_kinds() tokens = collect(tokenize(str)) exop = expr.head == :call ? expr.args[1] : expr.head #println(str) - if Symbol(Tokenize.untokenize(tokens[arity == 1 ? 1 : 3], str)) != exop - @info "" arity str exop + # For dotted operators, we need to reconstruct the operator from separate tokens + # Note: .. and ... are not dotted operators, they're regular operators + exop_str = string(exop) + is_dotted = occursin(".", exop_str) && exop != :.. && exop != :... + if is_dotted + # Dotted operators are now two tokens: . and the operator + dot_pos = arity == 1 ? 1 : 3 + op_pos = arity == 1 ? 2 : 4 + reconstructed_op = Symbol(Tokenize.untokenize(tokens[dot_pos], str) * + Tokenize.untokenize(tokens[op_pos], str)) + if reconstructed_op != exop + @info "" arity str exop reconstructed_op + end + @test reconstructed_op == exop + else + # Regular operators and suffixed operators + op_pos = arity == 1 ? 
1 : 3 + if Symbol(Tokenize.untokenize(tokens[op_pos], str)) != exop + @info "" arity str exop op_pos + end + @test Symbol(Tokenize.untokenize(tokens[op_pos], str)) == exop end - @test Symbol(Tokenize.untokenize(tokens[arity == 1 ? 1 : 3], str)) == exop else break end @@ -837,13 +855,13 @@ end # https://github.com/JuliaLang/julia/pull/25157 @test tok("\u00b7").kind == K"⋅" @test tok("\u0387").kind == K"⋅" - @test tok(".\u00b7").dotop - @test tok(".\u0387").dotop + @test toks(".\u00b7") == ["."=>K".", "\u00b7"=>K"⋅"] + @test toks(".\u0387") == ["."=>K".", "\u0387"=>K"⋅"] # https://github.com/JuliaLang/julia/pull/40948 @test tok("−").kind == K"-" @test tok("−=").kind == K"op=" - @test tok(".−").dotop + @test toks(".−") == ["."=>K".", "−"=>K"-"] end @testset "perp" begin @@ -1158,15 +1176,15 @@ end end @testset "dotop miscellanea" begin - @test strtok("a .-> b") == ["a", " ", ".-", ">", " ", "b", ""] - @test strtok(".>: b") == [".>:", " ", "b", ""] - @test strtok(".<: b") == [".<:", " ", "b", ""] + @test strtok("a .-> b") == ["a", " ", ".", "-", ">", " ", "b", ""] + @test strtok(".>: b") == [".", ">:", " ", "b", ""] + @test strtok(".<: b") == [".", "<:", " ", "b", ""] @test strtok("a ||₁ b") == ["a", " ", "||", "₁", " ", "b", ""] @test strtok("a ||̄ b") == ["a", " ", "||", "̄", " ", "b", ""] - @test strtok("a .||₁ b") == ["a", " ", ".||", "₁", " ", "b", ""] + @test strtok("a .||₁ b") == ["a", " ", ".", "||", "₁", " ", "b", ""] @test strtok("a &&₁ b") == ["a", " ", "&&", "₁", " ", "b", ""] @test strtok("a &&̄ b") == ["a", " ", "&&", "̄", " ", "b", ""] - @test strtok("a .&&₁ b") == ["a", " ", ".&&", "₁", " ", "b", ""] + @test strtok("a .&&₁ b") == ["a", " ", ".", "&&", "₁", " ", "b", ""] end end From 9376dd1ecfbab635a0d34135b77e56c6f46d3da2 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Wed, 2 Jul 2025 08:17:39 -0700 Subject: [PATCH 1014/1109] Tweaks to expr-conversion for JuliaLowering (JuliaLang/JuliaSyntax.jl#569) * Tweaks to 
expr-conversion for JuliaLowering Two small and related changes: 1. The node->expr conversion shouldn't peek at provenance from SyntaxNode or SyntaxTree, so fix the couple of places we do this. We've always had provenance available, but we won't once there are nodes constructed from exprs for compatibility. 2. JuliaLowering currently relies on putting its own data structures through the JuliaSyntax node->expr machine. Remove a couple of type annotations and reinstate `_expr_leaf_val` so that this is possible. * Remove source's type annotation too A JuliaLowering.SyntaxTree may have a line number node instead of a SourceFile as its location. Pushing the linenode through `node_to_expr` is strange and temporary, according to comments. --- JuliaSyntax/src/integration/expr.jl | 30 ++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl index ff9b955c8fe26..eb1cefacd014b 100644 --- a/JuliaSyntax/src/integration/expr.jl +++ b/JuliaSyntax/src/integration/expr.jl @@ -65,7 +65,7 @@ end reverse_nontrivia_children(cursor::RedTreeCursor) = Iterators.filter(should_include_node, Iterators.reverse(cursor)) -reverse_nontrivia_children(cursor::SyntaxNode) = Iterators.filter(should_include_node, Iterators.reverse(children(cursor))) +reverse_nontrivia_children(cursor) = Iterators.filter(should_include_node, Iterators.reverse(children(cursor))) # Julia string literals in a `K"string"` node may be split into several chunks # interspersed with trivia in two situations: @@ -74,7 +74,7 @@ reverse_nontrivia_children(cursor::SyntaxNode) = Iterators.filter(should_include # # This function concatenating adjacent string chunks together as done in the # reference parser. 
-function _string_to_Expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) +function _string_to_Expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) ret = Expr(:string) args2 = Any[] i = 1 @@ -197,7 +197,7 @@ function _append_iterspec!(args::Vector{Any}, @nospecialize(ex)) return args end -function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) +function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) args = retexpr.args firstchildhead = head(cursor) firstchildrange::UnitRange{UInt32} = byte_range(cursor) @@ -215,8 +215,14 @@ function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor::Union{RedTreeCur return (firstchildhead, firstchildrange) end -# Convert internal node of the JuliaSyntax parse tree to an Expr -function node_to_expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32=UInt32(0)) +_expr_leaf_val(node::SyntaxNode, _...) = node.val +_expr_leaf_val(cursor::RedTreeCursor, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) = + parse_julia_literal(txtbuf, head(cursor), byte_range(cursor) .+ txtbuf_offset) +# Extended in JuliaLowering to support `node_to_expr(::SyntaxTree, ...)` + +# Convert `cursor` (SyntaxNode or RedTreeCursor) to an Expr +# `source` is a SourceFile, or if node was an Expr originally, a LineNumberNode +function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32=UInt32(0)) if !should_include_node(cursor) return nothing end @@ -225,14 +231,12 @@ function node_to_expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFi k = kind(cursor) srcrange::UnitRange{UInt32} = byte_range(cursor) if is_leaf(cursor) - if k == K"MacroName" && view(source, srcrange) == "." 
- return Symbol("@__dot__") - elseif is_error(k) + if is_error(k) return k == K"error" ? Expr(:error) : Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") else - val = parse_julia_literal(txtbuf, head(cursor), srcrange .+ txtbuf_offset) + val = _expr_leaf_val(cursor, txtbuf, txtbuf_offset) if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr @@ -242,6 +246,8 @@ function node_to_expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFi val isa UInt128 ? Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + elseif k == K"MacroName" && val === Symbol("@.") + return Symbol("@__dot__") else return val end @@ -297,7 +303,7 @@ end firstchildhead::SyntaxHead, firstchildrange::UnitRange{UInt32}, nodehead::SyntaxHead, - source::SourceFile) + source) args = retexpr.args k = kind(nodehead) endloc = source_location(LineNumberNode, source, last(srcrange)) @@ -635,9 +641,11 @@ function build_tree(::Type{Expr}, stream::ParseStream, source::SourceFile) return entry end -function Base.Expr(node::SyntaxNode) +function to_expr(node) source = sourcefile(node) txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(source)) wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS) return fixup_Expr_child(wrapper_head, node_to_expr(node, source, txtbuf, UInt32(txtbuf_offset)), false) end + +Base.Expr(node::SyntaxNode) = to_expr(node) From c769a97e5d023948014286c6b7d7b0f42c7ec45d Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 2 Jul 2025 23:35:51 -0400 Subject: [PATCH 1015/1109] Refactor node-specific flags to overlap with numeric flags (JuliaLang/JuliaSyntax.jl#570) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change moves node-specific flags (TRIPLE_STRING_FLAG, PARENS_FLAG, etc.) from bits 5-6 to overlap with numeric flags in bits 8-15. 
This is safe because: 1. Node types that use specific flags never use numeric flags 2. Numeric flags are only used by ncat/nrow nodes, which don't use node-specific flags 3. The parser now passes dimensions separately to avoid flag conflicts Key changes: - Moved most node-specific flags to bit 8, some to bit 9 - Refactored parse_cat/parse_array to return dimension as a separate value - Updated emit_braces to accept dimension parameter - Made untokenize function head-aware for proper flag display - Simplified flag handling by removing conditionals (just OR flags together) - Added tests for dimension 4 ncat to ensure higher dimensions work correctly 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: Keno Fischer Co-authored-by: Claude --- JuliaSyntax/src/julia/julia_parse_stream.jl | 67 +++++++++++---------- JuliaSyntax/src/julia/parser.jl | 44 ++++++++------ JuliaSyntax/test/parser.jl | 7 +++ 3 files changed, 67 insertions(+), 51 deletions(-) diff --git a/JuliaSyntax/src/julia/julia_parse_stream.jl b/JuliaSyntax/src/julia/julia_parse_stream.jl index fc3eac28f3783..87ad038699a77 100644 --- a/JuliaSyntax/src/julia/julia_parse_stream.jl +++ b/JuliaSyntax/src/julia/julia_parse_stream.jl @@ -9,54 +9,54 @@ const INFIX_FLAG = RawFlags(1<<3) const PREFIX_OP_FLAG = RawFlags(2<<3) const POSTFIX_OP_FLAG = RawFlags(3<<3) -# The following flags are quite head-specific and may overlap +# The following flags are quite head-specific and may overlap with numeric flags """ Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ``` """ -const TRIPLE_STRING_FLAG = RawFlags(1<<5) +const TRIPLE_STRING_FLAG = RawFlags(1<<8) """ Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping """ -const RAW_STRING_FLAG = RawFlags(1<<6) +const RAW_STRING_FLAG = RawFlags(1<<9) """ Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses """ -const PARENS_FLAG = RawFlags(1<<5) +const PARENS_FLAG = RawFlags(1<<8) """ 
Set for various delimited constructs when they contains a trailing comma. For example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where this applies are: `tuple call dotcall macrocall vect curly braces <: >:`. """ -const TRAILING_COMMA_FLAG = RawFlags(1<<6) +const TRAILING_COMMA_FLAG = RawFlags(1<<9) """ Set for K"quote" for the short form `:x` as opposed to long form `quote x end` """ -const COLON_QUOTE = RawFlags(1<<5) +const COLON_QUOTE = RawFlags(1<<8) """ Set for K"toplevel" which is delimited by parentheses """ -const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) +const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<8) """ Set for K"function" in short form definitions such as `f() = 1` """ -const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5) +const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<8) """ Set for K"struct" when mutable """ -const MUTABLE_FLAG = RawFlags(1<<5) +const MUTABLE_FLAG = RawFlags(1<<8) """ Set for K"module" when it's not bare (`module`, not `baremodule`) """ -const BARE_MODULE_FLAG = RawFlags(1<<5) +const BARE_MODULE_FLAG = RawFlags(1<<8) # Flags holding the dimension of an nrow or other UInt8 not held in the source # TODO: Given this is only used for nrow/ncat, we could actually use all the flags? 
@@ -137,29 +137,34 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_postfix_op_call(head) && (str = str*"-post") k = kind(head) - if k in KSet"string cmdstring Identifier" - has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") - has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") - elseif k in KSet"tuple block macrocall" - has_flags(head, PARENS_FLAG) && (str = str*"-p") - elseif k == K"quote" - has_flags(head, COLON_QUOTE) && (str = str*"-:") - elseif k == K"toplevel" - has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") - elseif k == K"function" - has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") - elseif k == K"struct" - has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") - elseif k == K"module" - has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") - end - if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && - has_flags(head, TRAILING_COMMA_FLAG) - str *= "-," + # Handle numeric flags for nrow/ncat nodes + if k in KSet"nrow ncat typed_ncat" + n = numeric_flags(head) + n != 0 && (str = str*"-"*string(n)) + else + # Handle head-specific flags that overlap with numeric flags + if k in KSet"string cmdstring Identifier" + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") + elseif k in KSet"tuple block macrocall" + has_flags(head, PARENS_FLAG) && (str = str*"-p") + elseif k == K"quote" + has_flags(head, COLON_QUOTE) && (str = str*"-:") + elseif k == K"toplevel" + has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") + elseif k == K"function" + has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") + elseif k == K"struct" + has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") + elseif k == K"module" + has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") + end + if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && + has_flags(head, TRAILING_COMMA_FLAG) + str *= "-," + end end is_suffixed(head) && 
(str = str*"-suf") - n = numeric_flags(head) - n != 0 && (str = str*"-"*string(n)) end str end diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl index a1a0a96b09077..49ba902cba0d1 100644 --- a/JuliaSyntax/src/julia/parser.jl +++ b/JuliaSyntax/src/julia/parser.jl @@ -1098,8 +1098,8 @@ function parse_where_chain(ps0::ParseState, mark) # x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys))))) m = position(ps) bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) - emit_braces(ps, m, ckind, cflags) + ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol) + emit_braces(ps, m, ckind, cflags, dim) emit(ps, mark, K"where") else # x where T ==> (where x T) @@ -1589,7 +1589,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ParseState(ps, end_symbol=true), + ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true), K"]", ps.end_symbol) if is_macrocall # @S[a,b] ==> (macrocall @S (vect a b)) @@ -1600,7 +1600,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) fix_macro_name_kind!(ps, macro_name_position) - emit(ps, m, ckind, cflags) + emit(ps, m, ckind, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, m, ckind) emit(ps, mark, K"macrocall") is_macrocall = false @@ -1621,7 +1621,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind == K"comprehension" ? K"typed_comprehension" : ckind == K"ncat" ? K"typed_ncat" : internal_error("unrecognized kind in parse_cat ", string(ckind)) - emit(ps, mark, outk, cflags) + emit(ps, mark, outk, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, mark, ckind) end elseif k == K"." 
@@ -2840,7 +2840,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) if binding_power == typemin(Int) # [x@y ==> (hcat x (error-t ✘ y)) bump_closing_token(ps, closer) - return (K"hcat", EMPTY_FLAGS) + return (K"hcat", 0) end while true (next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order) @@ -2856,9 +2856,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) binding_power = next_bp end bump_closing_token(ps, closer) - return binding_power == -1 ? (K"vcat", EMPTY_FLAGS) : - binding_power == 0 ? (K"hcat", EMPTY_FLAGS) : - (K"ncat", set_numeric_flags(dim)) + return binding_power == -1 ? (K"vcat", 0) : + binding_power == 0 ? (K"hcat", 0) : + (K"ncat", dim) end # Parse equal and ascending precedence chains of array concatenation operators - @@ -3012,7 +3012,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) mark = position(ps) if k == closer # [] ==> (vect) - return parse_vect(ps, closer, false) + ckind, cflags = parse_vect(ps, closer, false) + return (ckind, cflags, 0) elseif k == K";" #v1.8: [;] ==> (ncat-1) #v1.8: [;;] ==> (ncat-2) @@ -3022,7 +3023,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) dim, _ = parse_array_separator(ps, Ref(:unknown)) min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") bump_closing_token(ps, closer) - return (K"ncat", set_numeric_flags(dim)) + return (K"ncat", EMPTY_FLAGS, dim) end parse_eq_star(ps) k = peek(ps, skip_newlines=true) @@ -3035,15 +3036,18 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x] ==> (vect x) # [x \n ] ==> (vect x) # [x ==> (vect x (error-t)) - parse_vect(ps, closer, prefix_trailing_comma) + ckind, cflags = parse_vect(ps, closer, prefix_trailing_comma) + return (ckind, cflags, 0) elseif k == K"for" # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) - parse_comprehension(ps, mark, closer) + ckind, 
cflags = parse_comprehension(ps, mark, closer) + return (ckind, cflags, 0) else # [x y] ==> (hcat x y) # and other forms; See parse_array. - parse_array(ps, mark, closer, end_is_symbol) + ckind, dim = parse_array(ps, mark, closer, end_is_symbol) + return (ckind, EMPTY_FLAGS, dim) end end @@ -3448,13 +3452,13 @@ function parse_string(ps::ParseState, raw::Bool) emit(ps, mark, string_kind, str_flags) end -function emit_braces(ps, mark, ckind, cflags) +function emit_braces(ps, mark, ckind, cflags, dim=0) if ckind == K"hcat" # {x y} ==> (bracescat (row x y)) emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG) elseif ckind == K"ncat" # {x ;;; y} ==> (bracescat (nrow-3 x y)) - emit(ps, mark, K"nrow", cflags & ~TRAILING_COMMA_FLAG) + emit(ps, mark, K"nrow", set_numeric_flags(dim)) end check_ncat_compat(ps, mark, ckind) outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat" @@ -3638,13 +3642,13 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal parse_paren(ps, check_identifiers, has_unary_prefix) elseif leading_kind == K"[" # cat expression bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ps, K"]", ps.end_symbol) - emit(ps, mark, ckind, cflags) + ckind, cflags, dim = parse_cat(ps, K"]", ps.end_symbol) + emit(ps, mark, ckind, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, mark, ckind) elseif leading_kind == K"{" # cat expression bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) - emit_braces(ps, mark, ckind, cflags) + ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol) + emit_braces(ps, mark, ckind, cflags, dim) elseif leading_kind == K"@" # macro call # Macro names can be keywords # @end x ==> (macrocall @end x) diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 3e4b801203200..f84009287bf0f 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -890,6 +890,8 @@ tests = [ "{x,y,}" => "(braces-, x y)" "{x y}" => "(bracescat (row x y))" 
((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" + ((v=v"1.7",), "{a ;; b}") => "(bracescat (nrow-2 a b))" + ((v=v"1.7",), "{a ;;;; b}") => "(bracescat (nrow-4 a b))" # Macro names can be keywords "@end x" => "(macrocall @end x)" # __dot__ macro @@ -929,6 +931,11 @@ tests = [ # Column major ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" + # Dimension 4 ncat + ((v=v"1.7",), "[x ;;;; y]") => "(ncat-4 x y)" + ((v=v"1.7",), "[a ; b ;;;; c ; d]") => "(ncat-4 (nrow-1 a b) (nrow-1 c d))" + ((v=v"1.7",), "[a b ; c d ;;;; e f ; g h]") => + "(ncat-4 (nrow-1 (row a b) (row c d)) (nrow-1 (row e f) (row g h)))" # Array separators # Newlines before semicolons are not significant "[a \n ;]" => "(vcat a)" From 1eb72c9acb767a6fcebd0bb59901125afdbeb410 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 10 Jul 2025 14:07:58 -0400 Subject: [PATCH 1016/1109] Tweak macro name representation (JuliaLang/JuliaSyntax.jl#572) The current representation for macro names is a bit peculiar. When the parser encounters `@a`, it treats `@` as notation for the macrocall and then `reset_node!`'s (which itself may be considered a bit of a code smell) the `a` to a special MacroName token kind that (when translated back to julia Expr) implicitly adds back the `@`. Things get even more peculiar with `@var"a"` where only the token inside the string macro gets reset. One particular consequence of this is https://github.com/JuliaLang/julia/issues/58885, because our translation back to Expr does not check the RAW_STRING_FLAG (whereas the translation for K"Identifier" does). A second issue is that we currently parse `@A.b.c` and `A.b.@c` to the same SyntaxTree (of course the green tree is different). 
We aren't currently being super precise about the required invariants for syntax trees, but in general it would be desirable for non-trivia notation (like `@`) to be precisely recoverable from the tree, which is not the case here. This is especially annoying because there are syntax cases that are errors for one of these, but not the other (e.g. `@A.b.$` is an error, but `A.B.@$` is allowed). Now, I think the wisdom of some of those syntax choices can be debated, but that is the situation we face. So this PR tries to clean that all up a bit by: - Replacing the terminal K"MacroName" by a non-terminal K"macro_name". With this form, `@A.c` parses as `(macro_name (. A c))` while `A.@c` parses as `(. A (macro_name c))`. - (In particular the `@` notation is now always associated with the macro_name). - Emitting the dots in `@..` and `@...` as direct identifier tokens rather than having to reset them back. - Adjusting everything else accordingly. Partially written by Claude Code, though it had some trouble with the actual code changes. --- JuliaSyntax/src/integration/expr.jl | 32 ++- JuliaSyntax/src/julia/kinds.jl | 18 +- JuliaSyntax/src/julia/literal_parsing.jl | 6 - JuliaSyntax/src/julia/parser.jl | 253 ++++++++++++----------- JuliaSyntax/src/julia/tokenize.jl | 15 +- JuliaSyntax/test/diagnostics.jl | 2 +- JuliaSyntax/test/expr.jl | 1 + JuliaSyntax/test/green_node.jl | 10 +- JuliaSyntax/test/parser.jl | 187 +++++++++-------- 9 files changed, 271 insertions(+), 253 deletions(-) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl index eb1cefacd014b..038bad9a20c90 100644 --- a/JuliaSyntax/src/integration/expr.jl +++ b/JuliaSyntax/src/integration/expr.jl @@ -246,8 +246,6 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt val isa UInt128 ? 
Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) - elseif k == K"MacroName" && val === Symbol("@.") - return Symbol("@__dot__") else return val end @@ -296,7 +294,31 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt nodehead, source) end -# Split out from the above for codesize reasons, to avoid specialization on multiple +function adjust_macro_name!(retexpr::Union{Expr, Symbol}, k::Kind) + if !(retexpr isa Symbol) + retexpr::Expr + # can happen for incomplete or errors + (length(retexpr.args) < 2 || retexpr.head != :(.)) && return retexpr + arg2 = retexpr.args[2] + isa(arg2, QuoteNode) || return retexpr + retexpr.args[2] = QuoteNode(adjust_macro_name!(arg2.value, k)) + return retexpr + end + if k == K"macro_name" + if retexpr === Symbol(".") + return Symbol("@__dot__") + else + return Symbol("@$retexpr") + end + elseif k == K"macro_name_cmd" + return Symbol("@$(retexpr)_cmd") + else + @assert k == K"macro_name_str" + return Symbol("@$(retexpr)_str") + end +end + +# Split out from `node_to_expr` for codesize reasons, to avoid specialization on multiple # tree types. @noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode, srcrange::UnitRange{UInt32}, @@ -312,6 +334,8 @@ end # However, errors can add additional errors tokens which we represent # as e.g. `Expr(:var, ..., Expr(:error))`. return retexpr.args[1] + elseif k in KSet"macro_name macro_name_cmd macro_name_str" + return adjust_macro_name!(retexpr.args[1], k) elseif k == K"?" 
retexpr.head = :if elseif k == K"op=" && length(args) == 3 @@ -331,7 +355,7 @@ end elseif k == K"macrocall" if length(args) >= 2 a2 = args[2] - if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName" + if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"macro_name_cmd" # Fix up for custom cmd macros like foo`x` args[2] = a2.args[3] end diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl index 8a20a2b82f42e..19a00eb2bee8e 100644 --- a/JuliaSyntax/src/julia/kinds.jl +++ b/JuliaSyntax/src/julia/kinds.jl @@ -194,15 +194,6 @@ register_kinds!(JuliaSyntax, 0, [ "BEGIN_IDENTIFIERS" "Identifier" "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering - # Macro names are modelled as special kinds of identifiers because the full - # macro name may not appear as characters in the source: The `@` may be - # detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd - # suffix appended. - "BEGIN_MACRO_NAMES" - "MacroName" - "StringMacroName" - "CmdMacroName" - "END_MACRO_NAMES" "END_IDENTIFIERS" "BEGIN_KEYWORDS" @@ -1048,6 +1039,10 @@ register_kinds!(JuliaSyntax, 0, [ "iteration" "comprehension" "typed_comprehension" + # Macro names + "macro_name" + "macro_name_cmd" + "macro_name_str" # Container for a single statement/atom plus any trivia and errors "wrapper" "END_SYNTAX_KINDS" @@ -1111,10 +1106,6 @@ const _nonunique_kind_names = Set([ K"String" K"Char" K"CmdString" - - K"MacroName" - K"StringMacroName" - K"CmdMacroName" ]) """ @@ -1201,7 +1192,6 @@ is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS" is_prec_pipe_lt(x) = kind(x) == K"<|" is_prec_pipe_gt(x) = kind(x) == K"|>" is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS" -is_macro_name(x) = K"BEGIN_MACRO_NAMES" <= kind(x) <= K"END_MACRO_NAMES" is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS" function is_string_delim(x) diff --git 
a/JuliaSyntax/src/julia/literal_parsing.jl b/JuliaSyntax/src/julia/literal_parsing.jl index f2b99b862210e..0d716e39d4081 100644 --- a/JuliaSyntax/src/julia/literal_parsing.jl +++ b/JuliaSyntax/src/julia/literal_parsing.jl @@ -430,12 +430,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) Symbol(normalize_identifier(val_str)) elseif k == K"error" ErrorVal() - elseif k == K"MacroName" - Symbol("@$(normalize_identifier(val_str))") - elseif k == K"StringMacroName" - Symbol("@$(normalize_identifier(val_str))_str") - elseif k == K"CmdMacroName" - Symbol("@$(normalize_identifier(val_str))_cmd") elseif is_syntax_kind(head) nothing elseif is_keyword(k) diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl index 49ba902cba0d1..2abed160181ab 100644 --- a/JuliaSyntax/src/julia/parser.jl +++ b/JuliaSyntax/src/julia/parser.jl @@ -1488,6 +1488,13 @@ function parse_unary_prefix(ps::ParseState, has_unary_prefix=false) end end +function maybe_parsed_macro_name(ps, processing_macro_name, mark) + if processing_macro_name + emit(ps, mark, K"macro_name") + end + return false +end + # Parses a chain of suffixes at function call precedence, leftmost binding # tightest. This handles # * Bracketed calls like a() b[] c{} @@ -1505,13 +1512,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # 2(x) ==> (* 2 x) return end + processing_macro_name = is_macrocall + saw_misplaced_atsym = false + misplaced_atsym_mark = nothing # source range of the @-prefixed part of a macro macro_atname_range = nothing - # $A.@x ==> (macrocall (. ($ A) @x)) + # $A.@x ==> (macrocall (. ($ A) (macro_name x))) maybe_strmac = true - # We record the last component of chains of dot-separated identifiers so we - # know which identifier was the macro name. 
- macro_name_position = position(ps) # points to same output span as peek_behind + last_identifier_orig_kind = peek_behind(ps).orig_kind while true maybe_strmac_1 = false t = peek_token(ps) @@ -1523,33 +1531,34 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) break elseif is_macrocall && (preceding_whitespace(t) || !(k in KSet"( [ { ' .")) # Macro calls with space-separated arguments - # @foo a b ==> (macrocall @foo a b) - # @foo (x) ==> (macrocall @foo (parens x)) - # @foo (x,y) ==> (macrocall @foo (tuple-p x y)) - # [@foo x] ==> (vect (macrocall @foo x)) - # [@foo] ==> (vect (macrocall @foo)) - # @var"#" a ==> (macrocall (var @#) a) - # A.@x y ==> (macrocall (. A @x) y) - # A.@var"#" a ==> (macrocall (. A (var @#)) a) - # @+x y ==> (macrocall @+ x y) - # A.@.x ==> (macrocall (. A @.) x) - fix_macro_name_kind!(ps, macro_name_position) + # @foo a b ==> (macrocall (macro_name foo) a b) + # @foo (x) ==> (macrocall (macro_name foo) (parens x)) + # @foo (x,y) ==> (macrocall (macro_name foo) (tuple-p x y)) + # [@foo x] ==> (vect (macrocall (macro_name foo) x)) + # [@foo] ==> (vect (macrocall (macro_name foo))) + # @var"#" a ==> (macrocall (macro_name (var #)) a) + # A.@x y ==> (macrocall (. A (macro_name x)) y) + # A.@var"#" a ==> (macrocall (. A (macro_name (var #))) a) + # @+x y ==> (macrocall (macro_name +) x y) + # A.@.x ==> (macrocall (. A (macro_name .)) x) + processing_macro_name = maybe_parsed_macro_name( + ps, processing_macro_name, mark) let ps = with_space_sensitive(ps) # Space separated macro arguments - # A.@foo a b ==> (macrocall (. A @foo) a b) - # @A.foo a b ==> (macrocall (. A @foo) a b) + # A.@foo a b ==> (macrocall (. A (macro_name foo)) a b) + # @A.foo a b ==> (macrocall (macro_name (. 
A foo)) a b) n_args = parse_space_separated_exprs(ps) - is_doc_macro = peek_behind(ps, macro_name_position).orig_kind == K"doc" + is_doc_macro = last_identifier_orig_kind == K"doc" if is_doc_macro && n_args == 1 # Parse extended @doc args on next line - # @doc x\ny ==> (macrocall @doc x y) - # A.@doc x\ny ==> (macrocall (. A @doc) doc x y) - # @A.doc x\ny ==> (macrocall (. A @doc) doc x y) - # @doc x y\nz ==> (macrocall @doc x y) + # @doc x\ny ==> (macrocall (macro_name doc) x y) + # A.@doc x\ny ==> (macrocall (. A (macro_name doc)) x y) + # @A.doc x\ny ==> (macrocall (macro_name (. A doc)) x y) + # @doc x y\nz ==> (macrocall (macro_name doc) x y) # # Excluded cases - # @doc x\n\ny ==> (macrocall @doc x) - # @doc x\nend ==> (macrocall @doc x) + # @doc x\n\ny ==> (macrocall (macro_name doc) x) + # @doc x\nend ==> (macrocall (macro_name doc) x) k2 = peek(ps, 2) if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) && k2 != K"NewlineWs" @@ -1566,6 +1575,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f(a; b; c) ==> (call f a (parameters b) (parameters c)) # (a=1)() ==> (call (parens (= a 1))) # f (a) ==> (call f (error-t) a) + processing_macro_name = maybe_parsed_macro_name( + ps, processing_macro_name, mark) + processing_macro_name = false bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) opts = parse_call_arglist(ps, K")") @@ -1577,14 +1589,16 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # TODO: Add PARENS_FLAG to all calls which use them? (is_macrocall ? PARENS_FLAG : EMPTY_FLAGS)|opts.delim_flags) if is_macrocall - # @x(a, b) ==> (macrocall-p @x a b) - # A.@x(y) ==> (macrocall-p (. A @x) y) - # A.@x(y).z ==> (. (macrocall-p (. A @x) y) z) - fix_macro_name_kind!(ps, macro_name_position) + # @x(a, b) ==> (macrocall-p (macro_name x) a b) + # A.@x(y) ==> (macrocall-p (. A (macro_name x)) y) + # A.@x(y).z ==> (. (macrocall-p (. 
A (macro_name x)) y) z) is_macrocall = false + # @f()() ==> (call (macrocall-p (macro_name f))) macro_atname_range = nothing end elseif k == K"[" + processing_macro_name = maybe_parsed_macro_name( + ps, processing_macro_name, mark) m = position(ps) # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) @@ -1592,14 +1606,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true), K"]", ps.end_symbol) if is_macrocall - # @S[a,b] ==> (macrocall @S (vect a b)) - # @S[a b] ==> (macrocall @S (hcat a b)) - # @S[a; b] ==> (macrocall @S (vcat a b)) - # A.@S[a] ==> (macrocall (. A @S) (vect a)) - # @S[a].b ==> (. (macrocall @S (vect a)) b) - #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) - #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) - fix_macro_name_kind!(ps, macro_name_position) + # @S[a,b] ==> (macrocall (macro_name S) (vect a b)) + # @S[a b] ==> (macrocall (macro_name S) (hcat a b)) + # @S[a; b] ==> (macrocall (macro_name S) (vcat a b)) + # A.@S[a] ==> (macrocall (. A (macro_name S)) (vect a)) + # @S[a].b ==> (. (macrocall (macro_name S) (vect a)) b) + #v1.7: @S[a ;; b] ==> (macrocall (macro_name S) (ncat-2 a b)) + #v1.6: @S[a ;; b] ==> (macrocall (macro_name S) (error (ncat-2 a b))) emit(ps, m, ckind, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, m, ckind) emit(ps, mark, K"macrocall") @@ -1637,19 +1650,24 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emark = position(ps) if !isnothing(macro_atname_range) # Allow `@` in macrocall only in first and last position - # A.B.@x ==> (macrocall (. (. A B) @x)) - # @A.B.x ==> (macrocall (. (. A B) @x)) - # A.@B.x ==> (macrocall (. (. A B (error-t)) @x)) + # A.B.@x ==> (macrocall (. (. A B) (macro_name x))) + # @A.B.x ==> (macrocall (macro_name (. (. A B) x))) + # A.@B.x ==> (macrocall (. (. 
A (error-t) B) (macro_name (error-t) x))) emit_diagnostic(ps, macro_atname_range..., error="`@` must appear on first or last macro name component") - bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") - else - bump(ps, TRIVIA_FLAG) + # Recover by treating the `@` as if it had been on the last identifier + saw_misplaced_atsym = true + reset_node!(ps, macro_atname_range[2], kind=K"TOMBSTONE") + reset_node!(ps, macro_atname_range[1], kind=K"error") end + bump(ps, TRIVIA_FLAG) k = peek(ps) if k == K"(" if is_macrocall - # @M.(x) ==> (macrocall (dotcall @M (error-t) x)) + # Recover by pretending we do have the syntax + processing_macro_name = maybe_parsed_macro_name( + ps, processing_macro_name, mark) + # @M.(x) ==> (macrocall (dotcall (macro_name M) (error-t) x)) bump_invisible(ps, K"error", TRIVIA_FLAG) emit_diagnostic(ps, mark, error="dot call syntax not supported for macros") @@ -1672,29 +1690,34 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k == K"$" # f.$x ==> (. f ($ x)) # f.$(x+y) ==> (. f ($ (call + x y))) - # A.$B.@x ==> (macrocall (. (. A ($ B)) @x)) - # @A.$x a ==> (macrocall (. A (error x)) a) + # A.$B.@x ==> (macrocall (. (. A ($ B)) (macro_name x))) + # @A.$x a ==> (macrocall (macro_name (. A (error x))) a) m = position(ps) bump(ps, TRIVIA_FLAG) parse_atom(ps) - emit(ps, m, K"$") - macro_name_position = position(ps) + if is_macrocall + emit(ps, m, K"error", error="invalid macro name") + else + emit(ps, m, K"$") + end + last_identifier_orig_kind = K"$" emit(ps, mark, K".") elseif k == K"@" # A macro call after some prefix A has been consumed - # A.@x ==> (macrocall (. A @x)) - # A.@x a ==> (macrocall (. A @x) a) + # A.@x ==> (macrocall (. A (macro_name x))) + # A.@x a ==> (macrocall (. A (macro_name x)) a) m = position(ps) if is_macrocall - # @A.B.@x a ==> (macrocall (. (. A B) (error-t) @x) a) + # @A.B.@x a ==> (macrocall (. (. 
A B) (error-t) (macro_name x)) a) bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path") else bump(ps, TRIVIA_FLAG) - is_macrocall = true end parse_macro_name(ps) - macro_name_position = position(ps) + last_identifier_orig_kind = peek_behind(ps).orig_kind + !is_macrocall && emit(ps, m, K"macro_name") macro_atname_range = (m, position(ps)) + is_macrocall = true emit(ps, mark, K".") elseif k == K"'" # f.' => (dotcall-post f (error ')) @@ -1704,10 +1727,27 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) error="the .' operator for transpose is discontinued") emit(ps, mark, K"dotcall", POSTFIX_OP_FLAG) else + if saw_misplaced_atsym + # If we saw a misplaced `@` earlier, this might be the place + # where it should have been. Opportunistically bump the + # zero-width error token here. If that's not right, we'll + # reset it later. + if misplaced_atsym_mark !== nothing + reset_node!(ps, misplaced_atsym_mark[1], kind=K"TOMBSTONE") + reset_node!(ps, misplaced_atsym_mark[2], kind=K"TOMBSTONE") + end + macro_name_mark = position(ps) + bump_invisible(ps, K"error", TRIVIA_FLAG) + aterror_mark = position(ps) + end # Field/property syntax # f.x.y ==> (. (. 
f x) y) parse_atom(ps, false) - macro_name_position = position(ps) + if saw_misplaced_atsym + emit(ps, macro_name_mark, K"macro_name") + misplaced_atsym_mark = (aterror_mark, position(ps)) + end + last_identifier_orig_kind = peek_behind(ps).orig_kind maybe_strmac_1 = true emit(ps, mark, K".") end @@ -1717,6 +1757,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump(ps, remap_kind=K"Identifier") emit(ps, mark, K"call", POSTFIX_OP_FLAG) elseif k == K"{" + processing_macro_name = maybe_parsed_macro_name( + ps, processing_macro_name, mark) # Type parameter curlies and macro calls m = position(ps) # S {a} ==> (curly S (error-t) a) @@ -1724,10 +1766,9 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump(ps, TRIVIA_FLAG) opts = parse_call_arglist(ps, K"}") if is_macrocall - # @S{a,b} ==> (macrocall S (braces a b)) - # A.@S{a} ==> (macrocall (. A @S) (braces a)) - # @S{a}.b ==> (. (macrocall @S (braces a)) b) - fix_macro_name_kind!(ps, macro_name_position) + # @S{a,b} ==> (macrocall (macro_name S) (braces a b)) + # A.@S{a} ==> (macrocall (. A (macro_name S)) (braces a)) + # @S{a}.b ==> (. 
(macrocall (macro_name S) (braces a)) b) emit(ps, m, K"braces", opts.delim_flags) emit(ps, mark, K"macrocall") min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`") @@ -1740,32 +1781,32 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) elseif k in KSet" \" \"\"\" ` ``` " && !preceding_whitespace(t) && maybe_strmac && (# Must mirror the logic in lex_quote() for consistency - origk = peek_behind(ps, macro_name_position).orig_kind; + origk = last_identifier_orig_kind; origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk)) # Custom string and command literals - # x"str" ==> (macrocall @x_str (string-r "str")) - # x`str` ==> (macrocall @x_cmd (cmdstring-r "str")) - # x"" ==> (macrocall @x_str (string-r "")) - # x`` ==> (macrocall @x_cmd (cmdstring-r "")) + # x"str" ==> (macrocall (macro_name_str x) (string-r "str")) + # x`str` ==> (macrocall (macro_name_cmd x) (cmdstring-r "str")) + # x"" ==> (macrocall (macro_name_str x) (string-r "")) + # x`` ==> (macrocall (macro_name_cmd x) (cmdstring-r "")) # Triple quoted processing for custom strings - # r"""\nx""" ==> (macrocall @r_str (string-s-r "x")) - # r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y")) - # r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y")) + # r"""\nx""" ==> (macrocall (macro_name_str r) (string-s-r "x")) + # r"""\n x\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\n" "y")) + # r"""\n x\\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\\\n" "y")) # # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. - outk = is_string_delim(k) ? K"StringMacroName" : K"CmdMacroName" - fix_macro_name_kind!(ps, macro_name_position, outk) + outk = is_string_delim(k) ? 
K"macro_name_str" : K"macro_name_cmd" + emit(ps, mark, outk) parse_string(ps, true) t = peek_token(ps) k = kind(t) if !preceding_whitespace(t) && is_string_macro_suffix(k) # Macro suffixes can include keywords and numbers - # x"s"y ==> (macrocall @x_str (string-r "s") "y") - # x"s"end ==> (macrocall @x_str (string-r "s") "end") - # x"s"in ==> (macrocall @x_str (string-r "s") "in") - # x"s"2 ==> (macrocall @x_str (string-r "s") 2) - # x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0) + # x"s"y ==> (macrocall (macro_name_str x) (string-r "s") "y") + # x"s"end ==> (macrocall (macro_name_str x) (string-r "s") "end") + # x"s"in ==> (macrocall (macro_name_str x) (string-r "s") "in") + # x"s"2 ==> (macrocall (macro_name_str x) (string-r "s") 2) + # x"s"10.0 ==> (macrocall (macro_name_str x) (string-r "s") 10.0) suffix_kind = (k == K"Identifier" || is_keyword(k) || is_word_operator(k)) ? K"String" : k bump(ps, remap_kind=suffix_kind) @@ -2033,13 +2074,13 @@ function parse_resword(ps::ParseState) word == K"baremodule" ? 
BARE_MODULE_FLAG : EMPTY_FLAGS) elseif word in KSet"export public" # export a ==> (export a) - # export @a ==> (export @a) - # export a, \n @b ==> (export a @b) + # export @a ==> (export (macro_name a)) + # export a, \n @b ==> (export a (macro_name b)) # export +, == ==> (export + ==) # export \n a ==> (export a) # export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b)))) bump(ps, TRIVIA_FLAG) - parse_comma_separated(ps, x->parse_atsym(x, false)) + parse_comma_separated(ps, x->parse_import_atsym(x, false)) emit(ps, mark, word) elseif word in KSet"import using" parse_imports(ps) @@ -2372,43 +2413,12 @@ function _is_valid_macro_name(peektok) return !is_error(peektok.kind) && (peektok.is_leaf || peektok.kind == K"var") end -function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=nothing) - k = peek_behind(ps, macro_name_position).kind - if k == K"var" - macro_name_position = first_child_position(ps, macro_name_position) - k = peek_behind(ps, macro_name_position).kind - elseif k == K"parens" - # @(A) x ==> (macrocall (parens @A) x) - macro_name_position = first_child_position(ps, macro_name_position) - if macro_name_position == NO_POSITION - return - end - k = peek_behind(ps, macro_name_position).kind - elseif k == K"error" - # Error already reported in parse_macro_name - return - end - if isnothing(name_kind) - name_kind = _is_valid_macro_name(peek_behind(ps, macro_name_position)) ? - K"MacroName" : K"error" - if name_kind == K"error" - # TODO: This isn't quite accurate - emit_diagnostic(ps, macro_name_position, macro_name_position, - error="invalid macro name") - end - end - reset_node!(ps, macro_name_position, kind=name_kind) -end - -# If remap_kind is false, the kind will be remapped by parse_call_chain after -# it discovers which component of the macro's module path is the macro name. -# # flisp: parse-macro-name function parse_macro_name(ps::ParseState) # @! x ==> (macrocall @! x) - # @.. x ==> (macrocall @.. 
x) - # @$ x ==> (macrocall @$ x) - # @var"#" x ==> (macrocall (var @#) x) + # @.. x ==> (macrocall (macro_name ..) x) + # @$ x ==> (macrocall (macro_name $) x) + # @var"#" x ==> (macrocall (macro_name (var #)) x) bump_disallowed_space(ps) mark = position(ps) parse_atom(ps, false) @@ -2417,7 +2427,7 @@ function parse_macro_name(ps::ParseState) emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary") elseif !_is_valid_macro_name(b) - # @[x] y z ==> (macrocall (error (vect x)) y z) + # @[x] y z ==> (macrocall (macro_name (error (vect x))) y z) emit(ps, mark, K"error", error="invalid macro name") end end @@ -2425,15 +2435,16 @@ end # Parse an identifier, interpolation or @-prefixed symbol # # flisp: parse-atsym -function parse_atsym(ps::ParseState, allow_quotes=true) +function parse_import_atsym(ps::ParseState, allow_quotes=true) bump_trivia(ps) if peek(ps) == K"@" - # export @a ==> (export @a) - # export @var"'" ==> (export (var @')) - # export a, \n @b ==> (export a @b) + mark = position(ps) + # export @a ==> (export (macro_name a)) + # export @var"'" ==> (export (macro_name (var '))) + # export a, \n @b ==> (export a (macro_name b)) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) - fix_macro_name_kind!(ps, position(ps)) + emit(ps, mark, K"macro_name") else # export a ==> (export a) # export \n a ==> (export a) @@ -2538,7 +2549,7 @@ function parse_import(ps::ParseState, word, has_import_prefix) # import A: x as y ==> (import (: (importpath A) (as (importpath x) y))) # using A: x as y ==> (using (: (importpath A) (as (importpath x) y))) bump(ps, TRIVIA_FLAG) - parse_atsym(ps, false) + parse_import_atsym(ps, false) emit(ps, mark, K"as") if word == K"using" && !has_import_prefix # using A as B ==> (using (error (as (importpath A) B))) @@ -2587,15 +2598,15 @@ function parse_import_path(ps::ParseState) end first_dot = false end - # import @x ==> (import (importpath @x)) + # import @x ==> (import (importpath (macro_name x))) # import $A ==> (import 
(importpath ($ A))) - parse_atsym(ps, false) + parse_import_atsym(ps, false) while true t = peek_token(ps) k = kind(t) if k == K"." # import A.B ==> (import (importpath A B)) - # import $A.@x ==> (import (importpath ($ A) @x)) + # import $A.@x ==> (import (importpath ($ A) (macro_name x))) # import A.B.C ==> (import (importpath A B C)) # import A.⋆.f ==> (import (importpath A ⋆ f)) next_tok = peek_token(ps, 2) @@ -2611,7 +2622,7 @@ function parse_import_path(ps::ParseState) bump_disallowed_space(ps) end bump(ps, TRIVIA_FLAG) - parse_atsym(ps) + parse_import_atsym(ps) elseif k == K"..." # Import the .. operator # import A... ==> (import (importpath A ..)) @@ -3651,7 +3662,7 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal emit_braces(ps, mark, ckind, cflags, dim) elseif leading_kind == K"@" # macro call # Macro names can be keywords - # @end x ==> (macrocall @end x) + # @end x ==> (macrocall (macro_name end) x) bump(ps, TRIVIA_FLAG) parse_macro_name(ps) parse_call_chain(ps, mark, true) diff --git a/JuliaSyntax/src/julia/tokenize.jl b/JuliaSyntax/src/julia/tokenize.jl index 24c96870e88cd..2bd0f56df1b84 100644 --- a/JuliaSyntax/src/julia/tokenize.jl +++ b/JuliaSyntax/src/julia/tokenize.jl @@ -1168,30 +1168,21 @@ end function lex_dot(l::Lexer) if accept(l, '.') if accept(l, '.') + l.last_token == K"@" && return emit(l, K"Identifier") return emit(l, K"...") else if is_dottable_operator_start_char(peekchar(l)) readchar(l) return emit(l, K"ErrorInvalidOperator") else + l.last_token == K"@" && return emit(l, K"Identifier") return emit(l, K"..") end end elseif Base.isdigit(peekchar(l)) return lex_digit(l, K"Float") else - pc, dpc = dpeekchar(l) - # When we see a dot followed by an operator, we want to emit just the dot - # and let the next token be the operator - if is_operator_start_char(pc) || (pc == '!' 
&& dpc == '=') - return emit(l, K".") - elseif pc == '÷' - return emit(l, K".") - elseif pc == '=' && dpc == '>' - return emit(l, K".") - elseif is_dottable_operator_start_char(pc) - return emit(l, K".") - end + l.last_token == K"@" && return emit(l, K"Identifier") return emit(l, K".") end end diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 07e66abf27022..1397dd215a9be 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -53,7 +53,7 @@ end Diagnostic(1, 9, :error, "try without catch or finally") # TODO: better range @test diagnostic("@A.\$x a") == - Diagnostic(6, 5, :error, "invalid macro name") + Diagnostic(4, 5, :error, "invalid macro name") @test diagnostic("a, , b") == Diagnostic(4, 4, :error, "unexpected `,`") diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl index 7651347cf853f..d7547848bef09 100644 --- a/JuliaSyntax/test/expr.jl +++ b/JuliaSyntax/test/expr.jl @@ -554,6 +554,7 @@ # var"" @test parsestmt("@var\"#\" a") == Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + @test parsestmt("@var\"\\\"\" a") == Expr(:macrocall, Symbol("@\""), LineNumberNode(1), :a) @test parsestmt("A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) # Square brackets diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl index 727c717885e2b..cc0294e0ea335 100644 --- a/JuliaSyntax/test/green_node.jl +++ b/JuliaSyntax/test/green_node.jl @@ -33,8 +33,9 @@ 1:1 │ Identifier ✔ 2:2 │ ( 3:7 │ [macrocall] - 3:3 │ @ - 4:4 │ MacroName ✔ + 3:4 │ [macro_name] + 3:3 │ @ + 4:4 │ Identifier ✔ 5:5 │ ( 6:6 │ Identifier ✔ 7:7 │ ) @@ -50,8 +51,9 @@ 1:1 │ Identifier ✔ "f" 2:2 │ ( "(" 3:7 │ [macrocall] - 3:3 │ @ "@" - 4:4 │ MacroName ✔ "x" + 3:4 │ [macro_name] + 3:3 │ @ "@" + 4:4 │ Identifier ✔ "x" 5:5 │ ( "(" 6:6 │ Identifier ✔ "y" 7:7 │ ) ")" diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index f84009287bf0f..64ecc8ea554e2 
100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -345,37 +345,37 @@ tests = [ ".&(x,y)" => "(call (. &) x y)" # parse_call_chain "f(a).g(b)" => "(call (. (call f a) g) b)" - "\$A.@x" => "(macrocall (. (\$ A) @x))" + "\$A.@x" => "(macrocall (. (\$ A) (macro_name x)))" # non-errors in space sensitive contexts "[f (x)]" => "(hcat f (parens x))" "[f x]" => "(hcat f x)" # space separated macro calls - "@foo a b" => "(macrocall @foo a b)" - "@foo (x)" => "(macrocall @foo (parens x))" - "@foo (x,y)" => "(macrocall @foo (tuple-p x y))" - "A.@foo a b" => "(macrocall (. A @foo) a b)" - "@A.foo a b" => "(macrocall (. A @foo) a b)" - "[@foo x]" => "(vect (macrocall @foo x))" - "[@foo]" => "(vect (macrocall @foo))" - "@var\"#\" a" => "(macrocall (var @#) a)" - "@(A) x" => "(macrocall (parens @A) x)" - "A.@x y" => "(macrocall (. A @x) y)" - "A.@var\"#\" a"=> "(macrocall (. A (var @#)) a)" - "@+x y" => "(macrocall @+ x y)" - "A.@.x" => "(macrocall (. A @.) x)" + "@foo a b" => "(macrocall (macro_name foo) a b)" + "@foo (x)" => "(macrocall (macro_name foo) (parens x))" + "@foo (x,y)" => "(macrocall (macro_name foo) (tuple-p x y))" + "A.@foo a b" => "(macrocall (. A (macro_name foo)) a b)" + "@A.foo a b" => "(macrocall (macro_name (. A foo)) a b)" + "[@foo x]" => "(vect (macrocall (macro_name foo) x))" + "[@foo]" => "(vect (macrocall (macro_name foo)))" + "@var\"#\" a" => "(macrocall (macro_name (var #)) a)" + "@(A) x" => "(macrocall (macro_name (parens A)) x)" + "A.@x y" => "(macrocall (. A (macro_name x)) y)" + "A.@var\"#\" a"=> "(macrocall (. A (macro_name (var #))) a)" + "@+x y" => "(macrocall (macro_name +) x y)" + "A.@.x" => "(macrocall (. A (macro_name .)) x)" # Macro names - "@! x" => "(macrocall @! x)" - "@.. x" => "(macrocall @.. x)" - "@\$ y" => "(macrocall @\$ y)" - "@[x] y z" => "(macrocall (error (vect x)) y z)" + "@! x" => "(macrocall (macro_name !) x)" + "@.. x" => "(macrocall (macro_name ..) 
x)" + "@\$ y" => "(macrocall (macro_name \$) y)" + "@[x] y z" => "(macrocall (macro_name (error (vect x))) y z)" # Special @doc parsing rules - "@doc x\ny" => "(macrocall @doc x y)" - "A.@doc x\ny" => "(macrocall (. A @doc) x y)" - "@A.doc x\ny" => "(macrocall (. A @doc) x y)" - "@doc x y\nz" => "(macrocall @doc x y)" - "@doc x\n\ny" => "(macrocall @doc x)" - "@doc x\nend" => "(macrocall @doc x)" + "@doc x\ny" => "(macrocall (macro_name doc) x y)" + "A.@doc x\ny" => "(macrocall (. A (macro_name doc)) x y)" + "@A.doc x\ny" => "(macrocall (macro_name (. A doc)) x y)" + "@doc x y\nz" => "(macrocall (macro_name doc) x y)" + "@doc x\n\ny" => "(macrocall (macro_name doc) x)" + "@doc x\nend" => "(macrocall (macro_name doc) x)" # calls with brackets "f(a,b)" => "(call f a b)" @@ -384,26 +384,26 @@ tests = [ "f(a; b; c)" => "(call f a (parameters b) (parameters c))" "(a=1)()" => "(call (parens (= a 1)))" "f (a)" => "(call f (error-t) a)" - "@x(a, b)" => "(macrocall-p @x a b)" - "@x(a, b,)" => "(macrocall-p-, @x a b)" - "A.@x(y)" => "(macrocall-p (. A @x) y)" - "A.@x(y).z" => "(. (macrocall-p (. A @x) y) z)" + "@x(a, b)" => "(macrocall-p (macro_name x) a b)" + "@x(a, b,)" => "(macrocall-p-, (macro_name x) a b)" + "A.@x(y)" => "(macrocall-p (. A (macro_name x)) y)" + "A.@x(y).z" => "(. (macrocall-p (. 
A (macro_name x)) y) z)" "f(y for x = xs; a)" => "(call f (generator y (iteration (in x xs))) (parameters a))" # do "f() do\nend" => "(call f (do (tuple) (block)))" "f() do ; body end" => "(call f (do (tuple) (block body)))" "f() do x, y\n body end" => "(call f (do (tuple x y) (block body)))" "f(x) do y body end" => "(call f x (do (tuple y) (block body)))" - "@f(x) do y body end" => "(macrocall-p @f x (do (tuple y) (block body)))" + "@f(x) do y body end" => "(macrocall-p (macro_name f) x (do (tuple y) (block body)))" # square brackets - "@S[a,b]" => "(macrocall @S (vect a b))" - "@S[a b]" => "(macrocall @S (hcat a b))" - "@S[a; b]" => "(macrocall @S (vcat a b))" - "A.@S[a]" => "(macrocall (. A @S) (vect a))" - "@S[a].b" => "(. (macrocall @S (vect a)) b)" - ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" - ((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))" + "@S[a,b]" => "(macrocall (macro_name S) (vect a b))" + "@S[a b]" => "(macrocall (macro_name S) (hcat a b))" + "@S[a; b]" => "(macrocall (macro_name S) (vcat a b))" + "A.@S[a]" => "(macrocall (. A (macro_name S)) (vect a))" + "@S[a].b" => "(. (macrocall (macro_name S) (vect a)) b)" + ((v=v"1.7",), "@S[a ;; b]") => "(macrocall (macro_name S) (ncat-2 a b))" + ((v=v"1.6",), "@S[a ;; b]") => "(macrocall (macro_name S) (error (ncat-2 a b)))" "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" @@ -419,10 +419,10 @@ tests = [ # Dotted forms # Allow `@` in macrocall only in first and last position - "A.B.@x" => "(macrocall (. (. A B) @x))" - "@A.B.x" => "(macrocall (. (. A B) @x))" - "A.@B.x" => "(macrocall (. (. A B) (error-t) @x))" - "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" + "A.B.@x" => "(macrocall (. (. A B) (macro_name x)))" + "@A.B.x" => "(macrocall (macro_name (. (. A B) x)))" + "A.@B.x" => "(macrocall (. (. 
A (error-t) B) (macro_name (error-t) x)))" + "@M.(x)" => "(macrocall (dotcall (macro_name M) (error-t) x))" "f.(a,b)" => "(dotcall f a b)" "f.(a,b,)" => "(dotcall-, f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" @@ -434,11 +434,11 @@ tests = [ "A.: +" => "(. A (quote-: (error-t) +))" "f.\$x" => "(. f (\$ x))" "f.\$(x+y)" => "(. f (\$ (parens (call-i x + y))))" - "A.\$B.@x" => "(macrocall (. (. A (\$ B)) @x))" - "@A.\$x a" => "(macrocall (. A (error x)) a)" - "A.@x" => "(macrocall (. A @x))" - "A.@x a" => "(macrocall (. A @x) a)" - "@A.B.@x a" => "(macrocall (. (. A B) (error-t) @x) a)" + "A.\$B.@x" => "(macrocall (. (. A (\$ B)) (macro_name x)))" + "@A.\$x a" => "(macrocall (macro_name (. A (error x))) a)" + "A.@x" => "(macrocall (. A (macro_name x)))" + "A.@x a" => "(macrocall (. A (macro_name x)) a)" + "@A.B.@x a" => "(macrocall (macro_name (. (. A B) (error-t) x)) a)" # .' discontinued "f.'" => "(dotcall-post f (error '))" # Field/property syntax @@ -451,35 +451,40 @@ tests = [ "f'ᵀ" => "(call-post f 'ᵀ)" # Curly calls "S {a}" => "(curly S (error-t) a)" - "A.@S{a}" => "(macrocall (. A @S) (braces a))" - "@S{a,b}" => "(macrocall @S (braces a b))" - "A.@S{a}" => "(macrocall (. A @S) (braces a))" - "@S{a}.b" => "(. (macrocall @S (braces a)) b)" + "A.@S{a}" => "(macrocall (. A (macro_name S)) (braces a))" + "@S{a,b}" => "(macrocall (macro_name S) (braces a b))" + "A.@S{a}" => "(macrocall (. A (macro_name S)) (braces a))" + "@S{a}.b" => "(. 
(macrocall (macro_name S) (braces a)) b)" + # Macro calls with chained operations + "@a[b][c]" => "(ref (macrocall (macro_name a) (vect b)) c)" + "@a{b}{c}" => "(curly (macrocall (macro_name a) (braces b)) c)" + "@a[b]{c}" => "(curly (macrocall (macro_name a) (vect b)) c)" + "@a{b}[c]" => "(ref (macrocall (macro_name a) (braces b)) c)" "S{a,b}" => "(curly S a b)" "T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))" # String macros - "x\"str\"" => """(macrocall @x_str (string-r "str"))""" - "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" - "x\"\"" => """(macrocall @x_str (string-r ""))""" - "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" - "in\"str\"" => """(macrocall @in_str (string-r "str"))""" - "outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" + "x\"str\"" => """(macrocall (macro_name_str x) (string-r "str"))""" + "x`str`" => """(macrocall (macro_name_cmd x) (cmdstring-r "str"))""" + "x\"\"" => """(macrocall (macro_name_str x) (string-r ""))""" + "x``" => """(macrocall (macro_name_cmd x) (cmdstring-r ""))""" + "in\"str\"" => """(macrocall (macro_name_str in) (string-r "str"))""" + "outer\"str\"" => """(macrocall (macro_name_str outer) (string-r "str"))""" # Triple quoted processing for custom strings - "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))""" - "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))""" - "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))""" + "r\"\"\"\nx\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x"))""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\n" "y"))""" + "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\\\n" "y"))""" # Macro suffixes can include keywords and numbers - "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" - "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" - "x\"s\"in" => """(macrocall @x_str 
(string-r "s") "in")""" - "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" - "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" + "x\"s\"y" => """(macrocall (macro_name_str x) (string-r "s") "y")""" + "x\"s\"end" => """(macrocall (macro_name_str x) (string-r "s") "end")""" + "x\"s\"in" => """(macrocall (macro_name_str x) (string-r "s") "in")""" + "x\"s\"2" => """(macrocall (macro_name_str x) (string-r "s") 2)""" + "x\"s\"10.0" => """(macrocall (macro_name_str x) (string-r "s") 10.0)""" # Cmd macro suffixes - "x`s`y" => """(macrocall @x_cmd (cmdstring-r "s") "y")""" - "x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")""" - "x`s`in" => """(macrocall @x_cmd (cmdstring-r "s") "in")""" - "x`s`2" => """(macrocall @x_cmd (cmdstring-r "s") 2)""" - "x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)""" + "x`s`y" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "y")""" + "x`s`end" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "end")""" + "x`s`in" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "in")""" + "x`s`2" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 2)""" + "x`s`10.0" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 10.0)""" ], JuliaSyntax.parse_resword => [ # In normal_context @@ -545,9 +550,9 @@ tests = [ """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))""" # export "export a" => "(export a)" - "export @a" => "(export @a)" - "export @var\"'\"" => "(export (var @'))" - "export a, \n @b" => "(export a @b)" + "export @a" => "(export (macro_name a))" + "export @var\"'\"" => "(export (macro_name (var ')))" + "export a, \n @b" => "(export a (macro_name b))" "export +, ==" => "(export + ==)" "export \n a" => "(export a)" "export \$a, \$(a*b)" => "(export (\$ a) (\$ (parens (call-i a * b))))" @@ -601,9 +606,9 @@ tests = [ "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" "function (f(x),) end" => 
"(function (tuple-p-, (call f x)) (block))" - "function (@f(x);) end" => "(function (tuple-p (macrocall-p @f x) (parameters)) (block))" - "function (@f(x)...) end" => "(function (tuple-p (... (macrocall-p @f x))) (block))" - "function (@f(x)) end" => "(function (error (tuple-p (macrocall-p @f x))) (block))" + "function (@f(x);) end" => "(function (tuple-p (macrocall-p (macro_name f) x) (parameters)) (block))" + "function (@f(x)...) end" => "(function (tuple-p (... (macrocall-p (macro_name f) x))) (block))" + "function (@f(x)) end" => "(function (error (tuple-p (macrocall-p (macro_name f) x))) (block))" "function (\$f) end" => "(function (error (tuple-p (\$ f))) (block))" "function ()(x) end" => "(function (call (tuple-p) x) (block))" "function (A).f() end" => "(function (call (. (parens A) f)) (block))" @@ -647,10 +652,10 @@ tests = [ "function f() \n a \n b end" => "(function (call f) (block a b))" "function f() end" => "(function (call f) (block))" # Macrocall as sig - ((v=v"1.12",), "function @callmemacro(a::Int) \n 1 \n end") => "(function (macrocall-p @callmemacro (::-i a Int)) (block 1))" - ((v=v"1.12",), "function @callmemacro(a::T, b::T) where T <: Int64\n3\nend") => "(function (where (macrocall-p @callmemacro (::-i a T) (::-i b T)) (<: T Int64)) (block 3))" - ((v=v"1.12",), "function @callmemacro(a::Int, b::Int, c::Int)::Float64\n4\nend") => "(function (::-i (macrocall-p @callmemacro (::-i a Int) (::-i b Int) (::-i c Int)) Float64) (block 4))" - ((v=v"1.12",), "function @f()() end") => "(function (call (macrocall-p @f)) (block))" + ((v=v"1.12",), "function @callmemacro(a::Int) \n 1 \n end") => "(function (macrocall-p (macro_name callmemacro) (::-i a Int)) (block 1))" + ((v=v"1.12",), "function @callmemacro(a::T, b::T) where T <: Int64\n3\nend") => "(function (where (macrocall-p (macro_name callmemacro) (::-i a T) (::-i b T)) (<: T Int64)) (block 3))" + ((v=v"1.12",), "function @callmemacro(a::Int, b::Int, c::Int)::Float64\n4\nend") => "(function (::-i 
(macrocall-p (macro_name callmemacro) (::-i a Int) (::-i b Int) (::-i c Int)) Float64) (block 4))" + ((v=v"1.12",), "function @f()() end") => "(function (call (macrocall-p (macro_name f))) (block))" # Errors "function" => "(function (error (error)) (block (error)) (error-t))" ], @@ -704,9 +709,9 @@ tests = [ # Modules with operator symbol names "import .⋆" => "(import (importpath . ⋆))" # Expressions allowed in import paths - "import @x" => "(import (importpath @x))" + "import @x" => "(import (importpath (macro_name x)))" "import \$A" => "(import (importpath (\$ A)))" - "import \$A.@x" => "(import (importpath (\$ A) @x))" + "import \$A.@x" => "(import (importpath (\$ A) (macro_name x)))" "import A.B" => "(import (importpath A B))" "import A.B.C" => "(import (importpath A B C))" "import A.:+" => "(import (importpath A (quote-: +)))" @@ -893,9 +898,9 @@ tests = [ ((v=v"1.7",), "{a ;; b}") => "(bracescat (nrow-2 a b))" ((v=v"1.7",), "{a ;;;; b}") => "(bracescat (nrow-4 a b))" # Macro names can be keywords - "@end x" => "(macrocall @end x)" + "@end x" => "(macrocall (macro_name end) x)" # __dot__ macro - "@. x" => "(macrocall @. x)" + "@. x" => "(macrocall (macro_name .) 
x)" # cmd strings "``" => "(cmdstring-r \"\")" "`cmd`" => "(cmdstring-r \"cmd\")" @@ -1047,8 +1052,8 @@ tests = [ "public export=true foo, bar" => PARSE_ERROR # but these may be "public experimental=true foo, bar" => PARSE_ERROR # supported soon ;) "public(x::String) = false" => "(function-= (call public (::-i x String)) false)" - "module M; export @a; end" => "(module M (block (export @a)))" - "module M; public @a; end" => "(module M (block (public @a)))" + "module M; export @a; end" => "(module M (block (export (macro_name a))))" + "module M; public @a; end" => "(module M (block (public (macro_name a))))" "module M; export ⤈; end" => "(module M (block (export ⤈)))" "module M; public ⤈; end" => "(module M (block (public ⤈)))" "public = 4" => "(= public 4)" @@ -1056,7 +1061,7 @@ tests = [ "public() = 6" => "(function-= (call public) 6)" ]), JuliaSyntax.parse_stmts => [ - ((v = v"1.12",), "@callmemacro(b::Float64) = 2") => "(= (macrocall-p @callmemacro (::-i b Float64)) 2)" + ((v = v"1.12",), "@callmemacro(b::Float64) = 2") => "(= (macrocall-p (macro_name callmemacro) (::-i b Float64)) 2)" ], JuliaSyntax.parse_docstring => [ """ "notdoc" ] """ => "(string \"notdoc\")" @@ -1098,10 +1103,10 @@ parsestmt_test_specs = [ # The following may not be ideal error recovery! 
But at least the parser # shouldn't crash - "@(x y)" => "(macrocall (parens @x (error-t y)))" + "@(x y)" => "(macrocall (macro_name (parens x (error-t y))))" "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" - "@(" => "(macrocall (parens (error-t)))" - "x = @(" => "(= x (macrocall (parens (error-t))))" + "@(" => "(macrocall (macro_name (parens (error-t))))" + "x = @(" => "(= x (macrocall (macro_name (parens (error-t)))))" "function(where" => "(function (tuple-p where (error-t)) (block (error)) (error-t))" # Contextual keyword pairs must not be separated by newlines even within parens "(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))" @@ -1189,9 +1194,9 @@ end @testset "Unicode normalization in tree conversion" begin # ɛµ normalizes to εμ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall @\u03B5\u03BC)" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall (macro_name \u03B5\u03BC))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall (macro_name_str \u03B5\u03BC) (string-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall (macro_name_cmd \u03B5\u03BC) (cmdstring-r \"\"))" # · and · normalize to ⋅ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" From f8131acc5accc377f712d57258cf723069f497b0 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sat, 12 Jul 2025 
02:44:39 +0900 Subject: [PATCH 1017/1109] build_tree: clarify accepted keyword arguments (JuliaLang/JuliaSyntax.jl#571) Currently `build_tree` accepts extra `kwargs...` which causes actually invalid `kwargs...` to be silently ignored. This is particularly confusing since they can be mixed up with `_parse!` keyword arguments. This change clarifies which keyword arguments `build_tree` accepts and makes it explicitly error when invalid keyword arguments are passed. --- JuliaSyntax/src/integration/expr.jl | 5 ++++- JuliaSyntax/src/porcelain/green_node.jl | 4 +++- JuliaSyntax/src/porcelain/syntax_tree.jl | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl index 038bad9a20c90..c4fd82220f00e 100644 --- a/JuliaSyntax/src/integration/expr.jl +++ b/JuliaSyntax/src/integration/expr.jl @@ -643,7 +643,10 @@ end return retexpr end -function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...) +function build_tree(::Type{Expr}, stream::ParseStream; + filename=nothing, first_line=1, + # unused, but required since `_parse` is written generic + keep_parens=false) source = SourceFile(stream, filename=filename, first_line=first_line) return build_tree(Expr, stream, source) end diff --git a/JuliaSyntax/src/porcelain/green_node.jl b/JuliaSyntax/src/porcelain/green_node.jl index 61bdbb01c5f92..b06dab56cbbe1 100644 --- a/JuliaSyntax/src/porcelain/green_node.jl +++ b/JuliaSyntax/src/porcelain/green_node.jl @@ -132,7 +132,9 @@ function GreenNode(cursor::GreenTreeCursor) end end -function build_tree(T::Type{GreenNode}, stream::ParseStream; kws...) +function build_tree(::Type{GreenNode}, stream::ParseStream; + # unused, but required since `_parse` is written generic + filename=nothing, first_line=1, keep_parens=false) cursor = GreenTreeCursor(stream) if has_toplevel_siblings(cursor) # There are multiple toplevel nodes, e.g. 
because we're using this diff --git a/JuliaSyntax/src/porcelain/syntax_tree.jl b/JuliaSyntax/src/porcelain/syntax_tree.jl index 71b1be82e413f..1002919c43cb5 100644 --- a/JuliaSyntax/src/porcelain/syntax_tree.jl +++ b/JuliaSyntax/src/porcelain/syntax_tree.jl @@ -302,7 +302,7 @@ end Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.byte_end, data.val) function build_tree(::Type{SyntaxNode}, stream::ParseStream; - filename=nothing, first_line=1, keep_parens=false, kws...) + filename=nothing, first_line=1, keep_parens=false) source = SourceFile(stream, filename=filename, first_line=first_line) cursor = RedTreeCursor(stream) if has_toplevel_siblings(cursor) From ecaf00424ae514ddc75ce1a7e1b99c1a378f07ae Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 20 Jul 2025 21:06:51 -0400 Subject: [PATCH 1018/1109] Move over K"inert" (JuliaLang/JuliaLowering.jl#13) JuliaSyntax does not generate this kind, so I'm moving it over here. --- JuliaLowering/src/kinds.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 467810c0cd2a6..6a1a4eccd8445 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -19,6 +19,8 @@ function _register_kinds() "Value" # A (quoted) `Symbol` "Symbol" + # QuoteNode; not quasiquote + "inert" # Compiler metadata hints "meta" # TODO: Use `meta` for inbounds and loopinfo etc? From 8241124cd0536b74e081ddb7fe65243bf7f564e7 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:11:08 -0700 Subject: [PATCH 1019/1109] Performance: make `SyntaxGraph` and `SyntaxList` mutable (JuliaLang/JuliaLowering.jl#12) The long initial hang in generating code for JuliaLowering is LLVM taking forever to deal with what julia gives it. Large structs on the stack are the likely culprit. 
--- JuliaLowering/src/syntax_graph.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index bada8ca1b8e53..bdbf98e092927 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -6,7 +6,7 @@ one or several syntax trees. TODO: Global attributes! """ -struct SyntaxGraph{Attrs} +mutable struct SyntaxGraph{Attrs} edge_ranges::Vector{UnitRange{Int}} edges::Vector{NodeId} attributes::Attrs @@ -632,7 +632,7 @@ end #------------------------------------------------------------------------------- # Lightweight vector of nodes ids with associated pointer to graph stored separately. -struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree} +mutable struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree} graph::GraphType ids::NodeIdVecType end From 03150050cc475da646d20e75d33f37f1b6191f68 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:39:35 +0900 Subject: [PATCH 1020/1109] Placeholder implementation of multi-arg `@nospecialize` (JuliaLang/JuliaLowering.jl#15) --- JuliaLowering/src/syntax_macros.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl index 5a18059d1a075..d9fac7bec22e3 100644 --- a/JuliaLowering/src/syntax_macros.jl +++ b/JuliaLowering/src/syntax_macros.jl @@ -2,14 +2,14 @@ # extensions": # # * They emit syntactic forms with special `Kind`s and semantics known to -# lowering +# lowering # * There is no other Julia surface syntax for these `Kind`s. # In order to implement these here without getting into bootstrapping problems, # we just write them as plain old macro-named functions and add the required # __context__ argument ourselves. 
# -# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @isdefined, @assume_effects +# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @assume_effects # # TODO: Eventually move these to proper `macro` definitions and use # `JuliaLowering.include()` or something. Then we'll be in the fun little world @@ -29,7 +29,8 @@ function _apply_nospecialize(ctx, ex) end end -function Base.var"@nospecialize"(__context__::MacroContext, ex) +function Base.var"@nospecialize"(__context__::MacroContext, ex, exs...) + # TODO support multi-arg version properly _apply_nospecialize(__context__, ex) end @@ -220,4 +221,3 @@ function var"@inert"(__context__::MacroContext, ex) @chk kind(ex) == K"quote" @ast __context__ __context__.macrocall [K"inert" ex] end - From e707438590c5a4865cf62cf97745468b495c0068 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:44:33 +0900 Subject: [PATCH 1021/1109] make `showprov()` able to take optional keyword arguments for `highlight()` (JuliaLang/JuliaLowering.jl#16) * make `showprov()` able to take optional keyword arguments for `highlight()` So that user of `showprov` can tweak options of `highlight`, e.g, `color` and `context_lines_before`. * add `include_location::Bool` option for `showprov` --- JuliaLowering/README.md | 21 ++++++++++----------- JuliaLowering/src/utils.jl | 26 +++++++++++++++----------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 1f277257ed08e..789e1ce74666b 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -13,7 +13,7 @@ This work is intended to * Bring precise code provenance to Julia's lowered form (and eventually downstream in type inference, stack traces, etc). 
This has many benefits - Talk to users precisely about their code via character-precise error and - diagnostic messages from lowering + diagnostic messages from lowering - Greatly simplify the implementation of critical tools like Revise.jl which rely on analyzing how the user's source maps to the compiler's data structures @@ -109,10 +109,10 @@ For example when parsing a source file we have ```julia julia> ex = parsestmt(SyntaxTree, "a + b", filename="foo.jl") SyntaxTree with attributes kind,value,name_val,syntax_flags,source -[call-i] │ - a │ - + │ - b │ +[call-i] │ + a │ + + │ + b │ julia> ex[3].source a + b @@ -157,9 +157,9 @@ The tree which arises from macro expanding this is pretty simple: ```julia julia> expanded = JuliaLowering.macroexpand(Main, parsestmt(SyntaxTree, "M.@outer()")) SyntaxTree with attributes scope_layer,kind,value,var_id,name_val,syntax_flags,source -[tuple-p] │ - 1 │ - 2 │ +[tuple-p] │ + 1 │ + 2 │ ``` but the provenance information recorded for the second element `2` of this @@ -777,7 +777,7 @@ The final lowered IR is expressed as `CodeInfo` objects which are a sequence of * Restricted forms of `Expr` (with semantics different from surface syntax, even for the same `head`! for example the arguments to `Expr(:call)` in IR must be "simple" and aren't evaluated in order) -* `Core.SlotNumber` +* `Core.SlotNumber` * Other special forms from `Core` like `Core.ReturnNode`, `Core.EnterNode`, etc. * `Core.SSAValue`, indexing any value generated from a statement in the `code` array. @@ -857,7 +857,7 @@ Pros: - Replaces more Expr usage - Replaces a whole pile of C code with significantly less Julia code - Lowering output becomes more consistently imperative -Cons: +Cons: - Lots more code to write - May need to invent intermediate data structures to replace `Expr` - Bootstrap? @@ -895,4 +895,3 @@ Some differences which makes Racket's macro expander different from Julia: expand macros; the "pass system". 
Julia just executes all top level statements in order when precompiling a package. * As a lisp, Racket's surface syntax is dramatically simpler and more uniform - diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index ced8827ef044e..5d749f05c502a 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -39,29 +39,34 @@ function _show_provtree(io::IO, prov, indent) printstyled(io, "@ $fn:$line\n", color=:light_black) end -function showprov(io::IO, exs::AbstractVector) +function showprov(io::IO, exs::AbstractVector; + note=nothing, include_location::Bool=true, highlight_kwargs...) for (i,ex) in enumerate(Iterators.reverse(exs)) sr = sourceref(ex) if i > 1 print(io, "\n\n") end k = kind(ex) - note = i > 1 && k == K"macrocall" ? "in macro expansion" : - i > 1 && k == K"$" ? "interpolated here" : - "in source" - highlight(io, sr, note=note) + if isnothing(note) # use provided `note` otherwise + note = i > 1 && k == K"macrocall" ? "in macro expansion" : + i > 1 && k == K"$" ? "interpolated here" : + "in source" + end + highlight(io, sr; note=note, highlight_kwargs...) - line, _ = source_location(sr) - locstr = "$(filename(sr)):$line" - JuliaSyntax._printstyled(io, "\n# @ $locstr", fgcolor=:light_black) + if include_location + line, _ = source_location(sr) + locstr = "$(filename(sr)):$line" + JuliaSyntax._printstyled(io, "\n# @ $locstr", fgcolor=:light_black) + end end end -function showprov(io::IO, ex::SyntaxTree; tree=false) +function showprov(io::IO, ex::SyntaxTree; tree::Bool=false, showprov_kwargs...) if tree _show_provtree(io, ex, "") else - showprov(io, flattened_provenance(ex)) + showprov(io, flattened_provenance(ex); showprov_kwargs...) 
end end @@ -165,4 +170,3 @@ function _print_ir(io::IO, ex, indent) end end end - From 7af5c8230650f37a2e075b2dd04172ef6e6a157c Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:49:37 +0900 Subject: [PATCH 1022/1109] add precompilation statements to improve first-time-to-lower latency (JuliaLang/JuliaLowering.jl#14) Exercises the complete lowering pipeline during precompilation with a sample code thunks to create method and type definitions. This precompilation is enabled by default but can be disabled by setting the `JULIA_LOWERING_PRECOMPILE` environment variable to false-cy, which may be useful during development. Ideally we'd like to use Preferences.jl for this configuration, but my understanding is that JuliaLowering aims to be self-contained and should not add dependencies other than JuliaSyntax. --- JuliaLowering/src/JuliaLowering.jl | 2 ++ JuliaLowering/src/precompile.jl | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 JuliaLowering/src/precompile.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 19fd0c868dc6b..a365a9e69eb43 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -37,4 +37,6 @@ function __init__() _register_kinds() end +_include("precompile.jl") + end diff --git a/JuliaLowering/src/precompile.jl b/JuliaLowering/src/precompile.jl new file mode 100644 index 0000000000000..0c07b5465eb3a --- /dev/null +++ b/JuliaLowering/src/precompile.jl @@ -0,0 +1,27 @@ +# exercise the whole lowering pipeline +if Base.get_bool_env("JULIA_LOWERING_PRECOMPILE", true) + thunks = String[ + """ + function foo(xxx, yyy) + @nospecialize xxx + return Pair{Any,Any}(typeof(xxx), typeof(yyy)) + end + """ + + """ + struct Foo + x::Int + Foo(x::Int) = new(x) + # Foo() = new() + end + """ + ] + for thunk in thunks + stream = JuliaSyntax.ParseStream(thunk) + JuliaSyntax.parse!(stream; 
rule=:all) + st0 = JuliaSyntax.build_tree(SyntaxTree, stream; filename=@__FILE__) + lwrst = lower(@__MODULE__, st0[1]) + lwr = to_lowered_expr(@__MODULE__, lwrst) + @assert Meta.isexpr(lwr, :thunk) && only(lwr.args) isa Core.CodeInfo + end +end From a498a8877f9f435240b6fe8376df19c37af1263f Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Sat, 26 Jul 2025 14:15:21 -0400 Subject: [PATCH 1023/1109] Update version on main to 2.0.0-DEV (JuliaLang/JuliaSyntax.jl#576) --- JuliaSyntax/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml index 1abbf2f71e44b..6ab84fdfb4733 100644 --- a/JuliaSyntax/Project.toml +++ b/JuliaSyntax/Project.toml @@ -1,7 +1,7 @@ name = "JuliaSyntax" uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4" authors = ["Claire Foster and contributors"] -version = "1.0.2" +version = "2.0.0-DEV" [compat] Serialization = "1.0" From 69f67f5e61eccd4d3d5a09d8b816c37f2218d36a Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 4 Aug 2025 16:50:02 -0700 Subject: [PATCH 1024/1109] Compatibility with julia nightly (JuliaLang/JuliaLowering.jl#10) * Update CodeInfo struct and handling Co-authored-by: Claire Foster * Don't produce raw symbol from globalref This used to implicitly refer to a module-level name, but lowering is now expected to wrap it in a `globalref`. Part of JuliaLang/julia#54772 * Updates to const and global lowering; add K"constdecl"; omit `wrap` JuliaLang/julia#54773, JuliaLang/julia#56713, JuliaLang/julia#57470. Some changes omitted from `expand-decls` and `expand-assignment`. Note that the two-argument IR "const" is K"constdecl", whereas the one-argument K"const" only appears in the AST. Also note that the `wrap` parameter is omitted throughout assignment desugaring. As far as I'm aware, all this plumbing was just to support `const a,b,c = 1,2,3` having `b` and `c` inherit the `const`. 
TODO: find a better way of doing the same thing (a ScopedValue might be a clean solution; we currently throw an error). The check for `let; const x = 1; end`, (which should throw) is in scope analysis (lisp has it in `compile`). Co-authored-by: Claire Foster * Add `isdefinedglobal` builtin JuliaLang/julia#54999, JuliaLang/julia#56985 * :global no longer valid_ir_argument; rm `is_defined_nothrow_global` JuliaLang/julia#56746. Also call :slot and :static_parameter valid (for now) * Fix `is_defined_and_owned_global` (Core.Binding changes) Adapt to bpart changes in JuliaLang/julia#54788 * Struct desugaring: "Undo decision to publish incomplete types..." JuliaLang/julia#56497; Add self-referencing struct shim I have doubts about how long this solution will stay in the base repository, and how complete it is (doesn't seem to work with M1.M2.S), but we are testing for it here. Also change the expected value of a test changed in the same PR. * Emit `latestworld` world age increments For method defs, `latestworld` is produced in desugaring rather than closure conversion for now (our closure conversion doesn't seem to cover the same cases as lisp lowering yet). Covers JuliaLang/julia#56523, JuliaLang/julia#56509, JuliaLang/julia#57299. Also includes changes from JuliaLang/julia#57102 (bpart: Start enforcing minimum world age for const bparts) and JuliaLang/julia#57150 (bpart: Start enforcing min_world for global variable definitions) since the lowering changes from those appear to be amendments to the changes above (missing world age increments). Co-authored-by: Claire Foster * bpart changes: `Core._typebody!` signature `Core._typebody!` now takes a new "prev" argument, which we don't use yet here. Changes from JuliaLang/julia#57253 * bpart changes: struct desugaring Changes from JuliaLang/julia#57253 (bpart: Fully switch to partitioned semantics). This fixes one failing test and realigns struct desugaring to match lisp for now. 
Also changed: the expected result of redefining a primitive type (now allowed). * Additional argument in `new_opaque_closure` Fix segfaulting test. Thanks for the TODO * Adapt to different `GeneratedFunctionStub` signature Signature changed in JuliaLang/julia#57230. Thanks @aviatesk for the help! * Fix `public` and `export` As of JuliaLang/julia#57765, `jl_module_public` is no longer exported. Change our runtime to handle it like `public` and `export` like we handle `import` or `using` for now * Fix modules.jl test I believe this was a world age issue * Regenerate IR tests Too many to count. * Update README to known-good julia, JuliaSyntax versions Latest julia works. Changes are needed to work with the latest JuliaSyntax, but that isn't in base julia yet, and more changes are likely to come. * Fix small bug from JuliaLang/JuliaLowering.jl#16 so tests pass The change lifted the scope of `note`, so it was being changed in the loop * Changes from code review: const/global lowering Ping me if you'd like this squashed into the original const/global commit! Co-authored-by: Claire Foster * Remove a special case No longer needed since we no longer put `global` or `local` forms back into the expand_forms machine. Some error messages change slightly as a result. * Changes from code review Co-authored-by: Claire Foster * Fix + test for assignment in value but not tail position * Disallow `static_parameter` as `valid_ir_argument` See added comment, and discussion at https://github.com/c42f/JuliaLowering.jl/pull/10#discussion_r2247136484 Co-authored-by: Claire Foster * Change printing of `K"latestworld"` Parens are nice, but it wasn't consistent. Also make it a leaf (remaining non-leaves are deleted in the next commit.) * Move most `latestworld`s to linearization From the docs: ``` The following statements raise the current world age: 1. An explicit invocation of Core.@latestworld 2. The start of every top-level statement 3. The start of every REPL prompt 4. 
Any type or struct definition 5. Any method definition 6. Any constant declaration 7. Any global variable declaration (but not a global variable assignment) 8. Any using, import, export or public statement 9. Certain other macros like eval (depends on the macro implementation) ``` This commit handles each case as follows: ``` 1. = 9 2. I'm not sure this actually happens (or needs to happen, unless we're being defensive? Doing it after each world-changing operation should suffice). But if we need it, this would just be emitting once at the beginning of every lowered output. 3. = 2 4. = 6 5. Emit seeing `method` in linearize 6. Emit seeing `constdecl` in linearize 7. Emit seeing `global` or `globaldecl` in linearize 8. We just defer to `eval`, but should probably go in desugaring later - using/import recently became builtin calls, and I haven't updated JL to use them yet. Base._import_using has an expr-based API that may change, and our importpath destructuring is worth keeping. - export and public (special forms) are handled in toplevel.c 9. Done for us ``` Other quirks: - `JuliaLowering.eval_closure_type` calls eval to assign a const, so we still need to deal with that in closure conversion. - The `include` hack isn't mentioned in the docs, but can stay in desugaring. I'm not certain why we don't do the same for non-macro `eval`. 
--------- Co-authored-by: Claire Foster --- JuliaLowering/README.md | 4 +- JuliaLowering/src/ast.jl | 2 +- JuliaLowering/src/closure_conversion.jl | 23 +- JuliaLowering/src/desugaring.jl | 269 +++-- JuliaLowering/src/eval.jl | 50 +- JuliaLowering/src/kinds.jl | 12 +- JuliaLowering/src/linear_ir.jl | 91 +- JuliaLowering/src/runtime.jl | 24 +- JuliaLowering/src/scope_analysis.jl | 19 +- JuliaLowering/src/syntax_graph.jl | 1 + JuliaLowering/src/utils.jl | 11 +- JuliaLowering/test/assignments.jl | 10 + JuliaLowering/test/assignments_ir.jl | 57 +- JuliaLowering/test/closures_ir.jl | 474 ++++---- JuliaLowering/test/decls.jl | 45 +- JuliaLowering/test/decls_ir.jl | 144 ++- JuliaLowering/test/functions_ir.jl | 1354 ++++++++++++----------- JuliaLowering/test/generators_ir.jl | 196 ++-- JuliaLowering/test/macros_ir.jl | 40 +- JuliaLowering/test/misc.jl | 8 + JuliaLowering/test/misc_ir.jl | 9 + JuliaLowering/test/modules.jl | 3 +- JuliaLowering/test/quoting.jl | 16 + JuliaLowering/test/scopes_ir.jl | 98 +- JuliaLowering/test/typedefs.jl | 7 +- JuliaLowering/test/typedefs_ir.jl | 1040 +++++++++-------- 26 files changed, 2267 insertions(+), 1740 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 789e1ce74666b..47ea97db06cb5 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -28,8 +28,8 @@ This work is intended to Note this is a work in progress; many types of syntax are not yet handled. -1. You need a 1.12-DEV build of Julia: At least 1.12.0-DEV.512. Commit `263928f9ad4` is currentl known to work. Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. (In fact it is currently broken on the latest `1.12-DEV`.) -2. Check out the main branch of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) +1. You need a 1.13.0-DEV build of Julia: At least 1.13.0-DEV.880. Commit `5ebc5b463ea` is currently known to work. 
Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. +2. Use commit `46723f0` of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) 3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) 4. Run the demo `include("test/demo.jl")` diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 7188eba40b61c..1b718e14409ba 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -150,7 +150,7 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) makeleaf(graph, srcref, k; id=value, kws...) elseif k == K"symbolic_label" makeleaf(graph, srcref, k; name_val=value, kws...) - elseif k == K"TOMBSTONE" || k == K"SourceLocation" + elseif k in KSet"TOMBSTONE SourceLocation latestworld latestworld_if_toplevel" makeleaf(graph, srcref, k; kws...) else val = k == K"Integer" ? convert(Int, value) : diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 211d6922e765c..b261cf36dd2c0 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -147,6 +147,7 @@ function convert_global_assignment(ctx, ex, var, rhs0) end push!(stmts, @ast ctx ex [K"=" var rhs]) @ast ctx ex [K"block" + [K"globaldecl" var] stmts... 
rhs1 ] @@ -337,6 +338,13 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) elseif binfo.is_always_defined || is_self_captured(ctx, var) # Captured but unboxed vars are always defined @ast ctx ex true::K"Bool" + elseif binfo.kind == :global + # Normal isdefined won't work for globals (#56985) + @ast ctx ex [K"call" + "isdefinedglobal"::K"core" + ctx.mod::K"Value" + binfo.name::K"Symbol" + false::K"Bool"] else ex end @@ -344,12 +352,13 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) @assert kind(ex[1]) == K"BindingId" binfo = lookup_binding(ctx, ex[1]) if binfo.kind == :global - @ast ctx ex [K"call" - "set_binding_type!"::K"core" - binfo.mod::K"Value" - binfo.name::K"Symbol" - _convert_closures(ctx, ex[2]) - ] + @ast ctx ex [K"block" + # flisp has this, but our K"assert" handling is in a previous pass + # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + [K"globaldecl" + ex[1] + _convert_closures(ctx, ex[2])] + "nothing"::K"core"] else makeleaf(ctx, ex, K"TOMBSTONE") end @@ -382,6 +391,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) type_for_closure(ctx, ex, name_str, field_syms, field_is_box) if !ctx.is_toplevel_seq_point push!(ctx.toplevel_stmts, closure_type_def) + push!(ctx.toplevel_stmts, @ast ctx ex (::K"latestworld_if_toplevel")) closure_type_def = nothing end closure_info = ClosureInfo(closure_type_, field_syms, field_inds) @@ -406,6 +416,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) end @ast ctx ex [K"block" closure_type_def + (::K"latestworld_if_toplevel") closure_type := if isempty(type_params) closure_type_ else diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index b45603e545a95..9661702d4a876 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -814,8 +814,7 @@ function expand_generator(ctx, ex) outervars_by_key = Dict{NameKey,typeof(ex)}() for iterspecs in ex[2:end-1] for iterspec in children(iterspecs) - lhs = iterspec[1] - 
foreach_lhs_var(lhs) do var + foreach_lhs_name(iterspec[1]) do var @assert kind(var) == K"Identifier" # Todo: K"BindingId"? outervars_by_key[NameKey(var)] = var end @@ -1165,16 +1164,19 @@ end # Expand assignments # Expand UnionAll definitions, eg `X{T} = Y{T,T}` -function expand_unionall_def(ctx, srcref, lhs, rhs) +function expand_unionall_def(ctx, srcref, lhs, rhs, is_const=true) if numchildren(lhs) <= 1 throw(LoweringError(lhs, "empty type parameter list in type alias")) end name = lhs[1] - @ast ctx srcref [K"block" - [K"const_if_global" name] - unionall_type := expand_forms_2(ctx, [K"where" rhs lhs[2:end]...]) - expand_forms_2(ctx, [K"=" name unionall_type]) - ] + expand_forms_2( + ctx, + @ast ctx srcref [K"block" + rr := [K"where" rhs lhs[2:end]...] + [is_const ? K"constdecl" : K"assign_or_constdecl_if_global" name rr] + [K"removable" rr] + ] + ) end # Expand general assignment syntax, including @@ -1184,13 +1186,13 @@ end # * Assignments to array elements # * Destructuring # * Typed variable declarations -function expand_assignment(ctx, ex) +function expand_assignment(ctx, ex, is_const=false) @chk numchildren(ex) == 2 lhs = ex[1] rhs = ex[2] kl = kind(lhs) if kl == K"curly" - expand_unionall_def(ctx, ex, lhs, rhs) + expand_unionall_def(ctx, ex, lhs, rhs, is_const) elseif kind(rhs) == K"=" # Expand chains of assignments # a = b = c ==> b=c; a=c @@ -1207,7 +1209,9 @@ function expand_assignment(ctx, ex) tmp_rhs = ssavar(ctx, rhs, "rhs") rr = tmp_rhs end - for i in 1:length(stmts) + # In const a = b = c, only a is const + stmts[1] = @ast ctx ex [(is_const ? 
K"constdecl" : K"=") stmts[1] rr] + for i in 2:length(stmts) stmts[i] = @ast ctx ex [K"=" stmts[i] rr] end if !isnothing(tmp_rhs) @@ -1220,9 +1224,18 @@ function expand_assignment(ctx, ex) ] ) elseif is_identifier_like(lhs) - sink_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs)) + if is_const + @ast ctx ex [K"block" + rr := expand_forms_2(ctx, rhs) + [K"constdecl" lhs rr] + [K"removable" rr] + ] + else + sink_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs)) + end elseif kl == K"." # a.b = rhs ==> setproperty!(a, :b, rhs) + @chk !is_const (ex, "cannot declare `.` form const") @chk numchildren(lhs) == 2 a = lhs[1] b = lhs[2] @@ -1250,16 +1263,23 @@ function expand_assignment(ctx, ex) end elseif kl == K"ref" # a[i1, i2] = rhs + @chk !is_const (ex, "cannot declare ref form const") expand_forms_2(ctx, expand_setindex(ctx, ex)) elseif kl == K"::" && numchildren(lhs) == 2 x = lhs[1] T = lhs[2] - res = if is_identifier_like(x) + res = if is_const + expand_forms_2(ctx, @ast ctx ex [K"const" + [K"=" + lhs[1] + convert_for_type_decl(ctx, ex, rhs, T, true) + ]]) + elseif is_identifier_like(x) # Identifer in lhs[1] is a variable type declaration, eg # x::T = rhs @ast ctx ex [K"block" [K"decl" lhs[1] lhs[2]] - [K"=" lhs[1] rhs] + is_const ? [K"const" [K"=" lhs[1] rhs]] : [K"=" lhs[1] rhs] ] else # Otherwise just a type assertion, eg @@ -1271,6 +1291,7 @@ function expand_assignment(ctx, ex) # needs to be detected somewhere but won't be detected here. Maybe # it shows that remove_argument_side_effects() is not the ideal # solution here? + # TODO: handle underscore? @ast ctx ex [K"block" stmts... 
[K"::" l1 lhs[2]] @@ -1438,7 +1459,7 @@ function expand_let(ctx, ex) ] elseif kind(lhs) == K"tuple" lhs_locals = SyntaxList(ctx) - foreach_lhs_var(lhs) do var + foreach_lhs_name(lhs) do var push!(lhs_locals, @ast ctx var [K"local" var]) push!(lhs_locals, @ast ctx var [K"always_defined" var]) end @@ -1822,6 +1843,17 @@ function expand_call(ctx, ex) expand_forms_2(ctx, farg) expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... ] + elseif kind(farg) == K"Identifier" && farg.name_val == "include" + # world age special case + r = ssavar(ctx, ex) + @ast ctx ex [K"block" + [K"=" r [K"call" + expand_forms_2(ctx, farg) + expand_forms_2(ctx, args)... + ]] + (::K"latestworld_if_toplevel") + r + ] else @ast ctx ex [K"call" expand_forms_2(ctx, farg) @@ -1864,23 +1896,6 @@ end #------------------------------------------------------------------------------- # Expand for loops -# Extract the variable names assigned to from a "fancy assignment left hand -# side" such as nested tuple destructuring. -function foreach_lhs_var(f::Function, ex) - k = kind(ex) - if k == K"Identifier" || k == K"BindingId" - f(ex) - elseif k == K"::" && numchildren(ex) == 2 - foreach_lhs_var(f, ex[1]) - elseif k == K"tuple" || k == K"parameters" - for e in children(ex) - foreach_lhs_var(f, e) - end - end - # k == K"Placeholder" ignored, along with everything else - we assume - # validation is done elsewhere. 
-end - function expand_for(ctx, ex) iterspecs = ex[1] @@ -1896,7 +1911,7 @@ function expand_for(ctx, ex) @chk kind(iterspec) == K"in" lhs = iterspec[1] if kind(lhs) != K"outer" - foreach_lhs_var(lhs) do var + foreach_lhs_name(lhs) do var push!(copied_vars, @ast ctx var [K"=" var var]) end end @@ -1913,7 +1928,7 @@ function expand_for(ctx, ex) if outer lhs = lhs[1] end - foreach_lhs_var(lhs) do var + foreach_lhs_name(lhs) do var if outer push!(lhs_outer_defs, @ast ctx var var) else @@ -2068,7 +2083,7 @@ end # (x::T, (y::U, z)) # strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z) # and return (x, (y, z)) -function strip_decls!(ctx, stmts, declkind, declkind2, declmeta, ex) +function strip_decls!(ctx, stmts, declkind, declmeta, ex) k = kind(ex) if k == K"Identifier" if !isnothing(declmeta) @@ -2076,9 +2091,6 @@ function strip_decls!(ctx, stmts, declkind, declkind2, declmeta, ex) else push!(stmts, makenode(ctx, ex, declkind, ex)) end - if !isnothing(declkind2) - push!(stmts, makenode(ctx, ex, declkind2, ex)) - end ex elseif k == K"Placeholder" ex @@ -2087,41 +2099,34 @@ function strip_decls!(ctx, stmts, declkind, declkind2, declmeta, ex) name = ex[1] @chk kind(name) == K"Identifier" push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) - strip_decls!(ctx, stmts, declkind, declkind2, declmeta, ex[1]) + strip_decls!(ctx, stmts, declkind, declmeta, ex[1]) elseif k == K"tuple" || k == K"parameters" cs = SyntaxList(ctx) for e in children(ex) - push!(cs, strip_decls!(ctx, stmts, declkind, declkind2, declmeta, e)) + push!(cs, strip_decls!(ctx, stmts, declkind, declmeta, e)) end makenode(ctx, ex, k, cs) + else + throw(LoweringError(ex, "invalid kind $k in $declkind declaration")) end end -# local x, (y=2), z ==> local x; local y; y = 2; local z -# const x = 1 ==> const x; x = 1 -# global x::T = 1 ==> (block (global x) (decl x T) (x = 1)) -function expand_decls(ctx, ex) +# Separate decls and assignments (which require re-expansion) +# local x, (y=2), z ==> 
local x; local z; y = 2 +function expand_decls(ctx, ex, is_const=false) declkind = kind(ex) + @assert declkind in KSet"local global" declmeta = get(ex, :meta, nothing) - if numchildren(ex) == 1 && kind(ex[1]) ∈ KSet"const global local" - declkind2 = kind(ex[1]) - bindings = children(ex[1]) - else - declkind2 = nothing - bindings = children(ex) - end + bindings = children(ex) stmts = SyntaxList(ctx) for binding in bindings kb = kind(binding) if is_prec_assignment(kb) @chk numchildren(binding) == 2 - lhs = strip_decls!(ctx, stmts, declkind, declkind2, declmeta, binding[1]) - push!(stmts, @ast ctx binding [kb lhs binding[2]]) - elseif is_sym_decl(binding) - if declkind == K"const" || declkind2 == K"const" - throw(LoweringError(ex, "expected assignment after `const`")) - end - strip_decls!(ctx, stmts, declkind, declkind2, declmeta, binding) + lhs = strip_decls!(ctx, stmts, declkind, declmeta, binding[1]) + push!(stmts, expand_assignment(ctx, @ast ctx binding [kb lhs binding[2]])) + elseif is_sym_decl(binding) && !is_const + strip_decls!(ctx, stmts, declkind, declmeta, binding) else throw(LoweringError(ex, "invalid syntax in variable declaration")) end @@ -2129,6 +2134,49 @@ function expand_decls(ctx, ex) makenode(ctx, ex, K"block", stmts) end +# Iterate over the variable names assigned to from a "fancy assignment left hand +# side" such as nested tuple destructuring, curlies, and calls. 
+function foreach_lhs_name(f::Function, ex) + k = kind(ex) + if k == K"Placeholder" + # Ignored + elseif is_identifier_like(ex) + f(ex) + elseif (k === K"::" && numchildren(ex) === 2) || k in KSet"call curly where" + foreach_lhs_name(f, ex[1]) + elseif k in KSet"tuple parameters" + for c in children(ex) + foreach_lhs_name(f, c) + end + end + return nothing +end + +function expand_const_decl(ctx, ex) + k = kind(ex[1]) + if k == K"global" + asgn = ex[1][1] + @chk (kind(asgn) == K"=") (ex, "expected assignment after `const`") + globals = SyntaxList(ctx) + foreach_lhs_name(asgn[1]) do x + push!(globals, @ast ctx ex [K"global" x]) + end + @ast ctx ex [K"block" + globals... + expand_assignment(ctx, asgn, true) + ] + elseif k == K"=" + if numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"tuple" + TODO(ex[1][1], "`const` tuple assignment desugaring") + end + expand_assignment(ctx, ex[1], true) + elseif k == K"local" + throw(LoweringError(ex, "unsupported `const local` declaration")) + else + throw(LoweringError(ex, "expected assignment after `const`")) + end +end + #------------------------------------------------------------------------------- # Expansion of function definitions @@ -3313,19 +3361,22 @@ function expand_abstract_or_primitive_type(ctx, ex) ] [K"=" name newtype_var] [K"call" "_setsuper!"::K"core" newtype_var supertype] - [K"call" "_typebody!"::K"core" newtype_var] + [K"call" "_typebody!"::K"core" false::K"Bool" name] ] ] [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] [K"global" name] - [K"const" name] [K"if" [K"&&" - [K"isdefined" name] + [K"call" + "isdefinedglobal"::K"core" + ctx.mod::K"Value" + name=>K"Symbol" + false::K"Bool"] [K"call" "_equiv_typedef"::K"core" name newtype_var] ] nothing_(ctx, ex) - [K"=" name newtype_var] + [K"constdecl" name newtype_var] ] nothing_(ctx, ex) ] @@ -3744,6 +3795,24 @@ function _constructor_min_initalized(ex::SyntaxTree) end end +# Let S be a struct we're defining in module M. 
Below is a hack to allow its +# field types to refer to S as M.S. See #56497. +function insert_struct_shim(ctx, fieldtypes, name) + function replace_type(ex) + if kind(ex) == K"." && + numchildren(ex) == 2 && + kind(ex[2]) == K"Symbol" && + ex[2].name_val == name.name_val + @ast ctx ex [K"call" "struct_name_shim"::K"core" ex[1] ex[2] ctx.mod::K"Value" name] + elseif numchildren(ex) > 0 + mapchildren(replace_type, ctx, ex) + else + ex + end + end + map(replace_type, fieldtypes) +end + function expand_struct_def(ctx, ex, docs) @chk numchildren(ex) == 2 type_sig = ex[1] @@ -3764,6 +3833,9 @@ function expand_struct_def(ctx, ex, docs) min_initialized = minimum((_constructor_min_initalized(e) for e in inner_defs), init=length(field_names)) newtype_var = ssavar(ctx, ex, "struct_type") + hasprev = ssavar(ctx, ex, "hasprev") + prev = ssavar(ctx, ex, "prev") + newdef = ssavar(ctx, ex, "newdef") layer = new_scope_layer(ctx, struct_name) global_struct_name = adopt_scope(struct_name, layer) if !isempty(typevar_names) @@ -3827,9 +3899,9 @@ function expand_struct_def(ctx, ex, docs) @ast ctx ex [K"block" [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] [K"scope_block"(scope_type=:hard) + # Needed for later constdecl to work, though plain global form may be removed soon. + [K"global" global_struct_name] [K"block" - [K"global" global_struct_name] - [K"const" global_struct_name] [K"local" struct_name] [K"always_defined" struct_name] typevar_stmts... 
@@ -3848,35 +3920,38 @@ function expand_struct_def(ctx, ex, docs) ] [K"=" struct_name newtype_var] [K"call"(supertype) "_setsuper!"::K"core" newtype_var supertype] - [K"if" - [K"isdefined" global_struct_name] - [K"if" - [K"call" "_equiv_typedef"::K"core" global_struct_name newtype_var] - [K"block" - # If this is compatible with an old definition, use - # the existing type object and throw away the new - # type - [K"=" struct_name global_struct_name] - if !isempty(typevar_names) - # And resassign the typevar_names - these may be - # referenced in the definition of the field - # types below - [K"=" - [K"tuple" typevar_names...] - prev_typevars - ] - end - ] - # Otherwise do an assignment to trigger an error - [K"=" global_struct_name struct_name] + [K"=" hasprev + [K"&&" [K"call" "isdefinedglobal"::K"core" + ctx.mod::K"Value" + struct_name=>K"Symbol" + false::K"Bool"] + [K"call" "_equiv_typedef"::K"core" global_struct_name newtype_var] + ]] + [K"=" prev [K"if" hasprev global_struct_name false::K"Bool"]] + [K"if" hasprev + [K"block" + # if this is compatible with an old definition, use the old parameters, but the + # new object. This will fail to capture recursive cases, but the call to typebody! + # below is permitted to choose either type definition to put into the binding table + if !isempty(typevar_names) + # And resassign the typevar_names - these may be + # referenced in the definition of the field + # types below + [K"=" [K"tuple" typevar_names...] prev_typevars] + end ] - [K"=" global_struct_name struct_name] - ] - [K"call"(type_body) - "_typebody!"::K"core" - struct_name - [K"call" "svec"::K"core" field_types...] ] + [K"=" newdef + [K"call"(type_body) + "_typebody!"::K"core" + prev + newtype_var + [K"call" "svec"::K"core" insert_struct_shim(ctx, field_types, struct_name)...] 
+ ]] + [K"constdecl" + global_struct_name + newdef + ] # Default constructors if isempty(inner_defs) default_inner_constructors(ctx, ex, global_struct_name, @@ -4271,12 +4346,14 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"let" expand_forms_2(ctx, expand_let(ctx, ex)) - elseif k == K"local" || k == K"global" || k == K"const" - if numchildren(ex) == 1 && kind(ex[1]) == K"Identifier" - # Don't recurse when already simplified - `local x`, etc - ex + elseif k == K"const" + expand_const_decl(ctx, ex) + elseif k == K"local" || k == K"global" + if k == K"global" && kind(ex[1]) == K"const" + # Normalize `global const` to `const global` + expand_const_decl(ctx, @ast ctx ex [K"const" [K"global" ex[1][1]]]) else - expand_forms_2(ctx, expand_decls(ctx, ex)) + expand_decls(ctx, ex) end elseif k == K"where" expand_forms_2(ctx, expand_wheres(ctx, ex)) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index eb4f28063e658..98722d6265059 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -12,6 +12,16 @@ function macroexpand(mod::Module, ex) ex1 end +function codeinfo_has_image_globalref(@nospecialize(e)) + if e isa GlobalRef + return 0x00 !== @ccall jl_object_in_image(e.mod::Any)::UInt8 + elseif e isa Core.CodeInfo + return any(codeinfo_has_image_globalref, e.code) + else + return false + end +end + _CodeInfo_need_ver = v"1.12.0-DEV.512" if VERSION < _CodeInfo_need_ver function _CodeInfo(args...) @@ -20,19 +30,29 @@ if VERSION < _CodeInfo_need_ver else # debuginfo changed completely as of https://github.com/JuliaLang/julia/pull/52415 # nargs / isva was added as of https://github.com/JuliaLang/julia/pull/54341 + # field rettype added in https://github.com/JuliaLang/julia/pull/54655 + # field has_image_globalref added in https://github.com/JuliaLang/julia/pull/57433 # CodeInfo constructor. 
TODO: Should be in Core let fns = fieldnames(Core.CodeInfo) fts = fieldtypes(Core.CodeInfo) conversions = [:(convert($t, $n)) for (t,n) in zip(fts, fns)] - expected_fns = (:code, :debuginfo, :ssavaluetypes, :ssaflags, :slotnames, :slotflags, :slottypes, :parent, :method_for_inference_limit_heuristics, :edges, :min_world, :max_world, :nargs, :propagate_inbounds, :has_fcall, :nospecializeinfer, :isva, :inlining, :constprop, :purity, :inlining_cost) - expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt64, UInt64, UInt64, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16) + expected_fns = (:code, :debuginfo, :ssavaluetypes, :ssaflags, :slotnames, :slotflags, :slottypes, :rettype, :parent, :edges, :min_world, :max_world, :method_for_inference_limit_heuristics, :nargs, :propagate_inbounds, :has_fcall, :has_image_globalref, :nospecializeinfer, :isva, :inlining, :constprop, :purity, :inlining_cost) + expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt64, UInt64, Any, UInt64, Bool, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16) - code = if fns != expected_fns || fts != expected_fts + code = if fns != expected_fns + unexpected_fns = collect(setdiff(Set(fns), Set(expected_fns))) + missing_fns = collect(setdiff(Set(expected_fns), Set(fns))) :(function _CodeInfo(args...) - error("Unrecognized CodeInfo layout: Maybe version $VERSION is to new for this version of JuliaLowering?") - end) + error("Unrecognized CodeInfo fields: Maybe version $VERSION is too new for this version of JuliaLowering?" + * isempty(unexpected_fns) ? "" : "\nUnexpected fields found: $($unexpected_fns)" + * isempty(missing_fns) ? "" : "\nMissing fields: $($missing_fns)") + end) + elseif fts != expected_fts + :(function _CodeInfo(args...) 
+ error("Unrecognized CodeInfo field types: Maybe version $VERSION is too new for this version of JuliaLowering?") + end) else :(function _CodeInfo($(fns...)) $(Expr(:new, :(Core.CodeInfo), conversions...)) @@ -142,6 +162,8 @@ function to_code_info(ex, mod, funcname, slots) debuginfo = finish_ir_debug_info!(current_codelocs_stack) + has_image_globalref = any(codeinfo_has_image_globalref, stmts) + # TODO: Set ssaflags based on call site annotations: # - @inbounds annotations # - call site @inline / @noinline @@ -172,6 +194,7 @@ function to_code_info(ex, mod, funcname, slots) max_world = typemax(Csize_t) isva = false inlining_cost = 0xffff + rettype = Any _CodeInfo( stmts, @@ -181,14 +204,16 @@ function to_code_info(ex, mod, funcname, slots) slotnames, slotflags, slottypes, + rettype, parent, - method_for_inference_limit_heuristics, edges, min_world, max_world, + method_for_inference_limit_heuristics, nargs, propagate_inbounds, has_fcall, + has_image_globalref, nospecializeinfer, isva, inlining, @@ -213,12 +238,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) elseif k == K"top" GlobalRef(Base, Symbol(ex.name_val)) elseif k == K"globalref" - if mod === ex.mod - # Implicitly refers to name in parent module. - Symbol(ex.name_val) - else - GlobalRef(ex.mod, Symbol(ex.name_val)) - end + GlobalRef(ex.mod, Symbol(ex.name_val)) elseif k == K"Identifier" # Implicitly refers to name in parent module # TODO: Should we even have plain identifiers at this point or should @@ -268,8 +288,6 @@ function to_lowered_expr(mod, ex, ssa_offset=0) Core.NewvarNode(to_lowered_expr(mod, ex[1], ssa_offset)) elseif k == K"new_opaque_closure" args = map(e->to_lowered_expr(mod, e, ssa_offset), children(ex)) - # TODO: put allow_partial back in once we update to the latest julia - splice!(args, 4) # allow_partial Expr(:new_opaque_closure, args...) 
elseif k == K"meta" args = Any[to_lowered_expr(mod, e, ssa_offset) for e in children(ex)] @@ -288,9 +306,11 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"splatnew" ? :splatnew : k == K"=" ? :(=) : k == K"global" ? :global : - k == K"const" ? :const : + k == K"constdecl" ? :const : k == K"leave" ? :leave : k == K"isdefined" ? :isdefined : + k == K"latestworld" ? :latestworld : + k == K"globaldecl" ? :globaldecl : k == K"pop_exception" ? :pop_exception : k == K"captured_local" ? :captured_local : k == K"gc_preserve_begin" ? :gc_preserve_begin : diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 6a1a4eccd8445..741307ba94ba5 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -96,7 +96,7 @@ function _register_kinds() "_opaque_closure" # The enclosed statements must be executed at top level "toplevel_butfirst" - "const_if_global" + "assign_or_constdecl_if_global" "moved_local" "label" "trycatchelse" @@ -111,6 +111,8 @@ function _register_kinds() # A local variable captured into a global method. Contains the # `index` of the associated `Box` in the rewrite list. "captured_local" + # Causes the linearization pass to conditionally emit a world age increment + "latestworld_if_toplevel" "END_LOWERING_KINDS" # The following kinds are emitted by lowering and used in Julia's untyped IR @@ -121,8 +123,12 @@ function _register_kinds() "slot" # Static parameter to a `CodeInfo` code object ("type parameters" to methods) "static_parameter" - # Reference to a global variable within a module + # References/declares a global variable within a module "globalref" + "globaldecl" + # Two-argument constant declaration and assignment. + # Translated to :const in the IR for now (we use K"const" already in parsing). 
+ "constdecl" # Unconditional goto "goto" # Conditional goto @@ -143,6 +149,8 @@ function _register_kinds() "new_opaque_closure" # Wrapper for the lambda of around opaque closure methods "opaque_closure_method" + # World age increment + "latestworld" "END_IR_KINDS" ]) end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 0bb91f976969d..b83bf099010ff 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -3,24 +3,17 @@ function is_valid_ir_argument(ctx, ex) k = kind(ex) - if is_simple_atom(ctx, ex) || k == K"inert" || k == K"top" || k == K"core" + if is_simple_atom(ctx, ex) || k in KSet"inert top core quote" true elseif k == K"BindingId" binfo = lookup_binding(ctx, ex) bk = binfo.kind - # TODO: Can we allow bk == :local || bk == :argument || bk == :static_parameter ??? - # Why does flisp seem to allow (slot) and (static_parameter), but these - # aren't yet converted to by existing lowering?? - if bk == :global - # Globals are nothrow when they are defined - we assume a previously - # defined global can never be set to undefined. (TODO: This could be - # broken when precompiling a module `B` in the presence of a badly - # behaved module `A`, which inconsistently defines globals during - # `A.__init__()`??) - is_defined_nothrow_global(binfo.mod, Symbol(binfo.name)) - else - false - end + bk === :slot + # TODO: We should theoretically be able to allow `bk === + # :static_parameter` for slightly more compact IR, but it's uncertain + # what the compiler is built to tolerate. Notably, flisp allows + # static_parameter, but doesn't produce this form until a later pass, so + # it doesn't end up in the IR. else false end @@ -69,7 +62,7 @@ end Context for creating linear IR. One of these is created per lambda expression to flatten the body down to -a sequence of statements (linear IR). +a sequence of statements (linear IR), which eventually becomes one CodeInfo. 
""" struct LinearIRContext{GraphType} <: AbstractLoweringContext graph::GraphType @@ -334,18 +327,35 @@ function emit_break(ctx, ex) emit_jump(ctx, ex, target) end -function emit_assignment(ctx, srcref, lhs, rhs) +# `op` may be either K"=" (where global assignments are converted to setglobal!) +# or K"constdecl". flisp: emit-assignment-or-setglobal +function emit_simple_assignment(ctx, srcref, lhs, rhs, op=K"=") + binfo = lookup_binding(ctx, lhs.var_id) + if binfo.kind == :global && op == K"=" + emit(ctx, @ast ctx srcref [ + K"call" + "setglobal!"::K"core" + binfo.mod::K"Value" + binfo.name::K"Symbol" + rhs + ]) + else + emit(ctx, srcref, op, lhs, rhs) + end +end + +function emit_assignment(ctx, srcref, lhs, rhs, op=K"=") if !isnothing(rhs) if is_valid_ir_rvalue(ctx, lhs, rhs) - emit(ctx, srcref, K"=", lhs, rhs) + emit_simple_assignment(ctx, srcref, lhs, rhs, op) else r = emit_assign_tmp(ctx, rhs) - emit(ctx, srcref, K"=", lhs, r) + emit_simple_assignment(ctx, srcref, lhs, r, op) end else # in unreachable code (such as after return); still emit the assignment # so that the structure of those uses is preserved - emit(ctx, @ast ctx srcref [K"=" lhs "nothing"::K"core"]) + emit_simple_assignment(ctx, srcref, lhs, @ast ctx srcref "nothing"::K"core", op) nothing end end @@ -370,6 +380,11 @@ function emit_label(ctx, srcref) l end +function emit_latestworld(ctx, srcref) + (isempty(ctx.code) || kind(last(ctx.code)) != K"latestworld") && + emit(ctx, makeleaf(ctx, srcref, K"latestworld")) +end + function compile_condition_term(ctx, ex) cond = compile(ctx, ex, true, false) if !is_valid_body_ir_argument(ctx, cond) @@ -640,25 +655,27 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) emit(ctx, callex) nothing end - elseif k == K"=" + elseif k == K"=" || k == K"constdecl" lhs = ex[1] - if kind(lhs) == K"Placeholder" + res = if kind(lhs) == K"Placeholder" compile(ctx, ex[2], needs_value, in_tail_pos) else rhs = compile(ctx, ex[2], true, false) # TODO look up 
arg-map for renaming if lhs was reassigned if needs_value && !isnothing(rhs) r = emit_assign_tmp(ctx, rhs) - emit(ctx, ex, K"=", lhs, r) + emit_simple_assignment(ctx, ex, lhs, r, k) if in_tail_pos emit_return(ctx, ex, r) else r end else - emit_assignment(ctx, ex, lhs, rhs) + emit_assignment(ctx, ex, lhs, rhs, k) end end + k == K"constdecl" && emit_latestworld(ctx, ex) + res elseif k == K"block" || k == K"scope_block" nc = numchildren(ex) if nc == 0 @@ -767,7 +784,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) # TODO # throw(LoweringError(ex, # "Global method definition needs to be placed at the top level, or use `eval`")) - if numchildren(ex) == 1 + res = if numchildren(ex) == 1 if in_tail_pos emit_return(ctx, ex) elseif needs_value @@ -792,6 +809,8 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) @assert !needs_value && !in_tail_pos nothing end + emit_latestworld(ctx, ex) + res elseif k == K"opaque_closure_method" @ast ctx ex [K"opaque_closure_method" ex[1] @@ -811,12 +830,19 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end elseif k == K"gc_preserve_begin" makenode(ctx, ex, k, compile_args(ctx, children(ex))) - elseif k == K"gc_preserve_end" || k == K"global" || k == K"const" + elseif k == K"gc_preserve_end" if needs_value throw(LoweringError(ex, "misplaced kind $k in value position")) end emit(ctx, ex) nothing + elseif k == K"global" + if needs_value + throw(LoweringError(ex, "misplaced global declaration in value position")) + end + emit(ctx, ex) + ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) + nothing elseif k == K"meta" emit(ctx, ex) if needs_value @@ -862,6 +888,21 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) # TODO: also exclude deleted vars emit(ctx, ex) end + elseif k == K"globaldecl" + if needs_value + throw(LoweringError(ex, "misplaced global declaration")) + end + if numchildren(ex) == 1 || is_identifier_like(ex[2]) + emit(ctx, ex) + else + rr = 
emit_assign_tmp(ctx, ex[2]) + emit(ctx, @ast ctx ex [K"globaldecl" ex[1] rr]) + end + ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) + elseif k == K"latestworld" + emit_latestworld(ctx, ex) + elseif k == K"latestworld_if_toplevel" + ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index a3d9c2ce79643..23252400fd770 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -130,7 +130,7 @@ function eval_closure_type(mod, closure_type_name, field_names, field_is_box) length(field_names)) Core._setsuper!(type, Core.Function) Base.eval(mod, :(const $closure_type_name = $type)) - Core._typebody!(type, Core.svec(field_types...)) + Core._typebody!(false, type, Core.svec(field_types...)) type end @@ -209,9 +209,8 @@ function module_import(into_mod::Module, is_using::Bool, end function module_public(mod::Module, is_exported::Bool, identifiers...) - for ident in identifiers - @ccall jl_module_public(mod::Module, Symbol(ident)::Symbol, is_exported::Cint)::Cvoid - end + # symbol jl_module_public is no longer exported as of #57765 + eval(mod, Expr((is_exported ? :export : :public), map(Symbol, identifiers)...)) end #-------------------------------------------------- @@ -282,7 +281,7 @@ end # expression into a CodeInfo. # # `args` passed into stub by the Julia runtime are (parent_func, static_params..., arg_types...) -function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospecialize args...) +function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize args...) # Some of the lowering pipeline from lower() and the pass-specific setup is # re-implemented here because generated functions are very much (but not # entirely) like macro expansion. 
@@ -334,7 +333,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospec ctx2, ex2 = expand_forms_2( ctx1, ex1) # Wrap expansion in a non-toplevel lambda and run scope resolution - ex2 = @ast ctx2 source [K"lambda"(is_toplevel_thunk=false) + ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false) [K"block" (string(n)::K"Identifier" for n in g.argnames)... ] @@ -371,18 +370,7 @@ end # # (This should do what fl_defined_julia_global does for flisp lowering) function is_defined_and_owned_global(mod, name) - b = _get_module_binding(mod, name) - !isnothing(b) && isdefined(b, :owner) && b.owner === b -end - -# Return true if `name` is defined in `mod`, the sense that accessing it is nothrow. -# Has no side effects, unlike isdefined() -# -# (This should do what fl_nothrow_julia_global does for flisp lowering) -function is_defined_nothrow_global(mod, name) - b = _get_module_binding(mod, name) - !isnothing(b) && isdefined(b, :owner) || return false - isdefined(b.owner, :value) + Base.binding_kind(mod, name) === Base.PARTITION_KIND_GLOBAL end # "Reserve" a binding: create the binding if it doesn't exist but do not assign diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 343b5c6b246fc..c0f659532b5be 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -44,7 +44,11 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, end elseif k == K"global" _insert_if_not_present!(globals, NameKey(ex[1]), ex) - elseif k == K"=" + elseif k == K"assign_or_constdecl_if_global" + # like v = val, except that if `v` turns out global(either implicitly or + # by explicit `global`), it gains an implicit `const` + _insert_if_not_present!(assignments, NameKey(ex[1]), ex) + elseif k == K"=" || k == K"constdecl" v = decl_var(ex[1]) if !(kind(v) in KSet"BindingId globalref Placeholder") _insert_if_not_present!(assignments, NameKey(v), v) @@ -561,13 +565,12 @@ 
function _resolve_scopes(ctx, ex::SyntaxTree) end end resolved - elseif k == K"const_if_global" + elseif k == K"assign_or_constdecl_if_global" id = _resolve_scopes(ctx, ex[1]) - if lookup_binding(ctx, id).kind == :global - @ast ctx ex [K"const" id] - else - makeleaf(ctx, ex, K"TOMBSTONE") - end + bk = lookup_binding(ctx, id).kind + @assert numchildren(ex) === 2 + assignment_kind = bk == :global ? K"constdecl" : K"=" + @ast ctx ex _resolve_scopes(ctx, [assignment_kind ex[1] ex[2]]) else mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) end @@ -677,7 +680,7 @@ function analyze_variables!(ctx, ex) if kind(ex[1]) != K"BindingId" || lookup_binding(ctx, ex[1]).kind !== :local analyze_variables!(ctx, ex[1]) end - elseif k == K"const" + elseif k == K"constdecl" id = ex[1] if lookup_binding(ctx, id).kind == :local throw(LoweringError(ex, "unsupported `const` declaration on local variable")) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index bdbf98e092927..35c9b188635d6 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -431,6 +431,7 @@ function _value_string(ex) k == K"Symbol" ? ":$(ex.name_val)" : k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : k == K"slot" ? "slot" : + k == K"latestworld" ? "latestworld" : k == K"static_parameter" ? "static_parameter" : k == K"symbolic_label" ? "label:$(ex.name_val)" : k == K"symbolic_goto" ? "goto:$(ex.name_val)" : diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 5d749f05c502a..15c1b27b14db9 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -47,12 +47,11 @@ function showprov(io::IO, exs::AbstractVector; print(io, "\n\n") end k = kind(ex) - if isnothing(note) # use provided `note` otherwise - note = i > 1 && k == K"macrocall" ? "in macro expansion" : - i > 1 && k == K"$" ? "interpolated here" : - "in source" - end - highlight(io, sr; note=note, highlight_kwargs...) + ex_note = !isnothing(note) ? 
note : + i > 1 && k == K"macrocall" ? "in macro expansion" : + i > 1 && k == K"$" ? "interpolated here" : + "in source" + highlight(io, sr; note=ex_note, highlight_kwargs...) if include_location line, _ = source_location(sr) diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl index 0f7fa488c18d9..d15706c2f8d70 100644 --- a/JuliaLowering/test/assignments.jl +++ b/JuliaLowering/test/assignments.jl @@ -23,6 +23,16 @@ let end """) == 42 +# Assignment in value but not tail position +@test JuliaLowering.include_string(test_mod, """ +let + x = begin + y = 42 + end + x +end +""") == 42 + @test JuliaLowering.include_string(test_mod, """ let x = [] diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 9a1393c22a3a8..9789fc6e6315e 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -23,6 +23,21 @@ end 5 (= slot₃/c %₂) 6 (return %₂) +######################################## +# Assignment in value but not tail position +let + x = begin + y = 42 + end + x +end +#--------------------- +1 42 +2 (= slot₂/y %₁) +3 (= slot₁/x %₁) +4 slot₁/x +5 (return %₄) + ######################################## # short form function def, not chain of assignments begin @@ -31,20 +46,22 @@ begin end #--------------------- 1 (method TestMod.b) -2 TestMod.b -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::3:9 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.b +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::3:9 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/c(!read)] 1 TestMod.d 2 (= slot₂/c %₁) 3 (return %₁) -9 TestMod.b -10 (= slot₁/a %₉) -11 (return %₉) +10 latestworld +11 TestMod.b +12 (= slot₁/a %₁₁) +13 (return %₁₁) ######################################## # a.b = ... => setproperty! 
assignment @@ -117,17 +134,17 @@ end # UnionAll expansion at global scope results in const decl X{T} = Y{T,T} #--------------------- -1 (const TestMod.X) -2 (call core.TypeVar :T) -3 (= slot₁/T %₂) -4 slot₁/T -5 TestMod.Y +1 (call core.TypeVar :T) +2 (= slot₁/T %₁) +3 slot₁/T +4 TestMod.Y +5 slot₁/T 6 slot₁/T -7 slot₁/T -8 (call core.apply_type %₅ %₆ %₇) -9 (call core.UnionAll %₄ %₈) -10 (= TestMod.X %₉) -11 (return %₉) +7 (call core.apply_type %₄ %₅ %₆) +8 (call core.UnionAll %₃ %₇) +9 (constdecl TestMod.X %₈) +10 latestworld +11 (return %₈) ######################################## # UnionAll expansion in local scope diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 6d6d13396cdab..6495b7450fc90 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -15,16 +15,17 @@ end 5 (call core.svec :x) 6 (call core.svec true) 7 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₅ %₆) -8 TestMod.#f##0 -9 slot₂/x -10 (new %₈ %₉) -11 (= slot₁/f %₁₀) -12 TestMod.#f##0 -13 (call core.svec %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::3:14 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +8 latestworld +9 TestMod.#f##0 +10 slot₂/x +11 (new %₉ %₁₀) +12 (= slot₁/f %₁₁) +13 TestMod.#f##0 +14 (call core.svec %₁₃ core.Any) +15 (call core.svec) +16 SourceLocation::3:14 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)] 1 TestMod.+ 2 (call core.getfield slot₁/#self# :x) @@ -36,8 +37,9 @@ end 8 (call core.getfield %₂ :contents) 9 (call %₁ %₈ slot₂/y) 10 (return %₉) -18 slot₁/f -19 (return %₁₈) +19 latestworld +20 slot₁/f +21 (return %₂₀) ######################################## # Closure declaration with no methods @@ -50,11 +52,12 @@ end 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#no_method_f##0 %₁ %₂) -4 TestMod.#no_method_f##0 -5 (new %₄) -6 (= slot₁/no_method_f %₅) -7 
slot₁/no_method_f -8 (return %₇) +4 latestworld +5 TestMod.#no_method_f##0 +6 (new %₅) +7 (= slot₁/no_method_f %₆) +8 slot₁/no_method_f +9 (return %₈) ######################################## # Closure which sets the value of a captured variable @@ -72,23 +75,25 @@ end 5 (call core.svec :x) 6 (call core.svec true) 7 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₅ %₆) -8 TestMod.#f##1 -9 slot₂/x -10 (new %₈ %₉) -11 (= slot₁/f %₁₀) -12 TestMod.#f##1 -13 (call core.svec %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::3:14 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +8 latestworld +9 TestMod.#f##1 +10 slot₂/x +11 (new %₉ %₁₀) +12 (= slot₁/f %₁₁) +13 TestMod.#f##1 +14 (call core.svec %₁₃ core.Any) +15 (call core.svec) +16 SourceLocation::3:14 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 2 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! %₂ :contents %₁) 4 (return %₁) -18 slot₁/f -19 (return %₁₈) +19 latestworld +20 slot₁/f +21 (return %₂₀) ######################################## # Function where arguments are captured into a closure and assigned @@ -101,27 +106,30 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 (call core.svec :x) -3 (call core.svec true) -4 (call JuliaLowering.eval_closure_type TestMod :#f#g##0 %₂ %₃) -5 TestMod.#f#g##0 -6 (call core.svec %₅) -7 (call core.svec) -8 SourceLocation::2:14 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 latestworld +3 (call core.svec :x) +4 (call core.svec true) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##0 %₃ %₄) +6 latestworld +7 TestMod.#f#g##0 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read)] 1 10 2 (call core.getfield slot₁/#self# :x) 3 (call core.setfield! 
%₂ :contents %₁) 4 (return %₁) -11 TestMod.f -12 (call core.Typeof %₁₁) -13 (call core.svec %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::1:10 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +13 latestworld +14 TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ slots: [slot₁/#self#(!read) slot₂/x slot₃/g(called) slot₄/x(!read)] 1 (= slot₂/x (call core.Box slot₂/x)) 2 TestMod.#f#g##0 @@ -137,8 +145,9 @@ end 12 slot₄/x 13 (call core.getfield %₇ :contents) 14 (return %₁₃) -18 TestMod.f -19 (return %₁₈) +21 latestworld +22 TestMod.f +23 (return %₂₂) ######################################## # Closure where a local `x` is captured but not boxed @@ -150,26 +159,29 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 (call core.svec :x) -3 (call core.svec false) -4 (call JuliaLowering.eval_closure_type TestMod :#f#g##1 %₂ %₃) -5 TestMod.#f#g##1 -6 (call core.svec %₅) -7 (call core.svec) -8 SourceLocation::2:14 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 latestworld +3 (call core.svec :x) +4 (call core.svec false) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##1 %₃ %₄) +6 latestworld +7 TestMod.#f#g##1 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/y(!read)] 1 (call core.getfield slot₁/#self# :x) 2 (= slot₂/y %₁) 3 (return %₁) -11 TestMod.f -12 (call core.Typeof %₁₁) -13 (call core.svec %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::1:10 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +13 latestworld +14 TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ slots: [slot₁/#self#(!read) slot₂/x slot₃/g 
slot₄/z(!read)] 1 TestMod.#f#g##1 2 (call core.typeof slot₂/x) @@ -179,8 +191,9 @@ end 6 slot₂/x 7 (= slot₄/z %₆) 8 (return %₆) -18 TestMod.f -19 (return %₁₈) +21 latestworld +22 TestMod.f +23 (return %₂₂) ######################################## # Closure where a static parameter of an outer function is captured @@ -191,30 +204,33 @@ function f(::T) where T end #--------------------- 1 (method TestMod.f) -2 (call core.svec :T) -3 (call core.svec false) -4 (call JuliaLowering.eval_closure_type TestMod :#f#g##2 %₂ %₃) -5 TestMod.#f#g##2 -6 (call core.svec %₅) -7 (call core.svec) -8 SourceLocation::2:14 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 latestworld +3 (call core.svec :T) +4 (call core.svec false) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##2 %₃ %₄) +6 latestworld +7 TestMod.#f#g##2 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read)] 1 TestMod.use 2 (call core.getfield slot₁/#self# :T) 3 (call %₁ %₂) 4 (return %₃) -11 (= slot₁/T (call core.TypeVar :T)) -12 TestMod.f -13 (call core.Typeof %₁₂) -14 slot₁/T -15 (call core.svec %₁₃ %₁₄) -16 slot₁/T -17 (call core.svec %₁₆) -18 SourceLocation::1:10 -19 (call core.svec %₁₅ %₁₇ %₁₈) -20 --- method core.nothing %₁₉ +13 latestworld +14 (= slot₁/T (call core.TypeVar :T)) +15 TestMod.f +16 (call core.Typeof %₁₅) +17 slot₁/T +18 (call core.svec %₁₆ %₁₇) +19 slot₁/T +20 (call core.svec %₁₉) +21 SourceLocation::1:10 +22 (call core.svec %₁₈ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/g] 1 TestMod.#f#g##2 2 static_parameter₁ @@ -225,8 +241,9 @@ end 7 (= slot₃/g %₆) 8 slot₃/g 9 (return %₈) -21 TestMod.f -22 (return %₂₁) +24 latestworld +25 TestMod.f +26 (return %₂₅) ######################################## # Closure captures with `isdefined` @@ -243,15 +260,17 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 (call core.svec 
:x :y) -3 (call core.svec false true) -4 (call JuliaLowering.eval_closure_type TestMod :#f#g##3 %₂ %₃) -5 TestMod.#f#g##3 -6 (call core.svec %₅) -7 (call core.svec) -8 SourceLocation::2:14 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 latestworld +3 (call core.svec :x :y) +4 (call core.svec false true) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##3 %₃ %₄) +6 latestworld +7 TestMod.#f#g##3 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/z] 1 (= slot₂/z 3) 2 (call core.getfield slot₁/#self# :y) @@ -259,13 +278,14 @@ end 4 (isdefined slot₂/z) 5 (call core.tuple true %₃ %₄) 6 (return %₅) -11 TestMod.f -12 (call core.Typeof %₁₁) -13 (call core.svec %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::1:10 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +13 latestworld +14 TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/y] 1 (= slot₄/y (call core.Box)) 2 TestMod.#f#g##3 @@ -281,8 +301,9 @@ end 12 (call core.isdefined %₁₁ :contents) 13 (call core.tuple %₁₂ true) 14 (return %₁₃) -18 TestMod.f -19 (return %₁₈) +21 latestworld +22 TestMod.f +23 (return %₂₂) ######################################## # Nested captures - here `g` captures `x` because it is needed to initialize @@ -322,13 +343,14 @@ end 3 slot₁/x 4 (call core.setfield! 
%₃ :contents %₂) 5 (method TestMod.f) -6 TestMod.f -7 (call core.Typeof %₆) -8 (call core.svec %₇) -9 (call core.svec) -10 SourceLocation::3:14 -11 (call core.svec %₈ %₉ %₁₀) -12 --- code_info +6 latestworld +7 TestMod.f +8 (call core.Typeof %₇) +9 (call core.svec %₈) +10 (call core.svec) +11 SourceLocation::3:14 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- code_info slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 TestMod.+ 2 (captured_local 1) @@ -342,12 +364,13 @@ end 10 (captured_local 1) 11 (call core.setfield! %₁₀ :contents %₉) 12 (return %₉) -13 slot₁/x -14 (call core.svec %₁₃) -15 (call JuliaLowering.replace_captured_locals! %₁₂ %₁₄) -16 --- method core.nothing %₁₁ %₁₅ -17 TestMod.f -18 (return %₁₇) +14 slot₁/x +15 (call core.svec %₁₄) +16 (call JuliaLowering.replace_captured_locals! %₁₃ %₁₅) +17 --- method core.nothing %₁₂ %₁₆ +18 latestworld +19 TestMod.f +20 (return %₁₉) ######################################## # Anonymous function syntax with -> @@ -356,19 +379,21 @@ x -> x*x 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂) -4 TestMod.#->##0 -5 (new %₄) -6 TestMod.#->##0 -7 (call core.svec %₆ core.Any) -8 (call core.svec) -9 SourceLocation::1:1 -10 (call core.svec %₇ %₈ %₉) -11 --- method core.nothing %₁₀ +4 latestworld +5 TestMod.#->##0 +6 (new %₅) +7 TestMod.#->##0 +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 SourceLocation::1:1 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) -12 (return %₅) +13 latestworld +14 (return %₆) ######################################## # Anonymous function syntax with `function` @@ -379,19 +404,21 @@ end 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :##anon###0 %₁ %₂) -4 TestMod.##anon###0 -5 (new %₄) -6 TestMod.##anon###0 -7 (call core.svec %₆ core.Any) -8 (call core.svec) -9 SourceLocation::1:10 -10 (call core.svec %₇ %₈ %₉) 
-11 --- method core.nothing %₁₀ +4 latestworld +5 TestMod.##anon###0 +6 (new %₅) +7 TestMod.##anon###0 +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 SourceLocation::1:10 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.* 2 (call %₁ slot₂/x slot₂/x) 3 (return %₂) -12 (return %₅) +13 latestworld +14 (return %₆) ######################################## # `do` blocks @@ -407,21 +434,23 @@ end 6 (call core.svec) 7 (call core.svec) 8 (call JuliaLowering.eval_closure_type TestMod :#do##0 %₆ %₇) -9 TestMod.#do##0 -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 SourceLocation::1:13 -13 (call core.svec %₁₀ %₁₁ %₁₂) -14 --- method core.nothing %₁₃ +9 latestworld +10 TestMod.#do##0 +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:13 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(!read) slot₂/y] 1 TestMod.+ 2 (call %₁ slot₂/y 2) 3 (return %₂) -15 TestMod.#do##0 -16 (new %₁₅) -17 TestMod.x -18 (call core.kwcall %₅ %₁ %₁₆ %₁₇) -19 (return %₁₈) +16 latestworld +17 TestMod.#do##0 +18 (new %₁₇) +19 TestMod.x +20 (call core.kwcall %₅ %₁ %₁₈ %₁₉) +21 (return %₂₀) ######################################## # Error: Static parameter clashing with closure name @@ -510,17 +539,18 @@ end 3 (call core.svec :recursive_b) 4 (call core.svec true) 5 (call JuliaLowering.eval_closure_type TestMod :#recursive_a##0 %₃ %₄) -6 TestMod.#recursive_a##0 -7 slot₂/recursive_b -8 (new %₆ %₇) -9 slot₁/recursive_a -10 (call core.setfield! %₉ :contents %₈) -11 TestMod.#recursive_a##0 -12 (call core.svec %₁₁) -13 (call core.svec) -14 SourceLocation::2:14 -15 (call core.svec %₁₂ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +6 latestworld +7 TestMod.#recursive_a##0 +8 slot₂/recursive_b +9 (new %₇ %₈) +10 slot₁/recursive_a +11 (call core.setfield! 
%₁₀ :contents %₉) +12 TestMod.#recursive_a##0 +13 (call core.svec %₁₂) +14 (call core.svec) +15 SourceLocation::2:14 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/recursive_b(!read)] 1 (call core.getfield slot₁/#self# :recursive_b) 2 (call core.isdefined %₁ :contents) @@ -531,20 +561,22 @@ end 7 (call core.getfield %₁ :contents) 8 (call %₇) 9 (return %₈) -17 (call core.svec :recursive_a) -18 (call core.svec true) -19 (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₇ %₁₈) -20 TestMod.#recursive_b##0 -21 slot₁/recursive_a -22 (new %₂₀ %₂₁) -23 slot₂/recursive_b -24 (call core.setfield! %₂₃ :contents %₂₂) -25 TestMod.#recursive_b##0 -26 (call core.svec %₂₅) -27 (call core.svec) -28 SourceLocation::5:14 -29 (call core.svec %₂₆ %₂₇ %₂₈) -30 --- method core.nothing %₂₉ +18 latestworld +19 (call core.svec :recursive_a) +20 (call core.svec true) +21 (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₉ %₂₀) +22 latestworld +23 TestMod.#recursive_b##0 +24 slot₁/recursive_a +25 (new %₂₃ %₂₄) +26 slot₂/recursive_b +27 (call core.setfield! 
%₂₆ :contents %₂₅) +28 TestMod.#recursive_b##0 +29 (call core.svec %₂₈) +30 (call core.svec) +31 SourceLocation::5:14 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ slots: [slot₁/#self#(!read) slot₂/recursive_a(!read)] 1 (call core.getfield slot₁/#self# :recursive_a) 2 (call core.isdefined %₁ :contents) @@ -555,14 +587,15 @@ end 7 (call core.getfield %₁ :contents) 8 (call %₇) 9 (return %₈) -31 slot₂/recursive_b -32 (call core.isdefined %₃₁ :contents) -33 (gotoifnot %₃₂ label₃₅) -34 (goto label₃₇) -35 (newvar slot₄/recursive_b) -36 slot₄/recursive_b -37 (call core.getfield %₃₁ :contents) -38 (return %₃₇) +34 latestworld +35 slot₂/recursive_b +36 (call core.isdefined %₃₅ :contents) +37 (gotoifnot %₃₆ label₃₉) +38 (goto label₄₁) +39 (newvar slot₄/recursive_b) +40 slot₄/recursive_b +41 (call core.getfield %₃₅ :contents) +42 (return %₄₁) ######################################## # Closure with keywords @@ -580,26 +613,28 @@ end 6 (call core.svec :#f_kw_closure#0) 7 (call core.svec true) 8 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₆ %₇) -9 TestMod.#f_kw_closure##0 -10 slot₂/#f_kw_closure#0 -11 (new %₉ %₁₀) -12 (= slot₃/f_kw_closure %₁₁) -13 (call core.svec :y) -14 (call core.svec true) -15 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₁₃ %₁₄) -16 TestMod.##f_kw_closure#0##0 -17 slot₁/y -18 (new %₁₆ %₁₇) -19 slot₂/#f_kw_closure#0 -20 (call core.setfield! 
%₁₉ :contents %₁₈) -21 TestMod.##f_kw_closure#0##0 -22 TestMod.X -23 TestMod.#f_kw_closure##0 -24 (call core.svec %₂₁ %₂₂ %₂₃) -25 (call core.svec) -26 SourceLocation::2:14 -27 (call core.svec %₂₄ %₂₅ %₂₆) -28 --- method core.nothing %₂₇ +9 latestworld +10 TestMod.#f_kw_closure##0 +11 slot₂/#f_kw_closure#0 +12 (new %₁₀ %₁₁) +13 (= slot₃/f_kw_closure %₁₂) +14 (call core.svec :y) +15 (call core.svec true) +16 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₁₄ %₁₅) +17 latestworld +18 TestMod.##f_kw_closure#0##0 +19 slot₁/y +20 (new %₁₈ %₁₉) +21 slot₂/#f_kw_closure#0 +22 (call core.setfield! %₂₁ :contents %₂₀) +23 TestMod.##f_kw_closure#0##0 +24 TestMod.X +25 TestMod.#f_kw_closure##0 +26 (call core.svec %₂₃ %₂₄ %₂₅) +27 (call core.svec) +28 SourceLocation::2:14 +29 (call core.svec %₂₆ %₂₇ %₂₈) +30 --- method core.nothing %₂₉ slots: [slot₁/#self#(!read) slot₂/x slot₃/#self#(!read) slot₄/y(!read)] 1 (meta :nkw 1) 2 TestMod.+ @@ -612,13 +647,14 @@ end 9 (call core.getfield %₃ :contents) 10 (call %₂ slot₂/x %₉) 11 (return %₁₀) -29 (call core.typeof core.kwcall) -30 TestMod.#f_kw_closure##0 -31 (call core.svec %₂₉ core.NamedTuple %₃₀) -32 (call core.svec) -33 SourceLocation::2:14 -34 (call core.svec %₃₁ %₃₂ %₃₃) -35 --- code_info +31 latestworld +32 (call core.typeof core.kwcall) +33 TestMod.#f_kw_closure##0 +34 (call core.svec %₃₂ core.NamedTuple %₃₃) +35 (call core.svec) +36 SourceLocation::2:14 +37 (call core.svec %₃₄ %₃₅ %₃₆) +38 --- code_info slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/x(!read) slot₆/#f_kw_closure#0(!read)] 1 (newvar slot₅/x) 2 (call core.isdefined slot₂/kws :x) @@ -652,16 +688,17 @@ end 30 (call core.getfield %₂₄ :contents) 31 (call %₃₀ %₁₆ slot₃/#self#) 32 (return %₃₁) -36 slot₂/#f_kw_closure#0 -37 (call core.svec %₃₆) -38 (call JuliaLowering.replace_captured_locals! 
%₃₅ %₃₇) -39 --- method core.nothing %₃₄ %₃₈ -40 TestMod.#f_kw_closure##0 -41 (call core.svec %₄₀) -42 (call core.svec) -43 SourceLocation::2:14 -44 (call core.svec %₄₁ %₄₂ %₄₃) -45 --- method core.nothing %₄₄ +39 slot₂/#f_kw_closure#0 +40 (call core.svec %₃₉) +41 (call JuliaLowering.replace_captured_locals! %₃₈ %₄₀) +42 --- method core.nothing %₃₇ %₄₁ +43 latestworld +44 TestMod.#f_kw_closure##0 +45 (call core.svec %₄₄) +46 (call core.svec) +47 SourceLocation::2:14 +48 (call core.svec %₄₅ %₄₆ %₄₇) +49 --- method core.nothing %₄₈ slots: [slot₁/#self# slot₂/#f_kw_closure#0(!read)] 1 (call core.getfield slot₁/#self# :#f_kw_closure#0) 2 (call core.isdefined %₁ :contents) @@ -673,8 +710,9 @@ end 8 TestMod.x_default 9 (call %₇ %₈ slot₁/#self#) 10 (return %₉) -46 slot₃/f_kw_closure -47 (return %₄₆) +50 latestworld +51 slot₃/f_kw_closure +52 (return %₅₁) ######################################## # Closure capturing a typed local must also capture the type expression diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index 08484dfaec3d5..7684fa33ad9d3 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -9,7 +9,7 @@ begin end """) === 1 -# In value position, yeild the right hand side, not `x` +# In value position, yield the right hand side, not `x` @test JuliaLowering.include_string(test_mod, """ local x::Int = 1.0 """) === 1.0 @@ -33,6 +33,30 @@ let end """) === (1, 20) +# Global const mixes +@test JuliaLowering.include_string(test_mod, "global x_g = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_g) +@test !Base.isconst(test_mod, :x_g) +@test test_mod.x_g === 1 + +@test JuliaLowering.include_string(test_mod, "const x_c = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_c) +@test Base.isconst(test_mod, :x_c) +@test test_mod.x_c === 1 + +@test JuliaLowering.include_string(test_mod, "global const x_gc = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_gc) +@test Base.isconst(test_mod, :x_gc) +@test test_mod.x_gc === 1 + 
+@test JuliaLowering.include_string(test_mod, "const global x_cg = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_cg) +@test Base.isconst(test_mod, :x_cg) +@test test_mod.x_cg === 1 +# Possibly worth testing excessive global/const keywords or invalid combinations +# (local + global/const) once we decide whether that's a parse error or a +# lowering error + # Global decls with types @test JuliaLowering.include_string(test_mod, """ global a_typed_global::Int = 10.0 @@ -49,4 +73,23 @@ end @test Core.get_binding_type(test_mod, :a_typed_global_2) === Int @test test_mod.a_typed_global_2 === 10 +@test JuliaLowering.include_string(test_mod, "const x_c_T::Int = 9") === 9 +@test Base.isdefinedglobal(test_mod, :x_c_T) +@test Base.isconst(test_mod, :x_c_T) + +@testset "typed const redeclaration" begin + # redeclaration of the same value used to be allowed + @test_throws ErrorException JuliaLowering.include_string(test_mod, "x_c_T = 9") + @test_throws ErrorException JuliaLowering.include_string(test_mod, "x_c_T = 10") + # redeclaration with const should be OK + @test JuliaLowering.include_string(test_mod, "const x_c_T::Int = 0") === 0 +end + +# Tuple/destructuring assignments +@test JuliaLowering.include_string(test_mod, "(a0, a1, a2) = [1,2,3]") == [1,2,3] + + +# Unsupported for now +@test_throws LoweringError JuliaLowering.include_string(test_mod, "const a,b,c = 1,2,3") + end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index a8b9fd98f0c91..7b8c2d373d338 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -21,63 +21,109 @@ local x::T = 1 # const const xx = 10 #--------------------- -1 (const TestMod.xx) -2 (= TestMod.xx 10) -3 (return 10) +1 10 +2 (constdecl TestMod.xx %₁) +3 latestworld +4 (return %₁) ######################################## # Typed const const xx::T = 10 #--------------------- 1 TestMod.T -2 (call core.set_binding_type! 
TestMod :xx %₁) -3 (const TestMod.xx) -4 (call core.get_binding_type TestMod :xx) -5 (= slot₁/tmp 10) -6 slot₁/tmp -7 (call core.isa %₆ %₄) -8 (gotoifnot %₇ label₁₀) -9 (goto label₁₂) +2 (= slot₁/tmp 10) +3 slot₁/tmp +4 (call core.isa %₃ %₁) +5 (gotoifnot %₄ label₇) +6 (goto label₁₀) +7 slot₁/tmp +8 (call top.convert %₁ %₇) +9 (= slot₁/tmp (call core.typeassert %₈ %₁)) 10 slot₁/tmp -11 (= slot₁/tmp (call top.convert %₄ %₁₀)) +11 (constdecl TestMod.xx %₁₀) +12 latestworld +13 (return %₁₀) + +######################################## +# Error: Const tuple +const xxx,xxxx,xxxxx = 10,20,30 +#--------------------- +LoweringError: +const xxx,xxxx,xxxxx = 10,20,30 +# └─────────────┘ ── Lowering TODO: `const` tuple assignment desugaring + +######################################## +# Const in chain: only first is const +const c0 = v0 = v1 = 123 +#--------------------- +1 123 +2 (constdecl TestMod.c0 %₁) +3 latestworld +4 (globaldecl TestMod.v0) +5 latestworld +6 (call core.get_binding_type TestMod :v0) +7 (= slot₁/tmp %₁) +8 slot₁/tmp +9 (call core.isa %₈ %₆) +10 (gotoifnot %₉ label₁₂) +11 (goto label₁₄) 12 slot₁/tmp -13 (= TestMod.xx %₁₂) -14 (return 10) +13 (= slot₁/tmp (call top.convert %₆ %₁₂)) +14 slot₁/tmp +15 (call core.setglobal! TestMod :v0 %₁₄) +16 (globaldecl TestMod.v1) +17 latestworld +18 (call core.get_binding_type TestMod :v1) +19 (= slot₂/tmp %₁) +20 slot₂/tmp +21 (call core.isa %₂₀ %₁₈) +22 (gotoifnot %₂₁ label₂₄) +23 (goto label₂₆) +24 slot₂/tmp +25 (= slot₂/tmp (call top.convert %₁₈ %₂₄)) +26 slot₂/tmp +27 (call core.setglobal! 
TestMod :v1 %₂₆) +28 (return %₁) ######################################## # Global assignment xx = 10 #--------------------- -1 (call core.get_binding_type TestMod :xx) -2 (= slot₁/tmp 10) -3 slot₁/tmp -4 (call core.isa %₃ %₁) -5 (gotoifnot %₄ label₇) -6 (goto label₉) -7 slot₁/tmp -8 (= slot₁/tmp (call top.convert %₁ %₇)) +1 (globaldecl TestMod.xx) +2 latestworld +3 (call core.get_binding_type TestMod :xx) +4 (= slot₁/tmp 10) +5 slot₁/tmp +6 (call core.isa %₅ %₃) +7 (gotoifnot %₆ label₉) +8 (goto label₁₁) 9 slot₁/tmp -10 (= TestMod.xx %₉) -11 (return 10) +10 (= slot₁/tmp (call top.convert %₃ %₉)) +11 slot₁/tmp +12 (call core.setglobal! TestMod :xx %₁₁) +13 (return 10) ######################################## # Typed global assignment global xx::T = 10 #--------------------- -1 TestMod.T -2 (call core.set_binding_type! TestMod :xx %₁) +1 (globaldecl TestMod.xx TestMod.T) +2 latestworld 3 (global TestMod.xx) -4 (call core.get_binding_type TestMod :xx) -5 (= slot₁/tmp 10) -6 slot₁/tmp -7 (call core.isa %₆ %₄) -8 (gotoifnot %₇ label₁₀) -9 (goto label₁₂) -10 slot₁/tmp -11 (= slot₁/tmp (call top.convert %₄ %₁₀)) -12 slot₁/tmp -13 (= TestMod.xx %₁₂) -14 (return 10) +4 latestworld +5 (globaldecl TestMod.xx) +6 latestworld +7 (call core.get_binding_type TestMod :xx) +8 (= slot₁/tmp 10) +9 slot₁/tmp +10 (call core.isa %₉ %₇) +11 (gotoifnot %₁₀ label₁₃) +12 (goto label₁₅) +13 slot₁/tmp +14 (= slot₁/tmp (call top.convert %₇ %₁₃)) +15 slot₁/tmp +16 (call core.setglobal! 
TestMod :xx %₁₅) +17 (return 10) ######################################## # Error: x declared twice @@ -99,7 +145,7 @@ const local x = 1 #--------------------- LoweringError: const local x = 1 -# ╙ ── unsupported `const` declaration on local variable +└───────────────┘ ── unsupported `const local` declaration ######################################## # Error: Const not supported on locals @@ -110,7 +156,7 @@ end LoweringError: let const x = 1 -# ╙ ── unsupported `const` declaration on local variable +# └────┘ ── unsupported `const` declaration on local variable end ######################################## @@ -122,13 +168,14 @@ function f(x) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read) slot₄/tmp(!read)] 1 1 2 TestMod.Int @@ -156,8 +203,9 @@ end 24 (= slot₂/x %₂₃) 25 slot₂/x 26 (return %₂₅) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Error: global type decls only allowed at top level diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 9abb27e9ebce3..5ea648c2a2160 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -4,8 +4,9 @@ function f end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (return %₂) +2 latestworld +3 TestMod.f +4 (return %₃) ######################################## # Functions with placeholder arg @@ -14,19 +15,21 @@ function f(x, _, y) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any core.Any core.Any) -5 (call 
core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/_(!read) slot₄/y] 1 TestMod.+ 2 (call %₁ slot₂/x slot₄/y) 3 (return %₂) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Functions with argument types only, no name @@ -35,19 +38,21 @@ function f(::T, x) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 TestMod.T -5 (call core.svec %₃ %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.svec %₄ %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] 1 slot₃/x 2 (return %₁) -10 TestMod.f -11 (return %₁₀) +11 latestworld +12 TestMod.f +13 (return %₁₂) ######################################## # Functions argument types @@ -56,19 +61,21 @@ function f(x, y::T) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 TestMod.T -5 (call core.svec %₃ core.Any %₄) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.svec %₄ core.Any %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] 1 TestMod.body 2 (return %₁) -10 TestMod.f -11 (return %₁₀) +11 latestworld +12 TestMod.f +13 (return %₁₂) ######################################## # Functions with slurp of Any 
@@ -77,19 +84,21 @@ function f(x, ys...) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.apply_type core.Vararg core.Any) -5 (call core.svec %₃ core.Any %₄) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.apply_type core.Vararg core.Any) +6 (call core.svec %₄ core.Any %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] 1 TestMod.body 2 (return %₁) -10 TestMod.f -11 (return %₁₀) +11 latestworld +12 TestMod.f +13 (return %₁₂) ######################################## # Functions with slurp of T @@ -98,20 +107,22 @@ function f(x, ys::T...) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 TestMod.T -5 (call core.apply_type core.Vararg %₄) -6 (call core.svec %₃ core.Any %₅) -7 (call core.svec) -8 SourceLocation::1:10 -9 (call core.svec %₆ %₇ %₈) -10 --- method core.nothing %₉ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.apply_type core.Vararg %₅) +7 (call core.svec %₄ core.Any %₆) +8 (call core.svec) +9 SourceLocation::1:10 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] 1 TestMod.body 2 (return %₁) -11 TestMod.f -12 (return %₁₁) +12 latestworld +13 TestMod.f +14 (return %₁₃) ######################################## # Error: Function with slurp not in last position arg @@ -132,30 +143,32 @@ function f(::T, ::U, ::V) where T where {U,V} end #--------------------- 1 (method TestMod.f) -2 (= slot₂/U (call core.TypeVar :U)) -3 (= slot₃/V (call core.TypeVar :V)) -4 (= slot₁/T (call core.TypeVar :T)) -5 TestMod.f -6 (call core.Typeof %₅) -7 slot₁/T -8 slot₂/U -9 slot₃/V -10 (call core.svec %₆ %₇ %₈ %₉) -11 slot₂/U -12 slot₃/V -13 slot₁/T -14 
(call core.svec %₁₁ %₁₂ %₁₃) -15 SourceLocation::1:10 -16 (call core.svec %₁₀ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +2 latestworld +3 (= slot₂/U (call core.TypeVar :U)) +4 (= slot₃/V (call core.TypeVar :V)) +5 (= slot₁/T (call core.TypeVar :T)) +6 TestMod.f +7 (call core.Typeof %₆) +8 slot₁/T +9 slot₂/U +10 slot₃/V +11 (call core.svec %₇ %₈ %₉ %₁₀) +12 slot₂/U +13 slot₃/V +14 slot₁/T +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 SourceLocation::1:10 +17 (call core.svec %₁₁ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/_(!read) slot₄/_(!read)] 1 static_parameter₃ 2 static_parameter₁ 3 static_parameter₂ 4 (call core.tuple %₁ %₂ %₃) 5 (return %₄) -18 TestMod.f -19 (return %₁₈) +19 latestworld +20 TestMod.f +21 (return %₂₀) ######################################## # Static parameter with bounds and used with apply_type in argument @@ -164,25 +177,27 @@ function f(::S{T}) where X <: T <: Y end #--------------------- 1 (method TestMod.f) -2 TestMod.X -3 TestMod.Y -4 (= slot₁/T (call core.TypeVar :T %₂ %₃)) -5 TestMod.f -6 (call core.Typeof %₅) -7 TestMod.S -8 slot₁/T -9 (call core.apply_type %₇ %₈) -10 (call core.svec %₆ %₉) -11 slot₁/T -12 (call core.svec %₁₁) -13 SourceLocation::1:10 -14 (call core.svec %₁₀ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +2 latestworld +3 TestMod.X +4 TestMod.Y +5 (= slot₁/T (call core.TypeVar :T %₃ %₄)) +6 TestMod.f +7 (call core.Typeof %₆) +8 TestMod.S +9 slot₁/T +10 (call core.apply_type %₈ %₉) +11 (call core.svec %₇ %₁₀) +12 slot₁/T +13 (call core.svec %₁₂) +14 SourceLocation::1:10 +15 (call core.svec %₁₁ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 static_parameter₁ 2 (return %₁) -16 TestMod.f -17 (return %₁₆) +17 latestworld +18 TestMod.f +19 (return %₁₈) ######################################## # Static parameter which is used only in the bounds of another static parameter @@ -192,28 +207,30 @@ function f(x, y::S) where {T, S<:AbstractVector{T}} end 
#--------------------- 1 (method TestMod.f) -2 (= slot₂/T (call core.TypeVar :T)) -3 TestMod.AbstractVector -4 slot₂/T -5 (call core.apply_type %₃ %₄) -6 (= slot₁/S (call core.TypeVar :S %₅)) -7 TestMod.f -8 (call core.Typeof %₇) -9 slot₁/S -10 (call core.svec %₈ core.Any %₉) -11 slot₂/T -12 slot₁/S -13 (call core.svec %₁₁ %₁₂) -14 SourceLocation::1:10 -15 (call core.svec %₁₀ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +2 latestworld +3 (= slot₂/T (call core.TypeVar :T)) +4 TestMod.AbstractVector +5 slot₂/T +6 (call core.apply_type %₄ %₅) +7 (= slot₁/S (call core.TypeVar :S %₆)) +8 TestMod.f +9 (call core.Typeof %₈) +10 slot₁/S +11 (call core.svec %₉ core.Any %₁₀) +12 slot₂/T +13 slot₁/S +14 (call core.svec %₁₂ %₁₃) +15 SourceLocation::1:10 +16 (call core.svec %₁₁ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] 1 static_parameter₁ 2 static_parameter₂ 3 (call core.tuple %₁ %₂) 4 (return %₃) -17 TestMod.f -18 (return %₁₇) +18 latestworld +19 TestMod.f +20 (return %₁₉) ######################################## # Error: Static parameter which is unused @@ -237,13 +254,14 @@ function f(x)::Int end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read)] 1 TestMod.Int 2 (gotoifnot slot₂/x label₃) @@ -257,8 +275,9 @@ end 10 (= slot₃/tmp (call core.typeassert %₉ %₁)) 11 slot₃/tmp 12 (return %₁₁) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Callable type @@ -275,7 +294,8 @@ end slots: [slot₁/#self#(!read) slot₂/x] 1 slot₂/x 2 (return %₁) -7 (return 
core.nothing) +7 latestworld +8 (return core.nothing) ######################################## # Callable type with instance @@ -292,7 +312,8 @@ end slots: [slot₁/y slot₂/x] 1 (call core.tuple slot₁/y slot₂/x) 2 (return %₁) -7 (return core.nothing) +7 latestworld +8 (return core.nothing) ######################################## # `where` params used in callable object type @@ -313,7 +334,8 @@ end slots: [slot₁/x(!read)] 1 static_parameter₁ 2 (return %₁) -11 (return core.nothing) +11 latestworld +12 (return core.nothing) ######################################## # Function with module ref in name @@ -330,7 +352,8 @@ end 8 --- method core.nothing %₇ slots: [slot₁/#self#(!read)] 1 (return core.nothing) -9 (return core.nothing) +9 latestworld +10 (return core.nothing) ######################################## # Error: Invalid dotop function name @@ -359,44 +382,48 @@ function f(x::T, y::S=1, z::U=2) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 TestMod.T -5 (call core.svec %₃ %₄) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.svec %₄ %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) -10 TestMod.f -11 (call core.Typeof %₁₀) -12 TestMod.T -13 TestMod.S -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 (call core.svec) -16 SourceLocation::1:10 -17 (call core.svec %₁₄ %₁₅ %₁₆) -18 --- method core.nothing %₁₇ +11 latestworld +12 TestMod.f +13 (call core.Typeof %₁₂) +14 TestMod.T +15 TestMod.S +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -19 TestMod.f -20 (call core.Typeof %₁₉) 
-21 TestMod.T -22 TestMod.S -23 TestMod.U -24 (call core.svec %₂₀ %₂₁ %₂₂ %₂₃) -25 (call core.svec) -26 SourceLocation::1:10 -27 (call core.svec %₂₄ %₂₅ %₂₆) -28 --- method core.nothing %₂₇ +21 latestworld +22 TestMod.f +23 (call core.Typeof %₂₂) +24 TestMod.T +25 TestMod.S +26 TestMod.U +27 (call core.svec %₂₃ %₂₄ %₂₅ %₂₆) +28 (call core.svec) +29 SourceLocation::1:10 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z(!read)] 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) -29 TestMod.f -30 (return %₂₉) +32 latestworld +33 TestMod.f +34 (return %₃₃) ######################################## # Default positional args which depend on other args @@ -405,38 +432,42 @@ function f(x=1, y=x) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.svec %₁₀ core.Any) -12 (call core.svec) -13 SourceLocation::1:10 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x slot₂/x) 2 (return %₁) -16 TestMod.f -17 (call core.Typeof %₁₆) -18 (call core.svec %₁₇ core.Any core.Any) -19 (call core.svec) -20 SourceLocation::1:10 -21 (call core.svec %₁₈ %₁₉ %₂₀) -22 --- method core.nothing %₂₁ +18 latestworld +19 TestMod.f +20 (call core.Typeof %₁₉) +21 (call core.svec %₂₀ core.Any core.Any) +22 
(call core.svec) +23 SourceLocation::1:10 +24 (call core.svec %₂₁ %₂₂ %₂₃) +25 --- method core.nothing %₂₄ slots: [slot₁/#self#(!read) slot₂/x slot₃/y] 1 (call core.tuple slot₂/x slot₃/y) 2 (return %₁) -23 TestMod.f -24 (return %₂₃) +26 latestworld +27 TestMod.f +28 (return %₂₇) ######################################## # Default positional args with missing arg names (implicit placeholders) @@ -445,41 +476,45 @@ function f(::Int, y=1, z=2) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 TestMod.Int -5 (call core.svec %₃ %₄) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.Int +6 (call core.svec %₄ %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(called) slot₂/_] 1 (call slot₁/#self# slot₂/_ 1 2) 2 (return %₁) -10 TestMod.f -11 (call core.Typeof %₁₀) -12 TestMod.Int -13 (call core.svec %₁₁ %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::1:10 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +11 latestworld +12 TestMod.f +13 (call core.Typeof %₁₂) +14 TestMod.Int +15 (call core.svec %₁₃ %₁₄ core.Any) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ slots: [slot₁/#self#(called) slot₂/_ slot₃/y] 1 (call slot₁/#self# slot₂/_ slot₃/y 2) 2 (return %₁) -18 TestMod.f -19 (call core.Typeof %₁₈) -20 TestMod.Int -21 (call core.svec %₁₉ %₂₀ core.Any core.Any) -22 (call core.svec) -23 SourceLocation::1:10 -24 (call core.svec %₂₁ %₂₂ %₂₃) -25 --- method core.nothing %₂₄ +20 latestworld +21 TestMod.f +22 (call core.Typeof %₂₁) +23 TestMod.Int +24 (call core.svec %₂₂ %₂₃ core.Any core.Any) +25 (call core.svec) +26 SourceLocation::1:10 +27 (call core.svec %₂₄ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/y slot₄/z] 1 (call 
core.tuple slot₃/y slot₄/z) 2 (return %₁) -26 TestMod.f -27 (return %₂₆) +29 latestworld +30 TestMod.f +31 (return %₃₀) ######################################## # Default positional args with placeholders @@ -488,30 +523,33 @@ function f(_::Int, x=1) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 TestMod.Int -5 (call core.svec %₃ %₄) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.Int +6 (call core.svec %₄ %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(called) slot₂/_] 1 (call slot₁/#self# slot₂/_ 1) 2 (return %₁) -10 TestMod.f -11 (call core.Typeof %₁₀) -12 TestMod.Int -13 (call core.svec %₁₁ %₁₂ core.Any) -14 (call core.svec) -15 SourceLocation::1:10 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method core.nothing %₁₆ +11 latestworld +12 TestMod.f +13 (call core.Typeof %₁₂) +14 TestMod.Int +15 (call core.svec %₁₃ %₁₄ core.Any) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] 1 slot₃/x 2 (return %₁) -18 TestMod.f -19 (return %₁₈) +20 latestworld +21 TestMod.f +22 (return %₂₁) ######################################## # Positional args with defaults and `where` clauses @@ -520,55 +558,59 @@ function f(x::T, y::S=1, z::U=2) where {T,S<:T,U<:S} end #--------------------- 1 (method TestMod.f) -2 (= slot₂/T (call core.TypeVar :T)) -3 slot₂/T -4 (= slot₁/S (call core.TypeVar :S %₃)) -5 slot₁/S -6 (= slot₃/U (call core.TypeVar :U %₅)) -7 TestMod.f -8 (call core.Typeof %₇) -9 slot₂/T -10 (call core.svec %₈ %₉) -11 slot₂/T -12 (call core.svec %₁₁) -13 SourceLocation::1:10 -14 (call core.svec %₁₀ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +2 latestworld +3 (= slot₂/T (call core.TypeVar :T)) +4 slot₂/T +5 (= slot₁/S (call 
core.TypeVar :S %₄)) +6 slot₁/S +7 (= slot₃/U (call core.TypeVar :U %₆)) +8 TestMod.f +9 (call core.Typeof %₈) +10 slot₂/T +11 (call core.svec %₉ %₁₀) +12 slot₂/T +13 (call core.svec %₁₂) +14 SourceLocation::1:10 +15 (call core.svec %₁₁ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ slots: [slot₁/#self#(called) slot₂/x] 1 (call slot₁/#self# slot₂/x 1 2) 2 (return %₁) -16 TestMod.f -17 (call core.Typeof %₁₆) -18 slot₂/T -19 slot₁/S -20 (call core.svec %₁₇ %₁₈ %₁₉) -21 slot₂/T -22 slot₁/S -23 (call core.svec %₂₁ %₂₂) -24 SourceLocation::1:10 -25 (call core.svec %₂₀ %₂₃ %₂₄) -26 --- method core.nothing %₂₅ +17 latestworld +18 TestMod.f +19 (call core.Typeof %₁₈) +20 slot₂/T +21 slot₁/S +22 (call core.svec %₁₉ %₂₀ %₂₁) +23 slot₂/T +24 slot₁/S +25 (call core.svec %₂₃ %₂₄) +26 SourceLocation::1:10 +27 (call core.svec %₂₂ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -27 TestMod.f -28 (call core.Typeof %₂₇) -29 slot₂/T -30 slot₁/S -31 slot₃/U -32 (call core.svec %₂₈ %₂₉ %₃₀ %₃₁) -33 slot₂/T -34 slot₁/S -35 slot₃/U -36 (call core.svec %₃₃ %₃₄ %₃₅) -37 SourceLocation::1:10 -38 (call core.svec %₃₂ %₃₆ %₃₇) -39 --- method core.nothing %₃₈ +29 latestworld +30 TestMod.f +31 (call core.Typeof %₃₀) +32 slot₂/T +33 slot₁/S +34 slot₃/U +35 (call core.svec %₃₁ %₃₂ %₃₃ %₃₄) +36 slot₂/T +37 slot₁/S +38 slot₃/U +39 (call core.svec %₃₆ %₃₇ %₃₈) +40 SourceLocation::1:10 +41 (call core.svec %₃₅ %₃₉ %₄₀) +42 --- method core.nothing %₄₁ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] 1 (call core.tuple slot₂/x slot₃/y slot₄/z) 2 (return %₁) -40 TestMod.f -41 (return %₄₀) +43 latestworld +44 TestMod.f +45 (return %₄₄) ######################################## # Positional args and type parameters with transitive dependencies @@ -579,56 +621,60 @@ function f(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} end #--------------------- 1 (method TestMod.f) -2 (= slot₂/T (call core.TypeVar :T)) -3 
TestMod.AbstractVector -4 slot₂/T -5 (call core.apply_type %₃ %₄) -6 (= slot₁/S (call core.TypeVar :S %₅)) -7 (= slot₃/U (call core.TypeVar :U)) -8 TestMod.f -9 (call core.Typeof %₈) -10 (call core.svec %₉ core.Any) -11 (call core.svec) -12 SourceLocation::1:10 -13 (call core.svec %₁₀ %₁₁ %₁₂) -14 --- method core.nothing %₁₃ +2 latestworld +3 (= slot₂/T (call core.TypeVar :T)) +4 TestMod.AbstractVector +5 slot₂/T +6 (call core.apply_type %₄ %₅) +7 (= slot₁/S (call core.TypeVar :S %₆)) +8 (= slot₃/U (call core.TypeVar :U)) +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ slots: [slot₁/#self#(called) slot₂/x] 1 (call top.vect 1) 2 (call slot₁/#self# slot₂/x %₁ 2) 3 (return %₂) -15 TestMod.f -16 (call core.Typeof %₁₅) -17 slot₁/S -18 (call core.svec %₁₆ core.Any %₁₇) -19 slot₂/T -20 slot₁/S -21 (call core.svec %₁₉ %₂₀) -22 SourceLocation::1:10 -23 (call core.svec %₁₈ %₂₁ %₂₂) -24 --- method core.nothing %₂₃ +16 latestworld +17 TestMod.f +18 (call core.Typeof %₁₇) +19 slot₁/S +20 (call core.svec %₁₈ core.Any %₁₉) +21 slot₂/T +22 slot₁/S +23 (call core.svec %₂₁ %₂₂) +24 SourceLocation::1:10 +25 (call core.svec %₂₀ %₂₃ %₂₄) +26 --- method core.nothing %₂₅ slots: [slot₁/#self#(called) slot₂/x slot₃/y] 1 (call slot₁/#self# slot₂/x slot₃/y 2) 2 (return %₁) -25 TestMod.f -26 (call core.Typeof %₂₅) -27 slot₁/S -28 slot₃/U -29 (call core.svec %₂₆ core.Any %₂₇ %₂₈) -30 slot₂/T -31 slot₁/S -32 slot₃/U -33 (call core.svec %₃₀ %₃₁ %₃₂) -34 SourceLocation::1:10 -35 (call core.svec %₂₉ %₃₃ %₃₄) -36 --- method core.nothing %₃₅ +27 latestworld +28 TestMod.f +29 (call core.Typeof %₂₈) +30 slot₁/S +31 slot₃/U +32 (call core.svec %₂₉ core.Any %₃₀ %₃₁) +33 slot₂/T +34 slot₁/S +35 slot₃/U +36 (call core.svec %₃₃ %₃₄ %₃₅) +37 SourceLocation::1:10 +38 (call core.svec %₃₂ %₃₆ %₃₇) +39 --- method core.nothing %₃₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/y 
slot₄/z] 1 static_parameter₁ 2 static_parameter₂ 3 static_parameter₃ 4 (call core.tuple slot₂/x slot₃/y slot₄/z %₁ %₂ %₃) 5 (return %₄) -37 TestMod.f -38 (return %₃₇) +40 latestworld +41 TestMod.f +42 (return %₄₁) ######################################## # Default positional args are allowed before trailing slurp with no default @@ -637,29 +683,32 @@ function f(x=1, ys...) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.apply_type core.Vararg core.Any) -12 (call core.svec %₁₀ core.Any %₁₁) -13 (call core.svec) -14 SourceLocation::1:10 -15 (call core.svec %₁₂ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.apply_type core.Vararg core.Any) +14 (call core.svec %₁₂ core.Any %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys] 1 slot₃/ys 2 (return %₁) -17 TestMod.f -18 (return %₁₇) +19 latestworld +20 TestMod.f +21 (return %₂₀) ######################################## # Error: Default positional args before non-default arg @@ -676,33 +725,36 @@ end ######################################## # Positional arg with slurp and default function f(xs...=1) - xs -end -#--------------------- -1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ + xs +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 
TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1) 2 (return %₁) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.apply_type core.Vararg core.Any) -12 (call core.svec %₁₀ %₁₁) -13 (call core.svec) -14 SourceLocation::1:10 -15 (call core.svec %₁₂ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.apply_type core.Vararg core.Any) +14 (call core.svec %₁₂ %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) slot₂/xs] 1 slot₂/xs 2 (return %₁) -17 TestMod.f -18 (return %₁₇) +19 latestworld +20 TestMod.f +21 (return %₂₀) ######################################## # Positional arg with slurp and splatted default value @@ -711,30 +763,33 @@ function f(xs...=(1,2)...) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#] 1 (call core.tuple 1 2) 2 (call core._apply_iterate top.iterate slot₁/#self# %₁) 3 (return %₂) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.apply_type core.Vararg core.Any) -12 (call core.svec %₁₀ %₁₁) -13 (call core.svec) -14 SourceLocation::1:10 -15 (call core.svec %₁₂ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.apply_type core.Vararg core.Any) +14 (call core.svec %₁₂ %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ slots: [slot₁/#self#(!read) 
slot₂/xs] 1 slot₂/xs 2 (return %₁) -17 TestMod.f -18 (return %₁₇) +19 latestworld +20 TestMod.f +21 (return %₂₀) ######################################## # Trivial function argument destructuring @@ -742,13 +797,14 @@ function f(x, (y,z), w) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any core.Any core.Any) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/destructured_arg slot₄/w(!read) slot₅/iterstate slot₆/y(!read) slot₇/z(!read)] 1 (call top.indexed_iterate slot₃/destructured_arg 1) 2 (= slot₆/y (call core.getfield %₁ 1)) @@ -757,8 +813,9 @@ end 5 (call top.indexed_iterate slot₃/destructured_arg 2 %₄) 6 (= slot₇/z (call core.getfield %₅ 1)) 7 (return core.nothing) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Function argument destructuring combined with splats, types and and defaults @@ -766,32 +823,35 @@ function f((x,)::T...=rhs) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called)] 1 TestMod.rhs 2 (call slot₁/#self# %₁) 3 (return %₂) -9 TestMod.f -10 (call core.Typeof %₉) -11 TestMod.T -12 (call core.apply_type core.Vararg %₁₁) -13 (call core.svec %₁₀ %₁₂) -14 (call core.svec) -15 SourceLocation::1:10 -16 (call core.svec %₁₃ %₁₄ %₁₅) -17 --- method 
core.nothing %₁₆ +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 TestMod.T +14 (call core.apply_type core.Vararg %₁₃) +15 (call core.svec %₁₂ %₁₄) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/x(!read)] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₃/x (call core.getfield %₁ 1)) 3 (return core.nothing) -18 TestMod.f -19 (return %₁₈) +20 latestworld +21 TestMod.f +22 (return %₂₁) ######################################## # Function argument destructuring combined with splats, types and and defaults @@ -799,24 +859,26 @@ function f(x=default_x)::T end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(called)] 1 TestMod.default_x 2 (call slot₁/#self# %₁) 3 (return %₂) -9 TestMod.f -10 (call core.Typeof %₉) -11 (call core.svec %₁₀ core.Any) -12 (call core.svec) -13 SourceLocation::1:10 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/tmp(!read)] 1 TestMod.T 2 (= slot₃/tmp core.nothing) @@ -829,8 +891,9 @@ end 9 (= slot₃/tmp (call core.typeassert %₈ %₁)) 10 slot₃/tmp 11 (return %₁₀) -16 TestMod.f -17 (return %₁₆) +18 latestworld +19 TestMod.f +20 (return %₁₉) ######################################## # Duplicate destructured placeholders ok @@ -838,21 +901,23 @@ function f((_,), (_,)) end 
#--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any core.Any) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/destructured_arg] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (call core.getfield %₁ 1) 3 (call top.indexed_iterate slot₃/destructured_arg 1) 4 (call core.getfield %₃ 1) 5 (return core.nothing) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Slot flags @@ -861,20 +926,22 @@ function f(@nospecialize(x), g, y) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any core.Any core.Any) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/x(nospecialize,!read) slot₃/g(called) slot₄/y] 1 TestMod.+ 2 (call slot₃/g) 3 (call %₁ %₂ slot₄/y) 4 (return %₃) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Function return without arguments @@ -884,19 +951,21 @@ function f() end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call 
core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read)] 1 (return core.nothing) 2 TestMod.after_return 3 (return %₂) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Binding docs to functions @@ -907,19 +976,21 @@ function f() end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃) -5 (call core.svec) -6 SourceLocation::4:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::4:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read)] 1 (return core.nothing) -9 TestMod.f -10 (call JuliaLowering.bind_docs! %₉ "some docs\n" %₇) +10 latestworld 11 TestMod.f -12 (return %₁₁) +12 (call JuliaLowering.bind_docs! %₁₁ "some docs\n" %₈) +13 TestMod.f +14 (return %₁₃) ######################################## # Binding docs to callable type @@ -937,9 +1008,10 @@ end 6 --- method core.nothing %₅ slots: [slot₁/x(!read)] 1 (return core.nothing) -7 TestMod.T -8 (call JuliaLowering.bind_docs! %₇ "some docs\n" %₅) -9 (return core.nothing) +7 latestworld +8 TestMod.T +9 (call JuliaLowering.bind_docs! %₈ "some docs\n" %₅) +10 (return core.nothing) ######################################## # Keyword function with defaults. 
@@ -956,57 +1028,62 @@ function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) end #--------------------- 1 (method TestMod.f_kw_simple) -2 (method TestMod.#f_kw_simple#0) -3 TestMod.#f_kw_simple#0 -4 (call core.Typeof %₃) -5 TestMod.Char -6 TestMod.Bool -7 TestMod.f_kw_simple -8 (call core.Typeof %₇) -9 TestMod.Int -10 TestMod.Float64 -11 (call core.svec %₄ %₅ %₆ %₈ %₉ %₁₀) -12 (call core.svec) -13 SourceLocation::1:10 -14 (call core.svec %₁₁ %₁₂ %₁₃) -15 --- method core.nothing %₁₄ +2 latestworld +3 (method TestMod.#f_kw_simple#0) +4 latestworld +5 TestMod.#f_kw_simple#0 +6 (call core.Typeof %₅) +7 TestMod.Char +8 TestMod.Bool +9 TestMod.f_kw_simple +10 (call core.Typeof %₉) +11 TestMod.Int +12 TestMod.Float64 +13 (call core.svec %₆ %₇ %₈ %₁₀ %₁₁ %₁₂) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/#self#(!read) slot₅/a slot₆/b] 1 (meta :nkw 2) 2 (call core.tuple slot₅/a slot₆/b slot₂/x slot₃/y) 3 (return %₂) -16 (call core.typeof core.kwcall) -17 TestMod.f_kw_simple -18 (call core.Typeof %₁₇) -19 (call core.svec %₁₆ core.NamedTuple %₁₈) -20 (call core.svec) -21 SourceLocation::1:10 -22 (call core.svec %₁₉ %₂₀ %₂₁) -23 --- method core.nothing %₂₂ +18 latestworld +19 (call core.typeof core.kwcall) +20 TestMod.f_kw_simple +21 (call core.Typeof %₂₀) +22 (call core.svec %₁₉ core.NamedTuple %₂₁) +23 (call core.svec) +24 SourceLocation::1:10 +25 (call core.svec %₂₂ %₂₃ %₂₄) +26 --- method core.nothing %₂₅ slots: [slot₁/#self#(called) slot₂/kws slot₃/#self#] 1 (call slot₁/#self# slot₂/kws slot₃/#self# 1 1.0) 2 (return %₁) -24 (call core.typeof core.kwcall) -25 TestMod.f_kw_simple -26 (call core.Typeof %₂₅) -27 TestMod.Int -28 (call core.svec %₂₄ core.NamedTuple %₂₆ %₂₇) -29 (call core.svec) -30 SourceLocation::1:10 -31 (call core.svec %₂₈ %₂₉ %₃₀) -32 --- method core.nothing %₃₁ +27 latestworld +28 (call core.typeof core.kwcall) +29 
TestMod.f_kw_simple +30 (call core.Typeof %₂₉) +31 TestMod.Int +32 (call core.svec %₂₈ core.NamedTuple %₃₀ %₃₁) +33 (call core.svec) +34 SourceLocation::1:10 +35 (call core.svec %₃₂ %₃₃ %₃₄) +36 --- method core.nothing %₃₅ slots: [slot₁/#self#(called) slot₂/kws slot₃/#self# slot₄/a] 1 (call slot₁/#self# slot₂/kws slot₃/#self# slot₄/a 1.0) 2 (return %₁) -33 (call core.typeof core.kwcall) -34 TestMod.f_kw_simple -35 (call core.Typeof %₃₄) -36 TestMod.Int -37 TestMod.Float64 -38 (call core.svec %₃₃ core.NamedTuple %₃₅ %₃₆ %₃₇) -39 (call core.svec) -40 SourceLocation::1:10 -41 (call core.svec %₃₈ %₃₉ %₄₀) -42 --- method core.nothing %₄₁ +37 latestworld +38 (call core.typeof core.kwcall) +39 TestMod.f_kw_simple +40 (call core.Typeof %₃₉) +41 TestMod.Int +42 TestMod.Float64 +43 (call core.svec %₃₈ core.NamedTuple %₄₀ %₄₁ %₄₂) +44 (call core.svec) +45 SourceLocation::1:10 +46 (call core.svec %₄₃ %₄₄ %₄₅) +47 --- method core.nothing %₄₆ slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/kwtmp slot₇/x(!read) slot₈/y(!read)] 1 (newvar slot₇/x) 2 (newvar slot₈/y) @@ -1048,42 +1125,46 @@ end 38 TestMod.#f_kw_simple#0 39 (call %₃₈ %₁₆ %₃₀ slot₃/#self# slot₄/a slot₅/b) 40 (return %₃₉) -43 TestMod.f_kw_simple -44 (call core.Typeof %₄₃) -45 (call core.svec %₄₄) -46 (call core.svec) -47 SourceLocation::1:10 -48 (call core.svec %₄₅ %₄₆ %₄₇) -49 --- method core.nothing %₄₈ +48 latestworld +49 TestMod.f_kw_simple +50 (call core.Typeof %₄₉) +51 (call core.svec %₅₀) +52 (call core.svec) +53 SourceLocation::1:10 +54 (call core.svec %₅₁ %₅₂ %₅₃) +55 --- method core.nothing %₅₄ slots: [slot₁/#self#(called)] 1 (call slot₁/#self# 1 1.0) 2 (return %₁) -50 TestMod.f_kw_simple -51 (call core.Typeof %₅₀) -52 TestMod.Int -53 (call core.svec %₅₁ %₅₂) -54 (call core.svec) -55 SourceLocation::1:10 -56 (call core.svec %₅₃ %₅₄ %₅₅) -57 --- method core.nothing %₅₆ +56 latestworld +57 TestMod.f_kw_simple +58 (call core.Typeof %₅₇) +59 TestMod.Int +60 (call core.svec %₅₈ %₅₉) +61 
(call core.svec) +62 SourceLocation::1:10 +63 (call core.svec %₆₀ %₆₁ %₆₂) +64 --- method core.nothing %₆₃ slots: [slot₁/#self#(called) slot₂/a] 1 (call slot₁/#self# slot₂/a 1.0) 2 (return %₁) -58 TestMod.f_kw_simple -59 (call core.Typeof %₅₈) -60 TestMod.Int -61 TestMod.Float64 -62 (call core.svec %₅₉ %₆₀ %₆₁) -63 (call core.svec) -64 SourceLocation::1:10 -65 (call core.svec %₆₂ %₆₃ %₆₄) -66 --- method core.nothing %₆₅ +65 latestworld +66 TestMod.f_kw_simple +67 (call core.Typeof %₆₆) +68 TestMod.Int +69 TestMod.Float64 +70 (call core.svec %₆₇ %₆₈ %₆₉) +71 (call core.svec) +72 SourceLocation::1:10 +73 (call core.svec %₇₀ %₇₁ %₇₂) +74 --- method core.nothing %₇₃ slots: [slot₁/#self# slot₂/a slot₃/b] 1 TestMod.#f_kw_simple#0 2 (call %₁ 'a' true slot₁/#self# slot₂/a slot₃/b) 3 (return %₂) -67 TestMod.f_kw_simple -68 (return %₆₇) +75 latestworld +76 TestMod.f_kw_simple +77 (return %₇₆) ######################################## # Keyword slurping - simple forwarding of all kws @@ -1092,50 +1173,55 @@ function f_kw_slurp_simple(; all_kws...) 
end #--------------------- 1 (method TestMod.f_kw_slurp_simple) -2 (method TestMod.#f_kw_slurp_simple#0) -3 TestMod.#f_kw_slurp_simple#0 -4 (call core.Typeof %₃) -5 (call top.pairs core.NamedTuple) -6 TestMod.f_kw_slurp_simple -7 (call core.Typeof %₆) -8 (call core.svec %₄ %₅ %₇) -9 (call core.svec) -10 SourceLocation::1:10 -11 (call core.svec %₈ %₉ %₁₀) -12 --- method core.nothing %₁₁ +2 latestworld +3 (method TestMod.#f_kw_slurp_simple#0) +4 latestworld +5 TestMod.#f_kw_slurp_simple#0 +6 (call core.Typeof %₅) +7 (call top.pairs core.NamedTuple) +8 TestMod.f_kw_slurp_simple +9 (call core.Typeof %₈) +10 (call core.svec %₆ %₇ %₉) +11 (call core.svec) +12 SourceLocation::1:10 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ slots: [slot₁/#self#(!read) slot₂/all_kws slot₃/#self#(!read)] 1 (meta :nkw 1) 2 slot₂/all_kws 3 (return %₂) -13 (call core.typeof core.kwcall) -14 TestMod.f_kw_slurp_simple -15 (call core.Typeof %₁₄) -16 (call core.svec %₁₃ core.NamedTuple %₁₅) -17 (call core.svec) -18 SourceLocation::1:10 -19 (call core.svec %₁₆ %₁₇ %₁₈) -20 --- method core.nothing %₁₉ +15 latestworld +16 (call core.typeof core.kwcall) +17 TestMod.f_kw_slurp_simple +18 (call core.Typeof %₁₇) +19 (call core.svec %₁₆ core.NamedTuple %₁₈) +20 (call core.svec) +21 SourceLocation::1:10 +22 (call core.svec %₁₉ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/all_kws(!read)] 1 (newvar slot₄/all_kws) 2 (call top.pairs slot₂/kws) 3 TestMod.#f_kw_slurp_simple#0 4 (call %₃ %₂ slot₃/#self#) 5 (return %₄) -21 TestMod.f_kw_slurp_simple -22 (call core.Typeof %₂₁) -23 (call core.svec %₂₂) -24 (call core.svec) -25 SourceLocation::1:10 -26 (call core.svec %₂₃ %₂₄ %₂₅) -27 --- method core.nothing %₂₆ +24 latestworld +25 TestMod.f_kw_slurp_simple +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 SourceLocation::1:10 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#self#] 1 
TestMod.#f_kw_slurp_simple#0 2 (call core.NamedTuple) 3 (call top.pairs %₂) 4 (call %₁ %₃ slot₁/#self#) 5 (return %₄) -28 TestMod.f_kw_slurp_simple -29 (return %₂₈) +32 latestworld +33 TestMod.f_kw_slurp_simple +34 (return %₃₃) ######################################## # Keyword slurping @@ -1144,29 +1230,32 @@ function f_kw_slurp(; x=x_default, non_x_kws...) end #--------------------- 1 (method TestMod.f_kw_slurp) -2 (method TestMod.#f_kw_slurp#0) -3 TestMod.#f_kw_slurp#0 -4 (call core.Typeof %₃) -5 (call top.pairs core.NamedTuple) -6 TestMod.f_kw_slurp -7 (call core.Typeof %₆) -8 (call core.svec %₄ core.Any %₅ %₇) -9 (call core.svec) -10 SourceLocation::1:10 -11 (call core.svec %₈ %₉ %₁₀) -12 --- method core.nothing %₁₁ +2 latestworld +3 (method TestMod.#f_kw_slurp#0) +4 latestworld +5 TestMod.#f_kw_slurp#0 +6 (call core.Typeof %₅) +7 (call top.pairs core.NamedTuple) +8 TestMod.f_kw_slurp +9 (call core.Typeof %₈) +10 (call core.svec %₆ core.Any %₇ %₉) +11 (call core.svec) +12 SourceLocation::1:10 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/non_x_kws(!read) slot₄/#self#(!read)] 1 (meta :nkw 2) 2 TestMod.all_kws 3 (return %₂) -13 (call core.typeof core.kwcall) -14 TestMod.f_kw_slurp -15 (call core.Typeof %₁₄) -16 (call core.svec %₁₃ core.NamedTuple %₁₅) -17 (call core.svec) -18 SourceLocation::1:10 -19 (call core.svec %₁₆ %₁₇ %₁₈) -20 --- method core.nothing %₁₉ +15 latestworld +16 (call core.typeof core.kwcall) +17 TestMod.f_kw_slurp +18 (call core.Typeof %₁₇) +19 (call core.svec %₁₆ core.NamedTuple %₁₈) +20 (call core.svec) +21 SourceLocation::1:10 +22 (call core.svec %₁₉ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/non_x_kws(!read) slot₆/x(!read)] 1 (newvar slot₅/non_x_kws) 2 (newvar slot₆/x) @@ -1185,13 +1274,14 @@ end 15 TestMod.#f_kw_slurp#0 16 (call %₁₅ %₁₀ %₁₄ slot₃/#self#) 17 (return %₁₆) -21 TestMod.f_kw_slurp -22 (call 
core.Typeof %₂₁) -23 (call core.svec %₂₂) -24 (call core.svec) -25 SourceLocation::1:10 -26 (call core.svec %₂₃ %₂₄ %₂₅) -27 --- method core.nothing %₂₆ +24 latestworld +25 TestMod.f_kw_slurp +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 SourceLocation::1:10 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ slots: [slot₁/#self#] 1 TestMod.#f_kw_slurp#0 2 TestMod.x_default @@ -1199,8 +1289,9 @@ end 4 (call top.pairs %₃) 5 (call %₁ %₂ %₄ slot₁/#self#) 6 (return %₅) -28 TestMod.f_kw_slurp -29 (return %₂₈) +32 latestworld +33 TestMod.f_kw_slurp +34 (return %₃₃) ######################################## # Static parameters used in keywords, with and without the static parameter @@ -1213,41 +1304,44 @@ function f_kw_sparams(x::X; a::A=a_def, b::X=b_def) where {X,A} end #--------------------- 1 (method TestMod.f_kw_sparams) -2 (method TestMod.#f_kw_sparams#0) -3 (= slot₂/X (call core.TypeVar :X)) -4 (= slot₁/A (call core.TypeVar :A)) -5 TestMod.#f_kw_sparams#0 -6 (call core.Typeof %₅) -7 slot₁/A -8 slot₂/X -9 TestMod.f_kw_sparams -10 (call core.Typeof %₉) -11 slot₂/X -12 (call core.svec %₆ %₇ %₈ %₁₀ %₁₁) +2 latestworld +3 (method TestMod.#f_kw_sparams#0) +4 latestworld +5 (= slot₂/X (call core.TypeVar :X)) +6 (= slot₁/A (call core.TypeVar :A)) +7 TestMod.#f_kw_sparams#0 +8 (call core.Typeof %₇) +9 slot₁/A +10 slot₂/X +11 TestMod.f_kw_sparams +12 (call core.Typeof %₁₁) 13 slot₂/X -14 slot₁/A -15 (call core.svec %₁₃ %₁₄) -16 SourceLocation::1:10 -17 (call core.svec %₁₂ %₁₅ %₁₆) -18 --- method core.nothing %₁₇ +14 (call core.svec %₈ %₉ %₁₀ %₁₂ %₁₃) +15 slot₂/X +16 slot₁/A +17 (call core.svec %₁₅ %₁₆) +18 SourceLocation::1:10 +19 (call core.svec %₁₄ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ slots: [slot₁/#self#(!read) slot₂/a(!read) slot₃/b(!read) slot₄/#self#(!read) slot₅/x(!read)] 1 (meta :nkw 2) 2 static_parameter₁ 3 static_parameter₂ 4 (call core.tuple %₂ %₃) 5 (return %₄) -19 (= slot₄/X (call core.TypeVar :X)) -20 (= slot₃/A 
(call core.TypeVar :A)) -21 (call core.typeof core.kwcall) -22 TestMod.f_kw_sparams -23 (call core.Typeof %₂₂) -24 slot₄/X -25 (call core.svec %₂₁ core.NamedTuple %₂₃ %₂₄) -26 slot₄/X -27 (call core.svec %₂₆) -28 SourceLocation::1:10 -29 (call core.svec %₂₅ %₂₇ %₂₈) -30 --- method core.nothing %₂₉ +21 latestworld +22 (= slot₄/X (call core.TypeVar :X)) +23 (= slot₃/A (call core.TypeVar :A)) +24 (call core.typeof core.kwcall) +25 TestMod.f_kw_sparams +26 (call core.Typeof %₂₅) +27 slot₄/X +28 (call core.svec %₂₄ core.NamedTuple %₂₆ %₂₇) +29 slot₄/X +30 (call core.svec %₂₉) +31 SourceLocation::1:10 +32 (call core.svec %₂₈ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/x slot₅/kwtmp slot₆/a(!read) slot₇/b(!read)] 1 (newvar slot₆/a) 2 (newvar slot₇/b) @@ -1284,25 +1378,27 @@ end 33 TestMod.#f_kw_sparams#0 34 (call %₃₃ %₁₀ %₂₅ slot₃/#self# slot₄/x) 35 (return %₃₄) -31 (= slot₆/X (call core.TypeVar :X)) -32 (= slot₅/A (call core.TypeVar :A)) -33 TestMod.f_kw_sparams -34 (call core.Typeof %₃₃) -35 slot₆/X -36 (call core.svec %₃₄ %₃₅) -37 slot₆/X -38 (call core.svec %₃₇) -39 SourceLocation::1:10 -40 (call core.svec %₃₆ %₃₈ %₃₉) -41 --- method core.nothing %₄₀ +34 latestworld +35 (= slot₆/X (call core.TypeVar :X)) +36 (= slot₅/A (call core.TypeVar :A)) +37 TestMod.f_kw_sparams +38 (call core.Typeof %₃₇) +39 slot₆/X +40 (call core.svec %₃₈ %₃₉) +41 slot₆/X +42 (call core.svec %₄₁) +43 SourceLocation::1:10 +44 (call core.svec %₄₀ %₄₂ %₄₃) +45 --- method core.nothing %₄₄ slots: [slot₁/#self# slot₂/x] 1 TestMod.#f_kw_sparams#0 2 TestMod.a_def 3 TestMod.b_def 4 (call %₁ %₂ %₃ slot₁/#self# slot₂/x) 5 (return %₄) -42 TestMod.f_kw_sparams -43 (return %₄₂) +46 latestworld +47 TestMod.f_kw_sparams +48 (return %₄₇) ######################################## # Error: Static parameter which is unused in keyword body arg types @@ -1363,31 +1459,35 @@ end end #--------------------- 1 (method TestMod.f_only_generated) -2 (method 
TestMod.#f_only_generated@generator#0) -3 TestMod.#f_only_generated@generator#0 -4 (call core.Typeof %₃) -5 (call core.svec %₄ JuliaLowering.MacroContext core.Any core.Any core.Any) -6 (call core.svec) -7 SourceLocation::1:21 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 (method TestMod.#f_only_generated@generator#0) +4 latestworld +5 TestMod.#f_only_generated@generator#0 +6 (call core.Typeof %₅) +7 (call core.svec %₆ JuliaLowering.MacroContext core.Any core.Any core.Any) +8 (call core.svec) +9 SourceLocation::1:21 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize) slot₅/y(nospecialize)] 1 TestMod.generator_code 2 (call %₁ slot₄/x slot₅/y) 3 (return %₂) -10 TestMod.f_only_generated -11 (call core.Typeof %₁₀) -12 (call core.svec %₁₁ core.Any core.Any) -13 (call core.svec) -14 SourceLocation::1:21 -15 (call core.svec %₁₂ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +12 latestworld +13 TestMod.f_only_generated +14 (call core.Typeof %₁₃) +15 (call core.svec %₁₄ core.Any core.Any) +16 (call core.svec) +17 SourceLocation::1:21 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68, 71]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0000), 0x00000046, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, 
nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0000), 0x0000003b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000016, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000010, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000019, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000013, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])])) (call core.svec :#self# :x :y) (call core.svec))) 2 (meta :generated_only) 3 (return core.nothing) -17 TestMod.f_only_generated -18 (return %₁₇) +20 latestworld +21 TestMod.f_only_generated +22 (return %₂₁) ######################################## # Partially generated function with `if @generated` @@ -1404,26 +1504,29 @@ function f_partially_generated(x, y) end #--------------------- 1 (method TestMod.f_partially_generated) -2 (method TestMod.#f_partially_generated@generator#0) -3 TestMod.#f_partially_generated@generator#0 -4 (call core.Typeof %₃) -5 (call core.svec %₄ JuliaLowering.MacroContext core.Any core.Any core.Any) -6 (call core.svec) -7 SourceLocation::1:10 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +2 latestworld +3 (method TestMod.#f_partially_generated@generator#0) +4 latestworld +5 TestMod.#f_partially_generated@generator#0 +6 (call core.Typeof %₅) +7 (call core.svec %₆ JuliaLowering.MacroContext core.Any core.Any core.Any) +8 (call core.svec) +9 SourceLocation::1:10 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize,!read) slot₅/y(nospecialize,!read)] 1 (call JuliaLowering.interpolate_ast (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))) 2 (call core.tuple %₁) 
3 (call JuliaLowering.interpolate_ast (inert (block (block (= nongen_stuff (call bothgen x y)) ($ (block (call JuliaLowering.interpolate_ast (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))))) (tuple-p nongen_stuff maybe_gen_stuff)))) %₂) 4 (return %₃) -10 TestMod.f_partially_generated -11 (call core.Typeof %₁₀) -12 (call core.svec %₁₁ core.Any core.Any) -13 (call core.svec) -14 SourceLocation::1:10 -15 (call core.svec %₁₂ %₁₃ %₁₄) -16 --- method core.nothing %₁₅ +12 latestworld +13 TestMod.f_partially_generated +14 (call core.Typeof %₁₃) +15 (call core.svec %₁₄ core.Any core.Any) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff] 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269, 272]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0000), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x000000e8, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x0000000d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0000), 0x0000009d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0000), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0000), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x00000026, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000037, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, 
nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x0020), 0x0000001f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) 2 TestMod.bothgen @@ -1434,6 +1537,7 @@ end 7 slot₄/maybe_gen_stuff 8 (call core.tuple %₆ %₇) 9 (return %₈) -17 TestMod.f_partially_generated -18 (return %₁₇) +20 latestworld +21 TestMod.f_partially_generated +22 (return %₂₁) diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index 22b2a3ab01f85..7ab7e18257e10 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -5,21 +5,23 @@ 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂) -4 TestMod.#->##0 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:2 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +4 latestworld +5 TestMod.#->##0 +6 (call core.svec %₅ core.Any) +7 (call 
core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.+ 2 (call %₁ slot₂/x 1) 3 (return %₂) -10 TestMod.#->##0 -11 (new %₁₀) -12 TestMod.xs -13 (call top.Generator %₁₁ %₁₂) -14 (return %₁₃) +11 latestworld +12 TestMod.#->##0 +13 (new %₁₂) +14 TestMod.xs +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) ######################################## # Product iteration @@ -28,12 +30,13 @@ 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##1 %₁ %₂) -4 TestMod.#->##1 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:2 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +4 latestworld +5 TestMod.#->##1 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) @@ -46,13 +49,14 @@ 9 slot₅/y 10 (call %₇ %₈ %₉) 11 (return %₁₀) -10 TestMod.#->##1 -11 (new %₁₀) -12 TestMod.xs -13 TestMod.ys -14 (call top.product %₁₂ %₁₃) -15 (call top.Generator %₁₁ %₁₄) -16 (return %₁₅) +11 latestworld +12 TestMod.#->##1 +13 (new %₁₂) +14 TestMod.xs +15 TestMod.ys +16 (call top.product %₁₄ %₁₅) +17 (call top.Generator %₁₃ %₁₆) +18 (return %₁₇) ######################################## # Use `identity` as the Generator function when possible eg in filters @@ -61,12 +65,13 @@ 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##2 %₁ %₂) -4 TestMod.#->##2 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:29 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +4 latestworld +5 TestMod.#->##2 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:29 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ 
slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y(!read)] 1 (call top.indexed_iterate slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) @@ -78,12 +83,13 @@ 8 slot₄/x 9 (call %₇ %₈) 10 (return %₉) -10 TestMod.#->##2 -11 (new %₁₀) -12 TestMod.iter -13 (call top.Filter %₁₁ %₁₂) -14 (call top.Generator top.identity %₁₃) -15 (return %₁₄) +11 latestworld +12 TestMod.#->##2 +13 (new %₁₂) +14 TestMod.iter +15 (call top.Filter %₁₃ %₁₄) +16 (call top.Generator top.identity %₁₅) +17 (return %₁₆) ######################################## # Use of placeholders in iteration vars @@ -92,19 +98,21 @@ 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##3 %₁ %₂) -4 TestMod.#->##3 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:2 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +4 latestworld +5 TestMod.#->##3 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 (return 1) -10 TestMod.#->##3 -11 (new %₁₀) -12 TestMod.xs -13 (call top.Generator %₁₁ %₁₂) -14 (return %₁₃) +11 latestworld +12 TestMod.#->##3 +13 (new %₁₂) +14 TestMod.xs +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) ######################################## # Error: Use of placeholders in body @@ -121,12 +129,13 @@ LoweringError: 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##5 %₁ %₂) -4 TestMod.#->##5 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:2 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +4 latestworld +5 TestMod.#->##5 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x(!read) slot₅/y(!read)] 1 (call top.indexed_iterate 
slot₂/destructured_arg 1) 2 (= slot₄/x (call core.getfield %₁ 1)) @@ -140,11 +149,12 @@ LoweringError: 10 (= slot₅/y (call core.getfield %₉ 1)) 11 TestMod.body 12 (return %₁₁) -10 TestMod.#->##5 -11 (new %₁₀) -12 TestMod.iter -13 (call top.Generator %₁₁ %₁₂) -14 (return %₁₃) +11 latestworld +12 TestMod.#->##5 +13 (new %₁₂) +14 TestMod.iter +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) ######################################## # return permitted in quoted syntax in generator @@ -153,20 +163,22 @@ LoweringError: 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##6 %₁ %₂) -4 TestMod.#->##6 -5 (call core.svec %₄ core.Any) -6 (call core.svec) -7 SourceLocation::1:4 -8 (call core.svec %₅ %₆ %₇) -9 --- method core.nothing %₈ +4 latestworld +5 TestMod.#->##6 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:4 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/_(!read)] 1 (call JuliaLowering.interpolate_ast (inert (return x))) 2 (return %₁) -10 TestMod.#->##6 -11 (new %₁₀) -12 TestMod.iter -13 (call top.Generator %₁₁ %₁₂) -14 (return %₁₃) +11 latestworld +12 TestMod.#->##6 +13 (new %₁₂) +14 TestMod.iter +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) ######################################## # Error: `return` not permitted in generator body @@ -183,26 +195,29 @@ LoweringError: 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#->##7 %₁ %₂) -4 (call core.svec) +4 latestworld 5 (call core.svec) -6 (call JuliaLowering.eval_closure_type TestMod :#->#->##0 %₄ %₅) -7 TestMod.#->#->##0 -8 (call core.svec %₇ core.Any) -9 (call core.svec) -10 SourceLocation::1:2 -11 (call core.svec %₈ %₉ %₁₀) -12 --- method core.nothing %₁₁ +6 (call core.svec) +7 (call JuliaLowering.eval_closure_type TestMod :#->#->##0 %₅ %₆) +8 latestworld +9 TestMod.#->#->##0 +10 (call core.svec %₉ core.Any) +11 (call core.svec) +12 SourceLocation::1:2 +13 (call 
core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ slots: [slot₁/#self#(!read) slot₂/x slot₃/x] 1 slot₂/x 2 (= slot₃/x %₁) 3 slot₃/x 4 (return %₃) -13 TestMod.#->##7 -14 (call core.svec %₁₃ core.Any) -15 (call core.svec) -16 SourceLocation::1:2 -17 (call core.svec %₁₄ %₁₅ %₁₆) -18 --- method core.nothing %₁₇ +15 latestworld +16 TestMod.#->##7 +17 (call core.svec %₁₆ core.Any) +18 (call core.svec) +19 SourceLocation::1:2 +20 (call core.svec %₁₇ %₁₈ %₁₉) +21 --- method core.nothing %₂₀ slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 TestMod.#->#->##0 2 (new %₁) @@ -210,13 +225,14 @@ LoweringError: 4 (call %₃ 1 2) 5 (call top.Generator %₂ %₄) 6 (return %₅) -19 TestMod.#->##7 -20 (new %₁₉) -21 TestMod.: -22 (call %₂₁ 1 3) -23 (call top.Generator %₂₀ %₂₂) -24 (call top.Flatten %₂₃) -25 (return %₂₄) +22 latestworld +23 TestMod.#->##7 +24 (new %₂₃) +25 TestMod.: +26 (call %₂₅ 1 3) +27 (call top.Generator %₂₄ %₂₆) +28 (call top.Flatten %₂₇) +29 (return %₂₈) ######################################## # Comprehension lowers to generator with collect diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index e1c6460cb1bb1..65de7733c69ba 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -25,19 +25,21 @@ macro add_one(ex) end #--------------------- 1 (method TestMod.@add_one) -2 TestMod.@add_one -3 (call core.Typeof %₂) -4 (call core.svec %₃ JuliaLowering.MacroContext core.Any) -5 (call core.svec) -6 SourceLocation::1:7 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.@add_one +4 (call core.Typeof %₃) +5 (call core.svec %₄ JuliaLowering.MacroContext core.Any) +6 (call core.svec) +7 SourceLocation::1:7 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/ex] 1 (call core.tuple slot₃/ex) 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) 3 (return %₂) -9 TestMod.@add_one -10 (return %₉) +10 
latestworld +11 TestMod.@add_one +12 (return %₁₁) ######################################## # Macro using `__context__` @@ -46,19 +48,21 @@ macro foo(ex) end #--------------------- 1 (method TestMod.@foo) -2 TestMod.@foo -3 (call core.Typeof %₂) -4 (call core.svec %₃ JuliaLowering.MacroContext core.Any) -5 (call core.svec) -6 SourceLocation::1:7 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.@foo +4 (call core.Typeof %₃) +5 (call core.svec %₄ JuliaLowering.MacroContext core.Any) +6 (call core.svec) +7 SourceLocation::1:7 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/__context__ slot₃/ex(!read) slot₄/ctx(!read)] 1 slot₂/__context__ 2 (= slot₄/ctx %₁) 3 (return %₁) -9 TestMod.@foo -10 (return %₉) +10 latestworld +11 TestMod.@foo +12 (return %₁₁) ######################################## # Scope for symbols emitted by macros is the module where the method was diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 90bc32cc45f48..c391f0cce16b6 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -46,4 +46,12 @@ cf_float = JuliaLowering.include_string(test_mod, """ """) @test @ccall($cf_float(2::Float64, 3::Float64)::Float64) == 32.0 +@testset "CodeInfo: has_image_globalref" begin + elower(mod, s) = JuliaLowering.to_lowered_expr( + mod, JuliaLowering.lower( + mod, parsestmt(JuliaLowering.SyntaxTree, s))) + @test elower(test_mod, "x + y").args[1].has_image_globalref === false + @test elower(Main, "x + y").args[1].has_image_globalref === true +end + end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index d073cf97ffe54..15fa0229e595a 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -390,3 +390,12 @@ LoweringError: x... 
└──┘ ── `...` expression outside call +######################################## +# `include` should increment world age +include("hi.jl") +#--------------------- +1 TestMod.include +2 (call %₁ "hi.jl") +3 latestworld +4 (return %₂) + diff --git a/JuliaLowering/test/modules.jl b/JuliaLowering/test/modules.jl index 66595ee6345c4..97cb536f51437 100644 --- a/JuliaLowering/test/modules.jl +++ b/JuliaLowering/test/modules.jl @@ -13,7 +13,8 @@ end @test A.g() == "hi" @test A.include isa Function @test A.Base === Base -@test A.eval(:(x = -1)) == -1 && A.x == -1 +@test A.eval(:(x = -1)) == -1 +@test A.x == -1 B = JuliaLowering.include_string(test_mod, """ baremodule B diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index 0d747654a9e86..32b917304344f 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -47,6 +47,22 @@ end end end # @ string:5""" +@test sprint(io->showprov(io, ex[1][3]; note="foo")) == raw""" + begin + x = 10 + y = :(g(z)) + # └──┘ ── foo + quote + f($(x+1), $y) + # @ string:3 + + y = :(g(z)) + quote + f($(x+1), $y) + # └┘ ── foo + end + end + # @ string:5""" # Test expression flags are preserved during interpolation diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index c0013a3963775..da1f3529fc8d4 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -73,19 +73,21 @@ end 1 (call core.svec) 2 (call core.svec) 3 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂) -4 TestMod.#f##0 -5 (new %₄) -6 (= slot₁/f %₅) -7 TestMod.#f##0 -8 (call core.svec %₇) -9 (call core.svec) -10 SourceLocation::1:5 -11 (call core.svec %₈ %₉ %₁₀) -12 --- method core.nothing %₁₁ +4 latestworld +5 TestMod.#f##0 +6 (new %₅) +7 (= slot₁/f %₆) +8 TestMod.#f##0 +9 (call core.svec %₈) +10 (call core.svec) +11 SourceLocation::1:5 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- method core.nothing %₁₂ slots: [slot₁/#self#(!read)] 1 TestMod.body 2 (return %₁) -13 (return 
core.nothing) +14 latestworld +15 (return core.nothing) ######################################## # Error: Invalid `let` var with K"::" @@ -142,18 +144,20 @@ end 3 slot₁/y 4 (call core.setfield! %₃ :contents %₂) 5 (method TestMod.f) -6 TestMod.f -7 (call core.Typeof %₆) -8 (call core.svec %₇ core.Any) -9 (call core.svec) -10 SourceLocation::3:14 -11 (call core.svec %₈ %₉ %₁₀) -12 --- method core.nothing %₁₁ +6 latestworld +7 TestMod.f +8 (call core.Typeof %₇) +9 (call core.svec %₈ core.Any) +10 (call core.svec) +11 SourceLocation::3:14 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- method core.nothing %₁₂ slots: [slot₁/#self#(!read) slot₂/x(!read)] 1 (call core.tuple false true true) 2 (return %₁) -13 TestMod.f -14 (return %₁₃) +14 latestworld +15 TestMod.f +16 (return %₁₅) ######################################## # @islocal with global @@ -163,7 +167,8 @@ begin end #--------------------- 1 (global TestMod.x) -2 (return false) +2 latestworld +3 (return false) ######################################## # @locals with local and global @@ -175,13 +180,14 @@ end #--------------------- 1 (newvar slot₁/y) 2 (global TestMod.x) -3 (call core.apply_type top.Dict core.Symbol core.Any) -4 (call %₃) -5 (isdefined slot₁/y) -6 (gotoifnot %₅ label₉) -7 slot₁/y -8 (call top.setindex! %₄ %₇ :y) -9 (return %₄) +3 latestworld +4 (call core.apply_type top.Dict core.Symbol core.Any) +5 (call %₄) +6 (isdefined slot₁/y) +7 (gotoifnot %₆ label₁₀) +8 slot₁/y +9 (call top.setindex! 
%₅ %₈ :y) +10 (return %₅) ######################################## # @locals with function args (TODO: static parameters) @@ -190,21 +196,23 @@ function f(z) end #--------------------- 1 (method TestMod.f) -2 TestMod.f -3 (call core.Typeof %₂) -4 (call core.svec %₃ core.Any) -5 (call core.svec) -6 SourceLocation::1:10 -7 (call core.svec %₄ %₅ %₆) -8 --- method core.nothing %₇ +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/z] 1 (call core.apply_type top.Dict core.Symbol core.Any) 2 (call %₁) 3 (gotoifnot true label₅) 4 (call top.setindex! %₂ slot₂/z :z) 5 (return %₂) -9 TestMod.f -10 (return %₉) +10 latestworld +11 TestMod.f +12 (return %₁₁) ######################################## # Error: Duplicate function argument names @@ -267,7 +275,7 @@ LoweringError: let local x global x -# └──────┘ ── Variable `x` declared both local and global +# ╙ ── Variable `x` declared both local and global end ######################################## @@ -279,7 +287,7 @@ end LoweringError: function f(x) local x -# └─────┘ ── local variable name `x` conflicts with an argument +# ╙ ── local variable name `x` conflicts with an argument end ######################################## @@ -291,7 +299,7 @@ end LoweringError: function f(x) global x -# └──────┘ ── global variable name `x` conflicts with an argument +# ╙ ── global variable name `x` conflicts with an argument end ######################################## @@ -304,7 +312,7 @@ end LoweringError: function f((x,)) global x -# └──────┘ ── Variable `x` declared both local and global +# ╙ ── Variable `x` declared both local and global end ######################################## @@ -316,7 +324,7 @@ end LoweringError: function f(::T) where T local T -# └─────┘ ── local variable name `T` conflicts with a static parameter +# ╙ ── local variable name `T` conflicts with a 
static parameter end ######################################## @@ -328,7 +336,7 @@ end LoweringError: function f(::T) where T global T -# └──────┘ ── global variable name `T` conflicts with a static parameter +# ╙ ── global variable name `T` conflicts with a static parameter end ######################################## @@ -343,7 +351,7 @@ LoweringError: function f(::T) where T let local T -# └─────┘ ── local variable name `T` conflicts with a static parameter +# ╙ ── local variable name `T` conflicts with a static parameter end end @@ -359,7 +367,7 @@ LoweringError: function f(::T) where T let global T -# └──────┘ ── global variable name `T` conflicts with a static parameter +# ╙ ── global variable name `T` conflicts with a static parameter end end @@ -418,6 +426,6 @@ end #--------------------- 1 1 2 (= slot₁/x %₁) -3 (isdefined TestMod.y) +3 (call core.isdefinedglobal TestMod :y false) 4 (return %₃) diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index d2f0594b24eac..be509eeb88156 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -251,12 +251,13 @@ end """) @test fieldtypes(test_mod.M36104.T36104) == (Vector{test_mod.M36104.T36104},) @test_throws ErrorException("expected") JuliaLowering.include_string(test_mod, """struct X36104; x::error("expected"); end""") -@test isdefined(test_mod, :X36104) +@test !isdefined(test_mod, :X36104) JuliaLowering.include_string(test_mod, "struct X36104; x::Int; end") @test fieldtypes(test_mod.X36104) == (Int,) JuliaLowering.include_string(test_mod, "primitive type P36104 8 end") -@test_throws ErrorException("invalid redefinition of constant TestMod.P36104") #= - =# JuliaLowering.include_string(test_mod, "primitive type P36104 16 end") +JuliaLowering.include_string(test_mod, "const orig_P36104 = P36104") +JuliaLowering.include_string(test_mod, "primitive type P36104 16 end") +@test test_mod.P36104 !== test_mod.orig_P36104 # Struct with outer constructor where one typevar is 
constrained by the other # See https://github.com/JuliaLang/julia/issues/27269) diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 14f0b1cc0f7f4..0785a8f21ce93 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -191,17 +191,19 @@ abstract type A end 2 (call core._abstracttype TestMod :A %₁) 3 (= slot₁/A %₂) 4 (call core._setsuper! %₂ core.Any) -5 (call core._typebody! %₂) -6 (global TestMod.A) -7 (const TestMod.A) -8 (isdefined TestMod.A) -9 (gotoifnot %₈ label₁₄) -10 TestMod.A -11 (call core._equiv_typedef %₁₀ %₂) -12 (gotoifnot %₁₁ label₁₄) -13 (goto label₁₅) -14 (= TestMod.A %₂) -15 (return core.nothing) +5 slot₁/A +6 (call core._typebody! false %₅) +7 (global TestMod.A) +8 latestworld +9 (call core.isdefinedglobal TestMod :A false) +10 (gotoifnot %₉ label₁₅) +11 TestMod.A +12 (call core._equiv_typedef %₁₁ %₂) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₇) +15 (constdecl TestMod.A %₂) +16 latestworld +17 (return core.nothing) ######################################## # Abstract type definition with supertype @@ -212,17 +214,19 @@ abstract type A <: B end 3 (= slot₁/A %₂) 4 TestMod.B 5 (call core._setsuper! %₂ %₄) -6 (call core._typebody! %₂) -7 (global TestMod.A) -8 (const TestMod.A) -9 (isdefined TestMod.A) -10 (gotoifnot %₉ label₁₅) -11 TestMod.A -12 (call core._equiv_typedef %₁₁ %₂) -13 (gotoifnot %₁₂ label₁₅) -14 (goto label₁₆) -15 (= TestMod.A %₂) -16 (return core.nothing) +6 slot₁/A +7 (call core._typebody! 
false %₆) +8 (global TestMod.A) +9 latestworld +10 (call core.isdefinedglobal TestMod :A false) +11 (gotoifnot %₁₀ label₁₆) +12 TestMod.A +13 (call core._equiv_typedef %₁₂ %₂) +14 (gotoifnot %₁₃ label₁₆) +15 (goto label₁₈) +16 (constdecl TestMod.A %₂) +17 latestworld +18 (return core.nothing) ######################################## # Abstract type definition with multiple typevars @@ -237,17 +241,19 @@ abstract type A{X, Y <: X} end 7 (call core._abstracttype TestMod :A %₆) 8 (= slot₁/A %₇) 9 (call core._setsuper! %₇ core.Any) -10 (call core._typebody! %₇) -11 (global TestMod.A) -12 (const TestMod.A) -13 (isdefined TestMod.A) -14 (gotoifnot %₁₃ label₁₉) -15 TestMod.A -16 (call core._equiv_typedef %₁₅ %₇) -17 (gotoifnot %₁₆ label₁₉) -18 (goto label₂₀) -19 (= TestMod.A %₇) -20 (return core.nothing) +10 slot₁/A +11 (call core._typebody! false %₁₀) +12 (global TestMod.A) +13 latestworld +14 (call core.isdefinedglobal TestMod :A false) +15 (gotoifnot %₁₄ label₂₀) +16 TestMod.A +17 (call core._equiv_typedef %₁₆ %₇) +18 (gotoifnot %₁₇ label₂₀) +19 (goto label₂₂) +20 (constdecl TestMod.A %₇) +21 latestworld +22 (return core.nothing) ######################################## # Error: Abstract type definition with bad signature @@ -293,17 +299,19 @@ primitive type P 8 end 2 (call core._primitivetype TestMod :P %₁ 8) 3 (= slot₁/P %₂) 4 (call core._setsuper! %₂ core.Any) -5 (call core._typebody! %₂) -6 (global TestMod.P) -7 (const TestMod.P) -8 (isdefined TestMod.P) -9 (gotoifnot %₈ label₁₄) -10 TestMod.P -11 (call core._equiv_typedef %₁₀ %₂) -12 (gotoifnot %₁₁ label₁₄) -13 (goto label₁₅) -14 (= TestMod.P %₂) -15 (return core.nothing) +5 slot₁/P +6 (call core._typebody! 
false %₅) +7 (global TestMod.P) +8 latestworld +9 (call core.isdefinedglobal TestMod :P false) +10 (gotoifnot %₉ label₁₅) +11 TestMod.P +12 (call core._equiv_typedef %₁₁ %₂) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₇) +15 (constdecl TestMod.P %₂) +16 latestworld +17 (return core.nothing) ######################################## # Complex primitive type definition @@ -318,17 +326,19 @@ primitive type P{X,Y} <: Z 32 end 7 (= slot₁/P %₆) 8 TestMod.Z 9 (call core._setsuper! %₆ %₈) -10 (call core._typebody! %₆) -11 (global TestMod.P) -12 (const TestMod.P) -13 (isdefined TestMod.P) -14 (gotoifnot %₁₃ label₁₉) -15 TestMod.P -16 (call core._equiv_typedef %₁₅ %₆) -17 (gotoifnot %₁₆ label₁₉) -18 (goto label₂₀) -19 (= TestMod.P %₆) -20 (return core.nothing) +10 slot₁/P +11 (call core._typebody! false %₁₀) +12 (global TestMod.P) +13 latestworld +14 (call core.isdefinedglobal TestMod :P false) +15 (gotoifnot %₁₄ label₂₀) +16 TestMod.P +17 (call core._equiv_typedef %₁₆ %₆) +18 (gotoifnot %₁₇ label₂₀) +19 (goto label₂₂) +20 (constdecl TestMod.P %₆) +21 latestworld +22 (return core.nothing) ######################################## # Primitive type definition with computed size (should this be allowed??) @@ -340,17 +350,19 @@ primitive type P P_nbits() end 4 (call core._primitivetype TestMod :P %₁ %₃) 5 (= slot₁/P %₄) 6 (call core._setsuper! %₄ core.Any) -7 (call core._typebody! %₄) -8 (global TestMod.P) -9 (const TestMod.P) -10 (isdefined TestMod.P) -11 (gotoifnot %₁₀ label₁₆) -12 TestMod.P -13 (call core._equiv_typedef %₁₂ %₄) -14 (gotoifnot %₁₃ label₁₆) -15 (goto label₁₇) -16 (= TestMod.P %₄) -17 (return core.nothing) +7 slot₁/P +8 (call core._typebody! 
false %₇) +9 (global TestMod.P) +10 latestworld +11 (call core.isdefinedglobal TestMod :P false) +12 (gotoifnot %₁₁ label₁₇) +13 TestMod.P +14 (call core._equiv_typedef %₁₃ %₄) +15 (gotoifnot %₁₄ label₁₇) +16 (goto label₁₉) +17 (constdecl TestMod.P %₄) +18 latestworld +19 (return core.nothing) ######################################## # Empty struct @@ -358,41 +370,44 @@ struct X end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (call core.svec) 4 (call core.svec) 5 (call core.svec) 6 (call core._structtype TestMod :X %₃ %₄ %₅ false 0) 7 (= slot₁/X %₆) 8 (call core._setsuper! %₆ core.Any) -9 (isdefined TestMod.X) -10 (gotoifnot %₉ label₂₀) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) 11 TestMod.X -12 (call core._equiv_typedef %₁₁ %₆) -13 (gotoifnot %₁₂ label₁₇) -14 TestMod.X -15 (= slot₁/X %₁₄) -16 (goto label₁₉) -17 slot₁/X -18 (= TestMod.X %₁₇) -19 (goto label₂₂) -20 slot₁/X -21 (= TestMod.X %₂₀) -22 slot₁/X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec) -24 (call core._typebody! %₂₂ %₂₃) -25 TestMod.X -26 (call core.apply_type core.Type %₂₅) -27 (call core.svec %₂₆) -28 (call core.svec) -29 SourceLocation::1:1 -30 (call core.svec %₂₇ %₂₈ %₂₉) -31 --- method core.nothing %₃₀ +24 (call core._typebody! 
%₂₁ %₆ %₂₃) +25 (constdecl TestMod.X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈) +30 (call core.svec) +31 SourceLocation::1:1 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁) 3 (return %₂) -32 (return core.nothing) +34 latestworld +35 (return core.nothing) ######################################## # Basic struct @@ -403,41 +418,43 @@ struct X end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (call core.svec) 4 (call core.svec :a :b :c) 5 (call core.svec) 6 (call core._structtype TestMod :X %₃ %₄ %₅ false 3) 7 (= slot₁/X %₆) 8 (call core._setsuper! %₆ core.Any) -9 (isdefined TestMod.X) -10 (gotoifnot %₉ label₂₀) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) 11 TestMod.X -12 (call core._equiv_typedef %₁₁ %₆) -13 (gotoifnot %₁₂ label₁₇) -14 TestMod.X -15 (= slot₁/X %₁₄) -16 (goto label₁₉) -17 slot₁/X -18 (= TestMod.X %₁₇) -19 (goto label₂₂) -20 slot₁/X -21 (= TestMod.X %₂₀) -22 slot₁/X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) 23 TestMod.T 24 (call core.svec core.Any %₂₃ core.Any) -25 (call core._typebody! %₂₂ %₂₄) -26 TestMod.T -27 (call core.=== core.Any %₂₆) -28 (gotoifnot %₂₇ label₃₀) -29 (goto label₃₇) -30 TestMod.X -31 (call core.apply_type core.Type %₃₀) -32 (call core.svec %₃₁ core.Any core.Any core.Any) -33 (call core.svec) -34 SourceLocation::1:1 -35 (call core.svec %₃₂ %₃₃ %₃₄) -36 --- method core.nothing %₃₅ +25 (call core._typebody! 
%₂₁ %₆ %₂₄) +26 (constdecl TestMod.X %₂₅) +27 latestworld +28 TestMod.T +29 (call core.=== core.Any %₂₈) +30 (gotoifnot %₂₉ label₃₂) +31 (goto label₄₀) +32 TestMod.X +33 (call core.apply_type core.Type %₃₂) +34 (call core.svec %₃₃ core.Any core.Any core.Any) +35 (call core.svec) +36 SourceLocation::1:1 +37 (call core.svec %₃₄ %₃₅ %₃₆) +38 --- method core.nothing %₃₇ slots: [slot₁/#ctor-self# slot₂/a slot₃/b slot₄/c slot₅/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 2) 2 slot₃/b @@ -451,19 +468,21 @@ end 10 slot₅/tmp 11 (new slot₁/#ctor-self# slot₂/a %₁₀ slot₄/c) 12 (return %₁₁) -37 TestMod.X -38 (call core.apply_type core.Type %₃₇) -39 TestMod.T -40 (call core.svec %₃₈ core.Any %₃₉ core.Any) -41 (call core.svec) -42 SourceLocation::1:1 -43 (call core.svec %₄₀ %₄₁ %₄₂) -44 --- method core.nothing %₄₃ +39 latestworld +40 TestMod.X +41 (call core.apply_type core.Type %₄₀) +42 TestMod.T +43 (call core.svec %₄₁ core.Any %₄₂ core.Any) +44 (call core.svec) +45 SourceLocation::1:1 +46 (call core.svec %₄₃ %₄₄ %₄₅) +47 --- method core.nothing %₄₆ slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b slot₄/c) 3 (return %₂) -45 (return core.nothing) +48 latestworld +49 (return core.nothing) ######################################## # Struct with supertype and type params @@ -471,7 +490,7 @@ struct X{U, S <: V <: T} <: Z end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (= slot₂/U (call core.TypeVar :U)) 4 TestMod.S 5 TestMod.T @@ -485,50 +504,53 @@ end 13 (= slot₄/X %₁₂) 14 TestMod.Z 15 (call core._setsuper! 
%₁₂ %₁₄) -16 (isdefined TestMod.X) -17 (gotoifnot %₁₆ label₃₇) +16 (call core.isdefinedglobal TestMod :X false) +17 (gotoifnot %₁₆ label₂₁) 18 TestMod.X -19 (call core._equiv_typedef %₁₈ %₁₂) -20 (gotoifnot %₁₉ label₃₄) -21 TestMod.X -22 (= slot₄/X %₂₁) -23 TestMod.X -24 (call top.getproperty %₂₃ :body) -25 (call top.getproperty %₂₄ :body) -26 (call top.getproperty %₂₅ :parameters) -27 (call top.indexed_iterate %₂₆ 1) -28 (= slot₂/U (call core.getfield %₂₇ 1)) -29 (= slot₁/iterstate (call core.getfield %₂₇ 2)) -30 slot₁/iterstate -31 (call top.indexed_iterate %₂₆ 2 %₃₀) -32 (= slot₃/V (call core.getfield %₃₁ 1)) -33 (goto label₃₆) -34 slot₄/X -35 (= TestMod.X %₃₄) -36 (goto label₃₉) -37 slot₄/X -38 (= TestMod.X %₃₇) -39 slot₄/X +19 (= slot₅/if_val (call core._equiv_typedef %₁₈ %₁₂)) +20 (goto label₂₂) +21 (= slot₅/if_val false) +22 slot₅/if_val +23 (gotoifnot %₂₂ label₂₇) +24 TestMod.X +25 (= slot₆/if_val %₂₄) +26 (goto label₂₈) +27 (= slot₆/if_val false) +28 slot₆/if_val +29 (gotoifnot %₂₂ label₄₀) +30 TestMod.X +31 (call top.getproperty %₃₀ :body) +32 (call top.getproperty %₃₁ :body) +33 (call top.getproperty %₃₂ :parameters) +34 (call top.indexed_iterate %₃₃ 1) +35 (= slot₂/U (call core.getfield %₃₄ 1)) +36 (= slot₁/iterstate (call core.getfield %₃₄ 2)) +37 slot₁/iterstate +38 (call top.indexed_iterate %₃₃ 2 %₃₇) +39 (= slot₃/V (call core.getfield %₃₈ 1)) 40 (call core.svec) -41 (call core._typebody! %₃₉ %₄₀) -42 slot₂/U -43 slot₃/V -44 TestMod.X -45 slot₂/U -46 slot₃/V -47 (call core.apply_type %₄₄ %₄₅ %₄₆) -48 (call core.apply_type core.Type %₄₇) -49 (call core.UnionAll %₄₃ %₄₈) -50 (call core.UnionAll %₄₂ %₄₉) -51 (call core.svec %₅₀) -52 (call core.svec) -53 SourceLocation::1:1 -54 (call core.svec %₅₁ %₅₂ %₅₃) -55 --- method core.nothing %₅₄ +41 (call core._typebody! 
%₂₈ %₁₂ %₄₀) +42 (constdecl TestMod.X %₄₁) +43 latestworld +44 slot₂/U +45 slot₃/V +46 TestMod.X +47 slot₂/U +48 slot₃/V +49 (call core.apply_type %₄₆ %₄₇ %₄₈) +50 (call core.apply_type core.Type %₄₉) +51 (call core.UnionAll %₄₅ %₅₀) +52 (call core.UnionAll %₄₄ %₅₁) +53 (call core.svec %₅₂) +54 (call core.svec) +55 SourceLocation::1:1 +56 (call core.svec %₅₃ %₅₄ %₅₅) +57 --- method core.nothing %₅₆ slots: [slot₁/#ctor-self#] 1 (new slot₁/#ctor-self#) 2 (return %₁) -56 (return core.nothing) +58 latestworld +59 (return core.nothing) ######################################## # Struct with const and atomic fields @@ -539,41 +561,44 @@ struct X end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (call core.svec) 4 (call core.svec :a :b :c) 5 (call core.svec 1 :const 2 :atomic 3 :atomic 3 :const) 6 (call core._structtype TestMod :X %₃ %₄ %₅ false 3) 7 (= slot₁/X %₆) 8 (call core._setsuper! %₆ core.Any) -9 (isdefined TestMod.X) -10 (gotoifnot %₉ label₂₀) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) 11 TestMod.X -12 (call core._equiv_typedef %₁₁ %₆) -13 (gotoifnot %₁₂ label₁₇) -14 TestMod.X -15 (= slot₁/X %₁₄) -16 (goto label₁₉) -17 slot₁/X -18 (= TestMod.X %₁₇) -19 (goto label₂₂) -20 slot₁/X -21 (= TestMod.X %₂₀) -22 slot₁/X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec core.Any core.Any core.Any) -24 (call core._typebody! %₂₂ %₂₃) -25 TestMod.X -26 (call core.apply_type core.Type %₂₅) -27 (call core.svec %₂₆ core.Any core.Any core.Any) -28 (call core.svec) -29 SourceLocation::1:1 -30 (call core.svec %₂₇ %₂₈ %₂₉) -31 --- method core.nothing %₃₀ +24 (call core._typebody! 
%₂₁ %₆ %₂₃) +25 (constdecl TestMod.X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈ core.Any core.Any core.Any) +30 (call core.svec) +31 SourceLocation::1:1 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b slot₄/c) 3 (return %₂) -32 (return core.nothing) +34 latestworld +35 (return core.nothing) ######################################## # Documented struct @@ -588,50 +613,53 @@ struct X end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (call core.svec) 4 (call core.svec :a :b) 5 (call core.svec) 6 (call core._structtype TestMod :X %₃ %₄ %₅ false 2) 7 (= slot₁/X %₆) 8 (call core._setsuper! %₆ core.Any) -9 (isdefined TestMod.X) -10 (gotoifnot %₉ label₂₀) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) 11 TestMod.X -12 (call core._equiv_typedef %₁₁ %₆) -13 (gotoifnot %₁₂ label₁₇) -14 TestMod.X -15 (= slot₁/X %₁₄) -16 (goto label₁₉) -17 slot₁/X -18 (= TestMod.X %₁₇) -19 (goto label₂₂) -20 slot₁/X -21 (= TestMod.X %₂₀) -22 slot₁/X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec core.Any core.Any) -24 (call core._typebody! %₂₂ %₂₃) -25 TestMod.X -26 (call core.apply_type core.Type %₂₅) -27 (call core.svec %₂₆ core.Any core.Any) -28 (call core.svec) -29 SourceLocation::4:1 -30 (call core.svec %₂₇ %₂₈ %₂₉) -31 --- method core.nothing %₃₀ +24 (call core._typebody! 
%₂₁ %₆ %₂₃) +25 (constdecl TestMod.X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈ core.Any core.Any) +30 (call core.svec) +31 SourceLocation::4:1 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ slots: [slot₁/#self#(!read) slot₂/a slot₃/b] 1 TestMod.X 2 (new %₁ slot₂/a slot₃/b) 3 (return %₂) -32 JuliaLowering.bind_docs! -33 (call core.tuple :field_docs) -34 (call core.apply_type core.NamedTuple %₃₃) -35 (call core.svec 1 "field a docs" 2 "field b docs") -36 (call core.tuple %₃₅) -37 (call %₃₄ %₃₆) -38 TestMod.X -39 SourceLocation::4:1 -40 (call core.kwcall %₃₇ %₃₂ %₃₈ "X docs\n" %₃₉) -41 (return core.nothing) +34 latestworld +35 JuliaLowering.bind_docs! +36 (call core.tuple :field_docs) +37 (call core.apply_type core.NamedTuple %₃₆) +38 (call core.svec 1 "field a docs" 2 "field b docs") +39 (call core.tuple %₃₈) +40 (call %₃₇ %₃₉) +41 TestMod.X +42 SourceLocation::4:1 +43 (call core.kwcall %₄₀ %₃₅ %₄₁ "X docs\n" %₄₂) +44 (return core.nothing) ######################################## # Struct with outer constructor @@ -640,7 +668,7 @@ struct X{U} end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (= slot₁/U (call core.TypeVar :U)) 4 slot₁/U 5 (call core.svec %₄) @@ -649,39 +677,41 @@ end 8 (call core._structtype TestMod :X %₅ %₆ %₇ false 1) 9 (= slot₂/X %₈) 10 (call core._setsuper! 
%₈ core.Any) -11 (isdefined TestMod.X) -12 (gotoifnot %₁₁ label₂₇) +11 (call core.isdefinedglobal TestMod :X false) +12 (gotoifnot %₁₁ label₁₆) 13 TestMod.X -14 (call core._equiv_typedef %₁₃ %₈) -15 (gotoifnot %₁₄ label₂₄) -16 TestMod.X -17 (= slot₂/X %₁₆) -18 TestMod.X -19 (call top.getproperty %₁₈ :body) -20 (call top.getproperty %₁₉ :parameters) -21 (call top.indexed_iterate %₂₀ 1) -22 (= slot₁/U (call core.getfield %₂₁ 1)) -23 (goto label₂₆) -24 slot₂/X -25 (= TestMod.X %₂₄) -26 (goto label₂₉) -27 slot₂/X -28 (= TestMod.X %₂₇) -29 slot₂/X +14 (= slot₃/if_val (call core._equiv_typedef %₁₃ %₈)) +15 (goto label₁₇) +16 (= slot₃/if_val false) +17 slot₃/if_val +18 (gotoifnot %₁₇ label₂₂) +19 TestMod.X +20 (= slot₄/if_val %₁₉) +21 (goto label₂₃) +22 (= slot₄/if_val false) +23 slot₄/if_val +24 (gotoifnot %₁₇ label₃₀) +25 TestMod.X +26 (call top.getproperty %₂₅ :body) +27 (call top.getproperty %₂₆ :parameters) +28 (call top.indexed_iterate %₂₇ 1) +29 (= slot₁/U (call core.getfield %₂₈ 1)) 30 slot₁/U 31 (call core.svec %₃₀) -32 (call core._typebody! %₂₉ %₃₁) -33 slot₁/U -34 TestMod.X +32 (call core._typebody! 
%₂₃ %₈ %₃₁) +33 (constdecl TestMod.X %₃₂) +34 latestworld 35 slot₁/U -36 (call core.apply_type %₃₄ %₃₅) -37 (call core.apply_type core.Type %₃₆) -38 (call core.UnionAll %₃₃ %₃₇) -39 (call core.svec %₃₈ core.Any) -40 (call core.svec) -41 SourceLocation::1:1 -42 (call core.svec %₃₉ %₄₀ %₄₁) -43 --- method core.nothing %₄₂ +36 TestMod.X +37 slot₁/U +38 (call core.apply_type %₃₆ %₃₇) +39 (call core.apply_type core.Type %₃₈) +40 (call core.UnionAll %₃₅ %₃₉) +41 (call core.svec %₄₀ core.Any) +42 (call core.svec) +43 SourceLocation::1:1 +44 (call core.svec %₄₁ %₄₂ %₄₃) +45 --- method core.nothing %₄₄ slots: [slot₁/#ctor-self# slot₂/x slot₃/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 1) 2 slot₂/x @@ -695,22 +725,24 @@ end 10 slot₃/tmp 11 (new slot₁/#ctor-self# %₁₀) 12 (return %₁₁) -44 TestMod.X -45 (call core.apply_type core.Type %₄₄) -46 slot₁/U -47 (call core.svec %₄₅ %₄₆) -48 slot₁/U -49 (call core.svec %₄₈) -50 SourceLocation::1:1 -51 (call core.svec %₄₇ %₄₉ %₅₀) -52 --- method core.nothing %₅₁ +46 latestworld +47 TestMod.X +48 (call core.apply_type core.Type %₄₇) +49 slot₁/U +50 (call core.svec %₄₈ %₄₉) +51 slot₁/U +52 (call core.svec %₅₁) +53 SourceLocation::1:1 +54 (call core.svec %₅₀ %₅₂ %₅₃) +55 --- method core.nothing %₅₄ slots: [slot₁/#self#(!read) slot₂/x] 1 TestMod.X 2 static_parameter₁ 3 (call core.apply_type %₁ %₂) 4 (new %₃ slot₂/x) 5 (return %₄) -53 (return core.nothing) +56 latestworld +57 (return core.nothing) ######################################## # Struct with outer constructor where one typevar is constrained by the other @@ -720,7 +752,7 @@ struct X{T, S <: Vector{T}} end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (= slot₃/T (call core.TypeVar :T)) 4 TestMod.Vector 5 slot₃/T @@ -734,49 +766,51 @@ end 13 (call core._structtype TestMod :X %₁₀ %₁₁ %₁₂ false 1) 14 (= slot₄/X %₁₃) 15 (call core._setsuper! 
%₁₃ core.Any) -16 (isdefined TestMod.X) -17 (gotoifnot %₁₆ label₃₇) +16 (call core.isdefinedglobal TestMod :X false) +17 (gotoifnot %₁₆ label₂₁) 18 TestMod.X -19 (call core._equiv_typedef %₁₈ %₁₃) -20 (gotoifnot %₁₉ label₃₄) -21 TestMod.X -22 (= slot₄/X %₂₁) -23 TestMod.X -24 (call top.getproperty %₂₃ :body) -25 (call top.getproperty %₂₄ :body) -26 (call top.getproperty %₂₅ :parameters) -27 (call top.indexed_iterate %₂₆ 1) -28 (= slot₃/T (call core.getfield %₂₇ 1)) -29 (= slot₁/iterstate (call core.getfield %₂₇ 2)) -30 slot₁/iterstate -31 (call top.indexed_iterate %₂₆ 2 %₃₀) -32 (= slot₂/S (call core.getfield %₃₁ 1)) -33 (goto label₃₆) -34 slot₄/X -35 (= TestMod.X %₃₄) -36 (goto label₃₉) -37 slot₄/X -38 (= TestMod.X %₃₇) -39 slot₄/X +19 (= slot₅/if_val (call core._equiv_typedef %₁₈ %₁₃)) +20 (goto label₂₂) +21 (= slot₅/if_val false) +22 slot₅/if_val +23 (gotoifnot %₂₂ label₂₇) +24 TestMod.X +25 (= slot₆/if_val %₂₄) +26 (goto label₂₈) +27 (= slot₆/if_val false) +28 slot₆/if_val +29 (gotoifnot %₂₂ label₄₀) +30 TestMod.X +31 (call top.getproperty %₃₀ :body) +32 (call top.getproperty %₃₁ :body) +33 (call top.getproperty %₃₂ :parameters) +34 (call top.indexed_iterate %₃₃ 1) +35 (= slot₃/T (call core.getfield %₃₄ 1)) +36 (= slot₁/iterstate (call core.getfield %₃₄ 2)) +37 slot₁/iterstate +38 (call top.indexed_iterate %₃₃ 2 %₃₇) +39 (= slot₂/S (call core.getfield %₃₈ 1)) 40 TestMod.Vector 41 slot₂/S 42 (call core.apply_type %₄₀ %₄₁) 43 (call core.svec %₄₂) -44 (call core._typebody! %₃₉ %₄₃) -45 slot₃/T -46 slot₂/S -47 TestMod.X -48 slot₃/T -49 slot₂/S -50 (call core.apply_type %₄₇ %₄₈ %₄₉) -51 (call core.apply_type core.Type %₅₀) -52 (call core.UnionAll %₄₆ %₅₁) -53 (call core.UnionAll %₄₅ %₅₂) -54 (call core.svec %₅₃ core.Any) -55 (call core.svec) -56 SourceLocation::1:1 -57 (call core.svec %₅₄ %₅₅ %₅₆) -58 --- method core.nothing %₅₇ +44 (call core._typebody! 
%₂₈ %₁₃ %₄₃) +45 (constdecl TestMod.X %₄₄) +46 latestworld +47 slot₃/T +48 slot₂/S +49 TestMod.X +50 slot₃/T +51 slot₂/S +52 (call core.apply_type %₄₉ %₅₀ %₅₁) +53 (call core.apply_type core.Type %₅₂) +54 (call core.UnionAll %₄₈ %₅₃) +55 (call core.UnionAll %₄₇ %₅₄) +56 (call core.svec %₅₅ core.Any) +57 (call core.svec) +58 SourceLocation::1:1 +59 (call core.svec %₅₆ %₅₇ %₅₈) +60 --- method core.nothing %₅₉ slots: [slot₁/#ctor-self# slot₂/v slot₃/tmp] 1 (call core.fieldtype slot₁/#ctor-self# 1) 2 slot₂/v @@ -790,18 +824,19 @@ end 10 slot₃/tmp 11 (new slot₁/#ctor-self# %₁₀) 12 (return %₁₁) -59 TestMod.X -60 (call core.apply_type core.Type %₅₉) -61 TestMod.Vector -62 slot₂/S -63 (call core.apply_type %₆₁ %₆₂) -64 (call core.svec %₆₀ %₆₃) -65 slot₃/T -66 slot₂/S -67 (call core.svec %₆₅ %₆₆) -68 SourceLocation::1:1 -69 (call core.svec %₆₄ %₆₇ %₆₈) -70 --- method core.nothing %₆₉ +61 latestworld +62 TestMod.X +63 (call core.apply_type core.Type %₆₂) +64 TestMod.Vector +65 slot₂/S +66 (call core.apply_type %₆₄ %₆₅) +67 (call core.svec %₆₃ %₆₆) +68 slot₃/T +69 slot₂/S +70 (call core.svec %₆₈ %₆₉) +71 SourceLocation::1:1 +72 (call core.svec %₆₇ %₇₀ %₇₁) +73 --- method core.nothing %₇₂ slots: [slot₁/#self#(!read) slot₂/v] 1 TestMod.X 2 static_parameter₁ @@ -809,7 +844,8 @@ end 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ slot₂/v) 6 (return %₅) -71 (return core.nothing) +74 latestworld +75 (return core.nothing) ######################################## # User defined inner constructors and helper functions for structs without type params @@ -827,53 +863,57 @@ end #--------------------- 1 (= slot₂/f (call core.Box)) 2 (global TestMod.X) -3 (const TestMod.X) +3 latestworld 4 (call core.svec) 5 (call core.svec :x) 6 (call core.svec) 7 (call core._structtype TestMod :X %₄ %₅ %₆ false 1) 8 (= slot₁/X %₇) 9 (call core._setsuper! 
%₇ core.Any) -10 (isdefined TestMod.X) -11 (gotoifnot %₁₀ label₂₁) +10 (call core.isdefinedglobal TestMod :X false) +11 (gotoifnot %₁₀ label₁₅) 12 TestMod.X -13 (call core._equiv_typedef %₁₂ %₇) -14 (gotoifnot %₁₃ label₁₈) -15 TestMod.X -16 (= slot₁/X %₁₅) -17 (goto label₂₀) -18 slot₁/X -19 (= TestMod.X %₁₈) -20 (goto label₂₃) -21 slot₁/X -22 (= TestMod.X %₂₁) -23 slot₁/X +13 (= slot₄/if_val (call core._equiv_typedef %₁₂ %₇)) +14 (goto label₁₆) +15 (= slot₄/if_val false) +16 slot₄/if_val +17 (gotoifnot %₁₆ label₂₁) +18 TestMod.X +19 (= slot₅/if_val %₁₈) +20 (goto label₂₂) +21 (= slot₅/if_val false) +22 slot₅/if_val +23 (gotoifnot %₁₆ label₂₄) 24 (call core.svec core.Any) -25 (call core._typebody! %₂₃ %₂₄) -26 (call core.svec) -27 (call core.svec) -28 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂₆ %₂₇) -29 TestMod.#f##0 -30 (new %₂₉) -31 slot₂/f -32 (call core.setfield! %₃₁ :contents %₃₀) -33 TestMod.#f##0 -34 (call core.svec %₃₃) -35 (call core.svec) -36 SourceLocation::3:5 -37 (call core.svec %₃₄ %₃₅ %₃₆) -38 --- method core.nothing %₃₇ +25 (call core._typebody! %₂₂ %₇ %₂₄) +26 (constdecl TestMod.X %₂₅) +27 latestworld +28 (call core.svec) +29 (call core.svec) +30 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂₈ %₂₉) +31 latestworld +32 TestMod.#f##0 +33 (new %₃₂) +34 slot₂/f +35 (call core.setfield! 
%₃₄ :contents %₃₃) +36 TestMod.#f##0 +37 (call core.svec %₃₆) +38 (call core.svec) +39 SourceLocation::3:5 +40 (call core.svec %₃₇ %₃₈ %₃₉) +41 --- method core.nothing %₄₀ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 (new %₁ 1) 3 (return %₂) -39 TestMod.X -40 (call core.apply_type core.Type %₃₉) -41 (call core.svec %₄₀) -42 (call core.svec) -43 SourceLocation::4:5 -44 (call core.svec %₄₁ %₄₂ %₄₃) -45 --- code_info +42 latestworld +43 TestMod.X +44 (call core.apply_type core.Type %₄₃) +45 (call core.svec %₄₄) +46 (call core.svec) +47 SourceLocation::4:5 +48 (call core.svec %₄₅ %₄₆ %₄₇) +49 --- code_info slots: [slot₁/#ctor-self#(!read) slot₂/f(!read)] 1 (captured_local 1) 2 (call core.isdefined %₁ :contents) @@ -884,28 +924,30 @@ end 7 (call core.getfield %₁ :contents) 8 (call %₇) 9 (return %₈) -46 slot₂/f -47 (call core.svec %₄₆) -48 (call JuliaLowering.replace_captured_locals! %₄₅ %₄₇) -49 --- method core.nothing %₄₄ %₄₈ -50 TestMod.X -51 (call core.apply_type core.Type %₅₀) -52 (call core.svec %₅₁ core.Any) -53 (call core.svec) -54 SourceLocation::5:5 -55 (call core.svec %₅₂ %₅₃ %₅₄) -56 --- method core.nothing %₅₅ +50 slot₂/f +51 (call core.svec %₅₀) +52 (call JuliaLowering.replace_captured_locals! 
%₄₉ %₅₁) +53 --- method core.nothing %₄₈ %₅₂ +54 latestworld +55 TestMod.X +56 (call core.apply_type core.Type %₅₅) +57 (call core.svec %₅₆ core.Any) +58 (call core.svec) +59 SourceLocation::5:5 +60 (call core.svec %₅₇ %₅₈ %₅₉) +61 --- method core.nothing %₆₀ slots: [slot₁/#ctor-self# slot₂/x] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/x) 3 (return %₂) -57 TestMod.X -58 (call core.apply_type core.Type %₅₇) -59 (call core.svec %₅₈ core.Any core.Any) -60 (call core.svec) -61 SourceLocation::6:5 -62 (call core.svec %₅₉ %₆₀ %₆₁) -63 --- method core.nothing %₆₂ +62 latestworld +63 TestMod.X +64 (call core.apply_type core.Type %₆₃) +65 (call core.svec %₆₄ core.Any core.Any) +66 (call core.svec) +67 SourceLocation::6:5 +68 (call core.svec %₆₅ %₆₆ %₆₇) +69 --- method core.nothing %₆₈ slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)] 1 TestMod.ReallyXIPromise 2 slot₁/#ctor-self# @@ -921,21 +963,23 @@ end 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) 13 slot₄/tmp 14 (return %₁₃) -64 TestMod.X -65 (call core.apply_type core.Type %₆₄) -66 (call core.svec %₆₅ core.Any core.Any core.Any) -67 (call core.svec) -68 SourceLocation::10:5 -69 (call core.svec %₆₆ %₆₇ %₆₈) -70 --- method core.nothing %₆₉ +70 latestworld +71 TestMod.X +72 (call core.apply_type core.Type %₇₁) +73 (call core.svec %₇₂ core.Any core.Any core.Any) +74 (call core.svec) +75 SourceLocation::10:5 +76 (call core.svec %₇₃ %₇₄ %₇₅) +77 --- method core.nothing %₇₆ slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)] 1 slot₁/#ctor-self# 2 (new %₁ slot₂/a) 3 (return %₂) -71 TestMod.X -72 (call core.apply_type core.Type %₇₁) -73 (call JuliaLowering.bind_docs! %₇₂ "Docs for X constructor\n" %₆₉) -74 (return core.nothing) +78 latestworld +79 TestMod.X +80 (call core.apply_type core.Type %₇₉) +81 (call JuliaLowering.bind_docs! 
%₈₀ "Docs for X constructor\n" %₇₆) +82 (return core.nothing) ######################################## # User defined inner constructors and helper functions for structs with type params @@ -948,7 +992,7 @@ end #--------------------- 1 (newvar slot₅/f) 2 (global TestMod.X) -3 (const TestMod.X) +3 latestworld 4 (= slot₂/S (call core.TypeVar :S)) 5 (= slot₃/T (call core.TypeVar :T)) 6 slot₂/S @@ -959,76 +1003,81 @@ end 11 (call core._structtype TestMod :X %₈ %₉ %₁₀ false 1) 12 (= slot₄/X %₁₁) 13 (call core._setsuper! %₁₁ core.Any) -14 (isdefined TestMod.X) -15 (gotoifnot %₁₄ label₃₅) +14 (call core.isdefinedglobal TestMod :X false) +15 (gotoifnot %₁₄ label₁₉) 16 TestMod.X -17 (call core._equiv_typedef %₁₆ %₁₁) -18 (gotoifnot %₁₇ label₃₂) -19 TestMod.X -20 (= slot₄/X %₁₉) -21 TestMod.X -22 (call top.getproperty %₂₁ :body) -23 (call top.getproperty %₂₂ :body) -24 (call top.getproperty %₂₃ :parameters) -25 (call top.indexed_iterate %₂₄ 1) -26 (= slot₂/S (call core.getfield %₂₅ 1)) -27 (= slot₁/iterstate (call core.getfield %₂₅ 2)) -28 slot₁/iterstate -29 (call top.indexed_iterate %₂₄ 2 %₂₈) -30 (= slot₃/T (call core.getfield %₂₉ 1)) -31 (goto label₃₄) -32 slot₄/X -33 (= TestMod.X %₃₂) -34 (goto label₃₇) -35 slot₄/X -36 (= TestMod.X %₃₅) -37 slot₄/X +17 (= slot₈/if_val (call core._equiv_typedef %₁₆ %₁₁)) +18 (goto label₂₀) +19 (= slot₈/if_val false) +20 slot₈/if_val +21 (gotoifnot %₂₀ label₂₅) +22 TestMod.X +23 (= slot₉/if_val %₂₂) +24 (goto label₂₆) +25 (= slot₉/if_val false) +26 slot₉/if_val +27 (gotoifnot %₂₀ label₃₈) +28 TestMod.X +29 (call top.getproperty %₂₈ :body) +30 (call top.getproperty %₂₉ :body) +31 (call top.getproperty %₃₀ :parameters) +32 (call top.indexed_iterate %₃₁ 1) +33 (= slot₂/S (call core.getfield %₃₂ 1)) +34 (= slot₁/iterstate (call core.getfield %₃₂ 2)) +35 slot₁/iterstate +36 (call top.indexed_iterate %₃₁ 2 %₃₅) +37 (= slot₃/T (call core.getfield %₃₆ 1)) 38 (call core.svec core.Any) -39 (call core._typebody! 
%₃₇ %₃₈) -40 TestMod.X -41 TestMod.A -42 TestMod.B -43 (call core.apply_type %₄₀ %₄₁ %₄₂) -44 (call core.apply_type core.Type %₄₃) -45 (call core.svec %₄₄) -46 (call core.svec) -47 SourceLocation::3:5 -48 (call core.svec %₄₅ %₄₆ %₄₇) -49 --- method core.nothing %₄₈ +39 (call core._typebody! %₂₆ %₁₁ %₃₈) +40 (constdecl TestMod.X %₃₉) +41 latestworld +42 TestMod.X +43 TestMod.A +44 TestMod.B +45 (call core.apply_type %₄₂ %₄₃ %₄₄) +46 (call core.apply_type core.Type %₄₅) +47 (call core.svec %₄₆) +48 (call core.svec) +49 SourceLocation::3:5 +50 (call core.svec %₄₇ %₄₈ %₄₉) +51 --- method core.nothing %₅₀ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -50 (= slot₆/U (call core.TypeVar :U)) -51 (= slot₇/V (call core.TypeVar :V)) -52 TestMod.X -53 slot₆/U -54 slot₇/V -55 (call core.apply_type %₅₂ %₅₃ %₅₄) -56 (call core.apply_type core.Type %₅₅) -57 (call core.svec %₅₆) -58 slot₆/U -59 slot₇/V -60 (call core.svec %₅₈ %₅₉) -61 SourceLocation::4:5 -62 (call core.svec %₅₇ %₆₀ %₆₁) -63 --- method core.nothing %₆₂ +52 latestworld +53 (= slot₆/U (call core.TypeVar :U)) +54 (= slot₇/V (call core.TypeVar :V)) +55 TestMod.X +56 slot₆/U +57 slot₇/V +58 (call core.apply_type %₅₅ %₅₆ %₅₇) +59 (call core.apply_type core.Type %₅₈) +60 (call core.svec %₅₉) +61 slot₆/U +62 slot₇/V +63 (call core.svec %₆₁ %₆₂) +64 SourceLocation::4:5 +65 (call core.svec %₆₀ %₆₃ %₆₄) +66 --- method core.nothing %₆₅ slots: [slot₁/#ctor-self#] 1 slot₁/#ctor-self# 2 (new %₁ 1) 3 (return %₂) -64 (call core.svec) -65 (call core.svec) -66 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₆₄ %₆₅) -67 TestMod.#f##1 -68 (new %₆₇) -69 (= slot₅/f %₆₈) -70 TestMod.#f##1 -71 (call core.svec %₇₀) -72 (call core.svec) -73 SourceLocation::5:5 -74 (call core.svec %₇₁ %₇₂ %₇₃) -75 --- method core.nothing %₇₄ +67 latestworld +68 (call core.svec) +69 (call core.svec) +70 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₆₈ %₆₉) +71 latestworld +72 TestMod.#f##1 +73 (new %₇₂) +74 (= slot₅/f 
%₇₃) +75 TestMod.#f##1 +76 (call core.svec %₇₅) +77 (call core.svec) +78 SourceLocation::5:5 +79 (call core.svec %₇₆ %₇₇ %₇₈) +80 --- method core.nothing %₇₉ slots: [slot₁/#self#(!read)] 1 TestMod.X 2 TestMod.A @@ -1036,7 +1085,8 @@ end 4 (call core.apply_type %₁ %₂ %₃) 5 (new %₄ 1) 6 (return %₅) -76 (return core.nothing) +81 latestworld +82 (return core.nothing) ######################################## # new() calls with splats; `Any` fields @@ -1047,42 +1097,45 @@ struct X end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (call core.svec) 4 (call core.svec :x :y) 5 (call core.svec) 6 (call core._structtype TestMod :X %₃ %₄ %₅ false 2) 7 (= slot₁/X %₆) 8 (call core._setsuper! %₆ core.Any) -9 (isdefined TestMod.X) -10 (gotoifnot %₉ label₂₀) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) 11 TestMod.X -12 (call core._equiv_typedef %₁₁ %₆) -13 (gotoifnot %₁₂ label₁₇) -14 TestMod.X -15 (= slot₁/X %₁₄) -16 (goto label₁₉) -17 slot₁/X -18 (= TestMod.X %₁₇) -19 (goto label₂₂) -20 slot₁/X -21 (= TestMod.X %₂₀) -22 slot₁/X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec core.Any core.Any) -24 (call core._typebody! %₂₂ %₂₃) -25 TestMod.X -26 (call core.apply_type core.Type %₂₅) -27 (call core.svec %₂₆ core.Any) -28 (call core.svec) -29 SourceLocation::4:5 -30 (call core.svec %₂₇ %₂₈ %₂₉) -31 --- method core.nothing %₃₀ +24 (call core._typebody! 
%₂₁ %₆ %₂₃) +25 (constdecl TestMod.X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈ core.Any) +30 (call core.svec) +31 SourceLocation::4:5 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ slots: [slot₁/#ctor-self# slot₂/xs] 1 slot₁/#ctor-self# 2 (call core._apply_iterate top.iterate core.tuple slot₂/xs) 3 (splatnew %₁ %₂) 4 (return %₃) -32 (return core.nothing) +34 latestworld +35 (return core.nothing) ######################################## # new() calls with splats; typed fields @@ -1093,7 +1146,7 @@ struct X{T} end #--------------------- 1 (global TestMod.X) -2 (const TestMod.X) +2 latestworld 3 (= slot₁/T (call core.TypeVar :T)) 4 slot₁/T 5 (call core.svec %₄) @@ -1102,40 +1155,42 @@ end 8 (call core._structtype TestMod :X %₅ %₆ %₇ false 2) 9 (= slot₂/X %₈) 10 (call core._setsuper! %₈ core.Any) -11 (isdefined TestMod.X) -12 (gotoifnot %₁₁ label₂₇) +11 (call core.isdefinedglobal TestMod :X false) +12 (gotoifnot %₁₁ label₁₆) 13 TestMod.X -14 (call core._equiv_typedef %₁₃ %₈) -15 (gotoifnot %₁₄ label₂₄) -16 TestMod.X -17 (= slot₂/X %₁₆) -18 TestMod.X -19 (call top.getproperty %₁₈ :body) -20 (call top.getproperty %₁₉ :parameters) -21 (call top.indexed_iterate %₂₀ 1) -22 (= slot₁/T (call core.getfield %₂₁ 1)) -23 (goto label₂₆) -24 slot₂/X -25 (= TestMod.X %₂₄) -26 (goto label₂₉) -27 slot₂/X -28 (= TestMod.X %₂₇) -29 slot₂/X +14 (= slot₄/if_val (call core._equiv_typedef %₁₃ %₈)) +15 (goto label₁₇) +16 (= slot₄/if_val false) +17 slot₄/if_val +18 (gotoifnot %₁₇ label₂₂) +19 TestMod.X +20 (= slot₅/if_val %₁₉) +21 (goto label₂₃) +22 (= slot₅/if_val false) +23 slot₅/if_val +24 (gotoifnot %₁₇ label₃₀) +25 TestMod.X +26 (call top.getproperty %₂₅ :body) +27 (call top.getproperty %₂₆ :parameters) +28 (call top.indexed_iterate %₂₇ 1) +29 (= slot₁/T (call core.getfield %₂₈ 1)) 30 slot₁/T 31 TestMod.A 32 (call core.svec %₃₀ %₃₁) -33 (call core._typebody! 
%₂₉ %₃₂) -34 (= slot₃/T (call core.TypeVar :T)) -35 TestMod.X -36 slot₃/T -37 (call core.apply_type %₃₅ %₃₆) -38 (call core.apply_type core.Type %₃₇) -39 (call core.svec %₃₈ core.Any) -40 slot₃/T -41 (call core.svec %₄₀) -42 SourceLocation::4:5 -43 (call core.svec %₃₉ %₄₁ %₄₂) -44 --- method core.nothing %₄₃ +33 (call core._typebody! %₂₃ %₈ %₃₂) +34 (constdecl TestMod.X %₃₃) +35 latestworld +36 (= slot₃/T (call core.TypeVar :T)) +37 TestMod.X +38 slot₃/T +39 (call core.apply_type %₃₇ %₃₈) +40 (call core.apply_type core.Type %₃₉) +41 (call core.svec %₄₀ core.Any) +42 slot₃/T +43 (call core.svec %₄₂) +44 SourceLocation::4:5 +45 (call core.svec %₄₁ %₄₃ %₄₄) +46 --- method core.nothing %₄₅ slots: [slot₁/#ctor-self# slot₂/xs slot₃/tmp slot₄/tmp] 1 (call core._apply_iterate top.iterate core.tuple slot₂/xs) 2 (call core.nfields %₁) @@ -1168,7 +1223,8 @@ end 29 slot₄/tmp 30 (new %₁₁ %₂₀ %₂₉) 31 (return %₃₀) -45 (return core.nothing) +47 latestworld +48 (return core.nothing) ######################################## # Error: new doesn't accept keywords From 46ed43e37a5906561e34417ab7e91eede19db8dd Mon Sep 17 00:00:00 2001 From: Em Chu Date: Fri, 27 Jun 2025 09:12:05 -0700 Subject: [PATCH 1025/1109] Remove use of JuliaSyntax `DOTOP_FLAG` and `is_dotted` These have been removed upstream on the main branch and replaced with a small number of new `Kind`s. 
--- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/desugaring.jl | 21 ++++++++------------- JuliaLowering/src/syntax_graph.jl | 26 ++++++++++++++------------ JuliaLowering/test/functions_ir.jl | 4 ++-- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index a365a9e69eb43..315cd9eabac58 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -13,7 +13,7 @@ using JuliaSyntax: highlight, Kind, @KSet_str using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags, numeric_flags using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext -using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error, is_dotted +using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error _include("kinds.jl") _register_kinds() diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 9661702d4a876..08f42154e306d 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -690,10 +690,10 @@ function expand_dotcall(ctx, ex) ] elseif k == K"comparison" expand_dotcall(ctx, expand_compare_chain(ctx, ex)) - elseif (k == K"&&" || k == K"||") && is_dotted(ex) + elseif k == K".&&" || k == K".||" @ast ctx ex [K"call" "broadcasted"::K"top" - (k == K"&&" ? "andand" : "oror")::K"top" + (k == K".&&" ? "andand" : "oror")::K"top" (expand_dotcall(ctx, arg) for arg in children(ex))... 
] else @@ -702,8 +702,7 @@ function expand_dotcall(ctx, ex) end function expand_fuse_broadcast(ctx, ex) - if kind(ex) == K"=" - @assert is_dotted(ex) + if kind(ex) == K".=" || kind(ex) == K".op=" @chk numchildren(ex) == 2 lhs = ex[1] kl = kind(lhs) @@ -1314,7 +1313,7 @@ end function expand_update_operator(ctx, ex) k = kind(ex) - dotted = is_dotted(ex) + dotted = k == K".op=" @chk numchildren(ex) == 3 lhs = ex[1] @@ -1356,7 +1355,7 @@ function expand_update_operator(ctx, ex) @ast ctx ex [K"block" stmts... - [K"="(syntax_flags=(dotted ? JuliaSyntax.DOTOP_FLAG : nothing)) + [(dotted ? K".=" : K"=") lhs [(dotted ? K"dotcall" : K"call") op @@ -4247,7 +4246,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) throw(LoweringError(ex, "unimplemented or unsupported atomic declaration")) elseif k == K"call" expand_call(ctx, ex) - elseif k == K"dotcall" || ((k == K"&&" || k == K"||") && is_dotted(ex)) + elseif k == K"dotcall" || k == K".&&" || k == K".||" || k == K".=" expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) elseif k == K"." expand_forms_2(ctx, expand_dot(ctx, ex)) @@ -4283,14 +4282,10 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) adopt_scope(string(k)::K"Identifier", ex) children(ex)... ]) - elseif k == K"op=" + elseif k == K"op=" || k == K".op=" expand_forms_2(ctx, expand_update_operator(ctx, ex)) elseif k == K"=" - if is_dotted(ex) - expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) - else - expand_assignment(ctx, ex) - end + expand_assignment(ctx, ex) elseif k == K"break" numchildren(ex) > 0 ? 
ex : @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"] diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 35c9b188635d6..ff9edafc9fbb6 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -137,17 +137,20 @@ function Base.getproperty(graph::SyntaxGraph, name::Symbol) end function sethead!(graph, id::NodeId, h::JuliaSyntax.SyntaxHead) - graph.kind[id] = kind(h) - f = flags(h) - if f != 0 - graph.syntax_flags[id] = f - end + sethead!(graph, id, kind(h)) + setflags!(graph, id, flags(h)) end function sethead!(graph, id::NodeId, k::Kind) graph.kind[id] = k end +function setflags!(graph, id::NodeId, f::UInt16) + if f != 0 + graph.syntax_flags[id] = f + end +end + function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) id = newnode!(graph) sethead!(graph, id, head(node)) @@ -307,6 +310,7 @@ JuliaSyntax.source_line(src::LineNumberNode) = src.line # The follow somewhat strange cases are for where LineNumberNode is standing in # for SourceFile because we've only got Expr-based provenance info JuliaSyntax.sourcefile(src::LineNumberNode) = src +JuliaSyntax.sourcetext(src::LineNumberNode) = SubString("") JuliaSyntax.source_location(src::LineNumberNode, byte_index::Integer) = (src.line, 0) JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode, byte_index::Integer) = src JuliaSyntax.filename(src::LineNumberNode) = string(src.file) @@ -537,13 +541,11 @@ end JuliaSyntax.sourcefile(ex::SyntaxTree) = sourcefile(sourceref(ex)) JuliaSyntax.byte_range(ex::SyntaxTree) = byte_range(sourceref(ex)) -function JuliaSyntax._expr_leaf_val(ex::SyntaxTree) +function JuliaSyntax._expr_leaf_val(ex::SyntaxTree, _...) 
name = get(ex, :name_val, nothing) - if !isnothing(name) - Symbol(name) - else - ex.value - end + !isnothing(name) && return Symbol(name) + name = get(ex, :value, nothing) + return name end Base.Expr(ex::SyntaxTree) = JuliaSyntax.to_expr(ex) @@ -604,7 +606,7 @@ macro SyntaxTree(ex_old) throw(ArgumentError("@SyntaxTree expects a `quote` block or `:`-quoted expression")) end # 2. Re-parse the current source file as SyntaxTree instead - fname = String(__source__.file) + fname = isnothing(__source__.file) ? error("No current file") : String(__source__.file) if occursin(r"REPL\[\d+\]", fname) # Assume we should look at last history entry in REPL try diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 5ea648c2a2160..b621a8bba9467 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1482,7 +1482,7 @@ end 18 (call core.svec %₁₅ %₁₆ %₁₇) 19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] - 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68, 71]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0000), 0x00000046, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0000), 0x0000003b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000016, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000010, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000019, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000013, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, 
nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])])) (call core.svec :#self# :x :y) (call core.svec))) + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0080), 0x00000046, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0080), 0x0000003b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000016, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000010, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000019, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000013, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])])) (call core.svec :#self# :x :y) (call core.svec))) 2 (meta :generated_only) 3 (return core.nothing) 20 latestworld @@ -1528,7 +1528,7 @@ end 18 (call core.svec %₁₅ %₁₆ %₁₇) 19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/y 
slot₄/maybe_gen_stuff slot₅/nongen_stuff] - 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269, 272]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0000), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x000000e8, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x0000000d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0000), 0x0000009d, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0000), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0000), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x00000026, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, 
nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0000), 0x00000037, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0000), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 
0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0000), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x0020), 0x0000001f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0080), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 
0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x000000e8, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000000d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 
0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0080), 0x0000009d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0080), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000026, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 
0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000037, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, 
nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x00a0), 0x0000001f, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) 2 TestMod.bothgen 3 (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y)) 4 TestMod.some_nongen_stuff From 14240e0746ad26d470b78ca6fe747f87f959a0c6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 5 Aug 2025 11:57:59 +1000 Subject: [PATCH 1026/1109] Update for JuliaSyntax flags refactor One test here depends on GreenNode printing which prints the raw flags. An update to JuliaSyntax will fix that soon. --- JuliaLowering/README.md | 2 +- JuliaLowering/test/functions_ir.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 47ea97db06cb5..83b10ebb8b8b8 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -29,7 +29,7 @@ This work is intended to Note this is a work in progress; many types of syntax are not yet handled. 1. You need a 1.13.0-DEV build of Julia: At least 1.13.0-DEV.880. Commit `5ebc5b463ea` is currently known to work. 
Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. -2. Use commit `46723f0` of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) +2. Use commit `e02f29f` of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) 3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) 4. Run the demo `include("test/demo.jl")` diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index b621a8bba9467..525db70b39bda 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1528,7 +1528,7 @@ end 18 (call core.svec %₁₅ %₁₆ %₁₇) 19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff] - 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0080), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x000000e8, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000000d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0080), 0x0000009d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0080), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000026, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000037, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, 
nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x00a0), 0x0000001f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0080), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 
0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x000000e8, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000000d, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0080), 0x0000009d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0080), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000026, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000037, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, 
nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x0180), 0x0000001f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) 2 TestMod.bothgen 3 (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y)) 4 TestMod.some_nongen_stuff From e0aace48a34c3cdf0a3f698f6f86922597b9f01b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 5 Aug 2025 12:00:27 
+1000 Subject: [PATCH 1027/1109] Remove old versions of Julia from CI These won't work with the new `CodeInfo`, so it's pointless to run our tests on them, for now. At some point it might make sense to make the non-eval related parts of JuliaLowering work on older versions for use by tooling packages, but it's unclear this is practical. --- JuliaLowering/.github/workflows/CI.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/JuliaLowering/.github/workflows/CI.yml b/JuliaLowering/.github/workflows/CI.yml index e081dae8470c7..7ed1133e01315 100644 --- a/JuliaLowering/.github/workflows/CI.yml +++ b/JuliaLowering/.github/workflows/CI.yml @@ -19,8 +19,6 @@ jobs: fail-fast: false matrix: version: - - '1.0' - - '1.11' - 'nightly' os: - ubuntu-latest From 97434aea715bd3e14fbbd46c6d9bab7f89a82322 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 5 Aug 2025 20:51:32 +1000 Subject: [PATCH 1028/1109] Abbreviated non-MIME `show()` for `GreenNode` (JuliaLang/JuliaSyntax.jl#581) Non-MIME `show()` for `GreenNode` was using the default implementation which is extremely verbose. Here we print it as an S-expression instead. I've made a choice to show absolute position within the parent node rather than node span so that it directly relates to indices in the string it was derived from. 
--- JuliaSyntax/src/porcelain/green_node.jl | 19 +++++++++++++++++++ JuliaSyntax/src/porcelain/syntax_tree.jl | 2 -- JuliaSyntax/test/green_node.jl | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/JuliaSyntax/src/porcelain/green_node.jl b/JuliaSyntax/src/porcelain/green_node.jl index b06dab56cbbe1..7838ff733c0bf 100644 --- a/JuliaSyntax/src/porcelain/green_node.jl +++ b/JuliaSyntax/src/porcelain/green_node.jl @@ -118,6 +118,25 @@ function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractStr _show_green_node(io, node, "", 1, str, show_trivia) end +function _show_green_node_sexpr(io, node::GreenNode, position) + if is_leaf(node) + print(io, position, "-", position+node.span-1, "::", untokenize(head(node); unique=false)) + else + print(io, "(", untokenize(head(node); unique=false)) + p = position + for n in children(node) + print(io, ' ') + _show_green_node_sexpr(io, n, p) + p += n.span + end + print(io, ')') + end +end + +function Base.show(io::IO, node::GreenNode) + _show_green_node_sexpr(io, node, 1) +end + function GreenNode(cursor::GreenTreeCursor) chead = head(cursor) T = typeof(chead) diff --git a/JuliaSyntax/src/porcelain/syntax_tree.jl b/JuliaSyntax/src/porcelain/syntax_tree.jl index 1002919c43cb5..4ad22cf699de2 100644 --- a/JuliaSyntax/src/porcelain/syntax_tree.jl +++ b/JuliaSyntax/src/porcelain/syntax_tree.jl @@ -250,11 +250,9 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind) end else print(io, "(", untokenize(head(node))) - first = true for n in children(node) print(io, ' ') _show_syntax_node_sexpr(io, n, show_kind) - first = false end print(io, ')') end diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl index cc0294e0ea335..0c3be65873c2e 100644 --- a/JuliaSyntax/test/green_node.jl +++ b/JuliaSyntax/test/green_node.jl @@ -62,4 +62,7 @@ 10:10 │ Identifier ✔ "z" 11:11 │ ) ")" """ + + @test sprint(show, parsestmt(GreenNode, "a + bb - f(ccc)")) == + "(call-i (call-i 
1-1::Identifier 2-2::Whitespace-t 3-3::Identifier 4-4::Whitespace-t 5-6::Identifier) 7-7::Whitespace-t 8-8::Identifier 9-9::Whitespace-t (call 10-10::Identifier 11-11::(-t 12-14::Identifier 15-15::)-t))" end From d781112ea45abada06113fef7ba41965a23634da Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 6 Aug 2025 03:37:43 +0900 Subject: [PATCH 1029/1109] Add stacktrace capture to MacroExpansionError `MacroExpansionError` has good information about error location, but does not preserve information about the error raised by user macros beyond the error message itself. Such information is very useful for tooling like language servers, and stacktraces are particularly important. This commit adds a `stacktrace::Vector{Base.StackTraces.StackFrame}` field to `MacroExpansionError`. New macro definitions still call the `MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all)` constructor, which internally calls `stacktrace(...)`, so the user-facing interface remains unchanged. Additionally, `scrub_expand_macro_stacktrace` is implemented to automatically trim information about JL internal functions that are not useful to users. 
--- JuliaLowering/Project.toml | 3 ++ JuliaLowering/src/macro_expansion.jl | 21 +++++++++----- JuliaLowering/test/ccall_demo.jl | 5 ++-- JuliaLowering/test/macros.jl | 43 ++++++++++++++++++++++++---- JuliaLowering/test/runtests.jl | 4 +-- 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml index 72362a643b714..256ebf76965f3 100644 --- a/JuliaLowering/Project.toml +++ b/JuliaLowering/Project.toml @@ -6,6 +6,9 @@ version = "1.0.0-DEV" [deps] JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" +[sources] +JuliaSyntax = {rev = "e02f29f", url = "https://github.com/JuliaLang/JuliaSyntax.jl"} + [compat] julia = "1" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 1e4ac7565ca2c..9bd6f43a2ba0e 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -79,13 +79,21 @@ struct MacroExpansionError ex::SyntaxTree msg::String position::Symbol + stacktrace::Vector{Base.StackTraces.StackFrame} end """ `position` - the source position relative to the node - may be `:begin` or `:end` or `:all` """ function MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all) - MacroExpansionError(nothing, ex, msg, position) + MacroExpansionError(nothing, ex, msg, position, scrub_expand_macro_stacktrace(stacktrace(backtrace()))) +end + +function scrub_expand_macro_stacktrace(stacktrace::Vector{Base.StackTraces.StackFrame}) + idx = @something findfirst(stacktrace) do stackframe::Base.StackTraces.StackFrame + stackframe.func === :expand_macro && stackframe.file === Symbol(@__FILE__) + end error("`scrub_expand_macro_stacktrace` is expected to be called from `expand_macro`") + return stacktrace[1:idx-1] end function Base.showerror(io::IO, exc::MacroExpansionError) @@ -113,7 +121,7 @@ function Base.showerror(io::IO, exc::MacroExpansionError) highlight(io, src.file, byterange, note=exc.msg) end -function eval_macro_name(ctx, ex) +function 
eval_macro_name(ctx::MacroExpansionContext, ex::SyntaxTree) # `ex1` might contain a nontrivial mix of scope layers so we can't just # `eval()` it, as it's already been partially lowered by this point. # Instead, we repeat the latter parts of `lower()` here. @@ -127,7 +135,7 @@ function eval_macro_name(ctx, ex) eval(mod, expr_form) end -function expand_macro(ctx, ex) +function expand_macro(ctx::MacroExpansionContext, ex::SyntaxTree) @assert kind(ex) == K"macrocall" macname = ex[1] @@ -151,9 +159,9 @@ function expand_macro(ctx, ex) if exc isa MacroExpansionError # Add context to the error. # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? - rethrow(MacroExpansionError(mctx, exc.ex, exc.msg, exc.position)) + rethrow(MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.stacktrace)) else - throw(MacroExpansionError(mctx, ex, "Error expanding macro", :all)) + throw(MacroExpansionError(mctx, ex, "Error expanding macro", :all, scrub_expand_macro_stacktrace(stacktrace(catch_backtrace())))) end end @@ -237,7 +245,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) @chk numchildren(ex) == 1 # TODO: Upstream should set a general flag for detecting parenthesized # expressions so we don't need to dig into `green_tree` here. Ugh! 
- plain_symbol = has_flags(ex, JuliaSyntax.COLON_QUOTE) && + plain_symbol = has_flags(ex, JuliaSyntax.COLON_QUOTE) && kind(ex[1]) == K"Identifier" && (sr = sourceref(ex); sr isa SourceRef && kind(sr.green_tree[2]) != K"parens") if plain_symbol @@ -337,4 +345,3 @@ function expand_forms_1(mod::Module, ex::SyntaxTree) ctx.current_layer) return ctx2, reparent(ctx2, ex2) end - diff --git a/JuliaLowering/test/ccall_demo.jl b/JuliaLowering/test/ccall_demo.jl index 0d7b784c377eb..f5e2e987e3839 100644 --- a/JuliaLowering/test/ccall_demo.jl +++ b/JuliaLowering/test/ccall_demo.jl @@ -105,7 +105,7 @@ function ccall_macro_lower(ex, convention, func, rettype, types, args, num_varar push!(roots, argi) push!(cargs, ast":(Base.unsafe_convert($type, $argi))") end - push!(statements, + push!(statements, @ast ex ex [K"foreigncall" func rettype @@ -126,5 +126,4 @@ function var"@ccall"(ctx::JuliaLowering.MacroContext, ex) ccall_macro_lower(ex, "ccall", ccall_macro_parse(ex)...) end -end - +end # module CCall diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 6e25c32608b51..dad958efeb4cb 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -1,6 +1,8 @@ -@testset "macros" begin +module macros -test_mod = Module() +using JuliaLowering, Test + +module test_mod end JuliaLowering.include_string(test_mod, """ module M @@ -75,7 +77,7 @@ end """) @test JuliaLowering.include_string(test_mod, """ -let +let x = "`x` from outer scope" M.@foo x end @@ -89,7 +91,7 @@ end @test !isdefined(test_mod.M, :a_global) @test JuliaLowering.include_string(test_mod, """ -begin +begin M.@set_a_global 42 M.a_global end @@ -133,13 +135,42 @@ M.@recursive 3 """) == (3, (2, (1, 0))) @test let - ex = parsestmt(SyntaxTree, "M.@outer()", filename="foo.jl") + ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl") expanded = JuliaLowering.macroexpand(test_mod, ex) - sourcetext.(flattened_provenance(expanded[2])) + 
JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) end == [ "M.@outer()" "@inner" "2" ] +JuliaLowering.include_string(test_mod, """ +f_throw(x) = throw(x) +macro m_throw(x) + :(\$(f_throw(x))) +end +""") +let ret = try + JuliaLowering.include_string(test_mod, "_never_exist = @m_throw 42") + catch err + err + end + @test ret isa JuliaLowering.MacroExpansionError + @test length(ret.stacktrace) == 2 + @test ret.stacktrace[1].func === :f_throw + @test ret.stacktrace[2].func === Symbol("@m_throw") end + +include("ccall_demo.jl") +@test JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3 +let ret = try + JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)") + catch e + e + end + @test ret isa JuliaLowering.MacroExpansionError + @test ret.msg == "Expected a return type annotation like `::T`" + @test any(sf->sf.func===:ccall_macro_parse, ret.stacktrace) +end + +end # module macros diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index f8a76bae4d55a..d628fde0911bb 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -3,7 +3,6 @@ using Test include("utils.jl") @testset "JuliaLowering.jl" begin - include("syntax_graph.jl") include("ir_tests.jl") @@ -20,11 +19,10 @@ include("utils.jl") include("generators.jl") include("import.jl") include("loops.jl") - include("macros.jl") + @testset "macros" include("macros.jl") include("misc.jl") include("modules.jl") include("quoting.jl") include("scopes.jl") include("typedefs.jl") - end From 77090d488537b8d543ea3a52891a420cd1089018 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 6 Aug 2025 16:05:24 +0900 Subject: [PATCH 1030/1109] Avoid capturing stacktrace, just use `rethrow` instead --- JuliaLowering/src/macro_expansion.jl | 17 +++++------------ JuliaLowering/test/macros.jl | 25 +++++++++++++------------ 2 files changed, 18 insertions(+), 24 deletions(-) diff --git 
a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 9bd6f43a2ba0e..473dab2c060fe 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -79,21 +79,13 @@ struct MacroExpansionError ex::SyntaxTree msg::String position::Symbol - stacktrace::Vector{Base.StackTraces.StackFrame} end """ `position` - the source position relative to the node - may be `:begin` or `:end` or `:all` """ function MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all) - MacroExpansionError(nothing, ex, msg, position, scrub_expand_macro_stacktrace(stacktrace(backtrace()))) -end - -function scrub_expand_macro_stacktrace(stacktrace::Vector{Base.StackTraces.StackFrame}) - idx = @something findfirst(stacktrace) do stackframe::Base.StackTraces.StackFrame - stackframe.func === :expand_macro && stackframe.file === Symbol(@__FILE__) - end error("`scrub_expand_macro_stacktrace` is expected to be called from `expand_macro`") - return stacktrace[1:idx-1] + MacroExpansionError(nothing, ex, msg, position) end function Base.showerror(io::IO, exc::MacroExpansionError) @@ -156,12 +148,13 @@ function expand_macro(ctx::MacroExpansionContext, ex::SyntaxTree) # TODO: Allow invoking old-style macros for compat invokelatest(macfunc, macro_args...) catch exc + # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? + # NOTE: Although currently rethrow() is necessary to allow outside catchers to access full stacktrace information if exc isa MacroExpansionError # Add context to the error. - # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? 
- rethrow(MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.stacktrace)) + rethrow(MacroExpansionError(mctx, exc.ex, exc.msg, exc.position)) else - throw(MacroExpansionError(mctx, ex, "Error expanding macro", :all, scrub_expand_macro_stacktrace(stacktrace(catch_backtrace())))) + rethrow(MacroExpansionError(mctx, ex, "Error expanding macro", :all)) end end diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index dad958efeb4cb..c6c44c41ebf31 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -150,27 +150,28 @@ macro m_throw(x) :(\$(f_throw(x))) end """) -let ret = try +let (err, st) = try JuliaLowering.include_string(test_mod, "_never_exist = @m_throw 42") - catch err - err + catch e + e, stacktrace(catch_backtrace()) end - @test ret isa JuliaLowering.MacroExpansionError - @test length(ret.stacktrace) == 2 - @test ret.stacktrace[1].func === :f_throw - @test ret.stacktrace[2].func === Symbol("@m_throw") + @test err isa JuliaLowering.MacroExpansionError + # Check that `catch_backtrace` can capture the stacktrace of the macro functions + @test any(sf->sf.func===:f_throw, st) + @test any(sf->sf.func===Symbol("@m_throw"), st) end include("ccall_demo.jl") @test JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3 -let ret = try +let (err, st) = try JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)") catch e - e + e, stacktrace(catch_backtrace()) end - @test ret isa JuliaLowering.MacroExpansionError - @test ret.msg == "Expected a return type annotation like `::T`" - @test any(sf->sf.func===:ccall_macro_parse, ret.stacktrace) + @test err isa JuliaLowering.MacroExpansionError + @test err.msg == "Expected a return type annotation like `::T`" + # Check that `catch_backtrace` can capture the stacktrace of the macro function + @test any(sf->sf.func===:ccall_macro_parse, st) end end # module macros From 80611241dc2ab2e3d884719531c3be14f6338815 Mon Sep 17 00:00:00 2001 
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 7 Aug 2025 10:43:51 +0900 Subject: [PATCH 1031/1109] Make `MacroExpansionError` subtype of `Exception` (JuliaLang/JuliaLowering.jl#26) --- JuliaLowering/src/macro_expansion.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 473dab2c060fe..75baba04febcd 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -74,7 +74,7 @@ function adopt_scope(ex, ctx::MacroContext) adopt_scope(ex, ctx.scope_layer.id) end -struct MacroExpansionError +struct MacroExpansionError <: Exception context::Union{Nothing,MacroContext} ex::SyntaxTree msg::String From cbb2a2fa21e2d468cd0a2b9ea9c29b494efa2e86 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:18:35 +0900 Subject: [PATCH 1032/1109] Add error handling for macro name resolution (JuliaLang/JuliaLowering.jl#25) Wrap `eval()` call in try-catch to throw `MacroExpansionError` when macro name evaluation fails, providing clearer error messages for undefined macro names. 
--- Co-authored-by: Claire Foster --- JuliaLowering/src/macro_expansion.jl | 12 ++++++++---- JuliaLowering/test/macros.jl | 9 +++++++++ JuliaLowering/test/macros_ir.jl | 7 +++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 75baba04febcd..f5130a30446a4 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -113,7 +113,7 @@ function Base.showerror(io::IO, exc::MacroExpansionError) highlight(io, src.file, byterange, note=exc.msg) end -function eval_macro_name(ctx::MacroExpansionContext, ex::SyntaxTree) +function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::SyntaxTree) # `ex1` might contain a nontrivial mix of scope layers so we can't just # `eval()` it, as it's already been partially lowered by this point. # Instead, we repeat the latter parts of `lower()` here. @@ -124,21 +124,25 @@ function eval_macro_name(ctx::MacroExpansionContext, ex::SyntaxTree) ctx5, ex5 = linearize_ir(ctx4, ex4) mod = ctx.current_layer.mod expr_form = to_lowered_expr(mod, ex5) - eval(mod, expr_form) + try + eval(mod, expr_form) + catch + throw(MacroExpansionError(mctx, ex, "Macro not found", :all)) + end end function expand_macro(ctx::MacroExpansionContext, ex::SyntaxTree) @assert kind(ex) == K"macrocall" macname = ex[1] - macfunc = eval_macro_name(ctx, macname) + mctx = MacroContext(ctx.graph, ex, ctx.current_layer) + macfunc = eval_macro_name(ctx, mctx, macname) # Macro call arguments may be either # * Unprocessed by the macro expansion pass # * Previously processed, but spliced into a further macro call emitted by # a macro expansion. # In either case, we need to set any unset scope layers before passing the # arguments to the macro call. 
- mctx = MacroContext(ctx.graph, ex, ctx.current_layer) macro_args = Any[mctx] for i in 2:numchildren(ex) push!(macro_args, set_scope_layer(ctx, ex[i], ctx.current_layer.id, false)) diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index c6c44c41ebf31..397e5d74d1945 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -161,6 +161,15 @@ let (err, st) = try @test any(sf->sf.func===Symbol("@m_throw"), st) end +let res = try + JuliaLowering.include_string(test_mod, "_never_exist = @m_not_exist 42") + catch e + e + end + @test res isa JuliaLowering.MacroExpansionError + @test res.msg == "Macro not found" +end + include("ccall_demo.jl") @test JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3 let (err, st) = try diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 65de7733c69ba..eccefb44cef6f 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -125,3 +125,10 @@ function f() #─────┘ ── macro is only allowed in global scope end +######################################## +# Error: Macros not found +_never_exist = @m_not_exist 42 +#--------------------- +MacroExpansionError while expanding @m_not_exist in module Main.TestMod: +_never_exist = @m_not_exist 42 +# └─────────┘ ── Macro not found From 2967b36c69c46a2393ff72141af90c76dc058e54 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 6 Aug 2025 18:35:06 +0900 Subject: [PATCH 1033/1109] Add inner err capture to MacroExpansionError Information about this inner exception is currently completely lost, so without capturing it, there is no way for outside catchers to access this information. 
Co-authored-by: Claire Foster --- JuliaLowering/src/macro_expansion.jl | 27 ++++++++++++++++++--------- JuliaLowering/test/macros.jl | 3 +++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index f5130a30446a4..0931b9bf1902b 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -78,12 +78,19 @@ struct MacroExpansionError <: Exception context::Union{Nothing,MacroContext} ex::SyntaxTree msg::String + "The source position relative to the node - may be `:begin` or `:end` or `:all`" position::Symbol + "Error that occurred inside the macro function call (note that this may not be defined)" + err + MacroExpansionError( + context::Union{Nothing,MacroContext}, ex::SyntaxTree, msg::AbstractString, position::Symbol + ) = new(context, ex, msg, position) + MacroExpansionError( + context::Union{Nothing,MacroContext}, ex::SyntaxTree, msg::AbstractString, position::Symbol, + @nospecialize err + ) = new(context, ex, msg, position, err) end -""" -`position` - the source position relative to the node - may be `:begin` or `:end` or `:all` -""" function MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all) MacroExpansionError(nothing, ex, msg, position) end @@ -126,8 +133,8 @@ function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::Syn expr_form = to_lowered_expr(mod, ex5) try eval(mod, expr_form) - catch - throw(MacroExpansionError(mctx, ex, "Macro not found", :all)) + catch err + throw(MacroExpansionError(mctx, ex, "Macro not found", :all, err)) end end @@ -152,14 +159,16 @@ function expand_macro(ctx::MacroExpansionContext, ex::SyntaxTree) # TODO: Allow invoking old-style macros for compat invokelatest(macfunc, macro_args...) catch exc - # TODO: Using rethrow() is kinda ugh. Is there a way to avoid it? 
- # NOTE: Although currently rethrow() is necessary to allow outside catchers to access full stacktrace information if exc isa MacroExpansionError # Add context to the error. - rethrow(MacroExpansionError(mctx, exc.ex, exc.msg, exc.position)) + newexc = isdefined(exc, :err) ? + MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) : + MacroExpansionError(mctx, exc.ex, exc.msg, exc.position) else - rethrow(MacroExpansionError(mctx, ex, "Error expanding macro", :all)) + newexc = MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc) end + # TODO: We can delete this rethrow when we move to AST-based error propagation. + rethrow(newexc) end if expanded isa SyntaxTree diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 397e5d74d1945..cdb87a3fa982b 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -156,6 +156,7 @@ let (err, st) = try e, stacktrace(catch_backtrace()) end @test err isa JuliaLowering.MacroExpansionError + @test isdefined(err, :err) # Check that `catch_backtrace` can capture the stacktrace of the macro functions @test any(sf->sf.func===:f_throw, st) @test any(sf->sf.func===Symbol("@m_throw"), st) @@ -168,6 +169,7 @@ let res = try end @test res isa JuliaLowering.MacroExpansionError @test res.msg == "Macro not found" + @test isdefined(res, :err) && res.err isa UndefVarError end include("ccall_demo.jl") @@ -179,6 +181,7 @@ let (err, st) = try end @test err isa JuliaLowering.MacroExpansionError @test err.msg == "Expected a return type annotation like `::T`" + @test !isdefined(err, :err) # Check that `catch_backtrace` can capture the stacktrace of the macro function @test any(sf->sf.func===:ccall_macro_parse, st) end From 57cd71954d4fc1d982eb16c4f2a8d51c7d02e63d Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Thu, 7 Aug 2025 16:26:57 +0900 Subject: [PATCH 1034/1109] always set `err` field of `MacroExpansionError` --- JuliaLowering/src/macro_expansion.jl | 11 +++-------- 
JuliaLowering/test/macros.jl | 12 ++++++------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 0931b9bf1902b..d3abadde83b12 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -80,14 +80,11 @@ struct MacroExpansionError <: Exception msg::String "The source position relative to the node - may be `:begin` or `:end` or `:all`" position::Symbol - "Error that occurred inside the macro function call (note that this may not be defined)" + "Error that occurred inside the macro function call (`nothing` if no inner exception)" err - MacroExpansionError( - context::Union{Nothing,MacroContext}, ex::SyntaxTree, msg::AbstractString, position::Symbol - ) = new(context, ex, msg, position) MacroExpansionError( context::Union{Nothing,MacroContext}, ex::SyntaxTree, msg::AbstractString, position::Symbol, - @nospecialize err + @nospecialize err = nothing ) = new(context, ex, msg, position, err) end @@ -161,9 +158,7 @@ function expand_macro(ctx::MacroExpansionContext, ex::SyntaxTree) catch exc if exc isa MacroExpansionError # Add context to the error. - newexc = isdefined(exc, :err) ? 
- MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) : - MacroExpansionError(mctx, exc.ex, exc.msg, exc.position) + newexc = MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) else newexc = MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc) end diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index cdb87a3fa982b..9cfdef9597a45 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -156,20 +156,20 @@ let (err, st) = try e, stacktrace(catch_backtrace()) end @test err isa JuliaLowering.MacroExpansionError - @test isdefined(err, :err) + @test !isnothing(err.err) # Check that `catch_backtrace` can capture the stacktrace of the macro functions @test any(sf->sf.func===:f_throw, st) @test any(sf->sf.func===Symbol("@m_throw"), st) end -let res = try +let err = try JuliaLowering.include_string(test_mod, "_never_exist = @m_not_exist 42") catch e e end - @test res isa JuliaLowering.MacroExpansionError - @test res.msg == "Macro not found" - @test isdefined(res, :err) && res.err isa UndefVarError + @test err isa JuliaLowering.MacroExpansionError + @test err.msg == "Macro not found" + @test err.err isa UndefVarError end include("ccall_demo.jl") @@ -181,7 +181,7 @@ let (err, st) = try end @test err isa JuliaLowering.MacroExpansionError @test err.msg == "Expected a return type annotation like `::T`" - @test !isdefined(err, :err) + @test isnothing(err.err) # Check that `catch_backtrace` can capture the stacktrace of the macro function @test any(sf->sf.func===:ccall_macro_parse, st) end From 8c6ab9171e7f158bea1535be1dcd5b9c849633ab Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Thu, 7 Aug 2025 08:22:41 -0700 Subject: [PATCH 1035/1109] Random lowering bugfixes (JuliaLang/JuliaLowering.jl#24) * Random lowering bugfixes - `init` not provided for possibly-empty `sum()`s in desugaring causing failures for empty ctors, empty tuple destruct - linearize: 
break_block type error - missing desugaring when if-condition is a block - undef var `ex` in expand_arrow_arglist Co-authored-by: Claire Foster --- JuliaLowering/src/desugaring.jl | 21 ++++++------ JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/test/branching.jl | 9 +++++ JuliaLowering/test/destructuring_ir.jl | 16 +++++++++ JuliaLowering/test/functions.jl | 25 ++++++++++++++ JuliaLowering/test/loops.jl | 13 ++++++++ JuliaLowering/test/typedefs_ir.jl | 46 ++++++++++++++++++++++++++ 7 files changed, 120 insertions(+), 12 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 08f42154e306d..33a9364724db8 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -414,7 +414,7 @@ function expand_tuple_destruct(ctx, ex) end if kind(rhs) == K"tuple" - num_splat = sum(kind(rh) == K"..." for rh in children(rhs)) + num_splat = sum(kind(rh) == K"..." for rh in children(rhs); init=0) if num_splat == 0 && (numchildren(lhs) - num_slurp) > numchildren(rhs) throw(LoweringError(ex, "More variables on left hand side than right hand in tuple assignment")) end @@ -1402,7 +1402,7 @@ function expand_condition(ctx, ex) if isblock # Special handling so that the rules for `&&` and `||` can be applied # to the last statement of a block - @ast ctx ex [K"block" ex[1:end-1]... test] + @ast ctx ex [K"block" map(e->expand_forms_2(ctx,e), ex[1:end-1])... 
test] else test end @@ -2112,7 +2112,7 @@ end # Separate decls and assignments (which require re-expansion) # local x, (y=2), z ==> local x; local z; y = 2 -function expand_decls(ctx, ex, is_const=false) +function expand_decls(ctx, ex) declkind = kind(ex) @assert declkind in KSet"local global" declmeta = get(ex, :meta, nothing) @@ -2124,7 +2124,7 @@ function expand_decls(ctx, ex, is_const=false) @chk numchildren(binding) == 2 lhs = strip_decls!(ctx, stmts, declkind, declmeta, binding[1]) push!(stmts, expand_assignment(ctx, @ast ctx binding [kb lhs binding[2]])) - elseif is_sym_decl(binding) && !is_const + elseif is_sym_decl(binding) strip_decls!(ctx, stmts, declkind, declmeta, binding) else throw(LoweringError(ex, "invalid syntax in variable declaration")) @@ -3085,7 +3085,7 @@ function expand_arrow_arglist(ctx, arglist, arrowname) if k == K"where" @ast ctx arglist [K"where" expand_arrow_arglist(ctx, arglist[1], arrowname) - argslist[2] + arglist[2] ] else # The arglist can sometimes be parsed as a block, or something else, and @@ -3094,13 +3094,12 @@ function expand_arrow_arglist(ctx, arglist, arrowname) if k == K"block" @chk numchildren(arglist) == 2 arglist = @ast ctx arglist [K"tuple" - ex[1] - [K"parameters" ex[2]] + arglist[1] + [K"parameters" arglist[2]] ] elseif k != K"tuple" - # `x::Int -> body` arglist = @ast ctx arglist [K"tuple" - ex[1] + arglist[1] ] end @ast ctx arglist [K"call" @@ -3665,7 +3664,7 @@ function _rewrite_ctor_new_calls(ctx, ex, struct_name, global_struct_name, ctor_ full_struct_type = if kind(ex[1]) == K"curly" # new{A,B}(...) new_type_params = ex[1][2:end] - n_type_splat = sum(kind(t) == K"..." for t in new_type_params) + n_type_splat = sum(kind(t) == K"..." 
for t in new_type_params; init=0) n_type_nonsplat = length(new_type_params) - n_type_splat if n_type_splat == 0 && n_type_nonsplat < length(struct_typevars) throw(LoweringError(ex[1], "too few type parameters specified in `new{...}`")) @@ -3685,7 +3684,7 @@ function _rewrite_ctor_new_calls(ctx, ex, struct_name, global_struct_name, ctor_ end end new_args = ex[2:end] - n_splat = sum(kind(t) == K"..." for t in new_args) + n_splat = sum(kind(t) == K"..." for t in new_args; init=0) n_nonsplat = length(new_args) - n_splat n_fields = length(field_types) function throw_n_fields_error(desc) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index b83bf099010ff..95700c1e85bd6 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -703,7 +703,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) if isnothing(outer_target) delete!(ctx.break_targets, name) else - ctx.break_targets = outer_target + ctx.break_targets[name] = outer_target end emit(ctx, end_label) if needs_value diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index 09996bd4c1e7c..e0844b8937268 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -221,6 +221,15 @@ end """) === 3 end +#------------------------------------------------------------------------------- +# Block condition +@test JuliaLowering.include_string(test_mod, """ +let a = true + if begin; x = 2; a; end + x + end +end +""") === 2 #------------------------------------------------------------------------------- @testset "`&&` and `||` chains" begin diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index f6b28b9a3f66d..cd4e3a6ef1e67 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -280,6 +280,22 @@ end 8 (call core.tuple %₁ %₂ %₃) 9 (return %₈) +######################################## +# Error: Destructuring with tuple 
elimination and too few RHS elements +(x,) = () +#--------------------- +LoweringError: +(x,) = () +└───────┘ ── More variables on left hand side than right hand in tuple assignment + +######################################## +# Error: Destructuring with tuple elimination, slurping, and too few RHS elements +(x,y,ys...) = (1,) +#--------------------- +LoweringError: +(x,y,ys...) = (1,) +└────────────────┘ ── More variables on left hand side than right hand in tuple assignment + ######################################## # Destructuring with tuple elimination but not in value position never creates # the tuple diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 4cb4221304c94..634bb13e260f9 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -56,6 +56,31 @@ end 2^4 """) == 16 +#------------------------------------------------------------------------------- +# Arrow syntax +@test JuliaLowering.include_string(test_mod, """ +let + f = ((x::T, y::T) where T) -> x + y + f(1, 2) +end +""") === 3 + +@test JuliaLowering.include_string(test_mod, """ +let + f = ((x::T; y=2) where T) -> x + y + f(1) +end +""") === 3 + +# Passes desugaring, but T is detected as unused and throws an error. +# Is it clear whether this should be `f(x::T) where T` or `f(x::T where T)`? 
+@test_broken JuliaLowering.include_string(test_mod, """ +let + f = ((x::T) where T) -> x + f(1) +end +""") === 1 + #------------------------------------------------------------------------------- # Function definitions @test JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl index 24af5ba1070e4..6f63b28cc2b0e 100644 --- a/JuliaLowering/test/loops.jl +++ b/JuliaLowering/test/loops.jl @@ -95,6 +95,19 @@ let a end """) == [1, 2] +# Break from inner nested loop +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i in 1:2 + for j in 3:4 + push!(a, (i, j)) + j == 6 && break + end + end + a +end +""") == [(1, 3), (1, 4), (2, 3), (2, 4)] # continue @test JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 0785a8f21ce93..1f7395ece1f8d 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -409,6 +409,52 @@ end 34 latestworld 35 (return core.nothing) +######################################## +# Empty struct with empty ctor +struct X + X() = new() +end +#--------------------- +1 (global TestMod.X) +2 latestworld +3 (call core.svec) +4 (call core.svec) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 0) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 (call core.svec) +24 (call core._typebody! 
%₂₁ %₆ %₂₃) +25 (constdecl TestMod.X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈) +30 (call core.svec) +31 SourceLocation::2:5 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#ctor-self#] + 1 slot₁/#ctor-self# + 2 (new %₁) + 3 (return %₂) +34 latestworld +35 (return core.nothing) + ######################################## # Basic struct struct X From 580a5ddaf8b2720e74d263b6fc78a7eb495e7f58 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 11 Aug 2025 05:20:31 +0900 Subject: [PATCH 1036/1109] use `Base.IncludeInto` for `_include` (JuliaLang/JuliaLowering.jl#31) This does not change any behavior on JL side, but this is necessary to allow JET to analyze JL (although we still need to add a JET-specific handling for `JS.register_kinds!` to make JL actually analyzable by JET). --- JuliaLowering/src/JuliaLowering.jl | 2 +- JuliaLowering/src/kinds.jl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 315cd9eabac58..717a3bbe667ae 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -4,7 +4,7 @@ baremodule JuliaLowering using Base # We define a separate _include() for use in this module to avoid mixing method # tables with the public `JuliaLowering.include()` API -_include(path::AbstractString) = Base.include(JuliaLowering, path) +const _include = Base.IncludeInto(JuliaLowering) using Core: eval using JuliaSyntax diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 741307ba94ba5..45138969b2de9 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -1,4 +1,3 @@ - # The following kinds are used in intermediate forms by lowering but are not # part of the surface syntax function _register_kinds() From c45a67c415954316c28463fef0261fd3e7ff178e Mon Sep 17 00:00:00 2001 
From: Claire Foster Date: Tue, 12 Aug 2025 22:27:59 +1000 Subject: [PATCH 1037/1109] Revert to the use of StrMacroName/CmdMacroName kinds (JuliaLang/JuliaSyntax.jl#583) Trying out `macro_name_str` / `macro_name_cmd` in JuliaLowering it turns out to be inconvenient to work with identifiers which change their meaning due to being nested inside another construct. This change revert to the previous use of StrMacroName / CmdMacroName identifier kinds. macro_name is left as-is because it faithfully represents the position of the `@`. This isn't a complete reversion to the previous JuliaSyntax behavior. Previously, SyntaxNode would contain the symbol `@x_str` for the string macro `x` in `x"hi"` despite having kind set to `StrMacroName`. However, appending the `_str` is best seen as a symbolic lowering (/name mangling) step which isn't reflected in the source code and shouldn't be the business of the parser or parser-related tools. Thus, in the code here we defer this name mangling to the `Expr` conversion step instead, and introduce `lower_identifier_name()` as a standard way to do this conversion. To go with these slightly modified semantics and to mimic the `_str` name mangling, `StrMacroName` replaces the previous `StringMacroName`. --- JuliaSyntax/src/integration/expr.jl | 37 ++++++++----------- JuliaSyntax/src/julia/kinds.jl | 10 +++-- JuliaSyntax/src/julia/literal_parsing.jl | 32 +++++++++++++++- JuliaSyntax/src/julia/parser.jl | 38 ++++++++++--------- JuliaSyntax/src/porcelain/syntax_tree.jl | 9 ++++- JuliaSyntax/test/parser.jl | 47 +++++++++++++----------- 6 files changed, 106 insertions(+), 67 deletions(-) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl index c4fd82220f00e..da9c67c99e4ed 100644 --- a/JuliaSyntax/src/integration/expr.jl +++ b/JuliaSyntax/src/integration/expr.jl @@ -246,6 +246,8 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt val isa UInt128 ? 
Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + elseif is_identifier(k) + return lower_identifier_name(val, k) else return val end @@ -294,27 +296,18 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt nodehead, source) end -function adjust_macro_name!(retexpr::Union{Expr, Symbol}, k::Kind) - if !(retexpr isa Symbol) +function adjust_macro_name!(retexpr::Union{Expr, Symbol}) + if retexpr isa Symbol + return lower_identifier_name(retexpr, K"macro_name") + else retexpr::Expr - # can happen for incomplete or errors - (length(retexpr.args) < 2 || retexpr.head != :(.)) && return retexpr - arg2 = retexpr.args[2] - isa(arg2, QuoteNode) || return retexpr - retexpr.args[2] = QuoteNode(adjust_macro_name!(arg2.value, k)) - return retexpr - end - if k == K"macro_name" - if retexpr === Symbol(".") - return Symbol("@__dot__") - else - return Symbol("@$retexpr") + if length(retexpr.args) == 2 && retexpr.head == :(.) + arg2 = retexpr.args[2] + if isa(arg2, QuoteNode) && arg2.value isa Symbol + retexpr.args[2] = QuoteNode(lower_identifier_name(arg2.value, K"macro_name")) + end end - elseif k == K"macro_name_cmd" - return Symbol("@$(retexpr)_cmd") - else - @assert k == K"macro_name_str" - return Symbol("@$(retexpr)_str") + return retexpr end end @@ -334,8 +327,8 @@ end # However, errors can add additional errors tokens which we represent # as e.g. `Expr(:var, ..., Expr(:error))`. return retexpr.args[1] - elseif k in KSet"macro_name macro_name_cmd macro_name_str" - return adjust_macro_name!(retexpr.args[1], k) + elseif k == K"macro_name" + return adjust_macro_name!(retexpr.args[1]) elseif k == K"?" 
retexpr.head = :if elseif k == K"op=" && length(args) == 3 @@ -355,7 +348,7 @@ end elseif k == K"macrocall" if length(args) >= 2 a2 = args[2] - if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"macro_name_cmd" + if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName" # Fix up for custom cmd macros like foo`x` args[2] = a2.args[3] end diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl index 19a00eb2bee8e..96d78ad729db7 100644 --- a/JuliaSyntax/src/julia/kinds.jl +++ b/JuliaSyntax/src/julia/kinds.jl @@ -194,6 +194,10 @@ register_kinds!(JuliaSyntax, 0, [ "BEGIN_IDENTIFIERS" "Identifier" "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering + # String and command macro names are modeled as a special kind of + # identifier as they need to be mangled before lookup. + "StrMacroName" + "CmdMacroName" "END_IDENTIFIERS" "BEGIN_KEYWORDS" @@ -1039,10 +1043,7 @@ register_kinds!(JuliaSyntax, 0, [ "iteration" "comprehension" "typed_comprehension" - # Macro names "macro_name" - "macro_name_cmd" - "macro_name_str" # Container for a single statement/atom plus any trivia and errors "wrapper" "END_SYNTAX_KINDS" @@ -1106,6 +1107,9 @@ const _nonunique_kind_names = Set([ K"String" K"Char" K"CmdString" + + K"StrMacroName" + K"CmdMacroName" ]) """ diff --git a/JuliaSyntax/src/julia/literal_parsing.jl b/JuliaSyntax/src/julia/literal_parsing.jl index 0d716e39d4081..1db36d7f8e44a 100644 --- a/JuliaSyntax/src/julia/literal_parsing.jl +++ b/JuliaSyntax/src/julia/literal_parsing.jl @@ -416,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) parse_int_literal(val_str) elseif k in KSet"BinInt OctInt HexInt" parse_uint_literal(val_str, k) - elseif k == K"Identifier" || k == K"Placeholder" + elseif is_identifier(k) if has_flags(head, RAW_STRING_FLAG) io = IOBuffer() unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false) @@ -442,3 +442,33 @@ function 
parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) end end +""" + lower_identifier_name(name, kind) + +Lower a Julia identifier `name` of given `kind` to the name used by the Julia +runtime. (In particular, this handles the name mangling of macros.) + +This is a lowering (rather than parsing) step, but is needed for `Expr` +conversion and is also used for pretty printing. +""" +function lower_identifier_name(name::AbstractString, k::Kind) + # Replicate eager lowering done by the flisp parser + if k == K"macro_name" + name == "." ? "@__dot__" : "@$name" + elseif k == K"StrMacroName" + "@$(name)_str" + elseif k == K"CmdMacroName" + "@$(name)_cmd" + else + name + end +end + +function lower_identifier_name(name::Symbol, k::Kind) + if k == K"Identifier" + name # avoid unnecessary conversion + else + Symbol(lower_identifier_name(string(name), k)) + end +end + diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl index 2abed160181ab..a2ce4209a8c2a 100644 --- a/JuliaSyntax/src/julia/parser.jl +++ b/JuliaSyntax/src/julia/parser.jl @@ -1519,7 +1519,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = nothing # $A.@x ==> (macrocall (. 
($ A) (macro_name x))) maybe_strmac = true - last_identifier_orig_kind = peek_behind(ps).orig_kind + last_identifier_pos = peek_behind_pos(ps) + last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind while true maybe_strmac_1 = false t = peek_token(ps) @@ -1577,7 +1578,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f (a) ==> (call f (error-t) a) processing_macro_name = maybe_parsed_macro_name( ps, processing_macro_name, mark) - processing_macro_name = false bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) opts = parse_call_arglist(ps, K")") @@ -1714,7 +1714,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump(ps, TRIVIA_FLAG) end parse_macro_name(ps) - last_identifier_orig_kind = peek_behind(ps).orig_kind + last_identifier_pos = peek_behind_pos(ps) + last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind !is_macrocall && emit(ps, m, K"macro_name") macro_atname_range = (m, position(ps)) is_macrocall = true @@ -1747,7 +1748,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, macro_name_mark, K"macro_name") misplaced_atsym_mark = (aterror_mark, position(ps)) end - last_identifier_orig_kind = peek_behind(ps).orig_kind + last_identifier_pos = peek_behind_pos(ps) + last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind maybe_strmac_1 = true emit(ps, mark, K".") end @@ -1784,29 +1786,29 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) origk = last_identifier_orig_kind; origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk)) # Custom string and command literals - # x"str" ==> (macrocall (macro_name_str x) (string-r "str")) - # x`str` ==> (macrocall (macro_name_cmd x) (cmdstring-r "str")) - # x"" ==> (macrocall (macro_name_str x) (string-r "")) - # x`` ==> (macrocall (macro_name_cmd x) (cmdstring-r "")) + # x"str" ==> (macrocall @x_str (string-r "str")) + # x`str` ==> (macrocall @x_cmd 
(cmdstring-r "str")) + # x"" ==> (macrocall @x_str (string-r "")) + # x`` ==> (macrocall @x_cmd (cmdstring-r "")) # Triple quoted processing for custom strings - # r"""\nx""" ==> (macrocall (macro_name_str r) (string-s-r "x")) - # r"""\n x\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\n" "y")) - # r"""\n x\\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\\\n" "y")) + # r"""\nx""" ==> (macrocall @r_str (string-s-r "x")) + # r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y")) + # r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y")) # # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. - outk = is_string_delim(k) ? K"macro_name_str" : K"macro_name_cmd" - emit(ps, mark, outk) + name_kind = is_string_delim(k) ? K"StrMacroName" : K"CmdMacroName" + reset_node!(ps, last_identifier_pos, kind=name_kind) parse_string(ps, true) t = peek_token(ps) k = kind(t) if !preceding_whitespace(t) && is_string_macro_suffix(k) # Macro suffixes can include keywords and numbers - # x"s"y ==> (macrocall (macro_name_str x) (string-r "s") "y") - # x"s"end ==> (macrocall (macro_name_str x) (string-r "s") "end") - # x"s"in ==> (macrocall (macro_name_str x) (string-r "s") "in") - # x"s"2 ==> (macrocall (macro_name_str x) (string-r "s") 2) - # x"s"10.0 ==> (macrocall (macro_name_str x) (string-r "s") 10.0) + # x"s"y ==> (macrocall @x_str (string-r "s") "y") + # x"s"end ==> (macrocall @x_str (string-r "s") "end") + # x"s"in ==> (macrocall @x_str (string-r "s") "in") + # x"s"2 ==> (macrocall @x_str (string-r "s") 2) + # x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0) suffix_kind = (k == K"Identifier" || is_keyword(k) || is_word_operator(k)) ? 
K"String" : k bump(ps, remap_kind=suffix_kind) diff --git a/JuliaSyntax/src/porcelain/syntax_tree.jl b/JuliaSyntax/src/porcelain/syntax_tree.jl index 4ad22cf699de2..ad08b25e6a1df 100644 --- a/JuliaSyntax/src/porcelain/syntax_tree.jl +++ b/JuliaSyntax/src/porcelain/syntax_tree.jl @@ -198,7 +198,7 @@ sourcefile(node::AbstractSyntaxNode) = node.source function leaf_string(ex) if !is_leaf(ex) - throw(ArgumentError("_value_string should be used for leaf nodes only")) + throw(ArgumentError("leaf_string should be used for leaf nodes only")) end k = kind(ex) value = ex.val @@ -243,7 +243,12 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind) if is_error(node) print(io, "(", untokenize(head(node)), ")") else - print(io, leaf_string(node)) + str = leaf_string(node) + k = kind(node) + if is_identifier(k) && !show_kind + str = lower_identifier_name(str, k) + end + print(io, str) if show_kind print(io, "::", kind(node)) end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 64ecc8ea554e2..a6ee4b62f1c8a 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -463,28 +463,30 @@ tests = [ "S{a,b}" => "(curly S a b)" "T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))" # String macros - "x\"str\"" => """(macrocall (macro_name_str x) (string-r "str"))""" - "x`str`" => """(macrocall (macro_name_cmd x) (cmdstring-r "str"))""" - "x\"\"" => """(macrocall (macro_name_str x) (string-r ""))""" - "x``" => """(macrocall (macro_name_cmd x) (cmdstring-r ""))""" - "in\"str\"" => """(macrocall (macro_name_str in) (string-r "str"))""" - "outer\"str\"" => """(macrocall (macro_name_str outer) (string-r "str"))""" + "x\"str\"" => """(macrocall @x_str (string-r "str"))""" + "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" + "x\"\"" => """(macrocall @x_str (string-r ""))""" + "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" + "in\"str\"" => """(macrocall @in_str (string-r "str"))""" + 
"outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" + "A.x\"str\"" => """(macrocall (. A @x_str) (string-r "str"))""" + "A.x`str`" => """(macrocall (. A @x_cmd) (cmdstring-r "str"))""" # Triple quoted processing for custom strings - "r\"\"\"\nx\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x"))""" - "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\n" "y"))""" - "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\\\n" "y"))""" + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))""" + "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))""" # Macro suffixes can include keywords and numbers - "x\"s\"y" => """(macrocall (macro_name_str x) (string-r "s") "y")""" - "x\"s\"end" => """(macrocall (macro_name_str x) (string-r "s") "end")""" - "x\"s\"in" => """(macrocall (macro_name_str x) (string-r "s") "in")""" - "x\"s\"2" => """(macrocall (macro_name_str x) (string-r "s") 2)""" - "x\"s\"10.0" => """(macrocall (macro_name_str x) (string-r "s") 10.0)""" + "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" + "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" + "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" + "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" + "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" # Cmd macro suffixes - "x`s`y" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "y")""" - "x`s`end" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "end")""" - "x`s`in" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "in")""" - "x`s`2" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 2)""" - "x`s`10.0" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 10.0)""" + "x`s`y" => """(macrocall @x_cmd (cmdstring-r "s") "y")""" + "x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")""" + "x`s`in" => 
"""(macrocall @x_cmd (cmdstring-r "s") "in")""" + "x`s`2" => """(macrocall @x_cmd (cmdstring-r "s") 2)""" + "x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)""" ], JuliaSyntax.parse_resword => [ # In normal_context @@ -1167,6 +1169,9 @@ parsestmt_with_kind_tests = [ "a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)" ":+=" => "(quote-: +=::op=)" ":.+=" => "(quote-: (. +=::op=))" + # str/cmd macro name kinds + "x\"str\"" => """(macrocall x::StrMacroName (string-r "str"::String))""" + "x`str`" => """(macrocall x::CmdMacroName (cmdstring-r "str"::CmdString))""" ] @testset "parser `Kind` remapping" begin @@ -1195,8 +1200,8 @@ end # ɛµ normalizes to εμ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall (macro_name \u03B5\u03BC))" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall (macro_name_str \u03B5\u03BC) (string-r \"\"))" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall (macro_name_cmd \u03B5\u03BC) (cmdstring-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" # · and · normalize to ⋅ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" From 17c169430371223df9371566a7ce7da5fafc0d73 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 15 Aug 2025 13:54:08 +1000 Subject: [PATCH 1038/1109] Fixes for opaque closure method lowering (JuliaLang/JuliaLowering.jl#34) opaque_closure_method has special non-evaluated semantics for its argument list, so we need some special non-quoted conversion here for the functionloc argument and the name argument when it's 
set to the global ref Core.nothing. To fix the globalref, I've chosen to translate `"core"::K"nothing"` into a literal `nothing` in general - this is consistent with how ast.c translates the special flisp `(null)` form. --- JuliaLowering/src/eval.jl | 14 +++++++++++--- JuliaLowering/test/closures.jl | 6 ++++++ JuliaLowering/test/misc.jl | 7 ++----- JuliaLowering/test/utils.jl | 5 +++++ 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 98722d6265059..a7896bf0dbcac 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -232,6 +232,10 @@ function to_lowered_expr(mod, ex, ssa_offset=0) if name == "cglobal" # cglobal isn't a true name within core - instead it's a builtin :cglobal + elseif name == "nothing" + # Translate Core.nothing into literal `nothing`s (flisp uses a + # special form (null) for this during desugaring, etc) + nothing else GlobalRef(Core, Symbol(name)) end @@ -286,9 +290,13 @@ function to_lowered_expr(mod, ex, ssa_offset=0) Expr(:method, c1, cs[2:end]...) elseif k == K"newvar" Core.NewvarNode(to_lowered_expr(mod, ex[1], ssa_offset)) - elseif k == K"new_opaque_closure" + elseif k == K"opaque_closure_method" args = map(e->to_lowered_expr(mod, e, ssa_offset), children(ex)) - Expr(:new_opaque_closure, args...) + # opaque_closure_method has special non-evaluated semantics for the + # `functionloc` line number node so we need to undo a level of quoting + @assert args[4] isa QuoteNode + args[4] = args[4].value + Expr(:opaque_closure_method, args...) elseif k == K"meta" args = Any[to_lowered_expr(mod, e, ssa_offset) for e in children(ex)] # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. @@ -317,7 +325,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"gc_preserve_end" ? :gc_preserve_end : k == K"foreigncall" ? :foreigncall : k == K"cfunction" ? :cfunction : - k == K"opaque_closure_method" ? 
:opaque_closure_method : + k == K"new_opaque_closure" ? :new_opaque_closure : nothing if isnothing(head) throw(LoweringError(ex, "Unhandled form for kind $k")) diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl index 27a5afa610818..3999b1a2c486a 100644 --- a/JuliaLowering/test/closures.jl +++ b/JuliaLowering/test/closures.jl @@ -235,4 +235,10 @@ let end """) == (3,4,5) +# opaque_closure_method internals +method_ex = lower_str(test_mod, "Base.Experimental.@opaque x -> 2x").args[1].code[3] +@test method_ex.head === :opaque_closure_method +@test method_ex.args[1] === nothing +@test method_ex.args[4] isa LineNumberNode + end diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index c391f0cce16b6..a56923fc2d010 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -47,11 +47,8 @@ cf_float = JuliaLowering.include_string(test_mod, """ @test @ccall($cf_float(2::Float64, 3::Float64)::Float64) == 32.0 @testset "CodeInfo: has_image_globalref" begin - elower(mod, s) = JuliaLowering.to_lowered_expr( - mod, JuliaLowering.lower( - mod, parsestmt(JuliaLowering.SyntaxTree, s))) - @test elower(test_mod, "x + y").args[1].has_image_globalref === false - @test elower(Main, "x + y").args[1].has_image_globalref === true + @test lower_str(test_mod, "x + y").args[1].has_image_globalref === false + @test lower_str(Main, "x + y").args[1].has_image_globalref === true end end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 817a4de660b65..0712cfe2c3af8 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -262,6 +262,11 @@ function watch_ir_tests(dir, delay=0.5) end end +function lower_str(mod, s) + ex = parsestmt(JuliaLowering.SyntaxTree, s) + return JuliaLowering.to_lowered_expr(mod, JuliaLowering.lower(mod, ex)) +end + # See Julia Base tests in "test/docs.jl" function docstrings_equal(d1, d2; debug=true) io1 = IOBuffer() From 42bb2bf9cbd7ed4c3d27bf5518ce129044105e32 Mon Sep 
17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Fri, 15 Aug 2025 16:28:45 -0700 Subject: [PATCH 1039/1109] Low-provenance `Expr`->`SyntaxTree` conversion (JuliaLang/JuliaLowering.jl#22) --------- Co-authored-by: Claire Foster --- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/ast.jl | 7 +- JuliaLowering/src/compat.jl | 510 +++++++++++++++++++++++++++++ JuliaLowering/src/kinds.jl | 3 + JuliaLowering/src/syntax_graph.jl | 11 +- JuliaLowering/test/compat.jl | 390 ++++++++++++++++++++++ JuliaLowering/test/desugaring.jl | 2 +- JuliaLowering/test/quoting.jl | 4 +- JuliaLowering/test/runtests.jl | 1 + JuliaLowering/test/utils.jl | 6 +- 10 files changed, 923 insertions(+), 12 deletions(-) create mode 100644 JuliaLowering/src/compat.jl create mode 100644 JuliaLowering/test/compat.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 717a3bbe667ae..b24c5687b3a2c 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -32,6 +32,7 @@ _include("runtime.jl") _include("syntax_macros.jl") _include("eval.jl") +_include("compat.jl") function __init__() _register_kinds() diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 1b718e14409ba..16239bf734538 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -498,7 +498,7 @@ end # the middle of a pass. const CompileHints = Base.ImmutableDict{Symbol,Any} -function setmeta(ex::SyntaxTree; kws...) +function setmeta!(ex::SyntaxTree; kws...) @assert length(kws) == 1 # todo relax later ? key = first(keys(kws)) value = first(values(kws)) @@ -506,9 +506,12 @@ function setmeta(ex::SyntaxTree; kws...) m = get(ex, :meta, nothing) isnothing(m) ? CompileHints(key, value) : CompileHints(m, key, value) end - setattr(ex; meta=meta) + setattr!(ex; meta=meta) + ex end +setmeta(ex::SyntaxTree; kws...) = setmeta!(copy_node(ex); kws...) 
+ function getmeta(ex::SyntaxTree, name::Symbol, default) meta = get(ex, :meta, nothing) isnothing(meta) ? default : get(meta, name, default) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl new file mode 100644 index 0000000000000..76dad46cb2db7 --- /dev/null +++ b/JuliaLowering/src/compat.jl @@ -0,0 +1,510 @@ +const JS = JuliaSyntax + +function _insert_tree_node(graph::SyntaxGraph, k::Kind, src::SourceAttrType, + flags::UInt16=0x0000; attrs...) + id = newnode!(graph) + sethead!(graph, id, k) + flags !== 0 && setflags!(graph, id, flags) + setattr!(graph, id; source=src, attrs...) + return id +end + +""" +An Expr -> SyntaxTree transformation that should preserve semantics, but will +have low-quality provenance info (namely, each tree node will be associated with +the last seen LineNumberNode in the pre-order expr traversal). + +Last-resort option so that, for example, we can lower the output of old +Expr-producing macros. Always prefer re-parsing source text over using this. 
+ +Supports parsed and/or macro-expanded exprs, but not lowered exprs +""" +function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing}=nothing) + graph = ensure_attributes!( + SyntaxGraph(), + kind=Kind, syntax_flags=UInt16, + source=SourceAttrType, var_id=Int, value=Any, + name_val=String, is_toplevel_thunk=Bool) + expr_to_syntaxtree(graph, e, lnn) +end + +function expr_to_syntaxtree(ctx, @nospecialize(e), lnn::Union{LineNumberNode, Nothing}) + graph = syntax_graph(ctx) + toplevel_src = if isnothing(lnn) + # Provenance sinkhole for all nodes until we hit a linenode + dummy_src = SourceRef( + SourceFile("No source for expression: $e"), + 1, JS.GreenNode(K"None", 0)) + _insert_tree_node(graph, K"None", dummy_src) + else + lnn + end + st_id, _ = _insert_convert_expr(e, graph, toplevel_src) + out = SyntaxTree(graph, st_id) + return out +end + +function _expr_replace!(@nospecialize(e), replace_pred::Function, replacer!::Function, + recurse_pred=(@nospecialize e)->true) + if replace_pred(e) + replacer!(e) + end + if e isa Expr && recurse_pred(e) + for a in e.args + _expr_replace!(a, replace_pred, replacer!, recurse_pred) + end + end +end + +function _to_iterspec(exs::Vector, is_generator::Bool) + if length(exs) === 1 && exs[1].head === :filter + @assert length(exs[1].args) >= 2 + return Expr(:filter, _to_iterspec(exs[1].args[2:end], true), exs[1].args[1]) + end + outex = Expr(:iteration) + for e in exs + if e.head === :block && !is_generator + for iter in e.args + push!(outex.args, Expr(:in, iter.args...)) + end + elseif e.head === :(=) + push!(outex.args, Expr(:in, e.args...)) + else + @assert false "unknown iterspec in $e" + end + end + return outex +end + +""" +Return `e.args`, but with any parameters in SyntaxTree (flattened, source) order. +Parameters are expected to be as `e.args[pos]`. + +e.g. 
orderings of (a,b,c;d;e;f): + Expr: (tuple (parameters (parameters (parameters f) e) d) a b c) + SyntaxTree: (tuple a b c (parameters d) (parameters e) (parameters f)) +""" +function collect_expr_parameters(e::Expr, pos::Int) + params = expr_parameters(e, pos) + isnothing(params) && return copy(e.args) + args = Any[e.args[1:pos-1]..., e.args[pos+1:end]...] + return _flatten_params!(args, params) +end +function _flatten_params!(out::Vector{Any}, p::Expr) + p1 = expr_parameters(p, 1) + if !isnothing(p1) + push!(out, Expr(:parameters, p.args[2:end]...)) + _flatten_params!(out, p1) + else + push!(out, p::Any) + end + return out +end +function expr_parameters(p::Expr, pos::Int) + if length(p.args) >= pos && + p.args[pos] isa Expr && + p.args[pos].head === :parameters + return p.args[pos] + end + return nothing +end + +""" +If `b` (usually a block) has exactly one non-LineNumberNode argument, unwrap it. +""" +function maybe_unwrap_arg(b::Expr) + e1 = findfirst(c -> !isa(c, LineNumberNode), b.args) + isnothing(e1) && return b + e2 = findfirst(c -> !isa(c, LineNumberNode), b.args[e1+1:end]) + !isnothing(e2) && return b + return b.args[e1] +end + +function maybe_extract_lnn(b, default) + !(b isa Expr) && return b + lnn_i = findfirst(a->isa(a, LineNumberNode), b.args) + return isnothing(lnn_i) ? default : b.args[lnn_i] +end + +# Get kind by string if exists. TODO relies on internals +function find_kind(s::String) + out = get(JS._kind_str_to_int, s, nothing) + return isnothing(out) ? nothing : JS.Kind(out) +end + +function is_dotted_operator(s::AbstractString) + return length(s) >= 2 && + s[1] === '.' && + JS.is_operator(something(find_kind(s[2:end]), K"None")) +end + +function is_eventually_call(e) + return e isa Expr && (e.head === :call || + e.head in (:where, :(::)) && is_eventually_call(e.args[1])) +end + +""" +Insert `e` converted to a syntaxtree into graph and recurse on children. Return +a pair (my_node_id, last_srcloc). Should not mutate `e`. 
+ +`src` is the latest location found in the pre-order traversal, and is the line +number node to be associated with `e`. +""" +function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceAttrType) + #--------------------------------------------------------------------------- + # Non-expr types + if isnothing(e) + st_id = _insert_tree_node(graph, K"core", src; name_val="nothing") + return st_id, src + elseif e isa Symbol + st_id = _insert_tree_node(graph, K"Identifier", src; name_val=String(e)) + return st_id, src + elseif e isa QuoteNode && e.value isa Symbol + # Undo special handling from st->expr + return _insert_convert_expr(Expr(:quote, e.value), graph, src) + # elseif e isa QuoteNode + # st_id = _insert_tree_node(graph, K"inert", src) + # quote_child, _ = _insert_convert_expr(e.value, graph, src) + # setchildren!(graph, st_id, NodeId[quote_child]) + # return st_id, src + elseif e isa String + st_id = _insert_tree_node(graph, K"string", src) + id_inner = _insert_tree_node(graph, K"String", src; value=e) + setchildren!(graph, st_id, [id_inner]) + return st_id, src + elseif !(e isa Expr) + # There are other kinds we could potentially back-convert (e.g. Float), + # but Value should work fine. + st_k = e isa Integer ? K"Integer" : find_kind(string(typeof(e))) + st_id = _insert_tree_node(graph, isnothing(st_k) ? K"Value" : st_k, src; value=e) + return st_id, src + end + + #--------------------------------------------------------------------------- + # `e` is an expr. In many cases, it suffices to + # - guess that the kind name is the same as the expr head + # - add no syntax flags or attrs + # - map e.args to syntax tree children one-to-one + e::Expr + nargs = length(e.args) + maybe_kind = find_kind(string(e.head)) + st_k = isnothing(maybe_kind) ? 
K"None" : maybe_kind + st_flags = 0x0000 + st_attrs = Dict{Symbol, Any}() + # Note that SyntaxTree/Node differentiate 0-child non-terminals and leaves + child_exprs::Union{Nothing, Vector{Any}} = copy(e.args) + + # However, the following are (many) special cases where the kind, flags, + # children, or attributes are different from what we guessed above + if Base.isoperator(e.head) && st_k === K"None" + # e.head is an updating assignment operator (+=, .-=, etc). Non-= + # dotted ops are wrapped in a call, so we don't reach this. + s = string(e.head) + @assert s[end] === '=' && nargs === 2 + if s[1] === '.' + st_k = K".op=" + op = s[2:end-1] + else + st_k = K"op=" + op = s[1:end-1] + end + child_exprs = Any[e.args[1], Symbol(op), e.args[2]] + elseif e.head === :comparison + for i = 2:2:length(child_exprs) + op = child_exprs[i] + @assert op isa Symbol + op_s = string(op) + if is_dotted_operator(op_s) + child_exprs[i] = Expr(:., Symbol(op_s[2:end])) + end + end + elseif e.head === :macrocall + @assert nargs >= 2 + a1 = e.args[1] + child_exprs = collect_expr_parameters(e, 3) + if child_exprs[2] isa LineNumberNode + src = child_exprs[2] + end + deleteat!(child_exprs, 2) + if a1 isa Symbol + child_exprs[1] = Expr(:MacroName, a1) + elseif a1 isa Expr && a1.head === :(.) && a1.args[2] isa QuoteNode + child_exprs[1] = Expr(:(.), a1.args[1], Expr(:MacroName, a1.args[2].value)) + elseif a1 isa GlobalRef + # TODO (maybe): syntax-introduced macrocalls are listed here for + # reference. We probably don't need to convert these. + if a1.name === Symbol("@cmd") + elseif a1.name === Symbol("@doc") + elseif a1.name === Symbol("@int128_str") + elseif a1.name === Symbol("@int128_str") + elseif a1.name === Symbol("@big_str") + end + elseif a1 isa Function + # pass + else + error("Unknown macrocall form $(sprint(dump, e))") + @assert false + end + elseif e.head === Symbol("'") + @assert nargs === 1 + st_k = K"call" + child_exprs = Any[e.head, e.args[1]] + elseif e.head === :. 
&& nargs === 2 + a2 = e.args[2] + if a2 isa Expr && a2.head === :tuple + st_k = K"dotcall" + tuple_exprs = collect_expr_parameters(a2, 1) + child_exprs = pushfirst!(tuple_exprs, e.args[1]) + elseif a2 isa QuoteNode && a2.value isa Symbol + child_exprs[2] = a2.value + elseif a2 isa Expr && a2.head === :MacroName + else + @error "Unknown 2-arg dot form" e + end + elseif e.head === :for + @assert nargs === 2 + child_exprs = Any[_to_iterspec(Any[e.args[1]], false), e.args[2]] + elseif e.head === :where + @assert nargs >= 2 + if !(e.args[2] isa Expr && e.args[2].head === :braces) + child_exprs = Any[e.args[1], Expr(:braces, e.args[2:end]...)] + end + elseif e.head in (:tuple, :vect, :braces) + child_exprs = collect_expr_parameters(e, 1) + elseif e.head in (:curly, :ref) + child_exprs = collect_expr_parameters(e, 2) + elseif e.head === :try + child_exprs = Any[e.args[1]] + # Expr: + # (try (block ...) var (block ...) [block ...] [block ...]) + # # try catch_var catch finally else + # SyntaxTree: + # (try (block ...) + # [catch var (block ...)] + # [else (block ...)] + # [finally (block ...)]) + if e.args[2] != false || e.args[3] != false + push!(child_exprs, + Expr(:catch, + e.args[2] === false ? Expr(:catch_var_placeholder) : e.args[2], + e.args[3] === false ? nothing : e.args[3])) + end + if nargs >= 5 + push!(child_exprs, Expr(:else, e.args[5])) + end + if nargs >= 4 + push!(child_exprs, + Expr(:finally, e.args[4] === false ? 
nothing : e.args[4])) + end + elseif e.head === :flatten || e.head === :generator + st_k = K"generator" + child_exprs = Any[] + next = e + while next.head === :flatten + @assert next.args[1].head === :generator + push!(child_exprs, _to_iterspec(next.args[1].args[2:end], true)) + next = next.args[1].args[1] + end + @assert next.head === :generator + push!(child_exprs, _to_iterspec(next.args[2:end], true)) + pushfirst!(child_exprs, next.args[1]) + elseif e.head === :ncat || e.head === :nrow + dim = popfirst!(child_exprs) + st_flags |= JS.set_numeric_flags(dim) + elseif e.head === :typed_ncat + st_flags |= JS.set_numeric_flags(e.args[2]) + deleteat!(child_exprs, 2) + elseif e.head === :(->) + @assert nargs === 2 + if e.args[1] isa Expr && e.args[1].head === :block + # Expr parsing fails to make :parameters here... + lam_args = Any[] + lam_eqs = Any[] + for a in e.args[1].args + a isa Expr && a.head === :(=) ? push!(lam_eqs, a) : push!(lam_args, a) + end + !isempty(lam_eqs) && push!(lam_args, Expr(:parameters, lam_eqs...)) + child_exprs[1] = Expr(:tuple, lam_args...) 
+ elseif !(e.args[1] isa Expr && (e.args[1].head in (:tuple, :where))) + child_exprs[1] = Expr(:tuple, e.args[1]) + end + src = maybe_extract_lnn(e.args[2], src) + child_exprs[2] = maybe_unwrap_arg(e.args[2]) + elseif e.head === :call + child_exprs = collect_expr_parameters(e, 2) + a1 = child_exprs[1] + if a1 isa Symbol + a1s = string(a1) + if is_dotted_operator(a1s) + # non-assigning dotop like .+ or .== + st_k = K"dotcall" + child_exprs[1] = Symbol(a1s[2:end]) + end + end + elseif e.head === :function + if nargs >= 2 + src = maybe_extract_lnn(e.args[2], src) + end + elseif e.head === :(=) + if is_eventually_call(e.args[1]) + st_k = K"function" + st_flags |= JS.SHORT_FORM_FUNCTION_FLAG + src = maybe_extract_lnn(e.args[2], src) + child_exprs[2] = maybe_unwrap_arg(e.args[2]) + end + elseif e.head === :module + @assert nargs === 3 + if !e.args[1] + st_flags |= JS.BARE_MODULE_FLAG + end + child_exprs = Any[e.args[2], e.args[3]] + elseif e.head === :do + # Expr: + # (do (call f args...) (-> (tuple lam_args...) (block ...))) + # SyntaxTree: + # (call f args... (do (tuple lam_args...) 
(block ...))) + callargs = collect_expr_parameters(e.args[1], 2) + fname = string(callargs[1]) + if e.args[1].head === :macrocall + st_k = K"macrocall" + callargs[1] = Expr(:MacroName, callargs[1]) + else + st_k = K"call" + end + child_exprs = Any[callargs..., Expr(:do_lambda, e.args[2].args...)] + elseif e.head === :let + if nargs >= 1 && !(e.args[1] isa Expr && e.args[1].head === :block) + child_exprs[1] = Expr(:block, e.args[1]) + end + elseif e.head === :struct + e.args[1] && (st_flags |= JS.MUTABLE_FLAG) + child_exprs = child_exprs[2:end] + # TODO handle docstrings after refactor + elseif (e.head === :using || e.head === :import) + _expr_replace!(e, + (e)->(e isa Expr && e.head === :.), + (e)->(e.head = :importpath)) + elseif e.head === :kw + st_k = K"=" + elseif e.head in (:local, :global) && nargs > 1 + # Possible normalization + # child_exprs = Any[Expr(:tuple, child_exprs...)] + elseif e.head === :error + # Zero-child errors from parsing are leaf nodes. We could change this + # upstream for consistency. + if nargs === 0 + child_exprs = nothing + st_attrs[:value] = JS.ErrorVal() + st_flags |= JS.TRIVIA_FLAG + end + end + + #--------------------------------------------------------------------------- + # The following heads are not emitted from parsing, but old macros could + # produce these and they would historically be accepted by flisp lowering. + if e.head === Symbol("latestworld-if-toplevel") + st_k = K"latestworld_if_toplevel" + elseif e.head === Symbol("hygienic-scope") + st_k = K"hygienic_scope" + elseif e.head === :meta + # Messy and undocumented. Only sometimes we want a K"meta". + @assert e.args[1] isa Symbol + if e.args[1] === :nospecialize + if nargs > 2 + st_k = K"block" + # Kick the can down the road (should only be simple atoms?) 
+                child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end])
+            else
+                st_id, src = _insert_convert_expr(e.args[2], graph, src)
+                setmeta!(SyntaxTree(graph, st_id); nospecialize=true)
+                return st_id, src
+            end
+        else
+            @assert nargs === 1
+            child_exprs[1] = Expr(:quoted_symbol, e.args[1])
+        end
+    elseif e.head === :symbolicgoto || e.head === :symboliclabel
+        @assert nargs === 1
+        st_k = e.head === :symbolicgoto ? K"symbolic_goto" : K"symbolic_label"
+        st_attrs[:name_val] = string(e.args[1])
+        child_exprs = nothing
+    elseif e.head === :inline || e.head === :noinline
+        @assert nargs === 1 && e.args[1] isa Bool
+        # TODO: JuliaLowering doesn't accept this (non-:meta) form yet
+        st_k = K"TOMBSTONE"
+        child_exprs = nothing
+    elseif e.head === :core
+        @assert nargs === 1
+        @assert e.args[1] isa Symbol
+        st_attrs[:name_val] = string(e.args[1])
+        child_exprs = nothing
+    elseif e.head === :islocal || e.head === :isglobal
+        st_k = K"extension"
+        child_exprs = [Expr(:quoted_symbol, e.head), e.args[1]]
+    elseif e.head === :block && nargs >= 1 &&
+        e.args[1] isa Expr && e.args[1].head === :softscope
+        # (block (softscope true) ex) produced with every REPL prompt.
+        # :hardscope exists too, but should just be a let, and appears to be
+        # unused in the wild.
+        ensure_attributes!(graph; scope_type=Symbol)
+        st_k = K"scope_block"
+        st_attrs[:scope_type] = :soft
+        child_exprs = e.args[2:end]
+    end
+
+    #---------------------------------------------------------------------------
+    # Temporary heads introduced by us converting the parent expr
+    if e.head === :MacroName
+        @assert nargs === 1
+        mac_name = string(e.args[1])
+        mac_name = mac_name == "@__dot__" ? "@." 
: mac_name + st_id = _insert_tree_node(graph, K"MacroName", src, st_flags; name_val=mac_name) + return st_id, src + elseif e.head === :catch_var_placeholder + st_k = K"Placeholder" + st_attrs[:name_val] = "" + child_exprs = nothing + elseif e.head === :quoted_symbol + st_k = K"Symbol" + st_attrs[:name_val] = String(e.args[1]) + child_exprs = nothing + elseif e.head === :do_lambda + st_k = K"do" + end + + #--------------------------------------------------------------------------- + # Throw if this script isn't complete. Finally, insert a new node into the + # graph and recurse on child_exprs + if st_k === K"None" + error("Unknown expr head `$(e.head)`\n$(sprint(dump, e))") + end + + st_id = _insert_tree_node(graph, st_k, src, st_flags; st_attrs...) + + # child_exprs === nothing means we want a leaf. Note that setchildren! with + # an empty list makes a node non-leaf. + if isnothing(child_exprs) + return st_id, src + else + setflags!(graph, st_id, st_flags) + st_child_ids, last_src = _insert_child_exprs(child_exprs, graph, src) + setchildren!(graph, st_id, st_child_ids) + return st_id, last_src + end +end + +function _insert_child_exprs(child_exprs::Vector{Any}, graph::SyntaxGraph, + src::SourceAttrType) + st_child_ids = NodeId[] + last_src = src + for c in child_exprs + if c isa LineNumberNode + last_src = c + else + (c_id, c_src) = _insert_convert_expr(c, graph, last_src) + push!(st_child_ids, c_id) + last_src = something(c_src, src) + end + end + return st_child_ids, last_src +end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 45138969b2de9..efd73c2420312 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -48,6 +48,9 @@ function _register_kinds() # Internal initializer for struct types, for inner constructors/functions "new" "splatnew" + # For expr-macro compatibility; gone after expansion + "escape" + "hygienic_scope" # Catch-all for additional syntax extensions without the need to # extend `Kind`. 
Known extensions include: # locals, islocal diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index ff9edafc9fbb6..6eb0737c4c85e 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -146,9 +146,7 @@ function sethead!(graph, id::NodeId, k::Kind) end function setflags!(graph, id::NodeId, f::UInt16) - if f != 0 - graph.syntax_flags[id] = f - end + graph.syntax_flags[id] = f end function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) @@ -246,7 +244,7 @@ function attrnames(ex::SyntaxTree) [name for (name, value) in pairs(attrs) if haskey(value, ex._id)] end -function setattr(ex::SyntaxTree; extra_attrs...) +function copy_node(ex::SyntaxTree) graph = syntax_graph(ex) id = newnode!(graph) if !is_leaf(ex) @@ -254,6 +252,11 @@ function setattr(ex::SyntaxTree; extra_attrs...) end ex2 = SyntaxTree(graph, id) copy_attrs!(ex2, ex, true) + ex2 +end + +function setattr(ex::SyntaxTree; extra_attrs...) + ex2 = copy_node(ex) setattr!(ex2; extra_attrs...) 
ex2 end diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl new file mode 100644 index 0000000000000..f73628db89181 --- /dev/null +++ b/JuliaLowering/test/compat.jl @@ -0,0 +1,390 @@ +using Test +using JuliaSyntax +using JuliaLowering +const JS = JuliaSyntax +const JL = JuliaLowering + +@testset "expr->syntaxtree" begin + @testset "semantics only" begin + # Test that `s` evaluates to the same thing both under normal parsing + # and with the expr->tree->expr transformation + + programs = [ + "let x = 2; x += 5; x -= 1; [1] .*= 1; end", + "let var\"x\" = 123; x; end", + "try; 1; catch e; e; else; 2; finally; 3; end", + "for x in 1:2, y in 3:4; x + y; end", + "[x+y for x in 1:2, y in 3:4]", + "Int[x+y for x in 1:2, y in 3:4 if true]", + "for x in 1; x+=1\n if true\n continue \n elseif false \n break\n end\n end", + "Base.Meta.@lower 1", + "function foo(x, y=1; z, what::Int=5); x + y + z + what; end; foo(1,2;z=3)", + "(()->1)()", + "((x)->2)(3)", + "((x,y)->4)(5,6)", + "filter([1,2,3]) do x; x > 1; end", + """ + struct X + f1::Int # hi + "foo" + f2::Int + f3::Int + X(y) = new(y,y,y) + end + """, + "global x,y", + "global (x,y)", + "999999999999999999999999999999999999999", + "0x00000000000000001", + "(0x00000000000000001)", + "let x = 1; 2x; end", + "let x = 1; (2)(3)x; end", + "if false\n1\nelseif true\n 3\nend", + "\"str\"", + "\"\$(\"str\")\"", + "'a'", + "'α'", + "'\\xce\\xb1'", + "let x = 1; \"\"\"\n a\n \$x\n b\n c\"\"\"; end", + "try throw(0) catch e; 1 end", + "try 0 finally 1 end", + "try throw(0) catch e; 1 finally 2 end", + "try throw(0) catch e; 1 else 2 end", + "try throw(0) catch e; 1 else 2 finally 3 end", + "try throw(0) finally 1 catch e; 2 end", + ":.+", + ":.=", + ":(.=)", + ":+=", + ":(+=)", + ":.+=", + ":(.+=)", + ] + + test_mod_1 = Module() + test_mod_2 = Module() + + for p in programs + @testset "`$p`" begin + local good_expr, good_out, test_st, test_expr, test_out + try + good_expr = JS.parseall(Expr, p; ignore_errors=true) 
+ good_out = Core.eval(test_mod_1, good_expr) + catch e + @error "Couldn't eval the reference expression---fix your test" + rethrow(e) + end + + test_st = JuliaLowering.expr_to_syntaxtree(good_expr) + test_expr = Expr(test_st) + test_out = Core.eval(test_mod_2, test_expr) + + @test good_out == test_out + end + end + end + + # Remove any information that can't be recovered from an Expr + function normalize_st!(st) + k = JS.kind(st) + args = JS.children(st) + + if JS.is_infix_op_call(st) && (k === K"call" || k === K"dotcall") + # Infix calls are not preserved in Expr; we need to re-order the children + pre_st_args = JL.NodeId[st[2]._id, st[1]._id] + for c in st[3:end] + push!(pre_st_args, c._id) + end + pre_st_flags = (JS.flags(st) & ~JS.INFIX_FLAG) | JS.PREFIX_CALL_FLAG + JL.setchildren!(st._graph, st._id, pre_st_args) + JL.setflags!(st._graph, st._id, pre_st_flags) + elseif JS.is_postfix_op_call(st) && (k === K"call" || k === K"dotcall") + pre_st_args = JL.NodeId[st[end]._id] + for c in st[1:end-1] + push!(pre_st_args, c._id) + end + pre_st_flags = (JS.flags(st) & ~JS.POSTFIX_OP_FLAG) | JS.PREFIX_CALL_FLAG + JL.setchildren!(st._graph, st._id, pre_st_args) + JL.setflags!(st._graph, st._id, pre_st_flags) + elseif k in JS.KSet"tuple block macrocall" + JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.PARENS_FLAG) + elseif k === K"toplevel" + JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.TOPLEVEL_SEMICOLONS_FLAG) + end + + if k in JS.KSet"tuple call dotcall macrocall vect curly braces <: >:" + JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.TRAILING_COMMA_FLAG) + end + + k === K"quote" && JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.COLON_QUOTE) + k === K"wrapper" && JL.sethead!(st._graph, st._id, K"block") + + # All ops are prefix ops in an expr. 
+ # Ignore trivia (shows up on some K"error"s) + JL.setflags!(st._graph, st._id, JS.flags(st) & + ~JS.PREFIX_OP_FLAG & ~JS.INFIX_FLAG & ~JS.TRIVIA_FLAG & ~JS.NON_TERMINAL_FLAG) + + for c in JS.children(st) + normalize_st!(c) + end + return st + end + + function st_roughly_equal(; st_good, st_test) + normalize_st!(st_good) + + if kind(st_good) === kind(st_test) === K"error" + # We could consider some sort of equivalence later, but we would + # need to specify within JS what the error node contains. + return true + end + + out = kind(st_good) === kind(st_test) && + JS.flags(st_good) === JS.flags(st_test) && + JS.numchildren(st_good) === JS.numchildren(st_test) && + JS.is_leaf(st_good) === JS.is_leaf(st_test) && + get(st_good, :value, nothing) === get(st_test, :value, nothing) && + get(st_good, :name_val, nothing) === get(st_test, :name_val, nothing) && + all(map((cg, ct)->st_roughly_equal(;st_good=cg, st_test=ct), + JS.children(st_good), JS.children(st_test))) + + !out && @warn("!st_roughly_equal (normalized_reference, st_test):", + JS.sourcetext(st_good), st_good, st_test) + return out + end + + @testset "SyntaxTree equivalence (tests taken from JuliaSyntax expr.jl)" begin + # test that string->tree->expr->tree ~= string->tree + # ^^ + programs = [ + "begin a\nb\n\nc\nend", + "(a;b;c)", + "begin end", + "(;;)", + "a;b", + "module A\n\nbody\nend", + "function f()\na\n\nb\nend", + "f() = 1", + "macro f()\na\nend", + "function f end", + "macro f end", + "function (f() where {T}) end", + "function (f()::S) end", + "a -> b", + "(a,) -> b", + "(a where {T}) -> b", + "a -> (\nb;c)", + "a -> begin\nb\nc\nend", + "(a;b=1) -> c", + "(a...;b...) 
-> c", + "(;) -> c", + "a::T -> b", + "let i=is, j=js\nbody\nend", + "for x=xs\n\nend", + "for x=xs\ny\nend", + "while cond\n\nend", + "while cond\ny\nend", + "f() = xs", + "f() =\n(a;b)", + "f() =\nbegin\na\nb\nend", + "let f(x) =\ng(x)=1\nend", + "f() .= xs", + "for i=is body end", + "for i=is, j=js\nbody\nend", + "f(x) do y\n body end", + "@f(x) do y body end", + "f(x; a=1) do y body end", + "g(f(x) do y\n body end)", + "f(a=1)", + "f(; b=2)", + "f(a=1; b=2)", + "f(a; b; c)", + "+(a=1,)", + "(a=1)()", + "(x=1) != 2", + "+(a=1)", + "(a=1)'", + "f.(a=1; b=2)", + "(a=1,)", + "(a=1,; b=2)", + "(a=1,; b=2; c=3)", + "x[i=j]", + "(i=j)[x]", + "x[a, b; i=j]", + "(i=j){x}", + "x{a, b; i=j}", + "[a=1,; b=2]", + "{a=1,; b=2}", + "f(a .= 1)", + "f(((a = 1)))", + "(((a = 1)),)", + "(;((a = 1)),)", + "a.b", + "a.@b x", + "f.(x,y)", + "f.(x=1)", + "f.(a=1; b=2)", + "(a=1).()", + "x .+ y", + "(x=1) .+ y", + "a .< b .< c", + "a .< (.<) .< c", + "quote .+ end", + ".+(x)", + ".+x", + "f(.+)", + "(a, .+)", + "x += y", + "x .+= y", + "x \u2212= y", + "let x=1\n end", + "let x=1 ; end", + "let x ; end", + "let x::1 ; end", + "let x=1,y=2 end", + "let x+=1 ; end", + "let ; end", + "let ; body end", + "let\na\nb\nend", + "A where {T}", + "A where {S, T}", + "A where {X, Y; Z}", + "@m\n", + "\n@m", + "@m(x; a)", + "@m(a=1; b=2)", + "@S[a,b]", + "@S[a b]", + "@S[a; b]", + "@S[a ;; b]", + "[x,y ; z]", + "[a ;;; b ;;;; c]", + "[a b ; c d]", + "[a\nb]", + "[a b]", + "[a b ; c d]", + "T[a ;;; b ;;;; c]", + "T[a b ; c d]", + "T[a\nb]", + "T[a b]", + "T[a b ; c d]", + "(x for a in as for b in bs)", + "(x for a in as, b in bs)", + "(x for a in as, b in bs if z)", + "(x for a in as, b in bs for c in cs, d in ds)", + "(x for a in as for b in bs if z)", + "(x for a in as if z for b in bs)", + "[x for a = as for b = bs if cond1 for c = cs if cond2]" , + "[x for a = as if begin cond2 end]" , + "(x for a in as if z)", + "return x", + "struct A end", + "mutable struct A end", + "struct A <: B \n a::X 
\n end", + "struct A \n a \n b \n end", + "struct A const a end", + "export a", + "export +, ==", + "export \n a", + "global x", + "local x", + "global x,y", + "const x,y = 1,2", + "const x = 1", + "global x ~ 1", + "global x += 1", + "(;)", + "(; a=1)", + "(; a=1; b=2)", + "(a; b; c,d)", + "module A end", + "baremodule A end", + "import A", + ] + + for p in programs + @testset "`$p`" begin + st_good = JS.parsestmt(JL.SyntaxTree, p; ignore_errors=true) + st_test = JL.expr_to_syntaxtree(Expr(st_good)) + @test st_roughly_equal(;st_good, st_test) + end + end + end + + @testset "provenance via scavenging for LineNumberNodes" begin + # Provenenance of a node should be the last seen LineNumberNode in the + # depth-first traversal of the Expr, or the initial line given if none + # have been seen yet. If none have been seen and no initial line was + # given, .source should still be defined on all nodes (of unspecified + # value, but hopefully a helpful value for the user.) + ex = Expr(:block, + LineNumberNode(123), + Expr(:block, + Expr(:block, LineNumberNode(456)), + Expr(:block)), + Expr(:block, + Expr(:block), + Expr(:block))) + + # No initial line provided + st = JuliaLowering.expr_to_syntaxtree(ex) + for i in length(st._graph.edge_ranges) + @test !isnothing(get(SyntaxTree(st._graph, i), :source, nothing)) + end + @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][1].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][2].source; lnn isa LineNumberNode && lnn.line === 456; end + + # Same tree, but provide an initial line + st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(789)) + @test let lnn = st.source; lnn isa LineNumberNode && lnn.line === 
789; end + @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][1].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][2].source; lnn isa LineNumberNode && lnn.line === 456; end + + ex = parsestmt(Expr, """ + begin + try + maybe + lots + of + lines + catch exc + y + end + end""") + st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(1)) + + # sanity: ensure we're testing the tree we expect + @test kind(st) === K"block" + @test kind(st[1]) === K"try" + @test kind(st[1][1]) === K"block" + @test kind(st[1][1][1]) === K"Identifier" && st[1][1][1].name_val === "maybe" + @test kind(st[1][1][2]) === K"Identifier" && st[1][1][2].name_val === "lots" + @test kind(st[1][1][3]) === K"Identifier" && st[1][1][3].name_val === "of" + @test kind(st[1][1][4]) === K"Identifier" && st[1][1][4].name_val === "lines" + @test kind(st[1][2]) === K"catch" + @test kind(st[1][2][1]) === K"Identifier" && st[1][2][1].name_val === "exc" + @test kind(st[1][2][2]) === K"block" + @test kind(st[1][2][2][1]) === K"Identifier" && st[1][2][2][1].name_val === "y" + + @test let lnn = st.source; lnn isa LineNumberNode && lnn.line === 1; end + @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 2; end + @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 2; end + @test let lnn = st[1][1][1].source; lnn isa LineNumberNode && lnn.line === 3; end + @test let lnn = st[1][1][2].source; lnn isa LineNumberNode && lnn.line === 4; end + @test let lnn = st[1][1][3].source; lnn isa LineNumberNode && lnn.line === 5; end + @test let lnn = st[1][1][4].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2].source; lnn isa LineNumberNode 
&& lnn.line === 6; end + @test let lnn = st[1][2][1].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2][2].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2][2][1].source; lnn isa LineNumberNode && lnn.line === 8; end + + end +end diff --git a/JuliaLowering/test/desugaring.jl b/JuliaLowering/test/desugaring.jl index 834d0fb50865c..66a1766b342cb 100644 --- a/JuliaLowering/test/desugaring.jl +++ b/JuliaLowering/test/desugaring.jl @@ -11,7 +11,7 @@ test_mod = Module(:TestMod) # end # (x, y) # end -# """) ~ @ast_ [K"block" +# """) ≈ @ast_ [K"block" # [K"block" # [K"=" # "y"::K"Identifier" diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index 32b917304344f..887e7d1bb26e0 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -11,7 +11,7 @@ begin end end """) -@test ex ~ @ast_ [K"block" +@test ex ≈ @ast_ [K"block" [K"call" "f"::K"Identifier" 11::K"Value" @@ -107,7 +107,7 @@ let end end """) -@test ex ~ @ast_ [K"block" +@test ex ≈ @ast_ [K"block" [K"=" "x"::K"Identifier" 1::K"Integer" diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index d628fde0911bb..dc24047f0f72e 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -25,4 +25,5 @@ include("utils.jl") include("quoting.jl") include("scopes.jl") include("typedefs.jl") + include("compat.jl") end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 0712cfe2c3af8..bf59f62e13c7b 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -44,7 +44,7 @@ macro ast_(tree) end end -function ~(ex1, ex2) +function ≈(ex1, ex2) if kind(ex1) != kind(ex2) || is_leaf(ex1) != is_leaf(ex2) return false end @@ -52,7 +52,7 @@ function ~(ex1, ex2) if numchildren(ex1) != numchildren(ex2) return false end - return all(c1 ~ c2 for (c1,c2) in zip(children(ex1), children(ex2))) + return all(c1 ≈ c2 for (c1,c2) in zip(children(ex1), 
children(ex2))) else return get(ex1, :value, nothing) == get(ex2, :value, nothing) && get(ex1, :name_val, nothing) == get(ex2, :name_val, nothing) @@ -92,7 +92,7 @@ end format_as_ast_macro(ex) Format AST `ex` as a Juila source code call to the `@ast_` macro for generating -test case comparisons with the `~` function. +test case comparisons with the `≈` function. """ format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex) From e285523cc9b46c48d14cf09a2199530f1e62ca4a Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Fri, 15 Aug 2025 20:44:09 -0700 Subject: [PATCH 1040/1109] Add hook for testing JuliaLowering in core (JuliaLang/JuliaLowering.jl#32) --------- Co-authored-by: Claire Foster --- JuliaLowering/src/JuliaLowering.jl | 1 + JuliaLowering/src/hooks.jl | 51 ++++++++++++++++++++++++++++++ JuliaLowering/test/hooks.jl | 38 ++++++++++++++++++++++ JuliaLowering/test/runtests.jl | 1 + 4 files changed, 91 insertions(+) create mode 100644 JuliaLowering/src/hooks.jl create mode 100644 JuliaLowering/test/hooks.jl diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index b24c5687b3a2c..4d37b90f09028 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -33,6 +33,7 @@ _include("syntax_macros.jl") _include("eval.jl") _include("compat.jl") +_include("hooks.jl") function __init__() _register_kinds() diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl new file mode 100644 index 0000000000000..c150fce9a1283 --- /dev/null +++ b/JuliaLowering/src/hooks.jl @@ -0,0 +1,51 @@ +""" +Becomes `Core._lower()` upon activating JuliaLowering. 
+ +Returns an svec with the lowered code (usually expr) as its first element, and +(until integration is less experimental) whatever we want after it +""" +function core_lowering_hook(@nospecialize(code), mod::Module, + file="none", line=0, world=typemax(Csize_t), warn=false) + if !(code isa SyntaxTree || code isa Expr) + # e.g. LineNumberNode, integer... + return Core.svec(code) + end + + # TODO: fix in base + file = file isa Ptr{UInt8} ? unsafe_string(file) : file + line = !(line isa Int64) ? Int64(line) : line + + st0 = code isa Expr ? expr_to_syntaxtree(code, LineNumberNode(line, file)) : code + try + ctx1, st1 = expand_forms_1( mod, st0) + ctx2, st2 = expand_forms_2( ctx1, st1) + ctx3, st3 = resolve_scopes( ctx2, st2) + ctx4, st4 = convert_closures(ctx3, st3) + ctx5, st5 = linearize_ir( ctx4, st4) + ex = to_lowered_expr(mod, st5) + return Core.svec(ex, st5, ctx5) + catch exc + @error("JuliaLowering failed — falling back to flisp!", + exception=(exc,catch_backtrace()), + code=code, file=file, line=line, mod=mod) + return Base.fl_lower(code, mod, file, line, world, warn) + end +end + +# TODO: Write a parser hook here. The input to `core_lowering_hook` should +# eventually be a (convertible to) SyntaxTree, but we need to make updates to +# the parsing API to include a parameter for AST type. 
+ +const _has_v1_13_hooks = isdefined(Core, :_lower) + +function activate!(enable=true) + if !_has_v1_13_hooks + error("Cannot use JuliaLowering without `Core._lower` binding or in $VERSION < 1.13") + end + + if enable + Core._setlowerer!(core_lowering_hook) + else + Core._setlowerer!(Base.fl_lower) + end +end diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl new file mode 100644 index 0000000000000..39a3d883a3ede --- /dev/null +++ b/JuliaLowering/test/hooks.jl @@ -0,0 +1,38 @@ +const JL = JuliaLowering + +@testset "hooks" begin + test_mod = Module() + + @testset "`core_lowering_hook`" begin + # Non-AST types are often sent through lowering + stuff = Any[LineNumberNode(1), 123, 123.123, true, "foo", test_mod] + for s in stuff + @test JL.core_lowering_hook(s, test_mod) == Core.svec(s) + end + + for ast_type in (Expr, JL.SyntaxTree) + ex = parsestmt(ast_type, "[1,2,3] .+= 1") + out = JL.core_lowering_hook(ex, test_mod) + @test out isa Core.SimpleVector && out[1] isa Expr + val = Core.eval(test_mod, out[1]) + @test val == [2,3,4] + end + end + + @testset "integration: `JuliaLowering.activate!`" begin + prog = parseall(Expr, "global asdf = 1") + JL.activate!() + out = Core.eval(test_mod, prog) + JL.activate!(false) + @test out === 1 + @test isdefined(test_mod, :asdf) + + prog = parseall(Expr, "module M; x = 1; end") + JL.activate!() + out = Core.eval(test_mod, prog) + JL.activate!(false) + @test out isa Module + @test isdefined(test_mod, :M) + @test isdefined(test_mod.M, :x) + end +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index dc24047f0f72e..5225f7dabc6c2 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -26,4 +26,5 @@ include("utils.jl") include("scopes.jl") include("typedefs.jl") include("compat.jl") + include("hooks.jl") end From 81d5cee572ce2cae341b33091fab07cca8e2fbda Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 20 Aug 2025 09:46:13 +1000 Subject: [PATCH 
1041/1109] Add `K"static_eval"` for cfunction/ccall/cglobal (JuliaLang/JuliaLowering.jl#36) A few special forms have a kind of "deferred static evaluation" semantics for some of their children: * `@cfunction` - the function name and types * `ccall` / `foreigncall` / `@ccall` - the type arguments and sometimes the function name * `cglobal` - the function name * `@generated` - the expression defining the generated function stub For example, in `@ccall f()::Int`, the `Int` means "the symbol `Int` as looked up in global scope in the module, or as a static parameter of the method", and should fail if `Int` refers to a local variable. Currently all three of these cases are handled through different mechanisms with varying levels of hygiene inconsistency and ability to warn about access to local variables. To fix this problem, introduce the new `K"static_eval"` form which wraps an expression and preserves it as a piece of AST in the output (rather than producing IR), but still resolves scope and hygiene. Use this new form to remove all the special case child-index-dependent handling of these disparate forms from the IR. Also fixes bugs in `Base.@cfunction` hygiene where the function name might be resolved to a global symbol in the wrong module. Also move demo `@ccall` implementation into JuliaLowering, clean up and fix a few things which were broken and implement the gc_safe option from `Base.@ccall`. Makes use of static_eval kind for more precise diagnostics. 
--- JuliaLowering/src/ast.jl | 2 + JuliaLowering/src/closure_conversion.jl | 2 +- JuliaLowering/src/desugaring.jl | 38 ++++-- JuliaLowering/src/eval.jl | 10 +- JuliaLowering/src/kinds.jl | 5 + JuliaLowering/src/linear_ir.jl | 62 ++-------- JuliaLowering/src/macro_expansion.jl | 7 ++ JuliaLowering/src/scope_analysis.jl | 25 ++++ JuliaLowering/src/syntax_macros.jl | 155 +++++++++++++++++++++--- JuliaLowering/test/ccall_demo.jl | 129 -------------------- JuliaLowering/test/function_calls_ir.jl | 40 ++++-- JuliaLowering/test/macros.jl | 16 ++- JuliaLowering/test/macros_ir.jl | 4 + JuliaLowering/test/misc.jl | 53 ++++++++ JuliaLowering/test/misc_ir.jl | 122 ++++++++++++++++++- 15 files changed, 442 insertions(+), 228 deletions(-) delete mode 100644 JuliaLowering/test/ccall_demo.jl diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 16239bf734538..bbe59015fbf7d 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -517,6 +517,8 @@ function getmeta(ex::SyntaxTree, name::Symbol, default) isnothing(meta) ? 
default : get(meta, name, default) end +name_hint(name) = CompileHints(:name_hint, name) + #------------------------------------------------------------------------------- # Predicates and accessors working on expression trees diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index b261cf36dd2c0..3ac34effb1bbd 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -320,7 +320,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) else access end - elseif is_leaf(ex) || k == K"inert" + elseif is_leaf(ex) || k == K"inert" || k == K"static_eval" ex elseif k == K"=" convert_assignment(ctx, ex) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 33a9364724db8..4a4d4e296e187 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1626,6 +1626,17 @@ function expand_kw_call(ctx, srcref, farg, args, kws) ] end +# Expand the (sym,lib) argument to ccall/cglobal +function expand_C_library_symbol(ctx, ex) + expanded = expand_forms_2(ctx, ex) + if kind(ex) == K"tuple" + expanded = @ast ctx ex [K"static_eval"(meta=name_hint("function name and library expression")) + expanded + ] + end + return expanded +end + function expand_ccall(ctx, ex) @assert kind(ex) == K"call" && is_core_ref(ex[1], "ccall") if numchildren(ex) < 4 @@ -1633,10 +1644,6 @@ function expand_ccall(ctx, ex) end cfunc_name = ex[2] # Detect calling convention if present. - # - # Note `@ccall` also emits `Expr(:cconv, convention, nreq)`, but this is a - # somewhat undocumented performance workaround. Instead we should just make - # sure @ccall can emit foreigncall directly and efficiently. known_conventions = ("cdecl", "stdcall", "fastcall", "thiscall", "llvmcall") cconv = if any(is_same_identifier_like(ex[3], id) for id in known_conventions) ex[3] @@ -1748,11 +1755,15 @@ function expand_ccall(ctx, ex) @ast ctx ex [K"block" sctx.stmts... 
[K"foreigncall" - expand_forms_2(ctx, cfunc_name) - expand_forms_2(ctx, return_type) - [K"call" - "svec"::K"core" - expanded_types... + expand_C_library_symbol(ctx, cfunc_name) + [K"static_eval"(meta=name_hint("ccall return type")) + expand_forms_2(ctx, return_type) + ] + [K"static_eval"(meta=name_hint("ccall argument type")) + [K"call" + "svec"::K"core" + expanded_types... + ] ] num_required_args::K"Integer" if isnothing(cconv) @@ -1828,6 +1839,15 @@ function expand_call(ctx, ex) farg = ex[1] if is_core_ref(farg, "ccall") return expand_ccall(ctx, ex) + elseif is_core_ref(farg, "cglobal") + @chk numchildren(ex) in 2:3 (ex, "cglobal must have one or two arguments") + return @ast ctx ex [K"call" + ex[1] + expand_C_library_symbol(ctx, ex[2]) + if numchildren(ex) == 3 + expand_forms_2(ctx, ex[3]) + end + ] end args = copy(ex[2:end]) kws = remove_kw_args!(ctx, args) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index a7896bf0dbcac..1f375ab1b6f4b 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -302,6 +302,15 @@ function to_lowered_expr(mod, ex, ssa_offset=0) # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. args[1] = args[1].value Expr(:meta, args...) + elseif k == K"static_eval" + @assert numchildren(ex) == 1 + to_lowered_expr(mod, ex[1], ssa_offset) + elseif k == K"cfunction" + args = Any[to_lowered_expr(mod, e, ssa_offset) for e in children(ex)] + if kind(ex[2]) == K"static_eval" + args[2] = QuoteNode(args[2]) + end + Expr(:cfunction, args...) else # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ # @@ -324,7 +333,6 @@ function to_lowered_expr(mod, ex, ssa_offset=0) k == K"gc_preserve_begin" ? :gc_preserve_begin : k == K"gc_preserve_end" ? :gc_preserve_end : k == K"foreigncall" ? :foreigncall : - k == K"cfunction" ? :cfunction : k == K"new_opaque_closure" ? 
:new_opaque_closure : nothing if isnothing(head) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index efd73c2420312..7aa17aa4a0f00 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -51,6 +51,11 @@ function _register_kinds() # For expr-macro compatibility; gone after expansion "escape" "hygienic_scope" + # An expression which will eventually be evaluated "statically" in + # the context of a CodeInfo and thus allows access only to globals + # and static parameters. Used for ccall, cfunction, cglobal + # TODO: Use this for GeneratedFunctionStub also? + "static_eval" # Catch-all for additional syntax extensions without the need to # extend `Kind`. Known extensions include: # locals, islocal diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 95700c1e85bd6..bf4c741393880 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -3,7 +3,7 @@ function is_valid_ir_argument(ctx, ex) k = kind(ex) - if is_simple_atom(ctx, ex) || k in KSet"inert top core quote" + if is_simple_atom(ctx, ex) || k in KSet"inert top core quote static_eval" true elseif k == K"BindingId" binfo = lookup_binding(ctx, ex) @@ -112,7 +112,7 @@ end function is_simple_arg(ctx, ex) k = kind(ex) return is_simple_atom(ctx, ex) || k == K"BindingId" || k == K"quote" || k == K"inert" || - k == K"top" || k == K"core" || k == K"globalref" + k == K"top" || k == K"core" || k == K"globalref" || k == K"static_eval" end function is_single_assign_var(ctx::LinearIRContext, ex) @@ -128,7 +128,7 @@ function is_const_read_arg(ctx, ex) # Even if we have side effects, we know that singly-assigned # locals cannot be affected by them so we can inline them anyway. 
# TODO from flisp: "We could also allow const globals here" - return k == K"inert" || k == K"top" || k == K"core" || + return k == K"inert" || k == K"top" || k == K"core" || k == K"static_eval" || is_simple_atom(ctx, ex) || is_single_assign_var(ctx, ex) end @@ -167,19 +167,6 @@ function compile_args(ctx, args) return args_out end -# Compile the (sym,lib) argument to ccall/cglobal -function compile_C_library_symbol(ctx, ex) - if kind(ex) == K"call" && kind(ex[1]) == K"core" && ex[1].name_val == "tuple" - # Tuples like core.tuple(:funcname, mylib_name) are allowed and are - # kept inline, but may only reference globals. - check_no_local_bindings(ctx, ex, - "function name and library expression cannot reference local variables") - ex - else - only(compile_args(ctx, (ex,))) - end -end - function emit(ctx::LinearIRContext, ex) push!(ctx.code, ex) return ex @@ -593,7 +580,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) k = kind(ex) if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || - k == K"SourceLocation" + k == K"SourceLocation" || k == K"static_eval" if in_tail_pos emit_return(ctx, ex) elseif needs_value @@ -614,39 +601,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) nothing elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" || k == K"new_opaque_closure" || k == K"cfunction" - if k == K"foreigncall" - args = SyntaxList(ctx) - push!(args, compile_C_library_symbol(ctx, ex[1])) - # 2nd to 5th arguments of foreigncall are special. They must be - # left in place but cannot reference locals. 
- check_no_local_bindings(ctx, ex[2], "ccall return type cannot reference local variables") - for argt in children(ex[3]) - check_no_local_bindings(ctx, argt, - "ccall argument types cannot reference local variables") - end - append!(args, ex[2:5]) - append!(args, compile_args(ctx, ex[6:end])) - args - elseif k == K"cfunction" - # Arguments of cfunction must be left in place except for argument - # 2 (fptr) - args = copy(children(ex)) - args[2] = only(compile_args(ctx, args[2:2])) - check_no_local_bindings(ctx, ex[3], - "cfunction return type cannot reference local variables") - for arg in children(ex[4]) - check_no_local_bindings(ctx, arg, - "cfunction argument cannot reference local variables") - end - elseif k == K"call" && is_core_ref(ex[1], "cglobal") - args = SyntaxList(ctx) - push!(args, ex[1]) - push!(args, compile_C_library_symbol(ctx, ex[2])) - append!(args, compile_args(ctx, ex[3:end])) - else - args = compile_args(ctx, children(ex)) - end - callex = makenode(ctx, ex, k, args) + callex = makenode(ctx, ex, k, compile_args(ctx, children(ex))) if in_tail_pos emit_return(ctx, ex, callex) elseif needs_value @@ -909,7 +864,7 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end function _remove_vars_with_isdefined_check!(vars, ex) - if is_leaf(ex) || is_quoted(ex) + if is_leaf(ex) || is_quoted(ex) || kind(ex) == K"static_eval" return elseif kind(ex) == K"isdefined" delete!(vars, ex[1].var_id) @@ -1017,10 +972,11 @@ function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) makeleaf(ctx, ex, K"globalref", binfo.name, mod=binfo.mod) end end - elseif k == K"meta" + elseif k == K"meta" || k == K"static_eval" # Somewhat-hack for Expr(:meta, :generated, gen) which has # weird top-level semantics for `gen`, but we still need to translate - # the binding it contains to a globalref. + # the binding it contains to a globalref. 
(TODO: use + # static_eval for this meta, somehow) mapchildren(ctx, ex) do e _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d3abadde83b12..f02f2fa8f0edd 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -115,6 +115,10 @@ function Base.showerror(io::IO, exc::MacroExpansionError) pos == :end ? (lb+1:lb) : error("Unknown position $pos") highlight(io, src.file, byterange, note=exc.msg) + if !isnothing(exc.err) + print(io, "\nCaused by:\n") + showerror(io, exc.err) + end end function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::SyntaxTree) @@ -222,6 +226,9 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) if all(==('_'), name_str) @ast ctx ex ex=>K"Placeholder" elseif is_ccall_or_cglobal(name_str) + # Lower special identifiers `cglobal` and `ccall` to `K"core"` + # psuedo-refs very early so that cglobal and ccall can never be + # turned into normal bindings (eg, assigned to) @ast ctx ex name_str::K"core" else layerid = get(ex, :scope_layer, ctx.current_layer.id) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index c0f659532b5be..8c7554622d465 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -630,6 +630,24 @@ function init_closure_bindings!(ctx, fname) end end +function find_any_local_binding(ctx, ex) + k = kind(ex) + if k == K"BindingId" + bkind = lookup_binding(ctx, ex.var_id).kind + if bkind != :global && bkind != :static_parameter + return ex + end + elseif !is_leaf(ex) && !is_quoted(ex) + for e in children(ex) + r = find_any_local_binding(ctx, e) + if !isnothing(r) + return r + end + end + end + return nothing +end + # Update ctx.bindings and ctx.lambda_bindings metadata based on binding usage function analyze_variables!(ctx, ex) k = kind(ex) @@ -649,6 +667,13 @@ function 
analyze_variables!(ctx, ex) end elseif is_leaf(ex) || is_quoted(ex) return + elseif k == K"static_eval" + badvar = find_any_local_binding(ctx, ex[1]) + if !isnothing(badvar) + name_hint = getmeta(ex, :name_hint, "syntax") + throw(LoweringError(badvar, "$(name_hint) cannot reference local variable")) + end + return elseif k == K"local" || k == K"global" # Presence of BindingId within local/global is ignored. return diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl index d9fac7bec22e3..43c186cdbc4cb 100644 --- a/JuliaLowering/src/syntax_macros.jl +++ b/JuliaLowering/src/syntax_macros.jl @@ -91,32 +91,153 @@ function Base.var"@cfunction"(__context__::MacroContext, callable, return_type, # Kinda weird semantics here - without `$`, the callable is a top level # expression which will be evaluated by `jl_resolve_globals_in_ir`, # implicitly within the module where the `@cfunction` is expanded into. - # - # TODO: The existing flisp implementation is arguably broken because it - # ignores macro hygiene when `callable` is the result of a macro - # expansion within a different module. For now we've inherited this - # brokenness. - # - # Ideally we'd fix this by bringing the scoping rules for this - # expression back into lowering. One option may be to wrap the - # expression in a form which pushes it to top level - maybe as a whole - # separate top level thunk like closure lowering - then use the - # K"captured_local" mechanism to interpolate it back in. This scheme - # would make the complicated scope semantics explicit and let them be - # dealt with in the right place in the frontend rather than putting the - # rules into the runtime itself. 
- fptr = @ast __context__ callable QuoteNode(Expr(callable))::K"Value" + fptr = @ast __context__ callable [K"static_eval"( + meta=name_hint("cfunction function name")) + callable + ] typ = Ptr{Cvoid} end @ast __context__ __context__.macrocall [K"cfunction" typ::K"Value" fptr - return_type - arg_types_svec + [K"static_eval"(meta=name_hint("cfunction return type")) + return_type + ] + [K"static_eval"(meta=name_hint("cfunction argument type")) + arg_types_svec + ] "ccall"::K"Symbol" ] end +function ccall_macro_parse(ctx, ex, opts) + gc_safe=false + for opt in opts + if kind(opt) != K"=" || numchildren(opt) != 2 || + kind(opt[1]) != K"Identifier" + throw(MacroExpansionError(opt, "Bad option to ccall")) + else + optname = opt[1].name_val + if optname == "gc_safe" + if kind(opt[2]) == K"Bool" + gc_safe = opt[2].value::Bool + else + throw(MacroExpansionError(opt[2], "gc_safe must be true or false")) + end + else + throw(MacroExpansionError(opt[1], "Unknown option name for ccall")) + end + end + end + + if kind(ex) != K"::" + throw(MacroExpansionError(ex, "Expected a return type annotation `::SomeType`", position=:end)) + end + + rettype = ex[2] + call = ex[1] + if kind(call) != K"call" + throw(MacroExpansionError(call, "Expected function call syntax `f()`")) + end + + func = call[1] + varargs = numchildren(call) > 1 && kind(call[end]) == K"parameters" ? + children(call[end]) : nothing + + # collect args and types + args = SyntaxList(ctx) + types = SyntaxList(ctx) + function pusharg!(arg) + if kind(arg) != K"::" + throw(MacroExpansionError(arg, "argument needs a type annotation")) + end + push!(args, arg[1]) + push!(types, arg[2]) + end + + for e in call[2:(isnothing(varargs) ? 
end : end-1)] + kind(e) != K"parameters" || throw(MacroExpansionError(call[end], "Multiple parameter blocks not allowed")) + pusharg!(e) + end + + if !isnothing(varargs) + num_required_args = length(args) + if num_required_args == 0 + throw(MacroExpansionError(call[end], "C ABI prohibits varargs without one required argument")) + end + for e in varargs + pusharg!(e) + end + else + num_required_args = 0 # Non-vararg call + end + + return func, rettype, types, args, gc_safe, num_required_args +end + +function ccall_macro_lower(ctx, ex, convention, func, rettype, types, args, gc_safe, num_required_args) + statements = SyntaxTree[] + kf = kind(func) + if kf == K"Identifier" + lowered_func = @ast ctx func func=>K"Symbol" + elseif kf == K"." + lowered_func = @ast ctx func [K"tuple" + func[2]=>K"Symbol" + [K"static_eval"(meta=name_hint("@ccall library name")) + func[1] + ] + ] + elseif kf == K"$" + check = @SyntaxTree quote + func = $(func[1]) + if !isa(func, Ptr{Cvoid}) + name = :($(func[1])) + throw(ArgumentError("interpolated function `$name` was not a `Ptr{Cvoid}`, but $(typeof(func))")) + end + end + push!(statements, check) + lowered_func = check[1][1] + else + throw(MacroExpansionError(func, + "Function name must be a symbol like `foo`, a library and function name like `libc.printf` or an interpolated function pointer like `\$ptr`")) + end + + roots = SyntaxTree[] + cargs = SyntaxTree[] + for (i, (type, arg)) in enumerate(zip(types, args)) + argi = @ast ctx arg "arg$i"::K"Identifier" + # TODO: Does it help to emit ssavar() here for the `argi`? + push!(statements, @SyntaxTree :(local $argi = Base.cconvert($type, $arg))) + push!(roots, argi) + push!(cargs, @SyntaxTree :(Base.unsafe_convert($type, $argi))) + end + effect_flags = UInt16(0) + push!(statements, @ast ctx ex [K"foreigncall" + lowered_func + [K"static_eval"(meta=name_hint("@ccall return type")) + rettype + ] + [K"static_eval"(meta=name_hint("@ccall argument type")) + [K"call" + "svec"::K"core" + types... 
+ ] + ] + num_required_args::K"Integer" + QuoteNode((convention, effect_flags, gc_safe))::K"Value" + cargs... + roots... + ]) + + @ast ctx ex [K"block" + statements... + ] +end + +function Base.var"@ccall"(ctx::MacroContext, ex, opts...) + ccall_macro_lower(ctx, ex, :ccall, ccall_macro_parse(ctx, ex, opts)...) +end + function Base.GC.var"@preserve"(__context__::MacroContext, exs...) idents = exs[1:end-1] for e in idents diff --git a/JuliaLowering/test/ccall_demo.jl b/JuliaLowering/test/ccall_demo.jl deleted file mode 100644 index f5e2e987e3839..0000000000000 --- a/JuliaLowering/test/ccall_demo.jl +++ /dev/null @@ -1,129 +0,0 @@ -module CCall - -using JuliaSyntax, JuliaLowering -using JuliaLowering: is_identifier_like, numchildren, children, MacroExpansionError, @ast, SyntaxTree - -# Hacky utils -# macro K_str(str) -# JuliaSyntax.Kind(str[1].value) -# end -# -# # Needed because we can't lower kwarg calls yet ehehe :-/ -# function mac_ex_error(ex, msg, pos) -# kwargs = Core.apply_type(Core.NamedTuple, (:position,))((pos,)) -# Core.kwcall(kwargs, MacroExpansionError, ex, msg) -# end - -macro ast_str(str) - ex = parsestmt(JuliaLowering.SyntaxTree, str, filename=string(__source__.file)) - ctx1, ex1 = JuliaLowering.expand_forms_1(__module__, ex) - @assert kind(ex1) == K"call" && ex1[1].value === JuliaLowering.interpolate_ast - cs = map(e->esc(Expr(e)), ex1[3:end]) - :(JuliaLowering.interpolate_ast($(ex1[2][1]), $(cs...))) -end - -function ccall_macro_parse(ex) - if kind(ex) != K"::" - throw(MacroExpansionError(ex, "Expected a return type annotation like `::T`", position=:end)) - end - - rettype = ex[2] - call = ex[1] - if kind(call) != K"call" - throw(MacroExpansionError(call, "Expected function call syntax `f()`")) - end - - # get the function symbols - func = let f = call[1], kf = kind(f) - if kf == K"." 
- @ast ex ex [K"tuple" f[2]=>K"Symbol" f[1]] - elseif kf == K"$" - f - elseif kf == K"Identifier" - @ast ex ex f=>K"Symbol" - else - throw(MacroExpansionError(f, - "Function name must be a symbol like `foo`, a library and function name like `libc.printf` or an interpolated function pointer like `\$ptr`")) - end - end - - varargs = nothing - - # collect args and types - args = SyntaxTree[] - types = SyntaxTree[] - - function pusharg!(arg) - if kind(arg) != K"::" - throw(MacroExpansionError(arg, "argument needs a type annotation like `::T`")) - end - push!(args, arg[1]) - push!(types, arg[2]) - end - - varargs = nothing - num_varargs = 0 - for e in call[2:end] - if kind(e) == K"parameters" - num_varargs == 0 || throw(MacroExpansionError(e, "Multiple parameter blocks not allowed")) - num_varargs = numchildren(e) - num_varargs > 0 || throw(MacroExpansionError(e, "C ABI prohibits vararg without one required argument")) - varargs = children(e) - else - pusharg!(e) - end - end - if !isnothing(varargs) - for e in varargs - pusharg!(e) - end - end - - return func, rettype, types, args, num_varargs -end - -function ccall_macro_lower(ex, convention, func, rettype, types, args, num_varargs) - statements = SyntaxTree[] - if kind(func) == K"$" - check = ast"""quote - func = $(func[1]) - if !isa(func, Ptr{Cvoid}) - name = :($(func[1])) - throw(ArgumentError("interpolated function `$name` was not a `Ptr{Cvoid}`, but $(typeof(func))")) - end - end""" - func = check[1][1] - push!(statements, check) - end - - roots = SyntaxTree[] - cargs = SyntaxTree[] - for (i, (type, arg)) in enumerate(zip(types, args)) - argi = @ast ex arg "arg$i"::K"Identifier" - # TODO: Is there any safe way to use SSAValue here? - push!(statements, ast":(local $argi = Base.cconvert($type, $arg))") - push!(roots, argi) - push!(cargs, ast":(Base.unsafe_convert($type, $argi))") - end - push!(statements, - @ast ex ex [K"foreigncall" - func - rettype - ast":(Core.svec($(types...)))" - # Is this num_varargs correct? 
It seems wrong? - num_varargs::K"Integer" - convention::K"Symbol" - cargs... - roots... - ]) - - @ast ex ex [K"block" - statements... - ] -end - -function var"@ccall"(ctx::JuliaLowering.MacroContext, ex) - ccall_macro_lower(ex, "ccall", ccall_macro_parse(ex)...) -end - -end # module CCall diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl index 7017902bff0fb..b0a7016044c94 100644 --- a/JuliaLowering/test/function_calls_ir.jl +++ b/JuliaLowering/test/function_calls_ir.jl @@ -360,7 +360,7 @@ ccall(:strlen, Csize_t, (Cstring,), "asdfg") 1 TestMod.Cstring 2 (call top.cconvert %₁ "asdfg") 3 (call top.unsafe_convert %₁ %₂) -4 (foreigncall :strlen TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₃ %₂) +4 (foreigncall :strlen (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₃ %₂) 5 (return %₄) ######################################## @@ -370,14 +370,14 @@ ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") 1 TestMod.Cstring 2 (call top.cconvert %₁ "asdfg") 3 (call top.unsafe_convert %₁ %₂) -4 (foreigncall (call core.tuple :strlen TestMod.libc) TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₃ %₂) +4 (foreigncall (static_eval (call core.tuple :strlen TestMod.libc)) (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₃ %₂) 5 (return %₄) ######################################## # ccall with a calling convention ccall(:foo, stdcall, Csize_t, ()) #--------------------- -1 (foreigncall :foo TestMod.Csize_t (call core.svec) 0 :stdcall) +1 (foreigncall :foo (static_eval TestMod.Csize_t) (static_eval (call core.svec)) 0 :stdcall) 2 (return %₁) ######################################## @@ -386,7 +386,7 @@ ccall(:foo, stdcall, Csize_t, (Any,), x) #--------------------- 1 core.Any 2 TestMod.x -3 (foreigncall :foo TestMod.Csize_t (call core.svec core.Any) 0 :stdcall %₂) +3 (foreigncall :foo (static_eval TestMod.Csize_t) (static_eval (call core.svec 
core.Any)) 0 :stdcall %₂) 4 (return %₃) ######################################## @@ -397,7 +397,7 @@ ccall(ptr, Csize_t, (Cstring,), "asdfg") 2 (call top.cconvert %₁ "asdfg") 3 TestMod.ptr 4 (call top.unsafe_convert %₁ %₂) -5 (foreigncall %₃ TestMod.Csize_t (call core.svec TestMod.Cstring) 0 :ccall %₄ %₂) +5 (foreigncall %₃ (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₄ %₂) 6 (return %₅) ######################################## @@ -412,7 +412,7 @@ ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") 6 (call top.unsafe_convert %₁ %₃) 7 (call top.unsafe_convert %₂ %₄) 8 (call top.unsafe_convert %₂ %₅) -9 (foreigncall :printf TestMod.Cint (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) +9 (foreigncall :printf (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring)) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) 10 (return %₉) ######################################## @@ -456,7 +456,7 @@ end LoweringError: let libc = "libc" ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") -# └─────────────┘ ── function name and library expression cannot reference local variables +# └──┘ ── function name and library expression cannot reference local variable end ######################################## @@ -468,7 +468,7 @@ end LoweringError: let Csize_t = 1 ccall(:strlen, Csize_t, (Cstring,), "asdfg") -# └─────┘ ── ccall return type cannot reference local variables +# └─────┘ ── ccall return type cannot reference local variable end ######################################## @@ -480,7 +480,7 @@ end LoweringError: let Cstring = 1 ccall(:strlen, Csize_t, (Cstring,), "asdfg") -# └─────┘ ── ccall argument types cannot reference local variables +# └─────┘ ── ccall argument type cannot reference local variable end ######################################## @@ -520,7 +520,7 @@ ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") cglobal((:sym, lib), 
Int) #--------------------- 1 TestMod.Int -2 (call core.cglobal (call core.tuple :sym TestMod.lib) %₁) +2 (call core.cglobal (static_eval (call core.tuple :sym TestMod.lib)) %₁) 3 (return %₂) ######################################## @@ -533,6 +533,26 @@ cglobal(f(), Int) 4 (call core.cglobal %₂ %₃) 5 (return %₄) +######################################## +# Error: cglobal with library name referencing local variable +let func="myfunc" + cglobal((func, "somelib"), Int) +end +#--------------------- +LoweringError: +let func="myfunc" + cglobal((func, "somelib"), Int) +# └──┘ ── function name and library expression cannot reference local variable +end + +######################################## +# Error: cglobal too many arguments +cglobal(:sym, Int, blah) +#--------------------- +LoweringError: +cglobal(:sym, Int, blah) +└──────────────────────┘ ── cglobal must have one or two arguments + ######################################## # Error: assigning to `cglobal` cglobal = 10 diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 9cfdef9597a45..a75d0feceb8f1 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -172,15 +172,23 @@ let err = try @test err.err isa UndefVarError end -include("ccall_demo.jl") -@test JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3 +@test JuliaLowering.include_string(test_mod, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3 +@test JuliaLowering.include_string(test_mod, "@ccall strlen(\"asdf\"::Cstring)::Csize_t gc_safe=true") == 4 +@test JuliaLowering.include_string(test_mod, """ +begin + buf = zeros(UInt8, 20) + @ccall sprintf(buf::Ptr{UInt8}, "num:%d str:%s"::Cstring; 42::Cint, "hello"::Cstring)::Cint + String(buf) +end +""") == "num:42 str:hello\0\0\0\0" + let (err, st) = try - JuliaLowering.include_string(CCall, "@ccall strlen(\"foo\"::Cstring)") + JuliaLowering.include_string(test_mod, "@ccall strlen(\"foo\"::Cstring)") catch e e, 
stacktrace(catch_backtrace()) end @test err isa JuliaLowering.MacroExpansionError - @test err.msg == "Expected a return type annotation like `::T`" + @test err.msg == "Expected a return type annotation `::SomeType`" @test isnothing(err.err) # Check that `catch_backtrace` can capture the stacktrace of the macro function @test any(sf->sf.func===:ccall_macro_parse, st) diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index eccefb44cef6f..29f4e6509b743 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -132,3 +132,7 @@ _never_exist = @m_not_exist 42 MacroExpansionError while expanding @m_not_exist in module Main.TestMod: _never_exist = @m_not_exist 42 # └─────────┘ ── Macro not found +Caused by: +UndefVarError: `@m_not_exist` not defined in `Main.TestMod` +Suggestion: check for spelling errors or missing imports. + diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index a56923fc2d010..1fa6a15cfde11 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -46,6 +46,59 @@ cf_float = JuliaLowering.include_string(test_mod, """ """) @test @ccall($cf_float(2::Float64, 3::Float64)::Float64) == 32.0 +# Test that hygiene works with @ccallable function names (this is broken in +# Base) +JuliaLowering.include_string(test_mod, raw""" +f_ccallable_hygiene() = 1 + +module Nested + f_ccallable_hygiene() = 2 + macro cfunction_hygiene() + :(@cfunction(f_ccallable_hygiene, Int, ())) + end +end +""") +cf_hygiene = JuliaLowering.include_string(test_mod, """ +Nested.@cfunction_hygiene +""") +@test @ccall($cf_hygiene()::Int) == 2 + +# Test that ccall can be passed static parameters in type signatures. +# +# Note that the cases where this works are extremely limited and tend to look +# like `Ptr{T}` or `Ref{T}` (`T` doesn't work!?) 
because of the compilation +# order in which the runtime inspects the arguments to ccall (`Ptr{T}` has a +# well defined C ABI even when `T` is not yet determined). See also +# https://github.com/JuliaLang/julia/issues/29400 +# https://github.com/JuliaLang/julia/pull/40947 +JuliaLowering.include_string(test_mod, raw""" +function sparam_ccallable(x::Ptr{T}) where {T} + unsafe_store!(x, one(T)) + nothing +end + +function ccall_with_sparams(::Type{T}) where {T} + x = T[zero(T)] + cf = @cfunction(sparam_ccallable, Cvoid, (Ptr{T},)) + @ccall $cf(x::Ptr{T})::Cvoid + x[1] +end +""") +@test test_mod.ccall_with_sparams(Int) === 1 +@test test_mod.ccall_with_sparams(Float64) === 1.0 + +# Test that ccall can be passed static parameters in the function name +JuliaLowering.include_string(test_mod, raw""" +# In principle, may add other strlen-like functions here for different string +# types +ccallable_sptest_name(::Type{String}) = :strlen + +function ccall_with_sparams_in_name(s::T) where {T} + ccall(ccallable_sptest_name(T), Csize_t, (Cstring,), s) +end +""") +@test test_mod.ccall_with_sparams_in_name("hii") == 3 + @testset "CodeInfo: has_image_globalref" begin @test lower_str(test_mod, "x + y").args[1].has_image_globalref === false @test lower_str(Main, "x + y").args[1].has_image_globalref === true diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 15fa0229e595a..33fe64491d4a1 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -323,7 +323,7 @@ JuxtTest.@emit_juxt # @cfunction expansion with global generic function as function argument @cfunction(callable, Int, (Int, Float64)) #--------------------- -1 (cfunction Ptr{Nothing} :(:callable) TestMod.Int (call core.svec TestMod.Int TestMod.Float64) :ccall) +1 (cfunction Ptr{Nothing} (static_eval TestMod.callable) (static_eval TestMod.Int) (static_eval (call core.svec TestMod.Int TestMod.Float64)) :ccall) 2 (return %₁) ######################################## @@ -331,7 
+331,7 @@ JuxtTest.@emit_juxt @cfunction($close_over, Int, (Int, Float64)) #--------------------- 1 TestMod.close_over -2 (cfunction Base.CFunction %₁ TestMod.Int (call core.svec TestMod.Int TestMod.Float64) :ccall) +2 (cfunction Base.CFunction %₁ (static_eval TestMod.Int) (static_eval (call core.svec TestMod.Int TestMod.Float64)) :ccall) 3 (return %₂) ######################################## @@ -351,7 +351,7 @@ end LoweringError: let T=Float64 @cfunction(f, T, (Float64,)) -# ╙ ── cfunction return type cannot reference local variables +# ╙ ── cfunction return type cannot reference local variable end ######################################## @@ -363,9 +363,123 @@ end LoweringError: let T=Float64 @cfunction(f, Float64, (Float64,T)) -# ╙ ── cfunction argument cannot reference local variables +# ╙ ── cfunction argument type cannot reference local variable end +######################################## +# Basic @ccall lowering +@ccall foo(x::X, y::Y)::R +#--------------------- +1 JuliaLowering.Base +2 (call top.getproperty %₁ :cconvert) +3 TestMod.X +4 TestMod.x +5 (= slot₁/arg1 (call %₂ %₃ %₄)) +6 JuliaLowering.Base +7 (call top.getproperty %₆ :cconvert) +8 TestMod.Y +9 TestMod.y +10 (= slot₂/arg2 (call %₇ %₈ %₉)) +11 JuliaLowering.Base +12 (call top.getproperty %₁₁ :unsafe_convert) +13 TestMod.X +14 slot₁/arg1 +15 (call %₁₂ %₁₃ %₁₄) +16 JuliaLowering.Base +17 (call top.getproperty %₁₆ :unsafe_convert) +18 TestMod.Y +19 slot₂/arg2 +20 (call %₁₇ %₁₈ %₁₉) +21 slot₁/arg1 +22 slot₂/arg2 +23 (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 0 :($(QuoteNode((:ccall, 0x0000, false)))) %₁₅ %₂₀ %₂₁ %₂₂) +24 (return %₂₃) + +######################################## +# @ccall lowering with varargs and gc_safe +@ccall foo(x::X; y::Y)::R gc_safe=true +#--------------------- +1 JuliaLowering.Base +2 (call top.getproperty %₁ :cconvert) +3 TestMod.X +4 TestMod.x +5 (= slot₁/arg1 (call %₂ %₃ %₄)) +6 JuliaLowering.Base +7 (call top.getproperty %₆ 
:cconvert) +8 TestMod.Y +9 TestMod.y +10 (= slot₂/arg2 (call %₇ %₈ %₉)) +11 JuliaLowering.Base +12 (call top.getproperty %₁₁ :unsafe_convert) +13 TestMod.X +14 slot₁/arg1 +15 (call %₁₂ %₁₃ %₁₄) +16 JuliaLowering.Base +17 (call top.getproperty %₁₆ :unsafe_convert) +18 TestMod.Y +19 slot₂/arg2 +20 (call %₁₇ %₁₈ %₁₉) +21 slot₁/arg1 +22 slot₂/arg2 +23 (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 1 :($(QuoteNode((:ccall, 0x0000, true)))) %₁₅ %₂₀ %₂₁ %₂₂) +24 (return %₂₃) + +######################################## +# Error: No return annotation on @ccall +@ccall strlen("foo"::Cstring) +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall strlen("foo"::Cstring) +# └ ── Expected a return type annotation `::SomeType` + +######################################## +# Error: No argument type on @ccall +@ccall foo("blah"::Cstring, "bad")::Int +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo("blah"::Cstring, "bad")::Int +# └───┘ ── argument needs a type annotation + +######################################## +# Error: @ccall varags without one fixed argument +@ccall foo(; x::Int)::Int +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(; x::Int)::Int +# └──────┘ ── C ABI prohibits varargs without one required argument + +######################################## +# Error: Multiple varargs blocks +@ccall foo(; x::Int; y::Float64)::Int +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(; x::Int; y::Float64)::Int +# └──────────┘ ── Multiple parameter blocks not allowed + +######################################## +# Error: Bad @ccall option +@ccall foo(x::Int)::Int bad_opt +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(x::Int)::Int bad_opt +# └─────┘ ── Bad option to ccall + 
+######################################## +# Error: Unknown @ccall option name +@ccall foo(x::Int)::Int bad_opt=true +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(x::Int)::Int bad_opt=true +# └─────┘ ── Unknown option name for ccall + +######################################## +# Error: Unknown option type +@ccall foo(x::Int)::Int gc_safe="hi" +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(x::Int)::Int gc_safe="hi" +# └──┘ ── gc_safe must be true or false + ######################################## # Error: unary & syntax &x From fd53bfcbb8947d2d4c6a44f549c247d26f1ed022 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 20 Aug 2025 15:36:24 +1000 Subject: [PATCH 1042/1109] Macro expansion for old-style `Expr` macros (JuliaLang/JuliaLowering.jl#33) Implements mixed macro expansion for old-style macros (those written to expect an `Expr` data structure) and new-style macros (those written to expect `SyntaxTree`). The main difficulty here is managing hygiene correctly. We choose to represent new-style scoped identifiers passed to old macros using `Expr(:scope_layer, name, layer_id)` where necessary. But only where necessary - in most contexts, old-style macros will see unadorned identifiers just as they currently do. The only time the new `Expr` construct is visible is when new macros interpolate an expression into a call to an old-style macro in the returned code. Previously, such macro-calling-macro situations would result in the inner macro call seeing `Expr(:escape, ...)` but they now see `Expr(:scope_layer)`. However, and it's rare for old-style macros to de- and re-construct escaped expressions correctly so this should be a minor issue for compatibility. Old-style macros may still return `Expr(:escape)` expressions resulting from manual escaping. 
When consuming the output of old macros, we process these manual escapes by escaping up the macro expansion stack in the same way we currently do. Also add `parent_layer` id to `ScopeLayer` to preserve the macro expansion stack there for use by JETLS. Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com> --- JuliaLowering/README.md | 59 ++++++++ JuliaLowering/src/ast.jl | 4 +- JuliaLowering/src/compat.jl | 10 +- JuliaLowering/src/desugaring.jl | 4 +- JuliaLowering/src/kinds.jl | 5 +- JuliaLowering/src/macro_expansion.jl | 203 ++++++++++++++++++++------- JuliaLowering/src/runtime.jl | 19 +-- JuliaLowering/src/syntax_graph.jl | 34 ++++- JuliaLowering/test/demo.jl | 59 +++++++- JuliaLowering/test/macros.jl | 201 +++++++++++++++++++------- 10 files changed, 478 insertions(+), 120 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 83b10ebb8b8b8..0918fb0fd540d 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -288,6 +288,65 @@ discussed in Adams' paper: TODO: Write more here... + +### Compatibility with `Expr` macros + +In order to have compatibility with old-style macros which expect an `Expr`-based +data structure as input, we convert `SyntaxTree` to `Expr`, call the old-style +macro, then convert `SyntaxTree` back to `Expr` and continue with the expansion +process. This involves some loss of provenance precision but allows full +interoperability in the package ecosystem without a need to make breaking +changes. + +Let's look at an example. 
Suppose a manually escaped old-style macro +`@oldstyle` is implemented as + +```julia +macro oldstyle(a, b) + quote + x = "x in @oldstyle" + @newstyle $(esc(a)) $(esc(b)) x + end +end +``` + +along with two correctly escaped new-style macros: + +```julia +macro call_oldstyle_macro(y) + quote + x = "x in call_oldstyle_macro" + @oldstyle $y x + end +end + +macro newstyle(x, y, z) + quote + x = "x in @newstyle" + ($x, $y, $z, x) + end +end +``` + +Then want some code like the following to "just work" with respect to hygiene + +```julia +let + x = "x in outer ctx" + @call_oldstyle_macro x +end +``` + +When calling `@oldstyle`, we must convert `SyntaxTree` into `Expr`, but we need +to preserve the scope layer of the `x` from the outer context as it is passed +into `@oldstyle` as a macro argument. To do this, we use `Expr(:scope_layer, +:x, outer_layer_id)`. (In the old system, this would be `Expr(:escape, :x)` +instead, presuming that `@call_oldstyle_macro` was implemented using `esc()`.) + +When receiving output from old style macro invocations, we preserve the escape +handling of the existing system for any symbols which aren't tagged with a +scope layer. 
+ ## Pass 2: Syntax desugaring This pass recursively converts many special surface syntax forms to a smaller diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index bbe59015fbf7d..46aa82e505dc0 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -662,8 +662,8 @@ function to_symbol(ctx, ex) @ast ctx ex ex=>K"Symbol" end -function new_scope_layer(ctx, mod_ref::Module=ctx.mod; is_macro_expansion=true) - new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, is_macro_expansion) +function new_scope_layer(ctx, mod_ref::Module=ctx.mod) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, 0, false) push!(ctx.scope_layers, new_layer) new_layer.id end diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 76dad46cb2db7..f12f0e7b5cf81 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -24,7 +24,8 @@ function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, var_id=Int, value=Any, - name_val=String, is_toplevel_thunk=Bool) + name_val=String, is_toplevel_thunk=Bool, + scope_layer=LayerId) expr_to_syntaxtree(graph, e, lnn) end @@ -423,6 +424,13 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA @assert nargs === 1 child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end + elseif e.head === :scope_layer + @assert nargs === 2 + @assert e.args[1] isa Symbol + @assert e.args[2] isa LayerId + st_id, src = _insert_convert_expr(e.args[1], graph, src) + setattr!(graph, st_id, scope_layer=e.args[2]) + return st_id, src elseif e.head === :symbolicgoto || e.head === :symboliclabel @assert nargs === 1 st_k = e.head === :symbolicgoto ? 
K"symbolic_label" : K"symbolic_goto" diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 4a4d4e296e187..573b712c63d6a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -15,7 +15,7 @@ function DesugaringContext(ctx) scope_type=Symbol, # :hard or :soft var_id=IdTag, is_toplevel_thunk=Bool) - DesugaringContext(graph, ctx.bindings, ctx.scope_layers, ctx.current_layer.mod) + DesugaringContext(graph, ctx.bindings, ctx.scope_layers, first(ctx.scope_layers).mod) end #------------------------------------------------------------------------------- @@ -2555,7 +2555,7 @@ function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_nam end # TODO: Is the layer correct here? Which module should be the parent module # of this body function? - layer = new_scope_layer(ctx; is_macro_expansion=false) + layer = new_scope_layer(ctx) body_func_name = adopt_scope(@ast(ctx, callex_srcref, mangled_name::K"Identifier"), layer) kwcall_arg_names = SyntaxList(ctx) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 7aa17aa4a0f00..4cc0afe86f088 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -48,8 +48,11 @@ function _register_kinds() # Internal initializer for struct types, for inner constructors/functions "new" "splatnew" - # For expr-macro compatibility; gone after expansion + # Used for converting `esc()`'d expressions arising from old macro + # invocations during macro expansion (gone after macro expansion) "escape" + # Used for converting the old-style macro hygienic-scope form (gone + # after macro expansion) "hygienic_scope" # An expression which will eventually be evaluated "statically" in # the context of a CodeInfo and thus allows access only to globals diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index f02f2fa8f0edd..e321af03129ed 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ 
b/JuliaLowering/src/macro_expansion.jl @@ -11,6 +11,7 @@ generates a new layer. struct ScopeLayer id::LayerId mod::Module + parent_layer::LayerId # Index of parent layer in a macro expansion. Equal to 0 for no parent is_macro_expansion::Bool # FIXME end @@ -18,9 +19,17 @@ struct MacroExpansionContext{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings scope_layers::Vector{ScopeLayer} - current_layer::ScopeLayer + scope_layer_stack::Vector{LayerId} end +function MacroExpansionContext(graph::SyntaxGraph, mod::Module) + layers = ScopeLayer[ScopeLayer(1, mod, 0, false)] + MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)]) +end + +current_layer(ctx::MacroExpansionContext) = ctx.scope_layers[last(ctx.scope_layer_stack)] +current_layer_id(ctx::MacroExpansionContext) = last(ctx.scope_layer_stack) + #-------------------------------------------------- # Expansion of quoted expressions function collect_unquoted!(ctx, unquoted, ex, depth) @@ -130,7 +139,7 @@ function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::Syn ctx3, ex3 = resolve_scopes(ctx2, ex2) ctx4, ex4 = convert_closures(ctx3, ex3) ctx5, ex5 = linearize_ir(ctx4, ex4) - mod = ctx.current_layer.mod + mod = current_layer(ctx).mod expr_form = to_lowered_expr(mod, ex5) try eval(mod, expr_form) @@ -139,54 +148,136 @@ function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::Syn end end -function expand_macro(ctx::MacroExpansionContext, ex::SyntaxTree) +# Record scope layer information for symbols passed to a macro by setting +# scope_layer for each expression and also processing any K"escape" arising +# from previous expansion of old-style macros. 
+# +# See also set_scope_layer() +function set_macro_arg_hygiene(ctx, ex, layer_ids, layer_idx) + k = kind(ex) + scope_layer = get(ex, :scope_layer, layer_ids[layer_idx]) + if k == K"module" || k == K"toplevel" || k == K"inert" + makenode(ctx, ex, ex, children(ex); + scope_layer=scope_layer) + elseif k == K"." + makenode(ctx, ex, ex, set_macro_arg_hygiene(ctx, ex[1], layer_ids, layer_idx), ex[2], + scope_layer=scope_layer) + elseif !is_leaf(ex) + inner_layer_idx = layer_idx + if k == K"escape" + inner_layer_idx = layer_idx - 1 + if inner_layer_idx < 1 + # If we encounter too many escape nodes, there's probably been + # an error in the previous macro expansion. + # todo: The error here isn't precise about that - maybe we + # should record that macro call expression with the scope layer + # if we want to report the error against the macro call? + throw(MacroExpansionError(ex, "`escape` node in outer context")) + end + end + mapchildren(e->set_macro_arg_hygiene(ctx, e, layer_ids, inner_layer_idx), + ctx, ex; scope_layer=scope_layer) + else + makeleaf(ctx, ex, ex; scope_layer=scope_layer) + end +end + +function prepare_macro_args(ctx, mctx, raw_args) + macro_args = Any[mctx] + for arg in raw_args + # Add hygiene information to be carried along with macro arguments. + # + # Macro call arguments may be either + # * Unprocessed by the macro expansion pass + # * Previously processed, but spliced into a further macro call emitted by + # a macro expansion. + # In either case, we need to set scope layers before passing the + # arguments to the macro call. 
+ push!(macro_args, set_macro_arg_hygiene(ctx, arg, ctx.scope_layer_stack, + length(ctx.scope_layer_stack))) + end + return macro_args +end + +function expand_macro(ctx, ex) @assert kind(ex) == K"macrocall" macname = ex[1] - mctx = MacroContext(ctx.graph, ex, ctx.current_layer) + mctx = MacroContext(ctx.graph, ex, current_layer(ctx)) macfunc = eval_macro_name(ctx, mctx, macname) - # Macro call arguments may be either - # * Unprocessed by the macro expansion pass - # * Previously processed, but spliced into a further macro call emitted by - # a macro expansion. - # In either case, we need to set any unset scope layers before passing the - # arguments to the macro call. - macro_args = Any[mctx] - for i in 2:numchildren(ex) - push!(macro_args, set_scope_layer(ctx, ex[i], ctx.current_layer.id, false)) - end - macro_invocation_world = Base.get_world_counter() - expanded = try - # TODO: Allow invoking old-style macros for compat - invokelatest(macfunc, macro_args...) - catch exc - if exc isa MacroExpansionError - # Add context to the error. - newexc = MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) + raw_args = ex[2:end] + # We use a specific well defined world age for the next checks and macro + # expansion invocations. This avoids inconsistencies if the latest world + # age changes concurrently. + # + # TODO: Allow this to be passed in + macro_world = Base.get_world_counter() + if hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}; world=macro_world) + macro_args = prepare_macro_args(ctx, mctx, raw_args) + expanded = try + Base.invoke_in_world(macro_world, macfunc, macro_args...) + catch exc + newexc = exc isa MacroExpansionError ? + MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) : + MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc) + # TODO: We can delete this rethrow when we move to AST-based error propagation. 
+ rethrow(newexc) + end + if expanded isa SyntaxTree + if !is_compatible_graph(ctx, expanded) + # If the macro has produced syntax outside the macro context, + # copy it over. TODO: Do we expect this always to happen? What + # is the API for access to the macro expansion context? + expanded = copy_ast(ctx, expanded) + end else - newexc = MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc) + expanded = @ast ctx ex expanded::K"Value" + end + else + # Compat: attempt to invoke an old-style macro if there's no applicable + # method for new-style macro arguments. + macro_loc = source_location(LineNumberNode, ex) + macro_args = Any[macro_loc, current_layer(ctx).mod] + for arg in raw_args + # For hygiene in old-style macros, we omit any additional scope + # layer information from macro arguments. Old-style macros will + # handle that using manual escaping in the macro itself. + # + # Note that there's one slight incompatibility here for identifiers + # interpolated into the `raw_args` from outer macro expansions of + # new-style macros which call old-style macros. Instead of seeing + # `Expr(:escape)` in such situations, old-style macros will now see + # `Expr(:scope_layer)` inside `macro_args`. + push!(macro_args, Expr(arg)) end - # TODO: We can delete this rethrow when we move to AST-based error propagation. - rethrow(newexc) + expanded = try + Base.invoke_in_world(macro_world, macfunc, macro_args...) 
+ catch exc + if exc isa MethodError && exc.f === macfunc + if !isempty(methods_in_world(macfunc, Tuple{typeof(mctx), Vararg{Any}}, macro_world)) + # If the macro has at least some methods implemented in the + # new style, assume the user meant to call one of those + # rather than any old-style macro methods which might exist + exc = MethodError(macfunc, (prepare_macro_args(ctx, mctx, raw_args)..., ), macro_world) + end + end + rethrow(MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc)) + end + expanded = expr_to_syntaxtree(ctx, expanded, macro_loc) end - if expanded isa SyntaxTree - if !is_compatible_graph(ctx, expanded) - # If the macro has produced syntax outside the macro context, copy it over. - # TODO: Do we expect this always to happen? What is the API for access - # to the macro expansion context? - expanded = copy_ast(ctx, expanded) - end + if kind(expanded) != K"Value" expanded = append_sourceref(ctx, expanded, ex) # Module scope for the returned AST is the module where this particular # method was defined (may be different from `parentmodule(macfunc)`) - mod_for_ast = lookup_method_instance(macfunc, macro_args, macro_invocation_world).def.module - new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, true) + mod_for_ast = lookup_method_instance(macfunc, macro_args, + macro_world).def.module + new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, + current_layer_id(ctx), true) push!(ctx.scope_layers, new_layer) - inner_ctx = MacroExpansionContext(ctx.graph, ctx.bindings, ctx.scope_layers, new_layer) - expanded = expand_forms_1(inner_ctx, expanded) - else - expanded = @ast ctx ex expanded::K"Value" + push!(ctx.scope_layer_stack, new_layer.id) + expanded = expand_forms_1(ctx, expanded) + pop!(ctx.scope_layer_stack) end return expanded end @@ -231,18 +322,37 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) # turned into normal bindings (eg, assigned to) @ast ctx ex name_str::K"core" else - layerid = 
get(ex, :scope_layer, ctx.current_layer.id) + layerid = get(ex, :scope_layer, current_layer_id(ctx)) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) end elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" - layerid = get(ex, :scope_layer, ctx.current_layer.id) + layerid = get(ex, :scope_layer, current_layer_id(ctx)) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" # Strip "container" nodes @chk numchildren(ex) == 1 expand_forms_1(ctx, ex[1]) + elseif k == K"escape" + # For processing of old-style macros + @chk numchildren(ex) >= 1 "`escape` requires an argument" + if length(ctx.scope_layer_stack) === 1 + throw(MacroExpansionError(ex, "`escape` node in outer context")) + end + top_layer = pop!(ctx.scope_layer_stack) + escaped_ex = expand_forms_1(ctx, ex[1]) + push!(ctx.scope_layer_stack, top_layer) + escaped_ex + elseif k == K"hygienic_scope" + @chk numchildren(ex) >= 2 && ex[2].value isa Module (ex,"`hygienic_scope` requires an AST and a module") + new_layer = ScopeLayer(length(ctx.scope_layers)+1, ex[2].value, + current_layer_id(ctx), true) + push!(ctx.scope_layers, new_layer) + push!(ctx.scope_layer_stack, new_layer.id) + hyg_ex = expand_forms_1(ctx, ex[1]) + pop!(ctx.scope_layer_stack) + hyg_ex elseif k == K"juxtapose" - layerid = get(ex, :scope_layer, ctx.current_layer.id) + layerid = get(ex, :scope_layer, current_layer_id(ctx)) @chk numchildren(ex) == 2 @ast ctx ex [K"call" "*"::K"Identifier"(scope_layer=layerid) @@ -330,7 +440,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) elseif k == K"<:" || k == K">:" || k == K"-->" # TODO: Should every form get layerid systematically? Or only the ones # which expand_forms_2 needs? 
- layerid = get(ex, :scope_layer, ctx.current_layer.id) + layerid = get(ex, :scope_layer, current_layer_id(ctx)) mapchildren(e->expand_forms_1(ctx,e), ctx, ex; scope_layer=layerid) else mapchildren(e->expand_forms_1(ctx,e), ctx, ex) @@ -343,13 +453,12 @@ function expand_forms_1(mod::Module, ex::SyntaxTree) scope_layer=LayerId, __macro_ctx__=Nothing, meta=CompileHints) - layers = ScopeLayer[ScopeLayer(1, mod, false)] - ctx = MacroExpansionContext(graph, Bindings(), layers, layers[1]) + ctx = MacroExpansionContext(graph, mod) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) graph2 = delete_attributes(graph, :__macro_ctx__) # TODO: Returning the context with pass-specific mutable data is a bad way - # to carry state into the next pass. - ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, - ctx.current_layer) + # to carry state into the next pass. We might fix this by attaching such + # data to the graph itself as global attributes? + ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, LayerId[]) return ctx2, reparent(ctx2, ex2) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 23252400fd770..9d9e0e5087e22 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -306,12 +306,11 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a ) # Macro expansion - layers = ScopeLayer[ScopeLayer(1, mod, false)] - ctx1 = MacroExpansionContext(graph, Bindings(), layers, layers[1]) + ctx1 = MacroExpansionContext(graph, mod) # Run code generator - this acts like a macro expander and like a macro # expander it gets a MacroContext. - mctx = MacroContext(syntax_graph(ctx1), g.srcref, layers[1]) + mctx = MacroContext(syntax_graph(ctx1), g.srcref, ctx1.scope_layers[end]) ex0 = g.gen(mctx, args...) 
if ex0 isa SyntaxTree if !is_compatible_graph(ctx1, ex0) @@ -326,11 +325,11 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Expand any macros emitted by the generator ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0)) ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__), - ctx1.bindings, ctx1.scope_layers, ctx1.current_layer) + ctx1.bindings, ctx1.scope_layers, LayerId[]) ex1 = reparent(ctx1, ex1) # Desugaring - ctx2, ex2 = expand_forms_2( ctx1, ex1) + ctx2, ex2 = expand_forms_2(ctx1, ex1) # Wrap expansion in a non-toplevel lambda and run scope resolution ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false) @@ -342,12 +341,11 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a ] ex2 ] - ctx3, ex3 = resolve_scopes( ctx2, ex2) - + ctx3, ex3 = resolve_scopes(ctx2, ex2) # Rest of lowering ctx4, ex4 = convert_closures(ctx3, ex3) - ctx5, ex5 = linearize_ir( ctx4, ex4) + ctx5, ex5 = linearize_ir(ctx4, ex4) ci = to_lowered_expr(mod, ex5) @assert ci isa Core.CodeInfo return ci @@ -414,3 +412,8 @@ function lookup_method_instance(func, args, world::Integer) world::Csize_t)::Ptr{Cvoid} return mi == C_NULL ? nothing : unsafe_pointer_to_objref(mi) end + +# Like `Base.methods()` but with world age support +function methods_in_world(func, arg_sig, world) + Base._methods(func, arg_sig, -1, world) +end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 6eb0737c4c85e..d883b2464d945 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -429,7 +429,7 @@ attrsummary(name, value::Number) = "$name=$value" function _value_string(ex) k = kind(ex) str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : - k == K"Placeholder" ? ex.name_val : + k == K"Placeholder" ? ex.name_val : k == K"SSAValue" ? "%" : k == K"BindingId" ? "#" : k == K"label" ? 
"label" : @@ -546,9 +546,16 @@ JuliaSyntax.byte_range(ex::SyntaxTree) = byte_range(sourceref(ex)) function JuliaSyntax._expr_leaf_val(ex::SyntaxTree, _...) name = get(ex, :name_val, nothing) - !isnothing(name) && return Symbol(name) - name = get(ex, :value, nothing) - return name + if !isnothing(name) + n = Symbol(name) + if hasattr(ex, :scope_layer) + Expr(:scope_layer, n, ex.scope_layer) + else + n + end + else + get(ex, :value, nothing) + end end Base.Expr(ex::SyntaxTree) = JuliaSyntax.to_expr(ex) @@ -588,6 +595,21 @@ function _find_SyntaxTree_macro(ex, line) return nothing # Will get here if multiple children are on the same line. end +# Translate JuliaLowering hygiene to esc() for use in @SyntaxTree +function _scope_layer_1_to_esc!(ex) + if ex isa Expr + if ex.head == :scope_layer + @assert ex.args[2] === 1 + return esc(_scope_layer_1_to_esc!(ex.args[1])) + else + map!(_scope_layer_1_to_esc!, ex.args, ex.args) + return ex + end + else + return ex + end +end + """ Macro to construct quoted SyntaxTree literals (instead of quoted Expr literals) in normal Julia source code. @@ -630,10 +652,10 @@ macro SyntaxTree(ex_old) # discover the piece of AST which should be returned. ex = _find_SyntaxTree_macro(full_ex, __source__.line) # 4. Do the first step of JuliaLowering's syntax lowering to get - # synax interpolations to work + # syntax interpolations to work _, ex1 = expand_forms_1(__module__, ex) @assert kind(ex1) == K"call" && ex1[1].value == interpolate_ast - esc(Expr(:call, interpolate_ast, ex1[2][1], map(Expr, ex1[3:end])...)) + Expr(:call, :interpolate_ast, ex1[2][1], map(e->_scope_layer_1_to_esc!(Expr(e)), ex1[3:end])...) 
end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 3aa83937ee6f4..c0698b022dcd6 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -43,7 +43,7 @@ end #------------------------------------------------------------------------------- # Module containing macros used in the demo. -define_macros = false +define_macros = true if !define_macros eval(:(module M end)) else @@ -95,6 +95,34 @@ eval(JuliaLowering.@SyntaxTree :(baremodule M end end + macro call_show(x) + quote + z = "z in @call_show" + @show z $x + end + end + + macro call_info(x) + quote + z = "z in @call_info" + @info "hi" z $x + end + end + + macro call_oldstyle_macro(y) + quote + x = "x in call_oldstyle_macro" + @oldstyle $y x + end + end + + macro newstyle(x, y, z) + quote + x = "x in @newstyle" + ($x, $y, $z, x) + end + end + macro set_a_global(val) quote global a_global = $val @@ -182,6 +210,16 @@ eval(JuliaLowering.@SyntaxTree :(baremodule M end)) end + +Base.eval(M, :( +macro oldstyle(a, b) + quote + x = "x in @oldstyle" + @newstyle $(esc(a)) $(esc(b)) x + end +end +)) + # #------------------------------------------------------------------------------- # Demos of the prototype @@ -794,7 +832,24 @@ end """ src = """ -cglobal(:jl_uv_stdin, Ptr{Cvoid}) +let + z = "z in outer ctx" + @call_show z +end +""" + +src = """ +let + x = "x in outer ctx" + @call_oldstyle_macro x +end +""" + +src = """ +let + z = "z in outer ctx" + @call_info z +end """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index a75d0feceb8f1..34d028876c09f 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -4,7 +4,7 @@ using JuliaLowering, Test module test_mod end -JuliaLowering.include_string(test_mod, """ +JuliaLowering.include_string(test_mod, raw""" module M using JuliaLowering: JuliaLowering, @ast, 
@chk, adopt_scope using JuliaSyntax @@ -28,51 +28,49 @@ module M macro foo(ex) :(begin x = "`x` from @foo" - (x, someglobal, \$ex) + (x, someglobal, $ex) end) end # Set `a_global` in M macro set_a_global(val) :(begin - global a_global = \$val + global a_global = $val end) end macro set_other_global(ex, val) :(begin - global \$ex = \$val + global $ex = $val end) end macro set_global_in_parent(ex) e1 = adopt_scope(:(sym_introduced_from_M), __context__) quote - \$e1 = \$ex + $e1 = $ex nothing end end macro inner() - :(2) + :(y) end macro outer() - :((1, @inner)) - end - - # # Recursive macro call - # # TODO: Need branching! - # macro recursive(N) - # Nval = N.value #::Int - # if Nval < 1 - # return N - # end - # quote - # x = \$N - # (@recursive \$(Nval-1), x) - # end - # end + :((x, @inner)) + end + + macro recursive(N) + Nval = N.value::Int + if Nval < 1 + return N + end + quote + x = $N + (x, @recursive $(Nval-1)) + end + end end """) @@ -84,6 +82,7 @@ end """) == ("`x` from @foo", "global in module M", "`x` from outer scope") @test !isdefined(test_mod.M, :x) + @test JuliaLowering.include_string(test_mod, """ #line1 (M.@__MODULE__(), M.@__FILE__(), M.@__LINE__()) @@ -106,43 +105,21 @@ JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 1 @test !isdefined(test_mod.M, :global_in_test_mod) @test test_mod.global_in_test_mod == 100 -Base.eval(test_mod.M, :( -# Recursive macro call -function var"@recursive"(mctx, N) - @chk kind(N) == K"Integer" - Nval = N.value::Int - if Nval < 1 - return N - end - @ast mctx (@HERE) [K"block" - [K"="(@HERE) - "x"::K"Identifier"(@HERE) - N - ] - [K"tuple"(@HERE) - "x"::K"Identifier"(@HERE) - [K"macrocall"(@HERE) - "@recursive"::K"Identifier" - (Nval-1)::K"Integer" - ] - ] - ] -end -)) - @test JuliaLowering.include_string(test_mod, """ M.@recursive 3 """) == (3, (2, (1, 0))) -@test let - ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl") - expanded = 
JuliaLowering.macroexpand(test_mod, ex) - JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) -end == [ +ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl") +ctx, expanded = JuliaLowering.expand_forms_1(test_mod, ex) +@test JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) == [ "M.@outer()" "@inner" - "2" + "y" ] +# Layer parenting +@test expanded[1].scope_layer == 2 +@test expanded[2].scope_layer == 3 +@test getfield.(ctx.scope_layers, :parent_layer) == [0,1,2] JuliaLowering.include_string(test_mod, """ f_throw(x) = throw(x) @@ -194,4 +171,126 @@ let (err, st) = try @test any(sf->sf.func===:ccall_macro_parse, st) end +# Tests for interop between old and new-style macros + +# Hygiene interop +JuliaLowering.include_string(test_mod, raw""" + macro call_oldstyle_macro(a) + quote + x = "x in call_oldstyle_macro" + @oldstyle $a x + end + end + + macro newstyle(a, b, c) + quote + x = "x in @newstyle" + ($a, $b, $c, x) + end + end +""") +# TODO: Make this macro lowering go via JuliaSyntax rather than the flisp code +# (JuliaSyntax needs support for old-style quasiquote processing) +Base.eval(test_mod, :( +macro oldstyle(a, b) + quote + x = "x in @oldstyle" + @newstyle $(esc(a)) $(esc(b)) x + end +end +)) +@test JuliaLowering.include_string(test_mod, """ +let x = "x in outer scope" + @call_oldstyle_macro x +end +""") == ("x in outer scope", + "x in call_oldstyle_macro", + "x in @oldstyle", + "x in @newstyle") + +# Old style unhygenic escaping with esc() +Base.eval(test_mod, :( +macro oldstyle_unhygenic() + esc(:x) +end +)) +@test JuliaLowering.include_string(test_mod, """ +let x = "x in outer scope" + @oldstyle_unhygenic +end +""") == "x in outer scope" + +# Exceptions in old style macros +Base.eval(test_mod, :( +macro oldstyle_error() + error("Some error in old style macro") +end +)) +@test try + JuliaLowering.include_string(test_mod, """ + @oldstyle_error + """) +catch exc + 
sprint(showerror, exc) +end == """ +MacroExpansionError while expanding @oldstyle_error in module Main.macros.test_mod: +@oldstyle_error +└─────────────┘ ── Error expanding macro +Caused by: +Some error in old style macro""" + +# Old-style macros returning non-Expr values +Base.eval(test_mod, :( +macro oldstyle_non_Expr() + 42 +end +)) +@test JuliaLowering.include_string(test_mod, """ +@oldstyle_non_Expr +""") === 42 + +# New-style macros called with the wrong arguments +JuliaLowering.include_string(test_mod, raw""" +macro method_error_test(a) +end +""") +Base.eval(test_mod, :( +macro method_error_test() +end +)) +try + JuliaLowering.include_string(test_mod, raw""" + @method_error_test x y + """) + @test false +catch exc + @test exc isa JuliaLowering.MacroExpansionError + mexc = exc.err + @test mexc isa MethodError + @test mexc.args isa Tuple{JuliaLowering.MacroContext, JuliaLowering.SyntaxTree, JuliaLowering.SyntaxTree} +end + +@testset "calling with old/new macro signatures" begin + # Old defined with 1 arg, new with 2 args, both with 3 (but with different values) + Base.eval(test_mod, :(macro sig_mismatch(x); x; end)) + Base.eval(test_mod, :(macro sig_mismatch(x, y, z); z; end)) + JuliaLowering.include_string(test_mod, "macro sig_mismatch(x, y); x; end") + JuliaLowering.include_string(test_mod, "macro sig_mismatch(x, y, z); x; end") + + @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1)") === 1 + @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2)") === 1 + @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3)") === 1 # 3 if we prioritize old sig + err = try + JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3, 4)") === 1 + catch exc + sprint(showerror, exc, context=:module=>@__MODULE__) + end + @test startswith(err, """ + MacroExpansionError while expanding @sig_mismatch in module Main.macros.test_mod: + @sig_mismatch(1, 2, 3, 4) + └───────────────────────┘ ── Error expanding macro + Caused by: + MethodError: 
no method matching var"@sig_mismatch"(::JuliaLowering.MacroContext, ::JuliaLowering.SyntaxTree""") +end + end # module macros From 1f9c4b55aeaa9443d11e1a883972772f82d6ecc7 Mon Sep 17 00:00:00 2001 From: Yuchi Yamaguchi Date: Wed, 20 Aug 2025 17:26:29 +0900 Subject: [PATCH 1043/1109] Do not treat static parameter as `is_always_defined` (JuliaLang/JuliaLowering.jl#41) Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> --- JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/test/closures_ir.jl | 25 ++++++++++++++----------- JuliaLowering/test/functions.jl | 10 ++++++++++ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 8c7554622d465..ee0b1498ff4f2 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -194,7 +194,7 @@ function add_lambda_args(ctx, var_ids, args, args_kind) "static parameter name not distinct from function argument" throw(LoweringError(arg, msg)) end - is_always_defined = args_kind == :argument || args_kind == :static_parameter + is_always_defined = args_kind == :argument id = init_binding(ctx, arg, varkey, args_kind; is_nospecialize=getmeta(arg, :nospecialize, false), is_always_defined=is_always_defined) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 6495b7450fc90..475eea8c71938 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -206,7 +206,7 @@ end 1 (method TestMod.f) 2 latestworld 3 (call core.svec :T) -4 (call core.svec false) +4 (call core.svec true) 5 (call JuliaLowering.eval_closure_type TestMod :#f#g##2 %₃ %₄) 6 latestworld 7 TestMod.#f#g##2 @@ -215,11 +215,17 @@ end 10 SourceLocation::2:14 11 (call core.svec %₈ %₉ %₁₀) 12 --- method core.nothing %₁₁ - slots: [slot₁/#self#(!read)] + slots: [slot₁/#self#(!read) slot₂/T(!read)] 1 TestMod.use 2 (call core.getfield slot₁/#self# :T) - 3 (call %₁ %₂) - 4 
(return %₃) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₂/T) + 7 slot₂/T + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈) + 10 (return %₉) 13 latestworld 14 (= slot₁/T (call core.TypeVar :T)) 15 TestMod.f @@ -234,13 +240,10 @@ end slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/g] 1 TestMod.#f#g##2 2 static_parameter₁ - 3 (call core.typeof %₂) - 4 (call core.apply_type %₁ %₃) - 5 static_parameter₁ - 6 (new %₄ %₅) - 7 (= slot₃/g %₆) - 8 slot₃/g - 9 (return %₈) + 3 (new %₁ %₂) + 4 (= slot₃/g %₃) + 5 slot₃/g + 6 (return %₅) 24 latestworld 25 TestMod.f 26 (return %₂₅) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 634bb13e260f9..ed63ec818b5b4 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -192,6 +192,16 @@ end (1, [1.0], 2, Float64, Vector{Float64}, Int), (1, [1.0], -1.0, Float64, Vector{Float64}, Float64)) + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_typevar_vararg_undef(x::T, y::Vararg{S}) where {T,S} + (x, y, @isdefined S) + end + + (f_def_typevar_vararg_undef(1), f_def_typevar_vararg_undef(1,2), f_def_typevar_vararg_undef(1,2,3)) + end + """) === ((1, (), false), (1, (2,), true), (1, (2, 3), true)) + @test JuliaLowering.include_string(test_mod, """ begin function f_def_slurp(x=1, ys...) 
From d2d1698cc83457645ce4be84ade9870806074cb6 Mon Sep 17 00:00:00 2001 From: Yuchi Yamaguchi Date: Wed, 20 Aug 2025 18:04:04 +0900 Subject: [PATCH 1044/1109] Fix handling lower bound in type parameter (JuliaLang/JuliaLowering.jl#37) Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> --- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/test/functions.jl | 8 ++++++++ JuliaLowering/test/functions_ir.jl | 28 ++++++++++++++++++++++++++++ JuliaLowering/test/typedefs_ir.jl | 8 ++++---- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 573b712c63d6a..008ef131b8899 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -3266,7 +3266,7 @@ function analyze_typevar(ctx, ex) (ex[1], nothing, ex[2]) elseif k == K">:" && numchildren(ex) == 2 kind(ex[2]) == K"Identifier" || throw(LoweringError(ex[2], "expected type name")) - (ex[2], ex[1], nothing) + (ex[1], ex[2], nothing) else throw(LoweringError(ex, "expected type name or type bounds")) end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index ed63ec818b5b4..f8c50fb6ff6d5 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -202,6 +202,14 @@ end end """) === ((1, (), false), (1, (2,), true), (1, (2, 3), true)) + @test JuliaLowering.include_string(test_mod, """ + begin + f_def_typevar_with_lowerbound(x::T) where {T>:Int} = + (x, @isdefined(T)) + (f_def_typevar_with_lowerbound(1), f_def_typevar_with_lowerbound(1.0)) + end + """) == ((1, true), (1.0, false)) + @test JuliaLowering.include_string(test_mod, """ begin function f_def_slurp(x=1, ys...) 
diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 525db70b39bda..b5318c648b2ad 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -199,6 +199,34 @@ end 18 TestMod.f 19 (return %₁₈) +######################################## +# Static parameter with lower bound +function f(::S{T}) where T >: X + T +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.X +4 (= slot₁/T (call core.TypeVar :T %₃ core.Any)) +5 TestMod.f +6 (call core.Typeof %₅) +7 TestMod.S +8 slot₁/T +9 (call core.apply_type %₇ %₈) +10 (call core.svec %₆ %₉) +11 slot₁/T +12 (call core.svec %₁₁) +13 SourceLocation::1:10 +14 (call core.svec %₁₀ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 static_parameter₁ + 2 (return %₁) +16 latestworld +17 TestMod.f +18 (return %₁₇) + ######################################## # Static parameter which is used only in the bounds of another static parameter # See https://github.com/JuliaLang/julia/issues/49275 diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 1f7395ece1f8d..1fcc96fc0ffe7 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -25,10 +25,10 @@ A where X <: UB # where expression with lower bound A where X >: LB #--------------------- -1 TestMod.X -2 (call core.TypeVar :LB %₁ core.Any) -3 (= slot₁/LB %₂) -4 slot₁/LB +1 TestMod.LB +2 (call core.TypeVar :X %₁ core.Any) +3 (= slot₁/X %₂) +4 slot₁/X 5 TestMod.A 6 (call core.UnionAll %₄ %₅) 7 (return %₆) From 9a42f2bf3c10ecd3b8a3c06517a9b91e89c9f271 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 20 Aug 2025 21:45:20 +0900 Subject: [PATCH 1045/1109] support splat in the array syntax (JuliaLang/JuliaLowering.jl#40) - fixes c42f/JuliaLowering.jl#38 --- JuliaLowering/src/desugaring.jl | 87 +++++++++++++++++---------------- 
JuliaLowering/test/arrays.jl | 20 +++++++- JuliaLowering/test/arrays_ir.jl | 45 ++++++++++++++++- 3 files changed, 109 insertions(+), 43 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 008ef131b8899..22127725c1021 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1,4 +1,4 @@ -# Lowering Pass 2 - syntax desugaring +# Lowering Pass 2 - syntax desugaring struct DesugaringContext{GraphType} <: AbstractLoweringContext graph::GraphType @@ -74,7 +74,7 @@ end function check_no_parameters(ex::SyntaxTree, msg) i = find_parameters_ind(children(ex)) - if i > 0 + if i > 0 throw(LoweringError(ex[i], msg)) end end @@ -523,7 +523,7 @@ function expand_compare_chain(ctx, ex) end #------------------------------------------------------------------------------- -# Expansion of array indexing +# Expansion of array indexing function _arg_to_temp(ctx, stmts, ex, eq_is_kw=false) k = kind(ex) if is_effect_free(ex) @@ -939,6 +939,29 @@ function expand_comprehension_to_loops(ctx, ex) ] end +function expand_splat(ctx, ex, topfunc, args) + return @ast ctx ex [K"call" + "_apply_iterate"::K"core" + "iterate"::K"top" + topfunc + expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... + ] +end + +function expand_array(ctx, ex, topfunc) + args = children(ex) + check_no_assignment(args) + topfunc = @ast ctx ex topfunc::K"top" + if any(kind(arg) == K"..." for arg in args) + expand_splat(ctx, ex, topfunc, args) + else + @ast ctx ex [K"call" + topfunc + expand_forms_2(ctx, args)... 
+ ] + end +end + #------------------------------------------------------------------------------- # Expansion of array concatenation notation `[a b ; c d]` etc @@ -1735,10 +1758,10 @@ function expand_ccall(ctx, ex) if is_core_Any(raw_argt) push!(unsafe_args, exarg) else - cconverted_arg = emit_assign_tmp(sctx, + cconverted_arg = emit_assign_tmp(sctx, @ast ctx argt [K"call" "cconvert"::K"top" - argt + argt exarg ] ) @@ -1856,12 +1879,7 @@ function expand_call(ctx, ex) end if any(kind(arg) == K"..." for arg in args) # Splatting, eg, `f(a, xs..., b)` - @ast ctx ex [K"call" - "_apply_iterate"::K"core" - "iterate"::K"top" - expand_forms_2(ctx, farg) - expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... - ] + expand_splat(ctx, ex, expand_forms_2(ctx, farg), args) elseif kind(farg) == K"Identifier" && farg.name_val == "include" # world age special case r = ssavar(ctx, ex) @@ -1905,7 +1923,7 @@ function expand_dot(ctx, ex) throw(LoweringError(rhs, "Unrecognized field access syntax")) end @ast ctx ex [K"call" - "getproperty"::K"top" + "getproperty"::K"top" ex[1] rhs ] @@ -3092,7 +3110,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= ] ] ] - [K"removable" + [K"removable" isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name ] ] @@ -3131,7 +3149,7 @@ end function expand_arrow(ctx, ex) @chk numchildren(ex) == 2 - expand_forms_2(ctx, + expand_forms_2(ctx, @ast ctx ex [K"function" expand_arrow_arglist(ctx, ex[1], string(kind(ex))) ex[2] @@ -3515,7 +3533,7 @@ function default_inner_constructors(ctx, srcref, global_struct_name, [K"curly" "Type"::K"core" [K"curly" - global_struct_name + global_struct_name typevar_names... 
] ] @@ -3570,7 +3588,7 @@ function default_outer_constructor(ctx, srcref, global_struct_name, typevar_names, typevar_stmts, field_names, field_types) @ast ctx srcref [K"function" [K"where" - [K"call" + [K"call" # We use `::Type{$global_struct_name}` here rather than just # `struct_name` because global_struct_name is a binding to a # type - we know we're not creating a new `Function` and @@ -4189,7 +4207,7 @@ function expand_module(ctx, ex::SyntaxTree) "x" ::K"Identifier" ] [K"call" - "eval" ::K"core" + "eval" ::K"core" modname ::K"Identifier" "x" ::K"Identifier" ] @@ -4216,11 +4234,11 @@ function expand_module(ctx, ex::SyntaxTree) "x" ::K"Identifier" ] [K"call" - "_call_latest" ::K"core" - "include" ::K"top" - "mapexpr" ::K"Identifier" - modname ::K"Identifier" - "x" ::K"Identifier" + "_call_latest" ::K"core" + "include" ::K"top" + "mapexpr" ::K"Identifier" + modname ::K"Identifier" + "x" ::K"Identifier" ] ] ] @@ -4254,7 +4272,7 @@ end """ Lowering pass 2 - desugaring - + This pass simplifies expressions by expanding complicated syntax sugar into a small set of core syntactic forms. For example, field access syntax `a.b` is expanded to a function call `getproperty(a, :b)`. @@ -4377,7 +4395,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) if numchildren(ex) == 1 && kind(ex[1]) == K"String" ex[1] else - @ast ctx ex [K"call" + @ast ctx ex [K"call" "string"::K"top" expand_forms_2(ctx, children(ex))... ] @@ -4393,7 +4411,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif any_assignment(children(ex)) expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex))) else - expand_forms_2(ctx, @ast ctx ex [K"call" + expand_forms_2(ctx, @ast ctx ex [K"call" "tuple"::K"core" children(ex)... 
]) @@ -4438,23 +4456,11 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"vect" check_no_parameters(ex, "unexpected semicolon in array expression") - check_no_assignment(children(ex)) - @ast ctx ex [K"call" - "vect"::K"top" - expand_forms_2(ctx, children(ex))... - ] + expand_array(ctx, ex, "vect") elseif k == K"hcat" - check_no_assignment(children(ex)) - @ast ctx ex [K"call" - "hcat"::K"top" - expand_forms_2(ctx, children(ex))... - ] + expand_array(ctx, ex, "hcat") elseif k == K"typed_hcat" - check_no_assignment(children(ex)) - @ast ctx ex [K"call" - "typed_hcat"::K"top" - expand_forms_2(ctx, children(ex))... - ] + expand_array(ctx, ex, "typed_hcat") elseif k == K"opaque_closure" expand_forms_2(ctx, expand_opaque_closure(ctx, ex)) elseif k == K"vcat" || k == K"typed_vcat" @@ -4513,4 +4519,3 @@ function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree) ex1 = expand_forms_2(ctx1, reparent(ctx1, ex)) ctx1, ex1 end - diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl index f39e4a8002f24..bc3e43af0b089 100644 --- a/JuliaLowering/test/arrays.jl +++ b/JuliaLowering/test/arrays.jl @@ -1,3 +1,5 @@ +using Test, JuliaLowering + @testset "Array syntax" begin test_mod = Module() @@ -23,6 +25,22 @@ end Int[1.0 2.0 3.0] """) ≅ [1 2 3] +# splat with vect/hcat/typed_hcat +@test JuliaLowering.include_string(test_mod, """ +let xs = [1,2,3] + [0, xs...] +end +""") ≅ [0,1,2,3] +@test JuliaLowering.include_string(test_mod, """ +let xs = [1,2,3] + [0 xs...] +end +""") ≅ [0 1 2 3] +@test JuliaLowering.include_string(test_mod, """ +let xs = [1,2,3] + Int[0 xs...] 
+end +""") ≅ Int[0 1 2 3] # vcat @test JuliaLowering.include_string(test_mod, """ @@ -127,4 +145,4 @@ let end """) == (3, 7, 2, 5) -end +end # @testset "Array syntax" begin diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index c2fd227a08a38..4595603e4b79d 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -5,6 +5,29 @@ 1 (call top.vect 10 20 30) 2 (return %₁) +######################################## +# vect with splat +[x, xs...] +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 TestMod.xs +4 (call core._apply_iterate top.iterate top.vect %₂ %₃) +5 (return %₄) + +######################################## +# vect with splats +[x, xs..., y, ys...] +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 TestMod.xs +4 TestMod.y +5 (call core.tuple %₄) +6 TestMod.ys +7 (call core._apply_iterate top.iterate top.vect %₂ %₃ %₅ %₆) +8 (return %₇) + ######################################## # Error: vect syntax with parameters [10, 20; 30] @@ -28,6 +51,16 @@ LoweringError: 1 (call top.hcat 10 20 30) 2 (return %₁) +######################################## +# hcat with splat +[x xs...] +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 TestMod.xs +4 (call core._apply_iterate top.iterate top.hcat %₂ %₃) +5 (return %₄) + ######################################## # typed hcat syntax T[10 20 30] @@ -36,6 +69,17 @@ T[10 20 30] 2 (call top.typed_hcat %₁ 10 20 30) 3 (return %₂) +######################################## +# typed hcat syntax with splat +T[x xs...] +#--------------------- +1 TestMod.T +2 TestMod.x +3 (call core.tuple %₁ %₂) +4 TestMod.xs +5 (call core._apply_iterate top.iterate top.typed_hcat %₃ %₄) +6 (return %₅) + ######################################## # Error: hcat syntax with embedded assignments [10 20 a=40] @@ -452,4 +496,3 @@ a[] = rhs 2 TestMod.a 3 (call top.setindex! 
%₂ %₁) 4 (return %₁) - From 66dde2f615d0c3eac980c01f38e41721ac12a392 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 21 Aug 2025 22:14:26 +1000 Subject: [PATCH 1046/1109] Expand `import/using` to newer Base runtime calls (JuliaLang/JuliaLowering.jl#43) Calls to `import` and `using` are expanded by lowering as of the changes in https://github.com/JuliaLang/julia/pull/57965 and no longer dealt with by the C function `jl_toplevel_eval_flex`. This implies we can't use `eval()` for these if we want to activate JuliaLowering in Core, or we'll hit a stack overflow. I've chosen to duplicate the flisp lowering here for consistency and import paths are thus lowered to a restricted kind of quoted `Expr`. (It's mildly annoying to rely on quoted `Expr` in the lowered paths than the previous use of `Core.svec` but deleting the svec representation allows us to use `Base._eval_import` and `Base._eval_using` directly so seems like a worthy simplification.) Similarly, use a precomputed vector of names in public/export expansion - this list can be computed at expansion time rather than emitting each element into the lowered code individually. Includes minor test+CI fixes julia 1.12 in support of JETLS. 
--- JuliaLowering/.github/workflows/CI.yml | 1 + JuliaLowering/src/desugaring.jl | 105 ++++++++++++++++--------- JuliaLowering/src/linear_ir.jl | 3 + JuliaLowering/src/runtime.jl | 50 +++++------- JuliaLowering/test/hooks.jl | 30 +++---- JuliaLowering/test/import.jl | 33 ++++++++ JuliaLowering/test/import_ir.jl | 47 ++++++----- 7 files changed, 169 insertions(+), 100 deletions(-) diff --git a/JuliaLowering/.github/workflows/CI.yml b/JuliaLowering/.github/workflows/CI.yml index 7ed1133e01315..49cf0a37c2231 100644 --- a/JuliaLowering/.github/workflows/CI.yml +++ b/JuliaLowering/.github/workflows/CI.yml @@ -19,6 +19,7 @@ jobs: fail-fast: false matrix: version: + - '~1.12.0-rc1' - 'nightly' os: - ubuntu-latest diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 22127725c1021..bed0e7b376fc5 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -4098,7 +4098,9 @@ end #------------------------------------------------------------------------------- # Expand import / using / export -function _append_importpath(ctx, path_spec, path) +function expand_importpath(path) + @chk kind(path) == K"importpath" + path_spec = Expr(:.) 
prev_was_dot = true for component in children(path) k = kind(component) @@ -4114,13 +4116,12 @@ function _append_importpath(ctx, path_spec, path) throw(LoweringError(component, "invalid import path: `.` in identifier path")) end prev_was_dot = is_dot - push!(path_spec, @ast(ctx, component, name::K"String")) + push!(path_spec.args, Symbol(name)) end - path_spec + return path_spec end -function expand_import(ctx, ex) - is_using = kind(ex) == K"using" +function expand_import_or_using(ctx, ex) if kind(ex[1]) == K":" # import M: x.y as z, w # (import (: (importpath M) (as (importpath x y) z) (importpath w))) @@ -4131,57 +4132,87 @@ function expand_import(ctx, ex) # (call core.svec 2 "x" "y" "z" 1 "w" "w")) @chk numchildren(ex[1]) >= 2 from = ex[1][1] - @chk kind(from) == K"importpath" - from_path = @ast ctx from [K"call" - "svec"::K"core" - _append_importpath(ctx, SyntaxList(ctx), from)... - ] + from_path = @ast ctx from QuoteNode(expand_importpath(from))::K"Value" paths = ex[1][2:end] else # import A.B # (using (importpath A B)) - # (call module_import true nothing (call core.svec 1 "w")) + # (call eval_import true nothing (call core.svec 1 "w")) @chk numchildren(ex) >= 1 - from_path = nothing_(ctx, ex) + from_path = nothing paths = children(ex) end - path_spec = SyntaxList(ctx) - for path in paths + # Here we represent the paths as quoted `Expr` data structures + path_specs = SyntaxList(ctx) + for spec in paths as_name = nothing - if kind(path) == K"as" - @chk numchildren(path) == 2 - as_name = path[2] - @chk kind(as_name) == K"Identifier" - path = path[1] + if kind(spec) == K"as" + @chk numchildren(spec) == 2 + @chk kind(spec[2]) == K"Identifier" + as_name = Symbol(spec[2].name_val) + path = QuoteNode(Expr(:as, expand_importpath(spec[1]), as_name)) + else + path = QuoteNode(expand_importpath(spec)) end - @chk kind(path) == K"importpath" - push!(path_spec, @ast(ctx, path, numchildren(path)::K"Integer")) - _append_importpath(ctx, path_spec, path) - push!(path_spec, 
isnothing(as_name) ? nothing_(ctx, ex) : - @ast(ctx, as_name, as_name.name_val::K"String")) + push!(path_specs, @ast ctx spec path::K"Value") end - @ast ctx ex [K"block" - [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] - [K"call" - module_import ::K"Value" + is_using = kind(ex) == K"using" + stmts = SyntaxList(ctx) + if isnothing(from_path) + for spec in path_specs + if is_using + push!(stmts, + @ast ctx spec [K"call" + eval_using ::K"Value" + ctx.mod ::K"Value" + spec + ] + ) + else + push!(stmts, + @ast ctx spec [K"call" + eval_import ::K"Value" + (!is_using) ::K"Bool" + ctx.mod ::K"Value" + "nothing" ::K"top" + spec + ] + ) + end + # latestworld required between imports so that previous symbols + # become visible + push!(stmts, @ast ctx spec (::K"latestworld")) + end + else + push!(stmts, @ast ctx ex [K"call" + eval_import ::K"Value" + (!is_using) ::K"Bool" ctx.mod ::K"Value" - is_using ::K"Value" from_path - [K"call" - "svec"::K"core" - path_spec... - ] - ] + path_specs... + ]) + push!(stmts, @ast ctx ex (::K"latestworld")) + end + @ast ctx ex [K"block" + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + stmts... + [K"removable" "nothing"::K"core"] ] end # Expand `public` or `export` function expand_public(ctx, ex) + identifiers = String[] + for e in children(ex) + @chk kind(e) == K"Identifier" (ex, "Expected identifier") + push!(identifiers, e.name_val) + end + (e.name_val::K"String" for e in children(ex)) @ast ctx ex [K"call" - module_public::K"Value" + eval_public::K"Value" ctx.mod::K"Value" (kind(ex) == K"export")::K"Bool" - (e.name_val::K"String" for e in children(ex))... 
+ identifiers::K"Value" ] end @@ -4421,7 +4452,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"module" expand_module(ctx, ex) elseif k == K"import" || k == K"using" - expand_import(ctx, ex) + expand_import_or_using(ctx, ex) elseif k == K"export" || k == K"public" expand_public(ctx, ex) elseif k == K"abstract" || k == K"primitive" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index bf4c741393880..5e0b4db206016 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -855,6 +855,9 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) elseif k == K"latestworld" + if needs_value + throw(LoweringError(ex, "misplaced latestsworld")) + end emit_latestworld(ctx, ex) elseif k == K"latestworld_if_toplevel" ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 9d9e0e5087e22..551d58524c5d4 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -110,7 +110,7 @@ end #-------------------------------------------------- # Functions called by closure conversion -function eval_closure_type(mod, closure_type_name, field_names, field_is_box) +function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, field_is_box) type_params = Core.TypeVar[] field_types = [] for (name, isbox) in zip(field_names, field_is_box) @@ -129,7 +129,7 @@ function eval_closure_type(mod, closure_type_name, field_names, field_is_box) false, length(field_names)) Core._setsuper!(type, Core.Function) - Base.eval(mod, :(const $closure_type_name = $type)) + @ccall jl_set_const(mod::Module, closure_type_name::Symbol, type::Any)::Cvoid Core._typebody!(false, type, Core.svec(field_types...)) type end @@ -176,39 +176,29 @@ function eval_module(parentmod, modname, body) )) end -# Evaluate content of `import` or `using` 
statement -function module_import(into_mod::Module, is_using::Bool, - from_mod::Union{Nothing,Core.SimpleVector}, paths::Core.SimpleVector) - # For now, this function converts our lowered representation back to Expr - # and calls eval() to avoid replicating all of the fiddly logic in - # jl_toplevel_eval_flex. - # TODO: ccall Julia runtime functions directly? - # * jl_module_using jl_module_use_as - # * import_module jl_module_import_as - path_args = [] - i = 1 - while i < length(paths) - nsyms = paths[i]::Int - n = i + nsyms - path = Expr(:., [Symbol(paths[i+j]::String) for j = 1:nsyms]...) - as_name = paths[i+nsyms+1] - push!(path_args, isnothing(as_name) ? path : - Expr(:as, path, Symbol(as_name))) - i += nsyms + 2 +const _Base_has_eval_import = isdefined(Base, :_eval_import) + +function eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...) + if _Base_has_eval_import + Base._eval_import(imported, to, from, paths...) + else + head = imported ? :import : :using + ex = isnothing(from) ? + Expr(head, paths...) : + Expr(head, Expr(Symbol(":"), from, paths...)) + Base.eval(to, ex) end - ex = if isnothing(from_mod) - Expr(is_using ? :using : :import, - path_args...) +end + +function eval_using(to::Module, path::Expr) + if _Base_has_eval_import + Base._eval_using(to, path) else - from_path = Expr(:., [Symbol(s::String) for s in from_mod]...) - Expr(is_using ? :using : :import, - Expr(:(:), from_path, path_args...)) + Base.eval(to, Expr(:using, path)) end - eval(into_mod, ex) - nothing end -function module_public(mod::Module, is_exported::Bool, identifiers...) +function eval_public(mod::Module, is_exported::Bool, identifiers) # symbol jl_module_public is no longer exported as of #57765 eval(mod, Expr((is_exported ? 
:export : :public), map(Symbol, identifiers)...)) end diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl index 39a3d883a3ede..46ea5c5355557 100644 --- a/JuliaLowering/test/hooks.jl +++ b/JuliaLowering/test/hooks.jl @@ -19,20 +19,22 @@ const JL = JuliaLowering end end - @testset "integration: `JuliaLowering.activate!`" begin - prog = parseall(Expr, "global asdf = 1") - JL.activate!() - out = Core.eval(test_mod, prog) - JL.activate!(false) - @test out === 1 - @test isdefined(test_mod, :asdf) + if isdefined(Core, :_lower) + @testset "integration: `JuliaLowering.activate!`" begin + prog = parseall(Expr, "global asdf = 1") + JL.activate!() + out = Core.eval(test_mod, prog) + JL.activate!(false) + @test out === 1 + @test isdefined(test_mod, :asdf) - prog = parseall(Expr, "module M; x = 1; end") - JL.activate!() - out = Core.eval(test_mod, prog) - JL.activate!(false) - @test out isa Module - @test isdefined(test_mod, :M) - @test isdefined(test_mod.M, :x) + prog = parseall(Expr, "module M; x = 1; end") + JL.activate!() + out = Core.eval(test_mod, prog) + JL.activate!(false) + @test out isa Module + @test isdefined(test_mod, :M) + @test isdefined(test_mod.M, :x) + end end end diff --git a/JuliaLowering/test/import.jl b/JuliaLowering/test/import.jl index 20055174f2b8a..0dc39db83f8cd 100644 --- a/JuliaLowering/test/import.jl +++ b/JuliaLowering/test/import.jl @@ -39,4 +39,37 @@ end """) @test C.D.f === C.E.f +# Test that `using` F brings in the symbol G immediately +F = JuliaLowering.include_string(test_mod, """ +module F + export G + module G + export G_global + G_global = "exported from G" + end +end +""") +JuliaLowering.include_string(test_mod, """ +using .F, .G +""") +@test test_mod.F === F +@test test_mod.G === F.G +@test test_mod.G_global === "exported from G" + +# Similarly, that import makes symbols available immediately +H = JuliaLowering.include_string(test_mod, """ +module H + module I + module J + end + end +end +""") 
+JuliaLowering.include_string(test_mod, """ +import .H.I, .I.J +""") +@test test_mod.I === H.I +@test test_mod.J === H.I.J +@test test_mod.G_global === "exported from G" + end diff --git a/JuliaLowering/test/import_ir.jl b/JuliaLowering/test/import_ir.jl index ef95a431e7a98..6e17b0f58033f 100644 --- a/JuliaLowering/test/import_ir.jl +++ b/JuliaLowering/test/import_ir.jl @@ -2,36 +2,45 @@ # Basic import import A: b #--------------------- -1 (call core.svec "A") -2 (call core.svec 1 "b" core.nothing) -3 (call JuliaLowering.module_import TestMod false %₁ %₂) -4 (return %₃) +1 (call JuliaLowering.eval_import true TestMod :($(QuoteNode(:($(Expr(:., :A)))))) :($(QuoteNode(:($(Expr(:., :b))))))) +2 latestworld +3 (return core.nothing) ######################################## # Import with paths and `as` import A.B.C: b, c.d as e #--------------------- -1 (call core.svec "A" "B" "C") -2 (call core.svec 1 "b" core.nothing 2 "c" "d" "e") -3 (call JuliaLowering.module_import TestMod false %₁ %₂) -4 (return %₃) +1 (call JuliaLowering.eval_import true TestMod :($(QuoteNode(:($(Expr(:., :A, :B, :C)))))) :($(QuoteNode(:($(Expr(:., :b)))))) :($(QuoteNode(:(c.d as e))))) +2 latestworld +3 (return core.nothing) ######################################## -# Using -using A +# Imports without `from` module need separating with latestworld +import A, B #--------------------- -1 (call core.svec 1 "A" core.nothing) -2 (call JuliaLowering.module_import TestMod true core.nothing %₁) -3 (return %₂) +1 (call JuliaLowering.eval_import true TestMod top.nothing :($(QuoteNode(:($(Expr(:., :A))))))) +2 latestworld +3 (call JuliaLowering.eval_import true TestMod top.nothing :($(QuoteNode(:($(Expr(:., :B))))))) +4 latestworld +5 (return core.nothing) + +######################################## +# Multiple usings need separating with latestworld +using A, B +#--------------------- +1 (call JuliaLowering.eval_using TestMod :($(QuoteNode(:($(Expr(:., :A))))))) +2 latestworld +3 (call 
JuliaLowering.eval_using TestMod :($(QuoteNode(:($(Expr(:., :B))))))) +4 latestworld +5 (return core.nothing) ######################################## # Using with paths and `as` using A.B.C: b, c.d as e #--------------------- -1 (call core.svec "A" "B" "C") -2 (call core.svec 1 "b" core.nothing 2 "c" "d" "e") -3 (call JuliaLowering.module_import TestMod true %₁ %₂) -4 (return %₃) +1 (call JuliaLowering.eval_import false TestMod :($(QuoteNode(:($(Expr(:., :A, :B, :C)))))) :($(QuoteNode(:($(Expr(:., :b)))))) :($(QuoteNode(:(c.d as e))))) +2 latestworld +3 (return core.nothing) ######################################## # Error: Import not at top level @@ -49,13 +58,13 @@ end # Export export a, b, c #--------------------- -1 (call JuliaLowering.module_public TestMod true "a" "b" "c") +1 (call JuliaLowering.eval_public TestMod true ["a", "b", "c"]) 2 (return %₁) ######################################## # Public public a, b, c #--------------------- -1 (call JuliaLowering.module_public TestMod false "a" "b" "c") +1 (call JuliaLowering.eval_public TestMod false ["a", "b", "c"]) 2 (return %₁) From 9514ab00e18122bbe8cf80a7631d6b0516cfc8cf Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 23 Aug 2025 02:38:51 +1000 Subject: [PATCH 1047/1109] Fix IR generation for `return` in value position (JuliaLang/JuliaLowering.jl#47) `return` in value position is allowed in Julia so that ASTs compose more easily. 
--- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/test/functions.jl | 10 ++++++++++ JuliaLowering/test/functions_ir.jl | 22 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 5e0b4db206016..b7c11678a5cd3 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -342,7 +342,7 @@ function emit_assignment(ctx, srcref, lhs, rhs, op=K"=") else # in unreachable code (such as after return); still emit the assignment # so that the structure of those uses is preserved - emit_simple_assignment(ctx, srcref, lhs, @ast ctx srcref "nothing"::K"core", op) + emit_simple_assignment(ctx, srcref, lhs, nothing_(ctx, srcref), op) nothing end end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index f8c50fb6ff6d5..e91e5142479c5 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -159,6 +159,16 @@ begin end """) +@test JuliaLowering.include_string(test_mod, """ +x = 0 +function f_return_in_value_pos() + global x + x = return 42 +end + +(f_return_in_value_pos(), x) +""") === (42, 0) + @testset "Default positional arguments" begin @test JuliaLowering.include_string(test_mod, """ begin diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index b5318c648b2ad..34d07a20fccfa 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -995,6 +995,28 @@ end 11 TestMod.f 12 (return %₁₁) +######################################## +# Function return in value position is allowed +function f() + x = return 1 +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 (return 1) + 2 (= slot₂/x core.nothing) +10 latestworld +11 
TestMod.f +12 (return %₁₁) + ######################################## # Binding docs to functions """ From 4162a976682fbbf0e7b71094d6cebda025b46633 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Fri, 22 Aug 2025 16:08:58 -0700 Subject: [PATCH 1048/1109] Compat behaviours: typeof quoted expressions, macro def signatures (JuliaLang/JuliaLowering.jl#44) Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Co-authored-by: Claire Foster --- JuliaLowering/src/desugaring.jl | 74 ++++++++++++++++++++-------- JuliaLowering/src/eval.jl | 14 +++--- JuliaLowering/src/hooks.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 12 +++-- JuliaLowering/src/runtime.jl | 18 +++++-- JuliaLowering/src/syntax_graph.jl | 5 +- JuliaLowering/test/arrays_ir.jl | 1 + JuliaLowering/test/demo.jl | 4 +- JuliaLowering/test/functions_ir.jl | 4 +- JuliaLowering/test/generators_ir.jl | 2 +- JuliaLowering/test/macros.jl | 2 +- JuliaLowering/test/macros_ir.jl | 2 +- JuliaLowering/test/quoting_ir.jl | 6 +-- 13 files changed, 95 insertions(+), 51 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index bed0e7b376fc5..a313e7cda3437 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -5,9 +5,10 @@ struct DesugaringContext{GraphType} <: AbstractLoweringContext bindings::Bindings scope_layers::Vector{ScopeLayer} mod::Module + expr_compat_mode::Bool end -function DesugaringContext(ctx) +function DesugaringContext(ctx, expr_compat_mode::Bool) graph = ensure_attributes(syntax_graph(ctx), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, @@ -15,7 +16,11 @@ function DesugaringContext(ctx) scope_type=Symbol, # :hard or :soft var_id=IdTag, is_toplevel_thunk=Bool) - DesugaringContext(graph, ctx.bindings, ctx.scope_layers, first(ctx.scope_layers).mod) + DesugaringContext(graph, + ctx.bindings, + ctx.scope_layers, + first(ctx.scope_layers).mod, + expr_compat_mode) end 
#------------------------------------------------------------------------------- @@ -3246,20 +3251,39 @@ function expand_macro_def(ctx, ex) name = sig[1] args = remove_empty_parameters(children(sig)) @chk kind(args[end]) != K"parameters" (args[end], "macros cannot accept keyword arguments") - ret = @ast ctx ex [K"function" - [K"call"(sig) - _make_macro_name(ctx, name) - [K"::" - adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), - kind(name) == K"." ? name[1] : name) - MacroContext::K"Value" + scope_ref = kind(name) == K"." ? name[1] : name + if ctx.expr_compat_mode + @ast ctx ex [K"function" + [K"call"(sig) + _make_macro_name(ctx, name) + [K"::" + # TODO: should we be adopting the scope of the K"macro" expression itself? + adopt_scope(@ast(ctx, sig, "__source__"::K"Identifier"), scope_ref) + LineNumberNode::K"Value" + ] + [K"::" + adopt_scope(@ast(ctx, sig, "__module__"::K"Identifier"), scope_ref) + Module::K"Value" + ] + map(e->_apply_nospecialize(ctx, e), args[2:end])... ] - # flisp: We don't mark these @nospecialize because all arguments to - # new macros will be of type SyntaxTree - args[2:end]... + ex[2] ] - ex[2] - ] + else + @ast ctx ex [K"function" + [K"call"(sig) + _make_macro_name(ctx, name) + [K"::" + adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), scope_ref) + MacroContext::K"Value" + ] + # flisp: We don't mark these @nospecialize because all arguments to + # new macros will be of type SyntaxTree + args[2:end]... 
+ ] + ex[2] + ] + end end #------------------------------------------------------------------------------- @@ -4284,9 +4308,10 @@ function expand_module(ctx, ex::SyntaxTree) [K"inert" ex] ] [K"call" - eval_module ::K"Value" - ctx.mod ::K"Value" - modname ::K"String" + eval_module ::K"Value" + ctx.mod ::K"Value" + modname ::K"String" + ctx.expr_compat_mode ::K"Bool" [K"inert"(body) [K"toplevel" std_defs @@ -4477,14 +4502,21 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"toplevel" # The toplevel form can't be lowered here - it needs to just be quoted # and passed through to a call to eval. - @ast ctx ex [K"block" + ex2 = @ast ctx ex [K"block" [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] [K"call" - eval ::K"Value" - ctx.mod ::K"Value" + eval ::K"Value" + ctx.mod ::K"Value" [K"inert" ex] + [K"parameters" + [K"=" + "expr_compat_mode"::K"Identifier" + ctx.expr_compat_mode::K"Bool" + ] + ] ] ] + expand_forms_2(ctx, ex2) elseif k == K"vect" check_no_parameters(ex, "unexpected semicolon in array expression") expand_array(ctx, ex, "vect") @@ -4546,7 +4578,7 @@ function expand_forms_2(ctx::StatementListCtx, args...) 
end function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree) - ctx1 = DesugaringContext(ctx) + ctx1 = DesugaringContext(ctx, ctx.expr_compat_mode) ex1 = expand_forms_2(ctx1, reparent(ctx1, ex)) ctx1, ex1 end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 1f375ab1b6f4b..112639650c1b4 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,5 +1,5 @@ -function lower(mod::Module, ex0) - ctx1, ex1 = expand_forms_1( mod, ex0) +function lower(mod::Module, ex0, expr_compat_mode=false) + ctx1, ex1 = expand_forms_1( mod, ex0, expr_compat_mode) ctx2, ex2 = expand_forms_2( ctx1, ex1) ctx3, ex3 = resolve_scopes( ctx2, ex2) ctx4, ex4 = convert_closures(ctx3, ex3) @@ -7,8 +7,8 @@ function lower(mod::Module, ex0) ex5 end -function macroexpand(mod::Module, ex) - ctx1, ex1 = expand_forms_1(mod, ex) +function macroexpand(mod::Module, ex, expr_compat_mode=false) + ctx1, ex1 = expand_forms_1(mod, ex, expr_compat_mode) ex1 end @@ -344,16 +344,16 @@ end #------------------------------------------------------------------------------- # Our version of eval takes our own data structures -function Core.eval(mod::Module, ex::SyntaxTree) +function Core.eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) k = kind(ex) if k == K"toplevel" x = nothing for e in children(ex) - x = eval(mod, e) + x = eval(mod, e; expr_compat_mode) end return x end - linear_ir = lower(mod, ex) + linear_ir = lower(mod, ex, expr_compat_mode) expr_form = to_lowered_expr(mod, linear_ir) eval(mod, expr_form) end diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index c150fce9a1283..5beb00a6d4e08 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -17,7 +17,7 @@ function core_lowering_hook(@nospecialize(code), mod::Module, st0 = code isa Expr ? 
expr_to_syntaxtree(code, LineNumberNode(line, file)) : code try - ctx1, st1 = expand_forms_1( mod, st0) + ctx1, st1 = expand_forms_1( mod, st0, true) ctx2, st2 = expand_forms_2( ctx1, st1) ctx3, st3 = resolve_scopes( ctx2, st2) ctx4, st4 = convert_closures(ctx3, st3) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index e321af03129ed..e83a73715a623 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -20,11 +20,12 @@ struct MacroExpansionContext{GraphType} <: AbstractLoweringContext bindings::Bindings scope_layers::Vector{ScopeLayer} scope_layer_stack::Vector{LayerId} + expr_compat_mode::Bool end -function MacroExpansionContext(graph::SyntaxGraph, mod::Module) +function MacroExpansionContext(graph::SyntaxGraph, mod::Module, expr_compat_mode::Bool) layers = ScopeLayer[ScopeLayer(1, mod, 0, false)] - MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)]) + MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)], expr_compat_mode) end current_layer(ctx::MacroExpansionContext) = ctx.scope_layers[last(ctx.scope_layer_stack)] @@ -67,6 +68,7 @@ function expand_quote(ctx, ex) # (ex, @HERE) ? @ast ctx ex [K"call" interpolate_ast::K"Value" + (ctx.expr_compat_mode ? Expr : SyntaxTree)::K"Value" [K"inert" ex] unquoted... 
] @@ -447,18 +449,18 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) end end -function expand_forms_1(mod::Module, ex::SyntaxTree) +function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool) graph = ensure_attributes(syntax_graph(ex), var_id=IdTag, scope_layer=LayerId, __macro_ctx__=Nothing, meta=CompileHints) - ctx = MacroExpansionContext(graph, mod) + ctx = MacroExpansionContext(graph, mod, expr_compat_mode) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) graph2 = delete_attributes(graph, :__macro_ctx__) # TODO: Returning the context with pass-specific mutable data is a bad way # to carry state into the next pass. We might fix this by attaching such # data to the graph itself as global attributes? - ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, LayerId[]) + ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, LayerId[], expr_compat_mode) return ctx2, reparent(ctx2, ex2) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 551d58524c5d4..dda924f38e267 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -71,7 +71,7 @@ function _interpolate_ast(ctx::InterpolationContext, ex, depth) makenode(ctx, ex, head(ex), expanded_children) end -function interpolate_ast(ex, values...) +function interpolate_ast(::Type{SyntaxTree}, ex, values...) # Construct graph for interpolation context. We inherit this from the macro # context where possible by detecting it using __macro_ctx__. This feels # hacky though. @@ -108,6 +108,14 @@ function interpolate_ast(ex, values...) end end +function interpolate_ast(::Type{Expr}, ex, values...) + # TODO: Adjust `_interpolated_value` to ensure that incoming `Expr` data + # structures are treated as AST in Expr compat mode, rather than `K"Value"`? + # Or convert `ex` to `Expr` early during lowering and implement + # `interpolate_ast` for `Expr`? 
+ Expr(interpolate_ast(SyntaxTree, ex, values...)) +end + #-------------------------------------------------- # Functions called by closure conversion function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, field_is_box) @@ -154,7 +162,7 @@ end # public modname # # And run statments in the toplevel expression `body` -function eval_module(parentmod, modname, body) +function eval_module(parentmod, modname, expr_compat_mode, body) # Here we just use `eval()` with an Expr. # If we wanted to avoid this we'd need to reproduce a lot of machinery from # jl_eval_module_expr() @@ -171,7 +179,7 @@ function eval_module(parentmod, modname, body) name = Symbol(modname) eval(parentmod, :( baremodule $name - $eval($name, $body) + $eval($name, $body; expr_compat_mode=$expr_compat_mode) end )) end @@ -296,7 +304,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a ) # Macro expansion - ctx1 = MacroExpansionContext(graph, mod) + ctx1 = MacroExpansionContext(graph, mod, false) # Run code generator - this acts like a macro expander and like a macro # expander it gets a MacroContext. @@ -315,7 +323,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Expand any macros emitted by the generator ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0)) ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__), - ctx1.bindings, ctx1.scope_layers, LayerId[]) + ctx1.bindings, ctx1.scope_layers, LayerId[], false) ex1 = reparent(ctx1, ex1) # Desugaring diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index d883b2464d945..a7a2b28a3adf5 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -653,9 +653,10 @@ macro SyntaxTree(ex_old) ex = _find_SyntaxTree_macro(full_ex, __source__.line) # 4. 
Do the first step of JuliaLowering's syntax lowering to get # syntax interpolations to work - _, ex1 = expand_forms_1(__module__, ex) + _, ex1 = expand_forms_1(__module__, ex, false) @assert kind(ex1) == K"call" && ex1[1].value == interpolate_ast - Expr(:call, :interpolate_ast, ex1[2][1], map(e->_scope_layer_1_to_esc!(Expr(e)), ex1[3:end])...) + Expr(:call, :interpolate_ast, SyntaxTree, ex1[3][1], + map(e->_scope_layer_1_to_esc!(Expr(e)), ex1[4:end])...) end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index 4595603e4b79d..f0fa3f1ed0f5d 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -496,3 +496,4 @@ a[] = rhs 2 TestMod.a 3 (call top.setindex! %₂ %₁) 4 (return %₁) + diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index c0698b022dcd6..1601bf9245355 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -35,7 +35,7 @@ end # Currently broken - need to push info back onto src # function annotate_scopes(mod, ex) # ex = ensure_attributes(ex, var_id=Int) -# ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex) +# ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, false) # ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) # ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) # ex @@ -859,7 +859,7 @@ ex = ensure_attributes(ex, var_id=Int) in_mod = M # in_mod=Main -ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex) +ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex, false) @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) #@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.flattened_provenance(e)[1:end-1]) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index 34d07a20fccfa..d60a397bfd6da 100644 --- 
a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1565,9 +1565,9 @@ end 10 (call core.svec %₇ %₈ %₉) 11 --- method core.nothing %₁₀ slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize,!read) slot₅/y(nospecialize,!read)] - 1 (call JuliaLowering.interpolate_ast (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))) + 1 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))) 2 (call core.tuple %₁) - 3 (call JuliaLowering.interpolate_ast (inert (block (block (= nongen_stuff (call bothgen x y)) ($ (block (call JuliaLowering.interpolate_ast (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))))) (tuple-p nongen_stuff maybe_gen_stuff)))) %₂) + 3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (block (= nongen_stuff (call bothgen x y)) ($ (block (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))))) (tuple-p nongen_stuff maybe_gen_stuff)))) %₂) 4 (return %₃) 12 latestworld 13 TestMod.f_partially_generated diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index 7ab7e18257e10..eecfb1b1d0b38 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -171,7 +171,7 @@ LoweringError: 9 (call core.svec %₆ %₇ %₈) 10 --- method core.nothing %₉ slots: [slot₁/#self#(!read) slot₂/_(!read)] - 1 (call JuliaLowering.interpolate_ast (inert (return x))) + 1 (call JuliaLowering.interpolate_ast SyntaxTree (inert (return x))) 2 (return %₁) 11 latestworld 12 TestMod.#->##6 diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 34d028876c09f..2095e305bdbde 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -110,7 +110,7 @@ M.@recursive 3 """) == (3, (2, (1, 0))) ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl") -ctx, 
expanded = JuliaLowering.expand_forms_1(test_mod, ex) +ctx, expanded = JuliaLowering.expand_forms_1(test_mod, ex, false) @test JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) == [ "M.@outer()" "@inner" diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 29f4e6509b743..874ca9594e3da 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -35,7 +35,7 @@ end 9 --- method core.nothing %₈ slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/ex] 1 (call core.tuple slot₃/ex) - 2 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ ex) + 1))) %₁) + 2 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (call-i ($ ex) + 1))) %₁) 3 (return %₂) 10 latestworld 11 TestMod.@add_one diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl index 7fa05c5127e52..e53be0c61fcbd 100644 --- a/JuliaLowering/test/quoting_ir.jl +++ b/JuliaLowering/test/quoting_ir.jl @@ -6,7 +6,7 @@ end #--------------------- 1 TestMod.x 2 (call core.tuple %₁) -3 (call JuliaLowering.interpolate_ast (inert (block (call-i ($ x) + 1))) %₂) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (call-i ($ x) + 1))) %₂) 4 (return %₃) ######################################## @@ -15,7 +15,7 @@ end #--------------------- 1 TestMod.x 2 (call core.tuple %₁) -3 (call JuliaLowering.interpolate_ast (inert ($ x)) %₂) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert ($ x)) %₂) 4 (return %₃) ######################################## @@ -28,7 +28,7 @@ end #--------------------- 1 TestMod.x 2 (call core.tuple %₁) -3 (call JuliaLowering.interpolate_ast (inert (block (quote (block (call-i ($ ($ x)) + 1))))) %₂) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (quote (block (call-i ($ ($ x)) + 1))))) %₂) 4 (return %₃) ######################################## From 64b218f35c1140f4f2e75e8715dba57a88bdaf04 Mon Sep 17 00:00:00 2001 From: Em Chu 
<61633163+mlechu@users.noreply.github.com> Date: Fri, 22 Aug 2025 16:25:36 -0700 Subject: [PATCH 1049/1109] Random lowering bugfix batch 2 (JuliaLang/JuliaLowering.jl#45) * fix showing MacroExpansionError when sourceref isa LineNumberNode * Support K"gc_preserve" (Note that this lowering may change in https://github.com/JuliaLang/julia/pull/59129) * Support `Expr(:isglobal)` * Support K"cmdstring" (just convert it to a macrocall) * Delete two-arg dot form check (The number of forms isn't bounded) * Don't fail on `Expr(:inbounds)` * List known meta forms + improve compat.jl errors * Turn off flisp fallback for now --------- Co-authored-by: Claire Foster --- JuliaLowering/src/compat.jl | 23 ++++++++++++------ JuliaLowering/src/desugaring.jl | 9 +++++++ JuliaLowering/src/hooks.jl | 12 ++++++---- JuliaLowering/src/kinds.jl | 3 ++- JuliaLowering/src/macro_expansion.jl | 29 +++++++++++++++------- JuliaLowering/src/scope_analysis.jl | 10 ++++++++ JuliaLowering/test/macros.jl | 36 ++++++++++++++++++++++++++++ JuliaLowering/test/macros_ir.jl | 11 +++++++++ 8 files changed, 112 insertions(+), 21 deletions(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index f12f0e7b5cf81..4dd9f07ffa77a 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -243,7 +243,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA elseif a1 isa Function # pass else - error("Unknown macrocall form $(sprint(dump, e))") + error("Unknown macrocall form at $src: $(sprint(dump, e))") @assert false end elseif e.head === Symbol("'") @@ -258,9 +258,6 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA child_exprs = pushfirst!(tuple_exprs, e.args[1]) elseif a2 isa QuoteNode && a2.value isa Symbol child_exprs[2] = a2.value - elseif a2 isa Expr && a2.head === :MacroName - else - @error "Unknown 2-arg dot form" e end elseif e.head === :for @assert nargs === 2 @@ -420,8 +417,15 @@ function 
_insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA setmeta!(SyntaxTree(graph, st_id); nospecialize=true) return st_id, src end + elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, + :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, + :aggressive_constprop, :specialize, :compile, :infer, + :nospecializeinfer) + # TODO: Some need to be handled in lowering + child_exprs[1] = Expr(:quoted_symbol, e.args[1]) else - @assert nargs === 1 + # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys. + @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end elseif e.head === :scope_layer @@ -436,11 +440,16 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA st_k = e.head === :symbolicgoto ? K"symbolic_label" : K"symbolic_goto" st_attrs[:name_val] = string(e.args[1]) child_exprs = nothing - elseif e.head === :inline || e.head === :noinline + elseif e.head in (:inline, :noinline) @assert nargs === 1 && e.args[1] isa Bool # TODO: JuliaLowering doesn't accept this (non-:meta) form yet st_k = K"TOMBSTONE" child_exprs = nothing + elseif e.head === :inbounds + @assert nargs === 1 && typeof(e.args[1]) in (Symbol, Bool) + # TODO: JuliaLowering doesn't accept this form yet + st_k = K"TOMBSTONE" + child_exprs = nothing elseif e.head === :core @assert nargs === 1 @assert e.args[1] isa Symbol @@ -484,7 +493,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA # Throw if this script isn't complete. Finally, insert a new node into the # graph and recurse on child_exprs if st_k === K"None" - error("Unknown expr head `$(e.head)`\n$(sprint(dump, e))") + error("Unknown expr head at $src: `$(e.head)`\n$(sprint(dump, e))") end st_id = _insert_tree_node(graph, st_k, src, st_flags; st_attrs...) 
diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index a313e7cda3437..105b2168573ec 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -4544,6 +4544,15 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) ] elseif k == K"inert" ex + elseif k == K"gc_preserve" + s = ssavar(ctx, ex) + r = ssavar(ctx, ex) + @ast ctx ex [K"block" + s := [K"gc_preserve_begin" children(ex)[2:end]...] + r := expand_forms_2(ctx, children(ex)[1]) + [K"gc_preserve_end" s] + r + ] elseif k == K"&" throw(LoweringError(ex, "invalid syntax")) elseif k == K"$" diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index 5beb00a6d4e08..f030923284bfe 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -25,10 +25,14 @@ function core_lowering_hook(@nospecialize(code), mod::Module, ex = to_lowered_expr(mod, st5) return Core.svec(ex, st5, ctx5) catch exc - @error("JuliaLowering failed — falling back to flisp!", - exception=(exc,catch_backtrace()), - code=code, file=file, line=line, mod=mod) - return Base.fl_lower(code, mod, file, line, world, warn) + @info("JuliaLowering threw given input:", code=code, st0=st0, file=file, line=line, mod=mod) + rethrow(exc) + + # TODO: Re-enable flisp fallback once we're done collecting errors + # @error("JuliaLowering failed — falling back to flisp!", + # exception=(exc,catch_backtrace()), + # code=code, file=file, line=line, mod=mod) + # return Base.fl_lower(code, mod, file, line, world, warn) end end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 4cc0afe86f088..3a5d9bd698d5d 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -11,6 +11,7 @@ function _register_kinds() # Flag for @generated parts of a functon "generated" # Temporary rooting of identifiers (GC.@preserve) + "gc_preserve" "gc_preserve_begin" "gc_preserve_end" # A literal Julia value of any kind, as might be inserted into the 
@@ -61,7 +62,7 @@ function _register_kinds() "static_eval" # Catch-all for additional syntax extensions without the need to # extend `Kind`. Known extensions include: - # locals, islocal + # locals, islocal, isglobal # The content of an assertion is not considered to be quoted, so # use K"Symbol" or K"inert" inside where necessary. "extension" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index e83a73715a623..6aa1684e31ffc 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -118,14 +118,18 @@ function Base.showerror(io::IO, exc::MacroExpansionError) # * How to deal with highlighting trivia? Could provide a token kind or # child position within the raw tree? How to abstract this?? src = sourceref(exc.ex) - fb = first_byte(src) - lb = last_byte(src) - pos = exc.position - byterange = pos == :all ? (fb:lb) : - pos == :begin ? (fb:fb-1) : - pos == :end ? (lb+1:lb) : - error("Unknown position $pos") - highlight(io, src.file, byterange, note=exc.msg) + if src isa LineNumberNode + highlight(io, src, note=exc.msg) + else + fb = first_byte(src) + lb = last_byte(src) + pos = exc.position + byterange = pos == :all ? (fb:lb) : + pos == :begin ? (fb:fb-1) : + pos == :end ? (lb+1:lb) : + error("Unknown position $pos") + highlight(io, src.file, byterange, note=exc.msg) + end if !isnothing(exc.err) print(io, "\nCaused by:\n") showerror(io, exc.err) @@ -238,7 +242,10 @@ function expand_macro(ctx, ex) else # Compat: attempt to invoke an old-style macro if there's no applicable # method for new-style macro arguments. - macro_loc = source_location(LineNumberNode, ex) + macro_loc = let loc = source_location(LineNumberNode, ex) + # Some macros, e.g. @cmd, don't play nicely with file == nothing + isnothing(loc.file) ? 
LineNumberNode(loc.line, :none) : loc + end macro_args = Any[macro_loc, current_layer(ctx).mod] for arg in raw_args # For hygiene in old-style macros, we omit any additional scope @@ -388,6 +395,10 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) e2 = @ast ctx e2 e2=>K"Symbol" end @ast ctx ex [K"." expand_forms_1(ctx, ex[1]) e2] + elseif k == K"cmdstring" + @chk numchildren(ex) == 1 + e2 = @ast ctx ex [K"macrocall" "@cmd"::K"core" ex[1]] + expand_macro(ctx, e2) elseif (k == K"call" || k == K"dotcall") # Do some initial desugaring of call and dotcall here to simplify # the later desugaring pass diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index ee0b1498ff4f2..59d8bfa515b70 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -494,6 +494,16 @@ function _resolve_scopes(ctx, ex::SyntaxTree) id = lookup_var(ctx, NameKey(ex[2])) islocal = !isnothing(id) && var_kind(ctx, id) != :global @ast ctx ex islocal::K"Bool" + elseif etype == "isglobal" + e2 = ex[2] + @chk kind(e2) in KSet"Identifier Placeholder" + isglobal = if kind(e2) == K"Identifier" + id = lookup_var(ctx, NameKey(e2)) + isnothing(id) || var_kind(ctx, id) == :global + else + false + end + @ast ctx ex isglobal::K"Bool" elseif etype == "locals" stmts = SyntaxList(ctx) locals_dict = ssavar(ctx, ex, "locals_dict") diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 2095e305bdbde..a23327748106f 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -239,6 +239,13 @@ MacroExpansionError while expanding @oldstyle_error in module Main.macros.test_m Caused by: Some error in old style macro""" +@test sprint( + showerror, + JuliaLowering.MacroExpansionError( + JuliaLowering.expr_to_syntaxtree(:(foo), LineNumberNode(1)), + "fake error")) == + "MacroExpansionError:\n#= line 1 =# - fake error" + # Old-style macros returning non-Expr values Base.eval(test_mod, :( macro 
oldstyle_non_Expr() @@ -293,4 +300,33 @@ end MethodError: no method matching var"@sig_mismatch"(::JuliaLowering.MacroContext, ::JuliaLowering.SyntaxTree""") end +@testset "old macros producing exotic expr heads" begin + @test JuliaLowering.include_string(test_mod, """ + let # example from @preserve docstring + x = Ref{Int}(101) + p = Base.unsafe_convert(Ptr{Int}, x) + GC.@preserve x unsafe_load(p) + end""") === 101 # Expr(:gc_preserve) + + # only invokelatest produces :isglobal now, so MWE here + Base.eval(test_mod, :(macro isglobal(x); esc(Expr(:isglobal, x)); end)) + @test JuliaLowering.include_string(test_mod, """ + some_global = 1 + function isglobal_chk(some_arg) + local some_local = 1 + (@isglobal(some_undefined), @isglobal(some_global), @isglobal(some_arg), @isglobal(some_local)) + end + isglobal_chk(1) + """) === (true, true, false, false) + # with K"Placeholder"s + @test JuliaLowering.include_string(test_mod, """ + __ = 1 + function isglobal_chk(___) + local ____ = 1 + (@isglobal(_), @isglobal(__), @isglobal(___), @isglobal(____)) + end + isglobal_chk(1) + """) === (false, false, false, false) +end + end # module macros diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 874ca9594e3da..2598a4a92197b 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -136,3 +136,14 @@ Caused by: UndefVarError: `@m_not_exist` not defined in `Main.TestMod` Suggestion: check for spelling errors or missing imports. 
+######################################## +# Simple cmdstring +`echo 1` +#--------------------- +1 Base.cmd_gen +2 (call core.tuple "echo") +3 (call core.tuple "1") +4 (call core.tuple %₂ %₃) +5 (call %₁ %₄) +6 (return %₅) + From 79e95e19116afd2bbbbbca2873bae83df150e672 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Tue, 26 Aug 2025 09:16:42 +0200 Subject: [PATCH 1050/1109] `_register_kinds!`: prevent unintentional closure capture, boxing (JuliaLang/JuliaSyntax.jl#586) The variable `i` was unintentionally shared between the generator closure in `_register_kinds!` and another part of the body of `_register_kinds!`. Thus `i` was boxed, causing trouble for inference. Fix this by moving the part of `_register_kinds!` with `i` to a new function. Fixing this should make the sysimage more resistant to invalidation, once the change propagates to Julia itself. --- JuliaSyntax/src/julia/kinds.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl index 96d78ad729db7..c659f6678d528 100644 --- a/JuliaSyntax/src/julia/kinds.jl +++ b/JuliaSyntax/src/julia/kinds.jl @@ -102,6 +102,12 @@ function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, mo error("Kind module ID $module_id already claimed by module $m") end end + _register_kinds_names!(int_to_kindstr, kind_str_to_int, module_id, names) +end + +# This function is separated from `_register_kinds!` to prevent sharing of the variable `i` +# here and in the closure in `_register_kinds!`, which causes boxing and bad inference. +function _register_kinds_names!(int_to_kindstr, kind_str_to_int, module_id, names) # Process names to conflate category BEGIN/END markers with the first/last # in the category. 
i = 0 From f13e873354e7d51834a69d120e515a994f0ba63e Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Tue, 26 Aug 2025 10:07:16 +0200 Subject: [PATCH 1051/1109] `_register_kinds!`: improve type stability when creating `Vector` (JuliaLang/JuliaSyntax.jl#587) Make the temporary `existing_kinds::Vector` infer concretely. NB: a more thorough solution might do away with the construction of `existing_kinds` altogether, however this seems like an OK quick fix. Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> --- JuliaSyntax/src/julia/kinds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl index c659f6678d528..dd25663b14ef3 100644 --- a/JuliaSyntax/src/julia/kinds.jl +++ b/JuliaSyntax/src/julia/kinds.jl @@ -88,7 +88,7 @@ function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, mo # Ok: known kind module, but not loaded until now kind_modules[module_id] = mod elseif m == mod - existing_kinds = [(i = get(kind_str_to_int, n, nothing); + existing_kinds = Union{Nothing, Kind}[(i = get(kind_str_to_int, n, nothing); isnothing(i) ? nothing : Kind(i)) for n in names] if any(isnothing, existing_kinds) || !issorted(existing_kinds) || From cf9a345a77850dde8ca7cfa1d3ca61459ee1d8a7 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 28 Aug 2025 03:54:04 +0900 Subject: [PATCH 1052/1109] docs: fix docs of `last_byte` (JuliaLang/JuliaSyntax.jl#590) --- JuliaSyntax/src/core/source_files.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaSyntax/src/core/source_files.jl b/JuliaSyntax/src/core/source_files.jl index 1058693f2a941..87019aa81e594 100644 --- a/JuliaSyntax/src/core/source_files.jl +++ b/JuliaSyntax/src/core/source_files.jl @@ -41,7 +41,7 @@ Return the first byte of `x` in the source text. 
first_byte(x) = first(byte_range(x)) """ - first_byte(x) + last_byte(x) Return the last byte of `x` in the source text. """ From ef9626d8c788c7ae3a0fd3d37b293d23a8224063 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:01:21 +0900 Subject: [PATCH 1053/1109] compat: support `Base.Experimental.@force_compile` (JuliaLang/JuliaLowering.jl#56) `:meta :force_compile` is one of the forms known by the compiler. Also adds `:doc` to the list. Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com> --------- Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com> --- JuliaLowering/src/compat.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 4dd9f07ffa77a..24b75108c3d79 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -420,7 +420,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, :aggressive_constprop, :specialize, :compile, :infer, - :nospecializeinfer) + :nospecializeinfer, :force_compile, :doc) # TODO: Some need to be handled in lowering child_exprs[1] = Expr(:quoted_symbol, e.args[1]) else @@ -428,7 +428,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end - elseif e.head === :scope_layer + elseif e.head === :scope_layer @assert nargs === 2 @assert e.args[1] isa Symbol @assert e.args[2] isa LayerId From ef5067ea827603dd20bd0dd6df6f1fecc37a800a Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Thu, 28 Aug 2025 17:26:03 -0700 Subject: [PATCH 1054/1109] SyntaxGraph: Usability and performance tweaks (JuliaLang/JuliaLowering.jl#48) * 
`copy_ast`: Add option to not recurse on `.source`, clarify docs, fix over-recursion * `ensure_attributes!`: throw instead of attempting to mutate NamedTuple * Add graph utils: `unfreeze_attrs`, `attrdefs` * Print more information when node does not have attribute * Fix printing for identifier-like kinds `String`/`Cmd` `MacroName` * Do not coerce attrs to NamedTuple unnecessarily For `ensure_attributes` and `delete_attributes`, the output graph's `.attributes` now have the same type (`Dict` or `NamedTuple`) as the input. Add `delete_attributes!` defined only on dict-attrs to be consistent with `ensure_attributes!` * Remove ineffective call to `freeze_attrs` converting from SyntaxNode * Test `ensure`, `delete` attrs, `attrtypes`, `copy_ast` --------- Co-authored-by: Shuhei Kadowaki Co-authored-by: Claire Foster --- JuliaLowering/src/ast.jl | 56 ++++++++++++------ JuliaLowering/src/syntax_graph.jl | 47 ++++++++++++--- JuliaLowering/test/syntax_graph.jl | 91 ++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+), 27 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 46aa82e505dc0..a0e65ce189fb3 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -428,27 +428,47 @@ end """ -Copy AST `ex` into `ctx` +Recursively copy AST `ex` into `ctx`. + +Special provenance handling: If `copy_source` is true, treat the `.source` +attribute as a reference and recurse on its contents. Otherwise, treat it like +any other attribute. """ -function copy_ast(ctx, ex) - # TODO: Do we need to keep a mapping of node IDs to ensure we don't - # double-copy here in the case when some tree nodes are pointed to by - # multiple parents? (How much does this actually happen in practice?) - s = ex.source - # TODO: Figure out how to use provenance() here? - srcref = s isa NodeId ? copy_ast(ctx, SyntaxTree(ex._graph, s)) : - s isa Tuple ? 
map(i->copy_ast(ctx, SyntaxTree(ex._graph, i)), s) : - s - if !is_leaf(ex) - cs = SyntaxList(ctx) - for e in children(ex) - push!(cs, copy_ast(ctx, e)) - end - ex2 = makenode(ctx, srcref, ex, cs) +function copy_ast(ctx, ex::SyntaxTree; copy_source=true) + graph1 = syntax_graph(ex) + graph2 = syntax_graph(ctx) + !copy_source && check_same_graph(graph1, graph2) + id2 = _copy_ast(graph2, graph1, ex._id, Dict{NodeId, NodeId}(), copy_source) + return SyntaxTree(graph2, id2) +end + +function _copy_ast(graph2::SyntaxGraph, graph1::SyntaxGraph, + id1::NodeId, seen, copy_source) + let copied = get(seen, id1, nothing) + isnothing(copied) || return copied + end + id2 = newnode!(graph2) + seen[id1] = id2 + src1 = get(SyntaxTree(graph1, id1), :source, nothing) + src2 = if !copy_source + src1 + elseif src1 isa NodeId + _copy_ast(graph2, graph1, src1, seen, copy_source) + elseif src1 isa Tuple + map(i->_copy_ast(graph2, graph1, i, seen, copy_source), src1) else - ex2 = makeleaf(ctx, srcref, ex) + src1 end - return ex2 + copy_attrs!(SyntaxTree(graph2, id2), SyntaxTree(graph1, id1), true) + setattr!(graph2, id2; source=src2) + if !is_leaf(graph1, id1) + cs = NodeId[] + for cid in children(graph1, id1) + push!(cs, _copy_ast(graph2, graph1, cid, seen, copy_source)) + end + setchildren!(graph2, id2, cs) + end + return id2 end #------------------------------------------------------------------------------- diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index a7a2b28a3adf5..6a4aa5946d508 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -22,6 +22,12 @@ function freeze_attrs(graph::SyntaxGraph) SyntaxGraph(graph.edge_ranges, graph.edges, frozen_attrs) end +# Create a copy of `graph` where the attribute list is mutable +function unfreeze_attrs(graph::SyntaxGraph) + unfrozen_attrs = Dict{Symbol,Any}(pairs(graph.attributes)...) 
+ SyntaxGraph(graph.edge_ranges, graph.edges, unfrozen_attrs) +end + function _show_attrs(io, attributes::Dict) show(io, MIME("text/plain"), attributes) end @@ -33,6 +39,10 @@ function attrnames(graph::SyntaxGraph) keys(graph.attributes) end +function attrdefs(graph::SyntaxGraph) + [(k=>typeof(v).parameters[2]) for (k, v) in pairs(graph.attributes)] +end + function Base.show(io::IO, ::MIME"text/plain", graph::SyntaxGraph) print(io, typeof(graph), " with $(length(graph.edge_ranges)) vertices, $(length(graph.edges)) edges, and attributes:\n") @@ -46,6 +56,10 @@ function ensure_attributes!(graph::SyntaxGraph; kws...) if haskey(graph.attributes, k) v0 = valtype(graph.attributes[k]) v == v0 || throw(ErrorException("Attribute type mismatch $v != $v0")) + elseif graph.attributes isa NamedTuple + throw(ErrorException(""" + ensure_attributes!: $k is not an existing attribute, and the graph's attributes are frozen. \ + Consider calling non-mutating `ensure_attributes` instead.""")) else graph.attributes[k] = Dict{NodeId,v}() end @@ -53,18 +67,31 @@ function ensure_attributes!(graph::SyntaxGraph; kws...) graph end -function ensure_attributes(graph::SyntaxGraph; kws...) - g = SyntaxGraph(graph.edge_ranges, graph.edges, Dict(pairs(graph.attributes)...)) +function ensure_attributes(graph::SyntaxGraph{<:Dict}; kws...) + g = unfreeze_attrs(graph) + ensure_attributes!(g; kws...) +end + +function ensure_attributes(graph::SyntaxGraph{<:NamedTuple}; kws...) + g = unfreeze_attrs(graph) ensure_attributes!(g; kws...) freeze_attrs(g) end -function delete_attributes(graph::SyntaxGraph, attr_names...) - attributes = Dict(pairs(graph.attributes)...) +function delete_attributes!(graph::SyntaxGraph{<:Dict}, attr_names::Symbol...) for name in attr_names - delete!(attributes, name) + delete!(graph.attributes, name) end - SyntaxGraph(graph.edge_ranges, graph.edges, (; pairs(attributes)...)) + graph +end + +function delete_attributes(graph::SyntaxGraph{<:Dict}, attr_names::Symbol...) 
+ delete_attributes!(unfreeze_attrs(graph), attr_names...) +end + +function delete_attributes(graph::SyntaxGraph{<:NamedTuple}, attr_names::Symbol...) + g = delete_attributes!(unfreeze_attrs(graph), attr_names...) + freeze_attrs(g) end function newnode!(graph::SyntaxGraph) @@ -205,7 +232,9 @@ function Base.getproperty(ex::SyntaxTree, name::Symbol) name === :_id && return getfield(ex, :_id) _id = getfield(ex, :_id) return get(getproperty(getfield(ex, :_graph), name), _id) do - error("Property `$name[$_id]` not found") + attrstr = join(["\n $n = $(getproperty(ex, n))" + for n in attrnames(ex)], ",") + error("Property `$name[$_id]` not found. Available attributes:$attrstr") end end @@ -415,7 +444,7 @@ const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId,Tuple} function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, value=Any, name_val=String) - id = _convert_nodes(freeze_attrs(graph), node) + id = _convert_nodes(graph, node) return SyntaxTree(graph, id) end @@ -428,7 +457,7 @@ attrsummary(name, value::Number) = "$name=$value" function _value_string(ex) k = kind(ex) - str = k == K"Identifier" || k == K"MacroName" || is_operator(k) ? ex.name_val : + str = k in KSet"Identifier MacroName StringMacroName CmdMacroName" || is_operator(k) ? ex.name_val : k == K"Placeholder" ? ex.name_val : k == K"SSAValue" ? "%" : k == K"BindingId" ? 
"#" : diff --git a/JuliaLowering/test/syntax_graph.jl b/JuliaLowering/test/syntax_graph.jl index 153dbc88265f2..60fd10dde0c20 100644 --- a/JuliaLowering/test/syntax_graph.jl +++ b/JuliaLowering/test/syntax_graph.jl @@ -1,3 +1,45 @@ +@testset "SyntaxGraph attrs" begin + st = parsestmt(SyntaxTree, "function foo end") + g_init = JuliaLowering.unfreeze_attrs(st._graph) + gf1 = JuliaLowering.freeze_attrs(g_init) + gu1 = JuliaLowering.unfreeze_attrs(gf1) + + # Check that freeze/unfreeze do their jobs + @test gf1.attributes isa NamedTuple + @test gu1.attributes isa Dict + @test Set(keys(gf1.attributes)) == Set(keys(gu1.attributes)) + + # ensure_attributes + gf2 = JuliaLowering.ensure_attributes(gf1, test_attr=Symbol, foo=Type) + gu2 = JuliaLowering.ensure_attributes(gu1, test_attr=Symbol, foo=Type) + # returns a graph with the same attribute storage + @test gf2.attributes isa NamedTuple + @test gu2.attributes isa Dict + # does its job + @test (:test_attr=>Symbol) in JuliaLowering.attrdefs(gf2) + @test (:foo=>Type) in JuliaLowering.attrdefs(gf2) + @test Set(keys(gf2.attributes)) == Set(keys(gu2.attributes)) + # no mutation + @test !((:test_attr=>Symbol) in JuliaLowering.attrdefs(gf1)) + @test !((:foo=>Type) in JuliaLowering.attrdefs(gf1)) + @test Set(keys(gf1.attributes)) == Set(keys(gu1.attributes)) + + # delete_attributes + gf3 = JuliaLowering.delete_attributes(gf2, :test_attr, :foo) + gu3 = JuliaLowering.delete_attributes(gu2, :test_attr, :foo) + # returns a graph with the same attribute storage + @test gf3.attributes isa NamedTuple + @test gu3.attributes isa Dict + # does its job + @test !((:test_attr=>Symbol) in JuliaLowering.attrdefs(gf3)) + @test !((:foo=>Type) in JuliaLowering.attrdefs(gf3)) + @test Set(keys(gf3.attributes)) == Set(keys(gu3.attributes)) + # no mutation + @test (:test_attr=>Symbol) in JuliaLowering.attrdefs(gf2) + @test (:foo=>Type) in JuliaLowering.attrdefs(gf2) + @test Set(keys(gf2.attributes)) == Set(keys(gu2.attributes)) +end + @testset 
"SyntaxTree" begin # Expr conversion @test Expr(parsestmt(SyntaxTree, "begin a + b ; c end", filename="none")) == @@ -16,4 +58,53 @@ @test kind(tree2) == K"block" @test kind(tree2[1]) == K"Identifier" && tree2[1].name_val == "x" @test kind(tree2[2]) == K"Identifier" && tree2[2].name_val == "some_unique_identifier" + + "For filling required attrs in graphs created by hand" + function testgraph(edge_ranges, edges, more_attrs...) + kinds = Dict(map(i->(i=>K"block"), eachindex(edge_ranges))) + sources = Dict(map(i->(i=>LineNumberNode(i)), eachindex(edge_ranges))) + SyntaxGraph( + edge_ranges, + edges, + Dict(:kind => kinds, :source => sources, more_attrs...)) + end + + @testset "copy_ast" begin + # 1 --> 2 --> 3 src(7-9) = line 7-9 + # 4 --> 5 --> 6 src(i) = i + 3 + # 7 --> 8 --> 9 + g = testgraph([1:1, 2:2, 0:-1, 3:3, 4:4, 0:-1, 5:5, 6:6, 0:-1], + [2, 3, 5, 6, 8, 9], + :source => Dict(enumerate([ + map(i->i+3, 1:6)... + map(LineNumberNode, 7:9)...]))) + st = SyntaxTree(g, 1) + stcopy = JuliaLowering.copy_ast(g, st) + # Each node should be copied once + @test length(g.edge_ranges) === 18 + @test st._id != stcopy._id + @test st ≈ stcopy + @test st.source !== stcopy.source + @test st.source[1] !== stcopy.source[1] + @test st.source[1][1] !== stcopy.source[1][1] + + stcopy2 = JuliaLowering.copy_ast(g, st; copy_source=false) + # Only nodes 1-3 should be copied + @test length(g.edge_ranges) === 21 + @test st._id != stcopy2._id + @test st ≈ stcopy2 + @test st.source === stcopy2.source + @test st.source[1] === stcopy2.source[1] + @test st.source[1][1] === stcopy2.source[1][1] + + # Copy into a new graph + new_g = ensure_attributes!(SyntaxGraph(); JuliaLowering.attrdefs(g)...) + stcopy3 = JuliaLowering.copy_ast(new_g, st) + @test length(new_g.edge_ranges) === 9 + @test st ≈ stcopy3 + + new_g = ensure_attributes!(SyntaxGraph(); JuliaLowering.attrdefs(g)...) 
+ # Disallow for now, since we can't prevent dangling sourcerefs + @test_throws ErrorException JuliaLowering.copy_ast(new_g, st; copy_source=false) + end end From 7155b76f8c6e41c1dc06c78aa9aba09f53689e1b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 30 Aug 2025 13:19:08 +1000 Subject: [PATCH 1055/1109] Fixes for quote interpolation in Expr compat mode (JuliaLang/JuliaLowering.jl#57) Ensure that interpolating values into quoted expressions works in Expr compat mode. * To produce `Expr` in `CodeInfo` conversion, add an `as_Expr` attribute to `K"inert"`. expand_quote * In analogy with `eval()`, add `expr_compat_mode` keyword to `include_string`. We may want to rethink this when we rethink how options to `eval()` work. * Adapt the runtime function `interpolate_ast` to work with both `Expr` and `SyntaxTree` as the source expression. * Allow `Symbol` to be interpolated into `SyntaxTree` as syntax rather than a `K"Value": Even in "new style macro mode", `:x` is interpreted as a `Symbol` rather than an AST as it's used by many packages as a kind of lightweight untyped enum (ie, a usage which is almost entirely unrelated to AST manipulation) * Fix `SyntaxTree->Expr` conversion so that embedded Expr `K"Value"` means a leaf, not an embedded tree. --- JuliaLowering/src/eval.jl | 8 ++- JuliaLowering/src/macro_expansion.jl | 4 +- JuliaLowering/src/runtime.jl | 100 ++++++++++++++++++++------- JuliaLowering/src/syntax_graph.jl | 10 ++- JuliaLowering/test/quoting.jl | 82 +++++++++++++++++++--- 5 files changed, 164 insertions(+), 40 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 112639650c1b4..9822ff3278125 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -261,7 +261,8 @@ function to_lowered_expr(mod, ex, ssa_offset=0) elseif k == K"return" Core.ReturnNode(to_lowered_expr(mod, ex[1], ssa_offset)) elseif k == K"inert" - ex[1] + e1 = ex[1] + getmeta(ex, :as_Expr, false) ? 
QuoteNode(Expr(e1)) : e1 elseif k == K"code_info" funcname = ex.is_toplevel_thunk ? "top-level scope" : @@ -391,7 +392,8 @@ end Like `include`, except reads code from the given string rather than from a file. """ -function include_string(mod::Module, code::AbstractString, filename::AbstractString="string") - eval(mod, parseall(SyntaxTree, code; filename=filename)) +function include_string(mod::Module, code::AbstractString, filename::AbstractString="string"; + expr_compat_mode=false) + eval(mod, parseall(SyntaxTree, code; filename=filename); expr_compat_mode) end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 6aa1684e31ffc..d6ab9815ad809 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -57,7 +57,7 @@ function expand_quote(ctx, ex) # the entire expression produced by `quote` expansion. We could, but it # seems unnecessary for `quote` because the surface syntax is a transparent # representation of the expansion process. However, it's useful to add the - # extra srcref in a more targetted way for $ interpolations inside + # extra srcref in a more targeted way for $ interpolations inside # interpolate_ast, so we do that there. # # In principle, particular user-defined macros could opt into a similar @@ -69,7 +69,7 @@ function expand_quote(ctx, ex) @ast ctx ex [K"call" interpolate_ast::K"Value" (ctx.expr_compat_mode ? Expr : SyntaxTree)::K"Value" - [K"inert" ex] + [K"inert"(meta=CompileHints(:as_Expr, ctx.expr_compat_mode)) ex] unquoted... 
] end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index dda924f38e267..a88a5966fb974 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -21,46 +21,95 @@ struct InterpolationContext{Graph} <: AbstractLoweringContext current_index::Ref{Int} end +# Context for `Expr`-based AST interpolation in compat mode +struct ExprInterpolationContext <: AbstractLoweringContext + values::Tuple + current_index::Ref{Int} +end + +# Helper functions to make shared interpolation code which works with both +# SyntaxTree and Expr data structures. +_interp_kind(ex::SyntaxTree) = kind(ex) +function _interp_kind(@nospecialize(ex)) + return (ex isa Expr && ex.head === :quote) ? K"quote" : + (ex isa Expr && ex.head === :$) ? K"$" : + K"None" # Other cases irrelevant to interpolation +end + +_children(ex::SyntaxTree) = children(ex) +_children(@nospecialize(ex)) = ex isa Expr ? ex.args : () + +_numchildren(ex::SyntaxTree) = numchildren(ex) +_numchildren(@nospecialize(ex)) = ex isa Expr ? length(ex.args) : 0 + +_syntax_list(ctx::InterpolationContext) = SyntaxList(ctx) +_syntax_list(ctx::ExprInterpolationContext) = Any[] + +_interp_makenode(ctx::InterpolationContext, ex, args) = makenode(ctx, ex, ex, args) +_interp_makenode(ctx::ExprInterpolationContext, ex, args) = Expr((ex::Expr).head, args...) + +_to_syntax_tree(ex::SyntaxTree) = ex +_to_syntax_tree(@nospecialize(ex)) = expr_to_syntaxtree(ex) + + function _contains_active_interp(ex, depth) - k = kind(ex) + k = _interp_kind(ex) if k == K"$" && depth == 0 return true + elseif _numchildren(ex) == 0 + return false end inner_depth = k == K"quote" ? depth + 1 : k == K"$" ? 
depth - 1 : depth - return any(_contains_active_interp(c, inner_depth) for c in children(ex)) + return any(_contains_active_interp(c, inner_depth) for c in _children(ex)) end # Produce interpolated node for `$x` syntax -function _interpolated_value(ctx, srcref, ex) +function _interpolated_value(ctx::InterpolationContext, srcref, ex) if ex isa SyntaxTree if !is_compatible_graph(ctx, ex) ex = copy_ast(ctx, ex) end append_sourceref(ctx, ex, srcref) + elseif ex isa Symbol + # Plain symbols become identifiers. This is an accomodation for + # compatibility to allow `:x` (a Symbol) and `:(x)` (a SyntaxTree) to + # be used interchangably in macros. + makeleaf(ctx, srcref, K"Identifier", string(ex)) else makeleaf(ctx, srcref, K"Value", ex) end end -function _interpolate_ast(ctx::InterpolationContext, ex, depth) +function _interpolated_value(::ExprInterpolationContext, _, ex) + ex +end + +function copy_ast(::ExprInterpolationContext, @nospecialize(ex)) + @ccall(jl_copy_ast(ex::Any)::Any) +end + +function _interpolate_ast(ctx, ex, depth) if ctx.current_index[] > length(ctx.values) || !_contains_active_interp(ex, depth) return ex end # We have an interpolation deeper in the tree somewhere - expand to an - # expression - inner_depth = kind(ex) == K"quote" ? depth + 1 : - kind(ex) == K"$" ? depth - 1 : + # expression which performs the interpolation. + k = _interp_kind(ex) + inner_depth = k == K"quote" ? depth + 1 : + k == K"$" ? depth - 1 : depth - expanded_children = SyntaxList(ctx) - for e in children(ex) - if kind(e) == K"$" && inner_depth == 0 + + expanded_children = _syntax_list(ctx) + + for e in _children(ex) + if _interp_kind(e) == K"$" && inner_depth == 0 vals = ctx.values[ctx.current_index[]]::Tuple ctx.current_index[] += 1 for (i,v) in enumerate(vals) - srcref = numchildren(e) == 1 ? e : e[i] + srcref = _numchildren(e) == 1 ? 
e : _children(e)[i] push!(expanded_children, _interpolated_value(ctx, srcref, v)) end else @@ -68,10 +117,10 @@ function _interpolate_ast(ctx::InterpolationContext, ex, depth) end end - makenode(ctx, ex, head(ex), expanded_children) + _interp_makenode(ctx, ex, expanded_children) end -function interpolate_ast(::Type{SyntaxTree}, ex, values...) +function _setup_interpolation(::Type{SyntaxTree}, ex, values) # Construct graph for interpolation context. We inherit this from the macro # context where possible by detecting it using __macro_ctx__. This feels # hacky though. @@ -92,15 +141,26 @@ function interpolate_ast(::Type{SyntaxTree}, ex, values...) value=Any, name_val=String, scope_layer=LayerId) end ctx = InterpolationContext(graph, values, Ref(1)) + return ctx +end + +function _setup_interpolation(::Type{Expr}, ex, values) + return ExprInterpolationContext(values, Ref(1)) +end + +function interpolate_ast(::Type{T}, ex, values...) where {T} + ctx = _setup_interpolation(T, ex, values) + # We must copy the AST into our context to use it as the source reference - # of generated expressions. + # of generated expressions (and in the Expr case at least, to avoid mutation) ex1 = copy_ast(ctx, ex) - if kind(ex1) == K"$" + if _interp_kind(ex1) == K"$" @assert length(values) == 1 vs = values[1] if length(vs) > 1 # :($($(xs...))) where xs is more than length 1 - throw(LoweringError(ex1, "More than one value in bare `\$` expression")) + throw(LoweringError(_to_syntax_tree(ex1), + "More than one value in bare `\$` expression")) end _interpolated_value(ctx, ex1, only(vs)) else @@ -108,14 +168,6 @@ function interpolate_ast(::Type{SyntaxTree}, ex, values...) end end -function interpolate_ast(::Type{Expr}, ex, values...) - # TODO: Adjust `_interpolated_value` to ensure that incoming `Expr` data - # structures are treated as AST in Expr compat mode, rather than `K"Value"`? - # Or convert `ex` to `Expr` early during lowering and implement - # `interpolate_ast` for `Expr`? 
- Expr(interpolate_ast(SyntaxTree, ex, values...)) -end - #-------------------------------------------------- # Functions called by closure conversion function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, field_is_box) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 6a4aa5946d508..1f9367e992ab3 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -583,7 +583,14 @@ function JuliaSyntax._expr_leaf_val(ex::SyntaxTree, _...) n end else - get(ex, :value, nothing) + val = get(ex, :value, nothing) + if kind(ex) == K"Value" && val isa Expr || val isa LineNumberNode + # Expr AST embedded in a SyntaxTree should be quoted rather than + # becoming part of the output AST. + QuoteNode(val) + else + val + end end end @@ -774,6 +781,7 @@ end function Base.deleteat!(v::SyntaxList, inds) deleteat!(v.ids, inds) + v end function Base.copy(v::SyntaxList) diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index 887e7d1bb26e0..0c125a1d3145e 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -95,14 +95,14 @@ end @test ex[2].name_val == "a" # interpolations at multiple depths -ex = JuliaLowering.include_string(test_mod, """ +ex = JuliaLowering.include_string(test_mod, raw""" let args = (:(x),:(y)) quote x = 1 y = 2 quote - f(\$\$(args...)) + f($$(args...)) end end end @@ -140,23 +140,85 @@ ex2 = JuliaLowering.eval(test_mod, ex) @test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree # Double interpolation -ex = JuliaLowering.include_string(test_mod, """ +double_interp_ex = JuliaLowering.include_string(test_mod, raw""" let args = (:(xxx),) - :(:(\$\$(args...))) + :(:($$(args...))) end """) Base.eval(test_mod, :(xxx = 111)) -ex2 = JuliaLowering.eval(test_mod, ex) -@test kind(ex2) == K"Value" -@test ex2.value == 111 +dinterp_eval = JuliaLowering.eval(test_mod, double_interp_ex) +@test kind(dinterp_eval) == K"Value" +@test 
dinterp_eval.value == 111 -double_interp_ex = JuliaLowering.include_string(test_mod, """ +multi_interp_ex = JuliaLowering.include_string(test_mod, raw""" let args = (:(x), :(y)) - :(:(\$\$(args...))) + :(:($$(args...))) +end +""") +@test_throws LoweringError JuliaLowering.eval(test_mod, multi_interp_ex) + +# Interpolation of SyntaxTree Identifier vs plain Symbol +symbol_interp = JuliaLowering.include_string(test_mod, raw""" +let + x = :xx # Plain Symbol + y = :(yy) # SyntaxTree K"Identifier" + :(f($x, $y, z)) +end +""") +@test symbol_interp ≈ @ast_ [K"call" + "f"::K"Identifier" + "xx"::K"Identifier" + "yy"::K"Identifier" + "z"::K"Identifier" +] +@test sourcetext(symbol_interp[2]) == "\$x" # No provenance for plain Symbol +@test sourcetext(symbol_interp[3]) == "yy" + +# Mixing Expr into a SyntaxTree doesn't graft it onto the SyntaxTree AST but +# treats it as a plain old value. (This is the conservative API choice and also +# encourages ASTs to be written in the new form. However we may choose to +# change this if necessary for compatibility.) 
+expr_interp_is_value = JuliaLowering.include_string(test_mod, raw""" +let + x = Expr(:call, :f, :x) + :(g($x)) end """) -@test_throws LoweringError JuliaLowering.eval(test_mod, double_interp_ex) +@test expr_interp_is_value ≈ @ast_ [K"call" + "g"::K"Identifier" + Expr(:call, :f, :x)::K"Value" + # ^^ NB not [K"call" "f"::K"Identifier" "x"::K"Identifier"] +] +@test Expr(expr_interp_is_value) == Expr(:call, :g, QuoteNode(Expr(:call, :f, :x))) + +@testset "Interpolation in Expr compat mode" begin + expr_interp = JuliaLowering.include_string(test_mod, raw""" + let + x = :xx + :(f($x, z)) + end + """, expr_compat_mode=true) + @test expr_interp == Expr(:call, :f, :xx, :z) + + double_interp_expr = JuliaLowering.include_string(test_mod, raw""" + let + x = :xx + :(:(f($$x, $y))) + end + """, expr_compat_mode=true) + @test double_interp_expr == Expr(:quote, Expr(:call, :f, Expr(:$, :xx), Expr(:$, :y))) + + # Test that ASTs are copied before they're seen by the user + @test JuliaLowering.include_string(test_mod, raw""" + exs = [] + for i = 1:2 + push!(exs, :(f(x,y))) + push!(exs[end].args, :z) + end + exs + """, expr_compat_mode=true) == Any[Expr(:call, :f, :x, :y, :z), Expr(:call, :f, :x, :y, :z)] +end end From 16108ddbb6a89b96c903ea6ae4b7818533f7aae1 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 1 Sep 2025 08:35:50 -0700 Subject: [PATCH 1056/1109] Tweak JuliaSyntax import to allow being in Base (JuliaLang/JuliaLowering.jl#58) Changes I've been using for experiments. 
If we ever need to do anything more complex, it's worth looking at [what Compiler.jl does](https://github.com/JuliaLang/julia/blob/339656293405169c8021ea88615049a428413c2d/Compiler/src/Compiler.jl#L3) --- JuliaLowering/src/JuliaLowering.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 4d37b90f09028..03f9531c0defe 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -7,13 +7,17 @@ using Base const _include = Base.IncludeInto(JuliaLowering) using Core: eval -using JuliaSyntax - -using JuliaSyntax: highlight, Kind, @KSet_str -using JuliaSyntax: is_leaf, children, numchildren, head, kind, flags, has_flags, numeric_flags -using JuliaSyntax: filename, first_byte, last_byte, byte_range, sourcefile, source_location, span, sourcetext +if parentmodule(JuliaLowering) === Base + using Base.JuliaSyntax +else + using JuliaSyntax +end -using JuliaSyntax: is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, is_infix_op_call, is_postfix_op_call, is_error +using .JuliaSyntax: highlight, Kind, @KSet_str, is_leaf, children, numchildren, + head, kind, flags, has_flags, numeric_flags, filename, first_byte, + last_byte, byte_range, sourcefile, source_location, span, sourcetext, + is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, + is_infix_op_call, is_postfix_op_call, is_error _include("kinds.jl") _register_kinds() From 8ddf52d70ac3fb39523e028474241215278a21f1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 2 Sep 2025 07:14:33 +1000 Subject: [PATCH 1057/1109] Add statment offset to branch address in CodeInfo conversion (JuliaLang/JuliaLowering.jl#60) When pushing :meta expressions to the start of the CodeInfo block we need to offset goto branch addresses in addition to SSA indices, but this was forgotten. 
--- JuliaLowering/src/eval.jl | 32 ++++++++++++++++---------------- JuliaLowering/test/functions.jl | 14 ++++++++++++++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 9822ff3278125..2ab8b920591f1 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -154,9 +154,9 @@ function to_code_info(ex, mod, funcname, slots) end end - ssa_offset = length(stmts) + stmt_offset = length(stmts) for stmt in children(ex) - push!(stmts, to_lowered_expr(mod, stmt, ssa_offset)) + push!(stmts, to_lowered_expr(mod, stmt, stmt_offset)) add_ir_debug_info!(current_codelocs_stack, stmt) end @@ -223,7 +223,7 @@ function to_code_info(ex, mod, funcname, slots) ) end -function to_lowered_expr(mod, ex, ssa_offset=0) +function to_lowered_expr(mod, ex, stmt_offset=0) k = kind(ex) if is_literal(k) ex.value @@ -257,9 +257,9 @@ function to_lowered_expr(mod, ex, ssa_offset=0) elseif k == K"static_parameter" Expr(:static_parameter, ex.var_id) elseif k == K"SSAValue" - Core.SSAValue(ex.var_id + ssa_offset) + Core.SSAValue(ex.var_id + stmt_offset) elseif k == K"return" - Core.ReturnNode(to_lowered_expr(mod, ex[1], ssa_offset)) + Core.ReturnNode(to_lowered_expr(mod, ex[1], stmt_offset)) elseif k == K"inert" e1 = ex[1] getmeta(ex, :as_Expr, false) ? QuoteNode(Expr(e1)) : e1 @@ -276,38 +276,38 @@ function to_lowered_expr(mod, ex, ssa_offset=0) elseif k == K"Value" ex.value elseif k == K"goto" - Core.GotoNode(ex[1].id) + Core.GotoNode(ex[1].id + stmt_offset) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, ex[1], ssa_offset), ex[2].id) + Core.GotoIfNot(to_lowered_expr(mod, ex[1], stmt_offset), ex[2].id + stmt_offset) elseif k == K"enter" catch_idx = ex[1].id numchildren(ex) == 1 ? 
Core.EnterNode(catch_idx) : - Core.EnterNode(catch_idx, to_lowered_expr(mod, ex[2], ssa_offset)) + Core.EnterNode(catch_idx, to_lowered_expr(mod, ex[2], stmt_offset)) elseif k == K"method" - cs = map(e->to_lowered_expr(mod, e, ssa_offset), children(ex)) + cs = map(e->to_lowered_expr(mod, e, stmt_offset), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] Expr(:method, c1, cs[2:end]...) elseif k == K"newvar" - Core.NewvarNode(to_lowered_expr(mod, ex[1], ssa_offset)) + Core.NewvarNode(to_lowered_expr(mod, ex[1], stmt_offset)) elseif k == K"opaque_closure_method" - args = map(e->to_lowered_expr(mod, e, ssa_offset), children(ex)) + args = map(e->to_lowered_expr(mod, e, stmt_offset), children(ex)) # opaque_closure_method has special non-evaluated semantics for the # `functionloc` line number node so we need to undo a level of quoting @assert args[4] isa QuoteNode args[4] = args[4].value Expr(:opaque_closure_method, args...) elseif k == K"meta" - args = Any[to_lowered_expr(mod, e, ssa_offset) for e in children(ex)] + args = Any[to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. args[1] = args[1].value Expr(:meta, args...) elseif k == K"static_eval" @assert numchildren(ex) == 1 - to_lowered_expr(mod, ex[1], ssa_offset) + to_lowered_expr(mod, ex[1], stmt_offset) elseif k == K"cfunction" - args = Any[to_lowered_expr(mod, e, ssa_offset) for e in children(ex)] + args = Any[to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] if kind(ex[2]) == K"static_eval" args[2] = QuoteNode(args[2]) end @@ -339,7 +339,7 @@ function to_lowered_expr(mod, ex, ssa_offset=0) if isnothing(head) throw(LoweringError(ex, "Unhandled form for kind $k")) end - Expr(head, map(e->to_lowered_expr(mod, e, ssa_offset), children(ex))...) + Expr(head, map(e->to_lowered_expr(mod, e, stmt_offset), children(ex))...) 
end end @@ -355,7 +355,7 @@ function Core.eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) return x end linear_ir = lower(mod, ex, expr_compat_mode) - expr_form = to_lowered_expr(mod, linear_ir) + expr_form = to_lowered_expr(mod, linear_ir, 0) eval(mod, expr_form) end diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index e91e5142479c5..1bfa53012b8d1 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -287,6 +287,20 @@ end """) @test only(methods(test_mod.f_slotflags)).called == 0b0100 + # Branching combined with nospecialize meta in CodeInfo + @test JuliaLowering.include_string(test_mod, """ + begin + function f_branch_meta(@nospecialize(x), cond) + if cond + x + 1 + else + x + 2 + end + end + + (f_branch_meta(10, false), f_branch_meta(20, true)) + end + """) == (12, 21) end @testset "Keyword functions" begin From a0345481d02d986bebd55c73f18a938c895014aa Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Tue, 2 Sep 2025 12:10:26 +1000 Subject: [PATCH 1058/1109] Tools for automatic test case reduction (JuliaLang/JuliaLowering.jl#61) Adds a few tools for verbose debug lowering and automatic test case reduction. 
--- JuliaLowering/test/demo.jl | 76 ++++++++++++++++++--------- JuliaLowering/test/utils.jl | 101 +++++++++++++++++++++++++++++++++++- 2 files changed, 152 insertions(+), 25 deletions(-) diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 1601bf9245355..a34e0ebca97a4 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -3,7 +3,7 @@ using JuliaSyntax using JuliaLowering -using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, is_leaf, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, is_leaf, @ast, numchildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding using JuliaSyntaxFormatter @@ -32,6 +32,37 @@ function formatsrc(ex; kws...) Text(JuliaSyntaxFormatter.formatsrc(ex; kws...)) end +function debug_lower(mod, ex; expr_compat_mode=false, verbose=false, do_eval=false) + ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, expr_compat_mode) + + verbose && @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) + + ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) + verbose && @info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) + + ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) + verbose && @info "Resolved scopes" formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e)) + + ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) + verbose && @info "Closure converted" formatsrc(ex_converted, color_by=:var_id) + + ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) + verbose && @info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) + + ex_expr = JuliaLowering.to_lowered_expr(mod, ex_compiled) + verbose && @info "CodeInfo" ex_expr + + if do_eval + 
eval_result = Base.eval(mod, ex_expr) + verbose && @info "Eval" eval_result + else + eval_result = nothing + end + + (ctx1, ex_macroexpand, ctx2, ex_desugar, ctx3, ex_scoped, ctx4, ex_converted, ctx5, ex_compiled, ex_expr, eval_result) +end + + # Currently broken - need to push info back onto src # function annotate_scopes(mod, ex) # ex = ensure_attributes(ex, var_id=Int) @@ -853,31 +884,28 @@ end """ ex = parsestmt(SyntaxTree, src, filename="foo.jl") -ex = ensure_attributes(ex, var_id=Int) +#ex = ensure_attributes(ex, var_id=Int) #ex = softscope_test(ex) @info "Input code" formatsrc(ex) -in_mod = M -# in_mod=Main -ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(in_mod, ex, false) -@info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) -#@info "Macro expanded" formatsrc(ex_macroexpand, color_by=e->JuliaLowering.flattened_provenance(e)[1:end-1]) - -ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) -@info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) - -ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) -@info "Resolved scopes" formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e)) - -ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) -@info "Closure converted" formatsrc(ex_converted, color_by=:var_id) - -ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) -@info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) - -ex_expr = JuliaLowering.to_lowered_expr(in_mod, ex_compiled) -@info "CodeInfo" ex_expr +(ctx1, ex_macroexpand, + ctx2, ex_desugar, + ctx3, ex_scoped, + ctx4, ex_converted, + ctx5, ex_compiled, + ex_expr, eval_result) = debug_lower(M, ex; verbose=true) + +# Automatic test reduction +# bad = reduce_any_failing_toplevel(JuliaLowering, joinpath(@__DIR__, "../src/desugaring.jl")) +# if !isnothing(bad) +# @error "Reduced expression as code" formatsrc(bad) +# write("bad.jl", JuliaSyntaxFormatter.formatsrc(bad)) +# end 
-eval_result = Base.eval(in_mod, ex_expr) -@info "Eval" eval_result +# Old lowering +# text = read(joinpath(@__DIR__, "../src/desugaring.jl"), String) +# ex = parseall(SyntaxTree, text, filename="desugaring.jl") +# for e in Meta.parseall(text).args +# Meta.lower(JuliaLowering, e) +# end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index bf59f62e13c7b..6134876ef4268 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -18,7 +18,7 @@ using JuliaLowering: makenode, makeleaf, setattr!, sethead!, is_leaf, numchildren, children, @ast, flattened_provenance, showprov, LoweringError, MacroExpansionError, - syntax_graph, Bindings, ScopeLayer + syntax_graph, Bindings, ScopeLayer, mapchildren function _ast_test_graph() graph = SyntaxGraph() @@ -285,3 +285,102 @@ function docstrings_equal(d1, d2; debug=true) end docstrings_equal(d1::Docs.DocStr, d2) = docstrings_equal(Docs.parsedoc(d1), d2) +#------------------------------------------------------------------------------- +# Tools for test case reduction + +function block_reduction_1(is_lowering_error::Function, orig_ex::ST, ex::ST, + curr_path = Int[]) where {ST <: SyntaxTree} + if !is_leaf(ex) + if kind(ex) == K"block" + for i in 1:numchildren(ex) + trial_ex = delete_block_child(orig_ex, orig_ex, curr_path, i) + if is_lowering_error(trial_ex) + # @info "Reduced expression" curr_path i + return trial_ex + end + end + end + for (i,e) in enumerate(children(ex)) + push!(curr_path, i) + res = block_reduction_1(is_lowering_error, orig_ex, e, curr_path) + if !isnothing(res) + return res + end + pop!(curr_path) + end + end + return nothing +end + +# Find children of all `K"block"`s in an expression and try deleting them while +# preserving the invariant `is_lowering_error(reduced) == true`. 
+function block_reduction(is_lowering_error, ex) + reduced = ex + was_reduced = false + while true + r = block_reduction_1(is_lowering_error, reduced, reduced) + if isnothing(r) + return (reduced, was_reduced) + end + reduced = r + was_reduced = true + end +end + +function delete_block_child(ctx, ex, block_path, child_idx, depth=1) + if depth > length(block_path) + cs = copy(children(ex)) + deleteat!(cs, child_idx) + @ast ctx ex [ex cs...] + else + j = block_path[depth] + mapchildren(ctx, ex, j:j) do e + delete_block_child(ctx, e, block_path, child_idx, depth+1) + end + end +end + +function throws_lowering_exc(mod, ex) + try + debug_lower(mod, ex) + return false + catch exc + if exc isa LoweringError + return true + else + rethrow() + end + end +end + +# Parse a file and lower the top level expression one child at a time, finding +# any top level statement that fails lowering and producing a partially reduced +# test case. +function reduce_any_failing_toplevel(mod, filename; do_eval=false) + text = read(filename, String) + ex0 = parseall(SyntaxTree, text; filename) + for ex in children(ex0) + try + ex_compiled = JuliaLowering.lower(mod, ex) + ex_expr = JuliaLowering.to_lowered_expr(mod, ex_compiled) + if do_eval + Base.eval(mod, ex_expr) + end + catch exc + @error "Failure lowering code" ex + if !(exc isa LoweringError) + rethrow() + end + (reduced,was_reduced) = block_reduction(e->throws_lowering_exc(mod,e), ex) + if !was_reduced + @info "No reduction possible" + return ex + else + @info "Reduced code" reduced + return reduced + end + end + end + nothing +end + From 51b4ab445bc1d6ad5985caa1ce29c2444326d7fe Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Sep 2025 04:05:14 +1000 Subject: [PATCH 1059/1109] Fix for Expr->SyntaxTree conversion with interpolated field name (JuliaLang/JuliaLowering.jl#64) The second argument of `K"."` is implicitly `K"inert"` in our representation so we need to remove `QuoteNode` whenever it's present in the Expr form. 
--- JuliaLowering/src/compat.jl | 2 +- JuliaLowering/test/compat.jl | 2 ++ JuliaLowering/test/quoting.jl | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 24b75108c3d79..692811f9e9ed3 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -256,7 +256,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA st_k = K"dotcall" tuple_exprs = collect_expr_parameters(a2, 1) child_exprs = pushfirst!(tuple_exprs, e.args[1]) - elseif a2 isa QuoteNode && a2.value isa Symbol + elseif a2 isa QuoteNode child_exprs[2] = a2.value end elseif e.head === :for diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl index f73628db89181..bafc27f5108ff 100644 --- a/JuliaLowering/test/compat.jl +++ b/JuliaLowering/test/compat.jl @@ -300,6 +300,8 @@ const JL = JuliaLowering "module A end", "baremodule A end", "import A", + "A.x", + "A.\$x", ] for p in programs diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index 0c125a1d3145e..80344f0e3bfc0 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -88,7 +88,7 @@ end ex = JuliaLowering.include_string(test_mod, """ let field_name = :(a) - :(a.\$field_name) + :(x.\$field_name) end """) @test kind(ex[2]) == K"Identifier" From 315a394e09cf7200c63112aacdcb1698004a6f8a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Sep 2025 11:57:17 +1000 Subject: [PATCH 1060/1109] Fixes for escape handling in `Expr->SyntaxTree` conversion (JuliaLang/JuliaLowering.jl#62) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The manual `esc()` system allows macros to emit `Expr(:escape)` in almost any location within the AST so we generally need to unwrap and rewrap such escapes when pattern matching the incoming `Expr` tree during conversion to `SyntaxTree`. 
Also apply the same treatment to `Expr(:hygienic-scope)` in case sombody passes us code from `macroexpand()` or explicitly uses this form. Also fix some other issues found during testing of these changes: * `finally` block shouldn't be added if it's set to `false` in the Expr form * The test implementation of `≈` for ASTs was very buggy due to a previous refactor. Fix this. * Expr form allows any callable object as the macro name, so we can't really reject much here - remove the test for that. * Do not format Expr to string as part of conversion - this could be expensive and also fails in Base in some strange cases of esc() placement (eg, ncat dim argument) * Filter out `K"TOMBSTONE"` inside `expr_to_syntaxtree()` because this can end up in value position which is an error later in linear IR generation. I tacked on a change to convert `@ doc` -> `K"doc"` here as well so that we can use the improved documentation lowering rather than the doc macro's partial reimplementation of lowering. --- JuliaLowering/src/ast.jl | 3 +- JuliaLowering/src/compat.jl | 147 ++++++++++++++------- JuliaLowering/test/compat.jl | 246 +++++++++++++++++++++++++++++++++-- JuliaLowering/test/utils.jl | 6 +- 4 files changed, 336 insertions(+), 66 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index a0e65ce189fb3..6ae7ebf7751ea 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -142,7 +142,8 @@ end function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || - k == K"globalref" || k == K"Placeholder" + k == K"globalref" || k == K"Placeholder" || k == K"MacroName" || + k == "StringMacroName" || k == K"CmdMacroName" makeleaf(graph, srcref, k; name_val=value, kws...) elseif k == K"BindingId" makeleaf(graph, srcref, k; var_id=value, kws...) 
diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 692811f9e9ed3..d617eda685fbd 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -34,7 +34,7 @@ function expr_to_syntaxtree(ctx, @nospecialize(e), lnn::Union{LineNumberNode, No toplevel_src = if isnothing(lnn) # Provenance sinkhole for all nodes until we hit a linenode dummy_src = SourceRef( - SourceFile("No source for expression: $e"), + SourceFile("No source for expression"), 1, JS.GreenNode(K"None", 0)) _insert_tree_node(graph, K"None", dummy_src) else @@ -91,21 +91,23 @@ function collect_expr_parameters(e::Expr, pos::Int) args = Any[e.args[1:pos-1]..., e.args[pos+1:end]...] return _flatten_params!(args, params) end -function _flatten_params!(out::Vector{Any}, p::Expr) +function _flatten_params!(out::Vector{Any}, params::Expr) + p,p_esc = unwrap_esc(params) p1 = expr_parameters(p, 1) if !isnothing(p1) - push!(out, Expr(:parameters, p.args[2:end]...)) - _flatten_params!(out, p1) + push!(out, p_esc(Expr(:parameters, p.args[2:end]...))) + _flatten_params!(out, p_esc(p1)) else - push!(out, p::Any) + push!(out, params::Any) end return out end function expr_parameters(p::Expr, pos::Int) - if length(p.args) >= pos && - p.args[pos] isa Expr && - p.args[pos].head === :parameters - return p.args[pos] + if pos <= length(p.args) + e,_ = unwrap_esc(p.args[pos]) + if e isa Expr && e.head === :parameters + return p.args[pos] + end end return nothing end @@ -113,7 +115,10 @@ end """ If `b` (usually a block) has exactly one non-LineNumberNode argument, unwrap it. 
""" -function maybe_unwrap_arg(b::Expr) +function maybe_unwrap_arg(b) + if !(b isa Expr) + return b + end e1 = findfirst(c -> !isa(c, LineNumberNode), b.args) isnothing(e1) && return b e2 = findfirst(c -> !isa(c, LineNumberNode), b.args[e1+1:end]) @@ -122,7 +127,7 @@ function maybe_unwrap_arg(b::Expr) end function maybe_extract_lnn(b, default) - !(b isa Expr) && return b + !(b isa Expr) && return default lnn_i = findfirst(a->isa(a, LineNumberNode), b.args) return isnothing(lnn_i) ? default : b.args[lnn_i] end @@ -141,7 +146,33 @@ end function is_eventually_call(e) return e isa Expr && (e.head === :call || - e.head in (:where, :(::)) && is_eventually_call(e.args[1])) + e.head in (:escape, :where, :(::)) && is_eventually_call(e.args[1])) +end + +function rewrap_escapes(hyg, ex) + if hyg isa Expr && hyg.head in (:escape, :var"hygienic-scope") + ex = Expr(hyg.head, rewrap_escapes(hyg.args[1], ex)) + if hyg.head === :var"hygienic-scope" + append!(ex.args, @view hyg.args[2:end]) + end + end + return ex +end + +# Unwrap Expr(:escape) and Expr(:hygienic-scope). Return the unwrapped +# expression and a function which will rewrap a derived expression in the +# correct hygiene wrapper. 
+function unwrap_esc(ex) + orig_ex = ex + while ex isa Expr && ex.head in (:escape, :var"hygienic-scope") + @assert length(ex.args) >= 1 + ex = ex.args[1] + end + return ex, e->rewrap_escapes(orig_ex, e) +end + +function unwrap_esc_(e) + unwrap_esc(e)[1] end """ @@ -212,59 +243,61 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA child_exprs = Any[e.args[1], Symbol(op), e.args[2]] elseif e.head === :comparison for i = 2:2:length(child_exprs) - op = child_exprs[i] + op,op_esc = unwrap_esc(child_exprs[i]) @assert op isa Symbol op_s = string(op) if is_dotted_operator(op_s) - child_exprs[i] = Expr(:., Symbol(op_s[2:end])) + child_exprs[i] = Expr(:., op_esc(Symbol(op_s[2:end]))) end end elseif e.head === :macrocall @assert nargs >= 2 - a1 = e.args[1] + a1,a1_esc = unwrap_esc(e.args[1]) child_exprs = collect_expr_parameters(e, 3) if child_exprs[2] isa LineNumberNode src = child_exprs[2] end deleteat!(child_exprs, 2) if a1 isa Symbol - child_exprs[1] = Expr(:MacroName, a1) - elseif a1 isa Expr && a1.head === :(.) && a1.args[2] isa QuoteNode - child_exprs[1] = Expr(:(.), a1.args[1], Expr(:MacroName, a1.args[2].value)) - elseif a1 isa GlobalRef + child_exprs[1] = a1_esc(Expr(:MacroName, a1)) + elseif a1 isa Expr && a1.head === :(.) + a12,a12_esc = unwrap_esc(a1.args[2]) + if a12 isa QuoteNode + child_exprs[1] = a1_esc(Expr(:(.), a1.args[1], + Expr(:MacroName, a12_esc(a12.value)))) + end + elseif a1 isa GlobalRef && a1.mod === Core # TODO (maybe): syntax-introduced macrocalls are listed here for # reference. We probably don't need to convert these. 
if a1.name === Symbol("@cmd") elseif a1.name === Symbol("@doc") + st_k = K"doc" + child_exprs = child_exprs[2:end] elseif a1.name === Symbol("@int128_str") elseif a1.name === Symbol("@int128_str") elseif a1.name === Symbol("@big_str") end - elseif a1 isa Function - # pass - else - error("Unknown macrocall form at $src: $(sprint(dump, e))") - @assert false end elseif e.head === Symbol("'") @assert nargs === 1 st_k = K"call" child_exprs = Any[e.head, e.args[1]] elseif e.head === :. && nargs === 2 - a2 = e.args[2] + a2, a2_esc = unwrap_esc(e.args[2]) if a2 isa Expr && a2.head === :tuple st_k = K"dotcall" - tuple_exprs = collect_expr_parameters(a2, 1) + tuple_exprs = collect_expr_parameters(a2_esc(a2), 1) child_exprs = pushfirst!(tuple_exprs, e.args[1]) elseif a2 isa QuoteNode - child_exprs[2] = a2.value + child_exprs[2] = a2_esc(a2.value) end elseif e.head === :for @assert nargs === 2 child_exprs = Any[_to_iterspec(Any[e.args[1]], false), e.args[2]] elseif e.head === :where @assert nargs >= 2 - if !(e.args[2] isa Expr && e.args[2].head === :braces) + e2,_ = unwrap_esc(e.args[2]) + if !(e2 isa Expr && e2.head === :braces) child_exprs = Any[e.args[1], Expr(:braces, e.args[2:end]...)] end elseif e.head in (:tuple, :vect, :braces) @@ -281,18 +314,19 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA # [catch var (block ...)] # [else (block ...)] # [finally (block ...)]) - if e.args[2] != false || e.args[3] != false + e2 = unwrap_esc_(e.args[2]) + e3 = unwrap_esc_(e.args[3]) + if e2 !== false || e3 !== false push!(child_exprs, Expr(:catch, - e.args[2] === false ? Expr(:catch_var_placeholder) : e.args[2], - e.args[3] === false ? nothing : e.args[3])) + e2 === false ? Expr(:catch_var_placeholder) : e.args[2], + e3 === false ? nothing : e.args[3])) end if nargs >= 5 push!(child_exprs, Expr(:else, e.args[5])) end - if nargs >= 4 - push!(child_exprs, - Expr(:finally, e.args[4] === false ? 
nothing : e.args[4])) + if nargs >= 4 && unwrap_esc_(e.args[4]) !== false + push!(child_exprs, Expr(:finally, e.args[4])) end elseif e.head === :flatten || e.head === :generator st_k = K"generator" @@ -307,36 +341,37 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA push!(child_exprs, _to_iterspec(next.args[2:end], true)) pushfirst!(child_exprs, next.args[1]) elseif e.head === :ncat || e.head === :nrow - dim = popfirst!(child_exprs) + dim = unwrap_esc_(popfirst!(child_exprs)) st_flags |= JS.set_numeric_flags(dim) elseif e.head === :typed_ncat - st_flags |= JS.set_numeric_flags(e.args[2]) + st_flags |= JS.set_numeric_flags(unwrap_esc_(e.args[2])) deleteat!(child_exprs, 2) elseif e.head === :(->) @assert nargs === 2 - if e.args[1] isa Expr && e.args[1].head === :block + a1, a1_esc = unwrap_esc(e.args[1]) + if a1 isa Expr && a1.head === :block # Expr parsing fails to make :parameters here... lam_args = Any[] lam_eqs = Any[] - for a in e.args[1].args + for a in a1.args a isa Expr && a.head === :(=) ? push!(lam_eqs, a) : push!(lam_args, a) end !isempty(lam_eqs) && push!(lam_args, Expr(:parameters, lam_eqs...)) - child_exprs[1] = Expr(:tuple, lam_args...) 
- elseif !(e.args[1] isa Expr && (e.args[1].head in (:tuple, :where))) - child_exprs[1] = Expr(:tuple, e.args[1]) + child_exprs[1] = a1_esc(Expr(:tuple, lam_args...)) + elseif !(a1 isa Expr && (a1.head in (:tuple, :where))) + child_exprs[1] = a1_esc(Expr(:tuple, a1)) end src = maybe_extract_lnn(e.args[2], src) child_exprs[2] = maybe_unwrap_arg(e.args[2]) elseif e.head === :call child_exprs = collect_expr_parameters(e, 2) - a1 = child_exprs[1] + a1,a1_esc = unwrap_esc(child_exprs[1]) if a1 isa Symbol a1s = string(a1) if is_dotted_operator(a1s) # non-assigning dotop like .+ or .== st_k = K"dotcall" - child_exprs[1] = Symbol(a1s[2:end]) + child_exprs[1] = a1_esc(Symbol(a1s[2:end])) end end elseif e.head === :function @@ -362,17 +397,20 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA # SyntaxTree: # (call f args... (do (tuple lam_args...) (block ...))) callargs = collect_expr_parameters(e.args[1], 2) - fname = string(callargs[1]) if e.args[1].head === :macrocall st_k = K"macrocall" - callargs[1] = Expr(:MacroName, callargs[1]) + c1,c1_esc = unwrap_esc(callargs[1]) + callargs[1] = c1_esc(Expr(:MacroName, c1)) else st_k = K"call" end child_exprs = Any[callargs..., Expr(:do_lambda, e.args[2].args...)] elseif e.head === :let - if nargs >= 1 && !(e.args[1] isa Expr && e.args[1].head === :block) - child_exprs[1] = Expr(:block, e.args[1]) + if nargs >= 1 + a1,_ = unwrap_esc(e.args[1]) + if !(a1 isa Expr && a1.head === :block) + child_exprs[1] = Expr(:block, e.args[1]) + end end elseif e.head === :struct e.args[1] && (st_flags |= JS.MUTABLE_FLAG) @@ -404,6 +442,12 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA st_k = K"latestworld_if_toplevel" elseif e.head === Symbol("hygienic-scope") st_k = K"hygienic_scope" + elseif e.head === :escape + if length(e.args) == 1 && unwrap_esc_(e.args[1]) isa LineNumberNode + # escape containing only a LineNumberNode will become empty and + # thus must be removed before 
lowering sees it. + st_k = K"TOMBSTONE" + end elseif e.head === :meta # Messy and undocumented. Only sometimes we want a K"meta". @assert e.args[1] isa Symbol @@ -490,10 +534,12 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA end #--------------------------------------------------------------------------- - # Throw if this script isn't complete. Finally, insert a new node into the + # Throw if this function isn't complete. Finally, insert a new node into the # graph and recurse on child_exprs if st_k === K"None" error("Unknown expr head at $src: `$(e.head)`\n$(sprint(dump, e))") + elseif st_k === K"TOMBSTONE" + return nothing, src end st_id = _insert_tree_node(graph, st_k, src, st_flags; st_attrs...) @@ -503,7 +549,6 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA if isnothing(child_exprs) return st_id, src else - setflags!(graph, st_id, st_flags) st_child_ids, last_src = _insert_child_exprs(child_exprs, graph, src) setchildren!(graph, st_id, st_child_ids) return st_id, last_src @@ -519,7 +564,9 @@ function _insert_child_exprs(child_exprs::Vector{Any}, graph::SyntaxGraph, last_src = c else (c_id, c_src) = _insert_convert_expr(c, graph, last_src) - push!(st_child_ids, c_id) + if !isnothing(c_id) + push!(st_child_ids, c_id) + end last_src = something(c_src, src) end end diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl index bafc27f5108ff..0d945c85651ba 100644 --- a/JuliaLowering/test/compat.jl +++ b/JuliaLowering/test/compat.jl @@ -302,15 +302,33 @@ const JL = JuliaLowering "import A", "A.x", "A.\$x", + "try x catch e; y end", + "try x finally y end", + "try x catch e; y finally z end", + "try x catch e; y else z end", + "try x catch e; y else z finally w end", ] for p in programs - @testset "`$p`" begin + @testset "`$(repr(p))`" begin st_good = JS.parsestmt(JL.SyntaxTree, p; ignore_errors=true) st_test = JL.expr_to_syntaxtree(Expr(st_good)) @test 
st_roughly_equal(;st_good, st_test) end end + + # toplevel has a special parsing mode where docstrings and a couple of + # other things are enabled + toplevel_programs = [ + "\"docstr\"\nthing_to_be_documented", + ] + for p in toplevel_programs + @testset "`$(repr(p))`" begin + st_good = JS.parseall(JL.SyntaxTree, p; ignore_errors=true) + st_test = JL.expr_to_syntaxtree(Expr(st_good)) + @test st_roughly_equal(;st_good, st_test) + end + end end @testset "provenance via scavenging for LineNumberNodes" begin @@ -364,17 +382,22 @@ const JL = JuliaLowering st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(1)) # sanity: ensure we're testing the tree we expect - @test kind(st) === K"block" - @test kind(st[1]) === K"try" - @test kind(st[1][1]) === K"block" - @test kind(st[1][1][1]) === K"Identifier" && st[1][1][1].name_val === "maybe" - @test kind(st[1][1][2]) === K"Identifier" && st[1][1][2].name_val === "lots" - @test kind(st[1][1][3]) === K"Identifier" && st[1][1][3].name_val === "of" - @test kind(st[1][1][4]) === K"Identifier" && st[1][1][4].name_val === "lines" - @test kind(st[1][2]) === K"catch" - @test kind(st[1][2][1]) === K"Identifier" && st[1][2][1].name_val === "exc" - @test kind(st[1][2][2]) === K"block" - @test kind(st[1][2][2][1]) === K"Identifier" && st[1][2][2][1].name_val === "y" + @test st ≈ @ast_ [K"block" + [K"try" + [K"block" + "maybe"::K"Identifier" + "lots"::K"Identifier" + "of"::K"Identifier" + "lines"::K"Identifier" + ] + [K"catch" + "exc"::K"Identifier" + [K"block" + "y"::K"Identifier" + ] + ] + ] + ] @test let lnn = st.source; lnn isa LineNumberNode && lnn.line === 1; end @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 2; end @@ -388,5 +411,204 @@ const JL = JuliaLowering @test let lnn = st[1][2][2].source; lnn isa LineNumberNode && lnn.line === 6; end @test let lnn = st[1][2][2][1].source; lnn isa LineNumberNode && lnn.line === 8; end + st_shortfunc = JuliaLowering.expr_to_syntaxtree( + Expr(:block, + 
LineNumberNode(11), + Expr(:(=), + Expr(:call, :f), + :body)) + ) + @test st_shortfunc ≈ @ast_ [K"block" + [K"function" + [K"call" "f"::K"Identifier"] + "body"::K"Identifier" + ] + ] + @test let lnn = st_shortfunc[1][1].source; lnn isa LineNumberNode && lnn.line === 11; end + + st_shortfunc_2 = JuliaLowering.expr_to_syntaxtree( + Expr(:block, + LineNumberNode(11), + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(22), + :body))) + ) + @test st_shortfunc_2 ≈ @ast_ [K"block" + [K"function" + [K"call" "f"::K"Identifier"] + "body"::K"Identifier" + ] + ] + @test let lnn = st_shortfunc_2[1][1].source; lnn isa LineNumberNode && lnn.line === 22; end + end + + @testset "`Expr(:escape)` handling" begin + # `x.y` with quoted y escaped (this esc does nothing, but is permitted by + # the existing expander) + @test JuliaLowering.expr_to_syntaxtree(Expr(:(.), :x, esc(QuoteNode(:y)))) ≈ + @ast_ [K"." + "x"::K"Identifier" + [K"escape" + "y"::K"Identifier" + ] + ] + + # `f(x; y)` with parameters escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :f, esc(Expr(:parameters, :y)), :x)) ≈ + @ast_ [K"call" + "f"::K"Identifier" + "x"::K"Identifier" + [K"escape" + [K"parameters" + "y"::K"Identifier" + ] + ] + ] + + # `.+(x)` with operator escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, esc(Symbol(".+")), :x)) ≈ + @ast_ [K"dotcall" + [K"escape" "+"::K"Identifier"] + "x"::K"Identifier" + ] + + # `let x \n end` with binding escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:let, esc(:x), Expr(:block))) ≈ + @ast_ [K"let" + [K"block" [K"escape" "x"::K"Identifier"]] + [K"block"] + ] + + # `x .+ y` with .+ escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:comparison, :x, esc(Symbol(".+")), :y)) ≈ + @ast_ [K"comparison" + "x"::K"Identifier" + [K"." 
+ [K"escape" "+"::K"Identifier"] + ] + "y"::K"Identifier" + ] + + # `@mac x` with macro name escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(Symbol("@mac")), nothing, :x)) ≈ + @ast_ [K"macrocall" + [K"escape" "@mac"::K"MacroName"] + "x"::K"Identifier" + ] + + # `@mac x` with macro name escaped + @test JuliaLowering.expr_to_syntaxtree( + Expr(:macrocall, esc(Expr(:(.), :A, QuoteNode(Symbol("@mac")))), nothing, :x) + ) ≈ @ast_ [K"macrocall" + [K"escape" + [K"." + "A"::K"Identifier" + "@mac"::K"MacroName" + ] + ] + "x"::K"Identifier" + ] + + # `x where y` + @test JuliaLowering.expr_to_syntaxtree(Expr(:where, :x, esc(:y))) ≈ + @ast_ [K"where" + "x"::K"Identifier" + [K"braces" + [K"escape" "y"::K"Identifier"] + ] + ] + + # Some weirdly placed esc's in try-catch + # `try body1 catch exc \n end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, :exc, esc(false))) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"catch" + "exc"::K"Identifier" + "nothing"::K"core" + ] + ] + # `try body1 catch \n body2 \n end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), :body2)) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"catch" + ""::K"Placeholder" + "body2"::K"Identifier" + ] + ] + # `try body1 finally body2 end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), esc(false), :body2)) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"finally" + "body2"::K"Identifier" + ] + ] + + # `try body1 finally body2 end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), esc(false), esc(false), :body2)) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"else" + "body2"::K"Identifier" + ] + ] + + # [x ;;; y] with dim escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:ncat, esc(3), :x, :y)) ≈ + @ast_ [K"ncat"(syntax_flags=JuliaSyntax.set_numeric_flags(3)) + "x"::K"Identifier" + "y"::K"Identifier" + ] + + # T[x ;;; y] with dim escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:typed_ncat, :T, 
esc(3), :x, :y)) ≈ + @ast_ [K"typed_ncat"(syntax_flags=JuliaSyntax.set_numeric_flags(3)) + "T"::K"Identifier" + "x"::K"Identifier" + "y"::K"Identifier" + ] + + # One example of hygienic-scope (handled with the same mechanism as escape) + @test JuliaLowering.expr_to_syntaxtree( + Expr(:macrocall, Expr(:var"hygienic-scope", Symbol("@mac"), :other, :args), nothing, :x)) ≈ + @ast_ [K"macrocall" + [K"hygienic_scope" + "@mac"::K"MacroName" + "other"::K"Identifier" # (<- normally a Module) + "args"::K"Identifier" # (<- normally a LineNumberNode) + ] + "x"::K"Identifier" + ] + + # One example of double escaping + @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(esc(Symbol("@mac"))), nothing, :x)) ≈ + @ast_ [K"macrocall" + [K"escape" [K"escape" "@mac"::K"MacroName"]] + "x"::K"Identifier" + ] + + # One example of nested escape and hygienic-scope + @test JuliaLowering.expr_to_syntaxtree( + Expr(:macrocall, + Expr(:var"hygienic-scope", esc(Symbol("@mac")), :other, :args), + nothing, + :x)) ≈ + @ast_ [K"macrocall" + [K"hygienic_scope" + [K"escape" + "@mac"::K"MacroName" + ] + "other"::K"Identifier" # (<- normally a Module) + "args"::K"Identifier" # (<- normally a LineNumberNode) + ] + "x"::K"Identifier" + ] + + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, esc(LineNumberNode(1)))) ≈ @ast_ [K"block"] + end end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 6134876ef4268..7cc0783d1149e 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -49,13 +49,13 @@ function ≈(ex1, ex2) return false end if is_leaf(ex1) + return get(ex1, :value, nothing) == get(ex2, :value, nothing) && + get(ex1, :name_val, nothing) == get(ex2, :name_val, nothing) + else if numchildren(ex1) != numchildren(ex2) return false end return all(c1 ≈ c2 for (c1,c2) in zip(children(ex1), children(ex2))) - else - return get(ex1, :value, nothing) == get(ex2, :value, nothing) && - get(ex1, :name_val, nothing) == get(ex2, :name_val, nothing) end end 
From 7799b0bbb19669d41478da9fc5022c23ff3526fc Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 3 Sep 2025 18:46:32 +1000 Subject: [PATCH 1061/1109] World age fixes for macro expansion and generated functions (JuliaLang/JuliaLowering.jl#63) * Support for expanding macros in a specific world age * Connect macro expansion world to Core hook * Fixes for CodeInfo generation in GeneratedFunctionStub - ensure `isva` and edges to bindings are set as is done in Base. * Test non-generated branch of generated function --- JuliaLowering/src/eval.jl | 10 ++++---- JuliaLowering/src/hooks.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 24 +++++++++--------- JuliaLowering/src/runtime.jl | 28 ++++++++++++++++----- JuliaLowering/src/syntax_graph.jl | 2 +- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/functions.jl | 23 +++++++++++++---- JuliaLowering/test/macros.jl | 37 +++++++++++++++++++++------- 8 files changed, 88 insertions(+), 40 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 2ab8b920591f1..27d976e705646 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,5 +1,5 @@ -function lower(mod::Module, ex0, expr_compat_mode=false) - ctx1, ex1 = expand_forms_1( mod, ex0, expr_compat_mode) +function lower(mod::Module, ex0; expr_compat_mode=false, world=Base.get_world_counter()) + ctx1, ex1 = expand_forms_1( mod, ex0, expr_compat_mode, world) ctx2, ex2 = expand_forms_2( ctx1, ex1) ctx3, ex3 = resolve_scopes( ctx2, ex2) ctx4, ex4 = convert_closures(ctx3, ex3) @@ -7,8 +7,8 @@ function lower(mod::Module, ex0, expr_compat_mode=false) ex5 end -function macroexpand(mod::Module, ex, expr_compat_mode=false) - ctx1, ex1 = expand_forms_1(mod, ex, expr_compat_mode) +function macroexpand(mod::Module, ex; expr_compat_mode=false, world=Base.get_world_counter()) + ctx1, ex1 = expand_forms_1(mod, ex, expr_compat_mode, world) ex1 end @@ -354,7 +354,7 @@ function Core.eval(mod::Module, ex::SyntaxTree; 
expr_compat_mode::Bool=false) end return x end - linear_ir = lower(mod, ex, expr_compat_mode) + linear_ir = lower(mod, ex; expr_compat_mode) expr_form = to_lowered_expr(mod, linear_ir, 0) eval(mod, expr_form) end diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index f030923284bfe..3ef25fe78a97a 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -17,7 +17,7 @@ function core_lowering_hook(@nospecialize(code), mod::Module, st0 = code isa Expr ? expr_to_syntaxtree(code, LineNumberNode(line, file)) : code try - ctx1, st1 = expand_forms_1( mod, st0, true) + ctx1, st1 = expand_forms_1( mod, st0, true, world) ctx2, st2 = expand_forms_2( ctx1, st1) ctx3, st3 = resolve_scopes( ctx2, st2) ctx4, st4 = convert_closures(ctx3, st3) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d6ab9815ad809..4b2160c0bcef5 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -21,11 +21,12 @@ struct MacroExpansionContext{GraphType} <: AbstractLoweringContext scope_layers::Vector{ScopeLayer} scope_layer_stack::Vector{LayerId} expr_compat_mode::Bool + macro_world::UInt end -function MacroExpansionContext(graph::SyntaxGraph, mod::Module, expr_compat_mode::Bool) +function MacroExpansionContext(graph::SyntaxGraph, mod::Module, expr_compat_mode::Bool, world::UInt) layers = ScopeLayer[ScopeLayer(1, mod, 0, false)] - MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)], expr_compat_mode) + MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)], expr_compat_mode, world) end current_layer(ctx::MacroExpansionContext) = ctx.scope_layers[last(ctx.scope_layer_stack)] @@ -217,11 +218,10 @@ function expand_macro(ctx, ex) # age changes concurrently. 
# # TODO: Allow this to be passed in - macro_world = Base.get_world_counter() - if hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}; world=macro_world) + if hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}; world=ctx.macro_world) macro_args = prepare_macro_args(ctx, mctx, raw_args) expanded = try - Base.invoke_in_world(macro_world, macfunc, macro_args...) + Base.invoke_in_world(ctx.macro_world, macfunc, macro_args...) catch exc newexc = exc isa MacroExpansionError ? MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) : @@ -260,14 +260,14 @@ function expand_macro(ctx, ex) push!(macro_args, Expr(arg)) end expanded = try - Base.invoke_in_world(macro_world, macfunc, macro_args...) + Base.invoke_in_world(ctx.macro_world, macfunc, macro_args...) catch exc if exc isa MethodError && exc.f === macfunc - if !isempty(methods_in_world(macfunc, Tuple{typeof(mctx), Vararg{Any}}, macro_world)) + if !isempty(methods_in_world(macfunc, Tuple{typeof(mctx), Vararg{Any}}, ctx.macro_world)) # If the macro has at least some methods implemented in the # new style, assume the user meant to call one of those # rather than any old-style macro methods which might exist - exc = MethodError(macfunc, (prepare_macro_args(ctx, mctx, raw_args)..., ), macro_world) + exc = MethodError(macfunc, (prepare_macro_args(ctx, mctx, raw_args)..., ), ctx.macro_world) end end rethrow(MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc)) @@ -280,7 +280,7 @@ function expand_macro(ctx, ex) # Module scope for the returned AST is the module where this particular # method was defined (may be different from `parentmodule(macfunc)`) mod_for_ast = lookup_method_instance(macfunc, macro_args, - macro_world).def.module + ctx.macro_world).def.module new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, current_layer_id(ctx), true) push!(ctx.scope_layers, new_layer) @@ -460,18 +460,18 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) end end 
-function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool) +function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt) graph = ensure_attributes(syntax_graph(ex), var_id=IdTag, scope_layer=LayerId, __macro_ctx__=Nothing, meta=CompileHints) - ctx = MacroExpansionContext(graph, mod, expr_compat_mode) + ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) graph2 = delete_attributes(graph, :__macro_ctx__) # TODO: Returning the context with pass-specific mutable data is a bad way # to carry state into the next pass. We might fix this by attaching such # data to the graph itself as global attributes? - ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, LayerId[], expr_compat_mode) + ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, LayerId[], expr_compat_mode, macro_world) return ctx2, reparent(ctx2, ex2) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index a88a5966fb974..69f1a5c3fd147 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -338,12 +338,9 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # # TODO: Reduce duplication where possible. - mod = parentmodule(g.gen) - # Attributes from parsing graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, value=Any, name_val=String) - # Attributes for macro expansion graph = ensure_attributes(graph, var_id=IdTag, @@ -355,8 +352,11 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a is_toplevel_thunk=Bool ) - # Macro expansion - ctx1 = MacroExpansionContext(graph, mod, false) + # Macro expansion. Looking at Core.GeneratedFunctionStub, it seems that + # macros emitted by the generator are currently expanded in the latest + # world, so do that for compatibility. 
+ macro_world = typemax(UInt) + ctx1 = MacroExpansionContext(graph, source.module, false, macro_world) # Run code generator - this acts like a macro expander and like a macro # expander it gets a MacroContext. @@ -375,7 +375,8 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Expand any macros emitted by the generator ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0)) ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__), - ctx1.bindings, ctx1.scope_layers, LayerId[], false) + ctx1.bindings, ctx1.scope_layers, + LayerId[], false, macro_world) ex1 = reparent(ctx1, ex1) # Desugaring @@ -398,6 +399,21 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a ctx5, ex5 = linearize_ir(ctx4, ex4) ci = to_lowered_expr(mod, ex5) @assert ci isa Core.CodeInfo + + # See GeneratedFunctionStub code in base/expr.jl + ci.isva = source.isva + code = ci.code + bindings = IdSet{Core.Binding}() + for i = 1:length(code) + stmt = code[i] + if isa(stmt, GlobalRef) + push!(bindings, convert(Core.Binding, stmt)) + end + end + if !isempty(bindings) + ci.edges = Core.svec(bindings...) + end + return ci end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 1f9367e992ab3..05ff9005f7344 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -689,7 +689,7 @@ macro SyntaxTree(ex_old) ex = _find_SyntaxTree_macro(full_ex, __source__.line) # 4. Do the first step of JuliaLowering's syntax lowering to get # syntax interpolations to work - _, ex1 = expand_forms_1(__module__, ex, false) + _, ex1 = expand_forms_1(__module__, ex, false, Base.tls_world_age()) @assert kind(ex1) == K"call" && ex1[1].value == interpolate_ast Expr(:call, :interpolate_ast, SyntaxTree, ex1[3][1], map(e->_scope_layer_1_to_esc!(Expr(e)), ex1[4:end])...) 
diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index a34e0ebca97a4..af7c0631665d7 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -33,7 +33,7 @@ function formatsrc(ex; kws...) end function debug_lower(mod, ex; expr_compat_mode=false, verbose=false, do_eval=false) - ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, expr_compat_mode) + ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, expr_compat_mode, Base.get_world_counter()) verbose && @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 1bfa53012b8d1..42c45ee5ca3db 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -444,19 +444,32 @@ end function f_partially_gen(x::NTuple{N,T}) where {N,T} shared = :shared_stuff if @generated + if N == 2 + error("intentionally broken codegen (will trigger nongen branch)") + end quote - unshared = ($x, $N, $T) + unshared = (:gen, ($x, $N, $T)) end else - # Uuuum. How do we actually test both sides of this branch??? 
- unshared = :nongen # (typeof(x), N, T) + unshared = (:nongen, (typeof(x), N, T)) end (shared, unshared) end - f_partially_gen((1,2,3,4,5)) + (f_partially_gen((1,2)), f_partially_gen((1,2,3,4,5))) end - """) == (:shared_stuff, (NTuple{5,Int}, 5, Int)) + """) == ((:shared_stuff, (:nongen, (NTuple{2,Int}, 2, Int))), + (:shared_stuff, (:gen, (NTuple{5,Int}, 5, Int)))) + + # Test generated function edges to bindings + # (see also https://github.com/JuliaLang/julia/pull/57230) + JuliaLowering.include_string(test_mod, raw""" + const delete_me = 4 + @generated f_generated_return_delete_me() = return :(delete_me) + """) + @test test_mod.f_generated_return_delete_me() == 4 + Base.delete_binding(test_mod, :delete_me) + @test_throws UndefVarError test_mod.f_generated_return_delete_me() end @testset "Broadcast" begin diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index a23327748106f..76833c4255205 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -1,8 +1,6 @@ -module macros +@testset "macro tests" begin -using JuliaLowering, Test - -module test_mod end +test_mod = Module(:macro_test) JuliaLowering.include_string(test_mod, raw""" module M @@ -110,12 +108,33 @@ M.@recursive 3 """) == (3, (2, (1, 0))) ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl") -ctx, expanded = JuliaLowering.expand_forms_1(test_mod, ex, false) +ctx, expanded = JuliaLowering.expand_forms_1(test_mod, ex, false, Base.get_world_counter()) @test JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) == [ "M.@outer()" "@inner" "y" ] + +# World age support for macro expansion +JuliaLowering.include_string(test_mod, raw""" +macro world_age_test() + :(world1) +end +""") +world1 = Base.get_world_counter() +JuliaLowering.include_string(test_mod, raw""" +macro world_age_test() + :(world2) +end +""") +world2 = Base.get_world_counter() + +call_world_arg_test = 
JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "@world_age_test()") +@test JuliaLowering.expand_forms_1(test_mod, call_world_arg_test, false, world1)[2] ≈ + @ast_ "world1"::K"Identifier" +@test JuliaLowering.expand_forms_1(test_mod, call_world_arg_test, false, world2)[2] ≈ + @ast_ "world2"::K"Identifier" + # Layer parenting @test expanded[1].scope_layer == 2 @test expanded[2].scope_layer == 3 @@ -233,7 +252,7 @@ end catch exc sprint(showerror, exc) end == """ -MacroExpansionError while expanding @oldstyle_error in module Main.macros.test_mod: +MacroExpansionError while expanding @oldstyle_error in module Main.macro_test: @oldstyle_error └─────────────┘ ── Error expanding macro Caused by: @@ -290,10 +309,10 @@ end err = try JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3, 4)") === 1 catch exc - sprint(showerror, exc, context=:module=>@__MODULE__) + sprint(showerror, exc, context=:module=>test_mod) end @test startswith(err, """ - MacroExpansionError while expanding @sig_mismatch in module Main.macros.test_mod: + MacroExpansionError while expanding @sig_mismatch in module Main.macro_test: @sig_mismatch(1, 2, 3, 4) └───────────────────────┘ ── Error expanding macro Caused by: @@ -329,4 +348,4 @@ end """) === (false, false, false, false) end -end # module macros +end From 3322bfa9d042f88996fe8b167efcb62151e62735 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 4 Sep 2025 02:14:37 +1000 Subject: [PATCH 1062/1109] Custom cmd macro support + tests for cmd and string macros (JuliaLang/JuliaLowering.jl#66) --- JuliaLowering/src/ast.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/test/macros_ir.jl | 40 ++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 6ae7ebf7751ea..9c4f9f5b9331e 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -143,7 +143,7 @@ function makeleaf(ctx, srcref, k::Kind, value; kws...) 
graph = syntax_graph(ctx) if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || k == K"globalref" || k == K"Placeholder" || k == K"MacroName" || - k == "StringMacroName" || k == K"CmdMacroName" + k == K"StringMacroName" || k == K"CmdMacroName" makeleaf(graph, srcref, k; name_val=value, kws...) elseif k == K"BindingId" makeleaf(graph, srcref, k; var_id=value, kws...) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 4b2160c0bcef5..ec6174598499a 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -334,7 +334,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) layerid = get(ex, :scope_layer, current_layer_id(ctx)) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) end - elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" + elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || k == K"CmdMacroName" layerid = get(ex, :scope_layer, current_layer_id(ctx)) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) elseif k == K"var" || k == K"char" || k == K"parens" diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 2598a4a92197b..506a6cca5de79 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -15,6 +15,22 @@ module MacroMethods end end +macro strmac_str(ex, suff=nothing) + s = "$(ex[1].value) from strmac" + if !isnothing(suff) + s = "$s with suffix $(suff.value)" + end + s +end + +macro cmdmac_cmd(ex, suff=nothing) + s = "$(ex[1].value) from cmdmac" + if !isnothing(suff) + s = "$s with suffix $(suff.value)" + end + s +end + #******************************************************************************* ######################################## # Simple macro @@ -147,3 +163,27 @@ Suggestion: check for spelling errors or missing imports. 
5 (call %₁ %₄) 6 (return %₅) +######################################## +# Simple string macro +strmac"hello" +#--------------------- +1 (return "hello from strmac") + +######################################## +# String macro with suffix +strmac"hello"blah +#--------------------- +1 (return "hello from strmac with suffix blah") + +######################################## +# Simple cmd macro +cmdmac`hello` +#--------------------- +1 (return "hello from cmdmac") + +######################################## +# Cmd macro with suffix +cmdmac`hello`12345 +#--------------------- +1 (return "hello from cmdmac with suffix 12345") + From d458e6b4f56041e693286507a12e9303f4c30ac7 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:16:38 -0700 Subject: [PATCH 1063/1109] Add timing `@zone`s for profiling each pass (JuliaLang/JuliaLowering.jl#69) --- JuliaLowering/src/closure_conversion.jl | 2 +- JuliaLowering/src/compat.jl | 2 +- JuliaLowering/src/desugaring.jl | 2 +- JuliaLowering/src/eval.jl | 32 ++++++++++++++----------- JuliaLowering/src/linear_ir.jl | 3 +-- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/scope_analysis.jl | 2 +- JuliaLowering/src/utils.jl | 14 +++++++++++ 8 files changed, 38 insertions(+), 21 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 3ac34effb1bbd..d2820c08387c9 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -580,7 +580,7 @@ Invariants: * This pass must not introduce new K"Identifier" - only K"BindingId". 
* Any new binding IDs must be added to the enclosing lambda locals """ -function convert_closures(ctx::VariableAnalysisContext, ex) +@fzone "JL: closures" function convert_closures(ctx::VariableAnalysisContext, ex) ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, ex.lambda_bindings) ex1 = closure_convert_lambda(ctx, ex) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index d617eda685fbd..850c97711017b 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -29,7 +29,7 @@ function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing expr_to_syntaxtree(graph, e, lnn) end -function expr_to_syntaxtree(ctx, @nospecialize(e), lnn::Union{LineNumberNode, Nothing}) +@fzone "JL: expr_to_syntaxtree" function expr_to_syntaxtree(ctx, @nospecialize(e), lnn::Union{LineNumberNode, Nothing}) graph = syntax_graph(ctx) toplevel_src = if isnothing(lnn) # Provenance sinkhole for all nodes until we hit a linenode diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 105b2168573ec..87f4c7ffaded8 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -4586,7 +4586,7 @@ function expand_forms_2(ctx::StatementListCtx, args...) expand_forms_2(ctx.ctx, args...) 
end -function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree) +@fzone "JL: desugar" function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree) ctx1 = DesugaringContext(ctx, ctx.expr_compat_mode) ex1 = expand_forms_2(ctx1, reparent(ctx1, ex)) ctx1, ex1 diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 27d976e705646..102215ce5cf5e 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -156,7 +156,7 @@ function to_code_info(ex, mod, funcname, slots) stmt_offset = length(stmts) for stmt in children(ex) - push!(stmts, to_lowered_expr(mod, stmt, stmt_offset)) + push!(stmts, _to_lowered_expr(mod, stmt, stmt_offset)) add_ir_debug_info!(current_codelocs_stack, stmt) end @@ -223,7 +223,11 @@ function to_code_info(ex, mod, funcname, slots) ) end -function to_lowered_expr(mod, ex, stmt_offset=0) +@fzone "JL: to_lowered_expr" function to_lowered_expr(mod, ex) + _to_lowered_expr(mod, ex, 0) +end + +function _to_lowered_expr(mod, ex, stmt_offset) k = kind(ex) if is_literal(k) ex.value @@ -259,7 +263,7 @@ function to_lowered_expr(mod, ex, stmt_offset=0) elseif k == K"SSAValue" Core.SSAValue(ex.var_id + stmt_offset) elseif k == K"return" - Core.ReturnNode(to_lowered_expr(mod, ex[1], stmt_offset)) + Core.ReturnNode(_to_lowered_expr(mod, ex[1], stmt_offset)) elseif k == K"inert" e1 = ex[1] getmeta(ex, :as_Expr, false) ? QuoteNode(Expr(e1)) : e1 @@ -278,36 +282,36 @@ function to_lowered_expr(mod, ex, stmt_offset=0) elseif k == K"goto" Core.GotoNode(ex[1].id + stmt_offset) elseif k == K"gotoifnot" - Core.GotoIfNot(to_lowered_expr(mod, ex[1], stmt_offset), ex[2].id + stmt_offset) + Core.GotoIfNot(_to_lowered_expr(mod, ex[1], stmt_offset), ex[2].id + stmt_offset) elseif k == K"enter" catch_idx = ex[1].id numchildren(ex) == 1 ? 
Core.EnterNode(catch_idx) : - Core.EnterNode(catch_idx, to_lowered_expr(mod, ex[2], stmt_offset)) + Core.EnterNode(catch_idx, _to_lowered_expr(mod, ex[2], stmt_offset)) elseif k == K"method" - cs = map(e->to_lowered_expr(mod, e, stmt_offset), children(ex)) + cs = map(e->_to_lowered_expr(mod, e, stmt_offset), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] Expr(:method, c1, cs[2:end]...) elseif k == K"newvar" - Core.NewvarNode(to_lowered_expr(mod, ex[1], stmt_offset)) + Core.NewvarNode(_to_lowered_expr(mod, ex[1], stmt_offset)) elseif k == K"opaque_closure_method" - args = map(e->to_lowered_expr(mod, e, stmt_offset), children(ex)) + args = map(e->_to_lowered_expr(mod, e, stmt_offset), children(ex)) # opaque_closure_method has special non-evaluated semantics for the # `functionloc` line number node so we need to undo a level of quoting @assert args[4] isa QuoteNode args[4] = args[4].value Expr(:opaque_closure_method, args...) elseif k == K"meta" - args = Any[to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] + args = Any[_to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. args[1] = args[1].value Expr(:meta, args...) elseif k == K"static_eval" @assert numchildren(ex) == 1 - to_lowered_expr(mod, ex[1], stmt_offset) + _to_lowered_expr(mod, ex[1], stmt_offset) elseif k == K"cfunction" - args = Any[to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] + args = Any[_to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] if kind(ex[2]) == K"static_eval" args[2] = QuoteNode(args[2]) end @@ -339,13 +343,13 @@ function to_lowered_expr(mod, ex, stmt_offset=0) if isnothing(head) throw(LoweringError(ex, "Unhandled form for kind $k")) end - Expr(head, map(e->to_lowered_expr(mod, e, stmt_offset), children(ex))...) + Expr(head, map(e->_to_lowered_expr(mod, e, stmt_offset), children(ex))...) 
end end #------------------------------------------------------------------------------- # Our version of eval takes our own data structures -function Core.eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) +@fzone "JL: eval" function Core.eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) k = kind(ex) if k == K"toplevel" x = nothing @@ -355,7 +359,7 @@ function Core.eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) return x end linear_ir = lower(mod, ex; expr_compat_mode) - expr_form = to_lowered_expr(mod, linear_ir, 0) + expr_form = to_lowered_expr(mod, linear_ir) eval(mod, expr_form) end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index b7c11678a5cd3..fe1dacafbc7e5 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -1109,7 +1109,7 @@ Most of the compliexty of this pass is in lowering structured control flow (if, loops, etc) to gotos and exception handling to enter/leave. We also convert `K"BindingId"` into K"slot", `K"globalref"` or `K"SSAValue` as appropriate. 
""" -function linearize_ir(ctx, ex) +@fzone "JL: linearize" function linearize_ir(ctx, ex) graph = ensure_attributes(ctx.graph, slots=Vector{Slot}, mod=Module, @@ -1127,4 +1127,3 @@ function linearize_ir(ctx, ex) res = compile_lambda(_ctx, reparent(_ctx, ex)) _ctx, res end - diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index ec6174598499a..d233418279d44 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -460,7 +460,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) end end -function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt) +@fzone "JL: macroexpand" function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt) graph = ensure_attributes(syntax_graph(ex), var_id=IdTag, scope_layer=LayerId, diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 59d8bfa515b70..f13c41c143356 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -802,7 +802,7 @@ metadata about each binding. This pass also records the set of binding IDs used locally within the enclosing lambda form and information about variables captured by closures. 
""" -function resolve_scopes(ctx::DesugaringContext, ex) +@fzone "JL: resolve_scopes" function resolve_scopes(ctx::DesugaringContext, ex) ctx2 = ScopeResolutionContext(ctx) ex2 = resolve_scopes(ctx2, reparent(ctx2, ex)) ctx3 = VariableAnalysisContext(ctx2.graph, ctx2.bindings, ctx2.mod, ex2.lambda_bindings) diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl index 15c1b27b14db9..a3807ae24a09e 100644 --- a/JuliaLowering/src/utils.jl +++ b/JuliaLowering/src/utils.jl @@ -169,3 +169,17 @@ function _print_ir(io::IO, ex, indent) end end end + +# Wrap a function body in Base.Compiler.@zone for profiling +if isdefined(Base.Compiler, Symbol("@zone")) + macro fzone(str, f) + @assert f isa Expr && f.head === :function && length(f.args) === 2 && str isa String + esc(Expr(:function, f.args[1], + # Use source of our caller, not of this macro. + Expr(:macrocall, :(Base.Compiler.var"@zone"), __source__, str, f.args[2]))) + end +else + macro fzone(str, f) + esc(f) + end +end From d81c5a592357d6c270fb93b66928c607e724121a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 4 Sep 2025 11:23:47 +1000 Subject: [PATCH 1064/1109] Separate `JuliaLowering.eval()` from `Core.eval()` (JuliaLang/JuliaLowering.jl#70) Overloading `Core.eval()` with `SyntaxTree` was a cute trick but I believe it causes invalidations in the include machinery (Base.IncludeInto) and doesn't really integrate JuliaLowering properly because it doesn't let us implement `Core.eval(::Module, ::Expr)`. Instead we need hooks to do the integration properly. For now, implementing `eval` as a separate function helps quite a lot with precompile time - about a 15% reduction on my machine. 
--- JuliaLowering/src/JuliaLowering.jl | 3 +-- JuliaLowering/src/eval.jl | 8 ++++---- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/runtime.jl | 8 ++++---- JuliaLowering/test/scopes.jl | 2 +- JuliaLowering/test/utils.jl | 2 +- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl index 03f9531c0defe..d8ff05dd013e8 100644 --- a/JuliaLowering/src/JuliaLowering.jl +++ b/JuliaLowering/src/JuliaLowering.jl @@ -1,11 +1,10 @@ +# Use a baremodule because we're implementing `include` and `eval` baremodule JuliaLowering -# ^ Use baremodule because we're implementing `Base.include` and `Core.eval`. using Base # We define a separate _include() for use in this module to avoid mixing method # tables with the public `JuliaLowering.include()` API const _include = Base.IncludeInto(JuliaLowering) -using Core: eval if parentmodule(JuliaLowering) === Base using Base.JuliaSyntax diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 102215ce5cf5e..8233fcf31e6e8 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -59,7 +59,7 @@ else end) end - eval(@__MODULE__, code) + Core.eval(@__MODULE__, code) end end @@ -349,7 +349,7 @@ end #------------------------------------------------------------------------------- # Our version of eval takes our own data structures -@fzone "JL: eval" function Core.eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) +@fzone "JL: eval" function eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) k = kind(ex) if k == K"toplevel" x = nothing @@ -359,8 +359,8 @@ end return x end linear_ir = lower(mod, ex; expr_compat_mode) - expr_form = to_lowered_expr(mod, linear_ir) - eval(mod, expr_form) + thunk = to_lowered_expr(mod, linear_ir) + Core.eval(mod, thunk) end """ diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d233418279d44..d946b32009477 100644 --- 
a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -149,7 +149,7 @@ function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::Syn mod = current_layer(ctx).mod expr_form = to_lowered_expr(mod, ex5) try - eval(mod, expr_form) + Core.eval(mod, expr_form) catch err throw(MacroExpansionError(mctx, ex, "Macro not found", :all, err)) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 69f1a5c3fd147..81e83b995734d 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -229,7 +229,7 @@ function eval_module(parentmod, modname, expr_compat_mode, body) # mod = @ccall jl_new_module(Symbol(modname)::Symbol, parentmod::Module)::Any # ... name = Symbol(modname) - eval(parentmod, :( + Core.eval(parentmod, :( baremodule $name $eval($name, $body; expr_compat_mode=$expr_compat_mode) end @@ -246,7 +246,7 @@ function eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, pat ex = isnothing(from) ? Expr(head, paths...) : Expr(head, Expr(Symbol(":"), from, paths...)) - Base.eval(to, ex) + Core.eval(to, ex) end end @@ -254,13 +254,13 @@ function eval_using(to::Module, path::Expr) if _Base_has_eval_import Base._eval_using(to, path) else - Base.eval(to, Expr(:using, path)) + Core.eval(to, Expr(:using, path)) end end function eval_public(mod::Module, is_exported::Bool, identifiers) # symbol jl_module_public is no longer exported as of #57765 - eval(mod, Expr((is_exported ? :export : :public), map(Symbol, identifiers)...)) + Core.eval(mod, Expr((is_exported ? 
:export : :public), map(Symbol, identifiers)...)) end #-------------------------------------------------- diff --git a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl index 25228c6fde89e..e327343eb03e6 100644 --- a/JuliaLowering/test/scopes.jl +++ b/JuliaLowering/test/scopes.jl @@ -66,7 +66,7 @@ function wrapscope(ex, scope_type) end assign_z_2 = parsestmt(SyntaxTree, "begin z = 2 end", filename="foo.jl") -JuliaLowering.eval(test_mod, :(z=1)) +Base.eval(test_mod, :(z=1)) @test test_mod.z == 1 # neutral (eg, for loops) and hard (eg, let) scopes create a new binding for z JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :neutral)) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 7cc0783d1149e..74f50874f4cc0 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -163,7 +163,7 @@ function format_ir_for_test(mod, case) if kind(ex) == K"macrocall" && kind(ex[1]) == K"MacroName" && ex[1].name_val == "@ast_" # Total hack, until @ast_ can be implemented in terms of new-style # macros. - ex = JuliaLowering.eval(mod, Expr(ex)) + ex = Base.eval(mod, Expr(ex)) end x = JuliaLowering.lower(mod, ex) if case.expect_error From 497c442aa69a20b8f86cfaaebd3adfec2b84c98f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 5 Sep 2025 16:34:27 +1000 Subject: [PATCH 1065/1109] Fixes for global and local decls in value position (JuliaLang/JuliaLowering.jl#67) Disallows "use" of the value of `global x`, except in tail position in top level thunks - in those cases, return `nothing` so that `global x` can be used nested within of a top level thunk as long as it's "statement like". This is normally harmless as one cannot observe the value from tail position except in special circumstances - namely the return value of `eval()` (and things which call eval, such as `include()`). 
It's a conservative relaxation of the current rules where globals in value position are only allowed in top level code if they're not nested in other syntax. While we're thinking about this, also disallow a bare `local x` in a top level thunk because this cannot have useful side effects and is just confusing when it occurs outside a block construct. (This is not currently disallowed for `local` arising from macro expansions because it's not an obvious user error in that case. It could possibly arise as a valid macro expansion of a trivial case, for some macros?) --- JuliaLowering/src/kinds.jl | 2 + JuliaLowering/src/linear_ir.jl | 16 +++++-- JuliaLowering/src/macro_expansion.jl | 5 ++ JuliaLowering/src/scope_analysis.jl | 5 +- JuliaLowering/test/decls.jl | 7 ++- JuliaLowering/test/decls_ir.jl | 72 +++++++++++++++++++++++++++- 6 files changed, 100 insertions(+), 7 deletions(-) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 3a5d9bd698d5d..fc7afdd4780cc 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -117,6 +117,8 @@ function _register_kinds() # (That is, it's removable in the same sense as # `@assume_effects :removable`.) "removable" + # Variable type declaration; `x::T = rhs` will be temporarily + # desugared to include `(decl x T)` "decl" # [K"captured_local" index] # A local variable captured into a global method. 
Contains the diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index fe1dacafbc7e5..c1133145f053c 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -792,12 +792,20 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) emit(ctx, ex) nothing elseif k == K"global" - if needs_value - throw(LoweringError(ex, "misplaced global declaration in value position")) - end emit(ctx, ex) ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) - nothing + if needs_value + if in_tail_pos && ctx.is_toplevel_thunk + # Permit "statement-like" globals at top level but potentially + # inside blocks. + compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) + else + throw(LoweringError(ex, + "global declaration doesn't read the variable and can't return a value")) + end + else + nothing + end elseif k == K"meta" emit(ctx, ex) if needs_value diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d946b32009477..8596954dd8e34 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -461,6 +461,11 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) end @fzone "JL: macroexpand" function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt) + if kind(ex) == K"local" + # This error assumes we're expanding the body of a top level thunk but + # we might want to make that more explicit in the pass system. 
+ throw(LoweringError(ex, "local declarations have no effect outside a scope")) + end graph = ensure_attributes(syntax_graph(ex), var_id=IdTag, scope_layer=LayerId, diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index f13c41c143356..6f146b235848b 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -427,7 +427,10 @@ function _resolve_scopes(ctx, ex::SyntaxTree) # elseif k == K"global" # ex elseif k == K"local" - makeleaf(ctx, ex, K"TOMBSTONE") + # Local declarations have a value of `nothing` according to flisp + # lowering. + # TODO: Should local decls be disallowed in value position? + @ast ctx ex "nothing"::K"core" elseif k == K"decl" ex_out = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) name = ex_out[1] diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index 7684fa33ad9d3..50aa98a596504 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -11,9 +11,14 @@ end # In value position, yield the right hand side, not `x` @test JuliaLowering.include_string(test_mod, """ -local x::Int = 1.0 +begin + local x::Int = 1.0 +end """) === 1.0 +# Global decl in value position without assignment returns nothing +@test JuliaLowering.include_string(test_mod, "global x_no_assign") === nothing + # Unadorned declarations @test JuliaLowering.include_string(test_mod, """ let diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 7b8c2d373d338..155754cde9cff 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -1,6 +1,8 @@ ######################################## # Local declaration with type -local x::T = 1 +begin + local x::T = 1 +end #--------------------- 1 (newvar slot₁/x) 2 1 @@ -17,6 +19,74 @@ local x::T = 1 13 (= slot₁/x %₁₂) 14 (return %₂) +######################################## +# Error: Local declarations outside a scope are disallowed +# See https://github.com/JuliaLang/julia/issues/57483 
+local x +#--------------------- +LoweringError: +local x +└─────┘ ── local declarations have no effect outside a scope + +######################################## +# Local declaration allowed in tail position +begin + local x +end +#--------------------- +1 (newvar slot₁/x) +2 (return core.nothing) + +######################################## +# Local declaration allowed in value position +# TODO: This may be a bug in flisp lowering - should we reconsider this? +let + y = local x +end +#--------------------- +1 (newvar slot₁/x) +2 core.nothing +3 (= slot₂/y %₂) +4 (return %₂) + +######################################## +# Global declaration allowed in tail position +global x +#--------------------- +1 (global TestMod.x) +2 latestworld +3 (return core.nothing) + +######################################## +# Global declaration allowed in tail position, nested +begin + global x +end +#--------------------- +1 (global TestMod.x) +2 latestworld +3 (return core.nothing) + +######################################## +# Error: Global declaration not allowed in tail position in functions +function f() + global x +end +#--------------------- +LoweringError: +function f() + global x +# ╙ ── global declaration doesn't read the variable and can't return a value +end + +######################################## +# Error: Global declaration not allowed in value position +y = global x +#--------------------- +LoweringError: +y = global x +# ╙ ── global declaration doesn't read the variable and can't return a value + ######################################## # const const xx = 10 From 313db4bdd68751190c194ab9bef33b762f9247de Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 6 Sep 2025 16:18:14 +1000 Subject: [PATCH 1066/1109] Implement include() with Base.IncludeInto and similarly for eval (JuliaLang/JuliaLowering.jl#68) Adopt the changes from https://github.com/JuliaLang/julia/pull/55949 to define module-local include / eval in terms of `Base.IncludeInto` and `Core.EvalInto`. 
It previously made sense to define `include()` and `eval()` as part of desugaring. But now that `include` is just an instance of `IncludeInto` the runtime can define these without calling into flisp so I've reverted to letting `jl_eval_module_expr()` do that from the C code. --- JuliaLowering/src/desugaring.jl | 53 ++------------------------------- JuliaLowering/src/runtime.jl | 12 ++++---- JuliaLowering/test/misc_ir.jl | 20 +++++++++++++ JuliaLowering/test/modules.jl | 8 +++-- 4 files changed, 33 insertions(+), 60 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 87f4c7ffaded8..492bbefb489b9 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -4248,56 +4248,7 @@ function expand_module(ctx, ex::SyntaxTree) @chk kind(modname_ex) == K"Identifier" modname = modname_ex.name_val - std_defs = if !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) - @ast ctx (@HERE) [ - K"block" - [K"using"(@HERE) - [K"importpath" - "Base" ::K"Identifier" - ] - ] - [K"function"(@HERE) - [K"call" - "eval" ::K"Identifier" - "x" ::K"Identifier" - ] - [K"call" - "eval" ::K"core" - modname ::K"Identifier" - "x" ::K"Identifier" - ] - ] - [K"function"(@HERE) - [K"call" - "include" ::K"Identifier" - "x" ::K"Identifier" - ] - [K"call" - "_call_latest" ::K"core" - "include" ::K"top" - modname ::K"Identifier" - "x" ::K"Identifier" - ] - ] - [K"function"(@HERE) - [K"call" - "include" ::K"Identifier" - [K"::" - "mapexpr" ::K"Identifier" - "Function" ::K"top" - ] - "x" ::K"Identifier" - ] - [K"call" - "_call_latest" ::K"core" - "include" ::K"top" - "mapexpr" ::K"Identifier" - modname ::K"Identifier" - "x" ::K"Identifier" - ] - ] - ] - end + std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) body = ex[2] @chk kind(body) == K"block" @@ -4311,10 +4262,10 @@ function expand_module(ctx, ex::SyntaxTree) eval_module ::K"Value" ctx.mod ::K"Value" modname ::K"String" + std_defs ::K"Bool" ctx.expr_compat_mode ::K"Bool" 
[K"inert"(body) [K"toplevel" - std_defs children(body)... ] ] diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 81e83b995734d..d23d8dbe6118f 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -214,7 +214,8 @@ end # public modname # # And run statments in the toplevel expression `body` -function eval_module(parentmod, modname, expr_compat_mode, body) +function eval_module(parentmod::Module, modname::AbstractString, std_defs::Bool, + expr_compat_mode::Bool, body::SyntaxTree) # Here we just use `eval()` with an Expr. # If we wanted to avoid this we'd need to reproduce a lot of machinery from # jl_eval_module_expr() @@ -229,11 +230,10 @@ function eval_module(parentmod, modname, expr_compat_mode, body) # mod = @ccall jl_new_module(Symbol(modname)::Symbol, parentmod::Module)::Any # ... name = Symbol(modname) - Core.eval(parentmod, :( - baremodule $name - $eval($name, $body; expr_compat_mode=$expr_compat_mode) - end - )) + eval_module_body(mod) = eval(mod, body; expr_compat_mode=expr_compat_mode) + Core.eval(parentmod, + Expr(:module, std_defs, name, + Expr(:block, Expr(:call, eval_module_body, name)))) end const _Base_has_eval_import = isdefined(Base, :_eval_import) diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 33fe64491d4a1..8acb0ada74957 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -182,6 +182,26 @@ LoweringError: (; a=1, f()) # └─┘ ── Invalid named tuple element +######################################## +# Module lowering +module Mod + body + stmts +end +#--------------------- +1 (call JuliaLowering.eval_module TestMod "Mod" true false (inert (toplevel body stmts))) +2 (return %₁) + +######################################## +# Bare module lowering +baremodule BareMod + body + stmts +end +#--------------------- +1 (call JuliaLowering.eval_module TestMod "BareMod" false false (inert (toplevel body stmts))) +2 (return %₁) + 
######################################## # Error: Modules not allowed in local scope let diff --git a/JuliaLowering/test/modules.jl b/JuliaLowering/test/modules.jl index 97cb536f51437..541a609c24167 100644 --- a/JuliaLowering/test/modules.jl +++ b/JuliaLowering/test/modules.jl @@ -11,10 +11,11 @@ end """, "module_test") @test A isa Module @test A.g() == "hi" -@test A.include isa Function +@test A.include isa Base.IncludeInto +@test A.eval isa Core.EvalInto @test A.Base === Base -@test A.eval(:(x = -1)) == -1 -@test A.x == -1 +@test A.eval(:(x = -2)) == -2 +@test A.x == -2 B = JuliaLowering.include_string(test_mod, """ baremodule B @@ -22,6 +23,7 @@ end """, "baremodule_test") @test B.Core === Core @test !isdefined(B, :include) +@test !isdefined(B, :eval) @test !isdefined(B, :Base) # modules allowed in nested code in global scope From fe491ecb794a1dabb3dbdcd94069938709fed0cf Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sun, 7 Sep 2025 14:09:24 +0900 Subject: [PATCH 1067/1109] Fix effects annotations on `current_exception` (JuliaLang/JuliaLowering.jl#74) `:nothrow` is actually unnecessary because it is already subsumed in `:removable`. --- JuliaLowering/src/runtime.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index d23d8dbe6118f..36ba688cdd276 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -9,7 +9,7 @@ # Return the current exception. In JuliaLowering we use this rather than the # special form `K"the_exception"` to reduces the number of special forms. 
-Base.@assume_effects :removable :nothrow function current_exception() +Base.@assume_effects :removable function current_exception() @ccall jl_current_exception(current_task()::Any)::Any end From 2a76fa3e29e34d73d34bab65fd458db144137f37 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Fri, 5 Sep 2025 20:15:55 +0900 Subject: [PATCH 1068/1109] Pass correct `Module` object to `GeneratedFunctionStub` Currently it looks like we are wrongly passing unintentional `mod::typeof(Base.mod)` object. Adds few type annotations to avoid similar regressions in the future. --- JuliaLowering/src/eval.jl | 13 ++++--------- JuliaLowering/src/runtime.jl | 6 ++++-- JuliaLowering/test/demo.jl | 5 ++--- JuliaLowering/test/repl_mode.jl | 3 +-- JuliaLowering/test/utils.jl | 7 +++---- 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 8233fcf31e6e8..b70c000452c2b 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -120,8 +120,7 @@ end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function to_code_info(ex, mod, funcname, slots) - input_code = children(ex) +function to_code_info(ex::SyntaxTree, mod::Module, slots::Vector{Slot}) stmts = Any[] current_codelocs_stack = ir_debug_info_state(ex) @@ -223,11 +222,11 @@ function to_code_info(ex, mod, funcname, slots) ) end -@fzone "JL: to_lowered_expr" function to_lowered_expr(mod, ex) +@fzone "JL: to_lowered_expr" function to_lowered_expr(mod::Module, ex::SyntaxTree) _to_lowered_expr(mod, ex, 0) end -function _to_lowered_expr(mod, ex, stmt_offset) +function _to_lowered_expr(mod::Module, ex::SyntaxTree, stmt_offset::Int) k = kind(ex) if is_literal(k) ex.value @@ -268,10 +267,7 @@ function _to_lowered_expr(mod, ex, stmt_offset) e1 = ex[1] getmeta(ex, :as_Expr, false) ? QuoteNode(Expr(e1)) : e1 elseif k == K"code_info" - funcname = ex.is_toplevel_thunk ? 
- "top-level scope" : - "none" # FIXME - ir = to_code_info(ex[1], mod, funcname, ex.slots) + ir = to_code_info(ex[1], mod, ex.slots) if ex.is_toplevel_thunk Expr(:thunk, ir) else @@ -400,4 +396,3 @@ function include_string(mod::Module, code::AbstractString, filename::AbstractStr expr_compat_mode=false) eval(mod, parseall(SyntaxTree, code; filename=filename); expr_compat_mode) end - diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 36ba688cdd276..dbc2c85b03d4f 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -352,11 +352,13 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a is_toplevel_thunk=Bool ) + __module__ = source.module + # Macro expansion. Looking at Core.GeneratedFunctionStub, it seems that # macros emitted by the generator are currently expanded in the latest # world, so do that for compatibility. macro_world = typemax(UInt) - ctx1 = MacroExpansionContext(graph, source.module, false, macro_world) + ctx1 = MacroExpansionContext(graph, __module__, false, macro_world) # Run code generator - this acts like a macro expander and like a macro # expander it gets a MacroContext. @@ -397,7 +399,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Rest of lowering ctx4, ex4 = convert_closures(ctx3, ex3) ctx5, ex5 = linearize_ir(ctx4, ex4) - ci = to_lowered_expr(mod, ex5) + ci = to_lowered_expr(__module__, ex5) @assert ci isa Core.CodeInfo # See GeneratedFunctionStub code in base/expr.jl diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index af7c0631665d7..8c8cdc353cca2 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -32,7 +32,7 @@ function formatsrc(ex; kws...) 
Text(JuliaSyntaxFormatter.formatsrc(ex; kws...)) end -function debug_lower(mod, ex; expr_compat_mode=false, verbose=false, do_eval=false) +function debug_lower(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false, verbose::Bool=false, do_eval::Bool=false) ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, expr_compat_mode, Base.get_world_counter()) verbose && @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) @@ -502,7 +502,7 @@ end src = """ function f(y) - x = + x = try try error("hi") @@ -908,4 +908,3 @@ ex = parsestmt(SyntaxTree, src, filename="foo.jl") # for e in Meta.parseall(text).args # Meta.lower(JuliaLowering, e) # end - diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl index ade7e1a6bf98e..3fd1ef3706bf6 100644 --- a/JuliaLowering/test/repl_mode.jl +++ b/JuliaLowering/test/repl_mode.jl @@ -19,7 +19,7 @@ function is_incomplete(prompt_state) end end -function eval_ish(mod, ex, do_eval, do_print_ir) +function eval_ish(mod::Module, ex::SyntaxTree, do_eval::Bool, do_print_ir::Bool) k = kind(ex) if k == K"toplevel" x = nothing @@ -79,4 +79,3 @@ function __init__() end end - diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 74f50874f4cc0..a826d95fa4e4a 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -262,7 +262,7 @@ function watch_ir_tests(dir, delay=0.5) end end -function lower_str(mod, s) +function lower_str(mod::Module, s::AbstractString) ex = parsestmt(JuliaLowering.SyntaxTree, s) return JuliaLowering.to_lowered_expr(mod, JuliaLowering.lower(mod, ex)) end @@ -356,7 +356,7 @@ end # Parse a file and lower the top level expression one child at a time, finding # any top level statement that fails lowering and producing a partially reduced # test case. 
-function reduce_any_failing_toplevel(mod, filename; do_eval=false) +function reduce_any_failing_toplevel(mod::Module, filename::AbstractString; do_eval::Bool=false) text = read(filename, String) ex0 = parseall(SyntaxTree, text; filename) for ex in children(ex0) @@ -373,7 +373,7 @@ function reduce_any_failing_toplevel(mod, filename; do_eval=false) end (reduced,was_reduced) = block_reduction(e->throws_lowering_exc(mod,e), ex) if !was_reduced - @info "No reduction possible" + @info "No reduction possible" return ex else @info "Reduced code" reduced @@ -383,4 +383,3 @@ function reduce_any_failing_toplevel(mod, filename; do_eval=false) end nothing end - From d1a99cf966f5a561e87d8f2c1184b96474c03f3f Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Sun, 7 Sep 2025 14:25:37 +0900 Subject: [PATCH 1069/1109] Remove `mod::Module` parameter from `to_lowered_expr` The module parameter was unused in the conversion process, so it has been removed from `to_lowered_expr`, `to_code_info`, and `_to_lowered_expr` functions along with all their call sites. 
--- JuliaLowering/src/eval.jl | 34 ++++++++++++++-------------- JuliaLowering/src/hooks.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/precompile.jl | 2 +- JuliaLowering/src/runtime.jl | 2 +- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/repl_mode.jl | 2 +- JuliaLowering/test/utils.jl | 4 ++-- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index b70c000452c2b..6395f05b940e5 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -120,7 +120,7 @@ end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function to_code_info(ex::SyntaxTree, mod::Module, slots::Vector{Slot}) +function to_code_info(ex::SyntaxTree, slots::Vector{Slot}) stmts = Any[] current_codelocs_stack = ir_debug_info_state(ex) @@ -155,7 +155,7 @@ function to_code_info(ex::SyntaxTree, mod::Module, slots::Vector{Slot}) stmt_offset = length(stmts) for stmt in children(ex) - push!(stmts, _to_lowered_expr(mod, stmt, stmt_offset)) + push!(stmts, _to_lowered_expr(stmt, stmt_offset)) add_ir_debug_info!(current_codelocs_stack, stmt) end @@ -222,11 +222,11 @@ function to_code_info(ex::SyntaxTree, mod::Module, slots::Vector{Slot}) ) end -@fzone "JL: to_lowered_expr" function to_lowered_expr(mod::Module, ex::SyntaxTree) - _to_lowered_expr(mod, ex, 0) +@fzone "JL: to_lowered_expr" function to_lowered_expr(ex::SyntaxTree) + _to_lowered_expr(ex, 0) end -function _to_lowered_expr(mod::Module, ex::SyntaxTree, stmt_offset::Int) +function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) k = kind(ex) if is_literal(k) ex.value @@ -262,12 +262,12 @@ function _to_lowered_expr(mod::Module, ex::SyntaxTree, stmt_offset::Int) elseif k == K"SSAValue" Core.SSAValue(ex.var_id + stmt_offset) elseif k == K"return" - Core.ReturnNode(_to_lowered_expr(mod, ex[1], stmt_offset)) + Core.ReturnNode(_to_lowered_expr(ex[1], stmt_offset)) elseif k == K"inert" e1 = ex[1] getmeta(ex, 
:as_Expr, false) ? QuoteNode(Expr(e1)) : e1 elseif k == K"code_info" - ir = to_code_info(ex[1], mod, ex.slots) + ir = to_code_info(ex[1], ex.slots) if ex.is_toplevel_thunk Expr(:thunk, ir) else @@ -278,36 +278,36 @@ function _to_lowered_expr(mod::Module, ex::SyntaxTree, stmt_offset::Int) elseif k == K"goto" Core.GotoNode(ex[1].id + stmt_offset) elseif k == K"gotoifnot" - Core.GotoIfNot(_to_lowered_expr(mod, ex[1], stmt_offset), ex[2].id + stmt_offset) + Core.GotoIfNot(_to_lowered_expr(ex[1], stmt_offset), ex[2].id + stmt_offset) elseif k == K"enter" catch_idx = ex[1].id numchildren(ex) == 1 ? Core.EnterNode(catch_idx) : - Core.EnterNode(catch_idx, _to_lowered_expr(mod, ex[2], stmt_offset)) + Core.EnterNode(catch_idx, _to_lowered_expr(ex[2], stmt_offset)) elseif k == K"method" - cs = map(e->_to_lowered_expr(mod, e, stmt_offset), children(ex)) + cs = map(e->_to_lowered_expr(e, stmt_offset), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] Expr(:method, c1, cs[2:end]...) elseif k == K"newvar" - Core.NewvarNode(_to_lowered_expr(mod, ex[1], stmt_offset)) + Core.NewvarNode(_to_lowered_expr(ex[1], stmt_offset)) elseif k == K"opaque_closure_method" - args = map(e->_to_lowered_expr(mod, e, stmt_offset), children(ex)) + args = map(e->_to_lowered_expr(e, stmt_offset), children(ex)) # opaque_closure_method has special non-evaluated semantics for the # `functionloc` line number node so we need to undo a level of quoting @assert args[4] isa QuoteNode args[4] = args[4].value Expr(:opaque_closure_method, args...) elseif k == K"meta" - args = Any[_to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] + args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)] # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. args[1] = args[1].value Expr(:meta, args...) 
elseif k == K"static_eval" @assert numchildren(ex) == 1 - _to_lowered_expr(mod, ex[1], stmt_offset) + _to_lowered_expr(ex[1], stmt_offset) elseif k == K"cfunction" - args = Any[_to_lowered_expr(mod, e, stmt_offset) for e in children(ex)] + args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)] if kind(ex[2]) == K"static_eval" args[2] = QuoteNode(args[2]) end @@ -339,7 +339,7 @@ function _to_lowered_expr(mod::Module, ex::SyntaxTree, stmt_offset::Int) if isnothing(head) throw(LoweringError(ex, "Unhandled form for kind $k")) end - Expr(head, map(e->_to_lowered_expr(mod, e, stmt_offset), children(ex))...) + Expr(head, map(e->_to_lowered_expr(e, stmt_offset), children(ex))...) end end @@ -355,7 +355,7 @@ end return x end linear_ir = lower(mod, ex; expr_compat_mode) - thunk = to_lowered_expr(mod, linear_ir) + thunk = to_lowered_expr(linear_ir) Core.eval(mod, thunk) end diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index 3ef25fe78a97a..c9f49453fbf4c 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -22,7 +22,7 @@ function core_lowering_hook(@nospecialize(code), mod::Module, ctx3, st3 = resolve_scopes( ctx2, st2) ctx4, st4 = convert_closures(ctx3, st3) ctx5, st5 = linearize_ir( ctx4, st4) - ex = to_lowered_expr(mod, st5) + ex = to_lowered_expr(st5) return Core.svec(ex, st5, ctx5) catch exc @info("JuliaLowering threw given input:", code=code, st0=st0, file=file, line=line, mod=mod) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 8596954dd8e34..c2f8fe40b14de 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -147,7 +147,7 @@ function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::Syn ctx4, ex4 = convert_closures(ctx3, ex3) ctx5, ex5 = linearize_ir(ctx4, ex4) mod = current_layer(ctx).mod - expr_form = to_lowered_expr(mod, ex5) + expr_form = to_lowered_expr(ex5) try Core.eval(mod, expr_form) catch err diff 
--git a/JuliaLowering/src/precompile.jl b/JuliaLowering/src/precompile.jl index 0c07b5465eb3a..7a5fccaded4b5 100644 --- a/JuliaLowering/src/precompile.jl +++ b/JuliaLowering/src/precompile.jl @@ -21,7 +21,7 @@ if Base.get_bool_env("JULIA_LOWERING_PRECOMPILE", true) JuliaSyntax.parse!(stream; rule=:all) st0 = JuliaSyntax.build_tree(SyntaxTree, stream; filename=@__FILE__) lwrst = lower(@__MODULE__, st0[1]) - lwr = to_lowered_expr(@__MODULE__, lwrst) + lwr = to_lowered_expr(lwrst) @assert Meta.isexpr(lwr, :thunk) && only(lwr.args) isa Core.CodeInfo end end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index dbc2c85b03d4f..53ce83995c738 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -399,7 +399,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Rest of lowering ctx4, ex4 = convert_closures(ctx3, ex3) ctx5, ex5 = linearize_ir(ctx4, ex4) - ci = to_lowered_expr(__module__, ex5) + ci = to_lowered_expr(ex5) @assert ci isa Core.CodeInfo # See GeneratedFunctionStub code in base/expr.jl diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index 8c8cdc353cca2..a6d12fe2d192d 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -49,7 +49,7 @@ function debug_lower(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false, ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) verbose && @info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) - ex_expr = JuliaLowering.to_lowered_expr(mod, ex_compiled) + ex_expr = JuliaLowering.to_lowered_expr(ex_compiled) verbose && @info "CodeInfo" ex_expr if do_eval diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl index 3fd1ef3706bf6..6c0a889b250e6 100644 --- a/JuliaLowering/test/repl_mode.jl +++ b/JuliaLowering/test/repl_mode.jl @@ -34,7 +34,7 @@ function eval_ish(mod::Module, ex::SyntaxTree, do_eval::Bool, 
do_print_ir::Bool) end if do_eval println(stdout, "#----------------------") - expr_form = JuliaLowering.to_lowered_expr(mod, linear_ir) + expr_form = JuliaLowering.to_lowered_expr(linear_ir) Base.eval(mod, expr_form) end end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index a826d95fa4e4a..e0a37947b48ea 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -264,7 +264,7 @@ end function lower_str(mod::Module, s::AbstractString) ex = parsestmt(JuliaLowering.SyntaxTree, s) - return JuliaLowering.to_lowered_expr(mod, JuliaLowering.lower(mod, ex)) + return JuliaLowering.to_lowered_expr(JuliaLowering.lower(mod, ex)) end # See Julia Base tests in "test/docs.jl" @@ -362,7 +362,7 @@ function reduce_any_failing_toplevel(mod::Module, filename::AbstractString; do_e for ex in children(ex0) try ex_compiled = JuliaLowering.lower(mod, ex) - ex_expr = JuliaLowering.to_lowered_expr(mod, ex_compiled) + ex_expr = JuliaLowering.to_lowered_expr(ex_compiled) if do_eval Base.eval(mod, ex_expr) end From ad86d9a803595cc0faf442a28f621477480560d8 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sun, 7 Sep 2025 14:09:42 +0900 Subject: [PATCH 1070/1109] Minor type stability improvements (JuliaLang/JuliaLowering.jl#73) Fixing several type instabilities that I noticed while reviewing the code. Probably does not have much impact on compilation/runtime performance. 
--- JuliaLowering/src/eval.jl | 18 +++++++++++++----- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/runtime.jl | 4 ++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 6395f05b940e5..011d5572bcba9 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -287,7 +287,8 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) elseif k == K"method" cs = map(e->_to_lowered_expr(e, stmt_offset), children(ex)) # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations - c1 = cs[1] isa QuoteNode ? cs[1].value : cs[1] + cs1 = cs[1] + c1 = cs1 isa QuoteNode ? cs1.value : cs1 Expr(:method, c1, cs[2:end]...) elseif k == K"newvar" Core.NewvarNode(_to_lowered_expr(ex[1], stmt_offset)) @@ -295,13 +296,16 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) args = map(e->_to_lowered_expr(e, stmt_offset), children(ex)) # opaque_closure_method has special non-evaluated semantics for the # `functionloc` line number node so we need to undo a level of quoting - @assert args[4] isa QuoteNode - args[4] = args[4].value + arg4 = args[4] + @assert arg4 isa QuoteNode + args[4] = arg4.value Expr(:opaque_closure_method, args...) elseif k == K"meta" args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)] # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. - args[1] = args[1].value + arg1 = args[1] + @assert arg1 isa QuoteNode + args[1] = arg1.value Expr(:meta, args...) elseif k == K"static_eval" @assert numchildren(ex) == 1 @@ -339,7 +343,11 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) if isnothing(head) throw(LoweringError(ex, "Unhandled form for kind $k")) end - Expr(head, map(e->_to_lowered_expr(e, stmt_offset), children(ex))...) 
+ ret = Expr(head) + for e in children(ex) + push!(ret.args, _to_lowered_expr(e, stmt_offset)) + end + return ret end end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index c2f8fe40b14de..4d5ec67dbc356 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -216,7 +216,7 @@ function expand_macro(ctx, ex) # We use a specific well defined world age for the next checks and macro # expansion invocations. This avoids inconsistencies if the latest world # age changes concurrently. - # + # # TODO: Allow this to be passed in if hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}; world=ctx.macro_world) macro_args = prepare_macro_args(ctx, mctx, raw_args) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 53ce83995c738..67480f7bc1c3e 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -195,10 +195,10 @@ function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, end # Interpolate captured local variables into the CodeInfo for a global method -function replace_captured_locals!(codeinfo, locals) +function replace_captured_locals!(codeinfo::Core.CodeInfo, locals::Core.SimpleVector) for (i, ex) in enumerate(codeinfo.code) if Meta.isexpr(ex, :captured_local) - codeinfo.code[i] = locals[ex.args[1]] + codeinfo.code[i] = locals[ex.args[1]::Int] end end codeinfo From a253d4a389bef21057709e21e6161aa3faad9900 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 11 Sep 2025 00:43:54 +0900 Subject: [PATCH 1071/1109] ast: Remove unused utility functions (JuliaLang/JuliaLowering.jl#76) --- JuliaLowering/src/ast.jl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 9c4f9f5b9331e..5da635e9fd206 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -340,7 +340,7 @@ to 
indicate that the "primary" location of the source is the location where "x" ::K"Identifier" ] [K"call" - "eval" ::K"core" + "eval" ::K"core" mn =>K"Identifier" "x" ::K"Identifier" ] @@ -567,21 +567,11 @@ function is_sym_decl(x) k == K"Identifier" || k == K"::" end -function is_identifier(x) - k = kind(x) - k == K"Identifier" || k == K"var" || is_operator(k) || is_macro_name(k) -end - function is_eventually_call(ex::SyntaxTree) k = kind(ex) return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1])) end -function is_function_def(ex) - k = kind(ex) - return k == K"function" || k == K"->" -end - function find_parameters_ind(exs) i = length(exs) while i >= 1 From 8378fbd43d9a796e3a4c6b60ad42eba4908b86b3 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 09:31:05 -0700 Subject: [PATCH 1072/1109] Fix `eval_macro_name` into closed modules during precompilation (JuliaLang/JuliaLowering.jl#82) --- JuliaLowering/src/macro_expansion.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 4d5ec67dbc356..350670de529f3 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -149,7 +149,13 @@ function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::Syn mod = current_layer(ctx).mod expr_form = to_lowered_expr(ex5) try - Core.eval(mod, expr_form) + # Using Core.eval here fails when precompiling packages since we hit the + # user-facing error (in `jl_check_top_level_effect`) that warns that + # effects won't persist when eval-ing into a closed module. + # `jl_invoke_julia_macro` bypasses this by calling `jl_toplevel_eval` on + # the macro name. This is fine assuming the first argument to the + # macrocall is effect-free. 
+ ccall(:jl_toplevel_eval, Any, (Any, Any), mod, expr_form) catch err throw(MacroExpansionError(mctx, ex, "Macro not found", :all, err)) end From 4276c9dfb9ac284e6f7650aaebc361f57f29eb96 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 09:55:01 -0700 Subject: [PATCH 1073/1109] `expr_to_syntaxtree`: Quoted symbol fixes (JuliaLang/JuliaLowering.jl#78) Fix JuliaLang/JuliaLowering.jl#54. QuoteNode was previously converted to `(quote )` which lowers to a runtime call. - For plain symbol contents, this was too dynamic for ccall to handle, and optimizable in any case. - For Expr contents, I don't think using non-interpolating quote was correct in the first place. - For LineNumberNode contents, we wrap with a QuoteNode on the way out, so we need to unwrap it on the way in. I still need to think more about what the various forms of quoting mean when Expr and SyntaxTree are mixed, but this is at least an upgrade in number of passing tests. I've added some cases from things I broke along the way. 
Other fixes: - Bool isa Integer, so prevent `expr_to_syntaxtree` from giving booleans `K"Integer"` kind - enable logging of failures from `expr_to_syntaxtree` in the core lowering hook --------- Co-authored-by: Claire Foster --- JuliaLowering/src/compat.jl | 57 ++++++++++++++++++------------- JuliaLowering/src/hooks.jl | 3 +- JuliaLowering/src/syntax_graph.jl | 4 ++- JuliaLowering/test/compat.jl | 19 ++++++++++- JuliaLowering/test/hooks.jl | 33 +++++++++++++----- JuliaLowering/test/macros.jl | 7 ++++ 6 files changed, 88 insertions(+), 35 deletions(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 850c97711017b..fa1ea43e6f089 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -25,7 +25,7 @@ function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing kind=Kind, syntax_flags=UInt16, source=SourceAttrType, var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool, - scope_layer=LayerId) + scope_layer=LayerId, meta=CompileHints) expr_to_syntaxtree(graph, e, lnn) end @@ -188,17 +188,23 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA if isnothing(e) st_id = _insert_tree_node(graph, K"core", src; name_val="nothing") return st_id, src + elseif e isa LineNumberNode + # A LineNumberNode in value position evaluates to nothing + st_id = _insert_tree_node(graph, K"core", src; name_val="nothing") + return st_id, e elseif e isa Symbol st_id = _insert_tree_node(graph, K"Identifier", src; name_val=String(e)) return st_id, src - elseif e isa QuoteNode && e.value isa Symbol - # Undo special handling from st->expr - return _insert_convert_expr(Expr(:quote, e.value), graph, src) - # elseif e isa QuoteNode - # st_id = _insert_tree_node(graph, K"inert", src) - # quote_child, _ = _insert_convert_expr(e.value, graph, src) - # setchildren!(graph, st_id, NodeId[quote_child]) - # return st_id, src + elseif e isa QuoteNode + if e.value isa Symbol + return 
_insert_convert_expr(Expr(:quoted_symbol, e.value), graph, src) + elseif e.value isa Expr + return _insert_convert_expr(Expr(:inert, e.value), graph, src) + elseif e.value isa LineNumberNode + return _insert_tree_node(graph, K"Value", src; value=e.value), src + else + return _insert_convert_expr(e.value, graph, src) + end elseif e isa String st_id = _insert_tree_node(graph, K"string", src) id_inner = _insert_tree_node(graph, K"String", src; value=e) @@ -207,7 +213,9 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA elseif !(e isa Expr) # There are other kinds we could potentially back-convert (e.g. Float), # but Value should work fine. - st_k = e isa Integer ? K"Integer" : find_kind(string(typeof(e))) + st_k = e isa Bool ? K"Bool" : + e isa Integer ? K"Integer" : + find_kind(string(typeof(e))) st_id = _insert_tree_node(graph, isnothing(st_k) ? K"Value" : st_k, src; value=e) return st_id, src end @@ -354,6 +362,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA lam_args = Any[] lam_eqs = Any[] for a in a1.args + a isa LineNumberNode && continue a isa Expr && a.head === :(=) ? 
push!(lam_eqs, a) : push!(lam_args, a) end !isempty(lam_eqs) && push!(lam_args, Expr(:parameters, lam_eqs...)) @@ -399,6 +408,10 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA callargs = collect_expr_parameters(e.args[1], 2) if e.args[1].head === :macrocall st_k = K"macrocall" + if callargs[2] isa LineNumberNode + src = callargs[2] + end + deleteat!(callargs, 2) c1,c1_esc = unwrap_esc(callargs[1]) callargs[1] = c1_esc(Expr(:MacroName, c1)) else @@ -442,12 +455,6 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA st_k = K"latestworld_if_toplevel" elseif e.head === Symbol("hygienic-scope") st_k = K"hygienic_scope" - elseif e.head === :escape - if length(e.args) == 1 && unwrap_esc_(e.args[1]) isa LineNumberNode - # escape containing only a LineNumberNode will become empty and - # thus must be removed before lowering sees it. - st_k = K"TOMBSTONE" - end elseif e.head === :meta # Messy and undocumented. Only sometimes we want a K"meta". 
@assert e.args[1] isa Symbol @@ -549,25 +556,27 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA if isnothing(child_exprs) return st_id, src else - st_child_ids, last_src = _insert_child_exprs(child_exprs, graph, src) + st_child_ids, last_src = _insert_child_exprs(e.head, child_exprs, graph, src) setchildren!(graph, st_id, st_child_ids) return st_id, last_src end end -function _insert_child_exprs(child_exprs::Vector{Any}, graph::SyntaxGraph, - src::SourceAttrType) +function _insert_child_exprs(head::Symbol, child_exprs::Vector{Any}, + graph::SyntaxGraph, src::SourceAttrType) st_child_ids = NodeId[] last_src = src - for c in child_exprs - if c isa LineNumberNode - last_src = c + for (i, c) in enumerate(child_exprs) + c_unwrapped, _ = unwrap_esc(c) + # If c::LineNumberNode is anywhere in a block OR c is not in tail + # position, we don't need to insert `nothing` here + if c_unwrapped isa LineNumberNode && (head === :block || head === :toplevel && i != length(child_exprs)) + last_src = c_unwrapped else - (c_id, c_src) = _insert_convert_expr(c, graph, last_src) + (c_id, last_src) = _insert_convert_expr(c, graph, last_src) if !isnothing(c_id) push!(st_child_ids, c_id) end - last_src = something(c_src, src) end end return st_child_ids, last_src diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index c9f49453fbf4c..311576321f761 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -15,8 +15,9 @@ function core_lowering_hook(@nospecialize(code), mod::Module, file = file isa Ptr{UInt8} ? unsafe_string(file) : file line = !(line isa Int64) ? Int64(line) : line - st0 = code isa Expr ? expr_to_syntaxtree(code, LineNumberNode(line, file)) : code + local st0 = nothing try + st0 = code isa Expr ? 
expr_to_syntaxtree(code, LineNumberNode(line, file)) : code ctx1, st1 = expand_forms_1( mod, st0, true, world) ctx2, st2 = expand_forms_2( ctx1, st1) ctx3, st3 = resolve_scopes( ctx2, st2) diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 05ff9005f7344..2ec127ef9d27b 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -577,7 +577,9 @@ function JuliaSyntax._expr_leaf_val(ex::SyntaxTree, _...) name = get(ex, :name_val, nothing) if !isnothing(name) n = Symbol(name) - if hasattr(ex, :scope_layer) + if kind(ex) === K"Symbol" + return QuoteNode(n) + elseif hasattr(ex, :scope_layer) Expr(:scope_layer, n, ex.scope_layer) else n diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl index 0d945c85651ba..93736641bd715 100644 --- a/JuliaLowering/test/compat.jl +++ b/JuliaLowering/test/compat.jl @@ -608,7 +608,24 @@ const JL = JuliaLowering "x"::K"Identifier" ] - @test JuliaLowering.expr_to_syntaxtree(Expr(:block, esc(LineNumberNode(1)))) ≈ @ast_ [K"block"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, LineNumberNode(1))) ≈ + @ast_ [K"block"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, esc(LineNumberNode(1)))) ≈ + @ast_ [K"block"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, QuoteNode(LineNumberNode(1)))) ≈ + @ast_ [K"block" LineNumberNode(1)::K"Value"] + + # toplevel (and all other non-block forms) keep LineNumberNodes in value position + @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, esc(LineNumberNode(1)))) ≈ + @ast_ [K"toplevel" [K"escape" "nothing"::K"core"]] + @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, LineNumberNode(1))) ≈ + @ast_ [K"toplevel" "nothing"::K"core"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, QuoteNode(LineNumberNode(1)))) ≈ + @ast_ [K"toplevel" LineNumberNode(1)::K"Value"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :identity, LineNumberNode(1))) ≈ + @ast_ [K"call" "identity"::K"Identifier" 
"nothing"::K"core"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :identity, QuoteNode(LineNumberNode(1)))) ≈ + @ast_ [K"call" "identity"::K"Identifier" LineNumberNode(1)::K"Value"] end end diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl index 46ea5c5355557..fa3e615e0e740 100644 --- a/JuliaLowering/test/hooks.jl +++ b/JuliaLowering/test/hooks.jl @@ -20,21 +20,38 @@ const JL = JuliaLowering end if isdefined(Core, :_lower) + function jeval(str) + prog = parseall(Expr, str) + local out + try + JL.activate!() + out = Core.eval(test_mod, prog) + finally + JL.activate!(false) + end + end @testset "integration: `JuliaLowering.activate!`" begin - prog = parseall(Expr, "global asdf = 1") - JL.activate!() - out = Core.eval(test_mod, prog) - JL.activate!(false) + out = jeval("global asdf = 1") @test out === 1 @test isdefined(test_mod, :asdf) - prog = parseall(Expr, "module M; x = 1; end") - JL.activate!() - out = Core.eval(test_mod, prog) - JL.activate!(false) + out = jeval("module M; x = 1; end") @test out isa Module @test isdefined(test_mod, :M) @test isdefined(test_mod.M, :x) + + # Tricky cases with symbols + out = jeval("""module M + Base.@constprop :aggressive function f(x); x; end + const what = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), Core.nothing) + end""") + @test out isa Module + @test isdefined(test_mod, :M) + @test isdefined(test_mod.M, :f) + @test isdefined(test_mod.M, :what) + + # TODO: broken, commented to prevent error logging + # @test jeval("Base.@propagate_inbounds @inline meta_double_quote_issue(x) = x") isa Function end end end diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 76833c4255205..0eeb7646a704c 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -346,6 +346,13 @@ end end isglobal_chk(1) """) === (false, false, false, false) + + # @test appears to be the only macro in base to use :inert + test_result = JuliaLowering.include_string(test_mod, """ + using Test + 
@test identity(123) === 123 + """; expr_compat_mode=true) + @test test_result.value === true end end From 3b2878e8c77fa426d01c0c485ec7b0198f8c372b Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 17:22:34 -0700 Subject: [PATCH 1074/1109] `_expr_leaf_val`: Accept `Expr(:scope_layer)` from JuliaLowering (JuliaLang/JuliaSyntax.jl#595) --- JuliaSyntax/src/integration/expr.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl index da9c67c99e4ed..53de5f55f0ee4 100644 --- a/JuliaSyntax/src/integration/expr.jl +++ b/JuliaSyntax/src/integration/expr.jl @@ -236,7 +236,8 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt Expr(:error) : Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") else - val = _expr_leaf_val(cursor, txtbuf, txtbuf_offset) + scoped_val = _expr_leaf_val(cursor, txtbuf, txtbuf_offset) + val = @isexpr(scoped_val, :scope_layer) ? scoped_val.args[1] : scoped_val if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr @@ -247,9 +248,11 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) elseif is_identifier(k) - return lower_identifier_name(val, k) + val2 = lower_identifier_name(val, k) + return @isexpr(scoped_val, :scope_layer) ? 
+ Expr(:scope_layer, val2, scoped_val.args[2]) : val2 else - return val + return scoped_val end end end From 395b2a1c1f7c47b96e4c347a5c31b020eb910222 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 11:38:17 -0700 Subject: [PATCH 1075/1109] Fix macros producing `Expr(:toplevel)` (JuliaLang/JuliaLowering.jl#81) --- JuliaLowering/src/macro_expansion.jl | 36 ++++++++++++++++++++++++---- JuliaLowering/test/macros.jl | 7 ++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 350670de529f3..fb8395e12e6d0 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -212,6 +212,33 @@ function prepare_macro_args(ctx, mctx, raw_args) return macro_args end +# TODO: Do we need to handle :scope_layer or multiple escapes here? +# See https://github.com/c42f/JuliaLowering.jl/issues/39 +""" +Insert a hygienic-scope around each arg of K"toplevel" returned from a macro. + +It isn't correct for macro expansion to recurse into a K"toplevel" expression +since one child may define a macro and the next may use it. However, not +recursing now means we lose some important context: the module of the macro we +just expanded, which is necessary for resolving the identifiers in the +K"toplevel" AST. The solution implemented in JuliaLang/julia#53515 was to save +our place and expand later using `Expr(:hygienic-scope toplevel_child mod)`. + +Of course, these hygienic-scopes are also necessary because existing user code +contains the corresponding escaping, which would otherwise cause errors. 
We +already consumed the hygienic-scope that comes with every expansion, but won't +be looking for escapes under :toplevel, so push hygienic-scope under toplevel +""" +function fix_toplevel_expansion(ctx, ex::SyntaxTree, mod::Module, lnn::LineNumberNode) + if kind(ex) === K"toplevel" + mapchildren(ctx, ex) do e + @ast ctx ex [K"hygienic_scope" e mod::K"Value" lnn::K"Value"] + end + else + mapchildren(e->fix_toplevel_expansion(ctx, e, mod, lnn), ctx, ex) + end +end + function expand_macro(ctx, ex) @assert kind(ex) == K"macrocall" @@ -219,6 +246,10 @@ function expand_macro(ctx, ex) mctx = MacroContext(ctx.graph, ex, current_layer(ctx)) macfunc = eval_macro_name(ctx, mctx, macname) raw_args = ex[2:end] + macro_loc = let loc = source_location(LineNumberNode, ex) + # Some macros, e.g. @cmd, don't play nicely with file == nothing + isnothing(loc.file) ? LineNumberNode(loc.line, :none) : loc + end # We use a specific well defined world age for the next checks and macro # expansion invocations. This avoids inconsistencies if the latest world # age changes concurrently. @@ -248,10 +279,6 @@ function expand_macro(ctx, ex) else # Compat: attempt to invoke an old-style macro if there's no applicable # method for new-style macro arguments. - macro_loc = let loc = source_location(LineNumberNode, ex) - # Some macros, e.g. @cmd, don't play nicely with file == nothing - isnothing(loc.file) ? 
LineNumberNode(loc.line, :none) : loc - end macro_args = Any[macro_loc, current_layer(ctx).mod] for arg in raw_args # For hygiene in old-style macros, we omit any additional scope @@ -287,6 +314,7 @@ function expand_macro(ctx, ex) # method was defined (may be different from `parentmodule(macfunc)`) mod_for_ast = lookup_method_instance(macfunc, macro_args, ctx.macro_world).def.module + expanded = fix_toplevel_expansion(ctx, expanded, mod_for_ast, macro_loc) new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, current_layer_id(ctx), true) push!(ctx.scope_layers, new_layer) diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 0eeb7646a704c..5fd42faa8b435 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -353,6 +353,13 @@ end @test identity(123) === 123 """; expr_compat_mode=true) @test test_result.value === true + + # @enum produces Expr(:toplevel) + JuliaLowering.include_string(test_mod, """ + @enum SOME_ENUM X1 X2 X3 + """; expr_compat_mode=true) + @test test_mod.SOME_ENUM <: Enum + @test test_mod.X1 isa Enum end end From 26055dcaad4c9176b86c0f4255c5c93673feab6c Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 18:08:57 -0700 Subject: [PATCH 1076/1109] Fix BoundsError with mixed file provenance not caused by macros (JuliaLang/JuliaLowering.jl#75) Was causing several stdlib failures. MWE: ``` julia> ex = Meta.parse("begin x = 111 x = 222 end") JuliaLowering.core_lowering_hook(ex, Main, "foo.jl", 100) ``` If `core_lowering_hook` is given one filename (e.g. "none" from `@eval`), but some part of the expression contains LineNumberNodes with a different filename, we trigger the "inlined macro-expansion" logic in the debuginfo generator, which assumes new filenames are from new macro expansions atop the old filename. 
The violated invariant is that the list of files in this statement's flattened provenance shares some prefix with the last statement's list of files. This fix assumes there is some base file that all statements share, and normalizes different base filenames to the first it sees. Aside: Not sure if this stack logic is 100% correct given that two adjacent statements can share arbitrarily many file stack entries despite being from different macro expansions. --- JuliaLowering/src/eval.jl | 19 ++++++++++++------- JuliaLowering/test/hooks.jl | 5 +++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 011d5572bcba9..e3b4d6885922a 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -79,20 +79,25 @@ end function add_ir_debug_info!(current_codelocs_stack, stmt) locstk = [(filename(e), source_location(e)[1]) for e in flattened_provenance(stmt)] - for j in 1:max(length(locstk), length(current_codelocs_stack)) - if j > length(locstk) || (length(current_codelocs_stack) >= j && - current_codelocs_stack[j][1] != locstk[j][1]) - while length(current_codelocs_stack) >= j + for j in 1:length(locstk) + if j === 1 && current_codelocs_stack[j][1] != locstk[j][1] + # dilemma: the filename stack here shares no prefix with that of the + # previous statement, where differing filenames usually (j > 1) mean + # a different macro expansion has started at this statement. guess + # that both files are the same, and inherit the previous filename. 
+ locstk[j] = (current_codelocs_stack[j][1], locstk[j][2]) + end + if j < length(current_codelocs_stack) && (j === length(locstk) || + current_codelocs_stack[j+1][1] != locstk[j+1][1]) + while j < length(current_codelocs_stack) info = pop!(current_codelocs_stack) push!(last(current_codelocs_stack)[2], info) end - end - if j > length(locstk) - break elseif j > length(current_codelocs_stack) push!(current_codelocs_stack, (locstk[j][1], [], Vector{Int32}())) end end + @assert length(locstk) === length(current_codelocs_stack) for (j, (file,line)) in enumerate(locstk) fn, edges, codelocs = current_codelocs_stack[j] @assert fn == file diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl index fa3e615e0e740..d83c2b14d7856 100644 --- a/JuliaLowering/test/hooks.jl +++ b/JuliaLowering/test/hooks.jl @@ -17,6 +17,11 @@ const JL = JuliaLowering val = Core.eval(test_mod, out[1]) @test val == [2,3,4] end + + # file argument mismatch with embedded linenumbernodes shouldn't crash + ex = Expr(:block, LineNumberNode(111), :(x = 1), LineNumberNode(222), :(x + 1)) + lwr = JuliaLowering.core_lowering_hook(ex, test_mod, "foo.jl", 333)[1] + @test Core.eval(test_mod, lwr) === 2 end if isdefined(Core, :_lower) From 1ce61d0fb88cc13a0ced09941a038f7c2eae0c30 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 18:36:10 -0700 Subject: [PATCH 1077/1109] Docsystem: handle "semantically important docstring" cases (JuliaLang/JuliaLowering.jl#80) --------- Co-authored-by: Claire Foster --- JuliaLowering/src/compat.jl | 10 +++--- JuliaLowering/src/desugaring.jl | 57 +++++++++++++++++++++++++++++++-- JuliaLowering/src/runtime.jl | 15 +++++++++ JuliaLowering/test/misc.jl | 55 +++++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 7 deletions(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index fa1ea43e6f089..6e57a64240f5a 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl 
@@ -275,12 +275,14 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA Expr(:MacroName, a12_esc(a12.value)))) end elseif a1 isa GlobalRef && a1.mod === Core - # TODO (maybe): syntax-introduced macrocalls are listed here for - # reference. We probably don't need to convert these. + # Syntax-introduced macrocalls are listed here for reference. We + # probably don't need to convert these. if a1.name === Symbol("@cmd") - elseif a1.name === Symbol("@doc") + elseif a1.name === Symbol("@doc") && nargs === 4 # two macro args only + # Single-arg @doc is a lookup not corresponding to K"doc" + # Revise sometimes calls @doc with three args, but probably shouldn't st_k = K"doc" - child_exprs = child_exprs[2:end] + child_exprs = child_exprs[2:3] elseif a1.name === Symbol("@int128_str") elseif a1.name === Symbol("@int128_str") elseif a1.name === Symbol("@big_str") diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 492bbefb489b9..bcd504e4ba274 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2857,7 +2857,7 @@ function is_invalid_func_name(ex) return is_ccall_or_cglobal(name) end -function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity) +function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity; doc_only=false) @chk numchildren(ex) in (1,2) name = ex[1] if numchildren(ex) == 1 && is_identifier_like(name) @@ -3023,6 +3023,35 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= end end + if doc_only + # The (doc str (call ...)) form requires method signature lowering, but + # does not execute or define any method, so we can't use function_type. + # This is a bit of a messy case in the docsystem which we'll hopefully + # be able to delete at some point. + sig_stmts = SyntaxList(ctx) + @assert first_default != 1 && length(arg_types) >= 1 + last_required = first_default === 0 ? 
length(arg_types) : first_default - 1 + for i in last_required:length(arg_types) + push!(sig_stmts, @ast(ctx, ex, [K"curly" "Tuple"::K"core" arg_types[2:i]...])) + end + sig_type = @ast ctx ex [K"where" + [K"curly" "Union"::K"core" sig_stmts...] + [K"_typevars" [K"block" typevar_names...] [K"block"]] + ] + out = @ast ctx docs [K"block" + typevar_stmts... + [K"call" + bind_static_docs!::K"Value" + (kind(name) == K"." ? name[1] : ctx.mod::K"Value") + name_str::K"Symbol" + docs[1] + ::K"SourceLocation"(ex) + sig_type + ] + ] + return expand_forms_2(ctx, out) + end + if !isnothing(return_type) ret_var = ssavar(ctx, return_type, "return_type") push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type]) @@ -3083,10 +3112,11 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= push!(method_stmts, method_def_expr(ctx, ex, callex, method_table, main_typevar_names, arg_names, arg_types, body, ret_var)) + if !isnothing(docs) method_stmts[end] = @ast ctx docs [K"block" method_metadata := method_stmts[end] - @ast ctx docs [K"call" + [K"call" bind_docs!::K"Value" doc_obj docs[1] @@ -4273,6 +4303,27 @@ function expand_module(ctx, ex::SyntaxTree) ] end +#------------------------------------------------------------------------------- +# Expand docstring-annotated expressions + +function expand_doc(ctx, ex, docex, mod=ctx.mod) + if kind(ex) in (K"Identifier", K".") + expand_forms_2(ctx, @ast ctx docex [K"call" + bind_static_docs!::K"Value" + (kind(ex) === K"." ? ex[1] : ctx.mod::K"Value") + (kind(ex) === K"." ? 
ex[2] : ex).name_val::K"Symbol" + docex[1] + ::K"SourceLocation"(ex) + Union{}::K"Value" + ]) + elseif is_eventually_call(ex) + expand_function_def(ctx, @ast(ctx, ex, [K"function" ex [K"block"]]), + docex; doc_only=true) + else + expand_forms_2(ctx, ex, docex) + end +end + #------------------------------------------------------------------------------- # Desugaring's "big switch": expansion of some simple forms; dispatch to other # expansion functions for the rest. @@ -4339,7 +4390,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) expand_forms_2(ctx, expand_compare_chain(ctx, ex)) elseif k == K"doc" @chk numchildren(ex) == 2 - sig = expand_forms_2(ctx, ex[2], ex) + expand_doc(ctx, ex[2], ex) elseif k == K"for" expand_forms_2(ctx, expand_for(ctx, ex)) elseif k == K"comprehension" diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 67480f7bc1c3e..0fe5631c4254a 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -315,6 +315,21 @@ function bind_docs!(type::Type, docstr, lineno::LineNumberNode; field_docs=Core. Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), Union{}) end +""" +Called in the unfortunate cases (K"call", K".", K"Identifier") where docstrings +change the semantics of the expressions they annotate, no longer requiring the +expression to execute. 
+""" +function bind_static_docs!(mod::Module, name::Symbol, docstr, lnn::LineNumberNode, sigtypes::Type) + metadata = Dict{Symbol, Any}( + :linenumber => lnn.line, + :module => mod, + :path => something(lnn.file, "none"), + ) + bind = Base.Docs.Binding(mod, name) + Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), sigtypes) +end + #-------------------------------------------------- # Runtime support infrastructure for `@generated` diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 1fa6a15cfde11..51ca6d0979f5d 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -104,4 +104,59 @@ end @test lower_str(Main, "x + y").args[1].has_image_globalref === true end +@testset "docstrings: doc-only expressions" begin + local jeval(mod, str) = JuliaLowering.include_string(mod, str; expr_compat_mode=true) + jeval(test_mod, "function fun_exists(x); x; end") + jeval(test_mod, "module M end; module M2 end") + # TODO: return values are to be determined, currently Base.Docs.Binding for + # both lowering implementations. We can't return the value of the + # expression in these special cases. 
+ jeval(test_mod, "\"docstr1\" sym_noexist") + jeval(test_mod, "\"docstr2\" fun_noexist()") + jeval(test_mod, "\"docstr3\" fun_exists(sym_noexist)") + jeval(test_mod, "\"docstr4\" M.sym_noexist") + jeval(test_mod, "\"docstr5\" M.fun_noexist()") + jeval(test_mod, "\"docstr6\" M.fun_exists(sym_noexist)") + @test jeval(test_mod, "@doc sym_noexist") |> string === "docstr1\n" + @test jeval(test_mod, "@doc fun_noexist()") |> string === "docstr2\n" + @test jeval(test_mod, "@doc fun_exists(sym_noexist)") |> string === "docstr3\n" + @test jeval(test_mod, "@doc M.sym_noexist") |> string === "docstr4\n" + @test jeval(test_mod, "@doc M.fun_noexist()") |> string === "docstr5\n" + @test jeval(test_mod, "@doc M.fun_exists(sym_noexist)") |> string === "docstr6\n" + @test jeval(test_mod.M, "@doc M.sym_noexist") |> string === "docstr4\n" + @test jeval(test_mod.M, "@doc M.fun_noexist()") |> string === "docstr5\n" + @test jeval(test_mod.M, "@doc M.fun_exists(sym_noexist)") |> string === "docstr6\n" + + jeval(test_mod.M2, "\"docstr7\" M2.M2.sym_noexist") + jeval(test_mod.M2, "\"docstr8\" M2.M2.fun_noexist()") + jeval(test_mod.M2, "\"docstr9\" M2.M2.fun_exists(sym_noexist)") + @test jeval(test_mod, "@doc M2.M2.sym_noexist") |> string === "docstr7\n" + @test jeval(test_mod, "@doc M2.M2.fun_noexist()") |> string === "docstr8\n" + @test jeval(test_mod, "@doc M2.M2.fun_exists(sym_noexist)") |> string === "docstr9\n" + @test jeval(test_mod.M2, "@doc M2.M2.sym_noexist") |> string === "docstr7\n" + @test jeval(test_mod.M2, "@doc M2.M2.fun_noexist()") |> string === "docstr8\n" + @test jeval(test_mod.M2, "@doc M2.M2.fun_exists(sym_noexist)") |> string === "docstr9\n" + + # Try with signatures and type variables + jeval(test_mod, "abstract type T_exists end") + + jeval(test_mod, "\"docstr10\" f10(x::Int, y, z::T_exists)") + d = jeval(test_mod, "@doc f10") + @test d |> string === "docstr10\n" + # TODO: Is there a better way of accessing this? 
Feel free to change tests + # if docsystem storage changes. + @test d.meta[:results][1].data[:typesig] === Tuple{Int, Any, test_mod.T_exists} + + jeval(test_mod, "\"docstr11\" f11(x::T_exists, y::U, z::T) where {T, U<:Number}") + d = jeval(test_mod, "@doc f11") + @test d |> string === "docstr11\n" + @test d.meta[:results][1].data[:typesig] === Tuple{test_mod.T_exists, U, T} where {T, U<:Number} + + jeval(test_mod, "\"docstr12\" f12(x::Int, y::U, z::T=1) where {T, U<:Number}") + d = jeval(test_mod, "@doc f12") + @test d |> string === "docstr12\n" + @test d.meta[:results][1].data[:typesig] === Union{Tuple{Int64, U, T}, Tuple{Int64, U}} where {T, U<:Number} + +end + end From 943c12d503309b364d0abd39c7e01f323b865950 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 15 Sep 2025 19:45:55 -0700 Subject: [PATCH 1078/1109] Adapt to JuliaSyntax `MacroName` change (JuliaLang/JuliaLowering.jl#71) Adapt to JuliaSyntax changes in how macro names are represented in the tree. This is a bit messy but is important to keep in sync for now until we figure out how the green tree relates to (or differs from) the AST seen by macros and lowering. 
--- JuliaLowering/Project.toml | 2 +- JuliaLowering/src/ast.jl | 4 +-- JuliaLowering/src/compat.jl | 25 +++++++++++------- JuliaLowering/src/macro_expansion.jl | 38 +++++++++++++++++++++++----- JuliaLowering/src/syntax_graph.jl | 12 ++++++--- JuliaLowering/test/compat.jl | 10 ++++---- JuliaLowering/test/functions_ir.jl | 4 +-- JuliaLowering/test/macros_ir.jl | 2 +- JuliaLowering/test/misc_ir.jl | 2 +- JuliaLowering/test/utils.jl | 2 +- 10 files changed, 70 insertions(+), 31 deletions(-) diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml index 256ebf76965f3..2b01366509e8b 100644 --- a/JuliaLowering/Project.toml +++ b/JuliaLowering/Project.toml @@ -7,7 +7,7 @@ version = "1.0.0-DEV" JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" [sources] -JuliaSyntax = {rev = "e02f29f", url = "https://github.com/JuliaLang/JuliaSyntax.jl"} +JuliaSyntax = {rev = "99e975a7", url = "https://github.com/JuliaLang/JuliaSyntax.jl"} [compat] julia = "1" diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 5da635e9fd206..ae8f605a125c7 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -142,8 +142,8 @@ end function makeleaf(ctx, srcref, k::Kind, value; kws...) graph = syntax_graph(ctx) if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || - k == K"globalref" || k == K"Placeholder" || k == K"MacroName" || - k == K"StringMacroName" || k == K"CmdMacroName" + k == K"globalref" || k == K"Placeholder" || + k == K"StrMacroName" || k == K"CmdMacroName" makeleaf(graph, srcref, k; name_val=value, kws...) elseif k == K"BindingId" makeleaf(graph, srcref, k; var_id=value, kws...) 
diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 6e57a64240f5a..bb2edc3ec10be 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -267,12 +267,12 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA end deleteat!(child_exprs, 2) if a1 isa Symbol - child_exprs[1] = a1_esc(Expr(:MacroName, a1)) + child_exprs[1] = a1_esc(Expr(:macro_name, a1)) elseif a1 isa Expr && a1.head === :(.) a12,a12_esc = unwrap_esc(a1.args[2]) if a12 isa QuoteNode child_exprs[1] = a1_esc(Expr(:(.), a1.args[1], - Expr(:MacroName, a12_esc(a12.value)))) + Expr(:macro_name, a12_esc(a12.value)))) end elseif a1 isa GlobalRef && a1.mod === Core # Syntax-introduced macrocalls are listed here for reference. We @@ -415,7 +415,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA end deleteat!(callargs, 2) c1,c1_esc = unwrap_esc(callargs[1]) - callargs[1] = c1_esc(Expr(:MacroName, c1)) + callargs[1] = c1_esc(Expr(:macro_name, c1)) else st_k = K"call" end @@ -523,13 +523,20 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA end #--------------------------------------------------------------------------- - # Temporary heads introduced by us converting the parent expr - if e.head === :MacroName + # Possibly-temporary heads introduced by us converting the parent expr + if e.head === :macro_name @assert nargs === 1 - mac_name = string(e.args[1]) - mac_name = mac_name == "@__dot__" ? "@." : mac_name - st_id = _insert_tree_node(graph, K"MacroName", src, st_flags; name_val=mac_name) - return st_id, src + # Trim `@` for a correct SyntaxTree, although we need to add it back + # later for finding the macro + if e.args[1] === :(.) + mac_name = string(e.args[1][2]) + mac_name = mac_name == "@__dot__" ? "." : mac_name[2:end] + child_exprs[1] = Expr(:(.), e.args[1][1], Symbol(mac_name)) + else + mac_name = string(e.args[1]) + mac_name = mac_name == "@__dot__" ? "." 
: mac_name[2:end] + child_exprs[1] = Symbol(mac_name) + end elseif e.head === :catch_var_placeholder st_k = K"Placeholder" st_attrs[:name_val] = "" diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index fb8395e12e6d0..f378e874f40af 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -108,7 +108,12 @@ function Base.showerror(io::IO, exc::MacroExpansionError) print(io, "MacroExpansionError") ctx = exc.context if !isnothing(ctx) - print(io, " while expanding ", ctx.macrocall[1], + # Use `Expr` formatting to pretty print the macro name for now - + # there's quite a lot of special cases. We could alternatively consider + # calling sourcetext() though that won't work well if it's a + # synthetically-generated macro name path. + macname_str = string(Expr(:macrocall, Expr(ctx.macrocall[1]), nothing)) + print(io, " while expanding ", macname_str, " in module ", ctx.scope_layer.mod) end print(io, ":\n") @@ -137,11 +142,31 @@ function Base.showerror(io::IO, exc::MacroExpansionError) end end +function fixup_macro_name(ctx::MacroExpansionContext, ex::SyntaxTree) + k = kind(ex) + if k == K"StrMacroName" || k == K"CmdMacroName" + layerid = get(ex, :scope_layer, current_layer_id(ctx)) + newname = JuliaSyntax.lower_identifier_name(ex.name_val, k) + makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid, name_val=newname) + elseif k == K"macro_name" + @chk numchildren(ex) === 1 + if kind(ex[1]) === K"." + @ast ctx ex [K"." 
ex[1][1] [K"macro_name" ex[1][2]]] + else + layerid = get(ex, :scope_layer, current_layer_id(ctx)) + newname = JuliaSyntax.lower_identifier_name(ex[1].name_val, K"macro_name") + makeleaf(ctx, ex[1], ex[1], kind=kind(ex[1]), name_val=newname) + end + else + mapchildren(e->fixup_macro_name(ctx,e), ctx, ex) + end +end + function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::SyntaxTree) # `ex1` might contain a nontrivial mix of scope layers so we can't just # `eval()` it, as it's already been partially lowered by this point. # Instead, we repeat the latter parts of `lower()` here. - ex1 = expand_forms_1(ctx, ex) + ex1 = expand_forms_1(ctx, fixup_macro_name(ctx, ex)) ctx2, ex2 = expand_forms_2(ctx, ex1) ctx3, ex3 = resolve_scopes(ctx2, ex2) ctx4, ex4 = convert_closures(ctx3, ex3) @@ -368,9 +393,10 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) layerid = get(ex, :scope_layer, current_layer_id(ctx)) makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) end - elseif k == K"Identifier" || k == K"MacroName" || k == K"StringMacroName" || k == K"CmdMacroName" - layerid = get(ex, :scope_layer, current_layer_id(ctx)) - makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) + elseif k == K"StrMacroName" || k == K"CmdMacroName" || k == K"macro_name" + # These can appear outside of a macrocall, e.g. in `import` + e2 = fixup_macro_name(ctx, ex) + expand_forms_1(ctx, e2) elseif k == K"var" || k == K"char" || k == K"parens" # Strip "container" nodes @chk numchildren(ex) == 1 @@ -431,7 +457,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) @ast ctx ex [K"." 
expand_forms_1(ctx, ex[1]) e2] elseif k == K"cmdstring" @chk numchildren(ex) == 1 - e2 = @ast ctx ex [K"macrocall" "@cmd"::K"core" ex[1]] + e2 = @ast ctx ex [K"macrocall" [K"macro_name" "cmd"::K"core"] ex[1]] expand_macro(ctx, e2) elseif (k == K"call" || k == K"dotcall") # Do some initial desugaring of call and dotcall here to simplify diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 2ec127ef9d27b..d8dd51372cbca 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -457,7 +457,7 @@ attrsummary(name, value::Number) = "$name=$value" function _value_string(ex) k = kind(ex) - str = k in KSet"Identifier MacroName StringMacroName CmdMacroName" || is_operator(k) ? ex.name_val : + str = k in KSet"Identifier StrMacroName CmdMacroName" || is_operator(k) ? ex.name_val : k == K"Placeholder" ? ex.name_val : k == K"SSAValue" ? "%" : k == K"BindingId" ? "#" : @@ -611,11 +611,17 @@ function _find_SyntaxTree_macro(ex, line) # We're in the line range. Either if firstline == line && kind(c) == K"macrocall" && begin name = c[1] + if kind(name) == K"macro_name" + name = name[1] + end if kind(name) == K"." name = name[2] + if kind(name) == K"macro_name" + name = name[1] + end end - @assert kind(name) == K"MacroName" - name.name_val == "@SyntaxTree" + @assert kind(name) == K"Identifier" + name.name_val == "SyntaxTree" end # We find the node we're looking for. NB: Currently assuming a max # of one @SyntaxTree invocation per line. 
Though we could relax diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl index 93736641bd715..71f5d3005f6ad 100644 --- a/JuliaLowering/test/compat.jl +++ b/JuliaLowering/test/compat.jl @@ -494,7 +494,7 @@ const JL = JuliaLowering # `@mac x` with macro name escaped @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(Symbol("@mac")), nothing, :x)) ≈ @ast_ [K"macrocall" - [K"escape" "@mac"::K"MacroName"] + [K"escape" [K"macro_name" "mac"::K"Identifier"]] "x"::K"Identifier" ] @@ -505,7 +505,7 @@ const JL = JuliaLowering [K"escape" [K"." "A"::K"Identifier" - "@mac"::K"MacroName" + [K"macro_name" "mac"::K"Identifier"] ] ] "x"::K"Identifier" @@ -577,7 +577,7 @@ const JL = JuliaLowering Expr(:macrocall, Expr(:var"hygienic-scope", Symbol("@mac"), :other, :args), nothing, :x)) ≈ @ast_ [K"macrocall" [K"hygienic_scope" - "@mac"::K"MacroName" + [K"macro_name" "mac"::K"Identifier"] "other"::K"Identifier" # (<- normally a Module) "args"::K"Identifier" # (<- normally a LineNumberNode) ] @@ -587,7 +587,7 @@ const JL = JuliaLowering # One example of double escaping @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(esc(Symbol("@mac"))), nothing, :x)) ≈ @ast_ [K"macrocall" - [K"escape" [K"escape" "@mac"::K"MacroName"]] + [K"escape" [K"escape" [K"macro_name" "mac"::K"Identifier"]]] "x"::K"Identifier" ] @@ -600,7 +600,7 @@ const JL = JuliaLowering @ast_ [K"macrocall" [K"hygienic_scope" [K"escape" - "@mac"::K"MacroName" + [K"macro_name" "mac"::K"Identifier"] ] "other"::K"Identifier" # (<- normally a Module) "args"::K"Identifier" # (<- normally a LineNumberNode) diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index d60a397bfd6da..ccce5ffbcf1bb 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1532,7 +1532,7 @@ end 18 (call core.svec %₁₅ %₁₆ %₁₇) 19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] - 1 (meta :generated (new 
JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0080), 0x00000046, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0080), 0x0000003b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000016, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000010, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000019, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000013, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])])) (call core.svec :#self# :x :y) (call core.svec))) + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68]), 1, (macrocall (macro_name 1-1::@-t 2-10::Identifier) 11-11::Whitespace-t (function 12-19::function-t 20-20::Whitespace-t (call 21-36::Identifier 37-37::(-t 38-38::Identifier 39-39::,-t 40-40::Whitespace-t 41-41::Identifier 42-42::)-t) (block 43-47::NewlineWs-t (call 48-61::Identifier 62-62::(-t 63-63::Identifier 64-64::,-t 65-65::Identifier 66-66::)-t) 67-67::NewlineWs-t) 68-70::end-t))) (call core.svec :#self# :x :y) (call 
core.svec))) 2 (meta :generated_only) 3 (return core.nothing) 20 latestworld @@ -1578,7 +1578,7 @@ end 18 (call core.svec %₁₅ %₁₆ %₁₇) 19 --- method core.nothing %₁₈ slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff] - 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0080), 0x0000010f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"function", 0x0001), 0x00000008, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000001b, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000015, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x000000e8, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x0000001c, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x0000000d, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000007, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0080), 0x0000009d, 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"if", 0x0001), 0x00000002, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"macrocall", 0x0080), 0x0000000a, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"@", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"MacroName", 0x0000), 0x00000009, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000052, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000044, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"quote", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x0000000d, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000026, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, 
nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000014, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000e, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"else", 0x0001), 0x00000004, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"block", 0x0080), 0x00000037, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000009, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0080), 0x00000029, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 
0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"=", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"call", 0x0080), 0x00000017, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000011, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)])]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000005, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"tuple", 0x0180), 0x0000001f, JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}[JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"(", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000c, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K",", 0x0001), 0x00000001, nothing), 
JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Whitespace", 0x0001), 0x00000001, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"Identifier", 0x0000), 0x0000000f, nothing), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K")", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"NewlineWs", 0x0001), 0x00000001, nothing)]), JuliaSyntax.GreenNode{JuliaSyntax.SyntaxHead}(JuliaSyntax.SyntaxHead(K"end", 0x0001), 0x00000003, nothing)])) (call core.svec :#self# :x :y) (call core.svec))) + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, (function 1-8::function-t 9-9::Whitespace-t (call 10-30::Identifier 31-31::(-t 32-32::Identifier 33-33::,-t 34-34::Whitespace-t 35-35::Identifier 36-36::)-t) (block 37-41::NewlineWs-t (= 42-53::Identifier 54-54::Whitespace-t 55-55::=-t 56-56::Whitespace-t (call 57-63::Identifier 64-64::(-t 65-65::Identifier 66-66::,-t 67-67::Whitespace-t 68-68::Identifier 69-69::)-t)) 70-74::NewlineWs-t (if 75-76::if-t 77-77::Whitespace-t (macrocall (macro_name 78-78::@-t 79-87::Identifier)) (block 88-96::NewlineWs-t (quote (block 97-101::quote-t 102-114::NewlineWs-t (= 115-129::Identifier 130-130::Whitespace-t 131-131::=-t 132-132::Whitespace-t (call 133-146::Identifier 147-147::(-t 148-148::Identifier 149-149::,-t 150-150::Whitespace-t 151-151::Identifier 152-152::)-t)) 153-161::NewlineWs-t 162-164::end-t)) 165-169::NewlineWs-t) 170-173::else-t (block 174-182::NewlineWs-t (= 183-197::Identifier 198-198::Whitespace-t 199-199::=-t 200-200::Whitespace-t 
(call 201-217::Identifier 218-218::(-t 219-219::Identifier 220-220::,-t 221-221::Whitespace-t 222-222::Identifier 223-223::)-t)) 224-228::NewlineWs-t) 229-231::end-t) 232-236::NewlineWs-t (tuple-p 237-237::(-t 238-249::Identifier 250-250::,-t 251-251::Whitespace-t 252-266::Identifier 267-267::)-t) 268-268::NewlineWs-t) 269-271::end-t)) (call core.svec :#self# :x :y) (call core.svec))) 2 TestMod.bothgen 3 (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y)) 4 TestMod.some_nongen_stuff diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 506a6cca5de79..9547455418def 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -147,7 +147,7 @@ _never_exist = @m_not_exist 42 #--------------------- MacroExpansionError while expanding @m_not_exist in module Main.TestMod: _never_exist = @m_not_exist 42 -# └─────────┘ ── Macro not found +# └──────────┘ ── Macro not found Caused by: UndefVarError: `@m_not_exist` not defined in `Main.TestMod` Suggestion: check for spelling errors or missing imports. diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 8acb0ada74957..920bb2d5e6bc3 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -308,7 +308,7 @@ GC.@preserve a b g() begin body end #--------------------- -MacroExpansionError while expanding (. 
GC @preserve) in module Main.TestMod: +MacroExpansionError while expanding GC.@preserve in module Main.TestMod: GC.@preserve a b g() begin # └─┘ ── Preserved variable must be a symbol body diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index e0a37947b48ea..4507f93a6b45a 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -160,7 +160,7 @@ end function format_ir_for_test(mod, case) ex = parsestmt(SyntaxTree, case.input) try - if kind(ex) == K"macrocall" && kind(ex[1]) == K"MacroName" && ex[1].name_val == "@ast_" + if kind(ex) == K"macrocall" && kind(ex[1]) == K"macro_name" && ex[1][1].name_val == "ast_" # Total hack, until @ast_ can be implemented in terms of new-style # macros. ex = Base.eval(mod, Expr(ex)) From fb48de1ad13caa3cc84aab139f4b41c1cf5416fb Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 19 Sep 2025 17:55:00 +1000 Subject: [PATCH 1079/1109] Interpret `module` expressions at top level in `eval()` This is a step toward an iteration interface for lowering which can return a sequence of CodeInfo to be evaluated for top level and module expressions. This also restricts lowering of module expressions to be syntactically at top level (ie, not inside a top level thunk), consistent with the existing way that they're handled in eval. 
--- JuliaLowering/src/desugaring.jl | 37 +---------------- JuliaLowering/src/eval.jl | 61 ++++++++++++++++++++++++---- JuliaLowering/src/macro_expansion.jl | 17 +++++--- JuliaLowering/src/runtime.jl | 37 +---------------- JuliaLowering/test/demo.jl | 2 +- JuliaLowering/test/misc_ir.jl | 30 +++----------- JuliaLowering/test/modules.jl | 27 +++++++++--- 7 files changed, 96 insertions(+), 115 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index bcd504e4ba274..84ccbac98c793 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -19,7 +19,7 @@ function DesugaringContext(ctx, expr_compat_mode::Bool) DesugaringContext(graph, ctx.bindings, ctx.scope_layers, - first(ctx.scope_layers).mod, + current_layer(ctx).mod, expr_compat_mode) end @@ -4270,39 +4270,6 @@ function expand_public(ctx, ex) ] end -#------------------------------------------------------------------------------- -# Expand module definitions - -function expand_module(ctx, ex::SyntaxTree) - modname_ex = ex[1] - @chk kind(modname_ex) == K"Identifier" - modname = modname_ex.name_val - - std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) - - body = ex[2] - @chk kind(body) == K"block" - - @ast ctx ex [K"block" - [K"assert" - "global_toplevel_only"::K"Symbol" - [K"inert" ex] - ] - [K"call" - eval_module ::K"Value" - ctx.mod ::K"Value" - modname ::K"String" - std_defs ::K"Bool" - ctx.expr_compat_mode ::K"Bool" - [K"inert"(body) - [K"toplevel" - children(body)... 
- ] - ] - ] - ] -end - #------------------------------------------------------------------------------- # Expand docstring-annotated expressions @@ -4477,7 +4444,7 @@ function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) elseif k == K"$" throw(LoweringError(ex, "`\$` expression outside string or quote block")) elseif k == K"module" - expand_module(ctx, ex) + throw(LoweringError(ex, "`module` is only allowed at top level")) elseif k == K"import" || k == K"using" expand_import_or_using(ctx, ex) elseif k == K"export" || k == K"public" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index e3b4d6885922a..8e719249778e1 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -358,18 +358,63 @@ end #------------------------------------------------------------------------------- # Our version of eval takes our own data structures -@fzone "JL: eval" function eval(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false) +@fzone "JL: eval" function eval(mod::Module, ex::SyntaxTree; + expr_compat_mode::Bool=false, macro_world=Base.get_world_counter()) + graph = ensure_macro_attributes(syntax_graph(ex)) + ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) + _eval(ctx, ex) +end + +function _eval_stmts(ctx, exs) + res = nothing + for ex in exs + res = _eval(ctx, ex) + end + res +end + +function _eval_module_body(ctx, mod, ex) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod, + current_layer_id(ctx), false) + push!(ctx.scope_layers, new_layer) + push!(ctx.scope_layer_stack, new_layer.id) + stmts = kind(ex[2]) == K"block" ? ex[2][1:end] : ex[2:2] + _eval_stmts(ctx, stmts) + pop!(ctx.scope_layer_stack) +end + +function _eval_module(ctx, ex) + # Here we just use `eval()` with an Expr to create a module. + # TODO: Refactor jl_eval_module_expr() in the runtime so that we can avoid + # eval. 
+ std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) + newmod_name = Symbol(ex[1].name_val) + Core.eval(current_layer(ctx).mod, + Expr(:module, std_defs, newmod_name, + Expr(:block, Expr(:call, + newmod->_eval_module_body(ctx, newmod, ex), + newmod_name)))) +end + +function _eval(ctx, ex::SyntaxTree) k = kind(ex) if k == K"toplevel" - x = nothing - for e in children(ex) - x = eval(mod, e; expr_compat_mode) + _eval_stmts(ctx, children(ex)) + elseif k == K"module" + _eval_module(ctx, ex) + else + ex1 = expand_forms_1(ctx, ex) + if kind(ex1) in KSet"toplevel module" + _eval(ctx, ex1) + else + ctx2, ex2 = expand_forms_2(ctx, ex1) + ctx3, ex3 = resolve_scopes(ctx2, ex2) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir(ctx4, ex4) + thunk = to_lowered_expr(ex5) + Core.eval(current_layer(ctx).mod, thunk) end - return x end - linear_ir = lower(mod, ex; expr_compat_mode) - thunk = to_lowered_expr(linear_ir) - Core.eval(mod, thunk) end """ diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index f378e874f40af..caeb027950345 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -520,23 +520,28 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) end end +function ensure_macro_attributes(graph) + ensure_attributes(graph, + var_id=IdTag, + scope_layer=LayerId, + __macro_ctx__=Nothing, + meta=CompileHints) +end + @fzone "JL: macroexpand" function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt) if kind(ex) == K"local" # This error assumes we're expanding the body of a top level thunk but # we might want to make that more explicit in the pass system. 
throw(LoweringError(ex, "local declarations have no effect outside a scope")) end - graph = ensure_attributes(syntax_graph(ex), - var_id=IdTag, - scope_layer=LayerId, - __macro_ctx__=Nothing, - meta=CompileHints) + graph = ensure_macro_attributes(syntax_graph(ex)) ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) ex2 = expand_forms_1(ctx, reparent(ctx, ex)) graph2 = delete_attributes(graph, :__macro_ctx__) # TODO: Returning the context with pass-specific mutable data is a bad way # to carry state into the next pass. We might fix this by attaching such # data to the graph itself as global attributes? - ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, LayerId[], expr_compat_mode, macro_world) + ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, ctx.scope_layer_stack, + expr_compat_mode, macro_world) return ctx2, reparent(ctx2, ex2) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index 0fe5631c4254a..ec175047a7735 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -207,35 +207,6 @@ end #-------------------------------------------------- # Functions which create modules or mutate their bindings -# Construct new bare module including only the "default names" -# -# using Core -# const modname = modval -# public modname -# -# And run statments in the toplevel expression `body` -function eval_module(parentmod::Module, modname::AbstractString, std_defs::Bool, - expr_compat_mode::Bool, body::SyntaxTree) - # Here we just use `eval()` with an Expr. - # If we wanted to avoid this we'd need to reproduce a lot of machinery from - # jl_eval_module_expr() - # - # 1. Register / deparent toplevel modules - # 2. Set binding in parent module - # 3. Deal with replacing modules - # * Warn if replacing - # * Root old module being replaced - # 4. 
Run __init__ - # * Also run __init__ for any children after parent is defined - # mod = @ccall jl_new_module(Symbol(modname)::Symbol, parentmod::Module)::Any - # ... - name = Symbol(modname) - eval_module_body(mod) = eval(mod, body; expr_compat_mode=expr_compat_mode) - Core.eval(parentmod, - Expr(:module, std_defs, name, - Expr(:block, Expr(:call, eval_module_body, name)))) -end - const _Base_has_eval_import = isdefined(Base, :_eval_import) function eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...) @@ -357,11 +328,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, value=Any, name_val=String) # Attributes for macro expansion - graph = ensure_attributes(graph, - var_id=IdTag, - scope_layer=LayerId, - __macro_ctx__=Nothing, - meta=CompileHints, + graph = ensure_attributes(ensure_macro_attributes(graph), # Additional attribute for resolve_scopes, for # adding our custom lambda below is_toplevel_thunk=Bool @@ -393,7 +360,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0)) ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__), ctx1.bindings, ctx1.scope_layers, - LayerId[], false, macro_world) + ctx1.scope_layer_stack, false, macro_world) ex1 = reparent(ctx1, ex1) # Desugaring diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index a6d12fe2d192d..aebb6f8d9b35b 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -893,7 +893,7 @@ ex = parsestmt(SyntaxTree, src, filename="foo.jl") ctx3, ex_scoped, ctx4, ex_converted, ctx5, ex_compiled, - ex_expr, eval_result) = debug_lower(M, ex; verbose=true) + ex_expr, eval_result) = debug_lower(M, ex; verbose=true, do_eval=true) # Automatic test reduction # bad = reduce_any_failing_toplevel(JuliaLowering, joinpath(@__DIR__, 
"../src/desugaring.jl")) diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 920bb2d5e6bc3..a93e5ac194732 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -183,38 +183,18 @@ LoweringError: # └─┘ ── Invalid named tuple element ######################################## -# Module lowering -module Mod - body - stmts -end -#--------------------- -1 (call JuliaLowering.eval_module TestMod "Mod" true false (inert (toplevel body stmts))) -2 (return %₁) - -######################################## -# Bare module lowering -baremodule BareMod - body - stmts -end -#--------------------- -1 (call JuliaLowering.eval_module TestMod "BareMod" false false (inert (toplevel body stmts))) -2 (return %₁) - -######################################## -# Error: Modules not allowed in local scope -let +# Error: Modules not allowed inside blocks +begin module C end end #--------------------- LoweringError: -let +begin # ┌─────── module C end -#─────┘ ── module is only allowed in global scope +#─────┘ ── `module` is only allowed at top level end ######################################## @@ -229,7 +209,7 @@ function f() # ┌─────── module C end -#─────┘ ── module is only allowed in global scope +#─────┘ ── `module` is only allowed at top level end ######################################## diff --git a/JuliaLowering/test/modules.jl b/JuliaLowering/test/modules.jl index 541a609c24167..a68c5f8a8b6e2 100644 --- a/JuliaLowering/test/modules.jl +++ b/JuliaLowering/test/modules.jl @@ -1,4 +1,4 @@ -@testset "JuliaLowering.jl" begin +@testset "modules" begin test_mod = Module() @@ -26,12 +26,29 @@ end @test !isdefined(B, :eval) @test !isdefined(B, :Base) -# modules allowed in nested code in global scope -@test typeof(JuliaLowering.include_string(test_mod, """ -begin +# Module init order +Amod = JuliaLowering.include_string(test_mod, """ +module A + init_order = [] + __init__() = push!(init_order, "A") + module B + using ..A + __init__() = 
push!(A.init_order, "B") + end module C + using ..A + __init__() = push!(A.init_order, "C") + module D + using ...A + __init__() = push!(A.init_order, "D") + end + module E + using ...A + __init__() = push!(A.init_order, "E") + end end end -""")) == Module +""") +@test Amod.init_order == ["B", "D", "E", "C", "A"] end From 1576c5b13662635a119906b7528ad4c6b79d7964 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 20 Sep 2025 10:54:14 +1000 Subject: [PATCH 1080/1109] Incremental lowering API Julia's incrementally evaluated top level semantics make it rather tricky to design a lowering interface for top level and module level expressions. Currently these expressions are effectively *interpreted* by eval rather than ever being processed by lowering. However, I'd like a cleaner separation between "low level evaluation" and lowering, such that Core can contain only the low level eval "driver function". I'd like to propose the split as follows: * "Low level" evaluation is about executing a sequence of thunks represented as `CodeInfo` and creating modules for those to be executed inside. * Lowering is about expression processing. In principle, the runtime's view of `eval()` shouldn't know about `Expr` or `SyntaxTree` (or whatever AST we use) - that should be left to the compiler frontend. A useful way to think about the duties of the frontend is to consider the question "What if we wanted to host another language on top of the Julia runtime?". If we can eventually achieve that without ever generating Julia `Expr` then we will have succeeded in separating the frontend. To implement all this I've recast lowering as an incremental iterative API in this change. Thus it's the job of `eval()` to simply evaluate thunks and create new modules as driven by lowering. (Perhaps we'd move this definition of `eval()` over to the Julia runtime before 1.13.) 
The iteration API is currently oddly bespoke and arguably somewhat non-Julian for two reasons: * Lowering knows when new modules are required, and may request them with `:begin_module`. However `eval()` generates those modules so they need to be passed back into lowering. So we can't just use `Base.iterate()`. (Put a different way, we have a situation which is suited to coroutines but we don't want to use full Julia `Task`s for this.) * We might want to implement this `eval()` in Julia's C runtime code or early in bootstrap. Hence using SimpleVector and Symbol as the return values of `lower_step()` We might consider changing at least the second of these choices, depending on how we end up integrating this into Base. --- JuliaLowering/src/eval.jl | 201 ++++++++++++++++++++------- JuliaLowering/src/macro_expansion.jl | 15 +- JuliaLowering/test/demo.jl | 2 +- 3 files changed, 165 insertions(+), 53 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 8e719249778e1..fe859ae0b52ed 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -1,3 +1,6 @@ +# Non-incremental lowering API for non-toplevel non-module expressions. +# May be removed? + function lower(mod::Module, ex0; expr_compat_mode=false, world=Base.get_world_counter()) ctx1, ex1 = expand_forms_1( mod, ex0, expr_compat_mode, world) ctx2, ex2 = expand_forms_2( ctx1, ex1) @@ -12,6 +15,96 @@ function macroexpand(mod::Module, ex; expr_compat_mode=false, world=Base.get_wor ex1 end +# Incremental lowering API which can manage toplevel and module expressions. +# +# This iteration API is oddly bespoke and arguably somewhat non-Julian for two +# reasons: +# +# * Lowering knows when new modules are required, and may request them with +# `:begin_module`. However `eval()` generates those modules so they need to +# be passed back into lowering. So we can't just use `Base.iterate()`. 
(Put a +# different way, we have a situation which is suited to coroutines but we +# don't want to use full Julia `Task`s for this.) +# * We might want to implement this `eval()` in Julia's C runtime code or early +# in bootstrap. Hence using SimpleVector and Symbol as the return values of +# `lower_step()` +# +# We might consider changing at least the second of these choices, depending on +# how we end up putting this into Base. + +struct LoweringIterator{GraphType} + ctx::MacroExpansionContext{GraphType} + todo::Vector{Tuple{SyntaxTree{GraphType}, Bool, Int}} +end + +function lower_init(ex::SyntaxTree, mod::Module, macro_world::UInt; expr_compat_mode::Bool=false) + graph = ensure_macro_attributes(syntax_graph(ex)) + ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) + ex = reparent(ctx, ex) + LoweringIterator{typeof(graph)}(ctx, [(ex, false, 0)]) +end + +function lower_step(iter, push_mod=nothing) + if !isnothing(push_mod) + push_layer!(iter.ctx, push_mod, false) + end + + if isempty(iter.todo) + return Core.svec(:done) + end + + ex, is_module_body, child_idx = pop!(iter.todo) + if child_idx > 0 + next_child = child_idx + 1 + if child_idx <= numchildren(ex) + push!(iter.todo, (ex, is_module_body, next_child)) + ex = ex[child_idx] + else + if is_module_body + pop_layer!(iter.ctx) + return Core.svec(:end_module) + else + return lower_step(iter) + end + end + end + + k = kind(ex) + if !(k in KSet"toplevel module") + ex = expand_forms_1(iter.ctx, ex) + k = kind(ex) + end + if k == K"toplevel" + push!(iter.todo, (ex, false, 1)) + return lower_step(iter) + elseif k == K"module" + name = ex[1] + if kind(name) != K"Identifier" + throw(LoweringError(name, "Expected module name")) + end + newmod_name = Symbol(name.name_val) + body = ex[2] + if kind(body) != K"block" + throw(LoweringError(body, "Expected block in module body")) + end + std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) + loc = source_location(LineNumberNode, ex) + push!(iter.todo, 
(body, true, 1)) + return Core.svec(:begin_module, newmod_name, std_defs, loc) + else + # Non macro expansion parts of lowering + ctx2, ex2 = expand_forms_2(iter.ctx, ex) + ctx3, ex3 = resolve_scopes(ctx2, ex2) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir(ctx4, ex4) + thunk = to_lowered_expr(ex5) + return Core.svec(:thunk, thunk) + end +end + + +#------------------------------------------------------------------------------- + function codeinfo_has_image_globalref(@nospecialize(e)) if e isa GlobalRef return 0x00 !== @ccall jl_object_in_image(e.mod::Any)::UInt8 @@ -274,7 +367,7 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) elseif k == K"code_info" ir = to_code_info(ex[1], ex.slots) if ex.is_toplevel_thunk - Expr(:thunk, ir) + Expr(:thunk, ir) # TODO: Maybe nice to just return a CodeInfo here? else ir end @@ -357,64 +450,74 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) end #------------------------------------------------------------------------------- -# Our version of eval takes our own data structures +# Our version of eval - should be upstreamed though? @fzone "JL: eval" function eval(mod::Module, ex::SyntaxTree; - expr_compat_mode::Bool=false, macro_world=Base.get_world_counter()) - graph = ensure_macro_attributes(syntax_graph(ex)) - ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) - _eval(ctx, ex) + macro_world::UInt=Base.get_world_counter(), + opts...) + iter = lower_init(ex, mod, macro_world; opts...) 
+ _eval(mod, iter) end -function _eval_stmts(ctx, exs) - res = nothing - for ex in exs - res = _eval(ctx, ex) +if VERSION >= v"1.13.0-DEV.1199" # https://github.com/JuliaLang/julia/pull/59604 + +function _eval(mod, iter) + modules = Module[] + new_mod = nothing + result = nothing + while true + thunk = lower_step(iter, new_mod)::Core.SimpleVector + new_mod = nothing + type = thunk[1]::Symbol + if type == :done + break + elseif type == :begin_module + push!(modules, mod) + mod = @ccall jl_begin_new_module(mod::Any, thunk[2]::Symbol, thunk[3]::Cint, + thunk[4].file::Cstring, thunk[4].line::Cint)::Module + new_mod = mod + elseif type == :end_module + @ccall jl_end_new_module(mod::Module)::Cvoid + result = mod + mod = pop!(modules) + else + @assert type == :thunk + result = Core.eval(mod, thunk[2]) + end end - res + @assert isempty(modules) + return result end -function _eval_module_body(ctx, mod, ex) - new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod, - current_layer_id(ctx), false) - push!(ctx.scope_layers, new_layer) - push!(ctx.scope_layer_stack, new_layer.id) - stmts = kind(ex[2]) == K"block" ? ex[2][1:end] : ex[2:2] - _eval_stmts(ctx, stmts) - pop!(ctx.scope_layer_stack) -end - -function _eval_module(ctx, ex) - # Here we just use `eval()` with an Expr to create a module. - # TODO: Refactor jl_eval_module_expr() in the runtime so that we can avoid - # eval. 
- std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) - newmod_name = Symbol(ex[1].name_val) - Core.eval(current_layer(ctx).mod, - Expr(:module, std_defs, newmod_name, - Expr(:block, Expr(:call, - newmod->_eval_module_body(ctx, newmod, ex), - newmod_name)))) -end +else -function _eval(ctx, ex::SyntaxTree) - k = kind(ex) - if k == K"toplevel" - _eval_stmts(ctx, children(ex)) - elseif k == K"module" - _eval_module(ctx, ex) - else - ex1 = expand_forms_1(ctx, ex) - if kind(ex1) in KSet"toplevel module" - _eval(ctx, ex1) +function _eval(mod, iter, new_mod=nothing) + in_new_mod = !isnothing(new_mod) + result = nothing + while true + thunk = lower_step(iter, new_mod)::Core.SimpleVector + new_mod = nothing + type = thunk[1]::Symbol + if type == :done + @assert !in_new_mod + break + elseif type == :begin_module + name = thunk[2]::Symbol + std_defs = thunk[3] + result = Core.eval(mod, + Expr(:module, std_defs, name, + Expr(:block, thunk[4], Expr(:call, m->_eval(m, iter, m), name))) + ) + elseif type == :end_module + @assert in_new_mod + return mod else - ctx2, ex2 = expand_forms_2(ctx, ex1) - ctx3, ex3 = resolve_scopes(ctx2, ex2) - ctx4, ex4 = convert_closures(ctx3, ex3) - ctx5, ex5 = linearize_ir(ctx4, ex4) - thunk = to_lowered_expr(ex5) - Core.eval(current_layer(ctx).mod, thunk) + @assert type == :thunk + result = Core.eval(mod, thunk[2]) end end + return result +end + end """ diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index caeb027950345..a04cbacfef1e5 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -29,6 +29,16 @@ function MacroExpansionContext(graph::SyntaxGraph, mod::Module, expr_compat_mode MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)], expr_compat_mode, world) end +function push_layer!(ctx::MacroExpansionContext, mod::Module, is_macro_expansion::Bool) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod, + current_layer_id(ctx), 
is_macro_expansion) + push!(ctx.scope_layers, new_layer) + push!(ctx.scope_layer_stack, new_layer.id) +end +function pop_layer!(ctx::MacroExpansionContext) + pop!(ctx.scope_layer_stack) +end + current_layer(ctx::MacroExpansionContext) = ctx.scope_layers[last(ctx.scope_layer_stack)] current_layer_id(ctx::MacroExpansionContext) = last(ctx.scope_layer_stack) @@ -342,10 +352,9 @@ function expand_macro(ctx, ex) expanded = fix_toplevel_expansion(ctx, expanded, mod_for_ast, macro_loc) new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, current_layer_id(ctx), true) - push!(ctx.scope_layers, new_layer) - push!(ctx.scope_layer_stack, new_layer.id) + push_layer!(ctx, mod_for_ast, true) expanded = expand_forms_1(ctx, expanded) - pop!(ctx.scope_layer_stack) + pop_layer!(ctx) end return expanded end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl index aebb6f8d9b35b..0b2fe25a82cc2 100644 --- a/JuliaLowering/test/demo.jl +++ b/JuliaLowering/test/demo.jl @@ -74,7 +74,7 @@ end #------------------------------------------------------------------------------- # Module containing macros used in the demo. 
-define_macros = true +define_macros = false if !define_macros eval(:(module M end)) else From 355b18b9a77d8ec06401a5768afb9bba917678d6 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Thu, 2 Oct 2025 10:53:08 -0400 Subject: [PATCH 1081/1109] Add `:no_constprop` as expected `Expr(:meta, ...)` argument (JuliaLang/JuliaLowering.jl#88) --- JuliaLowering/src/compat.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index bb2edc3ec10be..e683e6f23d42b 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -472,7 +472,7 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA end elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, - :aggressive_constprop, :specialize, :compile, :infer, + :no_constprop, :aggressive_constprop, :specialize, :compile, :infer, :nospecializeinfer, :force_compile, :doc) # TODO: Some need to be handled in lowering child_exprs[1] = Expr(:quoted_symbol, e.args[1]) From 5f68f629d8dbc78258854bf709bf9a0c332ef72b Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Fri, 3 Oct 2025 15:33:15 +0900 Subject: [PATCH 1082/1109] Lower cglobal to GlobalRef(Core.Intrinsics, :cglobal) (JuliaLang/JuliaLowering.jl#86) Fix the lowering of `cglobal` to produce `GlobalRef(Core.Intrinsics, :cglobal)` instead of a bare symbol `:cglobal`. The inference validator requires cglobal to be a GlobalRef: https://github.com/JuliaLang/julia/blob/7a8cd6e202f1d1216a6c0c0b928fb43a123cada8/Compiler/src/validation.jl#L87 With this commit `_to_lowered_expr` resolves `cglobal` to `GlobalRef(Core.Intrinsics, :cglobal)`, matching Julia's builtin lowerer behavior and satisfying the inference validator's requirements. 
--- JuliaLowering/src/eval.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index fe859ae0b52ed..808143e55003b 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -331,8 +331,10 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) elseif k == K"core" name = ex.name_val if name == "cglobal" - # cglobal isn't a true name within core - instead it's a builtin - :cglobal + # Inference expects cglobal as call argument to be `GlobalRef`, + # so we resolve that name as a symbol of `Core.Intrinsics` here. + # https://github.com/JuliaLang/julia/blob/7a8cd6e202f1d1216a6c0c0b928fb43a123cada8/Compiler/src/validation.jl#L87 + GlobalRef(Core.Intrinsics, :cglobal) elseif name == "nothing" # Translate Core.nothing into literal `nothing`s (flisp uses a # special form (null) for this during desugaring, etc) From 1ed4b93db7b44a9a7d0932d45a46ca50aa20950d Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Sat, 4 Oct 2025 00:23:48 +0200 Subject: [PATCH 1083/1109] Preserve method-related meta forms through lowering (JuliaLang/JuliaLowering.jl#90) Co-authored-by: Shuhei Kadowaki --- JuliaLowering/src/ast.jl | 8 ++++++ JuliaLowering/src/compat.jl | 50 +++++++++++++++++++++------------- JuliaLowering/src/eval.jl | 30 +++++++++++--------- JuliaLowering/src/linear_ir.jl | 21 ++++++++++---- JuliaLowering/test/macros.jl | 44 ++++++++++++++++++++++++++++++ 5 files changed, 116 insertions(+), 37 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index ae8f605a125c7..34ac2939a42fc 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -519,6 +519,14 @@ end # the middle of a pass. const CompileHints = Base.ImmutableDict{Symbol,Any} +function CompileHints(d::Dict{Symbol, Any}) + id = CompileHints() + for (k, v) in d + id = CompileHints(id, k, v) + end + id +end + function setmeta!(ex::SyntaxTree; kws...) 
@assert length(kws) == 1 # todo relax later ? key = first(keys(kws)) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index e683e6f23d42b..91037bc256b36 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -459,27 +459,39 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA st_k = K"hygienic_scope" elseif e.head === :meta # Messy and undocumented. Only sometimes we want a K"meta". - @assert e.args[1] isa Symbol - if e.args[1] === :nospecialize - if nargs > 2 - st_k = K"block" - # Kick the can down the road (should only be simple atoms?) - child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end]) + if e.args[1] isa Expr && e.args[1].head === :purity + st_k = K"meta" + child_exprs = [Expr(:quoted_symbol, :purity), Base.EffectsOverride(e.args[1].args...)] + else + @assert e.args[1] isa Symbol + if e.args[1] === :nospecialize + if nargs > 2 + st_k = K"block" + # Kick the can down the road (should only be simple atoms?) + child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end]) + else + st_id, src = _insert_convert_expr(e.args[2], graph, src) + setmeta!(SyntaxTree(graph, st_id); nospecialize=true) + return st_id, src + end + elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, + :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, + :no_constprop, :aggressive_constprop, :specialize, :compile, :infer, + :nospecializeinfer, :force_compile, :propagate_inbounds, :doc) + # TODO: Some need to be handled in lowering + for (i, ma) in enumerate(e.args) + if ma isa Symbol + # @propagate_inbounds becomes (meta inline + # propagate_inbounds), but usually(?) 
only args[1] is + # converted here + child_exprs[i] = Expr(:quoted_symbol, e.args[i]) + end + end else - st_id, src = _insert_convert_expr(e.args[2], graph, src) - setmeta!(SyntaxTree(graph, st_id); nospecialize=true) - return st_id, src + # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys. + @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") + child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end - elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, - :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, - :no_constprop, :aggressive_constprop, :specialize, :compile, :infer, - :nospecializeinfer, :force_compile, :doc) - # TODO: Some need to be handled in lowering - child_exprs[1] = Expr(:quoted_symbol, e.args[1]) - else - # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys. - @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") - child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end elseif e.head === :scope_layer @assert nargs === 2 diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 808143e55003b..e7ed94e180407 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -218,7 +218,7 @@ end # Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the # Julia runtime -function to_code_info(ex::SyntaxTree, slots::Vector{Slot}) +function to_code_info(ex::SyntaxTree, slots::Vector{Slot}, meta::CompileHints) stmts = Any[] current_codelocs_stack = ir_debug_info_state(ex) @@ -267,18 +267,22 @@ function to_code_info(ex::SyntaxTree, slots::Vector{Slot}) # - call site @assume_effects ssaflags = zeros(UInt32, length(stmts)) - # TODO: Set true for @propagate_inbounds - propagate_inbounds = false + propagate_inbounds = + get(meta, :propagate_inbounds, false) # TODO: Set true if there's a foreigncall - has_fcall = false - # TODO: Set for @nospecializeinfer - nospecializeinfer = false - # TODO: Set based on @inline -> 0x01 
or @noinline -> 0x02 - inlining = 0x00 - # TODO: Set based on @constprop :aggressive -> 0x01 or @constprop :none -> 0x02 - constprop = 0x00 - # TODO: Set based on Base.@assume_effects - purity = 0x0000 + has_fcall = false + nospecializeinfer = + get(meta, :nospecializeinfer, false) + inlining = + get(meta, :inline, false) ? 0x01 : + get(meta, :noinline, false) ? 0x02 : 0x00 + constprop = + get(meta, :aggressive_constprop, false) ? 0x01 : + get(meta, :no_constprop, false) ? 0x02 : 0x00 + purity = + let eo = get(meta, :purity, nothing) + isnothing(eo) ? 0x0000 : Base.encode_effects_override(eo) + end # The following CodeInfo fields always get their default values for # uninferred code. @@ -367,7 +371,7 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) e1 = ex[1] getmeta(ex, :as_Expr, false) ? QuoteNode(Expr(e1)) : e1 elseif k == K"code_info" - ir = to_code_info(ex[1], ex.slots) + ir = to_code_info(ex[1], ex.slots, ex.meta) if ex.is_toplevel_thunk Expr(:thunk, ir) # TODO: Maybe nice to just return a CodeInfo here? 
else diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index c1133145f053c..d15214641fa99 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -78,6 +78,7 @@ struct LinearIRContext{GraphType} <: AbstractLoweringContext finally_handlers::Vector{FinallyHandler{GraphType}} symbolic_jump_targets::Dict{String,JumpTarget{GraphType}} symbolic_jump_origins::Vector{JumpOrigin{GraphType}} + meta::Dict{Symbol, Any} mod::Module end @@ -89,7 +90,7 @@ function LinearIRContext(ctx, is_toplevel_thunk, lambda_bindings, return_type) is_toplevel_thunk, lambda_bindings, rett, Dict{String,JumpTarget{GraphType}}(), SyntaxList(ctx), SyntaxList(ctx), Vector{FinallyHandler{GraphType}}(), Dict{String,JumpTarget{GraphType}}(), - Vector{JumpOrigin{GraphType}}(), ctx.mod) + Vector{JumpOrigin{GraphType}}(), Dict{Symbol, Any}(), ctx.mod) end function current_lambda_bindings(ctx::LinearIRContext) @@ -807,7 +808,17 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) nothing end elseif k == K"meta" - emit(ctx, ex) + @chk numchildren(ex) >= 1 + if ex[1].name_val in ("inline", "noinline", "propagate_inbounds", + "nospecializeinfer", "aggressive_constprop", "no_constprop") + for c in children(ex) + ctx.meta[Symbol(c.name_val)] = true + end + elseif ex[1].name_val === "purity" + ctx.meta[Symbol(ex[1].name_val)] = ex[2].value::Base.EffectsOverride + else + emit(ctx, ex) + end if needs_value val = @ast ctx ex "nothing"::K"core" if in_tail_pos @@ -1099,10 +1110,9 @@ function compile_lambda(outer_ctx, ex) @assert info.kind == :static_parameter slot_rewrites[id] = i end - # @info "" @ast ctx ex [K"block" ctx.code...] code = renumber_body(ctx, ctx.code, slot_rewrites) @ast ctx ex [K"code_info"(is_toplevel_thunk=ex.is_toplevel_thunk, - slots=slots) + slots=slots, meta=CompileHints(ctx.meta)) [K"block"(ex[3]) code... ] @@ -1131,7 +1141,8 @@ loops, etc) to gotos and exception handling to enter/leave. 
We also convert SyntaxList(graph), SyntaxList(graph), Vector{FinallyHandler{GraphType}}(), Dict{String, JumpTarget{GraphType}}(), - Vector{JumpOrigin{GraphType}}(), ctx.mod) + Vector{JumpOrigin{GraphType}}(), + Dict{Symbol, Any}(), ctx.mod) res = compile_lambda(_ctx, reparent(_ctx, ex)) _ctx, res end diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 5fd42faa8b435..21212a3089577 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -362,4 +362,48 @@ end @test test_mod.X1 isa Enum end +@testset "macros producing meta forms" begin + function find_method_ci(thunk) + ci = thunk.args[1]::Core.CodeInfo + m = findfirst(x->(x isa Expr && x.head === :method && length(x.args) === 3), ci.code) + ci.code[m].args[3] + end + jlower_e(s) = JuliaLowering.to_lowered_expr( + JuliaLowering.lower( + test_mod, JuliaLowering.parsestmt( + JuliaLowering.SyntaxTree, s); + expr_compat_mode=true)) + + prog = "Base.@assume_effects :foldable function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).purity === find_method_ci(our).purity + + prog = "Base.@inline function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).inlining === find_method_ci(our).inlining + + prog = "Base.@noinline function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).inlining === find_method_ci(our).inlining + + prog = "Base.@constprop :none function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).constprop === find_method_ci(our).constprop + + prog = "Base.@nospecializeinfer function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).nospecializeinfer === find_method_ci(our).nospecializeinfer + + prog = "Base.@propagate_inbounds function foo(); end" + ref = 
Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).propagate_inbounds === find_method_ci(our).propagate_inbounds + +end + end From 1c215f7fec7b6f57d4a1f2ae678bdca36f9fbcd7 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Wed, 8 Oct 2025 11:59:25 -0700 Subject: [PATCH 1084/1109] New Core.declare_global and Core.declare_const lowering (JuliaLang/JuliaLowering.jl#87) * Support `const foo() = ...` * Add support for destructuring `const` * Generate Core.declare_const; make constdecl a lowering-only kind * Generate Core.declare_global; remove globaldecl kind, desugar global Corresponds to JuliaLang/JuliaLowering.jl#58279 (also take unused_only) * Refresh IR test cases * Update README * Fix toplevel_pure * Random typo fix * Use Core.declare_const instead of jl_set_const * Don't test on 1.12 in CI * Update test/decls.jl Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com> * Expand global/local function def body properly * Add a handful more IR tests for declarations * Add tests for JuliaLang/julia#59755 --------- Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com> --- JuliaLowering/.github/workflows/CI.yml | 1 - JuliaLowering/README.md | 2 - JuliaLowering/src/closure_conversion.jl | 70 ++++++++++++--- JuliaLowering/src/compat.jl | 3 +- JuliaLowering/src/desugaring.jl | 110 ++++++++++++++---------- JuliaLowering/src/eval.jl | 3 - JuliaLowering/src/kinds.jl | 11 ++- JuliaLowering/src/linear_ir.jl | 48 +++++------ JuliaLowering/src/runtime.jl | 7 +- JuliaLowering/src/scope_analysis.jl | 23 +++-- JuliaLowering/test/assignments_ir.jl | 2 +- JuliaLowering/test/closures_ir.jl | 2 +- JuliaLowering/test/decls.jl | 27 +++++- JuliaLowering/test/decls_ir.jl | 69 ++++++++------- JuliaLowering/test/misc_ir.jl | 29 +++++++ JuliaLowering/test/scopes_ir.jl | 106 ++++++++++++++++++++++- JuliaLowering/test/typedefs_ir.jl | 72 ++++++++-------- JuliaLowering/test/utils.jl | 3 +- 18 files changed, 404 insertions(+), 184 
deletions(-) diff --git a/JuliaLowering/.github/workflows/CI.yml b/JuliaLowering/.github/workflows/CI.yml index 49cf0a37c2231..7ed1133e01315 100644 --- a/JuliaLowering/.github/workflows/CI.yml +++ b/JuliaLowering/.github/workflows/CI.yml @@ -19,7 +19,6 @@ jobs: fail-fast: false matrix: version: - - '~1.12.0-rc1' - 'nightly' os: - ubuntu-latest diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index 0918fb0fd540d..a420702752b30 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -898,8 +898,6 @@ In the current Julia runtime, * `:import` * `:public` * `:export` - * `:global` - * `:const` * `:toplevel` * `:error` * `:incomplete` diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index d2820c08387c9..99aecd3f83c97 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -18,6 +18,11 @@ struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext # True if we're in a section of code which preserves top-level sequencing # such that closure types can be emitted inline with other code. is_toplevel_seq_point::Bool + # True if this expression should not have toplevel effects, namely, it + # should not declare the globals it references. This allows generated + # functions to refer to globals that have already been declared, without + # triggering the "function body AST not pure" error. 
+ toplevel_pure::Bool toplevel_stmts::SyntaxList{GraphType} closure_infos::Dict{IdTag,ClosureInfo{GraphType}} end @@ -27,7 +32,8 @@ function ClosureConversionCtx(graph::GraphType, bindings::Bindings, lambda_bindings::LambdaBindings) where {GraphType} ClosureConversionCtx{GraphType}( graph, bindings, mod, closure_bindings, nothing, - lambda_bindings, false, SyntaxList(graph), Dict{IdTag,ClosureInfo{GraphType}}()) + lambda_bindings, false, true, SyntaxList(graph), + Dict{IdTag,ClosureInfo{GraphType}}()) end function current_lambda_bindings(ctx::ClosureConversionCtx) @@ -117,10 +123,39 @@ function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert) ] end +# TODO: Avoid producing redundant calls to declare_global +function make_globaldecl(ctx, src_ex, mod, name, strong=false, type=nothing; ret_nothing=false) + if !ctx.toplevel_pure + decl = @ast ctx src_ex [K"block" + [K"call" + "declare_global"::K"core" + mod::K"Value" name::K"Symbol" strong::K"Bool" + if type !== nothing + type + end + ] + [K"latestworld"] + @ast ctx src_ex [K"removable" "nothing"::K"core"] + ] + if ctx.is_toplevel_seq_point + return decl + else + push!(ctx.toplevel_stmts, decl) + end + end + if ret_nothing + nothing + else + @ast ctx src_ex [K"removable" "nothing"::K"core"] + end +end + function convert_global_assignment(ctx, ex, var, rhs0) binfo = lookup_binding(ctx, var) @assert binfo.kind == :global stmts = SyntaxList(ctx) + decl = make_globaldecl(ctx, ex, binfo.mod, binfo.name, true; ret_nothing=true) + decl !== nothing && push!(stmts, decl) rhs1 = if is_simple_atom(ctx, rhs0) rhs0 else @@ -147,7 +182,6 @@ function convert_global_assignment(ctx, ex, var, rhs0) end push!(stmts, @ast ctx ex [K"=" var rhs]) @ast ctx ex [K"block" - [K"globaldecl" var] stmts... 
rhs1 ] @@ -296,7 +330,7 @@ function map_cl_convert(ctx::ClosureConversionCtx, ex, toplevel_preserving) toplevel_stmts = SyntaxList(ctx) ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, ctx.capture_rewriting, ctx.lambda_bindings, - false, toplevel_stmts, ctx.closure_infos) + false, ctx.toplevel_pure, toplevel_stmts, ctx.closure_infos) res = mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) if isempty(toplevel_stmts) res @@ -352,16 +386,24 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) @assert kind(ex[1]) == K"BindingId" binfo = lookup_binding(ctx, ex[1]) if binfo.kind == :global - @ast ctx ex [K"block" - # flisp has this, but our K"assert" handling is in a previous pass - # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] - [K"globaldecl" - ex[1] - _convert_closures(ctx, ex[2])] - "nothing"::K"core"] + # flisp has this, but our K"assert" handling is in a previous pass + # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + make_globaldecl(ctx, ex, binfo.mod, binfo.name, true, _convert_closures(ctx, ex[2])) else makeleaf(ctx, ex, K"TOMBSTONE") end + elseif k == K"global" + # Leftover `global` forms become weak globals. + mod, name = if kind(ex[1]) == K"BindingId" + binfo = lookup_binding(ctx, ex[1]) + @assert binfo.kind == :global + binfo.mod, binfo.name + else + # See note about using eval on Expr(:global/:const, GlobalRef(...)) + @assert ex[1].value isa GlobalRef + ex[1].value.mod, String(ex[1].value.name) + end + @ast ctx ex [K"unused_only" make_globaldecl(ctx, ex, mod, name, false)] elseif k == K"local" var = ex[1] binfo = lookup_binding(ctx, var) @@ -453,7 +495,8 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) cap_rewrite = is_closure ? 
ctx.closure_infos[name.var_id] : nothing ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, - ctx.is_toplevel_seq_point, ctx.toplevel_stmts, ctx.closure_infos) + ctx.is_toplevel_seq_point, ctx.toplevel_pure, ctx.toplevel_stmts, + ctx.closure_infos) body = map_cl_convert(ctx2, ex[2], false) if is_closure if ctx.is_toplevel_seq_point @@ -478,7 +521,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, - false, ctx.toplevel_stmts, ctx.closure_infos) + false, ctx.toplevel_pure, ctx.toplevel_stmts, ctx.closure_infos) init_closure_args = SyntaxList(ctx) for id in field_orig_bindings @@ -521,7 +564,8 @@ function closure_convert_lambda(ctx, ex) end ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, ctx.closure_bindings, cap_rewrite, lambda_bindings, - ex.is_toplevel_thunk, ctx.toplevel_stmts, ctx.closure_infos) + ex.is_toplevel_thunk, ctx.toplevel_pure && ex.toplevel_pure, + ctx.toplevel_stmts, ctx.closure_infos) lambda_children = SyntaxList(ctx) args = ex[1] push!(lambda_children, args) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index 91037bc256b36..e9800d0a7ab48 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -25,7 +25,8 @@ function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing kind=Kind, syntax_flags=UInt16, source=SourceAttrType, var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool, - scope_layer=LayerId, meta=CompileHints) + scope_layer=LayerId, meta=CompileHints, + toplevel_pure=Bool) expr_to_syntaxtree(graph, e, lnn) end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 84ccbac98c793..716a8a60648d8 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -15,7 +15,8 @@ function DesugaringContext(ctx, 
expr_compat_mode::Bool) value=Any, name_val=String, scope_type=Symbol, # :hard or :soft var_id=IdTag, - is_toplevel_thunk=Bool) + is_toplevel_thunk=Bool, + toplevel_pure=Bool) DesugaringContext(graph, ctx.bindings, ctx.scope_layers, @@ -108,9 +109,10 @@ end # constructed followed by destructuring. In particular, any side effects due to # evaluating the individual terms in the right hand side tuple must happen in # order. -function tuple_to_assignments(ctx, ex) +function tuple_to_assignments(ctx, ex, is_const) lhs = ex[1] rhs = ex[2] + wrap(asgn) = is_const ? (@ast ctx ex [K"const" asgn]) : asgn # Tuple elimination aims to turn assignments between tuples into lists of assignments. # @@ -187,12 +189,12 @@ function tuple_to_assignments(ctx, ex) # (x, ys...) = (a,b,c) # (x, ys...) = (a,bs...) # (ys...) = () - push!(stmts, @ast ctx ex [K"=" lh[1] middle]) + push!(stmts, wrap(@ast ctx ex [K"=" lh[1] middle])) else # (x, ys..., z) = (a, b, c, d) # (x, ys..., z) = (a, bs...) # (xs..., y) = (a, bs...) - push!(stmts, @ast ctx ex [K"=" [K"tuple" lhs[il:jl]...] middle]) + push!(stmts, wrap(@ast ctx ex [K"=" [K"tuple" lhs[il:jl]...] middle])) end # Continue with the remainder of the list of non-splat terms il = jl @@ -200,10 +202,10 @@ function tuple_to_assignments(ctx, ex) else rh = rhs_tmps[ir] if kind(rh) == K"..." - push!(stmts, @ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh[1]]) + push!(stmts, wrap(@ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh[1]])) break else - push!(stmts, @ast ctx ex [K"=" lh rh]) + push!(stmts, wrap(@ast ctx ex [K"=" lh rh])) end end end @@ -275,23 +277,24 @@ end # Destructuring in this context is done via the iteration interface, though # calls `Base.indexed_iterate()` to allow for a fast path in cases where the # right hand side is directly indexable. 
-function _destructure(ctx, assignment_srcref, stmts, lhs, rhs) +function _destructure(ctx, assignment_srcref, stmts, lhs, rhs, is_const) n_lhs = numchildren(lhs) if n_lhs > 0 iterstate = new_local_binding(ctx, rhs, "iterstate") end end_stmts = SyntaxList(ctx) + wrap(asgn) = is_const ? (@ast ctx assignment_srcref [K"const" asgn]) : asgn i = 0 for lh in children(lhs) i += 1 if kind(lh) == K"..." - lh1 = if is_identifier_like(lh[1]) + lh1 = if is_identifier_like(lh[1]) && !is_const lh[1] else lhs_tmp = ssavar(ctx, lh[1], "lhs_tmp") - push!(end_stmts, expand_forms_2(ctx, @ast ctx lh[1] [K"=" lh[1] lhs_tmp])) + push!(end_stmts, expand_forms_2(ctx, wrap(@ast ctx lh[1] [K"=" lh[1] lhs_tmp]))) lhs_tmp end if i == n_lhs @@ -341,12 +344,12 @@ function _destructure(ctx, assignment_srcref, stmts, lhs, rhs) else # Normal case, eg, for `y` in # (x, y, z) = rhs - lh1 = if is_identifier_like(lh) + lh1 = if is_identifier_like(lh) && !is_const lh # elseif is_eventually_call(lh) (TODO??) else lhs_tmp = ssavar(ctx, lh, "lhs_tmp") - push!(end_stmts, expand_forms_2(ctx, @ast ctx lh [K"=" lh lhs_tmp])) + push!(end_stmts, expand_forms_2(ctx, wrap(@ast ctx lh [K"=" lh lhs_tmp]))) lhs_tmp end push!(stmts, @@ -374,7 +377,7 @@ function _destructure(ctx, assignment_srcref, stmts, lhs, rhs) end # Expands cases of property destructuring -function expand_property_destruct(ctx, ex) +function expand_property_destruct(ctx, ex, is_const) @assert numchildren(ex) == 2 lhs = ex[1] @assert kind(lhs) == K"tuple" @@ -405,7 +408,7 @@ end # Expands all cases of general tuple destructuring, eg # (x,y) = (a,b) -function expand_tuple_destruct(ctx, ex) +function expand_tuple_destruct(ctx, ex, is_const) lhs = ex[1] @assert kind(lhs) == K"tuple" rhs = ex[2] @@ -426,7 +429,7 @@ function expand_tuple_destruct(ctx, ex) if !any_assignment(children(rhs)) && !has_parameters(rhs) && _tuple_sides_match(children(lhs), children(rhs)) - return expand_forms_2(ctx, tuple_to_assignments(ctx, ex)) + return expand_forms_2(ctx, 
tuple_to_assignments(ctx, ex, is_const)) end end @@ -439,7 +442,7 @@ function expand_tuple_destruct(ctx, ex) else emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) end - _destructure(ctx, ex, stmts, lhs, rhs1) + _destructure(ctx, ex, stmts, lhs, rhs1, is_const) push!(stmts, @ast ctx rhs1 [K"removable" rhs1]) makenode(ctx, ex, K"block", stmts) end @@ -1218,7 +1221,11 @@ function expand_assignment(ctx, ex, is_const=false) lhs = ex[1] rhs = ex[2] kl = kind(lhs) - if kl == K"curly" + if kind(ex) == K"function" + # `const f() = ...` - The `const` here is inoperative, but the syntax + # happened to work in earlier versions, so simply strip `const`. + expand_forms_2(ctx, ex[1]) + elseif kl == K"curly" expand_unionall_def(ctx, ex, lhs, rhs, is_const) elseif kind(rhs) == K"=" # Expand chains of assignments @@ -1284,9 +1291,9 @@ function expand_assignment(ctx, ex, is_const=false) ] elseif kl == K"tuple" if has_parameters(lhs) - expand_property_destruct(ctx, ex) + expand_property_destruct(ctx, ex, is_const) else - expand_tuple_destruct(ctx, ex) + expand_tuple_destruct(ctx, ex, is_const) end elseif kl == K"ref" # a[i1, i2] = rhs @@ -2119,35 +2126,37 @@ end #------------------------------------------------------------------------------- # Expand local/global/const declarations -# Strip variable type declarations from within a `local` or `global`, returning -# the stripped expression. Works recursively with complex left hand side -# assignments containing tuple destructuring. Eg, given +# Create local/global declarations, and possibly type declarations for each name +# on an assignment LHS. Works recursively with complex left hand side +# assignments containing tuple destructuring. 
Eg, given # (x::T, (y::U, z)) # strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z) # and return (x, (y, z)) -function strip_decls!(ctx, stmts, declkind, declmeta, ex) +function make_lhs_decls(ctx, stmts, declkind, declmeta, ex, type_decls=true) k = kind(ex) - if k == K"Identifier" + if k == K"Identifier" || k == K"Value" && ex.value isa GlobalRef + # TODO: consider removing support for Expr(:global, GlobalRef(...)) and + # other Exprs that cannot be produced by the parser (tested by + # test/precompile.jl #50538). if !isnothing(declmeta) push!(stmts, makenode(ctx, ex, declkind, ex; meta=declmeta)) else push!(stmts, makenode(ctx, ex, declkind, ex)) end - ex elseif k == K"Placeholder" - ex - elseif k == K"::" - @chk numchildren(ex) == 2 - name = ex[1] - @chk kind(name) == K"Identifier" - push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) - strip_decls!(ctx, stmts, declkind, declmeta, ex[1]) + nothing + elseif (k === K"::" && numchildren(ex) === 2) || k in KSet"call curly where" + if type_decls + @chk numchildren(ex) == 2 + name = ex[1] + @chk kind(name) == K"Identifier" + push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) + end + make_lhs_decls(ctx, stmts, declkind, declmeta, ex[1], type_decls) elseif k == K"tuple" || k == K"parameters" - cs = SyntaxList(ctx) for e in children(ex) - push!(cs, strip_decls!(ctx, stmts, declkind, declmeta, e)) + make_lhs_decls(ctx, stmts, declkind, declmeta, e, type_decls) end - makenode(ctx, ex, k, cs) else throw(LoweringError(ex, "invalid kind $k in $declkind declaration")) end @@ -2162,13 +2171,16 @@ function expand_decls(ctx, ex) bindings = children(ex) stmts = SyntaxList(ctx) for binding in bindings - kb = kind(binding) - if is_prec_assignment(kb) + if is_prec_assignment(kind(binding)) @chk numchildren(binding) == 2 - lhs = strip_decls!(ctx, stmts, declkind, declmeta, binding[1]) - push!(stmts, expand_assignment(ctx, @ast ctx binding [kb lhs binding[2]])) - elseif is_sym_decl(binding) - strip_decls!(ctx, 
stmts, declkind, declmeta, binding) + # expand_assignment will create the type decls + make_lhs_decls(ctx, stmts, declkind, declmeta, binding[1], false) + push!(stmts, expand_assignment(ctx, binding)) + elseif is_sym_decl(binding) || kind(binding) == K"Value" + make_lhs_decls(ctx, stmts, declkind, declmeta, binding, true) + elseif kind(binding) == K"function" + make_lhs_decls(ctx, stmts, declkind, declmeta, binding[1], false) + push!(stmts, expand_forms_2(ctx, binding)) else throw(LoweringError(ex, "invalid syntax in variable declaration")) end @@ -2198,7 +2210,7 @@ function expand_const_decl(ctx, ex) k = kind(ex[1]) if k == K"global" asgn = ex[1][1] - @chk (kind(asgn) == K"=") (ex, "expected assignment after `const`") + @chk (kind(asgn) == K"=" || kind(asgn) == K"function") (ex, "expected assignment after `const`") globals = SyntaxList(ctx) foreach_lhs_name(asgn[1]) do x push!(globals, @ast ctx ex [K"global" x]) @@ -2207,13 +2219,17 @@ function expand_const_decl(ctx, ex) globals... expand_assignment(ctx, asgn, true) ] - elseif k == K"=" - if numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"tuple" - TODO(ex[1][1], "`const` tuple assignment desugaring") - end + elseif k == K"=" || k == K"function" expand_assignment(ctx, ex[1], true) elseif k == K"local" throw(LoweringError(ex, "unsupported `const local` declaration")) + elseif k == K"Identifier" || k == K"Value" + # Expr(:const, v) where v is a Symbol or a GlobalRef is an unfortunate + # remnant from the days when const-ness was a flag that could be set on + # any global. It creates a binding with kind PARTITION_KIND_UNDEF_CONST. + # TODO: deprecate and delete this "feature" + @chk numchildren(ex) == 1 + @ast ctx ex [K"constdecl" ex[1]] else throw(LoweringError(ex, "expected assignment after `const`")) end @@ -2326,7 +2342,7 @@ function method_def_expr(ctx, srcref, callex_srcref, method_table, [K"method" isnothing(method_table) ? 
"nothing"::K"core" : method_table method_metadata - [K"lambda"(body, is_toplevel_thunk=false) + [K"lambda"(body, is_toplevel_thunk=false, toplevel_pure=false) [K"block" arg_names...] [K"block" typevar_names...] body @@ -3238,7 +3254,7 @@ function expand_opaque_closure(ctx, ex) nargs::K"Integer" is_va::K"Bool" ::K"SourceLocation"(func_expr) - [K"lambda"(func_expr, is_toplevel_thunk=false) + [K"lambda"(func_expr, is_toplevel_thunk=false, toplevel_pure=false) [K"block" arg_names...] [K"block"] [K"block" diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index e7ed94e180407..f31cfc2592e68 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -431,12 +431,9 @@ function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) k == K"new" ? :new : k == K"splatnew" ? :splatnew : k == K"=" ? :(=) : - k == K"global" ? :global : - k == K"constdecl" ? :const : k == K"leave" ? :leave : k == K"isdefined" ? :isdefined : k == K"latestworld" ? :latestworld : - k == K"globaldecl" ? :globaldecl : k == K"pop_exception" ? :pop_exception : k == K"captured_local" ? :captured_local : k == K"gc_preserve_begin" ? :gc_preserve_begin : diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index fc7afdd4780cc..0f2dfbb407d99 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -126,6 +126,13 @@ function _register_kinds() "captured_local" # Causes the linearization pass to conditionally emit a world age increment "latestworld_if_toplevel" + # This has two forms: + # [K"constdecl" var val] => declare and assign constant + # [K"constdecl" var] => declare undefined constant + # var is GlobalRef Value or Identifier + "constdecl" + # Returned from statements that should error if the result is used. 
+ "unused_only" "END_LOWERING_KINDS" # The following kinds are emitted by lowering and used in Julia's untyped IR @@ -138,10 +145,6 @@ function _register_kinds() "static_parameter" # References/declares a global variable within a module "globalref" - "globaldecl" - # Two-argument constant declaration and assignment. - # Translated to :const in the IR for now (we use K"const" already in parsing). - "constdecl" # Unconditional goto "goto" # Conditional goto diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index d15214641fa99..0c1fb510e1a6f 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -319,10 +319,10 @@ end # or K"constdecl". flisp: emit-assignment-or-setglobal function emit_simple_assignment(ctx, srcref, lhs, rhs, op=K"=") binfo = lookup_binding(ctx, lhs.var_id) - if binfo.kind == :global && op == K"=" + if binfo.kind == :global emit(ctx, @ast ctx srcref [ K"call" - "setglobal!"::K"core" + op == K"constdecl" ? "declare_const"::K"core" : "setglobal!"::K"core" binfo.mod::K"Value" binfo.name::K"Symbol" rhs @@ -615,6 +615,18 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) lhs = ex[1] res = if kind(lhs) == K"Placeholder" compile(ctx, ex[2], needs_value, in_tail_pos) + elseif k == K"constdecl" && numchildren(ex) == 1 + # No RHS - make undefined constant + mod, name = if kind(ex[1]) == K"BindingId" + binfo = lookup_binding(ctx, ex[1]) + binfo.mod, binfo.name + else + @assert kind(ex[1]) == K"Value" && typeof(ex[1].value) === GlobalRef + gr = ex[1].value + gr.mod, String(gr.name) + end + emit(ctx, @ast ctx ex [K"call" "declare_const"::K"core" + mod::K"Value" name::K"Symbol"]) else rhs = compile(ctx, ex[2], true, false) # TODO look up arg-map for renaming if lhs was reassigned @@ -792,21 +804,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) end emit(ctx, ex) nothing - elseif k == K"global" - emit(ctx, ex) - ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) - if 
needs_value - if in_tail_pos && ctx.is_toplevel_thunk - # Permit "statement-like" globals at top level but potentially - # inside blocks. - compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) - else - throw(LoweringError(ex, - "global declaration doesn't read the variable and can't return a value")) - end - else - nothing - end elseif k == K"meta" @chk numchildren(ex) >= 1 if ex[1].name_val in ("inline", "noinline", "propagate_inbounds", @@ -862,17 +859,6 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) # TODO: also exclude deleted vars emit(ctx, ex) end - elseif k == K"globaldecl" - if needs_value - throw(LoweringError(ex, "misplaced global declaration")) - end - if numchildren(ex) == 1 || is_identifier_like(ex[2]) - emit(ctx, ex) - else - rr = emit_assign_tmp(ctx, ex[2]) - emit(ctx, @ast ctx ex [K"globaldecl" ex[1] rr]) - end - ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) elseif k == K"latestworld" if needs_value throw(LoweringError(ex, "misplaced latestsworld")) @@ -880,6 +866,12 @@ function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) emit_latestworld(ctx, ex) elseif k == K"latestworld_if_toplevel" ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) + elseif k == K"unused_only" + if needs_value && !(in_tail_pos && ctx.is_toplevel_thunk) + throw(LoweringError(ex, + "global declaration doesn't read the variable and can't return a value")) + end + compile(ctx, ex[1], needs_value, in_tail_pos) else throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index ec175047a7735..ff089c99cefe1 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -189,7 +189,7 @@ function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, false, length(field_names)) Core._setsuper!(type, Core.Function) - @ccall jl_set_const(mod::Module, closure_type_name::Symbol, type::Any)::Cvoid + Core.declare_const(mod, 
closure_type_name, type) Core._typebody!(false, type, Core.svec(field_types...)) type end @@ -331,7 +331,8 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a graph = ensure_attributes(ensure_macro_attributes(graph), # Additional attribute for resolve_scopes, for # adding our custom lambda below - is_toplevel_thunk=Bool + is_toplevel_thunk=Bool, + toplevel_pure=Bool, ) __module__ = source.module @@ -367,7 +368,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a ctx2, ex2 = expand_forms_2(ctx1, ex1) # Wrap expansion in a non-toplevel lambda and run scope resolution - ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false) + ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false, toplevel_pure=true) [K"block" (string(n)::K"Identifier" for n in g.argnames)... ] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 6f146b235848b..f73c736241ebf 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -43,17 +43,21 @@ function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, _insert_if_not_present!(locals, NameKey(ex[1]), ex) end elseif k == K"global" - _insert_if_not_present!(globals, NameKey(ex[1]), ex) + if !(kind(ex[1]) == K"Value" && ex[1].value isa GlobalRef) + _insert_if_not_present!(globals, NameKey(ex[1]), ex) + end elseif k == K"assign_or_constdecl_if_global" # like v = val, except that if `v` turns out global(either implicitly or # by explicit `global`), it gains an implicit `const` _insert_if_not_present!(assignments, NameKey(ex[1]), ex) elseif k == K"=" || k == K"constdecl" v = decl_var(ex[1]) - if !(kind(v) in KSet"BindingId globalref Placeholder") + if !(kind(v) in KSet"BindingId globalref Value Placeholder") _insert_if_not_present!(assignments, NameKey(v), v) end - _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex[2]) + if k != K"constdecl" 
|| numchildren(ex) == 2 + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex[2]) + end elseif k == K"function_decl" v = ex[1] kv = kind(v) @@ -473,7 +477,8 @@ function _resolve_scopes(ctx, ex::SyntaxTree) pop!(ctx.scope_stack) @ast ctx ex [K"lambda"(lambda_bindings=scope.lambda_bindings, - is_toplevel_thunk=is_toplevel_thunk) + is_toplevel_thunk=is_toplevel_thunk, + toplevel_pure=false) arg_bindings sparm_bindings [K"block" @@ -720,10 +725,12 @@ function analyze_variables!(ctx, ex) end elseif k == K"constdecl" id = ex[1] - if lookup_binding(ctx, id).kind == :local - throw(LoweringError(ex, "unsupported `const` declaration on local variable")) + if kind(id) == K"BindingId" + if lookup_binding(ctx, id).kind == :local + throw(LoweringError(ex, "unsupported `const` declaration on local variable")) + end + update_binding!(ctx, id; is_const=true) end - update_binding!(ctx, id; is_const=true) elseif k == K"call" name = ex[1] if kind(name) == K"BindingId" @@ -786,7 +793,7 @@ function resolve_scopes(ctx::ScopeResolutionContext, ex) if kind(ex) != K"lambda" # Wrap in a top level thunk if we're not already expanding a lambda. # (Maybe this should be done elsewhere?) 
- ex = @ast ctx ex [K"lambda"(is_toplevel_thunk=true) + ex = @ast ctx ex [K"lambda"(is_toplevel_thunk=true, toplevel_pure=false) [K"block"] [K"block"] ex diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 9789fc6e6315e..4fcb0abb1ea9a 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -142,7 +142,7 @@ X{T} = Y{T,T} 6 slot₁/T 7 (call core.apply_type %₄ %₅ %₆) 8 (call core.UnionAll %₃ %₇) -9 (constdecl TestMod.X %₈) +9 (call core.declare_const TestMod :X %₈) 10 latestworld 11 (return %₈) diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 475eea8c71938..9f3d07e52187b 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -755,7 +755,7 @@ slots: [slot₁/#self#(!read) slot₂/T(!read) slot₃/tmp(!read)] # Error: Closure outside any top level context # (Should only happen in a user-visible way when lowering code emitted # from a `@generated` function code generator.) 
-@ast_ [K"lambda"(is_toplevel_thunk=false) +@ast_ [K"lambda"(is_toplevel_thunk=false, toplevel_pure=false) [K"block"] [K"block"] [K"->" [K"tuple"] [K"block"]] diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl index 50aa98a596504..ab25aaa0b7015 100644 --- a/JuliaLowering/test/decls.jl +++ b/JuliaLowering/test/decls.jl @@ -93,8 +93,29 @@ end # Tuple/destructuring assignments @test JuliaLowering.include_string(test_mod, "(a0, a1, a2) = [1,2,3]") == [1,2,3] - -# Unsupported for now -@test_throws LoweringError JuliaLowering.include_string(test_mod, "const a,b,c = 1,2,3") +@test JuliaLowering.include_string(test_mod, "const a,b,c = 1,2,3") === (1, 2, 3) + +test_mod_2 = Module() +@testset "toplevel-preserving syntax" begin + JuliaLowering.include_string(test_mod_2, "if true; global v1::Bool; else const v1 = 1; end") + @test !isdefined(test_mod_2, :v1) + @test Base.binding_kind(test_mod_2, :v1) == Base.PARTITION_KIND_GLOBAL + @test Core.get_binding_type(test_mod_2, :v1) == Bool + + JuliaLowering.include_string(test_mod_2, "if false; global v2::Bool; else const v2 = 2; end") + @test test_mod_2.v2 === 2 + @test Base.binding_kind(test_mod_2, :v2) == Base.PARTITION_KIND_CONST + + JuliaLowering.include_string(test_mod_2, "v3 = if true; global v4::Bool; 4 else const v4 = 5; 6; end") + @test test_mod_2.v3 == 4 + @test !isdefined(test_mod_2, :v4) + @test Base.binding_kind(test_mod_2, :v4) == Base.PARTITION_KIND_GLOBAL + @test Core.get_binding_type(test_mod_2, :v4) == Bool + + JuliaLowering.include_string(test_mod_2, "v5 = if false; global v6::Bool; 4 else const v6 = 5; 6; end") + @test test_mod_2.v5 === 6 + @test test_mod_2.v6 === 5 + @test Base.binding_kind(test_mod_2, :v6) == Base.PARTITION_KIND_CONST +end end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 155754cde9cff..9ad27bdf8ab23 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -53,7 +53,7 @@ end # Global declaration allowed in tail 
position global x #--------------------- -1 (global TestMod.x) +1 (call core.declare_global TestMod :x false) 2 latestworld 3 (return core.nothing) @@ -63,7 +63,7 @@ begin global x end #--------------------- -1 (global TestMod.x) +1 (call core.declare_global TestMod :x false) 2 latestworld 3 (return core.nothing) @@ -92,7 +92,7 @@ y = global x const xx = 10 #--------------------- 1 10 -2 (constdecl TestMod.xx %₁) +2 (call core.declare_const TestMod :xx %₁) 3 latestworld 4 (return %₁) @@ -110,26 +110,34 @@ const xx::T = 10 8 (call top.convert %₁ %₇) 9 (= slot₁/tmp (call core.typeassert %₈ %₁)) 10 slot₁/tmp -11 (constdecl TestMod.xx %₁₀) +11 (call core.declare_const TestMod :xx %₁₀) 12 latestworld 13 (return %₁₀) ######################################## -# Error: Const tuple +# Const tuple const xxx,xxxx,xxxxx = 10,20,30 #--------------------- -LoweringError: -const xxx,xxxx,xxxxx = 10,20,30 -# └─────────────┘ ── Lowering TODO: `const` tuple assignment desugaring +1 10 +2 (call core.declare_const TestMod :xxx %₁) +3 latestworld +4 20 +5 (call core.declare_const TestMod :xxxx %₄) +6 latestworld +7 30 +8 (call core.declare_const TestMod :xxxxx %₇) +9 latestworld +10 (call core.tuple 10 20 30) +11 (return %₁₀) ######################################## # Const in chain: only first is const const c0 = v0 = v1 = 123 #--------------------- 1 123 -2 (constdecl TestMod.c0 %₁) +2 (call core.declare_const TestMod :c0 %₁) 3 latestworld -4 (globaldecl TestMod.v0) +4 (call core.declare_global TestMod :v0 true) 5 latestworld 6 (call core.get_binding_type TestMod :v0) 7 (= slot₁/tmp %₁) @@ -141,7 +149,7 @@ const c0 = v0 = v1 = 123 13 (= slot₁/tmp (call top.convert %₆ %₁₂)) 14 slot₁/tmp 15 (call core.setglobal! 
TestMod :v0 %₁₄) -16 (globaldecl TestMod.v1) +16 (call core.declare_global TestMod :v1 true) 17 latestworld 18 (call core.get_binding_type TestMod :v1) 19 (= slot₂/tmp %₁) @@ -159,7 +167,7 @@ const c0 = v0 = v1 = 123 # Global assignment xx = 10 #--------------------- -1 (globaldecl TestMod.xx) +1 (call core.declare_global TestMod :xx true) 2 latestworld 3 (call core.get_binding_type TestMod :xx) 4 (= slot₁/tmp 10) @@ -177,23 +185,24 @@ xx = 10 # Typed global assignment global xx::T = 10 #--------------------- -1 (globaldecl TestMod.xx TestMod.T) +1 (call core.declare_global TestMod :xx false) 2 latestworld -3 (global TestMod.xx) -4 latestworld -5 (globaldecl TestMod.xx) -6 latestworld -7 (call core.get_binding_type TestMod :xx) -8 (= slot₁/tmp 10) -9 slot₁/tmp -10 (call core.isa %₉ %₇) -11 (gotoifnot %₁₀ label₁₃) -12 (goto label₁₅) -13 slot₁/tmp -14 (= slot₁/tmp (call top.convert %₇ %₁₃)) -15 slot₁/tmp -16 (call core.setglobal! TestMod :xx %₁₅) -17 (return 10) +3 TestMod.T +4 (call core.declare_global TestMod :xx true %₃) +5 latestworld +6 (call core.declare_global TestMod :xx true) +7 latestworld +8 (call core.get_binding_type TestMod :xx) +9 (= slot₁/tmp 10) +10 slot₁/tmp +11 (call core.isa %₁₀ %₈) +12 (gotoifnot %₁₁ label₁₄) +13 (goto label₁₆) +14 slot₁/tmp +15 (= slot₁/tmp (call top.convert %₈ %₁₄)) +16 slot₁/tmp +17 (call core.setglobal! 
TestMod :xx %₁₆) +18 (return 10) ######################################## # Error: x declared twice @@ -206,7 +215,7 @@ LoweringError: begin local x::T = 1 local x::S = 1 -# └──┘ ── multiple type declarations found for `x` +# └───────┘ ── multiple type declarations found for `x` end ######################################## @@ -286,6 +295,6 @@ end LoweringError: function f() global x::Int = 1 -# └────┘ ── type declarations for global variables must be at top level, not inside a function +# └─────────┘ ── type declarations for global variables must be at top level, not inside a function end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index a93e5ac194732..345b86763dd5e 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -513,3 +513,32 @@ include("hi.jl") 3 latestworld 4 (return %₂) +######################################## +# Const function assignment syntax (legacy) +const f(x::Int)::Int = x+1 +#--------------------- +1 TestMod.f +2 TestMod.x +3 TestMod.Int +4 (call core.typeassert %₂ %₃) +5 (call %₁ %₄) +6 TestMod.Int +7 (call core.typeassert %₅ %₆) +8 (return %₇) + +######################################## +# Error: Destructuring assignment method definitions (broken, legacy) +f(x)::Int, g() = [1.0, 2.0] +#--------------------- +LoweringError: +f(x)::Int, g() = [1.0, 2.0] +└──┘ ── invalid assignment location + +######################################## +# Error: Destructuring assignment typdef, variable, and function (broken, legacy) +T{U}, (x::Float64, g()) = [Bool, (1, 2)] +#--------------------- +LoweringError: +T{U}, (x::Float64, g()) = [Bool, (1, 2)] +# └─┘ ── invalid assignment location + diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index da1f3529fc8d4..84258ef2811e5 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -166,7 +166,7 @@ begin @islocal(x) end #--------------------- -1 (global TestMod.x) +1 (call core.declare_global 
TestMod :x false) 2 latestworld 3 (return false) @@ -179,7 +179,7 @@ begin end #--------------------- 1 (newvar slot₁/y) -2 (global TestMod.x) +2 (call core.declare_global TestMod :x false) 3 latestworld 4 (call core.apply_type top.Dict core.Symbol core.Any) 5 (call %₄) @@ -429,3 +429,105 @@ end 3 (call core.isdefinedglobal TestMod :y false) 4 (return %₃) +######################################## +# Global function defined inside let (let over lambda) +let x = 1 + global f(y) = x = y + global g() = x +end +#--------------------- +1 1 +2 (= slot₁/x (call core.Box)) +3 slot₁/x +4 (call core.setfield! %₃ :contents %₁) +5 (call core.declare_global TestMod :f false) +6 latestworld +7 (method TestMod.f) +8 latestworld +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::2:12 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- code_info + slots: [slot₁/#self#(!read) slot₂/y] + 1 slot₂/y + 2 (captured_local 1) + 3 (call core.setfield! %₂ :contents %₁) + 4 (return %₁) +16 slot₁/x +17 (call core.svec %₁₆) +18 (call JuliaLowering.replace_captured_locals! %₁₅ %₁₇) +19 --- method core.nothing %₁₄ %₁₈ +20 latestworld +21 (call core.declare_global TestMod :g false) +22 latestworld +23 (method TestMod.g) +24 latestworld +25 TestMod.g +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 SourceLocation::3:12 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- code_info + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 (captured_local 1) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/x) + 6 slot₂/x + 7 (call core.getfield %₁ :contents) + 8 (return %₇) +32 slot₁/x +33 (call core.svec %₃₂) +34 (call JuliaLowering.replace_captured_locals! 
%₃₁ %₃₃) +35 --- method core.nothing %₃₀ %₃₄ +36 latestworld +37 TestMod.g +38 (return %₃₇) + +######################################## +# Modify assignment operator on closure variable +let x = 1 + global f() = x += 1 +end +#--------------------- +1 1 +2 (= slot₁/x (call core.Box)) +3 slot₁/x +4 (call core.setfield! %₃ :contents %₁) +5 (call core.declare_global TestMod :f false) +6 latestworld +7 (method TestMod.f) +8 latestworld +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀) +12 (call core.svec) +13 SourceLocation::2:12 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- code_info + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 TestMod.+ + 2 (captured_local 1) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₂/x) + 7 slot₂/x + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈ 1) + 10 (captured_local 1) + 11 (call core.setfield! %₁₀ :contents %₉) + 12 (return %₉) +16 slot₁/x +17 (call core.svec %₁₆) +18 (call JuliaLowering.replace_captured_locals! %₁₅ %₁₇) +19 --- method core.nothing %₁₄ %₁₈ +20 latestworld +21 TestMod.f +22 (return %₂₁) + diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 1fcc96fc0ffe7..b1a9f920f16cb 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -193,7 +193,7 @@ abstract type A end 4 (call core._setsuper! %₂ core.Any) 5 slot₁/A 6 (call core._typebody! false %₅) -7 (global TestMod.A) +7 (call core.declare_global TestMod :A false) 8 latestworld 9 (call core.isdefinedglobal TestMod :A false) 10 (gotoifnot %₉ label₁₅) @@ -201,7 +201,7 @@ abstract type A end 12 (call core._equiv_typedef %₁₁ %₂) 13 (gotoifnot %₁₂ label₁₅) 14 (goto label₁₇) -15 (constdecl TestMod.A %₂) +15 (call core.declare_const TestMod :A %₂) 16 latestworld 17 (return core.nothing) @@ -216,7 +216,7 @@ abstract type A <: B end 5 (call core._setsuper! %₂ %₄) 6 slot₁/A 7 (call core._typebody! 
false %₆) -8 (global TestMod.A) +8 (call core.declare_global TestMod :A false) 9 latestworld 10 (call core.isdefinedglobal TestMod :A false) 11 (gotoifnot %₁₀ label₁₆) @@ -224,7 +224,7 @@ abstract type A <: B end 13 (call core._equiv_typedef %₁₂ %₂) 14 (gotoifnot %₁₃ label₁₆) 15 (goto label₁₈) -16 (constdecl TestMod.A %₂) +16 (call core.declare_const TestMod :A %₂) 17 latestworld 18 (return core.nothing) @@ -243,7 +243,7 @@ abstract type A{X, Y <: X} end 9 (call core._setsuper! %₇ core.Any) 10 slot₁/A 11 (call core._typebody! false %₁₀) -12 (global TestMod.A) +12 (call core.declare_global TestMod :A false) 13 latestworld 14 (call core.isdefinedglobal TestMod :A false) 15 (gotoifnot %₁₄ label₂₀) @@ -251,7 +251,7 @@ abstract type A{X, Y <: X} end 17 (call core._equiv_typedef %₁₆ %₇) 18 (gotoifnot %₁₇ label₂₀) 19 (goto label₂₂) -20 (constdecl TestMod.A %₇) +20 (call core.declare_const TestMod :A %₇) 21 latestworld 22 (return core.nothing) @@ -301,7 +301,7 @@ primitive type P 8 end 4 (call core._setsuper! %₂ core.Any) 5 slot₁/P 6 (call core._typebody! false %₅) -7 (global TestMod.P) +7 (call core.declare_global TestMod :P false) 8 latestworld 9 (call core.isdefinedglobal TestMod :P false) 10 (gotoifnot %₉ label₁₅) @@ -309,7 +309,7 @@ primitive type P 8 end 12 (call core._equiv_typedef %₁₁ %₂) 13 (gotoifnot %₁₂ label₁₅) 14 (goto label₁₇) -15 (constdecl TestMod.P %₂) +15 (call core.declare_const TestMod :P %₂) 16 latestworld 17 (return core.nothing) @@ -328,7 +328,7 @@ primitive type P{X,Y} <: Z 32 end 9 (call core._setsuper! %₆ %₈) 10 slot₁/P 11 (call core._typebody! 
false %₁₀) -12 (global TestMod.P) +12 (call core.declare_global TestMod :P false) 13 latestworld 14 (call core.isdefinedglobal TestMod :P false) 15 (gotoifnot %₁₄ label₂₀) @@ -336,7 +336,7 @@ primitive type P{X,Y} <: Z 32 end 17 (call core._equiv_typedef %₁₆ %₆) 18 (gotoifnot %₁₇ label₂₀) 19 (goto label₂₂) -20 (constdecl TestMod.P %₆) +20 (call core.declare_const TestMod :P %₆) 21 latestworld 22 (return core.nothing) @@ -352,7 +352,7 @@ primitive type P P_nbits() end 6 (call core._setsuper! %₄ core.Any) 7 slot₁/P 8 (call core._typebody! false %₇) -9 (global TestMod.P) +9 (call core.declare_global TestMod :P false) 10 latestworld 11 (call core.isdefinedglobal TestMod :P false) 12 (gotoifnot %₁₁ label₁₇) @@ -360,7 +360,7 @@ primitive type P P_nbits() end 14 (call core._equiv_typedef %₁₃ %₄) 15 (gotoifnot %₁₄ label₁₇) 16 (goto label₁₉) -17 (constdecl TestMod.P %₄) +17 (call core.declare_const TestMod :P %₄) 18 latestworld 19 (return core.nothing) @@ -369,7 +369,7 @@ primitive type P P_nbits() end struct X end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (call core.svec) 4 (call core.svec) @@ -393,7 +393,7 @@ end 22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec) 24 (call core._typebody! %₂₁ %₆ %₂₃) -25 (constdecl TestMod.X %₂₄) +25 (call core.declare_const TestMod :X %₂₄) 26 latestworld 27 TestMod.X 28 (call core.apply_type core.Type %₂₇) @@ -415,7 +415,7 @@ struct X X() = new() end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (call core.svec) 4 (call core.svec) @@ -439,7 +439,7 @@ end 22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec) 24 (call core._typebody! 
%₂₁ %₆ %₂₃) -25 (constdecl TestMod.X %₂₄) +25 (call core.declare_const TestMod :X %₂₄) 26 latestworld 27 TestMod.X 28 (call core.apply_type core.Type %₂₇) @@ -463,7 +463,7 @@ struct X c end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (call core.svec) 4 (call core.svec :a :b :c) @@ -488,7 +488,7 @@ end 23 TestMod.T 24 (call core.svec core.Any %₂₃ core.Any) 25 (call core._typebody! %₂₁ %₆ %₂₄) -26 (constdecl TestMod.X %₂₅) +26 (call core.declare_const TestMod :X %₂₅) 27 latestworld 28 TestMod.T 29 (call core.=== core.Any %₂₈) @@ -535,7 +535,7 @@ end struct X{U, S <: V <: T} <: Z end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (= slot₂/U (call core.TypeVar :U)) 4 TestMod.S @@ -576,7 +576,7 @@ end 39 (= slot₃/V (call core.getfield %₃₈ 1)) 40 (call core.svec) 41 (call core._typebody! %₂₈ %₁₂ %₄₀) -42 (constdecl TestMod.X %₄₁) +42 (call core.declare_const TestMod :X %₄₁) 43 latestworld 44 slot₂/U 45 slot₃/V @@ -606,7 +606,7 @@ struct X const @atomic c end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (call core.svec) 4 (call core.svec :a :b :c) @@ -630,7 +630,7 @@ end 22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec core.Any core.Any core.Any) 24 (call core._typebody! %₂₁ %₆ %₂₃) -25 (constdecl TestMod.X %₂₄) +25 (call core.declare_const TestMod :X %₂₄) 26 latestworld 27 TestMod.X 28 (call core.apply_type core.Type %₂₇) @@ -658,7 +658,7 @@ struct X b end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (call core.svec) 4 (call core.svec :a :b) @@ -682,7 +682,7 @@ end 22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec core.Any core.Any) 24 (call core._typebody! 
%₂₁ %₆ %₂₃) -25 (constdecl TestMod.X %₂₄) +25 (call core.declare_const TestMod :X %₂₄) 26 latestworld 27 TestMod.X 28 (call core.apply_type core.Type %₂₇) @@ -713,7 +713,7 @@ struct X{U} x::U end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (= slot₁/U (call core.TypeVar :U)) 4 slot₁/U @@ -745,7 +745,7 @@ end 30 slot₁/U 31 (call core.svec %₃₀) 32 (call core._typebody! %₂₃ %₈ %₃₁) -33 (constdecl TestMod.X %₃₂) +33 (call core.declare_const TestMod :X %₃₂) 34 latestworld 35 slot₁/U 36 TestMod.X @@ -797,7 +797,7 @@ struct X{T, S <: Vector{T}} v::Vector{S} end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (= slot₃/T (call core.TypeVar :T)) 4 TestMod.Vector @@ -841,7 +841,7 @@ end 42 (call core.apply_type %₄₀ %₄₁) 43 (call core.svec %₄₂) 44 (call core._typebody! %₂₈ %₁₃ %₄₃) -45 (constdecl TestMod.X %₄₄) +45 (call core.declare_const TestMod :X %₄₄) 46 latestworld 47 slot₃/T 48 slot₂/S @@ -908,7 +908,7 @@ struct X end #--------------------- 1 (= slot₂/f (call core.Box)) -2 (global TestMod.X) +2 (call core.declare_global TestMod :X false) 3 latestworld 4 (call core.svec) 5 (call core.svec :x) @@ -932,7 +932,7 @@ end 23 (gotoifnot %₁₆ label₂₄) 24 (call core.svec core.Any) 25 (call core._typebody! %₂₂ %₇ %₂₄) -26 (constdecl TestMod.X %₂₅) +26 (call core.declare_const TestMod :X %₂₅) 27 latestworld 28 (call core.svec) 29 (call core.svec) @@ -1037,7 +1037,7 @@ struct X{S,T} end #--------------------- 1 (newvar slot₅/f) -2 (global TestMod.X) +2 (call core.declare_global TestMod :X false) 3 latestworld 4 (= slot₂/S (call core.TypeVar :S)) 5 (= slot₃/T (call core.TypeVar :T)) @@ -1075,7 +1075,7 @@ end 37 (= slot₃/T (call core.getfield %₃₆ 1)) 38 (call core.svec core.Any) 39 (call core._typebody! 
%₂₆ %₁₁ %₃₈) -40 (constdecl TestMod.X %₃₉) +40 (call core.declare_const TestMod :X %₃₉) 41 latestworld 42 TestMod.X 43 TestMod.A @@ -1142,7 +1142,7 @@ struct X X(xs) = new(xs...) end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (call core.svec) 4 (call core.svec :x :y) @@ -1166,7 +1166,7 @@ end 22 (gotoifnot %₁₅ label₂₃) 23 (call core.svec core.Any core.Any) 24 (call core._typebody! %₂₁ %₆ %₂₃) -25 (constdecl TestMod.X %₂₄) +25 (call core.declare_const TestMod :X %₂₄) 26 latestworld 27 TestMod.X 28 (call core.apply_type core.Type %₂₇) @@ -1191,7 +1191,7 @@ struct X{T} X{T}(xs) where {T} = new(xs...) end #--------------------- -1 (global TestMod.X) +1 (call core.declare_global TestMod :X false) 2 latestworld 3 (= slot₁/T (call core.TypeVar :T)) 4 slot₁/T @@ -1224,7 +1224,7 @@ end 31 TestMod.A 32 (call core.svec %₃₀ %₃₁) 33 (call core._typebody! %₂₃ %₈ %₃₂) -34 (constdecl TestMod.X %₃₃) +34 (call core.declare_const TestMod :X %₃₃) 35 latestworld 36 (= slot₃/T (call core.TypeVar :T)) 37 TestMod.X diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 4507f93a6b45a..9c27117f0d6b3 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -25,7 +25,8 @@ function _ast_test_graph() ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=Union{SourceRef,NodeId,Tuple,LineNumberNode}, - var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool) + var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool, + toplevel_pure=Bool) end function _source_node(graph, src) From 4a6b5ab1f10644b9945ff34791b1bcfe39b92dc8 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:33:29 -0700 Subject: [PATCH 1085/1109] Fix core lowering hook for testing purposes (JuliaLang/JuliaLowering.jl#97) Also fix a small bug in `_eval` when file is `nothing` --- JuliaLowering/src/eval.jl | 3 ++- JuliaLowering/src/hooks.jl | 6 ++++++ 
JuliaLowering/test/hooks.jl | 32 ++++++++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index f31cfc2592e68..02210946f84e3 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -475,8 +475,9 @@ function _eval(mod, iter) break elseif type == :begin_module push!(modules, mod) + filename = something(thunk[4].file, :none) mod = @ccall jl_begin_new_module(mod::Any, thunk[2]::Symbol, thunk[3]::Cint, - thunk[4].file::Cstring, thunk[4].line::Cint)::Module + filename::Cstring, thunk[4].line::Cint)::Module new_mod = mod elseif type == :end_module @ccall jl_end_new_module(mod::Module)::Cvoid diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index 311576321f761..d4aef14f13773 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -18,6 +18,12 @@ function core_lowering_hook(@nospecialize(code), mod::Module, local st0 = nothing try st0 = code isa Expr ? 
expr_to_syntaxtree(code, LineNumberNode(line, file)) : code + if kind(st0) in KSet"toplevel module" + return Core.svec(code) + elseif kind(st0) === K"doc" && numchildren(st0) >= 2 && kind(st0[2]) === K"module" + # TODO: this ignores module docstrings for now + return Core.svec(Expr(st0[2])) + end ctx1, st1 = expand_forms_1( mod, st0, true, world) ctx2, st2 = expand_forms_2( ctx1, st1) ctx3, st3 = resolve_scopes( ctx2, st2) diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl index d83c2b14d7856..823e9b6027906 100644 --- a/JuliaLowering/test/hooks.jl +++ b/JuliaLowering/test/hooks.jl @@ -46,14 +46,38 @@ const JL = JuliaLowering @test isdefined(test_mod.M, :x) # Tricky cases with symbols - out = jeval("""module M + out = jeval("""module M2 Base.@constprop :aggressive function f(x); x; end const what = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), Core.nothing) end""") @test out isa Module - @test isdefined(test_mod, :M) - @test isdefined(test_mod.M, :f) - @test isdefined(test_mod.M, :what) + @test isdefined(test_mod, :M2) + @test isdefined(test_mod.M2, :f) + @test isdefined(test_mod.M2, :what) + + out = jeval(""" "docstring" module M3 end """) + @test out isa Module + @test isdefined(test_mod, :M3) + + # Macros may produce toplevel expressions. 
Note that julia handles + # this case badly (macro expansion replaces M5_inner with a + # globalref) and we handle esc(:M5_inner) badly + out = jeval("""module M5 + macro newmod() + return quote + let a = 1 + $(Expr(:toplevel, + Expr(:module, true, :M5_inner, + Expr(:block, :(global asdf = 1))))) + end + end + end + @newmod() + end""") + @test out isa Module + @test isdefined(test_mod, :M5) + @test isdefined(test_mod.M5, :M5_inner) + @test isdefined(test_mod.M5.M5_inner, :asdf) # TODO: broken, commented to prevent error logging # @test jeval("Base.@propagate_inbounds @inline meta_double_quote_issue(x) = x") isa Function From 9b978dceed4d51433e74b15a96ea305e211f8f79 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 16 Oct 2025 00:59:17 +0900 Subject: [PATCH 1086/1109] Remove test no longer relevant after nightly ccall change (JuliaLang/JuliaLowering.jl#100) The removed test attempted to pass a runtime-computed function name to `ccall` via `ccallable_sptest_name(T)`, but `ccall` now requires its function name argument to be a compile-time constant. This pattern only works with `@generated` functions from Julia 1.13 onwards, where the function name can be evaluated at code generation time. Currently JL cannot handle `@generated` functions, so the commenting out the test case updated in the last commit. 
--------- Co-authored-by: Em Chu --- JuliaLowering/test/misc.jl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 51ca6d0979f5d..cfbe971164d7f 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -87,17 +87,21 @@ end @test test_mod.ccall_with_sparams(Int) === 1 @test test_mod.ccall_with_sparams(Float64) === 1.0 -# Test that ccall can be passed static parameters in the function name -JuliaLowering.include_string(test_mod, raw""" -# In principle, may add other strlen-like functions here for different string -# types -ccallable_sptest_name(::Type{String}) = :strlen - -function ccall_with_sparams_in_name(s::T) where {T} - ccall(ccallable_sptest_name(T), Csize_t, (Cstring,), s) -end -""") -@test test_mod.ccall_with_sparams_in_name("hii") == 3 +# FIXME Currently JL cannot handle `@generated` functions, so the following test cases are commented out. +# # Test that ccall can be passed static parameters in the function name +# # Note that this only works with `@generated` functions from 1.13 onwards, +# # where the function name can be evaluated at code generation time. 
+# JuliaLowering.include_string(test_mod, raw""" +# # In principle, may add other strlen-like functions here for different string +# # types +# ccallable_sptest_name(::Type{String}) = :strlen +# +# @generated function ccall_with_sparams_in_name(s::T) where {T} +# name = QuoteNode(ccallable_sptest_name(T)) +# :(ccall($name, Csize_t, (Cstring,), s)) +# end +# """) +# @test test_mod.ccall_with_sparams_in_name("hii") == 3 @testset "CodeInfo: has_image_globalref" begin @test lower_str(test_mod, "x + y").args[1].has_image_globalref === false From ab0e460ea33a27e2543987150032f05ba8344342 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 16 Oct 2025 04:27:03 +0900 Subject: [PATCH 1087/1109] Support nested splat patterns by matching native lowerer algorithm (JuliaLang/JuliaLowering.jl#91) Adds support for nested splat expressions like `tuple((xs...)...)` by restructuring the splat expansion to match the native lowerer's recursive algorithm. The native lowerer unwraps only one layer of `...` per pass and relies on recursive expansion to handle nested cases. This approach naturally builds the nested `_apply_iterate` structure through multiple expansion passes, avoiding the need for explicit depth tracking and normalization. 
Changes: - Refactor `_wrap_unsplatted_args` to unwrap only one layer of `...` - Refactor `expand_splat` to construct unevaluated `_apply_iterate` call then recursively expand it - Add test cases for nested splats including triple-nested and mixed-depth --- JuliaLowering/src/desugaring.jl | 72 ++++++++++++++++--------- JuliaLowering/test/function_calls_ir.jl | 55 +++++++++++++++++++ JuliaLowering/test/functions.jl | 47 +++++++++++++++- 3 files changed, 148 insertions(+), 26 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 716a8a60648d8..e79d12da1687e 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -947,13 +947,58 @@ function expand_comprehension_to_loops(ctx, ex) ] end +# Mimics native lowerer's tuple-wrap function (julia-syntax.scm:2723-2736) +# Unwraps only ONE layer of `...` and wraps sequences of non-splat args in tuples. +# Example: `[a, b, xs..., c]` -> `[tuple(a, b), xs, tuple(c)]` +function _wrap_unsplatted_args(ctx, call_ex, args) + result = SyntaxList(ctx) + non_splat_run = SyntaxList(ctx) + for arg in args + if kind(arg) == K"..." + # Flush any accumulated non-splat args + if !isempty(non_splat_run) + push!(result, @ast ctx call_ex [K"call" "tuple"::K"core" non_splat_run...]) + non_splat_run = SyntaxList(ctx) + end + # Unwrap only ONE layer of `...` (corresponds to (cadr x) in native lowerer) + push!(result, arg[1]) + else + # Accumulate non-splat args + push!(non_splat_run, arg) + end + end + # Flush any remaining non-splat args + if !isempty(non_splat_run) + push!(result, @ast ctx call_ex [K"call" "tuple"::K"core" non_splat_run...]) + end + result +end + function expand_splat(ctx, ex, topfunc, args) - return @ast ctx ex [K"call" + # Matches native lowerer's algorithm + # https://github.com/JuliaLang/julia/blob/f362f47338de099cdeeb1b2d81b3ec1948443274/src/julia-syntax.scm#L2761-2762: + # 1. Unwrap one layer of `...` from each argument (via _wrap_unsplatted_args) + # 2. 
Create `_apply_iterate(iterate, f, wrapped_args...)` WITHOUT expanding args yet + # 3. Recursively expand the entire call - if any wrapped_arg still contains `...`, + # the recursive expansion will handle it, naturally building nested structure + # + # Example: tuple((xs...)...) recursion: + # Pass 1: unwrap outer `...` -> _apply_iterate(iterate, tuple, (xs...)) + # Pass 2: expand sees (xs...) in call context, unwraps again + # -> _apply_iterate(iterate, _apply_iterate, tuple(iterate, tuple), xs) + + wrapped_args = _wrap_unsplatted_args(ctx, ex, args) + + # Construct the unevaluated _apply_iterate call + result = @ast ctx ex [K"call" "_apply_iterate"::K"core" "iterate"::K"top" topfunc - expand_forms_2(ctx, _wrap_unsplatted_args(ctx, ex, args))... + wrapped_args... ] + + # Recursively expand the entire call (matching native's expand-forms) + return expand_forms_2(ctx, result) end function expand_array(ctx, ex, topfunc) @@ -1812,29 +1857,6 @@ function expand_ccall(ctx, ex) ] end -# Wrap unsplatted arguments in `tuple`: -# `[a, b, xs..., c]` -> `[(a, b), xs, (c,)]` -function _wrap_unsplatted_args(ctx, call_ex, args) - wrapped = SyntaxList(ctx) - i = 1 - while i <= length(args) - if kind(args[i]) == K"..." - splatarg = args[i] - @chk numchildren(splatarg) == 1 - push!(wrapped, splatarg[1]) - else - i1 = i - # Find range of non-splatted args - while i < length(args) && kind(args[i+1]) != K"..." 
- i += 1 - end - push!(wrapped, @ast ctx call_ex [K"call" "tuple"::K"core" args[i1:i]...]) - end - i += 1 - end - wrapped -end - function remove_kw_args!(ctx, args::SyntaxList) kws = nothing j = 0 diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl index b0a7016044c94..9370bc5e0d13b 100644 --- a/JuliaLowering/test/function_calls_ir.jl +++ b/JuliaLowering/test/function_calls_ir.jl @@ -597,3 +597,58 @@ function A.ccall() # └─────┘ ── Invalid function name end +######################################## +# Nested splat: simple case +tuple((xs...)...) +#--------------------- +1 TestMod.tuple +2 (call core.tuple top.iterate %₁) +3 TestMod.xs +4 (call core._apply_iterate top.iterate core._apply_iterate %₂ %₃) +5 (return %₄) + +######################################## +# Nested splat: with mixed arguments +tuple(a, (xs...)..., b) +#--------------------- +1 TestMod.tuple +2 TestMod.a +3 (call core.tuple %₂) +4 (call core.tuple top.iterate %₁ %₃) +5 TestMod.xs +6 TestMod.b +7 (call core.tuple %₆) +8 (call core.tuple %₇) +9 (call core._apply_iterate top.iterate core._apply_iterate %₄ %₅ %₈) +10 (return %₉) + +######################################## +# Nested splat: multiple nested splats +tuple((xs...)..., (ys...)...) +#--------------------- +1 TestMod.tuple +2 (call core.tuple top.iterate %₁) +3 TestMod.xs +4 TestMod.ys +5 (call core._apply_iterate top.iterate core._apply_iterate %₂ %₃ %₄) +6 (return %₅) + +######################################## +# Nested splat: triple nesting +tuple(((xs...)...)...) +#--------------------- +1 TestMod.tuple +2 (call core.tuple top.iterate %₁) +3 (call core.tuple top.iterate core._apply_iterate %₂) +4 TestMod.xs +5 (call core._apply_iterate top.iterate core._apply_iterate %₃ %₄) +6 (return %₅) + +######################################## +# Error: Standalone splat expression +(xs...) +#--------------------- +LoweringError: +(xs...) 
+#└───┘ ── `...` expression outside call + diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl index 42c45ee5ca3db..8193d2c7b2efe 100644 --- a/JuliaLowering/test/functions.jl +++ b/JuliaLowering/test/functions.jl @@ -20,6 +20,52 @@ end (2,3,4), (1,2,3,4,5)) +# Nested splatting +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[1, 2], [3, 4]] + tuple((xs...)...) +end +""") == (1, 2, 3, 4) + +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[1, 2]] + ys = [[3, 4]] + tuple((xs...)..., (ys...)...) +end +""") == (1, 2, 3, 4) + +# Multiple (>2) nested splat +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[[1, 2]]] + tuple(((xs...)...)...) +end +""") == (1, 2) +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[[1, 2]]] + ys = [[[3, 4]]] + tuple(((xs...)...)..., ((ys...)...)...) +end +""") == (1, 2, 3, 4) +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[[1, 2]]] + ys = [[[3, 4]]] + tuple(((xs...)...)..., ((ys...)...)) +end +""") == (1, 2, [3, 4]) + +# Trailing comma case should still work (different semantics) +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[1, 2], [3, 4]] + tuple((xs...,)...) 
+end +""") == ([1, 2], [3, 4]) + # Keyword calls Base.eval(test_mod, :( begin @@ -36,7 +82,6 @@ begin end )) - @test JuliaLowering.include_string(test_mod, """ let kws = (c=3,d=4) From cfb1475612cc602f9d6e9a368333fef983e6d30d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 20 Oct 2025 11:39:26 +1000 Subject: [PATCH 1088/1109] Minor fixes for whitespace and typos for migration to the main Julia tree (JuliaLang/JuliaSyntax.jl#603) * Fix whitespace using Julia contrib/check-whitespace.jl * Fix typos as detected by the rust "typos" tool --- JuliaSyntax/LICENSE.md | 1 - JuliaSyntax/docs/src/design.md | 1 - JuliaSyntax/docs/src/howto.md | 1 - JuliaSyntax/docs/src/index.md | 1 - JuliaSyntax/docs/src/reference.md | 3 +-- JuliaSyntax/src/core/parse_stream.jl | 2 +- JuliaSyntax/src/julia/literal_parsing.jl | 1 - JuliaSyntax/src/julia/parser.jl | 2 +- JuliaSyntax/sysimage/compile.jl | 1 - JuliaSyntax/test/diagnostics.jl | 26 ++++++++++++------------ JuliaSyntax/test/fuzz_test.jl | 1 - JuliaSyntax/test/parser.jl | 4 ++-- JuliaSyntax/test/test_utils.jl | 1 - JuliaSyntax/test/test_utils_tests.jl | 1 - JuliaSyntax/tools/bump_in_Base.jl | 1 - 15 files changed, 18 insertions(+), 29 deletions(-) diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md index 88fc63f3a342a..7efd19088a06f 100644 --- a/JuliaSyntax/LICENSE.md +++ b/JuliaSyntax/LICENSE.md @@ -43,4 +43,3 @@ package and is also licensed under the MIT "Expat" License: > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. > - diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md index 968a0e11bf609..a11d1b64140ed 100644 --- a/JuliaSyntax/docs/src/design.md +++ b/JuliaSyntax/docs/src/design.md @@ -848,4 +848,3 @@ indentation from the syntax tree? Source formatting involves a big pile of heuristics to get something which "looks nice"... and ML systems have become very good at heuristics. 
Also, we've got huge piles of training data — just choose some high quality, tastefully hand-formatted libraries. - diff --git a/JuliaSyntax/docs/src/howto.md b/JuliaSyntax/docs/src/howto.md index 0de9e69ad976d..c8bd0503591d5 100644 --- a/JuliaSyntax/docs/src/howto.md +++ b/JuliaSyntax/docs/src/howto.md @@ -35,4 +35,3 @@ To reduce startup latency you can combine with a custom system as described in the [Julia VScode docs](https://www.julia-vscode.org/docs/dev/userguide/compilesysimage/#Creating-a-sysimage-for-the-active-environment), combined with the precompile execution file in `sysimage/precompile_exec.jl` in the source tree. For additional detail see the discussion in [issue #128](https://github.com/JuliaLang/JuliaSyntax.jl/issues/128). - diff --git a/JuliaSyntax/docs/src/index.md b/JuliaSyntax/docs/src/index.md index add8907a330d1..79b8d83b62e6a 100644 --- a/JuliaSyntax/docs/src/index.md +++ b/JuliaSyntax/docs/src/index.md @@ -77,4 +77,3 @@ Julia `Expr` can also be produced: julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z") :((x + y) * z) ``` - diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md index be6ff90acf8b9..086bc57ad8224 100644 --- a/JuliaSyntax/docs/src/reference.md +++ b/JuliaSyntax/docs/src/reference.md @@ -80,7 +80,7 @@ class of tokenization errors and lets the parser deal with them. * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) * Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a nested `(iteration (in a as) (in b bs))` rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. 
Generators also use the `iteration` head - see information on that below. * Short form functions like `f(x) = x + 1` are represented with the `function` head rather than the `=` head. In this case the `SHORT_FORM_FUNCTION_FLAG` flag is set to allow the surface syntactic form to be easily distinguished from long form functions. -* All kinds of updating assignment operators like `+=` are represented with a single `K"op="` head, with the operator itself in infix position. For example, `x += 1` is `(op= x + 1)`, where the plus token is of kind `K"Identifer"`. This greatly reduces the number of distinct forms here from a rather big list (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) and makes the operator itself appear in the AST as kind `K"Identifier"`, as it should. It also makes it possible to add further unicode updating operators while keeping the AST stable. +* All kinds of updating assignment operators like `+=` are represented with a single `K"op="` head, with the operator itself in infix position. For example, `x += 1` is `(op= x + 1)`, where the plus token is of kind `K"Identifier"`. This greatly reduces the number of distinct forms here from a rather big list (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) and makes the operator itself appear in the AST as kind `K"Identifier"`, as it should. It also makes it possible to add further unicode updating operators while keeping the AST stable. 
## More detail on tree differences @@ -324,4 +324,3 @@ Expr(:ncat) │ └─ :d └─ :x ``` - diff --git a/JuliaSyntax/src/core/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl index da4d70ccf1086..393e23c86c075 100644 --- a/JuliaSyntax/src/core/parse_stream.jl +++ b/JuliaSyntax/src/core/parse_stream.jl @@ -269,7 +269,7 @@ mutable struct ParseStream lexer = Tokenize.Lexer(io) # To avoid keeping track of the exact Julia development version where new # features were added or comparing prerelease strings, we treat prereleases - # or dev versons as the release version using only major and minor version + # or dev versions as the release version using only major and minor version # numbers. This means we're inexact for old dev versions but that seems # like an acceptable tradeoff. ver = (version.major, version.minor) diff --git a/JuliaSyntax/src/julia/literal_parsing.jl b/JuliaSyntax/src/julia/literal_parsing.jl index 1db36d7f8e44a..5a087eac6d54e 100644 --- a/JuliaSyntax/src/julia/literal_parsing.jl +++ b/JuliaSyntax/src/julia/literal_parsing.jl @@ -471,4 +471,3 @@ function lower_identifier_name(name::Symbol, k::Kind) Symbol(lower_identifier_name(string(name), k)) end end - diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl index a2ce4209a8c2a..70a345057a56b 100644 --- a/JuliaSyntax/src/julia/parser.jl +++ b/JuliaSyntax/src/julia/parser.jl @@ -1418,7 +1418,7 @@ function parse_decl_with_initial_ex(ps::ParseState, mark) # (x) -> y # (x; a=1) -> y elseif kb == K"where" - # `where` and `->` have the "wrong" precedence when writing anon functons. + # `where` and `->` have the "wrong" precedence when writing anon functions. # So ignore this case to allow use of grouping brackets with `where`. 
# This needs to worked around in lowering :-( # (x where T) -> y ==> (-> (x where T) y) diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl index fbc17232ad6e3..390901eb56cd1 100755 --- a/JuliaSyntax/sysimage/compile.jl +++ b/JuliaSyntax/sysimage/compile.jl @@ -45,4 +45,3 @@ PackageCompiler.create_sysimage( Use it with `julia -J "$image_path"` """ - diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl index 1397dd215a9be..151aad919c0ed 100644 --- a/JuliaSyntax/test/diagnostics.jl +++ b/JuliaSyntax/test/diagnostics.jl @@ -34,7 +34,7 @@ end end @testset "parser errors" begin - @test diagnostic("+ #==# (a,b)") == + @test diagnostic("+ #==# (a,b)") == Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list") @test diagnostic("1 -+ (a=1, b=2)") == Diagnostic(5, 5, :error, "whitespace not allowed between prefix function call and argument list") @@ -44,18 +44,18 @@ end @test diagnostic("function (\$f) body end") == Diagnostic(10, 13, :error, "Ambiguous signature. 
Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") - @test diagnostic("A.@B.x", only_first=true) == + @test diagnostic("A.@B.x", only_first=true) == Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component") - @test diagnostic("@M.(x)") == + @test diagnostic("@M.(x)") == Diagnostic(1, 3, :error, "dot call syntax not supported for macros") - @test diagnostic("try x end") == + @test diagnostic("try x end") == Diagnostic(1, 9, :error, "try without catch or finally") # TODO: better range - @test diagnostic("@A.\$x a") == + @test diagnostic("@A.\$x a") == Diagnostic(4, 5, :error, "invalid macro name") - @test diagnostic("a, , b") == + @test diagnostic("a, , b") == Diagnostic(4, 4, :error, "unexpected `,`") @test diagnostic(")", allow_multiple=true) == [ Diagnostic(1, 1, :error, "unexpected `)`") @@ -118,15 +118,15 @@ end end @testset "parser warnings" begin - @test diagnostic("@(A)", only_first=true) == + @test diagnostic("@(A)", only_first=true) == Diagnostic(2, 4, :warning, "parenthesizing macro names is unnecessary") - @test diagnostic("try finally catch a ; b end") == + @test diagnostic("try finally catch a ; b end") == Diagnostic(13, 23, :warning, "`catch` after `finally` will execute out of order") - @test diagnostic("import . .A") == + @test diagnostic("import . 
.A") == Diagnostic(9, 10, :warning, "space between dots in import path") - @test diagnostic("import A .==") == + @test diagnostic("import A .==") == Diagnostic(9, 9, :warning, "space between dots in import path") - @test diagnostic("import A.:+") == + @test diagnostic("import A.:+") == Diagnostic(10, 10, :warning, "quoting with `:` is not required here") # No warnings for imports of `:` and parenthesized `(..)` @test diagnostic("import A.:, :", allow_multiple=true) == [] @@ -244,8 +244,8 @@ end tempdirname = mktempdir() cd(tempdirname) do rm(tempdirname) - # Test _file_url doesn't fail with nonexistant directories - @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistant__", "test.jl"))) + # Test _file_url doesn't fail with nonexistent directories + @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistent__", "test.jl"))) end end end diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index 15bfa79de2dfb..e24096a38aaf9 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -1022,4 +1022,3 @@ end # # fuzz_test(try_hook_failure, product_token_fuzz(cutdown_tokens, 2)) # fuzz_test(try_parseall_failure, product_token_fuzz(cutdown_tokens, 2)) - diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index a6ee4b62f1c8a..4aa8652858313 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -300,7 +300,7 @@ tests = [ # `where` combined with `->` still parses strangely. However: # * It's extra hard to add a tuple around the `x` in this syntax corner case. # * The user already needs to add additional, ugly, parens to get this - # to parse correctly because the precendence of `where` is + # to parse correctly because the precedence of `where` is # inconsistent with `::` and `->` in this case. 
"(x where T)->c" => "(-> (parens (where x T)) c)" "((x::T) where T)->c" => "(-> (parens (where (parens (::-i x T)) T)) c)" @@ -963,7 +963,7 @@ tests = [ "[a \n b]" => "(vcat a b)" # Can't mix multiple ;'s and spaces ((v=v"1.7",), "[a ;; b c]") => "(ncat-2 a (row b (error-t) c))" - # Empty nd arrays + # Empty N-dimensional arrays ((v=v"1.8",), "[;]") => "(ncat-1)" ((v=v"1.8",), "[;;]") => "(ncat-2)" ((v=v"1.8",), "[\n ;; \n ]") => "(ncat-2)" diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl index dae16cc03d1a4..ed3d11e2f966f 100644 --- a/JuliaSyntax/test/test_utils.jl +++ b/JuliaSyntax/test/test_utils.jl @@ -482,4 +482,3 @@ function Meta_parseall(text::AbstractString; filename="none", lineno=1) ex,_ = _Meta_parse_string(text, String(filename), lineno, 1, :all) return ex end - diff --git a/JuliaSyntax/test/test_utils_tests.jl b/JuliaSyntax/test/test_utils_tests.jl index 8c68f068a10ed..51515515a83f5 100644 --- a/JuliaSyntax/test/test_utils_tests.jl +++ b/JuliaSyntax/test/test_utils_tests.jl @@ -35,4 +35,3 @@ Expr(:block, LineNumberNode(1), 1)), Expr(:block, LineNumberNode(1)))) end - diff --git a/JuliaSyntax/tools/bump_in_Base.jl b/JuliaSyntax/tools/bump_in_Base.jl index c9ca7473efb45..aec2876deb645 100644 --- a/JuliaSyntax/tools/bump_in_Base.jl +++ b/JuliaSyntax/tools/bump_in_Base.jl @@ -72,4 +72,3 @@ if !isinteractive() exit(bump_in_Base(ARGS[1], juliasyntax_dir, ARGS[2])) end end - From ecf02a6526a99c4a34c6e8a396a5851ed9335dc1 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 20 Oct 2025 11:55:20 +1000 Subject: [PATCH 1089/1109] Minor fixes for whitespace and typos for migration to the main Julia tree (JuliaLang/JuliaLowering.jl#104) * Fix whitespace using Julia contrib/check-whitespace.jl * Fix typos as detected by the rust "typos" tool --- JuliaLowering/README.md | 12 ++++++------ JuliaLowering/src/ast.jl | 2 +- JuliaLowering/src/bindings.jl | 1 - JuliaLowering/src/closure_conversion.jl | 4 ++-- JuliaLowering/src/desugaring.jl | 
22 +++++++++++----------- JuliaLowering/src/eval.jl | 2 +- JuliaLowering/src/kinds.jl | 2 +- JuliaLowering/src/linear_ir.jl | 2 +- JuliaLowering/src/macro_expansion.jl | 2 +- JuliaLowering/src/runtime.jl | 4 ++-- JuliaLowering/src/scope_analysis.jl | 8 ++++---- JuliaLowering/src/syntax_graph.jl | 3 +-- JuliaLowering/test/arrays_ir.jl | 1 - JuliaLowering/test/assignments_ir.jl | 1 - JuliaLowering/test/branching_ir.jl | 1 - JuliaLowering/test/closures_ir.jl | 1 - JuliaLowering/test/compat.jl | 2 +- JuliaLowering/test/decls_ir.jl | 1 - JuliaLowering/test/destructuring.jl | 2 +- JuliaLowering/test/destructuring_ir.jl | 1 - JuliaLowering/test/exceptions_ir.jl | 1 - JuliaLowering/test/function_calls_ir.jl | 1 - JuliaLowering/test/functions_ir.jl | 3 +-- JuliaLowering/test/generators_ir.jl | 1 - JuliaLowering/test/import_ir.jl | 1 - JuliaLowering/test/loops_ir.jl | 1 - JuliaLowering/test/macros_ir.jl | 1 - JuliaLowering/test/misc.jl | 2 +- JuliaLowering/test/misc_ir.jl | 19 +++++++++---------- JuliaLowering/test/quoting_ir.jl | 1 - JuliaLowering/test/scopes_ir.jl | 1 - JuliaLowering/test/typedefs_ir.jl | 1 - 32 files changed, 44 insertions(+), 63 deletions(-) diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md index a420702752b30..5ebe02b95688d 100644 --- a/JuliaLowering/README.md +++ b/JuliaLowering/README.md @@ -267,7 +267,7 @@ JuliaLowering our representation is the tuple `(name,scope_layer)`, also called `VarId` in the scope resolution pass. JuliaLowering's macro expander attaches a unique *scope layer* to each -identifier in a piece of syntax. A "scope layer" is an integer identifer +identifier in a piece of syntax. A "scope layer" is an integer identifier combined with the module in which the syntax was created. When expanding macros, @@ -278,7 +278,7 @@ When expanding macros, in the syntax produced by the macro are tagged with this layer. Subsequently, the `(name,scope_layer)` pairs are used when resolving bindings. 
-This ensures that, by default, we satisfy the basic rules for hygenic macros +This ensures that, by default, we satisfy the basic rules for hygienic macros discussed in Adams' paper: 1. A macro can't insert a binding that can capture references other than those @@ -577,7 +577,7 @@ optimizations include: Properties of non-globals which are computed per-binding-per-closure include: * Read: the value of the binding is used. -* Write: the binding is asssigned to. +* Write: the binding is assigned to. * Captured: Bindings defined outside the closure which are either Read or Write within the closure are "captured" and need to be one of the closure's fields. * Called: the binding is called as a function, ie, `x()`. (TODO - what is this @@ -621,7 +621,7 @@ begin end let - # f is local so this is a closure becuase `let ... end` introduces a scope + # f is local so this is a closure because `let ... end` introduces a scope function f() body end @@ -831,7 +831,7 @@ runtime itself. ### Untyped IR (`CodeInfo` form) The final lowered IR is expressed as `CodeInfo` objects which are a sequence of -`code` statments containing +`code` statements containing * Literals * Restricted forms of `Expr` (with semantics different from surface syntax, even for the same `head`! for example the arguments to `Expr(:call)` in IR @@ -890,7 +890,7 @@ In the current Julia runtime, - Uses `jl_toplevel_eval_in` which calls `jl_toplevel_eval_flex` `jl_toplevel_eval_flex(mod, ex)` -- Lowers if necessay +- Lowers if necessary - Evaluates certain blessed top level forms * `:.` * `:module` diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index 34ac2939a42fc..a7b087c17ad56 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -168,7 +168,7 @@ end # TODO: Replace this with makeleaf variant? 
function mapleaf(ctx, src, kind) ex = makeleaf(syntax_graph(ctx), src, kind) - # TODO: Value coersion might be broken here due to use of `name_val` vs + # TODO: Value coercion might be broken here due to use of `name_val` vs # `value` vs ... ? copy_attrs!(ex, src) ex diff --git a/JuliaLowering/src/bindings.jl b/JuliaLowering/src/bindings.jl index f35a61c05d876..286e67ecbeb14 100644 --- a/JuliaLowering/src/bindings.jl +++ b/JuliaLowering/src/bindings.jl @@ -247,4 +247,3 @@ end function has_lambda_binding(ctx::AbstractLoweringContext, x) has_lambda_binding(current_lambda_bindings(ctx), x) end - diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index 99aecd3f83c97..d40d944db13f1 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -494,7 +494,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind === :local cap_rewrite = is_closure ? 
ctx.closure_infos[name.var_id] : nothing ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, + ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, ctx.is_toplevel_seq_point, ctx.toplevel_pure, ctx.toplevel_stmts, ctx.closure_infos) body = map_cl_convert(ctx2, ex[2], false) @@ -520,7 +520,7 @@ function _convert_closures(ctx::ClosureConversionCtx, ex) capture_rewrites = ClosureInfo(ex #=unused=#, field_syms, field_inds) ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, - ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, + ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, false, ctx.toplevel_pure, ctx.toplevel_stmts, ctx.closure_infos) init_closure_args = SyntaxList(ctx) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index e79d12da1687e..53aa67bff560b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -794,7 +794,7 @@ function similar_tuples_or_identifiers(a, b) end # Return the anonymous function taking an iterated value, for use with the -# first agument to `Base.Generator` +# first argument to `Base.Generator` function func_for_generator(ctx, body, iter_value_destructuring) if similar_tuples_or_identifiers(iter_value_destructuring, body) # Use Base.identity for generators which are filters such as @@ -1139,7 +1139,7 @@ end # ncat comes in various layouts which we need to lower to special cases # - one dimensional along some dimension # - balanced column first or row first -# - ragged colum first or row first +# - ragged column first or row first function expand_ncat(ctx, ex) is_typed = kind(ex) == K"typed_ncat" outer_dim = numeric_flags(ex) @@ -1209,7 +1209,7 @@ function expand_ncat(ctx, ex) end else # For unbalanced/ragged concatenations, the shape is specified by the - # number of elements in each ND slice of the array, from layout + # number of elements in each N-dimensional slice of the 
array, from layout # dimension 1 to N. See the documentation for `hvncat` for details. i = 1 while i <= length(nrow_spans) @@ -1354,7 +1354,7 @@ function expand_assignment(ctx, ex, is_const=false) convert_for_type_decl(ctx, ex, rhs, T, true) ]]) elseif is_identifier_like(x) - # Identifer in lhs[1] is a variable type declaration, eg + # Identifier in lhs[1] is a variable type declaration, eg # x::T = rhs @ast ctx ex [K"block" [K"decl" lhs[1] lhs[2]] @@ -2378,8 +2378,8 @@ end # Select static parameters which are used in function arguments `arg_types`, or # transitively used. # -# The transitive usage check probably doesn't guarentee that the types are -# inferrable during dispatch as they may only be part of the bounds of another +# The transitive usage check probably doesn't guarantee that the types are +# inferable during dispatch as they may only be part of the bounds of another # type. Thus we might get false positives here but we shouldn't get false # negatives. function select_used_typevars(arg_types, typevar_names, typevar_stmts) @@ -3073,7 +3073,7 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= push!(sig_stmts, @ast(ctx, ex, [K"curly" "Tuple"::K"core" arg_types[2:i]...])) end sig_type = @ast ctx ex [K"where" - [K"curly" "Union"::K"core" sig_stmts...] + [K"curly" "Union"::K"core" sig_stmts...] [K"_typevars" [K"block" typevar_names...] [K"block"]] ] out = @ast ctx docs [K"block" @@ -3907,7 +3907,7 @@ function rewrite_new_calls(ctx, ex, struct_name, global_struct_name, ) end -function _constructor_min_initalized(ex::SyntaxTree) +function _constructor_min_initialized(ex::SyntaxTree) if _is_new_call(ex) if any(kind(e) == K"..." 
for e in ex[2:end]) # Lowering ensures new with splats always inits all fields @@ -3917,7 +3917,7 @@ function _constructor_min_initalized(ex::SyntaxTree) numchildren(ex) - 1 end elseif !is_leaf(ex) - minimum((_constructor_min_initalized(e) for e in children(ex)), init=typemax(Int)) + minimum((_constructor_min_initialized(e) for e in children(ex)), init=typemax(Int)) else typemax(Int) end @@ -3958,7 +3958,7 @@ function expand_struct_def(ctx, ex, docs) _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, inner_defs, children(type_body)) is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG) - min_initialized = minimum((_constructor_min_initalized(e) for e in inner_defs), + min_initialized = minimum((_constructor_min_initialized(e) for e in inner_defs), init=length(field_names)) newtype_var = ssavar(ctx, ex, "struct_type") hasprev = ssavar(ctx, ex, "hasprev") @@ -3984,7 +3984,7 @@ function expand_struct_def(ctx, ex, docs) need_outer_constructor = false if isempty(inner_defs) && !isempty(typevar_names) # To generate an outer constructor each struct type parameter must be - # able to be inferred from the list of fields passed as constuctor + # able to be inferred from the list of fields passed as constructor # arguments. 
# # More precisely, it must occur in a field type, or in the bounds of a diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 02210946f84e3..6beaab5ad8895 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -216,7 +216,7 @@ function finish_ir_debug_info!(current_codelocs_stack) _compress_debuginfo(only(current_codelocs_stack)) end -# Convert SyntaxTree to the CodeInfo+Expr data stuctures understood by the +# Convert SyntaxTree to the CodeInfo+Expr data structures understood by the # Julia runtime function to_code_info(ex::SyntaxTree, slots::Vector{Slot}, meta::CompileHints) stmts = Any[] diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index 0f2dfbb407d99..bef831db78a29 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -8,7 +8,7 @@ function _register_kinds() "BEGIN_EXTENSION_KINDS" # atomic fields or accesses (see `@atomic`) "atomic" - # Flag for @generated parts of a functon + # Flag for @generated parts of a function "generated" # Temporary rooting of identifiers (GC.@preserve) "gc_preserve" diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl index 0c1fb510e1a6f..2b6838f9e97d3 100644 --- a/JuliaLowering/src/linear_ir.jl +++ b/JuliaLowering/src/linear_ir.jl @@ -890,7 +890,7 @@ function _remove_vars_with_isdefined_check!(vars, ex) end # Find newvar nodes that are unnecessary because -# 1. The variable is not captured and +# 1. The variable is not captured and # 2. The variable is assigned before any branches. 
# # This is used to remove newvar nodes that are not needed for re-initializing diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index a04cbacfef1e5..84efb44fbc978 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -395,7 +395,7 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) @ast ctx ex ex=>K"Placeholder" elseif is_ccall_or_cglobal(name_str) # Lower special identifiers `cglobal` and `ccall` to `K"core"` - # psuedo-refs very early so that cglobal and ccall can never be + # pseudo-refs very early so that cglobal and ccall can never be # turned into normal bindings (eg, assigned to) @ast ctx ex name_str::K"core" else diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index ff089c99cefe1..e6b03cec54440 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -73,9 +73,9 @@ function _interpolated_value(ctx::InterpolationContext, srcref, ex) end append_sourceref(ctx, ex, srcref) elseif ex isa Symbol - # Plain symbols become identifiers. This is an accomodation for + # Plain symbols become identifiers. This is an accommodation for # compatibility to allow `:x` (a Symbol) and `:(x)` (a SyntaxTree) to - # be used interchangably in macros. + # be used interchangeably in macros. makeleaf(ctx, srcref, K"Identifier", string(ex)) else makeleaf(ctx, srcref, K"Value", ex) diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index f73c736241ebf..4d751a895adfa 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -81,7 +81,7 @@ end # Find names of all identifiers used in the given expression, grouping them # into sets by type of usage. 
# -# NB: This only works propery after desugaring +# NB: This only works properly after desugaring function find_scope_vars(ctx, ex) ExT = typeof(ex) assignments = Dict{NameKey,ExT}() @@ -521,8 +521,8 @@ function _resolve_scopes(ctx, ex::SyntaxTree) [K"call" "apply_type"::K"core" "Dict"::K"top" - "Symbol"::K"core" - "Any"::K"core" + "Symbol"::K"core" + "Any"::K"core" ] ] ]) @@ -578,7 +578,7 @@ function _resolve_scopes(ctx, ex::SyntaxTree) if bk == :argument throw(LoweringError(name, "Cannot add method to a function argument")) elseif bk == :global && !ctx.scope_stack[end].in_toplevel_thunk - throw(LoweringError(name, + throw(LoweringError(name, "Global method definition needs to be placed at the top level, or use `eval()`")) end end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index d8dd51372cbca..767643b8f99dc 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -146,7 +146,7 @@ function hasattr(graph::SyntaxGraph, name::Symbol) getattr(graph, name, nothing) !== nothing end -# TODO: Probably terribly non-inferrable? +# TODO: Probably terribly non-inferable? function setattr!(graph::SyntaxGraph, id; attrs...) for (k,v) in pairs(attrs) if !isnothing(v) @@ -817,4 +817,3 @@ end # end # out # end - diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl index f0fa3f1ed0f5d..4595603e4b79d 100644 --- a/JuliaLowering/test/arrays_ir.jl +++ b/JuliaLowering/test/arrays_ir.jl @@ -496,4 +496,3 @@ a[] = rhs 2 TestMod.a 3 (call top.setindex! 
%₂ %₁) 4 (return %₁) - diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl index 4fcb0abb1ea9a..2b002fbcef61e 100644 --- a/JuliaLowering/test/assignments_ir.jl +++ b/JuliaLowering/test/assignments_ir.jl @@ -359,4 +359,3 @@ f() += y LoweringError: (if false end, b) += 2 └───────────────┘ ── invalid multiple assignment location - diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl index dd45d37c6f07e..f7a63f40291e6 100644 --- a/JuliaLowering/test/branching_ir.jl +++ b/JuliaLowering/test/branching_ir.jl @@ -237,4 +237,3 @@ x = @label foo LoweringError: x = @label foo # └─┘ ── misplaced label in value position - diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl index 9f3d07e52187b..0916e3133c97b 100644 --- a/JuliaLowering/test/closures_ir.jl +++ b/JuliaLowering/test/closures_ir.jl @@ -763,4 +763,3 @@ slots: [slot₁/#self#(!read) slot₂/T(!read) slot₃/tmp(!read)] #--------------------- LoweringError: #= line 1 =# - Top level code was found outside any top level context. `@generated` functions may not contain closures, including `do` syntax and generators/comprehension - diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl index 71f5d3005f6ad..77c769538bc59 100644 --- a/JuliaLowering/test/compat.jl +++ b/JuliaLowering/test/compat.jl @@ -485,7 +485,7 @@ const JL = JuliaLowering @test JuliaLowering.expr_to_syntaxtree(Expr(:comparison, :x, esc(Symbol(".+")), :y)) ≈ @ast_ [K"comparison" "x"::K"Identifier" - [K"." + [K"." 
[K"escape" "+"::K"Identifier"] ] "y"::K"Identifier" diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl index 9ad27bdf8ab23..1092b4d70d3f5 100644 --- a/JuliaLowering/test/decls_ir.jl +++ b/JuliaLowering/test/decls_ir.jl @@ -297,4 +297,3 @@ function f() global x::Int = 1 # └─────────┘ ── type declarations for global variables must be at top level, not inside a function end - diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl index 4289952ec886b..6158d8bc28ebf 100644 --- a/JuliaLowering/test/destructuring.jl +++ b/JuliaLowering/test/destructuring.jl @@ -45,7 +45,7 @@ end """) == ('a', "βcδ", 'e') -# Use in value position yeilds rhs +# Use in value position yields rhs @test JuliaLowering.include_string(test_mod, """ let as = [1,2] diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl index cd4e3a6ef1e67..990096a87e916 100644 --- a/JuliaLowering/test/destructuring_ir.jl +++ b/JuliaLowering/test/destructuring_ir.jl @@ -385,4 +385,3 @@ LoweringError: LoweringError: (; a=1, b) = rhs # └─┘ ── invalid assignment location - diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl index 874f34a57bf99..8cf423258f0c5 100644 --- a/JuliaLowering/test/exceptions_ir.jl +++ b/JuliaLowering/test/exceptions_ir.jl @@ -355,4 +355,3 @@ end 6 TestMod.b 7 (pop_exception %₁) 8 (return %₆) - diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl index 9370bc5e0d13b..f2772a65d6967 100644 --- a/JuliaLowering/test/function_calls_ir.jl +++ b/JuliaLowering/test/function_calls_ir.jl @@ -651,4 +651,3 @@ tuple(((xs...)...)...) LoweringError: (xs...) 
#└───┘ ── `...` expression outside call - diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl index ccce5ffbcf1bb..a537757b881ba 100644 --- a/JuliaLowering/test/functions_ir.jl +++ b/JuliaLowering/test/functions_ir.jl @@ -1346,7 +1346,7 @@ end ######################################## # Static parameters used in keywords, with and without the static parameter # being present in positional argument types. -# +# # Here the wrong type for `b` will get a `TypeError` but `A` will need to rely # on a MethodError. function f_kw_sparams(x::X; a::A=a_def, b::X=b_def) where {X,A} @@ -1590,4 +1590,3 @@ end 20 latestworld 21 TestMod.f_partially_generated 22 (return %₂₁) - diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl index eecfb1b1d0b38..28f0241c92e93 100644 --- a/JuliaLowering/test/generators_ir.jl +++ b/JuliaLowering/test/generators_ir.jl @@ -297,4 +297,3 @@ T[(x,y) for x in xs, y in ys] 48 (gotoifnot %₄₇ label₅₀) 49 (goto label₁₅) 50 (return %₇) - diff --git a/JuliaLowering/test/import_ir.jl b/JuliaLowering/test/import_ir.jl index 6e17b0f58033f..8f34f5f0c4939 100644 --- a/JuliaLowering/test/import_ir.jl +++ b/JuliaLowering/test/import_ir.jl @@ -67,4 +67,3 @@ public a, b, c #--------------------- 1 (call JuliaLowering.eval_public TestMod false ["a", "b", "c"]) 2 (return %₁) - diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl index 3ed96c386456a..709322a084c68 100644 --- a/JuliaLowering/test/loops_ir.jl +++ b/JuliaLowering/test/loops_ir.jl @@ -144,4 +144,3 @@ let # ╙ ── `outer` annotations must match with a local variable in an outer scope but no such variable was found nothing end - diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 9547455418def..183dce3944b35 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -186,4 +186,3 @@ cmdmac`hello` cmdmac`hello`12345 #--------------------- 1 (return "hello from 
cmdmac with suffix 12345") - diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index cfbe971164d7f..4c07cc9ca1842 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -1,4 +1,4 @@ -@testset "Miscellanous" begin +@testset "Miscellaneous" begin test_mod = Module() diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 345b86763dd5e..ffffd6e048dd6 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -1,5 +1,5 @@ -module JuxtTest - macro emit_juxt() +module JuxtuposeTest + macro emit_juxtupose() :(10x) end end @@ -304,18 +304,18 @@ end 4 (return %₃) ######################################## -# Juxtaposition - check the juxtapose multiply is resolved to `JuxtTest.*` when -# emitted by the macro in the JuxtTest module. -# +# Juxtaposition - check the juxtapose multiply is resolved to `JuxtuposeTest.*` when +# emitted by the macro in the JuxtuposeTest module. +# # This is consistent with Julia's existing system but it's not entirely clear # this is good - perhaps we should resolve to Base.* instead? Resolving to the # module-local version makes it exactly equivalent to `*`. But one might argue # this is confusing because the symbol `*` appears nowhere in the user's source # code. 
-JuxtTest.@emit_juxt +JuxtuposeTest.@emit_juxtupose #--------------------- -1 TestMod.JuxtTest.* -2 TestMod.JuxtTest.x +1 TestMod.JuxtuposeTest.* +2 TestMod.JuxtuposeTest.x 3 (call %₁ 10 %₂) 4 (return %₃) @@ -535,10 +535,9 @@ f(x)::Int, g() = [1.0, 2.0] └──┘ ── invalid assignment location ######################################## -# Error: Destructuring assignment typdef, variable, and function (broken, legacy) +# Error: Destructuring assignment typedef, variable, and function (broken, legacy) T{U}, (x::Float64, g()) = [Bool, (1, 2)] #--------------------- LoweringError: T{U}, (x::Float64, g()) = [Bool, (1, 2)] # └─┘ ── invalid assignment location - diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl index e53be0c61fcbd..5b323c17e3948 100644 --- a/JuliaLowering/test/quoting_ir.jl +++ b/JuliaLowering/test/quoting_ir.jl @@ -42,4 +42,3 @@ quote $$x + 1 # └┘ ── `$` expression outside string or quote block end - diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 84258ef2811e5..7d24698272e31 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -530,4 +530,3 @@ end 20 latestworld 21 TestMod.f 22 (return %₂₁) - diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index b1a9f920f16cb..260e3c2f4bfab 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -1346,4 +1346,3 @@ function f() end #─────┘ ── this syntax is only allowed in top level code end - From 06f462a14f5481251117b9c0a423ef57e953d915 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 22 Oct 2025 01:25:39 +0900 Subject: [PATCH 1090/1109] Fix quoted property access syntax (e.g., `Core.:(!==)`) (JuliaLang/JuliaLowering.jl#89) * Fix quoted property access syntax (e.g., `Core.:(!==)`) Handle `K"quote"` nodes in property access during macro expansion by unwrapping them before processing. 
This allows syntax like `Core.:(!==)` to lower correctly, matching the behavior of `Meta.lower`. Co-Authored-By: Claude --- JuliaLowering/src/macro_expansion.jl | 8 ++++++- JuliaLowering/test/quoting.jl | 27 +++++++++++++++++++++++ JuliaLowering/test/quoting_ir.jl | 33 ++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 84efb44fbc978..d1ca4ed4f98e5 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -458,7 +458,13 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) elseif k == K"module" || k == K"toplevel" || k == K"inert" ex elseif k == K"." && numchildren(ex) == 2 - e2 = expand_forms_1(ctx, ex[2]) + # Handle quoted property access like `x.:(foo)` or `Core.:(!==)` + # Unwrap the quote to get the identifier before expansion + rhs = ex[2] + if kind(rhs) == K"quote" && numchildren(rhs) == 1 + rhs = rhs[1] + end + e2 = expand_forms_1(ctx, rhs) if kind(e2) == K"Identifier" || kind(e2) == K"Placeholder" # FIXME: Do the K"Symbol" transformation in the parser?? 
e2 = @ast ctx e2 e2=>K"Symbol" diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index 80344f0e3bfc0..e312653fae3b1 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -94,6 +94,33 @@ end @test kind(ex[2]) == K"Identifier" @test ex[2].name_val == "a" +# Test quoted property access syntax like `Core.:(foo)` and `Core.:(!==)` +@test JuliaLowering.include_string(test_mod, """ + x = (a=1, b=2) + x.:(a) +""") == 1 +@test JuliaLowering.include_string(test_mod, """ + Core.:(!==) +""") === (!==) + +# Test quoted operator function definitions (issue #20) +@test JuliaLowering.include_string(test_mod, """ +begin + struct Issue20 + x::Int + end + Base.:(==)(a::Issue20, b::Issue20) = a.x == b.x + Issue20(1) == Issue20(1) +end +""") === true + +@test JuliaLowering.include_string(test_mod, """ +begin + Base.:(<)(a::Issue20, b::Issue20) = a.x < b.x + Issue20(1) < Issue20(2) +end +""") === true + # interpolations at multiple depths ex = JuliaLowering.include_string(test_mod, raw""" let diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl index 5b323c17e3948..dda1f65f9ff25 100644 --- a/JuliaLowering/test/quoting_ir.jl +++ b/JuliaLowering/test/quoting_ir.jl @@ -42,3 +42,36 @@ quote $$x + 1 # └┘ ── `$` expression outside string or quote block end + +######################################## +# Quoted property access with identifier +Core.:(foo) +#--------------------- +1 TestMod.Core +2 (call top.getproperty %₁ :foo) +3 (return %₂) + +######################################## +# Quoted property access with operator +Core.:(!==) +#--------------------- +1 TestMod.Core +2 (call top.getproperty %₁ :!==) +3 (return %₂) + +######################################## +# Quoted operator function definition (issue #20) +function Base.:(==)() end +#--------------------- +1 TestMod.Base +2 (call top.getproperty %₁ :==) +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 SourceLocation::1:10 +7 (call 
core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read)] + 1 (return core.nothing) +9 latestworld +10 (return core.nothing) From 85f03fa059d4062d4c748a7e43a31245b8b955ef Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 22 Oct 2025 02:54:56 +1000 Subject: [PATCH 1091/1109] Remove default scope layer for desugaring (JuliaLang/JuliaLowering.jl#106) When desugaring introduces a `K"Identifer"` it should always decorate it with an associates scope layer - either adopted from the users code, or an internal layer created on the fly. This ensures desugaring treats hygiene consistently with macro expansion (thus ensuring that desugaring itself is hygienic). --- JuliaLowering/src/ast.jl | 19 +++++++++++++++++++ JuliaLowering/src/macro_expansion.jl | 15 --------------- JuliaLowering/src/runtime.jl | 8 +++++--- JuliaLowering/src/scope_analysis.jl | 9 +-------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl index a7b087c17ad56..93f5a7c13f5c3 100644 --- a/JuliaLowering/src/ast.jl +++ b/JuliaLowering/src/ast.jl @@ -70,6 +70,21 @@ Id for scope layers in macro expansion """ const LayerId = Int +""" +A `ScopeLayer` is a mechanism for automatic hygienic macros; every identifier +is assigned to a particular layer and can only match against bindings which are +themselves part of that layer. + +Normal code contains a single scope layer, whereas each macro expansion +generates a new layer. +""" +struct ScopeLayer + id::LayerId + mod::Module + parent_layer::LayerId # Index of parent layer in a macro expansion. 
Equal to 0 for no parent + is_macro_expansion::Bool # FIXME +end + #------------------------------------------------------------------------------- # AST creation utilities _node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_compatible_graph(graph, ex); ex._id) @@ -500,6 +515,10 @@ function adopt_scope(ex::SyntaxTree, scope_layer::LayerId) set_scope_layer(ex, ex, scope_layer, true) end +function adopt_scope(ex::SyntaxTree, layer::ScopeLayer) + adopt_scope(ex, layer.id) +end + function adopt_scope(ex::SyntaxTree, ref::SyntaxTree) adopt_scope(ex, ref.scope_layer) end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d1ca4ed4f98e5..673d71ae8ff79 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -1,20 +1,5 @@ # Lowering pass 1: Macro expansion, simple normalizations and quote expansion -""" -A `ScopeLayer` is a mechanism for automatic hygienic macros; every identifier -is assigned to a particular layer and can only match against bindings which are -themselves part of that layer. - -Normal code contains a single scope layer, whereas each macro expansion -generates a new layer. -""" -struct ScopeLayer - id::LayerId - mod::Module - parent_layer::LayerId # Index of parent layer in a macro expansion. Equal to 0 for no parent - is_macro_expansion::Bool # FIXME -end - struct MacroExpansionContext{GraphType} <: AbstractLoweringContext graph::GraphType bindings::Bindings diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index e6b03cec54440..e7bd83cea8fba 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -343,9 +343,11 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a macro_world = typemax(UInt) ctx1 = MacroExpansionContext(graph, __module__, false, macro_world) + layer = only(ctx1.scope_layers) + # Run code generator - this acts like a macro expander and like a macro # expander it gets a MacroContext. 
- mctx = MacroContext(syntax_graph(ctx1), g.srcref, ctx1.scope_layers[end]) + mctx = MacroContext(syntax_graph(ctx1), g.srcref, layer) ex0 = g.gen(mctx, args...) if ex0 isa SyntaxTree if !is_compatible_graph(ctx1, ex0) @@ -370,10 +372,10 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Wrap expansion in a non-toplevel lambda and run scope resolution ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false, toplevel_pure=true) [K"block" - (string(n)::K"Identifier" for n in g.argnames)... + (adopt_scope(string(n)::K"Identifier", layer) for n in g.argnames)... ] [K"block" - (string(n)::K"Identifier" for n in g.spnames)... + (adopt_scope(string(n)::K"Identifier", layer) for n in g.spnames)... ] ex2 ] diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl index 4d751a895adfa..ce3f0fba23b76 100644 --- a/JuliaLowering/src/scope_analysis.jl +++ b/JuliaLowering/src/scope_analysis.jl @@ -12,16 +12,9 @@ function Base.isless(a::NameKey, b::NameKey) (a.name, a.layer) < (b.name, b.layer) end -# Identifiers produced by lowering will have the following layer by default. -# -# To make new mutable variables without colliding names, lowering can -# - generate new var_id's directly (like the gensyms used by the old system) -# - create additional layers, though this may be unnecessary -const _lowering_internal_layer = -1 - function NameKey(ex::SyntaxTree) @chk kind(ex) == K"Identifier" - NameKey(ex.name_val, get(ex, :scope_layer, _lowering_internal_layer)) + NameKey(ex.name_val, ex.scope_layer) end #------------------------------------------------------------------------------- From 76f4fbc3773f6f54368181fcf7642a328e56091a Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Tue, 21 Oct 2025 10:58:04 -0700 Subject: [PATCH 1092/1109] Fix `ccall` with no supplied varargs (JuliaLang/JuliaLowering.jl#102) Just an off-by-one in desugaring's argument counting. 
Test case from Mmap.jl: ``` ccall(:fcntl, Cint, (RawFD, Cint, Cint...), s, F_GETFL) ``` --- JuliaLowering/src/desugaring.jl | 18 ++++++++---------- JuliaLowering/test/misc_ir.jl | 18 +++++++++++++++++- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 53aa67bff560b..2e9aba254fc1a 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1749,28 +1749,26 @@ function expand_ccall(ctx, ex) end arg_types = children(arg_type_tuple) vararg_type = nothing + num_required_args = length(arg_types) if length(arg_types) >= 1 va = arg_types[end] if kind(va) == K"..." @chk numchildren(va) == 1 # Ok: vararg function vararg_type = va + if length(arg_types) <= 1 + throw(LoweringError(vararg_type, "C ABI prohibits vararg without one required argument")) + else + num_required_args = length(arg_types) - 1 + end end end # todo: use multi-range errors here - if length(args) < length(arg_types) + if length(args) < num_required_args throw(LoweringError(ex, "Too few arguments in ccall compared to argument types")) elseif length(args) > length(arg_types) && isnothing(vararg_type) throw(LoweringError(ex, "More arguments than types in ccall")) end - if isnothing(vararg_type) - num_required_args = 0 - else - num_required_args = length(arg_types) - 1 - if num_required_args < 1 - throw(LoweringError(vararg_type, "C ABI prohibits vararg without one required argument")) - end - end sctx = with_stmts(ctx) expanded_types = SyntaxList(ctx) for (i, argt) in enumerate(arg_types) @@ -1845,7 +1843,7 @@ function expand_ccall(ctx, ex) expanded_types... ] ] - num_required_args::K"Integer" + (isnothing(vararg_type) ? 
0 : num_required_args)::K"Integer" if isnothing(cconv) "ccall"::K"Symbol" else diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index ffffd6e048dd6..a0a231d5843d2 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -396,7 +396,7 @@ end 24 (return %₂₃) ######################################## -# @ccall lowering with varargs and gc_safe +# @ccall lowering with gc_safe @ccall foo(x::X; y::Y)::R gc_safe=true #--------------------- 1 JuliaLowering.Base @@ -424,6 +424,22 @@ end 23 (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 1 :($(QuoteNode((:ccall, 0x0000, true)))) %₁₅ %₂₀ %₂₁ %₂₂) 24 (return %₂₃) +######################################## +# non-macro ccall with vararg in signature, but none provided +ccall(:fcntl, Cint, (RawFD, Cint, Cint...), s, F_GETFL) +#--------------------- +1 TestMod.RawFD +2 TestMod.Cint +3 TestMod.Cint +4 TestMod.s +5 (call top.cconvert %₁ %₄) +6 TestMod.F_GETFL +7 (call top.cconvert %₂ %₆) +8 (call top.unsafe_convert %₁ %₅) +9 (call top.unsafe_convert %₂ %₇) +10 (foreigncall :fcntl (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.RawFD TestMod.Cint TestMod.Cint)) 2 :ccall %₈ %₉ %₅ %₇) +11 (return %₁₀) + ######################################## # Error: No return annotation on @ccall @ccall strlen("foo"::Cstring) From f7d6c4828dcf0b8bd23828c045f02d5de3b70c07 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Fri, 24 Oct 2025 09:27:05 -0700 Subject: [PATCH 1093/1109] Interpolation and type-stability improvements (JuliaLang/JuliaLowering.jl#105) * Interpolation and type-stability improvements Should be a quick fix for JuliaLang/JuliaLowering.jl#94. Also improve the interpolation algorithm: instead of starting with a copy of the AST and re-scanning the tree for interpolations with each call to `_interpolate_ast`, do one full unconditional pass over the initial tree that copies and interpolates. 
Also fixes interpolation into QuoteNode in expr compat mode (e.g. `@eval Base.$x`) --------- Co-authored-by: Claire Foster --- JuliaLowering/src/runtime.jl | 79 ++++++++++++------------------- JuliaLowering/src/syntax_graph.jl | 3 +- JuliaLowering/test/quoting.jl | 36 +++++++++++++- 3 files changed, 67 insertions(+), 51 deletions(-) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index e7bd83cea8fba..d5a908aef1772 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -48,22 +48,9 @@ _syntax_list(ctx::ExprInterpolationContext) = Any[] _interp_makenode(ctx::InterpolationContext, ex, args) = makenode(ctx, ex, ex, args) _interp_makenode(ctx::ExprInterpolationContext, ex, args) = Expr((ex::Expr).head, args...) -_to_syntax_tree(ex::SyntaxTree) = ex -_to_syntax_tree(@nospecialize(ex)) = expr_to_syntaxtree(ex) - - -function _contains_active_interp(ex, depth) - k = _interp_kind(ex) - if k == K"$" && depth == 0 - return true - elseif _numchildren(ex) == 0 - return false - end - inner_depth = k == K"quote" ? depth + 1 : - k == K"$" ? 
depth - 1 : - depth - return any(_contains_active_interp(c, inner_depth) for c in _children(ex)) -end +_is_leaf(ex::SyntaxTree) = is_leaf(ex) +_is_leaf(ex::Expr) = false +_is_leaf(@nospecialize(ex)) = true # Produce interpolated node for `$x` syntax function _interpolated_value(ctx::InterpolationContext, srcref, ex) @@ -86,22 +73,17 @@ function _interpolated_value(::ExprInterpolationContext, _, ex) ex end -function copy_ast(::ExprInterpolationContext, @nospecialize(ex)) - @ccall(jl_copy_ast(ex::Any)::Any) +function _interpolate_ast(ctx::ExprInterpolationContext, ex::QuoteNode, depth) + out = _interpolate_ast(ctx, Expr(:inert, ex.value), depth) + QuoteNode(only(out.args)) end -function _interpolate_ast(ctx, ex, depth) - if ctx.current_index[] > length(ctx.values) || !_contains_active_interp(ex, depth) - return ex - end - - # We have an interpolation deeper in the tree somewhere - expand to an - # expression which performs the interpolation. +function _interpolate_ast(ctx, @nospecialize(ex), depth) + _is_leaf(ex) && return ex k = _interp_kind(ex) inner_depth = k == K"quote" ? depth + 1 : k == K"$" ? depth - 1 : depth - expanded_children = _syntax_list(ctx) for e in _children(ex) @@ -120,7 +102,10 @@ function _interpolate_ast(ctx, ex, depth) _interp_makenode(ctx, ex, expanded_children) end -function _setup_interpolation(::Type{SyntaxTree}, ex, values) +# Produced by expanding K"quote". Must create a copy of the AST. Note that +# wrapping `ex` in an extra node handles the edge case where the root `ex` is +# `$` (our recursion is one step removed due to forms like `($ a b)`.) +function interpolate_ast(::Type{SyntaxTree}, ex::SyntaxTree, values...) # Construct graph for interpolation context. We inherit this from the macro # context where possible by detecting it using __macro_ctx__. This feels # hacky though. 
@@ -137,34 +122,32 @@ function _setup_interpolation(::Type{SyntaxTree}, ex, values) end end if isnothing(graph) - graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, - value=Any, name_val=String, scope_layer=LayerId) + graph = ensure_attributes( + SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String, scope_layer=LayerId) end ctx = InterpolationContext(graph, values, Ref(1)) - return ctx -end -function _setup_interpolation(::Type{Expr}, ex, values) - return ExprInterpolationContext(values, Ref(1)) + # We must copy the AST into our context to use it as the source reference of + # generated expressions. + ex1 = copy_ast(ctx, ex) + out = _interpolate_ast(ctx, @ast(ctx, ex1, [K"None" ex1]), 0) + length(children(out)) === 1 || throw( + LoweringError(ex1, "More than one value in bare `\$` expression")) + return only(children(out)) end -function interpolate_ast(::Type{T}, ex, values...) where {T} - ctx = _setup_interpolation(T, ex, values) - - # We must copy the AST into our context to use it as the source reference - # of generated expressions (and in the Expr case at least, to avoid mutation) - ex1 = copy_ast(ctx, ex) - if _interp_kind(ex1) == K"$" - @assert length(values) == 1 - vs = values[1] - if length(vs) > 1 - # :($($(xs...))) where xs is more than length 1 - throw(LoweringError(_to_syntax_tree(ex1), - "More than one value in bare `\$` expression")) +function interpolate_ast(::Type{Expr}, @nospecialize(ex), values...) 
+ ctx = ExprInterpolationContext(values, Ref(1)) + if ex isa Expr && ex.head === :$ + @assert length(values) === 1 + if length(ex.args) !== 1 + throw(LoweringError( + expr_to_syntaxtree(ex), "More than one value in bare `\$` expression")) end - _interpolated_value(ctx, ex1, only(vs)) + only(values[1]) else - _interpolate_ast(ctx, ex1, 0) + _interpolate_ast(ctx, ex, 0) end end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 767643b8f99dc..2e7f2ec48e903 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -313,7 +313,7 @@ function JuliaSyntax.head(ex::SyntaxTree) end function JuliaSyntax.kind(ex::SyntaxTree) - ex.kind + ex.kind::JuliaSyntax.Kind end function JuliaSyntax.flags(ex::SyntaxTree) @@ -695,6 +695,7 @@ macro SyntaxTree(ex_old) # 3. Using the current file and line number, dig into the re-parsed tree and # discover the piece of AST which should be returned. ex = _find_SyntaxTree_macro(full_ex, __source__.line) + isnothing(ex) && error("_find_SyntaxTree_macro failed") # 4. 
Do the first step of JuliaLowering's syntax lowering to get # syntax interpolations to work _, ex1 = expand_forms_1(__module__, ex, false, Base.tls_world_age()) diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl index e312653fae3b1..93ace74e948f2 100644 --- a/JuliaLowering/test/quoting.jl +++ b/JuliaLowering/test/quoting.jl @@ -27,7 +27,8 @@ end @test sprint(io->showprov(io, ex[1][3], tree=true)) == raw""" (call g z) ├─ (call g z) - │ └─ @ string:3 + │ └─ (call g z) + │ └─ @ string:3 └─ ($ y) └─ @ string:5 """ @@ -184,7 +185,31 @@ let :(:($$(args...))) end """) -@test_throws LoweringError JuliaLowering.eval(test_mod, multi_interp_ex) +@test try + JuliaLowering.eval(test_mod, multi_interp_ex) + nothing +catch exc + @test exc isa LoweringError + sprint(io->Base.showerror(io, exc, show_detail=false)) +end == raw""" +LoweringError: +let + args = (:(x), :(y)) + :(:($$(args...))) +# └─────────┘ ── More than one value in bare `$` expression +end""" + +@test try + JuliaLowering.eval(test_mod, multi_interp_ex, expr_compat_mode=true) + nothing +catch exc + @test exc isa LoweringError + sprint(io->Base.showerror(io, exc, show_detail=false)) +end == raw""" +LoweringError: +No source for expression +└ ── More than one value in bare `$` expression""" +# ^ TODO: Improve error messages involving expr_to_syntaxtree! # Interpolation of SyntaxTree Identifier vs plain Symbol symbol_interp = JuliaLowering.include_string(test_mod, raw""" @@ -246,6 +271,13 @@ end end exs """, expr_compat_mode=true) == Any[Expr(:call, :f, :x, :y, :z), Expr(:call, :f, :x, :y, :z)] + + # Test interpolation into QuoteNode + @test JuliaLowering.include_string(test_mod, raw""" + let x = :push! + @eval Base.$x + end + """; expr_compat_mode=true) == Base.push! 
end end From 2f2de76a0c6bf371398ceb23683a8be2a05bafe6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 25 Oct 2025 14:58:52 +1000 Subject: [PATCH 1094/1109] Implement `@ eval` macro for SyntaxTree (JuliaLang/JuliaLowering.jl#107) --- JuliaLowering/src/kinds.jl | 2 +- JuliaLowering/src/syntax_macros.jl | 27 +++++++++++++++++++++++++++ JuliaLowering/test/misc.jl | 17 +++++++++++++++++ JuliaLowering/test/misc_ir.jl | 25 +++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl index bef831db78a29..22a243f12f0d5 100644 --- a/JuliaLowering/src/kinds.jl +++ b/JuliaLowering/src/kinds.jl @@ -165,7 +165,7 @@ function _register_kinds() "new_opaque_closure" # Wrapper for the lambda of around opaque closure methods "opaque_closure_method" - # World age increment + # World age increment (TODO: use top level assertion and only one latestworld kind) "latestworld" "END_IR_KINDS" ]) diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl index 43c186cdbc4cb..e7e5a1c850d7a 100644 --- a/JuliaLowering/src/syntax_macros.jl +++ b/JuliaLowering/src/syntax_macros.jl @@ -272,6 +272,33 @@ function Base.Experimental.var"@opaque"(__context__::MacroContext, ex) ] end +function _at_eval_code(ctx, srcref, mod, ex) + @ast ctx srcref [K"block" + [K"local" + [K"=" + "eval_result"::K"Identifier" + [K"call" + # TODO: Call "eval"::K"core" here + JuliaLowering.eval::K"Value" + mod + [K"quote" ex] + ] + ] + ] + (::K"latestworld_if_toplevel") + "eval_result"::K"Identifier" + ] +end + +function Base.var"@eval"(__context__::MacroContext, ex) + mod = @ast __context__ __context__.macrocall __context__.scope_layer.mod::K"Value" + _at_eval_code(__context__, __context__.macrocall, mod, ex) +end + +function Base.var"@eval"(__context__::MacroContext, mod, ex) + _at_eval_code(__context__, __context__.macrocall, mod, ex) +end + 
#-------------------------------------------------------------------------------- # The following `@islocal` and `@inert` are macros for special syntax known to # lowering which don't exist in Base but arguably should. diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 4c07cc9ca1842..a0c9dba7a0e3c 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -20,6 +20,23 @@ let x = [1,2] end """) == [1,2] +@test JuliaLowering.include_string(test_mod, raw""" +let + x = 10 + @eval $x + 2 +end +""") == 12 + +@test JuliaLowering.include_string(test_mod, raw""" +module EvalTest + _some_var = 2 +end +let + x = 10 + @eval EvalTest $x + _some_var +end +""") == 12 + @test JuliaLowering.include_string(test_mod, """ let x=11 20x diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index a0a231d5843d2..775960b87d238 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -294,6 +294,31 @@ GC.@preserve a b g() begin body end +######################################## +# @eval without module +@eval $f(x, y) +#--------------------- +1 TestMod.f +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₂) +4 (= slot₁/eval_result (call JuliaLowering.eval TestMod %₃)) +5 latestworld +6 slot₁/eval_result +7 (return %₆) + +######################################## +# @eval with module +@eval mod $f(x, y) +#--------------------- +1 TestMod.mod +2 TestMod.f +3 (call core.tuple %₂) +4 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₃) +5 (= slot₁/eval_result (call JuliaLowering.eval %₁ %₄)) +6 latestworld +7 slot₁/eval_result +8 (return %₇) + ######################################## # Juxtaposition 20x From 315d125e365831a0b66108317c908f86356344ae Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Oct 2025 15:30:19 +1000 Subject: [PATCH 1095/1109] Add scope layer for macro arguments of normally-quoted AST fragments 
(JuliaLang/JuliaLowering.jl#109) Macros may pull apart an expression (eg, a module expression or the right hand side of a `.` expression) or quote that expression, and we should keep track of the scope where this originated. A particular example is the `@eval` macro. Consider ``` let name = :x @eval A.$name end ``` In this case the right hand side of `.` would normally be quoted (as a plain symbol) but in the case of `@eval` an extra `quote` is added around the expression to make the `name` variable valid unquoted code after quote expansion. In general, macros may pull apart or rearrange what's passed to them, so we can't make the assumption that normally-inert syntax passed to them should go without a scope layer. To fix this, this change adds a scope layer to all ASTs passed to macros. After macro expansion is done, we can then remove the layer from any AST we know is definitely inert to prevent it from interfering with future lowering passes over that quoted code. This helps but isn't a full solution - see JuliaLang/JuliaLowering.jl#111 for further work. --- JuliaLowering/src/eval.jl | 5 ++ JuliaLowering/src/macro_expansion.jl | 33 +++++++++---- JuliaLowering/src/syntax_graph.jl | 8 ++++ JuliaLowering/test/macros.jl | 72 ++++++++++++++++++++++++++++ JuliaLowering/test/quoting_ir.jl | 13 +++++ 5 files changed, 121 insertions(+), 10 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 6beaab5ad8895..3add153881b9a 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -461,6 +461,11 @@ end _eval(mod, iter) end +# Version of eval() taking `Expr` (or Expr tree leaves of any type) +function eval(mod::Module, ex; opts...) + eval(mod, expr_to_syntaxtree(ex); opts...) 
+end + if VERSION >= v"1.13.0-DEV.1199" # https://github.com/JuliaLang/julia/pull/59604 function _eval(mod, iter) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index 673d71ae8ff79..d66205548e7df 100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -189,13 +189,9 @@ end function set_macro_arg_hygiene(ctx, ex, layer_ids, layer_idx) k = kind(ex) scope_layer = get(ex, :scope_layer, layer_ids[layer_idx]) - if k == K"module" || k == K"toplevel" || k == K"inert" - makenode(ctx, ex, ex, children(ex); - scope_layer=scope_layer) - elseif k == K"." - makenode(ctx, ex, ex, set_macro_arg_hygiene(ctx, ex[1], layer_ids, layer_idx), ex[2], - scope_layer=scope_layer) - elseif !is_leaf(ex) + if is_leaf(ex) + makeleaf(ctx, ex, ex; scope_layer=scope_layer) + else inner_layer_idx = layer_idx if k == K"escape" inner_layer_idx = layer_idx - 1 @@ -210,8 +206,6 @@ function set_macro_arg_hygiene(ctx, ex, layer_ids, layer_idx) end mapchildren(e->set_macro_arg_hygiene(ctx, e, layer_ids, inner_layer_idx), ctx, ex; scope_layer=scope_layer) - else - makeleaf(ctx, ex, ex; scope_layer=scope_layer) end end @@ -359,6 +353,20 @@ function append_sourceref(ctx, ex, secondary_prov) end end +function remove_scope_layer!(ex) + if !is_leaf(ex) + for c in children(ex) + remove_scope_layer!(c) + end + end + deleteattr!(ex, :scope_layer) + ex +end + +function remove_scope_layer(ctx, ex) + remove_scope_layer!(copy_ast(ctx, ex)) +end + """ Lowering pass 1 @@ -441,7 +449,12 @@ function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) elseif k == K"macrocall" expand_macro(ctx, ex) elseif k == K"module" || k == K"toplevel" || k == K"inert" - ex + # Remove scope layer information from any inert syntax which survives + # macro expansion so that it doesn't contaminate lowering passes which + # are later run against the quoted code. TODO: This works as a first + # approximation but is incorrect in general. 
We need to revisit such + # "deferred hygiene" situations (see https://github.com/c42f/JuliaLowering.jl/issues/111) + remove_scope_layer(ctx, ex) elseif k == K"." && numchildren(ex) == 2 # Handle quoted property access like `x.:(foo)` or `Core.:(!==)` # Unwrap the quote to get the identifier before expansion diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl index 2e7f2ec48e903..c8145aa1b93c3 100644 --- a/JuliaLowering/src/syntax_graph.jl +++ b/JuliaLowering/src/syntax_graph.jl @@ -155,6 +155,10 @@ function setattr!(graph::SyntaxGraph, id; attrs...) end end +function deleteattr!(graph::SyntaxGraph, id::NodeId, name::Symbol) + delete!(getattr(graph, name), id) +end + function Base.getproperty(graph::SyntaxGraph, name::Symbol) # TODO: Remove access to internals? name === :edge_ranges && return getfield(graph, :edge_ranges) @@ -294,6 +298,10 @@ function setattr!(ex::SyntaxTree; attrs...) setattr!(ex._graph, ex._id; attrs...) end +function deleteattr!(ex::SyntaxTree, name::Symbol) + deleteattr!(ex._graph, ex._id, name) +end + # JuliaSyntax tree API function JuliaSyntax.is_leaf(ex::SyntaxTree) diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 21212a3089577..076af5218fe39 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -1,6 +1,8 @@ @testset "macro tests" begin test_mod = Module(:macro_test) +Base.eval(test_mod, :(const var"@ast" = $(JuliaLowering.var"@ast"))) +Base.eval(test_mod, :(const var"@K_str" = $(JuliaLowering.var"@K_str"))) JuliaLowering.include_string(test_mod, raw""" module M @@ -406,4 +408,74 @@ end end +@testset "scope layers for normally-inert ASTs" begin + # Right hand side of `.` + @test JuliaLowering.include_string(test_mod, raw""" + let x = :(hi) + :(A.$x) + end + """) ≈ @ast_ [K"." 
+ "A"::K"Identifier" + "hi"::K"Identifier" + ] + # module + @test JuliaLowering.include_string(test_mod, raw""" + let x = :(AA) + :(module $x + end + ) + end + """) ≈ @ast_ [K"module" + "AA"::K"Identifier" + [K"block" + ] + ] + + # In macro expansion, require that expressions passed in as macro + # *arguments* get the lexical scope of the calling context, even for the + # `x` in `M.$x` where the right hand side of `.` is normally quoted. + @test JuliaLowering.include_string(test_mod, raw""" + let x = :(someglobal) + @eval M.$x + end + """) == "global in module M" + + JuliaLowering.include_string(test_mod, raw""" + let y = 101 + @eval module AA + x = $y + end + end + """) + @test test_mod.AA.x == 101 + + # "Deferred hygiene" in macros which emit quoted code currently doesn't + # work as might be expected. + # + # The old macro system also doesn't handle this - here's the equivalent + # implementation + # macro make_quoted_code(init, y) + # QuoteNode(:(let + # x = "inner x" + # $(esc(init)) + # ($(esc(y)), x) + # end)) + # end + # + # TODO: The following should throw an error rather than producing a + # surprising value, or work "as expected" whatever that is! 
+ JuliaLowering.include_string(test_mod, raw""" + macro make_quoted_code(init, y) + q = :(let + x = "inner x" + $init + ($y, x) + end) + @ast q q [K"inert" q] + end + """) + code = JuliaLowering.include_string(test_mod, """@make_quoted_code(x="outer x", x)""") + @test_broken JuliaLowering.eval(test_mod, code) == ("outer x", "inner x") +end + end diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl index dda1f65f9ff25..ccc61be3cf796 100644 --- a/JuliaLowering/test/quoting_ir.jl +++ b/JuliaLowering/test/quoting_ir.jl @@ -31,6 +31,19 @@ end 3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (quote (block (call-i ($ ($ x)) + 1))))) %₂) 4 (return %₃) +######################################## +# Symbols on `.` right hand side need to be scoped correctly +let x = 1 + :(A.$x) +end +#--------------------- +1 1 +2 (= slot₁/x %₁) +3 slot₁/x +4 (call core.tuple %₃) +5 (call JuliaLowering.interpolate_ast SyntaxTree (inert (. A ($ x))) %₄) +6 (return %₅) + ######################################## # Error: Double escape quote From e5582beef92ea046c202081ecbec2febb93db2d4 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 29 Oct 2025 15:44:30 +1000 Subject: [PATCH 1096/1109] Use relative import paths in tests (JuliaLang/JuliaLowering.jl#110) For vendoring into Base we need to avoid absolute import paths as in `using JuliaLowering` and `using JuliaSyntax` in the test files as neither of these packages will be top level modules. Thus, replace all occurrences of these with relative import paths except for one central location (currently in util.jl) which can be easily adjusted. 
--- JuliaLowering/test/branching.jl | 5 ----- JuliaLowering/test/compat.jl | 2 -- JuliaLowering/test/import.jl | 33 +++++++++++++++++++-------------- JuliaLowering/test/macros.jl | 9 +++++++-- JuliaLowering/test/repl_mode.jl | 3 +++ JuliaLowering/test/runtests.jl | 2 -- JuliaLowering/test/scopes_ir.jl | 2 +- JuliaLowering/test/utils.jl | 9 ++++++--- 8 files changed, 36 insertions(+), 29 deletions(-) diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl index e0844b8937268..2b7eac29f348c 100644 --- a/JuliaLowering/test/branching.jl +++ b/JuliaLowering/test/branching.jl @@ -4,11 +4,6 @@ test_mod = Module() -Base.eval(test_mod, quote - using JuliaLowering: JuliaLowering, @ast, @chk - using JuliaSyntax -end) - #------------------------------------------------------------------------------- @testset "Tail position" begin diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl index 77c769538bc59..a7fce558e9f40 100644 --- a/JuliaLowering/test/compat.jl +++ b/JuliaLowering/test/compat.jl @@ -1,6 +1,4 @@ using Test -using JuliaSyntax -using JuliaLowering const JS = JuliaSyntax const JL = JuliaLowering diff --git a/JuliaLowering/test/import.jl b/JuliaLowering/test/import.jl index 0dc39db83f8cd..74cdd9260149e 100644 --- a/JuliaLowering/test/import.jl +++ b/JuliaLowering/test/import.jl @@ -2,18 +2,7 @@ test_mod = Module() -JuliaLowering.include_string(test_mod, """ - using JuliaSyntax - using JuliaLowering: SyntaxTree - using JuliaLowering: SyntaxTree as st - import JuliaLowering: SyntaxTree as st1, SyntaxTree as st2 -""") -@test test_mod.SyntaxTree === JuliaLowering.SyntaxTree -@test test_mod.st === JuliaLowering.SyntaxTree -@test test_mod.st1 === JuliaLowering.SyntaxTree -@test test_mod.st2 === JuliaLowering.SyntaxTree -@test test_mod.parsestmt === JuliaSyntax.parsestmt - +# Test attributes are correctly set for export/public JuliaLowering.include_string(test_mod, """ x = 1 y = 2 @@ -25,21 +14,37 @@ public y @test 
Base.ispublic(test_mod, :y) @test !Base.isexported(test_mod, :y) +# Test various forms of `using` C = JuliaLowering.include_string(test_mod, """ module C module D + export x + public y, f + x = [101] + y = [202] + function f() "hi" end end module E - using ...C.D: f + using ..D: f + using ..D + using .D: y as D_y + using .D: x as D_x_2, y as D_y_2 + import .D.y as D_y_3 end end """) @test C.D.f === C.E.f +@test C.D.x === C.E.x +@test C.D.y === C.E.D_y +@test C.D.x === C.E.D_x_2 +@test C.D.y === C.E.D_y_2 +@test C.D.y === C.E.D_y_3 -# Test that `using` F brings in the symbol G immediately +# Test that using F brings in the exported symbol G immediately and that it can +# be used next in the import list. F = JuliaLowering.include_string(test_mod, """ module F export G diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl index 076af5218fe39..d92b3243a76b4 100644 --- a/JuliaLowering/test/macros.jl +++ b/JuliaLowering/test/macros.jl @@ -4,10 +4,15 @@ test_mod = Module(:macro_test) Base.eval(test_mod, :(const var"@ast" = $(JuliaLowering.var"@ast"))) Base.eval(test_mod, :(const var"@K_str" = $(JuliaLowering.var"@K_str"))) +# These libraries may either be packages or vendored into Base - need to pull +# them in via relative paths in the `using` statements below. 
+Base.eval(test_mod, :(const JuliaLowering = $(JuliaLowering))) +Base.eval(test_mod, :(const JuliaSyntax = $(JuliaSyntax))) + JuliaLowering.include_string(test_mod, raw""" module M - using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope - using JuliaSyntax + using ..JuliaLowering: JuliaLowering, adopt_scope + using ..JuliaSyntax # Introspection macro __MODULE__() diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl index 6c0a889b250e6..cf85717c03cbf 100644 --- a/JuliaLowering/test/repl_mode.jl +++ b/JuliaLowering/test/repl_mode.jl @@ -1,3 +1,6 @@ +# JuliaLowering REPL mode: an interactive test utility for lowering code (not +# part of the unit tests) + module JuliaLoweringREPL import ReplMaker diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl index 5225f7dabc6c2..7451ecb5c179f 100644 --- a/JuliaLowering/test/runtests.jl +++ b/JuliaLowering/test/runtests.jl @@ -1,5 +1,3 @@ -using Test - include("utils.jl") @testset "JuliaLowering.jl" begin diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl index 7d24698272e31..fc00174f144ff 100644 --- a/JuliaLowering/test/scopes_ir.jl +++ b/JuliaLowering/test/scopes_ir.jl @@ -1,4 +1,4 @@ -using JuliaLowering: @islocal +using .JuliaLowering: @islocal using Base: @locals #******************************************************************************* diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 9c27117f0d6b3..3460ecdee8906 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -1,7 +1,9 @@ +# Shared testing code which should be included before running individual test files. using Test using JuliaLowering using JuliaSyntax + import FileWatching # The following are for docstrings testing. 
We need to load the REPL module @@ -10,9 +12,9 @@ import FileWatching using Markdown import REPL -using JuliaSyntax: sourcetext, set_numeric_flags +using .JuliaSyntax: sourcetext, set_numeric_flags -using JuliaLowering: +using .JuliaLowering: SyntaxGraph, newnode!, ensure_attributes!, Kind, SourceRef, SyntaxTree, NodeId, makenode, makeleaf, setattr!, sethead!, @@ -153,8 +155,9 @@ end function setup_ir_test_module(preamble) test_mod = Module(:TestMod) - JuliaLowering.include_string(test_mod, preamble) + Base.eval(test_mod, :(const JuliaLowering = $JuliaLowering)) Base.eval(test_mod, :(const var"@ast_" = $(var"@ast_"))) + JuliaLowering.include_string(test_mod, preamble) test_mod end From 3ffedc7b3fbe79cbe465c90e0c43434297d0f756 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Fri, 7 Nov 2025 09:15:07 -0800 Subject: [PATCH 1097/1109] Fix "Fix `ccall` with no supplied varargs" (JuliaLang/JuliaLowering.jl#118) --- JuliaLowering/src/desugaring.jl | 44 +++++++++++++------------ JuliaLowering/test/function_calls_ir.jl | 15 +++++---- JuliaLowering/test/misc.jl | 22 +++++++++++++ JuliaLowering/test/misc_ir.jl | 17 +++++----- 4 files changed, 61 insertions(+), 37 deletions(-) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 2e9aba254fc1a..7f6633e25b83b 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -1706,6 +1706,16 @@ function expand_kw_call(ctx, srcref, farg, args, kws) ] end +# Special rule: Any becomes core.Any regardless of the module +# scope, and don't need GC roots. 
+function expand_ccall_argtype(ctx, ex) + if is_same_identifier_like(ex, "Any") + @ast ctx ex "Any"::K"core" + else + expand_forms_2(ctx, ex) + end +end + # Expand the (sym,lib) argument to ccall/cglobal function expand_C_library_symbol(ctx, ex) expanded = expand_forms_2(ctx, ex) @@ -1749,44 +1759,36 @@ function expand_ccall(ctx, ex) end arg_types = children(arg_type_tuple) vararg_type = nothing - num_required_args = length(arg_types) if length(arg_types) >= 1 va = arg_types[end] if kind(va) == K"..." @chk numchildren(va) == 1 # Ok: vararg function - vararg_type = va - if length(arg_types) <= 1 - throw(LoweringError(vararg_type, "C ABI prohibits vararg without one required argument")) - else - num_required_args = length(arg_types) - 1 + vararg_type = expand_ccall_argtype(ctx, va[1]) + arg_types = arg_types[1:end-1] + if length(arg_types) === 0 + throw(LoweringError(va, "C ABI prohibits vararg without one required argument")) end end end # todo: use multi-range errors here - if length(args) < num_required_args + if length(args) < length(arg_types) throw(LoweringError(ex, "Too few arguments in ccall compared to argument types")) elseif length(args) > length(arg_types) && isnothing(vararg_type) throw(LoweringError(ex, "More arguments than types in ccall")) end sctx = with_stmts(ctx) expanded_types = SyntaxList(ctx) - for (i, argt) in enumerate(arg_types) + for argt in arg_types if kind(argt) == K"..." - if i == length(arg_types) - argt = argt[1] - else - throw(LoweringError(argt, "only the trailing ccall argument type should have `...`")) - end - end - if is_same_identifier_like(argt, "Any") - # Special rule: Any becomes core.Any regardless of the module - # scope, and don't need GC roots. 
- argt = @ast ctx argt "Any"::K"core" + throw(LoweringError(argt, "only the trailing ccall argument type should have `...`")) end - push!(expanded_types, expand_forms_2(ctx, argt)) + push!(expanded_types, expand_ccall_argtype(ctx, argt)) end - # + for _ in length(arg_types)+1:length(args) + push!(expanded_types, vararg_type) + end + # An improvement might be wrap the use of types in cconvert in a special # K"global_scope" expression which modifies the scope resolution. This # would at least make the rules self consistent if not pretty. @@ -1843,7 +1845,7 @@ function expand_ccall(ctx, ex) expanded_types... ] ] - (isnothing(vararg_type) ? 0 : num_required_args)::K"Integer" + (isnothing(vararg_type) ? 0 : length(arg_types))::K"Integer" if isnothing(cconv) "ccall"::K"Symbol" else diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl index f2772a65d6967..8530dfa5d19f6 100644 --- a/JuliaLowering/test/function_calls_ir.jl +++ b/JuliaLowering/test/function_calls_ir.jl @@ -406,14 +406,15 @@ ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") #--------------------- 1 TestMod.Cstring 2 TestMod.Cstring -3 (call top.cconvert %₁ "%s = %s\n") -4 (call top.cconvert %₂ "2 + 2") -5 (call top.cconvert %₂ "5") -6 (call top.unsafe_convert %₁ %₃) -7 (call top.unsafe_convert %₂ %₄) +3 TestMod.Cstring +4 (call top.cconvert %₁ "%s = %s\n") +5 (call top.cconvert %₂ "2 + 2") +6 (call top.cconvert %₃ "5") +7 (call top.unsafe_convert %₁ %₄) 8 (call top.unsafe_convert %₂ %₅) -9 (foreigncall :printf (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring)) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) -10 (return %₉) +9 (call top.unsafe_convert %₃ %₆) +10 (foreigncall :printf (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring)) 1 :ccall %₇ %₈ %₉ %₄ %₅ %₆) +11 (return %₁₀) ######################################## # Error: ccall with too few arguments diff 
--git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index a0c9dba7a0e3c..fea2d10bbe39d 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -47,6 +47,28 @@ end @test JuliaLowering.include_string(test_mod, """ ccall(:strlen, Csize_t, (Cstring,), "asdfg") """) == 5 +@test JuliaLowering.include_string(test_mod, """ +function cvarargs_0() + strp = Ref{Ptr{Cchar}}(0) + fmt = "hi" + len = ccall(:asprintf, Cint, (Ptr{Ptr{Cchar}}, Cstring, Cfloat...), strp, fmt) + str = unsafe_string(strp[], len) + Libc.free(strp[]) + return str +end +""") isa Function +@test test_mod.cvarargs_0() == "hi" +@test JuliaLowering.include_string(test_mod, """ +function cvarargs_2(arg1::Float64, arg2::Float64) + strp = Ref{Ptr{Cchar}}(0) + fmt = "%3.1f %3.1f" + len = ccall(:asprintf, Cint, (Ptr{Ptr{Cchar}}, Cstring, Cfloat...), strp, fmt, arg1, arg2) + str = unsafe_string(strp[], len) + Libc.free(strp[]) + return str +end +""") isa Function +@test test_mod.cvarargs_2(1.1, 2.2) == "1.1 2.2" # cfunction JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 775960b87d238..23ec84f26c4ae 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -455,15 +455,14 @@ ccall(:fcntl, Cint, (RawFD, Cint, Cint...), s, F_GETFL) #--------------------- 1 TestMod.RawFD 2 TestMod.Cint -3 TestMod.Cint -4 TestMod.s -5 (call top.cconvert %₁ %₄) -6 TestMod.F_GETFL -7 (call top.cconvert %₂ %₆) -8 (call top.unsafe_convert %₁ %₅) -9 (call top.unsafe_convert %₂ %₇) -10 (foreigncall :fcntl (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.RawFD TestMod.Cint TestMod.Cint)) 2 :ccall %₈ %₉ %₅ %₇) -11 (return %₁₀) +3 TestMod.s +4 (call top.cconvert %₁ %₃) +5 TestMod.F_GETFL +6 (call top.cconvert %₂ %₅) +7 (call top.unsafe_convert %₁ %₄) +8 (call top.unsafe_convert %₂ %₆) +9 (foreigncall :fcntl (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.RawFD TestMod.Cint)) 2 :ccall 
%₇ %₈ %₄ %₆) +10 (return %₉) ######################################## # Error: No return annotation on @ccall From 3dff0a0fcfc8c5109ccfc960ea53409c069ca716 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Fri, 7 Nov 2025 19:37:19 -0800 Subject: [PATCH 1098/1109] Apply suggestions from code review for JuliaLang/JuliaLowering.jl#87 (JuliaLang/JuliaLowering.jl#117) Co-authored-by: Claire Foster --- JuliaLowering/src/closure_conversion.jl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl index d40d944db13f1..1dc4ca2bc8d9d 100644 --- a/JuliaLowering/src/closure_conversion.jl +++ b/JuliaLowering/src/closure_conversion.jl @@ -130,12 +130,10 @@ function make_globaldecl(ctx, src_ex, mod, name, strong=false, type=nothing; ret [K"call" "declare_global"::K"core" mod::K"Value" name::K"Symbol" strong::K"Bool" - if type !== nothing - type - end + type ] [K"latestworld"] - @ast ctx src_ex [K"removable" "nothing"::K"core"] + "nothing"::K"core" ] if ctx.is_toplevel_seq_point return decl @@ -146,7 +144,7 @@ function make_globaldecl(ctx, src_ex, mod, name, strong=false, type=nothing; ret if ret_nothing nothing else - @ast ctx src_ex [K"removable" "nothing"::K"core"] + @ast ctx src_ex "nothing"::K"core" end end From 0ed8343a84ed5782f0a8ec6136a5f1cf437accd3 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Fri, 7 Nov 2025 19:53:22 -0800 Subject: [PATCH 1099/1109] Pass `expr_compat_mode` into the new `@eval` (JuliaLang/JuliaLowering.jl#113) --- JuliaLowering/src/macro_expansion.jl | 3 ++- JuliaLowering/src/runtime.jl | 2 +- JuliaLowering/src/syntax_macros.jl | 6 +++++ JuliaLowering/test/misc.jl | 11 ++++++++ JuliaLowering/test/misc_ir.jl | 40 +++++++++++++++++----------- 5 files changed, 45 insertions(+), 17 deletions(-) diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl index d66205548e7df..6ea642e376508 
100644 --- a/JuliaLowering/src/macro_expansion.jl +++ b/JuliaLowering/src/macro_expansion.jl @@ -75,6 +75,7 @@ struct MacroContext <: AbstractLoweringContext graph::SyntaxGraph macrocall::Union{SyntaxTree,LineNumberNode,SourceRef} scope_layer::ScopeLayer + expr_compat_mode::Bool end function adopt_scope(ex, ctx::MacroContext) @@ -257,7 +258,7 @@ function expand_macro(ctx, ex) @assert kind(ex) == K"macrocall" macname = ex[1] - mctx = MacroContext(ctx.graph, ex, current_layer(ctx)) + mctx = MacroContext(ctx.graph, ex, current_layer(ctx), ctx.expr_compat_mode) macfunc = eval_macro_name(ctx, mctx, macname) raw_args = ex[2:end] macro_loc = let loc = source_location(LineNumberNode, ex) diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl index d5a908aef1772..c98b71639a743 100644 --- a/JuliaLowering/src/runtime.jl +++ b/JuliaLowering/src/runtime.jl @@ -330,7 +330,7 @@ function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize a # Run code generator - this acts like a macro expander and like a macro # expander it gets a MacroContext. - mctx = MacroContext(syntax_graph(ctx1), g.srcref, layer) + mctx = MacroContext(syntax_graph(ctx1), g.srcref, layer, false) ex0 = g.gen(mctx, args...) 
if ex0 isa SyntaxTree if !is_compatible_graph(ctx1, ex0) diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl index e7e5a1c850d7a..a08ddde1fba67 100644 --- a/JuliaLowering/src/syntax_macros.jl +++ b/JuliaLowering/src/syntax_macros.jl @@ -282,6 +282,12 @@ function _at_eval_code(ctx, srcref, mod, ex) JuliaLowering.eval::K"Value" mod [K"quote" ex] + [K"parameters" + [K"=" + "expr_compat_mode"::K"Identifier" + ctx.expr_compat_mode::K"Bool" + ] + ] ] ] ] diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index fea2d10bbe39d..12786b30b2205 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -202,4 +202,15 @@ end end +# SyntaxTree @eval should pass along expr_compat_mode +@test JuliaLowering.include_string(test_mod, "@eval quote x end"; + expr_compat_mode=false) isa SyntaxTree +@test JuliaLowering.include_string(test_mod, "@eval quote x end"; + expr_compat_mode=true) isa Expr +@test JuliaLowering.include_string(test_mod, raw""" + let T = :foo + @eval @doc $"This is a $T" $T = 1 + end +"""; expr_compat_mode=true) === 1 + end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl index 23ec84f26c4ae..436ff984537ba 100644 --- a/JuliaLowering/test/misc_ir.jl +++ b/JuliaLowering/test/misc_ir.jl @@ -298,26 +298,36 @@ end # @eval without module @eval $f(x, y) #--------------------- -1 TestMod.f -2 (call core.tuple %₁) -3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₂) -4 (= slot₁/eval_result (call JuliaLowering.eval TestMod %₃)) -5 latestworld -6 slot₁/eval_result -7 (return %₆) +1 JuliaLowering.eval +2 (call core.tuple :expr_compat_mode) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple false) +5 (call %₃ %₄) +6 TestMod.f +7 (call core.tuple %₆) +8 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₇) +9 (= slot₁/eval_result (call core.kwcall %₅ %₁ TestMod %₈)) +10 latestworld +11 slot₁/eval_result +12 (return %₁₁) 
######################################## # @eval with module @eval mod $f(x, y) #--------------------- -1 TestMod.mod -2 TestMod.f -3 (call core.tuple %₂) -4 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₃) -5 (= slot₁/eval_result (call JuliaLowering.eval %₁ %₄)) -6 latestworld -7 slot₁/eval_result -8 (return %₇) +1 JuliaLowering.eval +2 (call core.tuple :expr_compat_mode) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple false) +5 (call %₃ %₄) +6 TestMod.mod +7 TestMod.f +8 (call core.tuple %₇) +9 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₈) +10 (= slot₁/eval_result (call core.kwcall %₅ %₁ %₆ %₉)) +11 latestworld +12 slot₁/eval_result +13 (return %₁₂) ######################################## # Juxtaposition From 90d619b64e115be592646ce29c2a3fd71f5f8d93 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sun, 9 Nov 2025 20:45:47 +0100 Subject: [PATCH 1100/1109] Fix InexactError in peek_behind_pos when skipping nested trivia nodes (JuliaLang/JuliaSyntax.jl#604) --- JuliaSyntax/src/core/parse_stream.jl | 8 +++++++- JuliaSyntax/test/parse_stream.jl | 10 ++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/JuliaSyntax/src/core/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl index 393e23c86c075..7bcd5745b8a48 100644 --- a/JuliaSyntax/src/core/parse_stream.jl +++ b/JuliaSyntax/src/core/parse_stream.jl @@ -635,8 +635,14 @@ function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true, while node_idx > 0 node = stream.output[node_idx] if kind(node) == K"TOMBSTONE" || (skip_trivia && is_trivia(node)) - node_idx -= 1 byte_idx -= node.byte_span + # If this is a non-terminal node, skip its children without + # subtracting their byte_spans, as they're already included in the parent + if is_non_terminal(node) + node_idx -= (1 + node.node_span) + else + node_idx -= 1 + end else break end diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl 
index 0eca59b794e3f..43b16ab5c885f 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -156,3 +156,13 @@ end @test ParseStream(y) isa ParseStream @test parsestmt(Expr, y) == parsestmt(Expr, "1") end + +@testset "peek_behind_pos with negative byte index" begin + # Test that peek_behind_pos doesn't cause InexactError when byte_idx goes negative + # This can happen when parsing certain incomplete keywords like "do" + # where trivia skipping walks back past the beginning of the stream + @test_throws JuliaSyntax.ParseError parseall(GreenNode, "do") + @test_throws JuliaSyntax.ParseError parseall(GreenNode, "do ") + @test_throws JuliaSyntax.ParseError parseall(GreenNode, " do") + @test_throws JuliaSyntax.ParseError parseall(GreenNode, "do\n") +end From 79d96c2c21a1175ef5ec62f658612842b1cae531 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:07:04 -0800 Subject: [PATCH 1101/1109] Fix `@nospecialize` with zero args (JuliaLang/JuliaLowering.jl#112) --- JuliaLowering/src/compat.jl | 57 ++++++++++++++++----------------- JuliaLowering/test/macros_ir.jl | 22 +++++++++++++ JuliaLowering/test/utils.jl | 7 ++-- 3 files changed, 55 insertions(+), 31 deletions(-) diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl index e9800d0a7ab48..133fb55151bcb 100644 --- a/JuliaLowering/src/compat.jl +++ b/JuliaLowering/src/compat.jl @@ -80,7 +80,7 @@ end """ Return `e.args`, but with any parameters in SyntaxTree (flattened, source) order. -Parameters are expected to be as `e.args[pos]`. +Parameters are expected to be at `e.args[pos]`. e.g. 
orderings of (a,b,c;d;e;f): Expr: (tuple (parameters (parameters (parameters f) e) d) a b c) @@ -463,36 +463,35 @@ function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceA if e.args[1] isa Expr && e.args[1].head === :purity st_k = K"meta" child_exprs = [Expr(:quoted_symbol, :purity), Base.EffectsOverride(e.args[1].args...)] - else - @assert e.args[1] isa Symbol - if e.args[1] === :nospecialize - if nargs > 2 - st_k = K"block" - # Kick the can down the road (should only be simple atoms?) - child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end]) - else - st_id, src = _insert_convert_expr(e.args[2], graph, src) - setmeta!(SyntaxTree(graph, st_id); nospecialize=true) - return st_id, src - end - elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, - :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, - :no_constprop, :aggressive_constprop, :specialize, :compile, :infer, - :nospecializeinfer, :force_compile, :propagate_inbounds, :doc) - # TODO: Some need to be handled in lowering - for (i, ma) in enumerate(e.args) - if ma isa Symbol - # @propagate_inbounds becomes (meta inline - # propagate_inbounds), but usually(?) only args[1] is - # converted here - child_exprs[i] = Expr(:quoted_symbol, e.args[i]) - end + elseif nargs === 0 + # pass + elseif e.args[1] === :nospecialize + if nargs === 1 + child_exprs[1] = Expr(:quoted_symbol, :nospecialize) + elseif nargs > 2 + st_k = K"block" + # Kick the can down the road (should only be simple atoms?) 
+ child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end]) + elseif nargs === 2 + st_id, src = _insert_convert_expr(e.args[2], graph, src) + setmeta!(SyntaxTree(graph, st_id); nospecialize=true) + return st_id, src + end + elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, + :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, + :no_constprop, :aggressive_constprop, :specialize, :compile, :infer, + :nospecializeinfer, :force_compile, :propagate_inbounds, :doc) + # TODO: Some need to be handled in lowering + for (i, ma) in enumerate(e.args) + if ma isa Symbol + # @propagate_inbounds becomes (meta inline propagate_inbounds) + child_exprs[i] = Expr(:quoted_symbol, e.args[i]) end - else - # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys. - @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") - child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end + else + # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys. 
+ @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") + child_exprs[1] = Expr(:quoted_symbol, e.args[1]) end elseif e.head === :scope_layer @assert nargs === 2 diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl index 183dce3944b35..2889023a14b3b 100644 --- a/JuliaLowering/test/macros_ir.jl +++ b/JuliaLowering/test/macros_ir.jl @@ -186,3 +186,25 @@ cmdmac`hello` cmdmac`hello`12345 #--------------------- 1 (return "hello from cmdmac with suffix 12345") + +######################################## +# @nospecialize (zero args) +function foo() + @nospecialize +end +#--------------------- +1 (method TestMod.foo) +2 latestworld +3 TestMod.foo +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read)] + 1 (meta :nospecialize) + 2 (return core.nothing) +10 latestworld +11 TestMod.foo +12 (return %₁₁) diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl index 3460ecdee8906..16f2f30294ffe 100644 --- a/JuliaLowering/test/utils.jl +++ b/JuliaLowering/test/utils.jl @@ -114,7 +114,10 @@ function uncomment_description(desc) end function comment_description(desc) - replace(desc, r"^"m=>"# ") + lines = replace(split(desc, '\n')) do line + strip("# " * line) + end + join(lines, '\n') end function match_ir_test_case(case_str) @@ -231,7 +234,7 @@ function refresh_ir_test_cases(filename, pattern=nothing) else ir = case.output end - println(io, + (case == cases[end] ? 
print : println)(io, """ ######################################## $(comment_description(case.description)) From a2ee6c5087e3a64312d2c6fabb18424252820f54 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 11 Nov 2025 11:32:56 +0100 Subject: [PATCH 1102/1109] Fix multiline function signature parsing (JuliaLang/JuliaSyntax.jl#580) --- JuliaSyntax/src/julia/parser.jl | 17 +++++++++++------ JuliaSyntax/test/parser.jl | 7 +++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl index 70a345057a56b..4142a010bb6b9 100644 --- a/JuliaSyntax/src/julia/parser.jl +++ b/JuliaSyntax/src/julia/parser.jl @@ -2199,13 +2199,18 @@ function parse_function_signature(ps::ParseState, is_function::Bool) is_empty_tuple = peek(ps, skip_newlines=true) == K")" opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs _parsed_call = was_eventually_call(ps) - _needs_parse_call = peek(ps, 2) ∈ KSet"( ." + _maybe_grouping_parens = !had_commas && !had_splat && num_semis == 0 && num_subexprs == 1 + # Skip intervening newlines only when the parentheses hold a single + # expression, which is the ambiguous case between a name like (::T) + # and an anonymous function parameter list. + next_kind = peek(ps, 2, skip_newlines=_maybe_grouping_parens) + _needs_parse_call = next_kind ∈ KSet"( ." 
_is_anon_func = (!_needs_parse_call && !_parsed_call) || had_commas - return (needs_parameters = _is_anon_func, - is_anon_func = _is_anon_func, - parsed_call = _parsed_call, - needs_parse_call = _needs_parse_call, - maybe_grouping_parens = !had_commas && !had_splat && num_semis == 0 && num_subexprs == 1) + return (needs_parameters = _is_anon_func, + is_anon_func = _is_anon_func, + parsed_call = _parsed_call, + needs_parse_call = _needs_parse_call, + maybe_grouping_parens = _maybe_grouping_parens) end is_anon_func = opts.is_anon_func parsed_call = opts.parsed_call diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl index 4aa8652858313..6c66ea40123e0 100644 --- a/JuliaSyntax/test/parser.jl +++ b/JuliaSyntax/test/parser.jl @@ -619,6 +619,13 @@ tests = [ "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" "function (f::T{g(i)})() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))" "function (::T)() end" => "(function (call (parens (::-pre T))) (block))" + "function (\n ::T\n )() end" => "(function (call (parens (::-pre T))) (block))" + "function (\n x::T\n )() end" => "(function (call (parens (::-i x T))) (block))" + "function (\n f\n )() end" => "(function (call (parens f)) (block))" + "function (\n A\n ).f() end" => "(function (call (. 
(parens A) f)) (block))" + "function (\n ::T\n )(x, y) end" => "(function (call (parens (::-pre T)) x y) (block))" + "function (\n f::T{g(i)}\n )() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))" + "function (\n x, y\n ) x + y end" => "(function (tuple-p x y) (block (call-i x + y)))" "function (:*=(f))() end" => "(function (call (parens (call (quote-: *=) f))) (block))" "function begin() end" => "(function (call (error begin)) (block))" "function f() end" => "(function (call f) (block))" From 91ae546200ce8dea33f85c38eb36c5897ada6618 Mon Sep 17 00:00:00 2001 From: Em Chu <61633163+mlechu@users.noreply.github.com> Date: Wed, 12 Nov 2025 08:48:13 -0800 Subject: [PATCH 1103/1109] Support curly outer constructor (JuliaLang/JuliaLowering.jl#116) * Support curly outer constructor * Fixes and tests Co-authored-by: Claire Foster --------- Co-authored-by: Claire Foster --- JuliaLowering/src/desugaring.jl | 8 +++++ JuliaLowering/test/function_calls_ir.jl | 10 ++++++ JuliaLowering/test/typedefs.jl | 45 +++++++++++++++++++++++++ JuliaLowering/test/typedefs_ir.jl | 22 ++++++++++++ 4 files changed, 85 insertions(+) diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl index 7f6633e25b83b..30b10d2dbf766 100644 --- a/JuliaLowering/src/desugaring.jl +++ b/JuliaLowering/src/desugaring.jl @@ -2971,6 +2971,14 @@ function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body= self_type = name[2] end doc_obj = self_type + elseif kind(name) == K"curly" + @chk numchildren(name) >= 2 + self_type = @ast ctx ex [K"function_type" + expand_forms_2(ctx, expand_curly(ctx, name))] + name = name[1] + is_invalid_func_name(name) && throw(LoweringError(name, "Invalid function name")) + doc_obj = name + name_str = get(kind(name) == K"." ? name[2] : name, :name_val, nothing) else if kind(name) == K"Placeholder" # Anonymous function. 
In this case we may use an ssavar for the diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl index 8530dfa5d19f6..1426ed228ddc8 100644 --- a/JuliaLowering/test/function_calls_ir.jl +++ b/JuliaLowering/test/function_calls_ir.jl @@ -598,6 +598,16 @@ function A.ccall() # └─────┘ ── Invalid function name end +######################################## +# Error: Invalid function name ccall +function ccall{<:T}() +end +#--------------------- +LoweringError: +function ccall{<:T}() +# └───┘ ── Invalid function name +end + ######################################## # Nested splat: simple case tuple((xs...)...) diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl index be509eeb88156..f995d0ed1c9b5 100644 --- a/JuliaLowering/test/typedefs.jl +++ b/JuliaLowering/test/typedefs.jl @@ -128,6 +128,51 @@ let s = test_mod.S5{Any}(42.0, "hi") @test s.x === 42.0 @test s.y == "hi" end +@test JuliaLowering.include_string(test_mod, """ +function S5{Int}(x::Int) + S5(x, x) +end +""") === nothing +let s = test_mod.S5{Int}(1) + @test s.x === 1 + @test s.y === 1 + @test s isa test_mod.S5{Int} +end +@test_throws MethodError test_mod.S5{Int}(1.1) +@test JuliaLowering.include_string(test_mod, """ +function S5{T}(x, y, z) where {T<:AbstractFloat} + S5(x, x) +end +""") === nothing +let s = test_mod.S5{Float64}(Float64(1.1), 0, 0) + @test s.x === 1.1 + @test s.y === 1.1 + @test s isa test_mod.S5{Float64} +end +@test JuliaLowering.include_string(test_mod, """ +S5{<:AbstractFloat}(x) = S5(x, x) +""") === nothing +let s = test_mod.S5{<:AbstractFloat}(Float64(1.1)) + @test s.x === 1.1 + @test s.y === 1.1 + @test s isa test_mod.S5{Float64} +end +@test JuliaLowering.include_string(test_mod, """ +S5{T}(x::T) where {T<:Real} = S5(x, x) +""") === nothing +let s = test_mod.S5{Real}(pi) + @test s.x === pi + @test s.y === pi + @test s isa test_mod.S5{<:Real} +end +outer_mod = Module() +@test JuliaLowering.include_string(test_mod, """ 
+Base.Vector{T}(x::T) where {S5<:T<:S5} = T[x] +""") === nothing +let v = Base.Vector{test_mod.S5}(test_mod.S5(1,1)) + @test v isa Vector{test_mod.S5} + @test v[1] === test_mod.S5(1,1) +end # User defined inner constructors and helper functions for structs without type params @test JuliaLowering.include_string(test_mod, """ diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl index 260e3c2f4bfab..280f2719d6d6c 100644 --- a/JuliaLowering/test/typedefs_ir.jl +++ b/JuliaLowering/test/typedefs_ir.jl @@ -1346,3 +1346,25 @@ function f() end #─────┘ ── this syntax is only allowed in top level code end + +######################################## +# Constructor with type parameter +A{<:Real}() = A(1) +#--------------------- +1 TestMod.Real +2 (call core.TypeVar :#T1 %₁) +3 TestMod.A +4 (call core.apply_type %₃ %₂) +5 (call core.UnionAll %₂ %₄) +6 (call core.Typeof %₅) +7 (call core.svec %₆) +8 (call core.svec) +9 SourceLocation::1:1 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read)] + 1 TestMod.A + 2 (call %₁ 1) + 3 (return %₂) +12 latestworld +13 (return core.nothing) From bcc6aa8e10067e57e4eecfe7785d4092f7e921eb Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Fri, 14 Nov 2025 11:34:55 -0500 Subject: [PATCH 1104/1109] Fixes for 32-bit (JuliaLang/JuliaLowering.jl#119) Co-authored-by: Em Chu --- JuliaLowering/src/eval.jl | 2 +- JuliaLowering/src/hooks.jl | 2 +- JuliaLowering/test/misc.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl index 3add153881b9a..cef07133b2f5e 100644 --- a/JuliaLowering/src/eval.jl +++ b/JuliaLowering/src/eval.jl @@ -132,7 +132,7 @@ else conversions = [:(convert($t, $n)) for (t,n) in zip(fts, fns)] expected_fns = (:code, :debuginfo, :ssavaluetypes, :ssaflags, :slotnames, :slotflags, :slottypes, :rettype, :parent, :edges, :min_world, :max_world, 
:method_for_inference_limit_heuristics, :nargs, :propagate_inbounds, :has_fcall, :has_image_globalref, :nospecializeinfer, :isva, :inlining, :constprop, :purity, :inlining_cost) - expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt64, UInt64, Any, UInt64, Bool, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16) + expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt, UInt, Any, UInt, Bool, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16) code = if fns != expected_fns unexpected_fns = collect(setdiff(Set(fns), Set(expected_fns))) diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl index d4aef14f13773..ca7ba9a0c3de1 100644 --- a/JuliaLowering/src/hooks.jl +++ b/JuliaLowering/src/hooks.jl @@ -13,7 +13,7 @@ function core_lowering_hook(@nospecialize(code), mod::Module, # TODO: fix in base file = file isa Ptr{UInt8} ? unsafe_string(file) : file - line = !(line isa Int64) ? Int64(line) : line + line = !(line isa Int) ? 
Int(line) : line local st0 = nothing try diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl index 12786b30b2205..75bb26e9a6f92 100644 --- a/JuliaLowering/test/misc.jl +++ b/JuliaLowering/test/misc.jl @@ -198,7 +198,7 @@ end jeval(test_mod, "\"docstr12\" f12(x::Int, y::U, z::T=1) where {T, U<:Number}") d = jeval(test_mod, "@doc f12") @test d |> string === "docstr12\n" - @test d.meta[:results][1].data[:typesig] === Union{Tuple{Int64, U, T}, Tuple{Int64, U}} where {T, U<:Number} + @test d.meta[:results][1].data[:typesig] === Union{Tuple{Int, U, T}, Tuple{Int, U}} where {T, U<:Number} end From f838a6b565dc5004a31a6a5d472fd8c4eb576c1e Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 17 Oct 2025 13:33:42 +1000 Subject: [PATCH 1105/1109] Adapt build system for in-tree JuliaSyntax These are the simplest possible adaptions to create the vendored Base.JuliaSyntax from an in-tree version of JuliaSyntax (JuliaLowering to be hooked up later). Also remove JuliaLowering / JuliaSyntax GHA actions and update JuliaLowering `[sources]` to use local `JuliaSyntax`. 
--- JuliaLowering/.github/workflows/CI.yml | 35 ----- .../.github/workflows/CompatHelper.yml | 16 --- JuliaLowering/.github/workflows/TagBot.yml | 31 ----- JuliaLowering/Project.toml | 2 +- JuliaSyntax/.github/workflows/CI.yml | 121 ------------------ .../.github/workflows/CompatHelper.yml | 16 --- JuliaSyntax/.github/workflows/TagBot.yml | 15 --- Makefile | 2 +- base/Base.jl | 4 +- deps/JuliaSyntax.mk | 16 --- deps/JuliaSyntax.version | 4 - deps/Makefile | 7 +- sysimage.mk | 3 + 13 files changed, 8 insertions(+), 264 deletions(-) delete mode 100644 JuliaLowering/.github/workflows/CI.yml delete mode 100644 JuliaLowering/.github/workflows/CompatHelper.yml delete mode 100644 JuliaLowering/.github/workflows/TagBot.yml delete mode 100644 JuliaSyntax/.github/workflows/CI.yml delete mode 100644 JuliaSyntax/.github/workflows/CompatHelper.yml delete mode 100644 JuliaSyntax/.github/workflows/TagBot.yml delete mode 100644 deps/JuliaSyntax.mk delete mode 100644 deps/JuliaSyntax.version diff --git a/JuliaLowering/.github/workflows/CI.yml b/JuliaLowering/.github/workflows/CI.yml deleted file mode 100644 index 7ed1133e01315..0000000000000 --- a/JuliaLowering/.github/workflows/CI.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: CI -on: - push: - branches: - - main - tags: ['*'] - pull_request: - workflow_dispatch: -concurrency: - # Skip intermediate builds: always. - # Cancel intermediate builds: only if it is a pull request build. 
- group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} -jobs: - test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - 'nightly' - os: - - ubuntu-latest - arch: - - x64 - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v1 - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 diff --git a/JuliaLowering/.github/workflows/CompatHelper.yml b/JuliaLowering/.github/workflows/CompatHelper.yml deleted file mode 100644 index cba9134c670f0..0000000000000 --- a/JuliaLowering/.github/workflows/CompatHelper.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: CompatHelper -on: - schedule: - - cron: 0 0 * * * - workflow_dispatch: -jobs: - CompatHelper: - runs-on: ubuntu-latest - steps: - - name: Pkg.add("CompatHelper") - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - - name: CompatHelper.main() - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} - run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/JuliaLowering/.github/workflows/TagBot.yml b/JuliaLowering/.github/workflows/TagBot.yml deleted file mode 100644 index 2bacdb87e004b..0000000000000 --- a/JuliaLowering/.github/workflows/TagBot.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: TagBot -on: - issue_comment: - types: - - created - workflow_dispatch: - inputs: - lookback: - default: 3 -permissions: - actions: read - checks: read - contents: write - deployments: read - issues: read - discussions: read - packages: read - pages: read - pull-requests: read - repository-projects: read - security-events: read - statuses: read -jobs: - TagBot: - if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' - 
runs-on: ubuntu-latest - steps: - - uses: JuliaRegistries/TagBot@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml index 2b01366509e8b..4add188a89eef 100644 --- a/JuliaLowering/Project.toml +++ b/JuliaLowering/Project.toml @@ -7,7 +7,7 @@ version = "1.0.0-DEV" JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" [sources] -JuliaSyntax = {rev = "99e975a7", url = "https://github.com/JuliaLang/JuliaSyntax.jl"} +JuliaSyntax = {path = "../JuliaSyntax"} [compat] julia = "1" diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml deleted file mode 100644 index b6ceedb5e4f63..0000000000000 --- a/JuliaSyntax/.github/workflows/CI.yml +++ /dev/null @@ -1,121 +0,0 @@ -name: CI -on: - push: - branches: - - main - - release-* - tags: '*' - pull_request: -jobs: - test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - '1.0' - - '1.1' - - '1.2' - - '1.3' - - '1.4' - - '1.5' - - '1.6' - - '1.7' - - '1.8' - - '1.9' - - '1' - - 'pre' - - 'nightly' - os: - - ubuntu-latest - - macOS-latest - - windows-latest - arch: - - x86 - - x64 - exclude: - # Test all OS's and arch possibilities on - # - 1.0 - # - 1.6 - # - 1 - # - pre - # - nightly - # but remove some configurations from the build matrix to reduce CI time. 
- # See https://github.com/marketplace/actions/setup-julia-environment - - {os: 'macOS-latest', version: '1.1'} - - {os: 'macOS-latest', version: '1.2'} - - {os: 'macOS-latest', version: '1.3'} - - {os: 'macOS-latest', version: '1.4'} - - {os: 'macOS-latest', version: '1.5'} - - {os: 'macOS-latest', version: '1.7'} - - {os: 'macOS-latest', version: '1.8'} - - {os: 'macOS-latest', version: '1.9'} - # MacOS not available on x86 - - {os: 'macOS-latest', arch: 'x86'} - - {os: 'windows-latest', version: '1.1'} - - {os: 'windows-latest', version: '1.2'} - - {os: 'windows-latest', version: '1.3'} - - {os: 'windows-latest', version: '1.4'} - - {os: 'windows-latest', version: '1.5'} - - {os: 'windows-latest', version: '1.7'} - - {os: 'windows-latest', version: '1.8'} - - {os: 'windows-latest', version: '1.9'} - - {os: 'ubuntu-latest', version: '1.1', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.2', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.3', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.4', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.5', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.7', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.8', arch: 'x86'} - - {os: 'ubuntu-latest', version: '1.9', arch: 'x86'} - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - - uses: julia-actions/julia-processcoverage@latest - - uses: codecov/codecov-action@v3 - with: - file: lcov.info - test_sysimage: - name: JuliaSyntax sysimage build - ${{ github.event_name }} - runs-on: ubuntu-latest - strategy: - fail-fast: false - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v2 - with: - version: 1.6 - arch: x64 - - uses: actions/cache@v4 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ 
hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- - - uses: julia-actions/julia-buildpkg@v1 - - run: julia sysimage/compile.jl - docs: - name: Documentation - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@latest - with: - version: '1.10' - - run: julia --project=docs -e ' - using Pkg; - Pkg.develop(PackageSpec(; path=pwd())); - Pkg.instantiate();' - - run: julia --project=docs docs/make.jl - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/JuliaSyntax/.github/workflows/CompatHelper.yml b/JuliaSyntax/.github/workflows/CompatHelper.yml deleted file mode 100644 index cba9134c670f0..0000000000000 --- a/JuliaSyntax/.github/workflows/CompatHelper.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: CompatHelper -on: - schedule: - - cron: 0 0 * * * - workflow_dispatch: -jobs: - CompatHelper: - runs-on: ubuntu-latest - steps: - - name: Pkg.add("CompatHelper") - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - - name: CompatHelper.main() - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} - run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/JuliaSyntax/.github/workflows/TagBot.yml b/JuliaSyntax/.github/workflows/TagBot.yml deleted file mode 100644 index f49313b662013..0000000000000 --- a/JuliaSyntax/.github/workflows/TagBot.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: TagBot -on: - issue_comment: - types: - - created - workflow_dispatch: -jobs: - TagBot: - if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' - runs-on: ubuntu-latest - steps: - - uses: JuliaRegistries/TagBot@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/Makefile b/Makefile index 70dbae7e4bccc..7dfd165237aa8 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,7 @@ ifndef JULIA_VAGRANT_BUILD endif endif -TOP_LEVEL_PKGS := 
Compiler +TOP_LEVEL_PKGS := Compiler JuliaSyntax JuliaLowering TOP_LEVEL_PKG_LINK_TARGETS := $(addprefix $(build_datarootdir)/julia/,$(TOP_LEVEL_PKGS)) diff --git a/base/Base.jl b/base/Base.jl index 92d3ad2c04059..c5513b0af0ce3 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -315,9 +315,9 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) @eval Core const Compiler = $Base.Compiler @eval Compiler const fl_parse = $Base.fl_parse -# External libraries vendored into Base +# Compiler frontend Core.println("JuliaSyntax/src/JuliaSyntax.jl") -include(@__MODULE__, string(BUILDROOT, "JuliaSyntax/src/JuliaSyntax.jl")) # include($BUILDROOT/base/JuliaSyntax/JuliaSyntax.jl) +include(@__MODULE__, string(DATAROOT, "julia/JuliaSyntax/src/JuliaSyntax.jl")) end_base_include = time_ns() diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk deleted file mode 100644 index 4a8afa8fbd53c..0000000000000 --- a/deps/JuliaSyntax.mk +++ /dev/null @@ -1,16 +0,0 @@ -$(eval $(call git-external,JuliaSyntax,JULIASYNTAX,,,$(BUILDDIR))) - -$(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted - @# no build steps - echo 1 > $@ - -$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(BUILDROOT)/base)) - -clean-JuliaSyntax: - -rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled -get-JuliaSyntax: $(JULIASYNTAX_SRC_FILE) -extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted -configure-JuliaSyntax: extract-JuliaSyntax -compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled -fastcheck-JuliaSyntax: check-JuliaSyntax -check-JuliaSyntax: compile-JuliaSyntax diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version deleted file mode 100644 index 94f480c65dcf7..0000000000000 --- a/deps/JuliaSyntax.version +++ /dev/null @@ -1,4 +0,0 @@ -JULIASYNTAX_BRANCH = main -JULIASYNTAX_SHA1 = 99e975a726a82994de3f8e961e6fa8d39aed0d37 -JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git 
-JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git a/deps/Makefile b/deps/Makefile index fb6724317759d..cea1e52c55156 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -36,8 +36,6 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # prevent installing libs into usr/lib64 on opensuse unexport CONFIG_SITE -DEP_LIBS := JuliaSyntax - ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0) DEP_LIBS += blastrampoline endif @@ -212,7 +210,7 @@ DEP_LIBS_STAGED := $(DEP_LIBS) DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ objconv openssl libssh2 nghttp2 curl libgit2 libwhich zlib zstd p7zip csl \ - sanitizers libsuitesparse lld libtracyclient ittapi nvtx JuliaSyntax \ + sanitizers libsuitesparse lld libtracyclient ittapi nvtx \ terminfo mmtk_julia DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) @@ -297,7 +295,4 @@ include $(SRCDIR)/terminfo.mk # MMTk include $(SRCDIR)/mmtk_julia.mk -# vendored Julia libs -include $(SRCDIR)/JuliaSyntax.mk - include $(SRCDIR)/tools/uninstallers.mk diff --git a/sysimage.mk b/sysimage.mk index 296a137c12fcc..e7917875e0ef2 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -76,9 +76,12 @@ COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \ base/traits.jl \ base/tuple.jl) COMPILER_SRCS += $(shell find $(JULIAHOME)/Compiler/src -name \*.jl -and -not -name verifytrim.jl -and -not -name show.jl) +# Julia-based compiler frontend is bootstrapped into Base for now +COMPILER_FRONTEND_SRCS = $(shell find $(JULIAHOME)/JuliaSyntax/src -name \*.jl) # sort these to remove duplicates BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl -and -not -name sysimg.jl) \ $(shell find $(BUILDROOT)/base -name \*.jl -and -not -name sysimg.jl)) \ + $(COMPILER_FRONTEND_SRCS) \ $(JULIAHOME)/Compiler/src/ssair/show.jl \ $(JULIAHOME)/Compiler/src/verifytrim.jl STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIBS_SRCS) From 
ff358d448d01b460d6739316b3581860510d5063 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 24 Oct 2025 18:27:22 +1000 Subject: [PATCH 1106/1109] Adjust JuliaSyntax tests to run from `make test` --- JuliaSyntax/test/fuzz_test.jl | 3 +-- JuliaSyntax/test/kinds.jl | 2 +- JuliaSyntax/test/literal_parsing.jl | 2 +- JuliaSyntax/test/parse_stream.jl | 2 +- JuliaSyntax/test/runtests.jl | 5 ++++- JuliaSyntax/test/runtests_vendored.jl | 4 ++++ JuliaSyntax/test/tokenize.jl | 6 +++--- 7 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 JuliaSyntax/test/runtests_vendored.jl diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl index e24096a38aaf9..71c9ff77b1ab6 100644 --- a/JuliaSyntax/test/fuzz_test.jl +++ b/JuliaSyntax/test/fuzz_test.jl @@ -1,5 +1,4 @@ -using JuliaSyntax -using JuliaSyntax: tokenize +using .JuliaSyntax: tokenize import Logging import Test diff --git a/JuliaSyntax/test/kinds.jl b/JuliaSyntax/test/kinds.jl index f58fbd80e74f2..5179544ec15d3 100644 --- a/JuliaSyntax/test/kinds.jl +++ b/JuliaSyntax/test/kinds.jl @@ -2,7 +2,7 @@ if !isdefined(@__MODULE__, :FooKinds) @eval module FooKinds -using JuliaSyntax +using ..JuliaSyntax function _init_kinds() JuliaSyntax.register_kinds!(@__MODULE__, 42, [ diff --git a/JuliaSyntax/test/literal_parsing.jl b/JuliaSyntax/test/literal_parsing.jl index 42fcbc44ef97a..bfb8e932458ad 100644 --- a/JuliaSyntax/test/literal_parsing.jl +++ b/JuliaSyntax/test/literal_parsing.jl @@ -1,4 +1,4 @@ -using JuliaSyntax: +using .JuliaSyntax: parse_int_literal, parse_uint_literal, parse_float_literal, diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl index 43b16ab5c885f..cda8443be113a 100644 --- a/JuliaSyntax/test/parse_stream.jl +++ b/JuliaSyntax/test/parse_stream.jl @@ -3,7 +3,7 @@ # Here we test the ParseStream interface, by taking input code and checking # that the correct sequence of emit() and bump() produces a valid parse tree. 
-using JuliaSyntax: ParseStream, +using .JuliaSyntax: ParseStream, peek, peek_token, bump, bump_trivia, bump_invisible, emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG, diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl index 3fd6227801482..644f073124982 100644 --- a/JuliaSyntax/test/runtests.jl +++ b/JuliaSyntax/test/runtests.jl @@ -1,4 +1,7 @@ -using JuliaSyntax +if !(@isdefined JuliaSyntax) + using JuliaSyntax +end + using Test include("test_utils.jl") diff --git a/JuliaSyntax/test/runtests_vendored.jl b/JuliaSyntax/test/runtests_vendored.jl new file mode 100644 index 0000000000000..52980e4917dcf --- /dev/null +++ b/JuliaSyntax/test/runtests_vendored.jl @@ -0,0 +1,4 @@ +# Test copy of JuliaSyntax vendored into Base +using Base.JuliaSyntax: JuliaSyntax + +include("runtests.jl") diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl index 5089152065c71..fe5bba6ac073e 100644 --- a/JuliaSyntax/test/tokenize.jl +++ b/JuliaSyntax/test/tokenize.jl @@ -3,7 +3,7 @@ module TokenizeTests using Test -using JuliaSyntax: +using ..JuliaSyntax: JuliaSyntax, @K_str, Kind, @@ -11,13 +11,13 @@ using JuliaSyntax: is_error, is_operator -using JuliaSyntax.Tokenize: +using ..JuliaSyntax.Tokenize: Tokenize, tokenize, untokenize, RawToken -using ..Main: toks +import ..toks tok(str, i = 1) = collect(tokenize(str))[i] From d0f24f24e9b5ce2265b6a58f72142aab2cb5a958 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Fri, 24 Oct 2025 19:41:33 +1000 Subject: [PATCH 1107/1109] Add JuliaSyntax / JuliaLowering to TESTNAMES / `make test` The environment when running under Distributed is slightly different than running via `Pkg.test()` so this required some tweaks to error printing, etc. 
--- JuliaLowering/test/runtests_vendored.jl | 25 +++++++++++++++++++++++++ test/Makefile | 2 +- test/choosetests.jl | 21 ++++++++++++++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 JuliaLowering/test/runtests_vendored.jl diff --git a/JuliaLowering/test/runtests_vendored.jl b/JuliaLowering/test/runtests_vendored.jl new file mode 100644 index 0000000000000..84efaa15bffb7 --- /dev/null +++ b/JuliaLowering/test/runtests_vendored.jl @@ -0,0 +1,25 @@ +old_active_project = Base.active_project() +try + # test local (dev) copy of JuliaLowering, not yet vendored into Base + Base.set_active_project(joinpath(@__DIR__, "..", "Project.toml")) + manifest_path = joinpath(@__DIR__, "..", "Manifest.toml") + isfile(manifest_path) && rm(manifest_path) + + # activate and instantiate JuliaSyntax as a local package (rather than using + # Base.JuliaSyntax) + import Pkg + Pkg.instantiate() + + # restore error hints (emptied by `testdefs.jl`) so that errors print as + # JuliaLowering expects them to + Base.Experimental.register_error_hint(Base.UndefVarError_hint, UndefVarError) + + # n.b.: these must be run in `Main`, so that type-printing is equivalent + # when running via Pkg.test() (e.g. 
"SyntaxGraph" should be printed instead + # of "JuliaLowering.SyntaxGraph") + @eval Main using JuliaLowering + Core.include(Main, joinpath(@__DIR__, "runtests.jl")) # run the actual tests +finally + # Restore original load path and active project + Base.set_active_project(old_active_project) +end diff --git a/test/Makefile b/test/Makefile index b22656766700b..33b74eaf93d3b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -11,7 +11,7 @@ export JULIA_LOAD_PATH := @$(PATHSEP)@stdlib unexport JULIA_PROJECT := unexport JULIA_BINDIR := -TESTGROUPS = unicode strings compiler Compiler +TESTGROUPS = unicode strings compiler Compiler JuliaSyntax JuliaLowering TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ $(filter-out runtests testdefs relocatedepot, \ diff --git a/test/choosetests.jl b/test/choosetests.jl index ec1ee983a1f4c..3e315523a40a1 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -30,7 +30,8 @@ const TESTNAMES = [ "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", "scopedvalues", "compileall", "rebinding", - "faulty_constructor_method_should_not_cause_stack_overflows" + "faulty_constructor_method_should_not_cause_stack_overflows", + "JuliaSyntax", "JuliaLowering", ] const INTERNET_REQUIRED_LIST = [ @@ -46,6 +47,12 @@ const INTERNET_REQUIRED_LIST = [ const NETWORK_REQUIRED_LIST = vcat(INTERNET_REQUIRED_LIST, ["Sockets"]) +const TOP_LEVEL_PKGS = [ + "Compiler", + "JuliaSyntax", + "JuliaLowering", +] + function test_path(test) t = split(test, '/') if t[1] in STDLIBS @@ -61,6 +68,12 @@ function test_path(test) elseif t[1] == "Compiler" testpath = length(t) >= 2 ? t[2:end] : ("runtests",) return joinpath(@__DIR__, "..", t[1], "test", testpath...) + elseif t[1] == "JuliaSyntax" + testpath = length(t) >= 2 ? t[2:end] : ("runtests_vendored",) + return joinpath(@__DIR__, "..", t[1], "test", testpath...) 
+ elseif t[1] == "JuliaLowering" + testpath = length(t) >= 2 ? t[2:end] : ("runtests_vendored",) + return joinpath(@__DIR__, "..", t[1], "test", testpath...) else return joinpath(@__DIR__, test) end @@ -225,9 +238,11 @@ function choosetests(choices = []) filter!(!in(tests), unhandled) filter!(!in(skip_tests), tests) + is_package_test(testname) = testname in STDLIBS || testname in TOP_LEVEL_PKGS + new_tests = String[] for test in tests - if test in STDLIBS || test == "Compiler" + if is_package_test(test) testfile = test_path("$test/testgroups") if isfile(testfile) testgroups = readlines(testfile) @@ -238,7 +253,7 @@ function choosetests(choices = []) end end end - filter!(x -> (x != "stdlib" && !(x in STDLIBS) && x != "Compiler") , tests) + filter!(x -> (x != "stdlib" && !is_package_test(x)) , tests) append!(tests, new_tests) requested_all || explicit_pkg || filter!(x -> x != "Pkg", tests) From 29a4bbf950aefa6116b296d48ef966ab36c5ca84 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 14 Nov 2025 15:17:58 -0500 Subject: [PATCH 1108/1109] loading.jl: Make `slug` internally type-stable (#60134) --- base/loading.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 2eac9d06ca66e..18b26794116ce 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -185,10 +185,9 @@ const slug_chars = String(['A':'Z'; 'a':'z'; '0':'9']) function slug(x::UInt32, p::Int) sprint(sizehint=p) do io - y = x - n = length(slug_chars) + n = UInt32(length(slug_chars)) for i = 1:p - y, d = divrem(y, n) + x, d = divrem(x, n) write(io, slug_chars[1+d]) end end From 6a55e524b87988210135a29f832d614a9366afe0 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 14 Nov 2025 17:33:29 -0500 Subject: [PATCH 1109/1109] document metrics from GC_Num; rename one metric from GC_Num to match the name used by the equivalent C struct (#60115) See PR title. 
I plan to clean up the GC metrics code a bit more in subsequent PRs. --- base/timing.jl | 97 +++++++++++++++++++++++++++++++++------------- src/gc-interface.h | 47 ++++++++++++++++++++-- 2 files changed, 113 insertions(+), 31 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index 2b0e0c1afe81f..d4c219049fd3d 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -2,35 +2,76 @@ # This type must be kept in sync with the C struct in src/gc-interface.h struct GC_Num - allocd ::Int64 # GC internal - deferred_alloc ::Int64 # GC internal - freed ::Int64 # GC internal - malloc ::Int64 - realloc ::Int64 - poolalloc ::Int64 - bigalloc ::Int64 - freecall ::Int64 - total_time ::Int64 - total_allocd ::Int64 # GC internal - collect ::Csize_t # GC internal - pause ::Cint - full_sweep ::Cint - max_pause ::Int64 - max_memory ::Int64 - time_to_safepoint ::Int64 - max_time_to_safepoint ::Int64 - total_time_to_safepoint ::Int64 - sweep_time ::Int64 - mark_time ::Int64 - stack_pool_sweep_time ::Int64 - total_sweep_time ::Int64 - total_sweep_page_walk_time ::Int64 - total_sweep_madvise_time ::Int64 - total_sweep_free_mallocd_memory_time ::Int64 - total_mark_time ::Int64 + # (GC Internal) Number of allocated bytes since the last collection. This field is reset + # after the end of every garbage collection cycle, so it will always be zero if observed + # during execution of Julia user code + allocd::Int64 + # (GC Internal) Number of allocated bytes within a `gc_disable/gc_enable` block. This field is + # reset after every garbage collection cycle and will always be zero in case of no use + # of `gc_disable/gc_enable` blocks + deferred_alloc::Int64 + # (GC Internal) Number of bytes freed bytes in the current collection cycle. This field is + # reset after every garbage collection cycle and will always be zero when observed + # during execution of Julia user code. 
It's incremented as memory is reclaimed during a collection, + # used to gather some statistics within the collection itself and reset at the end of a GC cycle. + freed::Int64 + # Number of `malloc/calloc` calls (never reset by the runtime) + malloc::Int64 + # Number of `realloc` calls (never reset by the runtime) + realloc::Int64 + # Number of pool allocation calls (never reset by the runtime) + # NOTE: Julia's stock GC uses an internal (pool) allocator for objects up to 2032 bytes. + # Larger objects are allocated through `malloc/calloc`. + poolalloc::Int64 + # Number of allocations for "big objects" (non-array objects larger than 2032 bytes) + # (never reset by the runtime) + bigalloc::Int64 + # Number of `free` calls (never reset by the runtime) + freecall::Int64 + # Total time spent in garbage collection (never reset by the runtime) + total_time::Int64 + # (GC internal) Total number of bytes allocated since the program started + total_allocd::Int64 + # (GC internal) Per-thread allocation quota before triggering a GC + # NOTE: This field is no longer used by the heuristics in the stock GC + interval::Csize_t + # Duration of the last GC pause in nanoseconds + pause::Cint + # Number of full GC sweeps completed so far (never reset by the runtime) + full_sweep::Cint + # Maximum pause duration observed so far in nanoseconds + max_pause::Int64 + # Maximum number of bytes allocated any point in time. 
+ # NOTE: This is aggregated over objects, not pages + max_memory::Int64 + # Time taken to reach a safepoint in the last GC cycle in nanoseconds + time_to_safepoint::Int64 + # Maximum time taken to reach a safepoint across all GCs in nanoseconds + max_time_to_safepoint::Int64 + # Total time taken to reach safepoints across all GCs in nanoseconds + total_time_to_safepoint::Int64 + # Time spent in the last GC sweeping phase in nanoseconds + sweep_time::Int64 + # Time spent in the last GC marking phase in nanoseconds + mark_time::Int64 + # Time spent sweeping stack pools in the last GC in nanoseconds + stack_pool_sweep_time::Int64 + # Total time spent in sweeping phase across all GCs in nanoseconds + total_sweep_time::Int64 + # Total time spent walking pool allocated pages during sweeping phase across all GCs in nanoseconds + total_sweep_page_walk_time::Int64 + # Total time spent in madvise calls during sweeping phase across all GCs in nanoseconds + total_sweep_madvise_time::Int64 + # Total time spent in freeing malloc'd memory during sweeping phase across all GCs in nanoseconds + total_sweep_free_mallocd_memory_time::Int64 + # Total time spent in marking phase across all GCs in nanoseconds + total_mark_time::Int64 + # Total time spent sweeping stack pools across all GCs in nanoseconds total_stack_pool_sweep_time::Int64 - last_full_sweep ::Int64 - last_incremental_sweep ::Int64 + # Timestamp of the last full GC sweep in nanoseconds + last_full_sweep::Int64 + # Timestamp of the last incremental GC sweep in nanoseconds + last_incremental_sweep::Int64 end gc_num() = ccall(:jl_gc_num, GC_Num, ()) diff --git a/src/gc-interface.h b/src/gc-interface.h index 5edcd4de98bb2..7905270b91795 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -26,34 +26,75 @@ struct _jl_genericmemory_t; // This struct must be kept in sync with the Julia type of the same name in base/timing.jl typedef struct { + // (GC Internal) Number of allocated bytes since the last collection. 
This field is reset + // after the end of every garbage collection cycle, so it will always be zero if observed + // during execution of Julia user code int64_t allocd; + // (GC Internal) Number of allocated bytes within a `gc_disable/gc_enable` block. This field is + // reset after every garbage collection cycle and will always be zero in case of no use + // of `gc_disable/gc_enable` blocks int64_t deferred_alloc; + // (GC Internal) Number of bytes freed bytes in the current collection cycle. This field is + // reset after every garbage collection cycle and will always be zero when observed + // during execution of Julia user code. It's incremented as memory is reclaimed during a collection, + // used to gather some statistics within the collection itself and reset at the end of a GC cycle. int64_t freed; + // Number of `malloc/calloc` calls (never reset by the runtime) uint64_t malloc; + // Number of `realloc` calls (never reset by the runtime) uint64_t realloc; + // Number of pool allocation calls (never reset by the runtime) + // NOTE: Julia's stock GC uses an internal (pool) allocator for objects up to 2032 bytes. + // Larger objects are allocated through `malloc/calloc`. 
uint64_t poolalloc; + // Number of allocations for "big objects" (non-array objects larger than 2032 bytes) + // (never reset by the runtime) uint64_t bigalloc; + // Number of `free` calls (never reset by the runtime) uint64_t freecall; + // Total time spent in garbage collection (never reset by the runtime) uint64_t total_time; + // (GC internal) Total number of bytes allocated since the program started uint64_t total_allocd; + // (GC internal) Per-thread allocation quota before triggering a GC + // NOTE: This field is no longer used by the heuristics in the stock GC size_t interval; + // Duration of the last GC pause in nanoseconds int pause; + // Number of full GC sweeps completed so far (never reset by the runtime) int full_sweep; + // Maximum pause duration observed so far in nanoseconds uint64_t max_pause; + // Maximum number of bytes allocated any point in time. + // NOTE: This is aggregated over objects, not pages uint64_t max_memory; + // Time taken to reach a safepoint in the last GC cycle in nanoseconds uint64_t time_to_safepoint; + // Maximum time taken to reach a safepoint across all GCs in nanoseconds uint64_t max_time_to_safepoint; + // Total time taken to reach safepoints across all GCs in nanoseconds uint64_t total_time_to_safepoint; + // Time spent in the last GC sweeping phase in nanoseconds uint64_t sweep_time; + // Time spent in the last GC marking phase in nanoseconds uint64_t mark_time; + // Time spent sweeping stack pools in the last GC in nanoseconds uint64_t stack_pool_sweep_time; + // Total time spent in sweeping phase across all GCs in nanoseconds uint64_t total_sweep_time; - uint64_t total_sweep_page_walk_time; - uint64_t total_sweep_madvise_time; - uint64_t total_sweep_free_mallocd_memory_time; + // Total time spent walking pool allocated pages during sweeping phase across all GCs in nanoseconds + uint64_t total_sweep_page_walk_time; + // Total time spent in madvise calls during sweeping phase across all GCs in nanoseconds + uint64_t 
total_sweep_madvise_time; + // Total time spent in freeing malloc'd memory during sweeping phase across all GCs in nanoseconds + uint64_t total_sweep_free_mallocd_memory_time; + // Total time spent in marking phase across all GCs in nanoseconds uint64_t total_mark_time; + // Total time spent sweeping stack pools across all GCs in nanoseconds uint64_t total_stack_pool_sweep_time; + // Timestamp of the last full GC sweep in nanoseconds uint64_t last_full_sweep; + // Timestamp of the last incremental GC sweep in nanoseconds uint64_t last_incremental_sweep; } jl_gc_num_t;