Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
id: set-matrix
run: |
# List of all available parsers
ALL_PARSERS="redshift postgresql cql snowflake tsql doris starrocks trino plsql googlesql mysql partiql tidb mariadb cosmosdb"
ALL_PARSERS="redshift postgresql cql snowflake tsql doris starrocks trino plsql googlesql mysql partiql tidb mariadb cosmosdb mongodb"
# Add more parsers here as they are added to the repository
# ALL_PARSERS="redshift mysql postgresql"

Expand Down
10 changes: 10 additions & 0 deletions mongodb/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Build helper for the MongoDB Shell grammar: regenerate the Go sources from
# the .g4 files, then run the parser tests.
.PHONY: all build test

# Default goal: regenerate first, then test.
all: build test

# Generate Go code (package "mongodb", visitor included) into this directory.
build:
	@echo "Building MongoDB Shell parser..."
	antlr -Dlanguage=Go -package mongodb -visitor -o . MongoShellLexer.g4 MongoShellParser.g4

# Run only the tests whose name matches TestMongoShellParser, verbosely.
test:
	go test -v -run TestMongoShellParser
132 changes: 132 additions & 0 deletions mongodb/MongoShellLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* MongoDB Shell (mongosh) Lexer Grammar
* For use with ANTLR 4
*/

lexer grammar MongoShellLexer;

// Keywords
// Fixed words of the shell syntax. Each literal is matched case-sensitively.
// These rules are declared ahead of IDENTIFIER, so when a keyword and
// IDENTIFIER match exactly the same text the keyword token is produced; a
// longer word such as `dbName` still lexes as IDENTIFIER because the lexer
// always prefers the longest match. The same longest-match rule separates
// `getCollection` from `getCollectionNames` and `db` from `dbs`.
// NOTE(review): a document key spelled like one of these words (for example
// `{ db: 1 }`) reaches the parser as the keyword token, not IDENTIFIER;
// presumably the parser grammar accepts keywords as keys - confirm there.
SHOW: 'show';
DBS: 'dbs';
DATABASES: 'databases';
COLLECTIONS: 'collections';
DB: 'db';
NEW: 'new';
TRUE: 'true';
FALSE: 'false';
NULL: 'null';
GET_COLLECTION: 'getCollection';
GET_COLLECTION_NAMES: 'getCollectionNames';

// Helper function names (recognized as distinct tokens)
// Each name gets its own token type instead of being lexed as IDENTIFIER, so
// the parser can refer to a specific helper by token. Matching is
// case-sensitive and exact: `ObjectId` is OBJECT_ID, while `objectId` or
// `ObjectIdentifier` fall through to IDENTIFIER (different or longer text).
OBJECT_ID: 'ObjectId';
ISO_DATE: 'ISODate';
DATE: 'Date';
UUID: 'UUID';
LONG: 'Long';
NUMBER_LONG: 'NumberLong';
INT32: 'Int32';
NUMBER_INT: 'NumberInt';
DOUBLE: 'Double';
DECIMAL128: 'Decimal128';
NUMBER_DECIMAL: 'NumberDecimal';
TIMESTAMP: 'Timestamp';
REG_EXP: 'RegExp';

// Query and cursor method names
// `find` / `findOne` plus the modifier names that follow them in a call chain.
// `project` is a prefix of `projection` and `find` of `findOne`; the lexer's
// longest-match rule picks the longer token whenever the longer text is
// present, so declaration order between those pairs does not matter.
// SKIP_ carries a trailing underscore, presumably to stay clear of ANTLR's
// `skip` lexer command - TODO confirm.
// NOTE(review): these words are tokenized this way everywhere, including when
// used as unquoted keys such as `{ sort: 1 }`; confirm the parser grammar
// allows them in key position.
FIND: 'find';
FIND_ONE: 'findOne';
SORT: 'sort';
LIMIT: 'limit';
SKIP_: 'skip';
PROJECTION: 'projection';
PROJECT: 'project';

// Punctuation
// Single-character structural tokens for calls, documents, arrays, member
// access and statement termination.
LPAREN: '(';
RPAREN: ')';
LBRACE: '{';
RBRACE: '}';
LBRACKET: '[';
RBRACKET: ']';
COLON: ':';
COMMA: ',';
DOT: '.';
SEMI: ';';

// A lone dollar sign.
// Query operators such as $gt or $lt are NOT split into DOLLAR + name: the
// IDENTIFIER rule accepts `$` as a start character, so `$gt` is one (longer)
// IDENTIFIER token. DOLLAR is only produced for a `$` that is not followed by
// an identifier character, where it ties with IDENTIFIER on length and wins
// because it is declared first.
DOLLAR: '$';

// Comments - must come before REGEX_LITERAL to properly capture /* ... */
// Both comment forms also begin with '/'. When a comment rule and
// REGEX_LITERAL match the same number of characters, the rule declared first
// wins, hence the ordering requirement. Comment tokens are sent to the HIDDEN
// channel: they stay in the token stream but are not seen by the parser.

// Single-line comment: from `//` up to, but not including, the line break.
LINE_COMMENT
: '//' ~[\r\n]* -> channel(HIDDEN)
;

// Block comment: non-greedy, so it ends at the first `*/`; may span lines and
// does not nest.
BLOCK_COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;

// Regex literal
// Shaped after the ECMAScript RegularExpressionLiteral production:
//   * The first body character may not be `*`. Without that restriction the
//     text `/* note */i` matched this rule (comment text as the body, `i` as a
//     flag) and, being one character longer than BLOCK_COMMENT, beat it under
//     the longest-match rule, turning a comment into a regex token.
//   * A backslash escapes any character except a line break, so a literal can
//     never continue onto the next line.
//   * Inside a character class `[...]` an unescaped `/` does not terminate
//     the literal, so `/[/]/` is a single token.
// The lexer has no notion of expression context; the first `/` of two on one
// line is still read as the start of a regex literal.
REGEX_LITERAL
    : '/' REGEX_BODY '/' REGEX_FLAGS?
    ;

// One or more body characters; only the first one is restricted.
fragment REGEX_BODY
    : REGEX_FIRST_CHAR REGEX_CHAR*
    ;

// First body character: like REGEX_CHAR but additionally excludes `*`, so
// that `/*` always opens a block comment.
fragment REGEX_FIRST_CHAR
    : ~[*/\r\n\\[]
    | REGEX_ESCAPE
    | REGEX_CLASS
    ;

// Any later body character: a plain character, an escape, or a whole
// character class.
fragment REGEX_CHAR
    : ~[/\r\n\\[]
    | REGEX_ESCAPE
    | REGEX_CLASS
    ;

// Backslash followed by any single character that is not a line break.
fragment REGEX_ESCAPE
    : '\\' ~[\r\n]
    ;

// Bracketed character class; `/` is an ordinary character in here, and `]`
// must be escaped to be part of the class.
fragment REGEX_CLASS
    : '[' ( ~[\]\r\n\\] | REGEX_ESCAPE )* ']'
    ;

// Flag letters that may directly follow the closing slash.
fragment REGEX_FLAGS
    : [gimsuy]+
    ;

// Numbers
// An optional leading minus, then either an integer part with an optional
// fraction (`12`, `12.5`) or a bare fraction (`.5`); an exponent may follow
// either form. A trailing dot without digits (`1.`) is not part of the token.
NUMBER
    : '-'? ( INT ('.' DIGIT+)? | '.' DIGIT+ ) EXPONENT?
    ;

// Integer part: a single zero, or a non-zero digit followed by more digits.
fragment INT
    : [1-9] DIGIT*
    | '0'
    ;

// Exponent suffix such as `e10`, `E-3` or `e+7`.
fragment EXPONENT
    : ('e' | 'E') ('+' | '-')? DIGIT+
    ;

// A single decimal digit.
fragment DIGIT
    : [0-9]
    ;

// Strings - both single and double quoted
// Shaped after the ECMAScript StringLiteral production:
//   * A raw line break is not allowed inside the quotes. Previously it was,
//     so one unterminated quote swallowed every following line up to the next
//     quote character. A line may still be continued with a backslash
//     immediately before the line break.
//   * Every JavaScript escape is accepted, not only the JSON subset: `\xHH`,
//     `\uHHHH`, `\u{H...}`, and a backslash before any other character
//     (`\.`, `\d`, `\$`, `\0`, `\v`, ...). Previously a string such as
//     "a\.b" could not be tokenized at all.
// Every string matched by the old rules that contains no raw line break is
// still matched, with the same token text.
// NOTE(review): code that unescapes string token text should handle the
// wider escape set - confirm in the consumer.
DOUBLE_QUOTED_STRING
    : '"' (ESC | ~["\\\r\n])* '"'
    ;

SINGLE_QUOTED_STRING
    : '\'' (ESC | ~['\\\r\n])* '\''
    ;

// Backslash sequence. `\u` and `\x` must be well formed; a backslash directly
// before a line break is a line continuation; anything else after the
// backslash is taken as a single escaped character.
fragment ESC
    : '\\'
      ( UNICODE
      | 'x' HEX HEX
      | '\r' '\n'?
      | '\n'
      | ~[ux\r\n]
      )
    ;

// The part of a Unicode escape after the backslash: four hex digits, or a
// braced code point.
fragment UNICODE
    : 'u' HEX HEX HEX HEX
    | 'u' '{' HEX+ '}'
    ;

// A single hexadecimal digit.
fragment HEX
    : [0-9a-fA-F]
    ;

// Identifiers - unquoted keys, collection names and method names.
// `$` is a legal start character, so MongoDB operators such as $gt or $in
// come out as a single IDENTIFIER token.
IDENTIFIER
    : ID_START ID_PART*
    ;

// Characters allowed in first position: dollar, underscore, ASCII letter.
fragment ID_START
    : '$'
    | '_'
    | [a-zA-Z]
    ;

// Characters allowed after the first: any start character or a digit.
fragment ID_PART
    : ID_START
    | [0-9]
    ;

// Whitespace
// Runs of spaces, tabs, carriage returns and line feeds are kept in the token
// stream on the HIDDEN channel, out of the parser's sight.
WS
    : (' ' | '\t' | '\r' | '\n')+ -> channel(HIDDEN)
    ;
Loading