/* Apache V2 usage with no significant modifications
   Source: https://github.com/openqasm/openqasm/blob/429781bb9c95ef15944861f306ac6b9e4ff0abf0/source/grammar/qasm3Lexer.g4
   License: https://github.com/openqasm/openqasm/blob/main/LICENSE
*/

lexer grammar qasm3Lexer;

/* Naming conventions in this lexer grammar
 *
 * - Keywords and exact symbols that have only one possible value are written in
 *   all caps.  There is no more information in the parsed text than in the name
 *   of the lexeme.  For example, `INCLUDE` is only ever the string `'include'`.
 *
 * - Lexemes with information in the string form are in PascalCase.  This
 *   indicates there is more information in the token than just the name.  For
 *   example, `Identifier` has a payload containing the name of the identifier.
 */

/* Language keywords. */

OPENQASM: 'OPENQASM' -> pushMode(VERSION_IDENTIFIER);
INCLUDE: 'include';
DEFCALGRAMMAR: 'defcalgrammar';
DEF: 'def';
DEFCAL: 'defcal';
GATE: 'gate';
EXTERN: 'extern';
BOX: 'box';
LET: 'let';

BREAK: 'break';
CONTINUE: 'continue';
IF: 'if';
ELSE: 'else';
END: 'end';
RETURN: 'return';
FOR: 'for';
WHILE: 'while';
IN: 'in';

PRAGMA: '#'? 'pragma' -> pushMode(EAT_TO_LINE_END);
AnnotationKeyword: '@' Identifier ->  pushMode(EAT_TO_LINE_END);


/* Types. */

INPUT: 'input';
OUTPUT: 'output';
CONST: 'const';
MUTABLE: 'mutable';

QREG: 'qreg';
QUBIT: 'qubit';

CREG: 'creg';
BOOL: 'bool';
BIT: 'bit';
INT: 'int';
UINT: 'uint';
FLOAT: 'float';
ANGLE: 'angle';
COMPLEX: 'complex';
ARRAY: 'array';

DURATION:  'duration';
STRETCH: 'stretch';


/* Builtin identifiers and operations */

GPHASE: 'gphase';
INV: 'inv';
POW: 'pow';
CTRL: 'ctrl';
NEGCTRL: 'negctrl';

DIM: '#dim';

DURATIONOF: 'durationof';

DELAY: 'delay';
RESET: 'reset';
MEASURE: 'measure';
BARRIER: 'barrier';

BooleanLiteral: 'true' | 'false';


/* Symbols */

LBRACKET: '[';
RBRACKET: ']';
LBRACE: '{';
RBRACE: '}';
LPAREN: '(';
RPAREN: ')';

COLON: ':';
SEMICOLON: ';';

DOT: '.';
COMMA: ',';

EQUALS: '=';
ARROW: '->';
PLUS: '+';
DOUBLE_PLUS: '++';
MINUS: '-';
ASTERISK: '*';
DOUBLE_ASTERISK: '**';
SLASH: '/';
PERCENT: '%';
PIPE: '|';
DOUBLE_PIPE: '||';
AMPERSAND: '&';
DOUBLE_AMPERSAND: '&&';
CARET: '^';
AT: '@';
TILDE: '~';
EXCLAMATION_POINT: '!';

EqualityOperator: '==' | '!=';
CompoundAssignmentOperator: '+=' | '-=' | '*=' | '/=' | '&=' | '|=' | '~=' | '^=' | '<<=' | '>>=' | '%=' | '**=';
ComparisonOperator: '>' | '<' | '>=' | '<=';
BitshiftOperator: '>>' | '<<';

IMAG: 'im';
ImaginaryLiteral: (DecimalIntegerLiteral | FloatLiteral) ' '* IMAG;

BinaryIntegerLiteral: ('0b' | '0B') ([01] '_'?)* [01];
OctalIntegerLiteral: '0o' ([0-7] '_'?)* [0-7];
DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
HexIntegerLiteral: ('0x' | '0X') ([0-9a-fA-F] '_'?)* [0-9a-fA-F];

fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]; // valid unicode chars
fragment Letter: [A-Za-z];
fragment FirstIdCharacter: '_' | ValidUnicode | Letter;
fragment GeneralIdCharacter: FirstIdCharacter | [0-9];

Identifier: FirstIdCharacter GeneralIdCharacter*;
HardwareQubit: '$' [0-9]+;

fragment FloatLiteralExponent: [eE] (PLUS | MINUS)? DecimalIntegerLiteral;
FloatLiteral:
    // 1_123e-3, 123e+4 or 123E5 (needs the exponent or it's just an integer)
    DecimalIntegerLiteral FloatLiteralExponent
    // .1234_5678 or .1e3 (no digits before the dot)
    | DOT DecimalIntegerLiteral FloatLiteralExponent?
    // 123.456, 123. or 145.32e+1_00
    | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?;

fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's';
// represents explicit time value in SI or backend units
TimingLiteral: (DecimalIntegerLiteral | FloatLiteral) TimeUnit;


BitstringLiteral: '"' ([01] '_'?)* [01] '"';
// allow ``"str"`` and ``'str'``
StringLiteral
    : '"' ~["\r\t\n]+? '"'
    | '\'' ~['\r\t\n]+? '\''
    ;

// Ignore whitespace between tokens, and define C++-style comments.
Whitespace: [ \t]+ -> skip ;
Newline: [\r\n]+ -> skip ;
LineComment : '//' ~[\r\n]* -> skip;
BlockComment : '/*' .*? '*/' -> skip;


// The version identifier token would be ambiguous between itself and
// integer/floating-point literals, so we use a special mode to ensure it's
// lexed correctly.
mode VERSION_IDENTIFIER;
    VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;
    VersionSpecifier: [0-9]+ ('.' [0-9]+)? -> popMode;


// A different lexer mode to swap to when we need handle tokens on a line basis
// rather than the default arbitrary-whitespace-based tokenisation.  This is
// used by the annotation and pragma rules.
mode EAT_TO_LINE_END;
    EAT_INITIAL_SPACE: [ \t]+ -> skip;
    EAT_LINE_END: [\r\n] -> popMode, skip;

    // The line content must be a non-empty token to satisfy ANTLR (otherwise it
    // would be able to produce an infinite number of tokens).  We could include
    // the line ending to guarantee that this is always non-empty, but that just
    // puts an annoying burden on consumers to remove it again.
    RemainingLineContent: ~[ \t\r\n] ~[\r\n]*;