This repository was archived by the owner on Mar 6, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 324
feat: add custom cell magic parser #213
Merged
Merged
Changes from 5 commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
c5e63c6
chore: Move cell magic code into its own directory
plamut 199974f
Add custom argument parser for cell magic
plamut d3525d4
Add AST node visitor
plamut 5690b81
Use a custom parser for cell magic arguments
plamut 976567c
Improve cell magic parser test coverage
plamut f57c6cd
Generalize valid option values
plamut c7e4420
Fix recognizing --params option in state 3
plamut 90c504e
Fix typo in comment
plamut 3b94a55
Merge branch 'master' into fix-cellmagic-parser
plamut 5249c6a
Cover missing parser code path with a test
plamut 83218a2
Merge branch 'master' into fix-cellmagic-parser
plamut 2c74ec8
Preserve the cell magic context's import path
plamut 19af056
Clarify lexer states
plamut 4cbf7f2
Replace re.scanner with finditer()
plamut 64f19b5
Fix typo in docstring
plamut 5d10d36
Simplify string literal in a single line
plamut 46d1def
Explain the visitors module.
plamut ed01a66
Merge branch 'master' into fix-cellmagic-parser
plamut d9dd3e0
Pass pos as a positional arg to finditer()
plamut 415c608
Resolve coverage complaint about a code path
plamut File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| IPython Magics for BigQuery | ||
| =========================== | ||
|
|
||
| .. automodule:: google.cloud.bigquery.magics | ||
| .. automodule:: google.cloud.bigquery.ipython_magics.magics | ||
| :members: | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| # Copyright 2020 Google LLC | ||
|
plamut marked this conversation as resolved.
|
||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
36 changes: 36 additions & 0 deletions
36
google/cloud/bigquery/ipython_magics/line_arg_parser/__init__.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| # Copyright 2020 Google LLC | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from google.cloud.bigquery.ipython_magics.line_arg_parser.exceptions import ParseError | ||
| from google.cloud.bigquery.ipython_magics.line_arg_parser.exceptions import ( | ||
| DuplicateQueryParamsError, | ||
| QueryParamsParseError, | ||
| ) | ||
| from google.cloud.bigquery.ipython_magics.line_arg_parser.lexer import Lexer | ||
| from google.cloud.bigquery.ipython_magics.line_arg_parser.lexer import TokenType | ||
| from google.cloud.bigquery.ipython_magics.line_arg_parser.parser import Parser | ||
| from google.cloud.bigquery.ipython_magics.line_arg_parser.visitors import ( | ||
| QueryParamsExtractor, | ||
| ) | ||
|
|
||
|
|
||
| __all__ = ( | ||
| "DuplicateQueryParamsError", | ||
| "Lexer", | ||
| "Parser", | ||
| "ParseError", | ||
| "QueryParamsExtractor", | ||
| "QueryParamsParseError", | ||
| "TokenType", | ||
| ) |
25 changes: 25 additions & 0 deletions
25
google/cloud/bigquery/ipython_magics/line_arg_parser/exceptions.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # Copyright 2020 Google LLC | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
class ParseError(Exception):
    """Common base class of the parse errors defined in this module."""


class QueryParamsParseError(ParseError):
    """Raised when --params option is syntactically incorrect."""


class DuplicateQueryParamsError(ParseError):
    """Parse error concerning duplicated query params.

    NOTE(review): presumably raised when the ``--params`` option is given more
    than once - confirm against the parser that raises it.
    """
252 changes: 252 additions & 0 deletions
252
google/cloud/bigquery/ipython_magics/line_arg_parser/lexer.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,252 @@ | ||
| # Copyright 2020 Google LLC | ||
|
plamut marked this conversation as resolved.
|
||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from collections import namedtuple | ||
| from collections import OrderedDict | ||
| import itertools | ||
| import re | ||
|
|
||
| import enum | ||
|
|
||
|
|
||
# Lightweight records produced while lexing. A ``Token`` holds the token type, the
# matched text (lexeme) and the position of the match in the input. A
# ``StateTransition`` holds the lexer state to switch to and the input offset from
# which scanning should resume.
Token = namedtuple("Token", ["type_", "lexeme", "pos"])
StateTransition = namedtuple("StateTransition", ["new_state", "total_offset"])
|
|
||
|
|
||
| # Token definition order is important, thus an OrderedDict is needed with tightly | ||
| # controlled member definitions (i.e. passed as a sequence, and *not* via kwargs). | ||
|
plamut marked this conversation as resolved.
Outdated
|
||
# Maps a token group name ("state_1", "state_2", "state_3", "common") to an ordered
# mapping of token name -> regex pattern. Every pattern wraps its match in a named
# group equal to the token name, so that the name can be recovered from
# ``match.lastgroup`` once the patterns of a group are joined into one alternation.
# The "GOTO_STATE_*" entries are zero-width lookaheads: they consume no input and
# only signal that the lexer should switch to another state.
token_types = OrderedDict(
    [
        (
            "state_1",
            OrderedDict(
                [
                    (
                        "GOTO_STATE_2",
                        r"(?P<GOTO_STATE_2>(?=--))",  # double dash - starting the options list
                    ),
                    (
                        "DEST_VAR",
                        r"(?P<DEST_VAR>[^\d\W]\w*)",  # essentially a Python ID
                    ),
                ]
            ),
        ),
        (
            "state_2",
            OrderedDict(
                [
                    (
                        "GOTO_STATE_3",
                        r"(?P<GOTO_STATE_3>(?=--params(?:\s|=|$)))",  # the --params option
                    ),
                    ("OPTION_SPEC", r"(?P<OPTION_SPEC>--\w+)"),
                    ("OPTION_EQ", r"(?P<OPTION_EQ>=)"),
                    # NOTE: Currently the only valid value for a non "--params" option is
                    # either a project/table name or an integer (e.g. max_results).
                    ("OPT_VAL", r"(?P<OPT_VAL>(?:\w|\.|-(?!-))+)"),
                ]
            ),
        ),
        (
            "state_3",
            OrderedDict(
                [
                    (
                        "PY_STRING",
                        # Single and double quoted strings. Inside the quotes either
                        # a character that is neither the quote nor a backslash is
                        # accepted, or a backslash followed by any character (an
                        # escape sequence such as \' or \n).
                        r"(?P<PY_STRING>(?:{})|(?:{}))".format(
                            r"'(?:[^'\\]|\\.)*'",
                            r'"(?:[^"\\]|\\.)*"',
                        ),
                    ),
                    ("PARAMS_OPT_SPEC", r"(?P<PARAMS_OPT_SPEC>--params)"),
                    ("PARAMS_OPT_EQ", r"(?P<PARAMS_OPT_EQ>=)"),
                    (
                        "GOTO_STATE_2",
                        r"(?P<GOTO_STATE_2>(?=--\w+))",  # found another option spec
                    ),
                    ("PY_BOOL", r"(?P<PY_BOOL>True|False)"),
                    ("DOLLAR_PY_ID", r"(?P<DOLLAR_PY_ID>\$[^\d\W]\w*)"),
                    (
                        "PY_NUMBER",
                        # Optional sign, integer part, optional fraction and an
                        # optional exponent. The exponent is a non-capturing group
                        # introduced by "e" or "E" only.
                        r"(?P<PY_NUMBER>-?[1-9]\d*(?:\.\d+)?(?:[eE][+-]?\d+)?)",
                    ),
                    ("SQUOTE", r"(?P<SQUOTE>')"),
                    ("DQUOTE", r'(?P<DQUOTE>")'),
                    ("COLON", r"(?P<COLON>:)"),
                    ("COMMA", r"(?P<COMMA>,)"),
                    ("LCURL", r"(?P<LCURL>\{)"),
                    ("RCURL", r"(?P<RCURL>})"),
                    ("LSQUARE", r"(?P<LSQUARE>\[)"),
                    ("RSQUARE", r"(?P<RSQUARE>])"),
                    ("LPAREN", r"(?P<LPAREN>\()"),
                    ("RPAREN", r"(?P<RPAREN>\))"),
                ]
            ),
        ),
        (
            "common",
            OrderedDict(
                [
                    ("WS", r"(?P<WS>\s+)"),
                    ("EOL", r"(?P<EOL>$)"),
                    (
                        # anything not a whitespace or matched by something else
                        "UNKNOWN",
                        r"(?P<UNKNOWN>\S+)",
                    ),
                ]
            ),
        ),
    ]
)
|
|
||
|
|
||
| # The _generate_next_value_() enum hook is only available in Python 3.6+, thus we | ||
| # need to do some acrobatics to implement an "auto str enum" base class. Implementation | ||
| # based on the recipe provided by the very author of the Enum library: | ||
| # https://stackoverflow.com/a/32313954/5040035 | ||
class StrEnumMeta(enum.EnumMeta):
    """Enum metaclass that turns ``NAME = ()`` declarations into ``NAME = "NAME"``."""

    @classmethod
    def __prepare__(metacls, name, bases, **kwargs):
        """Return the namespace used for the class body (an ``OrderedDict``)."""
        # An ordered namespace keeps the enum members in their definition order.
        return OrderedDict()

    def __new__(metacls, name, bases, oldclassdict):
        """Create the enum class, substituting names for empty-tuple values.

        Args:
            name (str): Name of the enum class being created.
            bases (tuple): Base classes of the new enum class.
            oldclassdict (dict): The class namespace as declared.

        Returns:
            The new enum class.
        """
        # Copy the declared namespace into a fresh enum dict, replacing every
        # value that is a plain empty tuple with the name it is bound to.
        members = enum._EnumDict()
        for member_name, member_value in oldclassdict.items():
            members[member_name] = member_name if member_value == () else member_value
        return super(StrEnumMeta, metacls).__new__(metacls, name, bases, members)
|
|
||
|
|
||
| # The @six.add_metaclass decorator does not work, Enum complains about _sunder_ names, | ||
| # and we cannot use class syntax directly, because the Python 3 version would cause | ||
| # a syntax error under Python 2. | ||
# Base class for enums whose members are strings equal to their own names. It is
# created by calling the metaclass directly (see the comment above for why the
# class statement syntax is not used).
AutoStrEnum = StrEnumMeta(
    "AutoStrEnum",
    (str, enum.Enum),
    {"__doc__": "Base enum class for name=value str enums."},
)

# All token names from every token group, except for the "GOTO_STATE_*" markers,
# which only signal lexer state transitions and are never emitted as tokens.
TokenType = AutoStrEnum(
    "TokenType",
    [
        (name, name)
        for name in itertools.chain.from_iterable(token_types.values())
        if not name.startswith("GOTO_STATE")
    ],
)
|
|
||
|
|
||
class LexerState(AutoStrEnum):
    """The states the cell magic lexer can be in while scanning the input."""

    # Parsing positional arguments.
    STATE_1 = ()
    # Parsing options other than "--params".
    STATE_2 = ()
    # Parsing the "--params" option.
    STATE_3 = ()
    # Terminal state; the lexer stops iterating once it is reached.
    STATE_END = ()
|
|
||
|
|
||
class Lexer(object):
    """Lexical analyzer for tokenizing the cell magic input line.

    Iterating over an instance yields ``Token`` objects for the input text, with
    whitespace tokens filtered out. The last token yielded is the ``EOL`` token.
    """

    # One combined pattern per lexer state: the state's own token patterns come
    # first, followed by the patterns common to all states. The order matters,
    # because the first alternative that matches wins.
    _GRAND_PATTERNS = {
        state: re.compile(
            "|".join(
                itertools.chain(
                    token_types[group].values(), token_types["common"].values()
                )
            )
        )
        for state, group in (
            (LexerState.STATE_1, "state_1"),
            (LexerState.STATE_2, "state_2"),
            (LexerState.STATE_3, "state_3"),
        )
    }

    def __init__(self, input_text):
        """Store the text to tokenize.

        Args:
            input_text (str): The cell magic input line.
        """
        self._text = input_text

    def __iter__(self):
        """Yield the non-whitespace tokens of the input text, ending with ``EOL``."""
        # Since re.scanner does not seem to support manipulating inner scanner states,
        # we need to implement lexer state transitions manually using special
        # zero-width lookahead token patterns to signal when a state transition
        # should be made.
        # Since we don't have "nested" states, we don't really need a stack and
        # this simple mechanism is sufficient.
        state = LexerState.STATE_1
        offset = 0  # the number of characters processed so far

        while state != LexerState.STATE_END:
            token_generator = self._get_state_token_generator(state, offset)

            for maybe_token in token_generator:  # pragma: NO COVER
                if isinstance(maybe_token, StateTransition):
                    # Abandon the current generator and resume scanning at the
                    # transition offset using the new state's pattern.
                    state = maybe_token.new_state
                    offset = maybe_token.total_offset
                    break

                if maybe_token.type_ != TokenType.WS:
                    yield maybe_token

                if maybe_token.type_ == TokenType.EOL:
                    state = LexerState.STATE_END
                    break

    def _get_state_token_generator(self, state, current_offset):
        """Return token generator for the current state starting at ``current_offset``.

        Args:
            state (LexerState): The current lexer state.
            current_offset (int): The offset in the input text, i.e. the number
                of characters already scanned so far.

        Returns:
            A generator yielding ``Token`` and ``StateTransition`` instances.
        """
        pattern = self._GRAND_PATTERNS[state]
        scanner = pattern.scanner(self._text, current_offset)
        return self._scan_for_tokens(scanner)

    def _scan_for_tokens(self, scanner):
        """Yield tokens produced by the scanner or state transition objects.

        The generator finishes right after yielding a ``StateTransition``, because
        the remaining input has to be scanned with a different pattern.

        Args:
            scanner (SRE_Scanner): The text tokenizer.

        Yields:
            The next ``Token`` or ``StateTransition`` instance.
        """
        # Keep calling scanner.match() until it returns None (no match).
        for match in iter(scanner.match, None):  # pragma: NO COVER
            token_type = match.lastgroup

            if token_type.startswith("GOTO_STATE"):
                yield StateTransition(
                    new_state=getattr(LexerState, token_type[5:]),  # w/o "GOTO_" prefix
                    total_offset=match.start(),
                )
                # Never emit the transition marker as a regular token, and do not
                # keep scanning with the pattern of the state being left.
                return

            yield Token(token_type, match.group(), match.start())
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.