diff --git a/ast35/Include/Python-ast.h b/ast35/Include/Python-ast.h index 6b070e69..94e9b5eb 100644 --- a/ast35/Include/Python-ast.h +++ b/ast35/Include/Python-ast.h @@ -304,6 +304,7 @@ struct _expr { struct { object n; + int contains_underscores; } Num; struct { @@ -570,8 +571,9 @@ expr_ty _Ta35_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, #define Call(a0, a1, a2, a3, a4, a5) _Ta35_Call(a0, a1, a2, a3, a4, a5) expr_ty _Ta35_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int col_offset, PyArena *arena); -#define Num(a0, a1, a2, a3) _Ta35_Num(a0, a1, a2, a3) -expr_ty _Ta35_Num(object n, int lineno, int col_offset, PyArena *arena); +#define Num(a0, a1, a2, a3, a4) _Ta35_Num(a0, a1, a2, a3, a4) +expr_ty _Ta35_Num(object n, int contains_underscores, int lineno, int + col_offset, PyArena *arena); #define Str(a0, a1, a2, a3) _Ta35_Str(a0, a1, a2, a3) expr_ty _Ta35_Str(string s, int lineno, int col_offset, PyArena *arena); #define Bytes(a0, a1, a2, a3) _Ta35_Bytes(a0, a1, a2, a3) diff --git a/ast35/Parser/Python.asdl b/ast35/Parser/Python.asdl index c59a551f..190ee317 100644 --- a/ast35/Parser/Python.asdl +++ b/ast35/Parser/Python.asdl @@ -70,7 +70,9 @@ module Python -- x < 4 < 3 and (x < 4) < 3 | Compare(expr left, cmpop* ops, expr* comparators) | Call(expr func, expr* args, keyword* keywords) - | Num(object n) -- a number as a PyObject. + -- contains_underscores is not part of standard Python ASDL + -- and exists here to signal that a Python 3.6 feature was used + | Num(object n, int? contains_underscores) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? 
| Bytes(bytes s) | NameConstant(singleton value) diff --git a/ast35/Parser/tokenizer.c b/ast35/Parser/tokenizer.c index a23546ed..de122585 100644 --- a/ast35/Parser/tokenizer.c +++ b/ast35/Parser/tokenizer.c @@ -1343,6 +1343,30 @@ verify_identifier(struct tok_state *tok) } #endif +/* Scan the remainder of a decimal digit run, allowing a single '_' between + * digits. Returns the first character following the run, or 0 (with + * tok->done set to E_TOKEN) if a '_' is not followed by a digit. */ +static int +tok_decimal_tail(struct tok_state *tok) +{ + int c; + while (1) { + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c != '_') { + break; + } + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return 0; + } + } + return c; +} + /* Get next token, after space stripping etc. */ static int @@ -1644,64 +1668,88 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '0') { /* Hex, octal or binary -- maybe. */ c = tok_nextc(tok); - if (c == '.') - goto fraction; - if (c == 'j' || c == 'J') - goto imaginary; if (c == 'x' || c == 'X') { - /* Hex */ c = tok_nextc(tok); - if (!isxdigit(c)) { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while (isxdigit(c)); + if (c == '_') + c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } while (c == '_'); } else if (c == 'o' || c == 'O') { /* Octal */ c = tok_nextc(tok); - if (c < '0' || c >= '8') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while ('0' <= c && c < '8'); + if (c == '_') + c = tok_nextc(tok); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } while (c == '_'); } else if (c == 'b' || c == 'B') { /* Binary */ c = tok_nextc(tok); - if (c != '0' && c != '1') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while (c == '0' || c == '1'); + if (c == '_') + c = tok_nextc(tok); + if (c 
!= '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } while (c == '_'); } else { int nonzero = 0; /* maybe old-style octal; c is first char of it */ /* in any case, allow '0' as a literal */ - while (c == '0') + while (1) { + if (c == '_') { + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (c != '0') + break; c = tok_nextc(tok); - while (isdigit(c)) { + } + if (isdigit(c)) { nonzero = 1; - c = tok_nextc(tok); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } } - if (c == '.') + if (c == '.') { + c = tok_nextc(tok); goto fraction; + } else if (c == 'e' || c == 'E') goto exponent; else if (c == 'j' || c == 'J') goto imaginary; else if (nonzero) { + /* Old-style octal: now disallowed. */ tok->done = E_TOKEN; tok_backup(tok, c); return ERRORTOKEN; @@ -1710,17 +1758,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } else { /* Decimal */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } { /* Accept floating point numbers. 
*/ if (c == '.') { + c = tok_nextc(tok); fraction: /* Fraction */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + if (isdigit(c)) { + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } + } } if (c == 'e' || c == 'E') { int e; @@ -1742,9 +1795,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_end = tok->cur; return NUMBER; } - do { - c = tok_nextc(tok); - } while (isdigit(c)); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } } if (c == 'j' || c == 'J') /* Imaginary part */ diff --git a/ast35/Python/Python-ast.c b/ast35/Python/Python-ast.c index 45cf4bb6..ce80939e 100644 --- a/ast35/Python/Python-ast.c +++ b/ast35/Python/Python-ast.c @@ -293,8 +293,10 @@ static char *Call_fields[]={ }; static PyTypeObject *Num_type; _Py_IDENTIFIER(n); +_Py_IDENTIFIER(contains_underscores); static char *Num_fields[]={ "n", + "contains_underscores", }; static PyTypeObject *Str_type; _Py_IDENTIFIER(s); @@ -937,7 +939,7 @@ static int init_types(void) if (!Compare_type) return 0; Call_type = make_type("Call", expr_type, Call_fields, 3); if (!Call_type) return 0; - Num_type = make_type("Num", expr_type, Num_fields, 1); + Num_type = make_type("Num", expr_type, Num_fields, 2); if (!Num_type) return 0; Str_type = make_type("Str", expr_type, Str_fields, 1); if (!Str_type) return 0; @@ -2077,7 +2079,8 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int } expr_ty -Num(object n, int lineno, int col_offset, PyArena *arena) +Num(object n, int contains_underscores, int lineno, int col_offset, PyArena + *arena) { expr_ty p; if (!n) { @@ -2090,6 +2093,7 @@ Num(object n, int lineno, int col_offset, PyArena *arena) return NULL; p->kind = Num_kind; p->v.Num.n = n; + p->v.Num.contains_underscores = contains_underscores; p->lineno = lineno; p->col_offset = col_offset; return p; @@ -3267,6 +3271,12 @@ ast2obj_expr(void* _o) if (_PyObject_SetAttrId(result, &PyId_n, value) == -1) goto failed; Py_DECREF(value); + value = 
ast2obj_int(o->v.Num.contains_underscores); + if (!value) goto failed; + if (_PyObject_SetAttrId(result, &PyId_contains_underscores, value) == + -1) + goto failed; + Py_DECREF(value); break; case Str_kind: result = PyType_GenericNew(Str_type, NULL, NULL); @@ -6267,6 +6277,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) } if (isinstance) { object n; + int contains_underscores; if (_PyObject_HasAttrId(obj, &PyId_n)) { int res; @@ -6279,7 +6290,17 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) PyErr_SetString(PyExc_TypeError, "required field \"n\" missing from Num"); return 1; } - *out = Num(n, lineno, col_offset, arena); + if (exists_not_none(obj, &PyId_contains_underscores)) { + int res; + tmp = _PyObject_GetAttrId(obj, &PyId_contains_underscores); + if (tmp == NULL) goto failed; + res = obj2ast_int(tmp, &contains_underscores, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } else { + contains_underscores = 0; + } + *out = Num(n, contains_underscores, lineno, col_offset, arena); if (*out == NULL) goto failed; return 0; } diff --git a/ast35/Python/ast.c b/ast35/Python/ast.c index 1340717e..d9b632c9 100644 --- a/ast35/Python/ast.c +++ b/ast35/Python/ast.c @@ -2204,7 +2204,9 @@ ast_for_atom(struct compiling *c, const node *n) return Str(str, LINENO(n), n->n_col_offset, c->c_arena); } case NUMBER: { - PyObject *pynum = parsenumber(c, STR(ch)); + const char *s = STR(ch); + int contains_underscores = strchr(s, '_') != NULL; + PyObject *pynum = parsenumber(c, s); if (!pynum) return NULL; @@ -2212,7 +2214,8 @@ ast_for_atom(struct compiling *c, const node *n) Py_DECREF(pynum); return NULL; } - return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena); + return Num(pynum, contains_underscores, LINENO(n), + n->n_col_offset, c->c_arena); } case ELLIPSIS: /* Ellipsis */ return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena); @@ -4124,7 +4127,7 @@ ast_for_stmt(struct compiling *c, const node *n) } static PyObject * -parsenumber(struct 
compiling *c, const char *s) +parsenumber_raw(struct compiling *c, const char *s) { const char *end; long x; @@ -4166,6 +4169,37 @@ parsenumber(struct compiling *c, const char *s) } } +/* Convert the text of a numeric literal to a Python object. Any '_' digit + * separators are stripped into a temporary copy for parsenumber_raw(); + * returns NULL with MemoryError set if that copy cannot be allocated. */ +static PyObject * +parsenumber(struct compiling *c, const char *s) +{ + char *dup, *end; + PyObject *res = NULL; + + assert(s != NULL); + + if (strchr(s, '_') == NULL) { + return parsenumber_raw(c, s); + } + /* Create a duplicate without underscores. */ + dup = PyMem_Malloc(strlen(s) + 1); + if (dup == NULL) { + return PyErr_NoMemory(); + } + end = dup; + for (; *s; s++) { + if (*s != '_') { + *end++ = *s; + } + } + *end = '\0'; + res = parsenumber_raw(c, dup); + PyMem_Free(dup); + return res; +} + static PyObject * decode_utf8(struct compiling *c, const char **sPtr, const char *end) {