From cc48bc56caa79b0d605e80302c32be0457cfde39 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 17 Aug 2018 07:03:58 -0700 Subject: [PATCH] normalize numeric literals (#454) Fixes #452 I ended up making a couple of other normalizations to numeric literals too (lowercase everything, don't allow leading or trailing . in floats, remove redundant + sign in exponent). I don't care too much about those, so I'm happy to change the behavior there. For reference, here is Python's grammar for numeric literals: https://docs.python.org/3/reference/lexical_analysis.html#numeric-literals --- black.py | 59 ++++++++++++++++++++++++++++++ tests/data/numeric_literals.py | 36 ++++++++++++++++++ tests/data/numeric_literals_py2.py | 12 ++++++ tests/test_black.py | 15 ++++++++ 4 files changed, 122 insertions(+) create mode 100644 tests/data/numeric_literals.py create mode 100644 tests/data/numeric_literals_py2.py diff --git a/black.py b/black.py index c9b8be9..7edf2ae 100644 --- a/black.py +++ b/black.py @@ -605,6 +605,7 @@ def format_str( remove_u_prefix=py36 or "unicode_literals" in future_imports, is_pyi=is_pyi, normalize_strings=normalize_strings, + allow_underscores=py36, ) elt = EmptyLineTracker(is_pyi=is_pyi) empty_line = Line() @@ -1391,6 +1392,7 @@ class LineGenerator(Visitor[Line]): normalize_strings: bool = True current_line: Line = Factory(Line) remove_u_prefix: bool = False + allow_underscores: bool = False def line(self, indent: int = 0) -> Iterator[Line]: """Generate a line. 
def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
    """Normalize a numeric (int, float, or complex) literal in place.

    The literal is lowercased, a redundant "+" in the exponent is dropped,
    and floats always get a digit on both sides of the decimal point.
    Octal, hex, and binary literals are only lowercased.

    `leaf.value` is overwritten with the normalized text.  When
    `allow_underscores` is true, runs of more than six digits are regrouped
    with underscores (see `format_int_string`).
    """
    # We want all letters (e in exponents, j in complex literals, a-f
    # in hex literals) to be lowercase.
    text = leaf.value.lower()
    if text.startswith(("0o", "0x", "0b")):
        # Leave octal, hex, and binary literals alone for now.
        leaf.value = text
        return

    # Peel off the complex ("j") or Python 2 long ("l") suffix before looking
    # at the exponent, so that a literal such as "1e10j" never hands "10j" to
    # the digit-formatting helpers.
    suffix = ""
    if text.endswith(("j", "l")):
        text, suffix = text[:-1], text[-1]

    if "e" in text:
        mantissa, _, exponent = text.partition("e")
        sign = ""
        if exponent.startswith("-"):
            sign = "-"
            exponent = exponent[1:]
        elif exponent.startswith("+"):
            # A leading "+" on the exponent is redundant; drop it.
            exponent = exponent[1:]
        mantissa = format_float_or_int_string(mantissa, allow_underscores)
        exponent = format_int_string(exponent, allow_underscores)
        text = f"{mantissa}e{sign}{exponent}"
    else:
        text = format_float_or_int_string(text, allow_underscores)

    leaf.value = f"{text}{suffix}"


def format_float_or_int_string(text: str, allow_underscores: bool) -> str:
    """Format an int or float string such as "1" or "1.0".

    `text` must not contain an exponent or a "j"/"l" suffix.  A missing
    integer or fractional part (".5", "5.") is filled in with "0".
    """
    if "." not in text:
        return format_int_string(text, allow_underscores)

    whole, _, fraction = text.partition(".")
    whole = format_int_string(whole, allow_underscores) if whole else "0"
    fraction = format_int_string(fraction, allow_underscores) if fraction else "0"
    return f"{whole}.{fraction}"


def format_int_string(text: str, allow_underscores: bool) -> str:
    """Normalize underscores in a string to e.g. 1_000_000.

    Input must be a string consisting only of digits and underscores.
    When `allow_underscores` is false the text is returned untouched.
    Otherwise existing underscores are removed and, for more than six
    digits, new ones are inserted every three digits counting from the right.
    """
    if not allow_underscores:
        return text

    digits = text.replace("_", "")
    if len(digits) <= 6:
        # No underscores for numbers <= 6 digits long.
        return digits

    # Group the digit characters directly instead of round-tripping through
    # int(): int() would discard leading zeros, which are significant in the
    # fractional part of a float ("0.000000001" must not become "0.1").
    head = len(digits) % 3 or 3
    groups = [digits[:head]]
    groups.extend(digits[i : i + 3] for i in range(head, len(digits), 3))
    return "_".join(groups)
@patch("black.dump_to_file", dump_to_stderr)
def test_numeric_literals(self) -> None:
    """Format the `numeric_literals` fixture in Python 3.6 mode.

    The result must match the fixture's expected output, be equivalent to
    the input, and be stable.
    """
    original, wanted = read_data("numeric_literals")
    formatted = fs(original, mode=black.FileMode.PYTHON36)
    self.assertFormatEqual(wanted, formatted)
    black.assert_equivalent(original, formatted)
    black.assert_stable(original, formatted, line_length=ll)


@patch("black.dump_to_file", dump_to_stderr)
def test_numeric_literals_py2(self) -> None:
    """Format the `numeric_literals_py2` fixture with the default mode.

    Only the expected output and stability are checked here.
    """
    # NOTE(review): no equivalence check, presumably because Python 2 long
    # literals cannot be parsed by the interpreter running the tests - confirm.
    original, wanted = read_data("numeric_literals_py2")
    formatted = fs(original)
    self.assertFormatEqual(wanted, formatted)
    black.assert_stable(original, formatted, line_length=ll)