git.madduck.net Git - etc/vim.git/commitdiff

Blacken .py files in blib2to3 (#1011)
author     Michael J. Sullivan <sully@msully.net>
           Sun, 20 Oct 2019 13:55:31 +0000 (06:55 -0700)
committer  Łukasz Langa <lukasz@langa.pl>
           Sun, 20 Oct 2019 13:55:31 +0000 (15:55 +0200)
* Blacken .py files in blib2to3

This is in preparation for adding type annotations to blib2to3 in
order to compile it with mypyc (#1009, which I can rebase on top of
this).

To enforce that it stays blackened, I just cargo-culted the existing
test code used for validating formatting (see the sketch after this
message). It feels pretty clunky now, though, so I could abstract the
common logic out into a helper if that seems better. (But error
messages might be less clear then?)

* Tidy up the tests
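
Not the commit's actual test code, just a minimal sketch of the
enforcement idea: assuming Black's public format_str/FileMode API and a
blib2to3/ directory at the repository root, a test can assert that
reformatting every .py file is a no-op.

import unittest
from pathlib import Path

import black


class TestBlib2to3Blackened(unittest.TestCase):
    def test_py_files_are_blackened(self):
        # Hypothetical check: reformatting an already-blackened file
        # must return the source unchanged (black is idempotent).
        for path in Path("blib2to3").rglob("*.py"):
            src = path.read_text(encoding="utf-8")
            formatted = black.format_str(src, mode=black.FileMode())
            self.assertEqual(formatted, src, f"{path} is not blackened")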

12 files changed:
blib2to3/__init__.py
blib2to3/pgen2/conv.py
blib2to3/pgen2/driver.py
blib2to3/pgen2/grammar.py
blib2to3/pgen2/literals.py
blib2to3/pgen2/parse.py
blib2to3/pgen2/pgen.py
blib2to3/pgen2/token.py
blib2to3/pgen2/tokenize.py
blib2to3/pygram.py
blib2to3/pytree.py
tests/test_black.py

blib2to3/__init__.py
index ea30561d839798e1ef284fb70adc009fda12db16..1bb8bf6d7fd4c8d09aea89b47de20fb8bbb61626 100644 (file)
@@ -1 +1 @@
-#empty
+# empty

blib2to3/pgen2/conv.py
index dd70752722fc43050b570f527df69d3fa653960c..728b152655c5fa50ba952c9ee6fb485599d218db 100644 (file)
@@ -70,8 +70,7 @@ class Converter(grammar.Grammar):
             lineno += 1
             mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
             if not mo and line.strip():
-                print("%s(%s): can't parse %s" % (filename, lineno,
-                                                  line.strip()))
+                print("%s(%s): can't parse %s" % (filename, lineno, line.strip()))
             else:
                 symbol, number = mo.groups()
                 number = int(number)
@@ -118,39 +117,38 @@ class Converter(grammar.Grammar):
         lineno = 0
 
         # Expect the two #include lines
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == '#include "pgenheaders.h"\n', (lineno, line)
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == '#include "grammar.h"\n', (lineno, line)
 
         # Parse the state definitions
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         allarcs = {}
         states = []
         while line.startswith("static arc "):
             while line.startswith("static arc "):
-                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
-                              line)
+                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
                 assert mo, (lineno, line)
                 n, m, k = list(map(int, mo.groups()))
                 arcs = []
                 for _ in range(k):
-                    lineno, line = lineno+1, next(f)
+                    lineno, line = lineno + 1, next(f)
                     mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                     assert mo, (lineno, line)
                     i, j = list(map(int, mo.groups()))
                     arcs.append((i, j))
-                lineno, line = lineno+1, next(f)
+                lineno, line = lineno + 1, next(f)
                 assert line == "};\n", (lineno, line)
                 allarcs[(n, m)] = arcs
-                lineno, line = lineno+1, next(f)
+                lineno, line = lineno + 1, next(f)
             mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
             assert mo, (lineno, line)
             s, t = list(map(int, mo.groups()))
             assert s == len(states), (lineno, line)
             state = []
             for _ in range(t):
-                lineno, line = lineno+1, next(f)
+                lineno, line = lineno + 1, next(f)
                 mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                 assert mo, (lineno, line)
                 k, n, m = list(map(int, mo.groups()))
@@ -158,9 +156,9 @@ class Converter(grammar.Grammar):
                 assert k == len(arcs), (lineno, line)
                 state.append(arcs)
             states.append(state)
-            lineno, line = lineno+1, next(f)
+            lineno, line = lineno + 1, next(f)
             assert line == "};\n", (lineno, line)
-            lineno, line = lineno+1, next(f)
+            lineno, line = lineno + 1, next(f)
         self.states = states
 
         # Parse the dfas
@@ -169,9 +167,8 @@ class Converter(grammar.Grammar):
         assert mo, (lineno, line)
         ndfas = int(mo.group(1))
         for i in range(ndfas):
-            lineno, line = lineno+1, next(f)
-            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
-                          line)
+            lineno, line = lineno + 1, next(f)
+            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line)
             assert mo, (lineno, line)
             symbol = mo.group(2)
             number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
@@ -180,7 +177,7 @@ class Converter(grammar.Grammar):
             assert x == 0, (lineno, line)
             state = states[z]
             assert y == len(state), (lineno, line)
-            lineno, line = lineno+1, next(f)
+            lineno, line = lineno + 1, next(f)
             mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
             assert mo, (lineno, line)
             first = {}
@@ -188,21 +185,21 @@ class Converter(grammar.Grammar):
             for i, c in enumerate(rawbitset):
                 byte = ord(c)
                 for j in range(8):
-                    if byte & (1<<j):
-                        first[i*8 + j] = 1
+                    if byte & (1 << j):
+                        first[i * 8 + j] = 1
             dfas[number] = (state, first)
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == "};\n", (lineno, line)
         self.dfas = dfas
 
         # Parse the labels
         labels = []
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         mo = re.match(r"static label labels\[(\d+)\] = {$", line)
         assert mo, (lineno, line)
         nlabels = int(mo.group(1))
         for i in range(nlabels):
-            lineno, line = lineno+1, next(f)
+            lineno, line = lineno + 1, next(f)
             mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
             assert mo, (lineno, line)
             x, y = mo.groups()
@@ -212,35 +209,35 @@ class Converter(grammar.Grammar):
             else:
                 y = eval(y)
             labels.append((x, y))
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == "};\n", (lineno, line)
         self.labels = labels
 
         # Parse the grammar struct
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         mo = re.match(r"\s+(\d+),$", line)
         assert mo, (lineno, line)
         ndfas = int(mo.group(1))
         assert ndfas == len(self.dfas)
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == "\tdfas,\n", (lineno, line)
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         mo = re.match(r"\s+{(\d+), labels},$", line)
         assert mo, (lineno, line)
         nlabels = int(mo.group(1))
         assert nlabels == len(self.labels), (lineno, line)
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         mo = re.match(r"\s+(\d+)$", line)
         assert mo, (lineno, line)
         start = int(mo.group(1))
         assert start in self.number2symbol, (lineno, line)
         self.start = start
-        lineno, line = lineno+1, next(f)
+        lineno, line = lineno + 1, next(f)
         assert line == "};\n", (lineno, line)
         try:
-            lineno, line = lineno+1, next(f)
+            lineno, line = lineno + 1, next(f)
         except StopIteration:
             pass
         else:
@@ -248,8 +245,8 @@ class Converter(grammar.Grammar):
 
     def finish_off(self):
         """Create additional useful structures.  (Internal)."""
-        self.keywords = {} # map from keyword strings to arc labels
-        self.tokens = {}   # map from numeric token values to arc labels
+        self.keywords = {}  # map from keyword strings to arc labels
+        self.tokens = {}  # map from numeric token values to arc labels
         for ilabel, (type, value) in enumerate(self.labels):
             if type == token.NAME and value is not None:
                 self.keywords[value] = ilabel

blib2to3/pgen2/driver.py
index 6452c57a1fa4b4a9e4c35541205c4919d6e48ddc..6ba6b929b4b039993cbfa69fb7999226c91312e8 100644 (file)
@@ -28,13 +28,7 @@ from . import grammar, parse, token, tokenize, pgen
 
 
 class Driver(object):
-
-    def __init__(
-        self,
-        grammar,
-        convert=None,
-        logger=None,
-    ):
+    def __init__(self, grammar, convert=None, logger=None):
         self.grammar = grammar
         if logger is None:
             logger = logging.getLogger(__name__)
@@ -73,8 +67,9 @@ class Driver(object):
             if type == token.OP:
                 type = grammar.opmap[value]
             if debug:
-                self.logger.debug("%s %r (prefix=%r)",
-                                  token.tok_name[type], value, prefix)
+                self.logger.debug(
+                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
+                )
             if type == token.INDENT:
                 indent_columns.append(len(value))
                 _prefix = prefix + value
@@ -96,8 +91,7 @@ class Driver(object):
                 column = 0
         else:
             # We never broke out -- EOF is too soon (how can this happen???)
-            raise parse.ParseError("incomplete input",
-                                   type, value, (prefix, start))
+            raise parse.ParseError("incomplete input", type, value, (prefix, start))
         return p.rootnode
 
     def parse_stream_raw(self, stream, debug=False):
@@ -117,8 +111,7 @@ class Driver(object):
     def parse_string(self, text, debug=False):
         """Parse a string and return the syntax tree."""
         tokens = tokenize.generate_tokens(
-            io.StringIO(text).readline,
-            grammar=self.grammar
+            io.StringIO(text).readline, grammar=self.grammar
         )
         return self.parse_tokens(tokens, debug)
 
@@ -130,24 +123,24 @@ class Driver(object):
         for char in prefix:
             current_line += char
             if wait_for_nl:
-                if char == '\n':
+                if char == "\n":
                     if current_line.strip() and current_column < column:
-                        res = ''.join(lines)
-                        return res, prefix[len(res):]
+                        res = "".join(lines)
+                        return res, prefix[len(res) :]
 
                     lines.append(current_line)
                     current_line = ""
                     current_column = 0
                     wait_for_nl = False
-            elif char in ' \t':
+            elif char in " \t":
                 current_column += 1
-            elif char == '\n':
+            elif char == "\n":
                 # unexpected empty line
                 current_column = 0
             else:
                 # indent is finished
                 wait_for_nl = True
-        return ''.join(lines), current_line
+        return "".join(lines), current_line
 
 
 def _generate_pickle_name(gt, cache_dir=None):
@@ -161,8 +154,7 @@ def _generate_pickle_name(gt, cache_dir=None):
         return name
 
 
-def load_grammar(gt="Grammar.txt", gp=None,
-                 save=True, force=False, logger=None):
+def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None):
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
         logger = logging.getLogger(__name__)
@@ -219,11 +211,11 @@ def main(*args):
     """
     if not args:
         args = sys.argv[1:]
-    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
-                        format='%(message)s')
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
     for gt in args:
         load_grammar(gt, save=True, force=True)
     return True
 
+
 if __name__ == "__main__":
     sys.exit(int(not main()))

blib2to3/pgen2/grammar.py
index aa025cfd2543de34fe3eea6ec51be0a6138c9c9a..d6f0fc2cbe6b7a65344b0612c3b68f66dfff9af1 100644 (file)
@@ -90,7 +90,9 @@ class Grammar(object):
 
     def dump(self, filename):
         """Dump the grammar tables to a pickle file."""
-        with tempfile.NamedTemporaryFile(dir=os.path.dirname(filename), delete=False) as f:
+        with tempfile.NamedTemporaryFile(
+            dir=os.path.dirname(filename), delete=False
+        ) as f:
             pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
         os.replace(f.name, filename)
 
@@ -109,8 +111,14 @@ class Grammar(object):
         Copy the grammar.
         """
         new = self.__class__()
-        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
-                          "tokens", "symbol2label"):
+        for dict_attr in (
+            "symbol2number",
+            "number2symbol",
+            "dfas",
+            "keywords",
+            "tokens",
+            "symbol2label",
+        ):
             setattr(new, dict_attr, getattr(self, dict_attr).copy())
         new.labels = self.labels[:]
         new.states = self.states[:]
@@ -121,6 +129,7 @@ class Grammar(object):
     def report(self):
         """Dump the grammar tables to standard output, for debugging."""
         from pprint import pprint
+
         print("s2n")
         pprint(self.symbol2number)
         print("n2s")

blib2to3/pgen2/literals.py
index baa17e12a9c42b37d421638da2a59b2bf050f075..93bee528a14c22b568fe49dfd331eb3759873d31 100644 (file)
@@ -5,16 +5,19 @@
 
 import regex as re
 
-simple_escapes = {"a": "\a",
-                  "b": "\b",
-                  "f": "\f",
-                  "n": "\n",
-                  "r": "\r",
-                  "t": "\t",
-                  "v": "\v",
-                  "'": "'",
-                  '"': '"',
-                  "\\": "\\"}
+simple_escapes = {
+    "a": "\a",
+    "b": "\b",
+    "f": "\f",
+    "n": "\n",
+    "r": "\r",
+    "t": "\t",
+    "v": "\v",
+    "'": "'",
+    '"': '"',
+    "\\": "\\",
+}
+
 
 def escape(m):
     all, tail = m.group(0, 1)
@@ -37,16 +40,18 @@ def escape(m):
             raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
     return chr(i)
 
+
 def evalString(s):
     assert s.startswith("'") or s.startswith('"'), repr(s[:1])
     q = s[0]
-    if s[:3] == q*3:
-        q = q*3
-    assert s.endswith(q), repr(s[-len(q):])
-    assert len(s) >= 2*len(q)
-    s = s[len(q):-len(q)]
+    if s[:3] == q * 3:
+        q = q * 3
+    assert s.endswith(q), repr(s[-len(q) :])
+    assert len(s) >= 2 * len(q)
+    s = s[len(q) : -len(q)]
     return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
 
+
 def test():
     for i in range(256):
         c = chr(i)

blib2to3/pgen2/parse.py
index 6bebdbba7e52d6bb10c39104cef4ba21922b436e..22f14c89d6cc788932cfc798f3ef70f74b6e1567 100644 (file)
@@ -13,17 +13,20 @@ how this parsing engine works.
 # Local imports
 from . import token
 
+
 class ParseError(Exception):
     """Exception to signal the parser is stuck."""
 
     def __init__(self, msg, type, value, context):
-        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
-                           (msg, type, value, context))
+        Exception.__init__(
+            self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
+        )
         self.msg = msg
         self.type = type
         self.value = value
         self.context = context
 
+
 class Parser(object):
     """Parser engine.
 
@@ -108,7 +111,7 @@ class Parser(object):
         stackentry = (self.grammar.dfas[start], 0, newnode)
         self.stack = [stackentry]
         self.rootnode = None
-        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+        self.used_names = set()  # Aliased to self.rootnode.used_names in pop()
 
     def addtoken(self, type, value, context):
         """Add a token; return True iff this is the end of the program."""
@@ -145,15 +148,14 @@ class Parser(object):
                     if ilabel in itsfirst:
                         # Push a symbol
                         self.push(t, self.grammar.dfas[t], newstate, context)
-                        break # To continue the outer while loop
+                        break  # To continue the outer while loop
             else:
                 if (0, state) in arcs:
                     # An accepting state, pop it and try something else
                     self.pop()
                     if not self.stack:
                         # Done parsing, but another token is input
-                        raise ParseError("too much input",
-                                         type, value, context)
+                        raise ParseError("too much input", type, value, context)
                 else:
                     # No success finding a transition
                     raise ParseError("bad input", type, value, context)

blib2to3/pgen2/pgen.py
index b0cbd16c4dad4fbf8b7afc8c776786dc7ad997ec..1da6925e5466d5022679fbfdac15965e7ee967fc 100644 (file)
@@ -4,11 +4,12 @@
 # Pgen imports
 from . import grammar, token, tokenize
 
+
 class PgenGrammar(grammar.Grammar):
     pass
 
-class ParserGenerator(object):
 
+class ParserGenerator(object):
     def __init__(self, filename, stream=None):
         close_stream = None
         if stream is None:
@@ -17,11 +18,11 @@ class ParserGenerator(object):
         self.filename = filename
         self.stream = stream
         self.generator = tokenize.generate_tokens(stream.readline)
-        self.gettoken() # Initialize lookahead
+        self.gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self.parse()
         if close_stream is not None:
             close_stream()
-        self.first = {} # map from symbol name to set of tokens
+        self.first = {}  # map from symbol name to set of tokens
         self.addfirstsets()
 
     def make_grammar(self):
@@ -96,7 +97,7 @@ class ParserGenerator(object):
                     return ilabel
             else:
                 # An operator (any non-numeric token)
-                itoken = grammar.opmap[value] # Fails if unknown token
+                itoken = grammar.opmap[value]  # Fails if unknown token
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -110,11 +111,11 @@ class ParserGenerator(object):
         for name in names:
             if name not in self.first:
                 self.calcfirst(name)
-            #print name, self.first[name].keys()
+            # print name, self.first[name].keys()
 
     def calcfirst(self, name):
         dfa = self.dfas[name]
-        self.first[name] = None # dummy to detect left recursion
+        self.first[name] = None  # dummy to detect left recursion
         state = dfa[0]
         totalset = {}
         overlapcheck = {}
@@ -136,9 +137,11 @@ class ParserGenerator(object):
         for label, itsfirst in overlapcheck.items():
             for symbol in itsfirst:
                 if symbol in inverse:
-                    raise ValueError("rule %s is ambiguous; %s is in the"
-                                     " first sets of %s as well as %s" %
-                                     (name, symbol, label, inverse[symbol]))
+                    raise ValueError(
+                        "rule %s is ambiguous; %s is in the"
+                        " first sets of %s as well as %s"
+                        % (name, symbol, label, inverse[symbol])
+                    )
                 inverse[symbol] = label
         self.first[name] = totalset
 
@@ -154,14 +157,14 @@ class ParserGenerator(object):
             self.expect(token.OP, ":")
             a, z = self.parse_rhs()
             self.expect(token.NEWLINE)
-            #self.dump_nfa(name, a, z)
+            # self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)
-            #self.dump_dfa(name, dfa)
+            # self.dump_dfa(name, dfa)
             oldlen = len(dfa)
             self.simplify_dfa(dfa)
             newlen = len(dfa)
             dfas[name] = dfa
-            #print name, oldlen, newlen
+            # print name, oldlen, newlen
             if startsymbol is None:
                 startsymbol = name
         return dfas, startsymbol
@@ -173,10 +176,12 @@ class ParserGenerator(object):
         # values.
         assert isinstance(start, NFAState)
         assert isinstance(finish, NFAState)
+
         def closure(state):
             base = {}
             addclosure(state, base)
             return base
+
         def addclosure(state, base):
             assert isinstance(state, NFAState)
             if state in base:
@@ -185,8 +190,9 @@ class ParserGenerator(object):
             for label, next in state.arcs:
                 if label is None:
                     addclosure(next, base)
+
         states = [DFAState(closure(start), finish)]
-        for state in states: # NB states grows while we're iterating
+        for state in states:  # NB states grows while we're iterating
             arcs = {}
             for nfastate in state.nfaset:
                 for label, next in nfastate.arcs:
@@ -200,7 +206,7 @@ class ParserGenerator(object):
                     st = DFAState(nfaset, finish)
                     states.append(st)
                 state.addarc(st, label)
-        return states # List of DFAState instances; first one is start
+        return states  # List of DFAState instances; first one is start
 
     def dump_nfa(self, name, start, finish):
         print("Dump of NFA for", name)
@@ -236,10 +242,10 @@ class ParserGenerator(object):
         while changes:
             changes = False
             for i, state_i in enumerate(dfa):
-                for j in range(i+1, len(dfa)):
+                for j in range(i + 1, len(dfa)):
                     state_j = dfa[j]
                     if state_i == state_j:
-                        #print "  unify", i, j
+                        # print "  unify", i, j
                         del dfa[j]
                         for state in dfa:
                             state.unifystate(state_j, state_i)
@@ -266,8 +272,7 @@ class ParserGenerator(object):
     def parse_alt(self):
         # ALT: ITEM+
         a, b = self.parse_item()
-        while (self.value in ("(", "[") or
-               self.type in (token.NAME, token.STRING)):
+        while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
@@ -307,13 +312,15 @@ class ParserGenerator(object):
             self.gettoken()
             return a, z
         else:
-            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
-                             self.type, self.value)
+            self.raise_error(
+                "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
+            )
 
     def expect(self, type, value=None):
         if self.type != type or (value is not None and self.value != value):
-            self.raise_error("expected %s/%s, got %s/%s",
-                             type, value, self.type, self.value)
+            self.raise_error(
+                "expected %s/%s, got %s/%s", type, value, self.type, self.value
+            )
         value = self.value
         self.gettoken()
         return value
@@ -323,7 +330,7 @@ class ParserGenerator(object):
         while tup[0] in (tokenize.COMMENT, tokenize.NL):
             tup = next(self.generator)
         self.type, self.value, self.begin, self.end, self.line = tup
-        #print token.tok_name[self.type], repr(self.value)
+        # print token.tok_name[self.type], repr(self.value)
 
     def raise_error(self, msg, *args):
         if args:
@@ -331,28 +338,27 @@ class ParserGenerator(object):
                 msg = msg % args
             except:
                 msg = " ".join([msg] + list(map(str, args)))
-        raise SyntaxError(msg, (self.filename, self.end[0],
-                                self.end[1], self.line))
+        raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
 
-class NFAState(object):
 
+class NFAState(object):
     def __init__(self):
-        self.arcs = [] # list of (label, NFAState) pairs
+        self.arcs = []  # list of (label, NFAState) pairs
 
     def addarc(self, next, label=None):
         assert label is None or isinstance(label, str)
         assert isinstance(next, NFAState)
         self.arcs.append((label, next))
 
-class DFAState(object):
 
+class DFAState(object):
     def __init__(self, nfaset, final):
         assert isinstance(nfaset, dict)
         assert isinstance(next(iter(nfaset)), NFAState)
         assert isinstance(final, NFAState)
         self.nfaset = nfaset
         self.isfinal = final in nfaset
-        self.arcs = {} # map from label to DFAState
+        self.arcs = {}  # map from label to DFAState
 
     def addarc(self, next, label):
         assert isinstance(label, str)
@@ -379,7 +385,8 @@ class DFAState(object):
                 return False
         return True
 
-    __hash__ = None # For Py3 compatibility.
+    __hash__ = None  # For Py3 compatibility.
+
 
 def generate_grammar(filename="Grammar.txt"):
     p = ParserGenerator(filename)

blib2to3/pgen2/token.py
index 40aa89d3a100182d5254dc928b98ea5bb1d6fa23..583a8a15c8ae38a2370a119a8c8589e580e05b31 100644 (file)
@@ -3,7 +3,7 @@
 #  Taken from Python (r53757) and modified to include some tokens
 #   originally monkeypatched in by pgen2.tokenize
 
-#--start constants--
+# --start constants--
 ENDMARKER = 0
 NAME = 1
 NUMBER = 2
@@ -66,7 +66,7 @@ ERRORTOKEN = 58
 COLONEQUAL = 59
 N_TOKENS = 60
 NT_OFFSET = 256
-#--end constants--
+# --end constants--
 
 tok_name = {}
 for _name, _value in list(globals().items()):
@@ -77,8 +77,10 @@ for _name, _value in list(globals().items()):
 def ISTERMINAL(x):
     return x < NT_OFFSET
 
+
 def ISNONTERMINAL(x):
     return x >= NT_OFFSET
 
+
 def ISEOF(x):
     return x == ENDMARKER

blib2to3/pgen2/tokenize.py
index 977548912c00f584ef135036d7185b67f289f47d..8c1c4cfac680a2f4815d880c989d6e3b3d115bb0 100644 (file)
@@ -25,17 +25,20 @@ are the same, except instead of generating tokens, tokeneater is a callback
 function to which the 5 fields described above are passed as 5 arguments,
 each time a new token is found."""
 
-__author__ = 'Ka-Ping Yee <ping@lfw.org>'
-__credits__ = \
-    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
+__author__ = "Ka-Ping Yee <ping@lfw.org>"
+__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
 
 import regex as re
 from codecs import BOM_UTF8, lookup
 from blib2to3.pgen2.token import *
 
 from . import token
-__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
-           "generate_tokens", "untokenize"]
+
+__all__ = [x for x in dir(token) if x[0] != "_"] + [
+    "tokenize",
+    "generate_tokens",
+    "untokenize",
+]
 del token
 
 try:
@@ -45,29 +48,40 @@ except NameError:
     # valid Python 3 code.
     bytes = str
 
-def group(*choices): return '(' + '|'.join(choices) + ')'
-def any(*choices): return group(*choices) + '*'
-def maybe(*choices): return group(*choices) + '?'
+
+def group(*choices):
+    return "(" + "|".join(choices) + ")"
+
+
+def any(*choices):
+    return group(*choices) + "*"
+
+
+def maybe(*choices):
+    return group(*choices) + "?"
+
+
 def _combinations(*l):
-    return set(
-        x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()
-    )
+    return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold())
 
-Whitespace = r'[ \f\t]*'
-Comment = r'#[^\r\n]*'
-Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'\w+'  # this is invalid but it's fine because Name comes after Number in all groups
 
-Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
-Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
-Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
-Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
+Whitespace = r"[ \f\t]*"
+Comment = r"#[^\r\n]*"
+Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
+Name = r"\w+"  # this is invalid but it's fine because Name comes after Number in all groups
+
+Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
+Hexnumber = r"0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?"
+Octnumber = r"0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?"
+Decnumber = group(r"[1-9]\d*(?:_\d+)*[lL]?", "0[lL]?")
 Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
-Exponent = r'[eE][-+]?\d+(?:_\d+)*'
-Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
-Expfloat = r'\d+(?:_\d+)*' + Exponent
+Exponent = r"[eE][-+]?\d+(?:_\d+)*"
+Pointfloat = group(r"\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?", r"\.\d+(?:_\d+)*") + maybe(
+    Exponent
+)
+Expfloat = r"\d+(?:_\d+)*" + Exponent
 Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
+Imagnumber = group(r"\d+(?:_\d+)*[jJ]", Floatnumber + r"[jJ]")
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
 # Tail end of ' string.
@@ -81,30 +95,39 @@ Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
 _litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
 Triple = group(_litprefix + "'''", _litprefix + '"""')
 # Single-line ' or " string.
-String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(
+    _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+    _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
+)
 
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
 # recognized as two instances of =).
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
-                 r"//=?", r"->",
-                 r"[+\-*/%&@|^=<>:]=?",
-                 r"~")
+Operator = group(
+    r"\*\*=?",
+    r">>=?",
+    r"<<=?",
+    r"<>",
+    r"!=",
+    r"//=?",
+    r"->",
+    r"[+\-*/%&@|^=<>:]=?",
+    r"~",
+)
 
-Bracket = '[][(){}]'
-Special = group(r'\r?\n', r'[:;.,`@]')
+Bracket = "[][(){}]"
+Special = group(r"\r?\n", r"[:;.,`@]")
 Funny = group(Operator, Bracket, Special)
 
 PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 
 # First (or only) line of ' or " string.
-ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                group("'", r'\\\r?\n'),
-                _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                group('"', r'\\\r?\n'))
-PseudoExtras = group(r'\\\r?\n', Comment, Triple)
+ContStr = group(
+    _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
+    _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
+)
+PseudoExtras = group(r"\\\r?\n", Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 
 tokenprog = re.compile(Token, re.UNICODE)
@@ -113,39 +136,50 @@ single3prog = re.compile(Single3)
 double3prog = re.compile(Double3)
 
 _strprefixes = (
-    _combinations('r', 'R', 'f', 'F') |
-    _combinations('r', 'R', 'b', 'B') |
-    {'u', 'U', 'ur', 'uR', 'Ur', 'UR'}
+    _combinations("r", "R", "f", "F")
+    | _combinations("r", "R", "b", "B")
+    | {"u", "U", "ur", "uR", "Ur", "UR"}
 )
 
-endprogs = {"'": re.compile(Single), '"': re.compile(Double),
-            "'''": single3prog, '"""': double3prog,
-            **{f"{prefix}'''": single3prog for prefix in _strprefixes},
-            **{f'{prefix}"""': double3prog for prefix in _strprefixes},
-            **{prefix: None for prefix in _strprefixes}}
+endprogs = {
+    "'": re.compile(Single),
+    '"': re.compile(Double),
+    "'''": single3prog,
+    '"""': double3prog,
+    **{f"{prefix}'''": single3prog for prefix in _strprefixes},
+    **{f'{prefix}"""': double3prog for prefix in _strprefixes},
+    **{prefix: None for prefix in _strprefixes},
+}
 
 triple_quoted = (
-    {"'''", '"""'} |
-    {f"{prefix}'''" for prefix in _strprefixes} |
-    {f'{prefix}"""' for prefix in _strprefixes}
+    {"'''", '"""'}
+    | {f"{prefix}'''" for prefix in _strprefixes}
+    {f'{prefix}"""' for prefix in _strprefixes}
 )
 single_quoted = (
-    {"'", '"'} |
-    {f"{prefix}'" for prefix in _strprefixes} |
-    {f'{prefix}"' for prefix in _strprefixes}
+    {"'", '"'}
+    | {f"{prefix}'" for prefix in _strprefixes}
+    {f'{prefix}"' for prefix in _strprefixes}
 )
 
 tabsize = 8
 
-class TokenError(Exception): pass
 
-class StopTokenizing(Exception): pass
+class TokenError(Exception):
+    pass
+
+
+class StopTokenizing(Exception):
+    pass
+
 
-def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
+def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line):  # for testing
     (srow, scol) = xxx_todo_changeme
     (erow, ecol) = xxx_todo_changeme1
-    print("%d,%d-%d,%d:\t%s\t%s" % \
-        (srow, scol, erow, ecol, tok_name[type], repr(token)))
+    print(
+        "%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
+    )
+
 
 def tokenize(readline, tokeneater=printtoken):
     """
@@ -165,13 +199,14 @@ def tokenize(readline, tokeneater=printtoken):
     except StopTokenizing:
         pass
 
+
 # backwards compatible interface
 def tokenize_loop(readline, tokeneater):
     for token_info in generate_tokens(readline):
         tokeneater(*token_info)
 
-class Untokenizer:
 
+class Untokenizer:
     def __init__(self):
         self.tokens = []
         self.prev_row = 1
@@ -204,14 +239,14 @@ class Untokenizer:
         toks_append = self.tokens.append
         toknum, tokval = token
         if toknum in (NAME, NUMBER):
-            tokval += ' '
+            tokval += " "
         if toknum in (NEWLINE, NL):
             startline = True
         for tok in iterable:
             toknum, tokval = tok[:2]
 
             if toknum in (NAME, NUMBER, ASYNC, AWAIT):
-                tokval += ' '
+                tokval += " "
 
             if toknum == INDENT:
                 indents.append(tokval)
@@ -226,8 +261,10 @@ class Untokenizer:
                 startline = False
             toks_append(tokval)
 
-cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
-blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
+
+cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
+blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
+
 
 def _get_normal_name(orig_enc):
     """Imitates get_normal_name in tokenizer.c."""
@@ -235,11 +272,13 @@ def _get_normal_name(orig_enc):
     enc = orig_enc[:12].lower().replace("_", "-")
     if enc == "utf-8" or enc.startswith("utf-8-"):
         return "utf-8"
-    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
-       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
+    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
+        ("latin-1-", "iso-8859-1-", "iso-latin-1-")
+    ):
         return "iso-8859-1"
     return orig_enc
 
+
 def detect_encoding(readline):
     """
     The detect_encoding() function is used to detect the encoding that should
@@ -260,7 +299,8 @@ def detect_encoding(readline):
     """
     bom_found = False
     encoding = None
-    default = 'utf-8'
+    default = "utf-8"
+
     def read_or_stop():
         try:
             return readline()
@@ -269,7 +309,7 @@ def detect_encoding(readline):
 
     def find_cookie(line):
         try:
-            line_string = line.decode('ascii')
+            line_string = line.decode("ascii")
         except UnicodeDecodeError:
             return None
         match = cookie_re.match(line_string)
@@ -283,17 +323,17 @@ def detect_encoding(readline):
             raise SyntaxError("unknown encoding: " + encoding)
 
         if bom_found:
-            if codec.name != 'utf-8':
+            if codec.name != "utf-8":
                 # This behaviour mimics the Python interpreter
-                raise SyntaxError('encoding problem: utf-8')
-            encoding += '-sig'
+                raise SyntaxError("encoding problem: utf-8")
+            encoding += "-sig"
         return encoding
 
     first = read_or_stop()
     if first.startswith(BOM_UTF8):
         bom_found = True
         first = first[3:]
-        default = 'utf-8-sig'
+        default = "utf-8-sig"
     if not first:
         return default, []
 
@@ -313,6 +353,7 @@ def detect_encoding(readline):
 
     return default, [first, second]
 
+
 def untokenize(iterable):
     """Transform tokens back into Python source code.
 
@@ -334,6 +375,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)
 
+
 def generate_tokens(readline, grammar=None):
     """
     The generate_tokens() generator requires one argument, readline, which
@@ -351,8 +393,8 @@ def generate_tokens(readline, grammar=None):
     logical line; continuation lines are included.
     """
     lnum = parenlev = continued = 0
-    numchars = '0123456789'
-    contstr, needcont = '', 0
+    numchars = "0123456789"
+    contstr, needcont = "", 0
     contline = None
     indents = [0]
 
@@ -365,28 +407,38 @@ def generate_tokens(readline, grammar=None):
     async_def_indent = 0
     async_def_nl = False
 
-    while 1:                                   # loop over lines in stream
+    while 1:  # loop over lines in stream
         try:
             line = readline()
         except StopIteration:
-            line = ''
+            line = ""
         lnum = lnum + 1
         pos, max = 0, len(line)
 
-        if contstr:                            # continued string
+        if contstr:  # continued string
             if not line:
                 raise TokenError("EOF in multi-line string", strstart)
             endmatch = endprog.match(line)
             if endmatch:
                 pos = end = endmatch.end(0)
-                yield (STRING, contstr + line[:end],
-                       strstart, (lnum, end), contline + line)
-                contstr, needcont = '', 0
+                yield (
+                    STRING,
+                    contstr + line[:end],
+                    strstart,
+                    (lnum, end),
+                    contline + line,
+                )
+                contstr, needcont = "", 0
                 contline = None
-            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
-                yield (ERRORTOKEN, contstr + line,
-                           strstart, (lnum, len(line)), contline)
-                contstr = ''
+            elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
+                yield (
+                    ERRORTOKEN,
+                    contstr + line,
+                    strstart,
+                    (lnum, len(line)),
+                    contline,
+                )
+                contstr = ""
                 contline = None
                 continue
             else:
@@ -395,42 +447,53 @@ def generate_tokens(readline, grammar=None):
                 continue
 
         elif parenlev == 0 and not continued:  # new statement
-            if not line: break
+            if not line:
+                break
             column = 0
-            while pos < max:                   # measure leading whitespace
-                if line[pos] == ' ': column = column + 1
-                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
-                elif line[pos] == '\f': column = 0
-                else: break
+            while pos < max:  # measure leading whitespace
+                if line[pos] == " ":
+                    column = column + 1
+                elif line[pos] == "\t":
+                    column = (column // tabsize + 1) * tabsize
+                elif line[pos] == "\f":
+                    column = 0
+                else:
+                    break
                 pos = pos + 1
-            if pos == max: break
+            if pos == max:
+                break
 
             if stashed:
                 yield stashed
                 stashed = None
 
-            if line[pos] in '\r\n':            # skip blank lines
+            if line[pos] in "\r\n":  # skip blank lines
                 yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
                 continue
 
-            if line[pos] == '#':               # skip comments
-                comment_token = line[pos:].rstrip('\r\n')
+            if line[pos] == "#":  # skip comments
+                comment_token = line[pos:].rstrip("\r\n")
                 nl_pos = pos + len(comment_token)
-                yield (COMMENT, comment_token,
-                        (lnum, pos), (lnum, pos + len(comment_token)), line)
-                yield (NL, line[nl_pos:],
-                        (lnum, nl_pos), (lnum, len(line)), line)
+                yield (
+                    COMMENT,
+                    comment_token,
+                    (lnum, pos),
+                    (lnum, pos + len(comment_token)),
+                    line,
+                )
+                yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line)
                 continue
 
-            if column > indents[-1]:           # count indents
+            if column > indents[-1]:  # count indents
                 indents.append(column)
                 yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
 
-            while column < indents[-1]:        # count dedents
+            while column < indents[-1]:  # count dedents
                 if column not in indents:
                     raise IndentationError(
                         "unindent does not match any outer indentation level",
-                        ("<tokenize>", lnum, pos, line))
+                        ("<tokenize>", lnum, pos, line),
+                    )
                 indents = indents[:-1]
 
                 if async_def and async_def_indent >= indents[-1]:
@@ -438,29 +501,30 @@ def generate_tokens(readline, grammar=None):
                     async_def_nl = False
                     async_def_indent = 0
 
-                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
+                yield (DEDENT, "", (lnum, pos), (lnum, pos), line)
 
             if async_def and async_def_nl and async_def_indent >= indents[-1]:
                 async_def = False
                 async_def_nl = False
                 async_def_indent = 0
 
-        else:                                  # continued statement
+        else:  # continued statement
             if not line:
                 raise TokenError("EOF in multi-line statement", (lnum, 0))
             continued = 0
 
         while pos < max:
             pseudomatch = pseudoprog.match(line, pos)
-            if pseudomatch:                                # scan for tokens
+            if pseudomatch:  # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
                 token, initial = line[start:end], line[start]
 
-                if initial in numchars or \
-                   (initial == '.' and token != '.'):      # ordinary number
+                if initial in numchars or (
+                    initial == "." and token != "."
+                ):  # ordinary number
                     yield (NUMBER, token, spos, epos, line)
-                elif initial in '\r\n':
+                elif initial in "\r\n":
                     newline = NEWLINE
                     if parenlev > 0:
                         newline = NL
@@ -471,7 +535,7 @@ def generate_tokens(readline, grammar=None):
                         stashed = None
                     yield (newline, token, spos, epos, line)
 
-                elif initial == '#':
+                elif initial == "#":
                     assert not token.endswith("\n")
                     if stashed:
                         yield stashed
@@ -480,7 +544,7 @@ def generate_tokens(readline, grammar=None):
                 elif token in triple_quoted:
                     endprog = endprogs[token]
                     endmatch = endprog.match(line, pos)
-                    if endmatch:                           # all on one line
+                    if endmatch:  # all on one line
                         pos = endmatch.end(0)
                         token = line[start:pos]
                         if stashed:
@@ -488,49 +552,61 @@ def generate_tokens(readline, grammar=None):
                             stashed = None
                         yield (STRING, token, spos, (lnum, pos), line)
                     else:
-                        strstart = (lnum, start)           # multiple lines
+                        strstart = (lnum, start)  # multiple lines
                         contstr = line[start:]
                         contline = line
                         break
-                elif initial in single_quoted or \
-                    token[:2] in single_quoted or \
-                    token[:3] in single_quoted:
-                    if token[-1] == '\n':                  # continued string
+                elif (
+                    initial in single_quoted
+                    or token[:2] in single_quoted
+                    or token[:3] in single_quoted
+                ):
+                    if token[-1] == "\n":  # continued string
                         strstart = (lnum, start)
-                        endprog = (endprogs[initial] or endprogs[token[1]] or
-                                   endprogs[token[2]])
+                        endprog = (
+                            endprogs[initial]
+                            or endprogs[token[1]]
+                            or endprogs[token[2]]
+                        )
                         contstr, needcont = line[start:], 1
                         contline = line
                         break
-                    else:                                  # ordinary string
+                    else:  # ordinary string
                         if stashed:
                             yield stashed
                             stashed = None
                         yield (STRING, token, spos, epos, line)
-                elif initial.isidentifier():               # ordinary name
-                    if token in ('async', 'await'):
+                elif initial.isidentifier():  # ordinary name
+                    if token in ("async", "await"):
                         if async_keywords or async_def:
-                            yield (ASYNC if token == 'async' else AWAIT,
-                                   token, spos, epos, line)
+                            yield (
+                                ASYNC if token == "async" else AWAIT,
+                                token,
+                                spos,
+                                epos,
+                                line,
+                            )
                             continue
 
                     tok = (NAME, token, spos, epos, line)
-                    if token == 'async' and not stashed:
+                    if token == "async" and not stashed:
                         stashed = tok
                         continue
 
-                    if token in ('def', 'for'):
-                        if (stashed
-                                and stashed[0] == NAME
-                                and stashed[1] == 'async'):
+                    if token in ("def", "for"):
+                        if stashed and stashed[0] == NAME and stashed[1] == "async":
 
-                            if token == 'def':
+                            if token == "def":
                                 async_def = True
                                 async_def_indent = indents[-1]
 
-                            yield (ASYNC, stashed[1],
-                                   stashed[2], stashed[3],
-                                   stashed[4])
+                            yield (
+                                ASYNC,
+                                stashed[1],
+                                stashed[2],
+                                stashed[3],
+                                stashed[4],
+                            )
                             stashed = None
 
                     if stashed:
@@ -538,7 +614,7 @@ def generate_tokens(readline, grammar=None):
                         stashed = None
 
                     yield tok
-                elif initial == '\\':                      # continued stmt
+                elif initial == "\\":  # continued stmt
                     # This yield is new; needed for better idempotency:
                     if stashed:
                         yield stashed
@@ -546,26 +622,31 @@ def generate_tokens(readline, grammar=None):
                     yield (NL, token, spos, (lnum, pos), line)
                     continued = 1
                 else:
-                    if initial in '([{': parenlev = parenlev + 1
-                    elif initial in ')]}': parenlev = parenlev - 1
+                    if initial in "([{":
+                        parenlev = parenlev + 1
+                    elif initial in ")]}":
+                        parenlev = parenlev - 1
                     if stashed:
                         yield stashed
                         stashed = None
                     yield (OP, token, spos, epos, line)
             else:
-                yield (ERRORTOKEN, line[pos],
-                           (lnum, pos), (lnum, pos+1), line)
+                yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
                 pos = pos + 1
 
     if stashed:
         yield stashed
         stashed = None
 
-    for indent in indents[1:]:                 # pop remaining indent levels
-        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
-    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+    for indent in indents[1:]:  # pop remaining indent levels
+        yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
+    yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
 
-if __name__ == '__main__':                     # testing
+
+if __name__ == "__main__":  # testing
     import sys
-    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
-    else: tokenize(sys.stdin.readline)
+
+    if len(sys.argv) > 1:
+        tokenize(open(sys.argv[1]).readline)
+    else:
+        tokenize(sys.stdin.readline)

blib2to3/pygram.py
index 0c916a933b8332e55c55234d336f5c68da545591..7614af73e1c4e90b051bfaeab670c36aaf38bb8a 100644 (file)
@@ -12,12 +12,10 @@ from .pgen2 import driver
 
 # The grammar file
 _GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
-_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
-                                     "PatternGrammar.txt")
+_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "PatternGrammar.txt")
 
 
 class Symbols(object):
-
     def __init__(self, grammar):
         """Initializer.
 
@@ -38,8 +36,7 @@ def initialize(cache_dir=None):
     global pattern_symbols
 
     # Python 2
-    python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE,
-                                                  cache_dir)
+    python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir)
 
     python_symbols = Symbols(python_grammar)
 
@@ -56,8 +53,11 @@ def initialize(cache_dir=None):
     python_grammar_no_print_statement_no_exec_statement_async_keywords = (
         python_grammar_no_print_statement_no_exec_statement.copy()
     )
-    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
+    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = (
+        True
+    )
 
-    pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
-                                                   cache_dir)
+    pattern_grammar = driver.load_packaged_grammar(
+        "blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
+    )
     pattern_symbols = Symbols(pattern_grammar)

blib2to3/pytree.py
index 4da5700aa323d6890552a845f5515d1212def5c4..6776491cfbf26aa69bdb46977cfeefdff91f4221 100644 (file)
@@ -18,16 +18,21 @@ from io import StringIO
 HUGE = 0x7FFFFFFF  # maximum repeat count, default max
 
 _type_reprs = {}
+
+
 def type_repr(type_num):
     global _type_reprs
     if not _type_reprs:
         from .pygram import python_symbols
+
         # printing tokens is possible but not as useful
         # from .pgen2 import token // token.__dict__.items():
         for name, val in python_symbols.__dict__.items():
-            if type(val) == int: _type_reprs[val] = name
+            if type(val) == int:
+                _type_reprs[val] = name
     return _type_reprs.setdefault(type_num, type_num)
 
+
 class Base(object):
 
     """
@@ -40,7 +45,7 @@ class Base(object):
     """
 
     # Default values for instance variables
-    type = None    # int: token number (< 256) or symbol number (>= 256)
+    type = None  # int: token number (< 256) or symbol number (>= 256)
     parent = None  # Parent node pointer, or None
     children = ()  # Tuple of subnodes
     was_changed = False
@@ -61,7 +66,7 @@ class Base(object):
             return NotImplemented
         return self._eq(other)
 
-    __hash__ = None # For Py3 compatibility.
+    __hash__ = None  # For Py3 compatibility.
 
     def _eq(self, other):
         """
@@ -198,17 +203,16 @@ class Base(object):
         return next_sib.prefix
 
     if sys.version_info < (3, 0):
+
         def __str__(self):
             return str(self).encode("ascii")
 
+
 class Node(Base):
 
     """Concrete implementation for interior nodes."""
 
-    def __init__(self,type, children,
-                 context=None,
-                 prefix=None,
-                 fixers_applied=None):
+    def __init__(self, type, children, context=None, prefix=None, fixers_applied=None):
         """
         Initializer.
 
@@ -233,9 +237,11 @@ class Node(Base):
 
     def __repr__(self):
         """Return a canonical string representation."""
-        return "%s(%s, %r)" % (self.__class__.__name__,
-                               type_repr(self.type),
-                               self.children)
+        return "%s(%s, %r)" % (
+            self.__class__.__name__,
+            type_repr(self.type),
+            self.children,
+        )
 
     def __unicode__(self):
         """
@@ -254,8 +260,11 @@ class Node(Base):
 
     def clone(self):
         """Return a cloned (deep) copy of self."""
-        return Node(self.type, [ch.clone() for ch in self.children],
-                    fixers_applied=self.fixers_applied)
+        return Node(
+            self.type,
+            [ch.clone() for ch in self.children],
+            fixers_applied=self.fixers_applied,
+        )
 
     def post_order(self):
         """Return a post-order iterator for the tree."""
@@ -328,19 +337,17 @@ class Node(Base):
             previous = current
         _next[id(current)] = None
 
+
 class Leaf(Base):
 
     """Concrete implementation for leaf nodes."""
 
     # Default values for instance variables
     _prefix = ""  # Whitespace and comments preceding this token in the input
-    lineno = 0    # Line where this token starts in the input
-    column = 0    # Column where this token starts in the input
+    lineno = 0  # Line where this token starts in the input
+    column = 0  # Column where this token starts in the input
 
-    def __init__(self, type, value,
-                 context=None,
-                 prefix=None,
-                 fixers_applied=[]):
+    def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
         """
         Initializer.
 
@@ -359,9 +366,12 @@ class Leaf(Base):
     def __repr__(self):
         """Return a canonical string representation."""
         from .pgen2.token import tok_name
-        return "%s(%s, %r)" % (self.__class__.__name__,
-                               tok_name.get(self.type, self.type),
-                               self.value)
+
+        return "%s(%s, %r)" % (
+            self.__class__.__name__,
+            tok_name.get(self.type, self.type),
+            self.value,
+        )
 
     def __unicode__(self):
         """
@@ -380,9 +390,12 @@ class Leaf(Base):
 
     def clone(self):
         """Return a cloned (deep) copy of self."""
-        return Leaf(self.type, self.value,
-                    (self.prefix, (self.lineno, self.column)),
-                    fixers_applied=self.fixers_applied)
+        return Leaf(
+            self.type,
+            self.value,
+            (self.prefix, (self.lineno, self.column)),
+            fixers_applied=self.fixers_applied,
+        )
 
     def leaves(self):
         yield self
@@ -407,6 +420,7 @@ class Leaf(Base):
         self.changed()
         self._prefix = prefix
 
+
 def convert(gr, raw_node):
     """
     Convert raw node information to a Node or Leaf instance.
@@ -443,9 +457,9 @@ class BasePattern(object):
     """
 
     # Defaults for instance variables
-    type = None     # Node type (token if < 256, symbol if >= 256)
+    type = None  # Node type (token if < 256, symbol if >= 256)
     content = None  # Optional content matching pattern
-    name = None     # Optional name used to store match in results dict
+    name = None  # Optional name used to store match in results dict
 
     def __new__(cls, *args, **kwds):
         """Constructor that prevents BasePattern from being instantiated."""
@@ -513,7 +527,6 @@ class BasePattern(object):
 
 
 class LeafPattern(BasePattern):
-
     def __init__(self, type=None, content=None, name=None):
         """
         Initializer.  Takes optional type, content, and name.
@@ -660,7 +673,7 @@ class WildcardPattern(BasePattern):
             # Check sanity of alternatives
             assert len(content), repr(content)  # Can't have zero alternatives
             for alt in content:
-                assert len(alt), repr(alt) # Can have empty alternatives
+                assert len(alt), repr(alt)  # Can have empty alternatives
         self.content = content
         self.min = min
         self.max = max
@@ -669,20 +682,29 @@ class WildcardPattern(BasePattern):
     def optimize(self):
         """Optimize certain stacked wildcard patterns."""
         subpattern = None
-        if (self.content is not None and
-            len(self.content) == 1 and len(self.content[0]) == 1):
+        if (
+            self.content is not None
+            and len(self.content) == 1
+            and len(self.content[0]) == 1
+        ):
             subpattern = self.content[0][0]
         if self.min == 1 and self.max == 1:
             if self.content is None:
                 return NodePattern(name=self.name)
-            if subpattern is not None and  self.name == subpattern.name:
+            if subpattern is not None and self.name == subpattern.name:
                 return subpattern.optimize()
-        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
-            subpattern.min <= 1 and self.name == subpattern.name):
-            return WildcardPattern(subpattern.content,
-                                   self.min*subpattern.min,
-                                   self.max*subpattern.max,
-                                   subpattern.name)
+        if (
+            self.min <= 1
+            and isinstance(subpattern, WildcardPattern)
+            and subpattern.min <= 1
+            and self.name == subpattern.name
+        ):
+            return WildcardPattern(
+                subpattern.content,
+                self.min * subpattern.min,
+                self.max * subpattern.max,
+                subpattern.name,
+            )
         return self
 
     def match(self, node, results=None):
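
The optimize() hunk above is Black's standard treatment of long boolean conditions: the expression moves inside the parentheses with each operand on its own line and the `and` operators leading. As a byproduct it also absorbs the stray double space in `and  self.name`. A sketch reproducing the first condition (the 8-space method indentation is what pushes the joined line past 88 columns):

    import black

    SRC = (
        "class WildcardPattern:\n"
        "    def optimize(self):\n"
        "        if (self.content is not None and\n"
        "            len(self.content) == 1 and len(self.content[0]) == 1):\n"
        "            subpattern = self.content[0][0]\n"
    )
    # Prints the stacked `if (` ... `):` form seen in the + lines above.
    print(black.format_str(SRC, mode=black.FileMode()))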
@@ -798,7 +820,7 @@ class WildcardPattern(BasePattern):
         if count < self.max:
             for alt in self.content:
                 for c0, r0 in generate_matches(alt, nodes):
-                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
+                    for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
                         r = {}
                         r.update(r0)
                         r.update(r1)
@@ -806,7 +828,6 @@ class WildcardPattern(BasePattern):
 
 
 class NegatedPattern(BasePattern):
-
     def __init__(self, content=None):
         """
         Initializer.
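
One last pytree.py detail: in the LeafPattern and NegatedPattern hunks, Black drops the single blank line between a class header and a first method, while the blank line before a class docstring (as in Node and Leaf above) is left alone. A minimal sketch, assuming black's behavior at this release:

    import black

    SRC = (
        "class NegatedPattern(BasePattern):\n"
        "\n"
        "    def __init__(self, content=None):\n"
        "        self.content = content\n"
    )
    # The blank line after the class header is removed.
    print(black.format_str(SRC, mode=black.FileMode()))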
index 10efbcf214f7eff96921a48ae74662c89c5eec72..107f77d368e3f92b15c7479612f084d331117c82 100644 (file)
@@ -158,6 +158,16 @@ class BlackTestCase(unittest.TestCase):
         result = runner.invoke(black.main, args)
         self.assertEqual(result.exit_code, exit_code, msg=runner.stderr_bytes.decode())
 
+    @patch("black.dump_to_file", dump_to_stderr)
+    def checkSourceFile(self, name: str) -> None:
+        path = THIS_DIR.parent / name
+        source, expected = read_data(str(path), data=False)
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, black.FileMode())
+        self.assertFalse(ff(path))
+
     @patch("black.dump_to_file", dump_to_stderr)
     def test_empty(self) -> None:
         source = expected = ""
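
The new checkSourceFile helper folds the repeated read/format/equivalence/stability steps into one place; the commit message floats abstracting further if the per-file boilerplate grows. A hypothetical sketch (not what the commit does) of generating the per-file test methods from a list, placed after the class body in tests/test_black.py; only checkSourceFile below comes from the diff, the rest is illustrative:

    SOURCE_FILES = [
        "black.py",
        "setup.py",
        "blib2to3/pygram.py",
        "blib2to3/pytree.py",
    ]

    def _make_test(name: str):
        def test(self) -> None:
            self.checkSourceFile(name)
        return test

    for _name in SOURCE_FILES:
        # e.g. "blib2to3/pytree.py" -> "test_pytree"
        _method = "test_" + _name.rsplit("/", 1)[-1][:-3]
        setattr(BlackTestCase, _method, _make_test(_name))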
@@ -177,23 +187,44 @@ class BlackTestCase(unittest.TestCase):
             os.unlink(tmp_file)
         self.assertFormatEqual(expected, actual)
 
-    @patch("black.dump_to_file", dump_to_stderr)
     def test_self(self) -> None:
-        source, expected = read_data("test_black", data=False)
-        actual = fs(source)
-        self.assertFormatEqual(expected, actual)
-        black.assert_equivalent(source, actual)
-        black.assert_stable(source, actual, black.FileMode())
-        self.assertFalse(ff(THIS_FILE))
+        self.checkSourceFile("tests/test_black.py")
 
-    @patch("black.dump_to_file", dump_to_stderr)
     def test_black(self) -> None:
-        source, expected = read_data("../black", data=False)
-        actual = fs(source)
-        self.assertFormatEqual(expected, actual)
-        black.assert_equivalent(source, actual)
-        black.assert_stable(source, actual, black.FileMode())
-        self.assertFalse(ff(THIS_DIR / ".." / "black.py"))
+        self.checkSourceFile("black.py")
+
+    def test_pygram(self) -> None:
+        self.checkSourceFile("blib2to3/pygram.py")
+
+    def test_pytree(self) -> None:
+        self.checkSourceFile("blib2to3/pytree.py")
+
+    def test_conv(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/conv.py")
+
+    def test_driver(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/driver.py")
+
+    def test_grammar(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/grammar.py")
+
+    def test_literals(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/literals.py")
+
+    def test_parse(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/parse.py")
+
+    def test_pgen(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/pgen.py")
+
+    def test_tokenize(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/tokenize.py")
+
+    def test_token(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/token.py")
+
+    def test_setup(self) -> None:
+        self.checkSourceFile("setup.py")
 
     def test_piping(self) -> None:
         source, expected = read_data("../black", data=False)
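
With the checks split into individually named test methods, a single file's formatting round-trip can be exercised in isolation. A minimal sketch, assuming it is run from the repository root:

    import unittest

    from tests.test_black import BlackTestCase

    # Run only the blib2to3/pytree.py formatting check added above.
    suite = unittest.TestSuite([BlackTestCase("test_pytree")])
    unittest.TextTestRunner(verbosity=2).run(suite)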
@@ -230,15 +261,6 @@ class BlackTestCase(unittest.TestCase):
         actual = actual.rstrip() + "\n"  # the diff output has a trailing space
         self.assertEqual(expected, actual)
 
-    @patch("black.dump_to_file", dump_to_stderr)
-    def test_setup(self) -> None:
-        source, expected = read_data("../setup", data=False)
-        actual = fs(source)
-        self.assertFormatEqual(expected, actual)
-        black.assert_equivalent(source, actual)
-        black.assert_stable(source, actual, black.FileMode())
-        self.assertFalse(ff(THIS_DIR / ".." / "setup.py"))
-
     @patch("black.dump_to_file", dump_to_stderr)
     def test_function(self) -> None:
         source, expected = read_data("function")