X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/ecdbf085a772e8d737b8a8735d39a7af413cecfb..3bfb66971f03da39ae1f4c98c30d55e60f63d33b:/blib2to3/pgen2/tokenize.py?ds=inline

diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py
index 4f03130..9775489 100644
--- a/blib2to3/pgen2/tokenize.py
+++ b/blib2to3/pgen2/tokenize.py
@@ -29,7 +29,7 @@ __author__ = 'Ka-Ping Yee <ping@lfw.org>'
 __credits__ = \
     'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
 
-import string, re, unicodedata
+import regex as re
 from codecs import BOM_UTF8, lookup
 from blib2to3.pgen2.token import *
 
@@ -56,7 +56,7 @@ def _combinations(*l):
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'[^\d\W]\w*'
+Name = r'\w+'  # this is invalid but it's fine because Name comes after Number in all groups
 
 Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
 Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
@@ -89,7 +89,7 @@ String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
 # recognized as two instances of =).
 Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                  r"//=?", r"->",
-                 r"[+\-*/%&@|^=<>]=?",
+                 r"[+\-*/%&@|^=<>:]=?",
                  r"~")
 
 Bracket = '[][(){}]'
@@ -334,9 +334,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)
 
-InitialCategories = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}
-
-def generate_tokens(readline):
+def generate_tokens(readline, grammar=None):
     """
     The generate_tokens() generator requires one argument, readline, which
     must be a callable object which provides the same interface as the
@@ -353,11 +351,14 @@ def generate_tokens(readline):
     logical line; continuation lines are included.
     """
     lnum = parenlev = continued = 0
-    namechars, numchars = string.ascii_letters + '_', '0123456789'
+    numchars = '0123456789'
     contstr, needcont = '', 0
     contline = None
     indents = [0]
 
+    # If we know we're parsing 3.7+, we can unconditionally parse `async` and
+    # `await` as keywords.
+    async_keywords = False if grammar is None else grammar.async_keywords
     # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
     async_def = False
@@ -412,10 +413,6 @@ def generate_tokens(readline):
                 yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
                 continue
 
-            if column > indents[-1]:           # count indents
-                indents.append(column)
-                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
-
             if line[pos] == '#':               # skip comments
                 comment_token = line[pos:].rstrip('\r\n')
                 nl_pos = pos + len(comment_token)
@@ -425,6 +422,10 @@ def generate_tokens(readline):
                         (lnum, nl_pos), (lnum, len(line)), line)
                 continue
 
+            if column > indents[-1]:           # count indents
+                indents.append(column)
+                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+
             while column < indents[-1]:        # count dedents
                 if column not in indents:
                     raise IndentationError(
@@ -451,8 +452,6 @@ def generate_tokens(readline):
 
         while pos < max:
             pseudomatch = pseudoprog.match(line, pos)
-            if not pseudomatch:
-                print('no pseudomatch')
             if pseudomatch:                                # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
@@ -508,10 +507,9 @@ def generate_tokens(readline):
                             yield stashed
                             stashed = None
                         yield (STRING, token, spos, epos, line)
-                elif (initial in namechars or              # ordinary name
-                      unicodedata.category(initial) in InitialCategories):
+                elif initial.isidentifier():               # ordinary name
                     if token in ('async', 'await'):
-                        if async_def:
+                        if async_keywords or async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
                                    token, spos, epos, line)
                             continue
@@ -521,13 +519,14 @@ def generate_tokens(readline):
                         stashed = tok
                         continue
 
-                    if token == 'def':
+                    if token in ('def', 'for'):
                         if (stashed
                                 and stashed[0] == NAME
                                 and stashed[1] == 'async'):
 
-                            async_def = True
-                            async_def_indent = indents[-1]
+                            if token == 'def':
+                                async_def = True
+                                async_def_indent = indents[-1]
 
                             yield (ASYNC, stashed[1],
                                    stashed[2], stashed[3],