]>
git.madduck.net Git - etc/vim.git/blobdiff - blib2to3/pgen2/tokenize.py
madduck's git repository
Every one of the projects in this repository is available at the canonical
URL git://git.madduck.net/madduck/pub/<projectpath> — see
each project's metadata for the exact URL.
All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
SSH access, as well as push access, can be individually
arranged.
If you use my repositories frequently, consider adding the following
snippet to ~/.gitconfig and using the third clone URL listed for each
project:
[url "git://git.madduck.net/madduck/"]
insteadOf = madduck:
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
-import string, re, unicodedata
from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import *
from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import *
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'\w+' # this is invalid but it's fine because Name comes after Number in all groups
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
ut = Untokenizer()
return ut.untokenize(iterable)
ut = Untokenizer()
return ut.untokenize(iterable)
-InitialCategories = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}
-
-def generate_tokens(readline):
+def generate_tokens(readline, grammar=None):
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
- namechars, numchars = string.ascii_letters + '_', '0123456789'
+ numchars = '0123456789'
contstr, needcont = '', 0
contline = None
indents = [0]
contstr, needcont = '', 0
contline = None
indents = [0]
+ # If we know we're parsing 3.7+, we can unconditionally parse `async` and
+ # `await` as keywords.
+ async_keywords = False if grammar is None else grammar.async_keywords
# 'stashed' and 'async_*' are used for async/await parsing
stashed = None
async_def = False
# 'stashed' and 'async_*' are used for async/await parsing
stashed = None
async_def = False
yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
continue
yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
continue
- if column > indents[-1]: # count indents
- indents.append(column)
- yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
-
if line[pos] == '#': # skip comments
comment_token = line[pos:].rstrip('\r\n')
nl_pos = pos + len(comment_token)
if line[pos] == '#': # skip comments
comment_token = line[pos:].rstrip('\r\n')
nl_pos = pos + len(comment_token)
(lnum, nl_pos), (lnum, len(line)), line)
continue
(lnum, nl_pos), (lnum, len(line)), line)
continue
+ if column > indents[-1]: # count indents
+ indents.append(column)
+ yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+
while column < indents[-1]: # count dedents
if column not in indents:
raise IndentationError(
while column < indents[-1]: # count dedents
if column not in indents:
raise IndentationError(
while pos < max:
pseudomatch = pseudoprog.match(line, pos)
while pos < max:
pseudomatch = pseudoprog.match(line, pos)
- if not pseudomatch:
- print('no pseudomatch')
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
yield stashed
stashed = None
yield (STRING, token, spos, epos, line)
yield stashed
stashed = None
yield (STRING, token, spos, epos, line)
- elif (initial in namechars or # ordinary name
- unicodedata.category(initial) in InitialCategories):
+ elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
if token in ('async', 'await'):
+ if async_keywords or async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
+ if token in ('def', 'for'):
if (stashed
and stashed[0] == NAME
and stashed[1] == 'async'):
if (stashed
and stashed[0] == NAME
and stashed[1] == 'async'):
- async_def = True
- async_def_indent = indents[-1]
+ if token == 'def':
+ async_def = True
+ async_def_indent = indents[-1]
yield (ASYNC, stashed[1],
stashed[2], stashed[3],
yield (ASYNC, stashed[1],
stashed[2], stashed[3],