git.madduck.net Git - etc/vim.git/blobdiff - src/blib2to3/pgen2/tokenize.py
madduck's git repository
Every one of the projects in this repository is available at the canonical
URL git://git.madduck.net/madduck/pub/<projectpath> — see
each project's metadata for the exact URL.
All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
SSH access, as well as push access, can be individually
arranged.
If you use my repositories frequently, consider adding the following
snippet to ~/.gitconfig and using the third clone URL listed for each
project:
[url "git://git.madduck.net/madduck/"]
insteadOf = madduck:
import sys
from typing import (
Callable,
import sys
from typing import (
Callable,
Iterable,
Iterator,
List,
Optional,
Iterable,
Iterator,
List,
Optional,
-if sys.version_info >= (3, 8):
- from typing import Final
-else:
- from typing_extensions import Final
-
-from blib2to3.pgen2.token import *
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.grammar import Grammar
+from blib2to3.pgen2.token import (
+ ASYNC,
+ AWAIT,
+ COMMENT,
+ DEDENT,
+ ENDMARKER,
+ ERRORTOKEN,
+ INDENT,
+ NAME,
+ NEWLINE,
+ NL,
+ NUMBER,
+ OP,
+ STRING,
+ tok_name,
+)
__author__ = "Ka-Ping Yee <ping@lfw.org>"
__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
import re
from codecs import BOM_UTF8, lookup
__author__ = "Ka-Ping Yee <ping@lfw.org>"
__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
import re
from codecs import BOM_UTF8, lookup
-from blib2to3.pgen2.token import *
+def group(*choices: str) -> str :
return "(" + "|".join(choices) + ")"
return "(" + "|".join(choices) + ")"
+def any(*choices: str) -> str :
return group(*choices) + "*"
return group(*choices) + "*"
+def maybe(*choices: str) -> str :
return group(*choices) + "?"
return group(*choices) + "?"
-def _combinations(*l) :
- return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold())
+def _combinations(*l: str) -> Set[str] :
+ return {x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()}
-def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
- (srow, scol) = xxx_todo_changeme
- (erow, ecol) = xxx_todo_changeme1
+Coord = Tuple[int, int]
+
+
+def printtoken(
+ type: int, token: str, srow_col: Coord, erow_col: Coord, line: str
+) -> None: # for testing
+ (srow, scol) = srow_col
+ (erow, ecol) = erow_col
print(
"%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
)
print(
"%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
)
-Coord = Tuple[int, int]
-TokenEater = Callable[[int, Text, Coord, Coord, Text], None]
+TokenEater = Callable[[int, str, Coord, Coord, str], None]
-def tokenize(readline: Callable[[], Text ], tokeneater: TokenEater = printtoken) -> None:
+def tokenize(readline: Callable[[], str ], tokeneater: TokenEater = printtoken) -> None:
"""
The tokenize() function accepts two parameters: one representing the
input stream, and one providing an output mechanism for tokenize().
"""
The tokenize() function accepts two parameters: one representing the
input stream, and one providing an output mechanism for tokenize().
# backwards compatible interface
# backwards compatible interface
-def tokenize_loop(readline, tokeneater) :
+def tokenize_loop(readline: Callable[[], str], tokeneater: TokenEater) -> None :
for token_info in generate_tokens(readline):
tokeneater(*token_info)
for token_info in generate_tokens(readline):
tokeneater(*token_info)
-GoodTokenInfo = Tuple[int, Text, Coord, Coord, Text ]
+GoodTokenInfo = Tuple[int, str, Coord, Coord, str ]
TokenInfo = Union[Tuple[int, str], GoodTokenInfo]
class Untokenizer:
TokenInfo = Union[Tuple[int, str], GoodTokenInfo]
class Untokenizer:
prev_row: int
prev_col: int
prev_row: int
prev_col: int
if col_offset:
self.tokens.append(" " * col_offset)
if col_offset:
self.tokens.append(" " * col_offset)
- def untokenize(self, iterable: Iterable[TokenInfo]) -> Text :
+ def untokenize(self, iterable: Iterable[TokenInfo]) -> str :
for t in iterable:
if len(t) == 2:
self.compat(cast(Tuple[int, str], t), iterable)
break
tok_type, token, start, end, line = cast(
for t in iterable:
if len(t) == 2:
self.compat(cast(Tuple[int, str], t), iterable)
break
tok_type, token, start, end, line = cast(
- Tuple[int, Text, Coord, Coord, Text ], t
+ Tuple[int, str, Coord, Coord, str ], t
)
self.add_whitespace(start)
self.tokens.append(token)
)
self.add_whitespace(start)
self.tokens.append(token)
self.prev_col = 0
return "".join(self.tokens)
self.prev_col = 0
return "".join(self.tokens)
- def compat(self, token: Tuple[int, Text ], iterable: Iterable[TokenInfo]) -> None:
+ def compat(self, token: Tuple[int, str ], iterable: Iterable[TokenInfo]) -> None:
startline = False
indents = []
toks_append = self.tokens.append
startline = False
indents = []
toks_append = self.tokens.append
try:
return readline()
except StopIteration:
try:
return readline()
except StopIteration:
def find_cookie(line: bytes) -> Optional[str]:
try:
def find_cookie(line: bytes) -> Optional[str]:
try:
return default, [first, second]
return default, [first, second]
-def untokenize(iterable: Iterable[TokenInfo]) -> Text :
+def untokenize(iterable: Iterable[TokenInfo]) -> str :
"""Transform tokens back into Python source code.
Each element returned by the iterable must be a token sequence
"""Transform tokens back into Python source code.
Each element returned by the iterable must be a token sequence
- readline: Callable[[], Text ], grammar: Optional[Grammar] = None
+ readline: Callable[[], str ], grammar: Optional[Grammar] = None
) -> Iterator[GoodTokenInfo]:
"""
The generate_tokens() generator requires one argument, readline, which
) -> Iterator[GoodTokenInfo]:
"""
The generate_tokens() generator requires one argument, readline, which
or endprogs.get(token[1])
or endprogs.get(token[2])
)
or endprogs.get(token[1])
or endprogs.get(token[2])
)
- assert maybe_endprog is not None, f"endprog not found for {token}"
+ assert (
+ maybe_endprog is not None
+ ), f"endprog not found for {token}"
endprog = maybe_endprog
contstr, needcont = line[start:], 1
contline = line
endprog = maybe_endprog
contstr, needcont = line[start:], 1
contline = line
if token in ("def", "for"):
if stashed and stashed[0] == NAME and stashed[1] == "async":
if token in ("def", "for"):
if stashed and stashed[0] == NAME and stashed[1] == "async":
if token == "def":
async_def = True
async_def_indent = indents[-1]
if token == "def":
async_def = True
async_def_indent = indents[-1]
yield stashed
stashed = None
yield stashed
stashed = None
- for indent in indents[1:]: # pop remaining indent levels
+ for _indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
if __name__ == "__main__": # testing
yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
if __name__ == "__main__": # testing
if len(sys.argv) > 1:
tokenize(open(sys.argv[1]).readline)
else:
if len(sys.argv) > 1:
tokenize(open(sys.argv[1]).readline)
else: