X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/b4dca26c7d93f930bbd5a7b552807370b60d4298..8a16b25fb1145e5b7de9c322e52167e8f6a59c79:/src/blib2to3/pgen2/tokenize.py diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index a5e8918..d0607f4 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -30,29 +30,41 @@ each time a new token is found.""" import sys from typing import ( Callable, + Final, Iterable, Iterator, List, Optional, + Pattern, Set, - Text, Tuple, - Pattern, Union, cast, ) -from typing import Final - -from blib2to3.pgen2.token import * from blib2to3.pgen2.grammar import Grammar +from blib2to3.pgen2.token import ( + ASYNC, + AWAIT, + COMMENT, + DEDENT, + ENDMARKER, + ERRORTOKEN, + INDENT, + NAME, + NEWLINE, + NL, + NUMBER, + OP, + STRING, + tok_name, +) __author__ = "Ka-Ping Yee " __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro" import re from codecs import BOM_UTF8, lookup -from blib2to3.pgen2.token import * from . import token @@ -77,7 +89,7 @@ def maybe(*choices: str) -> str: def _combinations(*l: str) -> Set[str]: - return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()) + return {x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()} Whitespace = r"[ \f\t]*" @@ -189,7 +201,7 @@ Coord = Tuple[int, int] def printtoken( - type: int, token: Text, srow_col: Coord, erow_col: Coord, line: Text + type: int, token: str, srow_col: Coord, erow_col: Coord, line: str ) -> None: # for testing (srow, scol) = srow_col (erow, ecol) = erow_col @@ -198,10 +210,10 @@ def printtoken( ) -TokenEater = Callable[[int, Text, Coord, Coord, Text], None] +TokenEater = Callable[[int, str, Coord, Coord, str], None] -def tokenize(readline: Callable[[], Text], tokeneater: TokenEater = printtoken) -> None: +def tokenize(readline: Callable[[], str], tokeneater: TokenEater = printtoken) -> None: """ The tokenize() function accepts two parameters: one representing the input stream, and one providing an output mechanism for tokenize(). @@ -221,17 +233,17 @@ def tokenize(readline: Callable[[], Text], tokeneater: TokenEater = printtoken) # backwards compatible interface -def tokenize_loop(readline: Callable[[], Text], tokeneater: TokenEater) -> None: +def tokenize_loop(readline: Callable[[], str], tokeneater: TokenEater) -> None: for token_info in generate_tokens(readline): tokeneater(*token_info) -GoodTokenInfo = Tuple[int, Text, Coord, Coord, Text] +GoodTokenInfo = Tuple[int, str, Coord, Coord, str] TokenInfo = Union[Tuple[int, str], GoodTokenInfo] class Untokenizer: - tokens: List[Text] + tokens: List[str] prev_row: int prev_col: int @@ -247,13 +259,13 @@ class Untokenizer: if col_offset: self.tokens.append(" " * col_offset) - def untokenize(self, iterable: Iterable[TokenInfo]) -> Text: + def untokenize(self, iterable: Iterable[TokenInfo]) -> str: for t in iterable: if len(t) == 2: self.compat(cast(Tuple[int, str], t), iterable) break tok_type, token, start, end, line = cast( - Tuple[int, Text, Coord, Coord, Text], t + Tuple[int, str, Coord, Coord, str], t ) self.add_whitespace(start) self.tokens.append(token) @@ -263,7 +275,7 @@ class Untokenizer: self.prev_col = 0 return "".join(self.tokens) - def compat(self, token: Tuple[int, Text], iterable: Iterable[TokenInfo]) -> None: + def compat(self, token: Tuple[int, str], iterable: Iterable[TokenInfo]) -> None: startline = False indents = [] toks_append = self.tokens.append @@ -335,7 +347,7 @@ def detect_encoding(readline: Callable[[], bytes]) -> Tuple[str, List[bytes]]: try: return readline() except StopIteration: - return bytes() + return b"" def find_cookie(line: bytes) -> Optional[str]: try: @@ -384,7 +396,7 @@ def detect_encoding(readline: Callable[[], bytes]) -> Tuple[str, List[bytes]]: return default, [first, second] -def untokenize(iterable: Iterable[TokenInfo]) -> Text: +def untokenize(iterable: Iterable[TokenInfo]) -> str: """Transform tokens back into Python source code. Each element returned by the iterable must be a token sequence @@ -407,7 +419,7 @@ def untokenize(iterable: Iterable[TokenInfo]) -> Text: def generate_tokens( - readline: Callable[[], Text], grammar: Optional[Grammar] = None + readline: Callable[[], str], grammar: Optional[Grammar] = None ) -> Iterator[GoodTokenInfo]: """ The generate_tokens() generator requires one argument, readline, which @@ -677,14 +689,12 @@ def generate_tokens( yield stashed stashed = None - for indent in indents[1:]: # pop remaining indent levels + for _indent in indents[1:]: # pop remaining indent levels yield (DEDENT, "", (lnum, 0), (lnum, 0), "") yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "") if __name__ == "__main__": # testing - import sys - if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) else: