src/blib2to3/pgen2/tokenize.py

   1 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
   2 # All rights reserved.
   3
   4 # mypy: allow-untyped-defs, allow-untyped-calls
   5
   6 """Tokenization help for Python programs.
   7
   8 generate_tokens(readline) is a generator that breaks a stream of
   9 text into Python tokens.  It accepts a readline-like method which is called
  10 repeatedly to get the next line of input (or "" for EOF).  It generates
  11 5-tuples with these members:
  12
  13     the token type (see token.py)
  14     the token (a string)
  15     the starting (row, column) indices of the token (a 2-tuple of ints)
  16     the ending (row, column) indices of the token (a 2-tuple of ints)
  17     the original line (string)
  18
It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.
  22
  23 Older entry points
  24     tokenize_loop(readline, tokeneater)
  25     tokenize(readline, tokeneater=printtoken)
  26 are the same, except instead of generating tokens, tokeneater is a callback
  27 function to which the 5 fields described above are passed as 5 arguments,
  28 each time a new token is found."""
  29
  30 import sys
  31 from typing import (
  32     Callable,
  33     Iterable,
  34     Iterator,
  35     List,
  36     Optional,
  37     Set,
  38     Text,
  39     Tuple,
  40     Pattern,
  41     Union,
  42     cast,
  43 )
  44
  45 if sys.version_info >= (3, 8):
  46     from typing import Final
  47 else:
  48     from typing_extensions import Final
  49
  50 from blib2to3.pgen2.token import *
  51 from blib2to3.pgen2.grammar import Grammar
  52
  53 __author__ = "Ka-Ping Yee <ping@lfw.org>"
  54 __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
  55
  56 import re
  57 from codecs import BOM_UTF8, lookup
  58 from blib2to3.pgen2.token import *
  59
  60 from . import token
  61
# Public API: every non-underscore name found in the token module, plus this
# module's own entry points.
__all__ = [x for x in dir(token) if x[0] != "_"] + [
    "tokenize",
    "generate_tokens",
    "untokenize",
]
# The module object was only needed to build __all__; remove the name again.
del token
  68
  69
  70 def group(*choices: str) -> str:
  71     return "(" + "|".join(choices) + ")"
  72
  73
  74 def any(*choices: str) -> str:
  75     return group(*choices) + "*"
  76
  77
  78 def maybe(*choices: str) -> str:
  79     return group(*choices) + "?"
  80
  81
  82 def _combinations(*l: str) -> Set[str]:
  83     return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold())
  84
  85
# Regex fragments for whitespace, comments, names and numeric literals.
# `any` and `maybe` below are this module's regex helpers, not the builtins.

# Horizontal whitespace only (space, form feed, tab); newlines are excluded.
Whitespace = r"[ \f\t]*"
# A comment runs from "#" up to, but not including, the line terminator.
Comment = r"#[^\r\n]*"
# Whitespace, any number of backslash-newline continuations, then an optional
# comment.
Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
# Any run of characters that are neither whitespace nor the listed punctuation.
# The class does not exclude digits, hence the remark on the next line.
Name = (  # this is invalid but it's fine because Name comes after Number in all groups
    r"[^\s#\(\)\[\]\{\}+\-*/!@$%^&=|;:'\",\.<>/?`~\\]+"
)

# Integer literals. Digit groups may be separated by single underscores, and
# the hex/octal/decimal forms also accept a trailing "l"/"L" suffix.
Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
Hexnumber = r"0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?"
# The "o"/"O" marker is optional, so a bare leading zero ("0777") also matches.
Octnumber = r"0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?"
Decnumber = group(r"[1-9]\d*(?:_\d+)*[lL]?", "0[lL]?")
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
# Floating point literals: either a decimal point (digits optional after it,
# or a leading "." followed by digits) with an optional exponent, or digits
# followed by a mandatory exponent.
Exponent = r"[eE][-+]?\d+(?:_\d+)*"
Pointfloat = group(r"\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?", r"\.\d+(?:_\d+)*") + maybe(
    Exponent
)
Expfloat = r"\d+(?:_\d+)*" + Exponent
Floatnumber = group(Pointfloat, Expfloat)
# Imaginary literals: digits or a float followed by "j"/"J".
Imagnumber = group(r"\d+(?:_\d+)*[jJ]", Floatnumber + r"[jJ]")
# Alternatives are tried in this order: imaginary, float, integer.
Number = group(Imagnumber, Floatnumber, Intnumber)
 106
# The four "tail" patterns match the remainder of a string literal up to and
# including its closing quote(s); a backslash always consumes the character
# that follows it.

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
# (A lone quote is allowed inside as long as it does not start the closing triple.)
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
# Optional string prefix: a single letter from u/r/b/f, or a two-letter
# combination pairing r with f or b (either order) or u followed by r.
_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
# Opening of a triple-quoted string: optional prefix plus the three quotes.
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(
    _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
    _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
)
 122
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(
    r"\*\*=?",
    r">>=?",
    r"<<=?",
    r"<>",
    r"!=",
    r"//=?",
    r"->",
    r"[+\-*/%&@|^=<>:]=?",
    r"~",
)

# Any single bracket character: ] [ ( ) { }.
Bracket = "[][(){}]"
# A line terminator or one of the remaining single-character punctuation marks.
Special = group(r"\r?\n", r"[:;.,`@]")
# Every non-name, non-number, non-string token.
Funny = group(Operator, Bracket, Special)

# First (or only) line of ' or " string.
# The literal either closes on this line or ends in a backslash-newline.
ContStr = group(
    _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
    _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
)
# Backslash continuation, comment, or the opening of a triple-quoted string.
PseudoExtras = group(r"\\\r?\n", Comment, Triple)
# Leading whitespace is skipped outside the capture; group 1 holds the token
# text itself. Number is listed before Name, which the Name pattern relies on.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 149
# Compiled patterns used by generate_tokens(): pseudoprog finds the next token
# on a line; single3prog/double3prog find the end of a triple-quoted string.
pseudoprog: Final = re.compile(PseudoToken, re.UNICODE)
single3prog = re.compile(Single3)
double3prog = re.compile(Double3)

# Every accepted spelling of a string prefix: all case variants of r/f and r/b
# (alone or combined in either order), plus u and ur.
_strprefixes = (
    _combinations("r", "R", "f", "F")
    | _combinations("r", "R", "b", "B")
    | {"u", "U", "ur", "uR", "Ur", "UR"}
)

# Maps a string opener to the pattern matching the rest of that string.
# Triple-quote openers are listed with every prefix; single-quote openers are
# listed only unprefixed, so callers must look them up by the quote character.
endprogs: Final = {
    "'": re.compile(Single),
    '"': re.compile(Double),
    "'''": single3prog,
    '"""': double3prog,
    **{f"{prefix}'''": single3prog for prefix in _strprefixes},
    **{f'{prefix}"""': double3prog for prefix in _strprefixes},
}

# All openers (with and without prefix) of triple-quoted strings.
triple_quoted: Final = (
    {"'''", '"""'}
    | {f"{prefix}'''" for prefix in _strprefixes}
    | {f'{prefix}"""' for prefix in _strprefixes}
)
# All openers (with and without prefix) of single-quoted strings.
single_quoted: Final = (
    {"'", '"'}
    | {f"{prefix}'" for prefix in _strprefixes}
    | {f'{prefix}"' for prefix in _strprefixes}
)

# Column width of a tab stop when measuring indentation in generate_tokens().
tabsize = 8
 181
 182
 183 class TokenError(Exception):
 184     pass
 185
 186
 187 class StopTokenizing(Exception):
 188     pass
 189
 190
# A (row, column) position in the source text.
Coord = Tuple[int, int]
 192
 193
 194 def printtoken(
 195     type: int, token: Text, srow_col: Coord, erow_col: Coord, line: Text
 196 ) -> None:  # for testing
 197     (srow, scol) = srow_col
 198     (erow, ecol) = erow_col
 199     print(
 200         "%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
 201     )
 202
 203
# Callback signature used by tokenize(): the five token fields
# (type, string, start, end, line) are passed as separate arguments.
TokenEater = Callable[[int, Text, Coord, Coord, Text], None]
 205
 206
 207 def tokenize(readline: Callable[[], Text], tokeneater: TokenEater = printtoken) -> None:
 208     """
 209     The tokenize() function accepts two parameters: one representing the
 210     input stream, and one providing an output mechanism for tokenize().
 211
 212     The first parameter, readline, must be a callable object which provides
 213     the same interface as the readline() method of built-in file objects.
 214     Each call to the function should return one line of input as a string.
 215
 216     The second parameter, tokeneater, must also be a callable object. It is
 217     called once for each token, with five arguments, corresponding to the
 218     tuples generated by generate_tokens().
 219     """
 220     try:
 221         tokenize_loop(readline, tokeneater)
 222     except StopTokenizing:
 223         pass
 224
 225
 226 # backwards compatible interface
 227 def tokenize_loop(readline: Callable[[], Text], tokeneater: TokenEater) -> None:
 228     for token_info in generate_tokens(readline):
 229         tokeneater(*token_info)
 230
 231
# A full token: (type, string, start, end, line).
GoodTokenInfo = Tuple[int, Text, Coord, Coord, Text]
# Either a bare (type, string) pair or a full token.
TokenInfo = Union[Tuple[int, str], GoodTokenInfo]
 234
 235
 236 class Untokenizer:
 237     tokens: List[Text]
 238     prev_row: int
 239     prev_col: int
 240
 241     def __init__(self) -> None:
 242         self.tokens = []
 243         self.prev_row = 1
 244         self.prev_col = 0
 245
 246     def add_whitespace(self, start: Coord) -> None:
 247         row, col = start
 248         assert row <= self.prev_row
 249         col_offset = col - self.prev_col
 250         if col_offset:
 251             self.tokens.append(" " * col_offset)
 252
 253     def untokenize(self, iterable: Iterable[TokenInfo]) -> Text:
 254         for t in iterable:
 255             if len(t) == 2:
 256                 self.compat(cast(Tuple[int, str], t), iterable)
 257                 break
 258             tok_type, token, start, end, line = cast(
 259                 Tuple[int, Text, Coord, Coord, Text], t
 260             )
 261             self.add_whitespace(start)
 262             self.tokens.append(token)
 263             self.prev_row, self.prev_col = end
 264             if tok_type in (NEWLINE, NL):
 265                 self.prev_row += 1
 266                 self.prev_col = 0
 267         return "".join(self.tokens)
 268
 269     def compat(self, token: Tuple[int, Text], iterable: Iterable[TokenInfo]) -> None:
 270         startline = False
 271         indents = []
 272         toks_append = self.tokens.append
 273         toknum, tokval = token
 274         if toknum in (NAME, NUMBER):
 275             tokval += " "
 276         if toknum in (NEWLINE, NL):
 277             startline = True
 278         for tok in iterable:
 279             toknum, tokval = tok[:2]
 280
 281             if toknum in (NAME, NUMBER, ASYNC, AWAIT):
 282                 tokval += " "
 283
 284             if toknum == INDENT:
 285                 indents.append(tokval)
 286                 continue
 287             elif toknum == DEDENT:
 288                 indents.pop()
 289                 continue
 290             elif toknum in (NEWLINE, NL):
 291                 startline = True
 292             elif startline and indents:
 293                 toks_append(indents[-1])
 294                 startline = False
 295             toks_append(tokval)
 296
 297
# Matches a comment line containing "coding:" or "coding=" and captures the
# encoding name that follows.
cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
# Matches (on bytes) a line that is empty or holds only whitespace and/or a
# comment.
blank_re = re.compile(rb"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
 300
 301
 302 def _get_normal_name(orig_enc: str) -> str:
 303     """Imitates get_normal_name in tokenizer.c."""
 304     # Only care about the first 12 characters.
 305     enc = orig_enc[:12].lower().replace("_", "-")
 306     if enc == "utf-8" or enc.startswith("utf-8-"):
 307         return "utf-8"
 308     if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
 309         ("latin-1-", "iso-8859-1-", "iso-latin-1-")
 310     ):
 311         return "iso-8859-1"
 312     return orig_enc
 313
 314
 315 def detect_encoding(readline: Callable[[], bytes]) -> Tuple[str, List[bytes]]:
 316     """
 317     The detect_encoding() function is used to detect the encoding that should
 318     be used to decode a Python source file. It requires one argument, readline,
 319     in the same way as the tokenize() generator.
 320
 321     It will call readline a maximum of twice, and return the encoding used
 322     (as a string) and a list of any lines (left as bytes) it has read
 323     in.
 324
 325     It detects the encoding from the presence of a utf-8 bom or an encoding
 326     cookie as specified in pep-0263. If both a bom and a cookie are present, but
 327     disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
 328     charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
 329     'utf-8-sig' is returned.
 330
 331     If no encoding is specified, then the default of 'utf-8' will be returned.
 332     """
 333     bom_found = False
 334     encoding = None
 335     default = "utf-8"
 336
 337     def read_or_stop() -> bytes:
 338         try:
 339             return readline()
 340         except StopIteration:
 341             return bytes()
 342
 343     def find_cookie(line: bytes) -> Optional[str]:
 344         try:
 345             line_string = line.decode("ascii")
 346         except UnicodeDecodeError:
 347             return None
 348         match = cookie_re.match(line_string)
 349         if not match:
 350             return None
 351         encoding = _get_normal_name(match.group(1))
 352         try:
 353             codec = lookup(encoding)
 354         except LookupError:
 355             # This behaviour mimics the Python interpreter
 356             raise SyntaxError("unknown encoding: " + encoding)
 357
 358         if bom_found:
 359             if codec.name != "utf-8":
 360                 # This behaviour mimics the Python interpreter
 361                 raise SyntaxError("encoding problem: utf-8")
 362             encoding += "-sig"
 363         return encoding
 364
 365     first = read_or_stop()
 366     if first.startswith(BOM_UTF8):
 367         bom_found = True
 368         first = first[3:]
 369         default = "utf-8-sig"
 370     if not first:
 371         return default, []
 372
 373     encoding = find_cookie(first)
 374     if encoding:
 375         return encoding, [first]
 376     if not blank_re.match(first):
 377         return default, [first]
 378
 379     second = read_or_stop()
 380     if not second:
 381         return default, [first]
 382
 383     encoding = find_cookie(second)
 384     if encoding:
 385         return encoding, [first, second]
 386
 387     return default, [first, second]
 388
 389
 390 def untokenize(iterable: Iterable[TokenInfo]) -> Text:
 391     """Transform tokens back into Python source code.
 392
 393     Each element returned by the iterable must be a token sequence
 394     with at least two elements, a token number and token value.  If
 395     only two tokens are passed, the resulting output is poor.
 396
 397     Round-trip invariant for full input:
 398         Untokenized source will match input source exactly
 399
 400     Round-trip invariant for limited input:
 401         # Output text will tokenize the back to the input
 402         t1 = [tok[:2] for tok in generate_tokens(f.readline)]
 403         newcode = untokenize(t1)
 404         readline = iter(newcode.splitlines(1)).next
 405         t2 = [tok[:2] for tokin generate_tokens(readline)]
 406         assert t1 == t2
 407     """
 408     ut = Untokenizer()
 409     return ut.untokenize(iterable)
 410
 411
 412 def generate_tokens(
 413     readline: Callable[[], Text], grammar: Optional[Grammar] = None
 414 ) -> Iterator[GoodTokenInfo]:
 415     """
 416     The generate_tokens() generator requires one argument, readline, which
 417     must be a callable object which provides the same interface as the
 418     readline() method of built-in file objects. Each call to the function
 419     should return one line of input as a string.  Alternately, readline
 420     can be a callable function terminating with StopIteration:
 421         readline = open(myfile).next    # Example of alternate readline
 422
 423     The generator produces 5-tuples with these members: the token type; the
 424     token string; a 2-tuple (srow, scol) of ints specifying the row and
 425     column where the token begins in the source; a 2-tuple (erow, ecol) of
 426     ints specifying the row and column where the token ends in the source;
 427     and the line on which the token was found. The line passed is the
 428     logical line; continuation lines are included.
 429     """
 430     lnum = parenlev = continued = 0
 431     numchars: Final[str] = "0123456789"
 432     contstr, needcont = "", 0
 433     contline: Optional[str] = None
 434     indents = [0]
 435
 436     # If we know we're parsing 3.7+, we can unconditionally parse `async` and
 437     # `await` as keywords.
 438     async_keywords = False if grammar is None else grammar.async_keywords
 439     # 'stashed' and 'async_*' are used for async/await parsing
 440     stashed: Optional[GoodTokenInfo] = None
 441     async_def = False
 442     async_def_indent = 0
 443     async_def_nl = False
 444
 445     strstart: Tuple[int, int]
 446     endprog: Pattern[str]
 447
 448     while 1:  # loop over lines in stream
 449         try:
 450             line = readline()
 451         except StopIteration:
 452             line = ""
 453         lnum += 1
 454         pos, max = 0, len(line)
 455
 456         if contstr:  # continued string
 457             assert contline is not None
 458             if not line:
 459                 raise TokenError("EOF in multi-line string", strstart)
 460             endmatch = endprog.match(line)
 461             if endmatch:
 462                 pos = end = endmatch.end(0)
 463                 yield (
 464                     STRING,
 465                     contstr + line[:end],
 466                     strstart,
 467                     (lnum, end),
 468                     contline + line,
 469                 )
 470                 contstr, needcont = "", 0
 471                 contline = None
 472             elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
 473                 yield (
 474                     ERRORTOKEN,
 475                     contstr + line,
 476                     strstart,
 477                     (lnum, len(line)),
 478                     contline,
 479                 )
 480                 contstr = ""
 481                 contline = None
 482                 continue
 483             else:
 484                 contstr = contstr + line
 485                 contline = contline + line
 486                 continue
 487
 488         elif parenlev == 0 and not continued:  # new statement
 489             if not line:
 490                 break
 491             column = 0
 492             while pos < max:  # measure leading whitespace
 493                 if line[pos] == " ":
 494                     column += 1
 495                 elif line[pos] == "\t":
 496                     column = (column // tabsize + 1) * tabsize
 497                 elif line[pos] == "\f":
 498                     column = 0
 499                 else:
 500                     break
 501                 pos += 1
 502             if pos == max:
 503                 break
 504
 505             if stashed:
 506                 yield stashed
 507                 stashed = None
 508
 509             if line[pos] in "\r\n":  # skip blank lines
 510                 yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
 511                 continue
 512
 513             if line[pos] == "#":  # skip comments
 514                 comment_token = line[pos:].rstrip("\r\n")
 515                 nl_pos = pos + len(comment_token)
 516                 yield (
 517                     COMMENT,
 518                     comment_token,
 519                     (lnum, pos),
 520                     (lnum, nl_pos),
 521                     line,
 522                 )
 523                 yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line)
 524                 continue
 525
 526             if column > indents[-1]:  # count indents
 527                 indents.append(column)
 528                 yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
 529
 530             while column < indents[-1]:  # count dedents
 531                 if column not in indents:
 532                     raise IndentationError(
 533                         "unindent does not match any outer indentation level",
 534                         ("<tokenize>", lnum, pos, line),
 535                     )
 536                 indents = indents[:-1]
 537
 538                 if async_def and async_def_indent >= indents[-1]:
 539                     async_def = False
 540                     async_def_nl = False
 541                     async_def_indent = 0
 542
 543                 yield (DEDENT, "", (lnum, pos), (lnum, pos), line)
 544
 545             if async_def and async_def_nl and async_def_indent >= indents[-1]:
 546                 async_def = False
 547                 async_def_nl = False
 548                 async_def_indent = 0
 549
 550         else:  # continued statement
 551             if not line:
 552                 raise TokenError("EOF in multi-line statement", (lnum, 0))
 553             continued = 0
 554
 555         while pos < max:
 556             pseudomatch = pseudoprog.match(line, pos)
 557             if pseudomatch:  # scan for tokens
 558                 start, end = pseudomatch.span(1)
 559                 spos, epos, pos = (lnum, start), (lnum, end), end
 560                 token, initial = line[start:end], line[start]
 561
 562                 if initial in numchars or (
 563                     initial == "." and token != "."
 564                 ):  # ordinary number
 565                     yield (NUMBER, token, spos, epos, line)
 566                 elif initial in "\r\n":
 567                     newline = NEWLINE
 568                     if parenlev > 0:
 569                         newline = NL
 570                     elif async_def:
 571                         async_def_nl = True
 572                     if stashed:
 573                         yield stashed
 574                         stashed = None
 575                     yield (newline, token, spos, epos, line)
 576
 577                 elif initial == "#":
 578                     assert not token.endswith("\n")
 579                     if stashed:
 580                         yield stashed
 581                         stashed = None
 582                     yield (COMMENT, token, spos, epos, line)
 583                 elif token in triple_quoted:
 584                     endprog = endprogs[token]
 585                     endmatch = endprog.match(line, pos)
 586                     if endmatch:  # all on one line
 587                         pos = endmatch.end(0)
 588                         token = line[start:pos]
 589                         if stashed:
 590                             yield stashed
 591                             stashed = None
 592                         yield (STRING, token, spos, (lnum, pos), line)
 593                     else:
 594                         strstart = (lnum, start)  # multiple lines
 595                         contstr = line[start:]
 596                         contline = line
 597                         break
 598                 elif (
 599                     initial in single_quoted
 600                     or token[:2] in single_quoted
 601                     or token[:3] in single_quoted
 602                 ):
 603                     if token[-1] == "\n":  # continued string
 604                         strstart = (lnum, start)
 605                         maybe_endprog = (
 606                             endprogs.get(initial)
 607                             or endprogs.get(token[1])
 608                             or endprogs.get(token[2])
 609                         )
 610                         assert (
 611                             maybe_endprog is not None
 612                         ), f"endprog not found for {token}"
 613                         endprog = maybe_endprog
 614                         contstr, needcont = line[start:], 1
 615                         contline = line
 616                         break
 617                     else:  # ordinary string
 618                         if stashed:
 619                             yield stashed
 620                             stashed = None
 621                         yield (STRING, token, spos, epos, line)
 622                 elif initial.isidentifier():  # ordinary name
 623                     if token in ("async", "await"):
 624                         if async_keywords or async_def:
 625                             yield (
 626                                 ASYNC if token == "async" else AWAIT,
 627                                 token,
 628                                 spos,
 629                                 epos,
 630                                 line,
 631                             )
 632                             continue
 633
 634                     tok = (NAME, token, spos, epos, line)
 635                     if token == "async" and not stashed:
 636                         stashed = tok
 637                         continue
 638
 639                     if token in ("def", "for"):
 640                         if stashed and stashed[0] == NAME and stashed[1] == "async":
 641                             if token == "def":
 642                                 async_def = True
 643                                 async_def_indent = indents[-1]
 644
 645                             yield (
 646                                 ASYNC,
 647                                 stashed[1],
 648                                 stashed[2],
 649                                 stashed[3],
 650                                 stashed[4],
 651                             )
 652                             stashed = None
 653
 654                     if stashed:
 655                         yield stashed
 656                         stashed = None
 657
 658                     yield tok
 659                 elif initial == "\\":  # continued stmt
 660                     # This yield is new; needed for better idempotency:
 661                     if stashed:
 662                         yield stashed
 663                         stashed = None
 664                     yield (NL, token, spos, (lnum, pos), line)
 665                     continued = 1
 666                 else:
 667                     if initial in "([{":
 668                         parenlev += 1
 669                     elif initial in ")]}":
 670                         parenlev -= 1
 671                     if stashed:
 672                         yield stashed
 673                         stashed = None
 674                     yield (OP, token, spos, epos, line)
 675             else:
 676                 yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
 677                 pos += 1
 678
 679     if stashed:
 680         yield stashed
 681         stashed = None
 682
 683     for indent in indents[1:]:  # pop remaining indent levels
 684         yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
 685     yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
 686
 687
 688 if __name__ == "__main__":  # testing
 689     import sys
 690
 691     if len(sys.argv) > 1:
 692         tokenize(open(sys.argv[1]).readline)
 693     else:
 694         tokenize(sys.stdin.readline)