git.madduck.net Git - etc/vim.git/commitdiff

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

[blib2to3] Support non-ASCII identifiers
author: Łukasz Langa <lukasz@langa.pl>
Thu, 5 Apr 2018 04:38:25 +0000 (21:38 -0700)
committer: Łukasz Langa <lukasz@langa.pl>
Thu, 5 Apr 2018 09:29:01 +0000 (02:29 -0700)
This support isn't *exactly* right per PEP 3131 as the regex engine is a bit
too limited for that and I didn't want to spend time on Other_ID_Start and
Other_ID_Continue unless they're actually needed.

Hopefully this doesn't slow it down too much.

blib2to3/pgen2/tokenize.py
tests/expression.diff
tests/expression.py

index b6bbf4ec7dde2d912690be28a938bfb7f0742cc0..6b8a5cb2ef54fb0bdbd98f2d2e20ac73f7ae3c3c 100644 (file)
@@ -29,7 +29,7 @@ __author__ = 'Ka-Ping Yee <ping@lfw.org>'
 __credits__ = \
     'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
 
 __credits__ = \
     'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
 
-import string, re
+import string, re, unicodedata
 from codecs import BOM_UTF8, lookup
 from blib2to3.pgen2.token import *
 
 from codecs import BOM_UTF8, lookup
 from blib2to3.pgen2.token import *
 
@@ -52,7 +52,7 @@ def maybe(*choices): return group(*choices) + '?'
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'[a-zA-Z_]\w*'
+Name = r'[^\d\W]\w*'
 
 Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
 Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
 
 Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
 Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
@@ -103,8 +103,10 @@ ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 
-tokenprog, pseudoprog, single3prog, double3prog = list(map(
-    re.compile, (Token, PseudoToken, Single3, Double3)))
+tokenprog = re.compile(Token, re.UNICODE)
+pseudoprog = re.compile(PseudoToken, re.UNICODE)
+single3prog = re.compile(Single3)
+double3prog = re.compile(Double3)
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
@@ -358,6 +360,8 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)
 
     ut = Untokenizer()
     return ut.untokenize(iterable)
 
+InitialCategories = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}
+
 def generate_tokens(readline):
     """
     The generate_tokens() generator requires one argument, readline, which
 def generate_tokens(readline):
     """
     The generate_tokens() generator requires one argument, readline, which
@@ -473,6 +477,8 @@ def generate_tokens(readline):
 
         while pos < max:
             pseudomatch = pseudoprog.match(line, pos)
 
         while pos < max:
             pseudomatch = pseudoprog.match(line, pos)
+            if not pseudomatch:
+                print('no pseudomatch')
             if pseudomatch:                                # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
             if pseudomatch:                                # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
@@ -528,7 +534,8 @@ def generate_tokens(readline):
                             yield stashed
                             stashed = None
                         yield (STRING, token, spos, epos, line)
                             yield stashed
                             stashed = None
                         yield (STRING, token, spos, epos, line)
-                elif initial in namechars:                 # ordinary name
+                elif (initial in namechars or              # ordinary name
+                      unicodedata.category(initial) in InitialCategories):
                     if token in ('async', 'await'):
                         if async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
                     if token in ('async', 'await'):
                         if async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
index 4cdf803e8889d263ef98fdf40b0485301fc49f9c..f37b16bde745c601f6c692bcdb951c4161a12267 100644 (file)
  ]
  slice[0]
  slice[0:1]
  ]
  slice[0]
  slice[0:1]
-@@ -114,71 +123,90 @@
+@@ -114,73 +123,92 @@
  numpy[-(c + 1):, d]
  numpy[:, l[-2]]
  numpy[:, ::-1]
  numpy[-(c + 1):, d]
  numpy[:, l[-2]]
  numpy[:, ::-1]
 +).order_by(
 +    models.Customer.id.asc()
 +).all()
 +).order_by(
 +    models.Customer.id.asc()
 +).all()
-+
+ Ø = set()
+ authors.łukasz.say_thanks()
  
  
++
  def gen():
      yield from outside_of_generator
 +
  def gen():
      yield from outside_of_generator
 +
 +
  last_call()
  # standalone comment at ENDMARKER
 +
  last_call()
  # standalone comment at ENDMARKER
-
index e0c819b68cb322a2c24b8f6007b0a0bf71e2360e..3cd0c61984b684b8bdf1ad33efd5516c62f36cf5 100644 (file)
@@ -135,6 +135,8 @@ e = (1,).count(1)
 what_is_up_with_those_new_coord_names = (coord_names + set(vars_to_create)) + set(vars_to_remove)
 what_is_up_with_those_new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove)
 result = session.query(models.Customer.id).filter(models.Customer.account_id == account_id, models.Customer.email == email_address).order_by(models.Customer.id.asc(),).all()
 what_is_up_with_those_new_coord_names = (coord_names + set(vars_to_create)) + set(vars_to_remove)
 what_is_up_with_those_new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove)
 result = session.query(models.Customer.id).filter(models.Customer.account_id == account_id, models.Customer.email == email_address).order_by(models.Customer.id.asc(),).all()
+Ø = set()
+authors.łukasz.say_thanks()
 
 def gen():
     yield from outside_of_generator
 
 def gen():
     yield from outside_of_generator
@@ -340,6 +342,8 @@ result = session.query(models.Customer.id).filter(
 ).order_by(
     models.Customer.id.asc()
 ).all()
 ).order_by(
     models.Customer.id.asc()
 ).all()
+Ø = set()
+authors.łukasz.say_thanks()
 
 
 def gen():
 
 
 def gen():