Reduce usage of regex (#2644)

author Jelle Zijlstra <jelle.zijlstra@gmail.com>

Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)

committer GitHub <noreply@github.com>

Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
author Jelle Zijlstra <jelle.zijlstra@gmail.com>
Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
committer GitHub <noreply@github.com>
Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
diff --git a/CHANGES.md b/CHANGES.md

index 85feb1a76007a33f44043e7dce57aa2af21b40c4..7214405c4290f0d28e6f6ef923b99c4478ef3622 100644 (file)
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -7,12 +7,13 @@
  - Cell magics are now only processed if they are known Python cell magics. Earlier, all
    cell magics were tokenized, leading to possible indentation errors e.g. with
    `%%writefile`. (#2630)
  - Cell magics are now only processed if they are known Python cell magics. Earlier, all
    cell magics were tokenized, leading to possible indentation errors e.g. with
    `%%writefile`. (#2630)
-- Fixed Python 3.10 support on platforms without ProcessPoolExecutor (#2631)
-- Fixed `match` statements with open sequence subjects, like `match a, b:` or
+- Fix Python 3.10 support on platforms without ProcessPoolExecutor (#2631)
+- Reduce usage of the `regex` dependency (#2644)
+- Fix `match` statements with open sequence subjects, like `match a, b:` or
    `match a, *b:` (#2639) (#2659)
    `match a, *b:` (#2639) (#2659)
-- Fixed `match`/`case` statements that contain `match`/`case` soft keywords multiple
+- Fix `match`/`case` statements that contain `match`/`case` soft keywords multiple
    times, like `match re.match()` (#2661)
    times, like `match re.match()` (#2661)
-- Fixed assignment to environment variables in Jupyter Notebooks (#2642)
+- Fix assignment to environment variables in Jupyter Notebooks (#2642)
  - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653)
  
  ## 21.11b1
  - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653)
  
  ## 21.11b1
diff --git a/src/black/__init__.py b/src/black/__init__.py

index c2b52e6eadb07936e41d53c59f61abf37bbb1e9d..1923c069edec794956a3c5a4506ce1ca7a4cea03 100644 (file)
--- a/src/black/__init__.py
+++ b/src/black/__init__.py
@@ -10,7 +10,7 @@ from multiprocessing import Manager, freeze_support
  import os
  from pathlib import Path
  from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
  import os
  from pathlib import Path
  from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
-import regex as re
+import re
  import signal
  import sys
  import tokenize
  import signal
  import sys
  import tokenize
diff --git a/src/black/comments.py b/src/black/comments.py

index a8152d687a3f7d48dd48f709582737cd1d798c1d..28b9117101d7aa0eb19ce64190b6c2a83b83a4fd 100644 (file)
--- a/src/black/comments.py
+++ b/src/black/comments.py
@@ -1,7 +1,7 @@
  import sys
  from dataclasses import dataclass
  from functools import lru_cache
  import sys
  from dataclasses import dataclass
  from functools import lru_cache
-import regex as re
+import re
  from typing import Iterator, List, Optional, Union
  
  if sys.version_info >= (3, 8):
  from typing import Iterator, List, Optional, Union
  
  if sys.version_info >= (3, 8):
diff --git a/src/black/strings.py b/src/black/strings.py

index 97debe3b5de08569e24f5ca8abcdbbfe4e258d21..06a5da01f0cf71f786f3878480e71282731f01c3 100644 (file)
--- a/src/black/strings.py
+++ b/src/black/strings.py
@@ -2,7 +2,7 @@
  Simple formatting on strings. Further string formatting code is in trans.py.
  """
  
  Simple formatting on strings. Further string formatting code is in trans.py.
  """
  
-import regex as re
+import re
  import sys
  from functools import lru_cache
  from typing import List, Pattern
  import sys
  from functools import lru_cache
  from typing import List, Pattern
@@ -156,7 +156,7 @@ def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str:
  # performance on a long list literal of strings by 5-9% since lru_cache's
  # caching overhead is much lower.
  @lru_cache(maxsize=64)
  # performance on a long list literal of strings by 5-9% since lru_cache's
  # caching overhead is much lower.
  @lru_cache(maxsize=64)
-def _cached_compile(pattern: str) -> re.Pattern:
+def _cached_compile(pattern: str) -> Pattern[str]:
      return re.compile(pattern)
  
  
      return re.compile(pattern)
  
  
diff --git a/src/black/trans.py b/src/black/trans.py

index d918ef111a21993b517f10b3ba9c30fad05e1a8a..a4d1e6fbc7990f3813dfcc469a906af519252eb7 100644 (file)
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -4,7 +4,7 @@ String transformers that can split and merge strings.
  from abc import ABC, abstractmethod
  from collections import defaultdict
  from dataclasses import dataclass
  from abc import ABC, abstractmethod
  from collections import defaultdict
  from dataclasses import dataclass
-import regex as re
+import regex as re  # We need recursive patterns here (?R)
  from typing import (
      Any,
      Callable,
  from typing import (
      Any,
      Callable,
diff --git a/src/blib2to3/pgen2/conv.py b/src/blib2to3/pgen2/conv.py

index 78165217a1b0ebb777c522a56a480c51278aa53f..fa9825e54d67e2cb75f278d931a2660c044b0fcc 100644 (file)
--- a/src/blib2to3/pgen2/conv.py
+++ b/src/blib2to3/pgen2/conv.py
@@ -29,7 +29,7 @@ without having to invoke the Python pgen C program.
  """
  
  # Python imports
  """
  
  # Python imports
-import regex as re
+import re
  
  # Local imports
  from pgen2 import grammar, token
  
  # Local imports
  from pgen2 import grammar, token
diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py

index 283fac2d5375a8f9de49879447fb80bf622c8111..a7e17df1e8f9f9452ae3e49db895fbc875ab40bd 100644 (file)
--- a/src/blib2to3/pgen2/tokenize.py
+++ b/src/blib2to3/pgen2/tokenize.py
@@ -52,7 +52,7 @@ from blib2to3.pgen2.grammar import Grammar
  __author__ = "Ka-Ping Yee <ping@lfw.org>"
  __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
  
  __author__ = "Ka-Ping Yee <ping@lfw.org>"
  __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
  
-import regex as re
+import re
  from codecs import BOM_UTF8, lookup
  from blib2to3.pgen2.token import *
  
  from codecs import BOM_UTF8, lookup
  from blib2to3.pgen2.token import *
  
@@ -86,7 +86,7 @@ Whitespace = r"[ \f\t]*"
  Comment = r"#[^\r\n]*"
  Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
  Name = (  # this is invalid but it's fine because Name comes after Number in all groups
  Comment = r"#[^\r\n]*"
  Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
  Name = (  # this is invalid but it's fine because Name comes after Number in all groups
-    r"\w+"
+    r"[^\s#\(\)\[\]\{\}+\-*/!@$%^&=|;:'\",\.<>/?`~\\]+"
  )
  
  Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
  )
  
  Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
author	Jelle Zijlstra <jelle.zijlstra@gmail.com>
	Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
committer	GitHub <noreply@github.com>
	Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
CHANGES.md		patch | blob | history
src/black/__init__.py		patch | blob | history
src/black/comments.py		patch | blob | history
src/black/strings.py		patch | blob | history
src/black/trans.py		patch | blob | history
src/blib2to3/pgen2/conv.py		patch | blob | history
src/blib2to3/pgen2/tokenize.py		patch | blob | history