Reduce usage of regex (#2644)

author Jelle Zijlstra <jelle.zijlstra@gmail.com>

Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)

committer GitHub <noreply@github.com>

Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
author Jelle Zijlstra <jelle.zijlstra@gmail.com>
Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
committer GitHub <noreply@github.com>
Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
diff --git a/CHANGES.md b/CHANGES.md

index 85feb1a76007a33f44043e7dce57aa2af21b40c4..7214405c4290f0d28e6f6ef923b99c4478ef3622 100644 (file)
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -7,12 +7,13 @@
  - Cell magics are now only processed if they are known Python cell magics. Earlier, all
    cell magics were tokenized, leading to possible indentation errors e.g. with
    `%%writefile`. (#2630)
-- Fixed Python 3.10 support on platforms without ProcessPoolExecutor (#2631)
-- Fixed `match` statements with open sequence subjects, like `match a, b:` or
+- Fix Python 3.10 support on platforms without ProcessPoolExecutor (#2631)
+- Reduce usage of the `regex` dependency (#2644)
+- Fix `match` statements with open sequence subjects, like `match a, b:` or
    `match a, *b:` (#2639) (#2659)
-- Fixed `match`/`case` statements that contain `match`/`case` soft keywords multiple
+- Fix `match`/`case` statements that contain `match`/`case` soft keywords multiple
    times, like `match re.match()` (#2661)
-- Fixed assignment to environment variables in Jupyter Notebooks (#2642)
+- Fix assignment to environment variables in Jupyter Notebooks (#2642)
  - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653)
  
  ## 21.11b1
diff --git a/src/black/__init__.py b/src/black/__init__.py

index c2b52e6eadb07936e41d53c59f61abf37bbb1e9d..1923c069edec794956a3c5a4506ce1ca7a4cea03 100644 (file)
--- a/src/black/__init__.py
+++ b/src/black/__init__.py
@@ -10,7 +10,7 @@ from multiprocessing import Manager, freeze_support
  import os
  from pathlib import Path
  from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
-import regex as re
+import re
  import signal
  import sys
  import tokenize
diff --git a/src/black/comments.py b/src/black/comments.py

index a8152d687a3f7d48dd48f709582737cd1d798c1d..28b9117101d7aa0eb19ce64190b6c2a83b83a4fd 100644 (file)
--- a/src/black/comments.py
+++ b/src/black/comments.py
@@ -1,7 +1,7 @@
  import sys
  from dataclasses import dataclass
  from functools import lru_cache
-import regex as re
+import re
  from typing import Iterator, List, Optional, Union
  
  if sys.version_info >= (3, 8):
diff --git a/src/black/strings.py b/src/black/strings.py

index 97debe3b5de08569e24f5ca8abcdbbfe4e258d21..06a5da01f0cf71f786f3878480e71282731f01c3 100644 (file)
--- a/src/black/strings.py
+++ b/src/black/strings.py
@@ -2,7 +2,7 @@
  Simple formatting on strings. Further string formatting code is in trans.py.
  """
  
-import regex as re
+import re
  import sys
  from functools import lru_cache
  from typing import List, Pattern
@@ -156,7 +156,7 @@ def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str:
  # performance on a long list literal of strings by 5-9% since lru_cache's
  # caching overhead is much lower.
  @lru_cache(maxsize=64)
-def _cached_compile(pattern: str) -> re.Pattern:
+def _cached_compile(pattern: str) -> Pattern[str]:
      return re.compile(pattern)
  
  
diff --git a/src/black/trans.py b/src/black/trans.py

index d918ef111a21993b517f10b3ba9c30fad05e1a8a..a4d1e6fbc7990f3813dfcc469a906af519252eb7 100644 (file)
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -4,7 +4,7 @@ String transformers that can split and merge strings.
  from abc import ABC, abstractmethod
  from collections import defaultdict
  from dataclasses import dataclass
-import regex as re
+import regex as re  # We need recursive patterns here (?R)
  from typing import (
      Any,
      Callable,
diff --git a/src/blib2to3/pgen2/conv.py b/src/blib2to3/pgen2/conv.py

index 78165217a1b0ebb777c522a56a480c51278aa53f..fa9825e54d67e2cb75f278d931a2660c044b0fcc 100644 (file)
--- a/src/blib2to3/pgen2/conv.py
+++ b/src/blib2to3/pgen2/conv.py
@@ -29,7 +29,7 @@ without having to invoke the Python pgen C program.
  """
  
  # Python imports
-import regex as re
+import re
  
  # Local imports
  from pgen2 import grammar, token
diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py

index 283fac2d5375a8f9de49879447fb80bf622c8111..a7e17df1e8f9f9452ae3e49db895fbc875ab40bd 100644 (file)
--- a/src/blib2to3/pgen2/tokenize.py
+++ b/src/blib2to3/pgen2/tokenize.py
@@ -52,7 +52,7 @@ from blib2to3.pgen2.grammar import Grammar
  __author__ = "Ka-Ping Yee <ping@lfw.org>"
  __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
  
-import regex as re
+import re
  from codecs import BOM_UTF8, lookup
  from blib2to3.pgen2.token import *
  
@@ -86,7 +86,7 @@ Whitespace = r"[ \f\t]*"
  Comment = r"#[^\r\n]*"
  Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
  Name = (  # this is invalid but it's fine because Name comes after Number in all groups
-    r"\w+"
+    r"[^\s#\(\)\[\]\{\}+\-*/!@$%^&=|;:'\",\.<>/?`~\\]+"
  )
  
  Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
author	Jelle Zijlstra <jelle.zijlstra@gmail.com>
	Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
committer	GitHub <noreply@github.com>
	Wed, 1 Dec 2021 02:01:36 +0000 (18:01 -0800)
CHANGES.md		patch | blob | history
src/black/__init__.py		patch | blob | history
src/black/comments.py		patch | blob | history
src/black/strings.py		patch | blob | history
src/black/trans.py		patch | blob | history
src/blib2to3/pgen2/conv.py		patch | blob | history
src/blib2to3/pgen2/tokenize.py		patch | blob | history